mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-24 18:42:35 +00:00
fix(image-generation): clamp OpenAI sizes by model family
This commit is contained in:
parent
ffd85a8611
commit
8c0b2c1a29
@ -761,12 +761,27 @@ def _minimax_images_from_payload(payload: dict[str, Any]) -> list[str]:
|
|||||||
# OpenAI image generation
|
# OpenAI image generation
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
_OPENAI_ASPECT_RATIO_SIZES = {
|
_OPENAI_DALLE2_SUPPORTED_SIZES = {"256x256", "512x512", "1024x1024"}
|
||||||
|
_OPENAI_DALLE3_SUPPORTED_SIZES = {"1024x1024", "1792x1024", "1024x1792"}
|
||||||
|
_OPENAI_GPT_IMAGE_SUPPORTED_SIZES = {
|
||||||
|
"1024x1024",
|
||||||
|
"1536x1024",
|
||||||
|
"1024x1536",
|
||||||
|
"auto",
|
||||||
|
}
|
||||||
|
_OPENAI_DALLE2_ASPECT_RATIO_SIZES = {
|
||||||
|
"1:1": "1024x1024",
|
||||||
|
"16:9": "1024x1024",
|
||||||
|
"9:16": "1024x1024",
|
||||||
|
"3:4": "1024x1024",
|
||||||
|
"4:3": "1024x1024",
|
||||||
|
}
|
||||||
|
_OPENAI_DALLE3_ASPECT_RATIO_SIZES = {
|
||||||
"1:1": "1024x1024",
|
"1:1": "1024x1024",
|
||||||
"16:9": "1792x1024",
|
"16:9": "1792x1024",
|
||||||
"9:16": "1024x1792",
|
"9:16": "1024x1792",
|
||||||
"3:4": "1024x1360",
|
"3:4": "1024x1792",
|
||||||
"4:3": "1360x1024",
|
"4:3": "1792x1024",
|
||||||
}
|
}
|
||||||
_OPENAI_GPT_IMAGE_ASPECT_RATIO_SIZES = {
|
_OPENAI_GPT_IMAGE_ASPECT_RATIO_SIZES = {
|
||||||
"1:1": "1024x1024",
|
"1:1": "1024x1024",
|
||||||
@ -827,8 +842,7 @@ class OpenAIImageGenerationClient(ImageGenerationProvider):
|
|||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
}
|
}
|
||||||
|
|
||||||
# gpt-image-* models don't support response_format or n
|
if not _openai_is_gpt_image_model(clean_model):
|
||||||
if not clean_model.startswith("gpt-image"):
|
|
||||||
body["response_format"] = "b64_json"
|
body["response_format"] = "b64_json"
|
||||||
body["n"] = 1
|
body["n"] = 1
|
||||||
|
|
||||||
@ -988,18 +1002,58 @@ def _openai_size(
|
|||||||
image_size: str | None,
|
image_size: str | None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Resolve aspect ratio or image_size to an OpenAI Images API size string."""
|
"""Resolve aspect ratio or image_size to an OpenAI Images API size string."""
|
||||||
if image_size and "x" in image_size.lower():
|
sizes, supported_sizes = _openai_size_options(model)
|
||||||
return image_size
|
explicit_size = _normalize_openai_image_size(image_size)
|
||||||
sizes = (
|
if explicit_size and _openai_explicit_size_supported(
|
||||||
_OPENAI_GPT_IMAGE_ASPECT_RATIO_SIZES
|
explicit_size,
|
||||||
if model.startswith("gpt-image")
|
supported_sizes=supported_sizes,
|
||||||
else _OPENAI_ASPECT_RATIO_SIZES
|
):
|
||||||
)
|
return explicit_size
|
||||||
|
if explicit_size:
|
||||||
|
logger.warning(
|
||||||
|
"OpenAI image size '{}' is not supported by {}; using aspect ratio/default size",
|
||||||
|
explicit_size,
|
||||||
|
model,
|
||||||
|
)
|
||||||
if aspect_ratio and aspect_ratio in sizes:
|
if aspect_ratio and aspect_ratio in sizes:
|
||||||
return sizes[aspect_ratio]
|
return sizes[aspect_ratio]
|
||||||
return "1024x1024"
|
return "1024x1024"
|
||||||
|
|
||||||
|
|
||||||
|
def _openai_is_gpt_image_model(model: str) -> bool:
|
||||||
|
normalized = model.lower()
|
||||||
|
return normalized.startswith(("gpt-image", "chatgpt-image"))
|
||||||
|
|
||||||
|
|
||||||
|
def _openai_size_options(model: str) -> tuple[dict[str, str], set[str] | None]:
    """Select the size tables that apply to *model*.

    Returns a pair ``(aspect_ratio_sizes, supported_sizes)``. The first item
    maps aspect-ratio strings to size strings; the second is the set of
    explicit sizes for the model family, or ``None`` for ``gpt-image-2``
    models, for which no fixed set is supplied. The model name is matched
    case-insensitively by prefix, and names that match no known prefix get
    the GPT image tables.
    """
    family_options = (
        ("dall-e-2", (_OPENAI_DALLE2_ASPECT_RATIO_SIZES, _OPENAI_DALLE2_SUPPORTED_SIZES)),
        ("dall-e-3", (_OPENAI_DALLE3_ASPECT_RATIO_SIZES, _OPENAI_DALLE3_SUPPORTED_SIZES)),
        ("gpt-image-2", (_OPENAI_GPT_IMAGE_ASPECT_RATIO_SIZES, None)),
    )
    lowered = model.lower()
    # Prefixes are tried in declaration order; the first match wins.
    for prefix, options in family_options:
        if lowered.startswith(prefix):
            return options
    return _OPENAI_GPT_IMAGE_ASPECT_RATIO_SIZES, _OPENAI_GPT_IMAGE_SUPPORTED_SIZES
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_openai_image_size(image_size: str | None) -> str | None:
|
||||||
|
if not image_size:
|
||||||
|
return None
|
||||||
|
normalized = image_size.strip().lower()
|
||||||
|
return normalized or None
|
||||||
|
|
||||||
|
|
||||||
|
def _openai_explicit_size_supported(
|
||||||
|
size: str,
|
||||||
|
*,
|
||||||
|
supported_sizes: set[str] | None,
|
||||||
|
) -> bool:
|
||||||
|
if supported_sizes is not None:
|
||||||
|
return size in supported_sizes
|
||||||
|
width, sep, height = size.partition("x")
|
||||||
|
return bool(sep and width.isdecimal() and height.isdecimal())
|
||||||
|
|
||||||
|
|
||||||
async def _openai_images_from_payload(
|
async def _openai_images_from_payload(
|
||||||
client: httpx.AsyncClient,
|
client: httpx.AsyncClient,
|
||||||
payload: dict[str, Any],
|
payload: dict[str, Any],
|
||||||
|
|||||||
@ -624,6 +624,34 @@ async def test_openai_aspect_ratio_to_size() -> None:
|
|||||||
assert fake.calls[0]["json"]["size"] == "1024x1024"
|
assert fake.calls[0]["json"]["size"] == "1024x1024"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_openai_dalle3_uses_supported_orientation_sizes() -> None:
    """DALL-E 3 requests for 3:4 and 4:3 carry the 1024x1792 / 1792x1024 sizes."""
    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    generator = OpenAIImageGenerationClient(
        api_key="sk-openai-test",
        client=transport,  # type: ignore[arg-type]
    )
    expected_sizes = (("3:4", "1024x1792"), ("4:3", "1792x1024"))

    # Send every request first, then check the recorded bodies in call order.
    for ratio, _ in expected_sizes:
        await generator.generate(prompt="draw", model="dall-e-3", aspect_ratio=ratio)

    for index, (_, size) in enumerate(expected_sizes):
        assert transport.calls[index]["json"]["size"] == size
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_openai_dalle2_uses_square_size_for_non_square_ratios() -> None:
    """A 16:9 request to DALL-E 2 is sent with the 1024x1024 size."""
    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    generator = OpenAIImageGenerationClient(
        api_key="sk-openai-test",
        client=transport,  # type: ignore[arg-type]
    )

    await generator.generate(prompt="draw", model="dall-e-2", aspect_ratio="16:9")

    request_body = transport.calls[0]["json"]
    assert request_body["size"] == "1024x1024"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_openai_gpt_image_uses_supported_landscape_size() -> None:
|
async def test_openai_gpt_image_uses_supported_landscape_size() -> None:
|
||||||
fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
|
fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
|
||||||
@ -637,6 +665,21 @@ async def test_openai_gpt_image_uses_supported_landscape_size() -> None:
|
|||||||
assert fake.calls[0]["json"]["size"] == "1536x1024"
|
assert fake.calls[0]["json"]["size"] == "1536x1024"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_openai_gpt_image_uses_supported_orientation_sizes() -> None:
    """gpt-image-1 requests for 3:4 and 4:3 carry the 1024x1536 / 1536x1024 sizes."""
    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    generator = OpenAIImageGenerationClient(
        api_key="sk-openai-test",
        client=transport,  # type: ignore[arg-type]
    )
    expected_sizes = (("3:4", "1024x1536"), ("4:3", "1536x1024"))

    # Send every request first, then check the recorded bodies in call order.
    for ratio, _ in expected_sizes:
        await generator.generate(prompt="draw", model="gpt-image-1", aspect_ratio=ratio)

    for index, (_, size) in enumerate(expected_sizes):
        assert transport.calls[index]["json"]["size"] == size
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_openai_default_size_when_no_aspect_ratio() -> None:
|
async def test_openai_default_size_when_no_aspect_ratio() -> None:
|
||||||
fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
|
fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
|
||||||
@ -651,6 +694,25 @@ async def test_openai_default_size_when_no_aspect_ratio() -> None:
|
|||||||
assert body["size"] == "1024x1024"
|
assert body["size"] == "1024x1024"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_openai_ignores_explicit_size_unsupported_by_model_family() -> None:
    """With dall-e-3, image_size 1536x1024 is not sent; the 16:9 size is used."""
    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    generator = OpenAIImageGenerationClient(
        api_key="sk-openai-test",
        client=transport,  # type: ignore[arg-type]
    )
    request_kwargs = {
        "prompt": "draw",
        "model": "dall-e-3",
        "aspect_ratio": "16:9",
        "image_size": "1536x1024",
    }

    await generator.generate(**request_kwargs)

    assert transport.calls[0]["json"]["size"] == "1792x1024"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_openai_uses_explicit_image_size() -> None:
|
async def test_openai_uses_explicit_image_size() -> None:
|
||||||
fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
|
fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user