From 8c0b2c1a29616ffc2f206bb5baab835858a7aa9c Mon Sep 17 00:00:00 2001
From: Xubin Ren <52506698+Re-bin@users.noreply.github.com>
Date: Fri, 22 May 2026 15:02:59 +0800
Subject: [PATCH] fix(image-generation): clamp OpenAI sizes by model family

---
 nanobot/providers/image_generation.py    | 78 ++++++++++++++++++++----
 tests/providers/test_image_generation.py | 62 +++++++++++++++++++
 2 files changed, 128 insertions(+), 12 deletions(-)

diff --git a/nanobot/providers/image_generation.py b/nanobot/providers/image_generation.py
index d0aac7860..337387c14 100644
--- a/nanobot/providers/image_generation.py
+++ b/nanobot/providers/image_generation.py
@@ -761,12 +761,27 @@ def _minimax_images_from_payload(payload: dict[str, Any]) -> list[str]:
 # OpenAI image generation
 # ---------------------------------------------------------------------------
 
-_OPENAI_ASPECT_RATIO_SIZES = {
+_OPENAI_DALLE2_SUPPORTED_SIZES = {"256x256", "512x512", "1024x1024"}
+_OPENAI_DALLE3_SUPPORTED_SIZES = {"1024x1024", "1792x1024", "1024x1792"}
+_OPENAI_GPT_IMAGE_SUPPORTED_SIZES = {
+    "1024x1024",
+    "1536x1024",
+    "1024x1536",
+    "auto",
+}
+_OPENAI_DALLE2_ASPECT_RATIO_SIZES = {
+    "1:1": "1024x1024",
+    "16:9": "1024x1024",
+    "9:16": "1024x1024",
+    "3:4": "1024x1024",
+    "4:3": "1024x1024",
+}
+_OPENAI_DALLE3_ASPECT_RATIO_SIZES = {
     "1:1": "1024x1024",
     "16:9": "1792x1024",
     "9:16": "1024x1792",
-    "3:4": "1024x1360",
-    "4:3": "1360x1024",
+    "3:4": "1024x1792",
+    "4:3": "1792x1024",
 }
 _OPENAI_GPT_IMAGE_ASPECT_RATIO_SIZES = {
     "1:1": "1024x1024",
@@ -827,8 +842,7 @@ class OpenAIImageGenerationClient(ImageGenerationProvider):
             "prompt": prompt,
         }
 
-        # gpt-image-* models don't support response_format or n
-        if not clean_model.startswith("gpt-image"):
+        if not _openai_is_gpt_image_model(clean_model):
             body["response_format"] = "b64_json"
             body["n"] = 1
 
@@ -988,18 +1002,58 @@ def _openai_size(
     image_size: str | None,
 ) -> str:
     """Resolve aspect ratio or image_size to an OpenAI Images API size string."""
-    if image_size and "x" in image_size.lower():
-        return image_size
-    sizes = (
-        _OPENAI_GPT_IMAGE_ASPECT_RATIO_SIZES
-        if model.startswith("gpt-image")
-        else _OPENAI_ASPECT_RATIO_SIZES
-    )
+    sizes, supported_sizes = _openai_size_options(model)
+    explicit_size = _normalize_openai_image_size(image_size)
+    if explicit_size and _openai_explicit_size_supported(
+        explicit_size,
+        supported_sizes=supported_sizes,
+    ):
+        return explicit_size
+    if explicit_size:
+        logger.warning(
+            "OpenAI image size '{}' is not supported by {}; using aspect ratio/default size",
+            explicit_size,
+            model,
+        )
     if aspect_ratio and aspect_ratio in sizes:
         return sizes[aspect_ratio]
     return "1024x1024"
 
 
+def _openai_is_gpt_image_model(model: str) -> bool:
+    normalized = model.lower()
+    return normalized.startswith(("gpt-image", "chatgpt-image"))
+
+
+def _openai_size_options(model: str) -> tuple[dict[str, str], set[str] | None]:
+    normalized = model.lower()
+    if normalized.startswith("dall-e-2"):
+        return _OPENAI_DALLE2_ASPECT_RATIO_SIZES, _OPENAI_DALLE2_SUPPORTED_SIZES
+    if normalized.startswith("dall-e-3"):
+        return _OPENAI_DALLE3_ASPECT_RATIO_SIZES, _OPENAI_DALLE3_SUPPORTED_SIZES
+    if normalized.startswith("gpt-image-2"):
+        return _OPENAI_GPT_IMAGE_ASPECT_RATIO_SIZES, None
+    return _OPENAI_GPT_IMAGE_ASPECT_RATIO_SIZES, _OPENAI_GPT_IMAGE_SUPPORTED_SIZES
+
+
+def _normalize_openai_image_size(image_size: str | None) -> str | None:
+    if not image_size:
+        return None
+    normalized = image_size.strip().lower()
+    return normalized or None
+
+
+def _openai_explicit_size_supported(
+    size: str,
+    *,
+    supported_sizes: set[str] | None,
+) -> bool:
+    if supported_sizes is not None:
+        return size in supported_sizes
+    width, sep, height = size.partition("x")
+    return bool(sep and width.isdecimal() and height.isdecimal())
+
+
 async def _openai_images_from_payload(
     client: httpx.AsyncClient,
     payload: dict[str, Any],
diff --git a/tests/providers/test_image_generation.py b/tests/providers/test_image_generation.py
index c8c05c853..2fb3104d8 100644
--- a/tests/providers/test_image_generation.py
+++ b/tests/providers/test_image_generation.py
@@ -624,6 +624,34 @@ async def test_openai_aspect_ratio_to_size() -> None:
     assert fake.calls[0]["json"]["size"] == "1024x1024"
 
 
+@pytest.mark.asyncio
+async def test_openai_dalle3_uses_supported_orientation_sizes() -> None:
+    fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
+    client = OpenAIImageGenerationClient(
+        api_key="sk-openai-test",
+        client=fake,  # type: ignore[arg-type]
+    )
+
+    await client.generate(prompt="draw", model="dall-e-3", aspect_ratio="3:4")
+    await client.generate(prompt="draw", model="dall-e-3", aspect_ratio="4:3")
+
+    assert fake.calls[0]["json"]["size"] == "1024x1792"
+    assert fake.calls[1]["json"]["size"] == "1792x1024"
+
+
+@pytest.mark.asyncio
+async def test_openai_dalle2_uses_square_size_for_non_square_ratios() -> None:
+    fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
+    client = OpenAIImageGenerationClient(
+        api_key="sk-openai-test",
+        client=fake,  # type: ignore[arg-type]
+    )
+
+    await client.generate(prompt="draw", model="dall-e-2", aspect_ratio="16:9")
+
+    assert fake.calls[0]["json"]["size"] == "1024x1024"
+
+
 @pytest.mark.asyncio
 async def test_openai_gpt_image_uses_supported_landscape_size() -> None:
     fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
@@ -637,6 +665,21 @@ async def test_openai_gpt_image_uses_supported_landscape_size() -> None:
     assert fake.calls[0]["json"]["size"] == "1536x1024"
 
 
+@pytest.mark.asyncio
+async def test_openai_gpt_image_uses_supported_orientation_sizes() -> None:
+    fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
+    client = OpenAIImageGenerationClient(
+        api_key="sk-openai-test",
+        client=fake,  # type: ignore[arg-type]
+    )
+
+    await client.generate(prompt="draw", model="gpt-image-1", aspect_ratio="3:4")
+    await client.generate(prompt="draw", model="gpt-image-1", aspect_ratio="4:3")
+
+    assert fake.calls[0]["json"]["size"] == "1024x1536"
+    assert fake.calls[1]["json"]["size"] == "1536x1024"
+
+
 @pytest.mark.asyncio
 async def test_openai_default_size_when_no_aspect_ratio() -> None:
     fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
@@ -651,6 +694,25 @@ async def test_openai_default_size_when_no_aspect_ratio() -> None:
     assert body["size"] == "1024x1024"
 
 
+@pytest.mark.asyncio
+async def test_openai_ignores_explicit_size_unsupported_by_model_family() -> None:
+    fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
+    client = OpenAIImageGenerationClient(
+        api_key="sk-openai-test",
+        client=fake,  # type: ignore[arg-type]
+    )
+
+    await client.generate(
+        prompt="draw",
+        model="dall-e-3",
+        aspect_ratio="16:9",
+        image_size="1536x1024",
+    )
+
+    body = fake.calls[0]["json"]
+    assert body["size"] == "1792x1024"
+
+
 @pytest.mark.asyncio
 async def test_openai_uses_explicit_image_size() -> None:
     fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))