From d435cb0b2171d65475566cc8fed2a11febe950e3 Mon Sep 17 00:00:00 2001
From: chengyongru
Date: Fri, 5 Jun 2026 14:03:23 +0800
Subject: [PATCH] fix: harden custom image provider compatibility

Maintainer edit: preserve provider-specific size hints for custom image
generation endpoints while keeping the default 1K mapping compatible.
Keep honoring the requested aspect ratio when the default 1K size is in
effect. Clarify the custom provider contract in docs and cover
response_format/size overrides in tests.
---
 docs/image-generation.md                 | 20 ++++++--
 nanobot/providers/image_generation.py    | 15 +++++-
 tests/providers/test_image_generation.py | 68 +++++++++++++++++++++++++++
 3 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/docs/image-generation.md b/docs/image-generation.md
index 55250c406..bf34ba620 100644
--- a/docs/image-generation.md
+++ b/docs/image-generation.md
@@ -23,7 +23,7 @@ The feature is disabled by default. Enable it in `~/.nanobot/config.json`, confi
 }
 ```
 
-See [Provider Notes](#provider-notes) for AIHubMix, MiniMax, Gemini, Ollama, StepFun, and Zhipu configuration examples.
+See [Provider Notes](#provider-notes) for Custom, AIHubMix, MiniMax, Gemini, Ollama, StepFun, and Zhipu configuration examples.
 
 > [!TIP]
 > Prefer environment variables for API keys. nanobot resolves `${VAR_NAME}` values from the environment at startup.
@@ -86,7 +86,13 @@ Use a model that supports image generation and image editing if you want referen
 
 ### Custom (OpenAI-compatible)
 
-Any OpenAI-compatible image generation API can be used with the `custom` provider. This includes local Stable Diffusion servers, Replicate, Agnes AI, and similar services that expose the `/v1/images/generations` endpoint.
+Use the `custom` provider for services that implement the synchronous OpenAI Images API:
+
+```text
+POST /v1/images/generations
+```
+
+The response must include generated images in `data[].b64_json` or `data[].url`. Native prediction APIs, such as Replicate's `/v1/models/{owner}/{model}/predictions`, are not directly compatible unless you put an OpenAI-compatible gateway in front of them.
 
 Configure:
 
@@ -108,7 +114,15 @@ Configure:
 }
 ```
 
-The `apiBase` is required. The provider sends requests to `{apiBase}/images/generations` using the OpenAI Images API format with `response_format: "b64_json"`. The `apiKey` is optional for local or unauthenticated endpoints.
+The `apiBase` is required. The provider sends requests to `{apiBase}/images/generations` using the OpenAI Images API format with `response_format: "b64_json"`. The `apiKey` is optional for local or unauthenticated endpoints. Reference-image edits are not supported by the generic `custom` provider.
+
+`extraBody` can adapt provider-specific quirks because it is merged last into the request body. Examples:
+
+- Agnes AI documents URL responses, so use `"extraBody": {"response_format": "url"}`.
+- Together AI documents `"response_format": "base64"`, so override the default.
+- Volcengine Ark Seedream models may require size hints such as `"2K"`, `"3K"`, `"4K"`, or explicit dimensions. Set `tools.imageGeneration.defaultImageSize` or `providers.custom.extraBody.size` to a value supported by the selected model.
+
+For compatibility with the default nanobot setting, the `custom` provider maps `defaultImageSize: "1K"` to the OpenAI-style dimension for the requested aspect ratio (`1024x1024` when no aspect ratio is given). Other explicit size hints are passed through unchanged.
 
 ### AIHubMix
 
diff --git a/nanobot/providers/image_generation.py b/nanobot/providers/image_generation.py
index 18f62e26c..a16a17c77 100644
--- a/nanobot/providers/image_generation.py
+++ b/nanobot/providers/image_generation.py
@@ -1046,7 +1046,20 @@ class CustomImageGenerationClient(ImageGenerationProvider):
 
     @staticmethod
     def _custom_size(aspect_ratio: str | None, image_size: str | None) -> str:
-        return _openai_size("gpt-image-2", aspect_ratio, image_size)
+        """Resolve the ``size`` value sent to a custom images endpoint.
+
+        Provider-specific hints such as ``"2K"`` or explicit dimensions pass
+        through unchanged. nanobot's default ``"1K"`` is not a provider hint,
+        so it resolves to the OpenAI-style dimension for the requested aspect
+        ratio, or ``1024x1024`` when no aspect ratio is given.
+        """
+        requested = (image_size or "").strip()
+        if requested and requested.lower() != "1k":
+            return requested
+        if requested and not aspect_ratio:
+            # Pin the documented default without depending on the helper.
+            return "1024x1024"
+        return _openai_size("gpt-image-2", aspect_ratio, None)
 
     async def generate(
         self,
diff --git a/tests/providers/test_image_generation.py b/tests/providers/test_image_generation.py
index b7bd29a4b..29890778d 100644
--- a/tests/providers/test_image_generation.py
+++ b/tests/providers/test_image_generation.py
@@ -843,6 +843,74 @@ async def test_custom_generate_success() -> None:
     assert body["size"] == "1536x1024"
 
 
+@pytest.mark.asyncio
+async def test_custom_generate_preserves_provider_size_hint() -> None:
+    fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
+    client = CustomImageGenerationClient(
+        api_key="sk-custom-test",
+        api_base="https://custom.example/v1",
+        client=fake,  # type: ignore[arg-type]
+    )
+
+    await client.generate(
+        prompt="a cat on the moon",
+        model="custom-image-model",
+        image_size="2K",
+    )
+
+    assert fake.calls[0]["json"]["size"] == "2K"
+
+
+@pytest.mark.asyncio
+async def test_custom_generate_maps_one_k_to_openai_dimension() -> None:
+    fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
+    client = CustomImageGenerationClient(
+        api_key="sk-custom-test",
+        api_base="https://custom.example/v1",
+        client=fake,  # type: ignore[arg-type]
+    )
+
+    await client.generate(
+        prompt="a cat on the moon",
+        model="custom-image-model",
+        image_size="1K",
+    )
+
+    assert fake.calls[0]["json"]["size"] == "1024x1024"
+
+
+def test_custom_size_one_k_keeps_aspect_ratio() -> None:
+    with_default = CustomImageGenerationClient._custom_size("16:9", "1K")
+    without_size = CustomImageGenerationClient._custom_size("16:9", None)
+
+    assert with_default == without_size
+
+
+@pytest.mark.asyncio
+async def test_custom_generate_extra_body_can_override_defaults() -> None:
+    fake = FakeClient(FakeResponse({"data": [{"url": "https://images.example/cat.png"}]}))
+    fake.get_response = FakeResponse({}, content=PNG_BYTES)
+    client = CustomImageGenerationClient(
+        api_key="sk-custom-test",
+        api_base="https://custom.example/v1",
+        extra_body={"response_format": "url", "size": "2K"},
+        client=fake,  # type: ignore[arg-type]
+    )
+
+    response = await client.generate(
+        prompt="a cat on the moon",
+        model="custom-image-model",
+        image_size="1K",
+    )
+
+    expected_data_url = f"data:image/png;base64,{base64.b64encode(PNG_BYTES).decode('ascii')}"
+    assert response.images == [expected_data_url]
+    assert fake.get_calls[0]["url"] == "https://images.example/cat.png"
+    body = fake.calls[0]["json"]
+    assert body["response_format"] == "url"
+    assert body["size"] == "2K"
+
+
 @pytest.mark.asyncio
 async def test_custom_generate_without_api_key_omits_authorization() -> None:
     fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))