From d435cb0b2171d65475566cc8fed2a11febe950e3 Mon Sep 17 00:00:00 2001
From: chengyongru <chengyongru.ai@gmail.com>
Date: Fri, 5 Jun 2026 14:03:23 +0800
Subject: [PATCH] fix: harden custom image provider compatibility

Maintainer edit: preserve provider-specific size hints for custom image generation endpoints while keeping the default 1K mapping compatible. Clarify the custom provider contract in docs and cover response_format/size overrides in tests.
---
 docs/image-generation.md                 | 20 ++++++--
 nanobot/providers/image_generation.py    |  8 +++-
 tests/providers/test_image_generation.py | 61 ++++++++++++++++++++++++
 3 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/docs/image-generation.md b/docs/image-generation.md
index 55250c406..bf34ba620 100644
--- a/docs/image-generation.md
+++ b/docs/image-generation.md
@@ -23,7 +23,7 @@ The feature is disabled by default. Enable it in `~/.nanobot/config.json`, confi
 }
 ```
 
-See [Provider Notes](#provider-notes) for AIHubMix, MiniMax, Gemini, Ollama, StepFun, and Zhipu configuration examples.
+See [Provider Notes](#provider-notes) for Custom, AIHubMix, MiniMax, Gemini, Ollama, StepFun, and Zhipu configuration examples.
 
 > [!TIP]
 > Prefer environment variables for API keys. nanobot resolves `${VAR_NAME}` values from the environment at startup.
@@ -86,7 +86,13 @@ Use a model that supports image generation and image editing if you want referen
 
 ### Custom (OpenAI-compatible)
 
-Any OpenAI-compatible image generation API can be used with the `custom` provider. This includes local Stable Diffusion servers, Replicate, Agnes AI, and similar services that expose the `/v1/images/generations` endpoint.
+Use the `custom` provider for services that implement the synchronous OpenAI Images API:
+
+```text
+POST /v1/images/generations
+```
+
+The response must include generated images in `data[].b64_json` or `data[].url`. Native prediction APIs, such as Replicate's `/v1/models/{owner}/{model}/predictions`, are not directly compatible unless you put an OpenAI-compatible gateway in front of them.
 
 Configure:
 
@@ -108,7 +114,15 @@ Configure:
 }
 ```
 
-The `apiBase` is required. The provider sends requests to `{apiBase}/images/generations` using the OpenAI Images API format with `response_format: "b64_json"`. The `apiKey` is optional for local or unauthenticated endpoints.
+The `apiBase` is required. The provider sends requests to `{apiBase}/images/generations` using the OpenAI Images API format, with `response_format: "b64_json"` by default. The `apiKey` is optional for local or unauthenticated endpoints. Reference-image edits are not supported by the generic `custom` provider.
+
+`extraBody` can adapt provider-specific quirks because it is merged last into the request body. Examples:
+
+- Agnes AI documents URL responses, so use `"extraBody": {"response_format": "url"}`.
+- Together AI documents `"response_format": "base64"`, so override the default with `"extraBody": {"response_format": "base64"}`.
+- Volcengine Ark Seedream models may require size hints such as `"2K"`, `"3K"`, `"4K"`, or explicit dimensions. Set `tools.imageGeneration.defaultImageSize` or `providers.custom.extraBody.size` to a value supported by the selected model.
+
+For compatibility with the default nanobot setting, the `custom` provider maps `defaultImageSize: "1K"` to `1024x1024`. Other explicit size hints are passed through unchanged.
 
 ### AIHubMix
 
diff --git a/nanobot/providers/image_generation.py b/nanobot/providers/image_generation.py
index 18f62e26c..a16a17c77 100644
--- a/nanobot/providers/image_generation.py
+++ b/nanobot/providers/image_generation.py
@@ -1046,7 +1046,13 @@ class CustomImageGenerationClient(ImageGenerationProvider):
 
     @staticmethod
     def _custom_size(aspect_ratio: str | None, image_size: str | None) -> str:
-        return _openai_size("gpt-image-2", aspect_ratio, image_size)
+        """Return the request size; only the default ``1K`` is normalized."""
+        requested = (image_size or "").strip()
+        if requested and requested.lower() != "1k":
+            return requested  # provider-specific hint such as "2K" or "2048x2048"
+        if requested and not aspect_ratio:
+            return "1024x1024"  # nanobot's default "1K" with no aspect ratio requested
+        return _openai_size("gpt-image-2", aspect_ratio, None)  # unset, or "1K" + ratio
 
     async def generate(
         self,
diff --git a/tests/providers/test_image_generation.py b/tests/providers/test_image_generation.py
index b7bd29a4b..29890778d 100644
--- a/tests/providers/test_image_generation.py
+++ b/tests/providers/test_image_generation.py
@@ -843,6 +843,67 @@ async def test_custom_generate_success() -> None:
     assert body["size"] == "1536x1024"
 
 
+@pytest.mark.asyncio
+async def test_custom_generate_preserves_provider_size_hint() -> None:
+    """A non-default size hint such as ``2K`` is sent in the request body unchanged."""
+    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
+    provider = CustomImageGenerationClient(
+        api_key="sk-custom-test",
+        api_base="https://custom.example/v1",
+        client=transport,  # type: ignore[arg-type]
+    )
+    await provider.generate(
+        prompt="a cat on the moon",
+        model="custom-image-model",
+        image_size="2K",
+    )
+
+    assert transport.calls[0]["json"]["size"] == "2K"
+
+
+@pytest.mark.asyncio
+async def test_custom_generate_maps_one_k_to_openai_dimension() -> None:
+    """The default ``1K`` size is sent as the explicit ``1024x1024`` dimension."""
+    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
+    provider = CustomImageGenerationClient(
+        api_key="sk-custom-test",
+        api_base="https://custom.example/v1",
+        client=transport,  # type: ignore[arg-type]
+    )
+    await provider.generate(
+        prompt="a cat on the moon",
+        model="custom-image-model",
+        image_size="1K",
+    )
+
+    assert transport.calls[0]["json"]["size"] == "1024x1024"
+
+
+@pytest.mark.asyncio
+async def test_custom_generate_extra_body_can_override_defaults() -> None:
+    """``extra_body`` wins over the default ``response_format`` and the derived size."""
+    image_url = "https://images.example/cat.png"
+    transport = FakeClient(FakeResponse({"data": [{"url": image_url}]}))
+    transport.get_response = FakeResponse({}, content=PNG_BYTES)
+    provider = CustomImageGenerationClient(
+        api_key="sk-custom-test",
+        api_base="https://custom.example/v1",
+        extra_body={"response_format": "url", "size": "2K"},
+        client=transport,  # type: ignore[arg-type]
+    )
+    result = await provider.generate(
+        prompt="a cat on the moon",
+        model="custom-image-model",
+        image_size="1K",
+    )
+
+    encoded_png = base64.b64encode(PNG_BYTES).decode("ascii")
+    assert result.images == [f"data:image/png;base64,{encoded_png}"]
+    assert transport.get_calls[0]["url"] == image_url
+    sent_body = transport.calls[0]["json"]
+    assert (sent_body["response_format"], sent_body["size"]) == ("url", "2K")
+
+
 @pytest.mark.asyncio
 async def test_custom_generate_without_api_key_omits_authorization() -> None:
     fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))