fix: harden custom image provider compatibility

Maintainer edit: preserve provider-specific size hints for custom image generation endpoints while keeping the default 1K mapping compatible. Clarify the custom provider contract in docs and cover response_format/size overrides in tests.
2026-06-13 22:34:06 +00:00 · 2026-06-05 14:03:23 +08:00 · 2026-06-05 14:03:23 +08:00 · d435cb0b21
commit d435cb0b21
parent ae17a79bdf
3 changed files with 85 additions and 4 deletions
--- a/docs/image-generation.md
+++ b/docs/image-generation.md
@ -23,7 +23,7 @@ The feature is disabled by default. Enable it in `~/.nanobot/config.json`, confi
 }
 ```
-See [Provider Notes](#provider-notes) for AIHubMix, MiniMax, Gemini, Ollama, StepFun, and Zhipu configuration examples.
+See [Provider Notes](#provider-notes) for Custom, AIHubMix, MiniMax, Gemini, Ollama, StepFun, and Zhipu configuration examples.
 > [!TIP]
 > Prefer environment variables for API keys. nanobot resolves `${VAR_NAME}` values from the environment at startup.
@ -86,7 +86,13 @@ Use a model that supports image generation and image editing if you want referen
 ### Custom (OpenAI-compatible)
-Any OpenAI-compatible image generation API can be used with the `custom` provider. This includes local Stable Diffusion servers, Replicate, Agnes AI, and similar services that expose the `/v1/images/generations` endpoint.
+Use the `custom` provider for services that implement the synchronous OpenAI Images API:
 ```text
 POST /v1/images/generations
 ```
 The response must include generated images in `data[].b64_json` or `data[].url`. Native prediction APIs, such as Replicate's `/v1/models/{owner}/{model}/predictions`, are not directly compatible unless you put an OpenAI-compatible gateway in front of them.
 Configure:
@ -108,7 +114,15 @@ Configure:
 }
 ```
-The `apiBase` is required. The provider sends requests to `{apiBase}/images/generations` using the OpenAI Images API format with `response_format: "b64_json"`. The `apiKey` is optional for local or unauthenticated endpoints.
+The `apiBase` is required. The provider sends requests to `{apiBase}/images/generations` using the OpenAI Images API format, with `response_format: "b64_json"` by default. The `apiKey` is optional for local or unauthenticated endpoints. Reference-image edits are not supported by the generic `custom` provider.
 `extraBody` can adapt provider-specific quirks because it is merged last into the request body. Examples:
 - Agnes AI documents URL responses, so use `"extraBody": {"response_format": "url"}`.
 - Together AI documents `"response_format": "base64"`, so override the default with `"extraBody": {"response_format": "base64"}`.
 - Volcengine Ark Seedream models may require size hints such as `"2K"`, `"3K"`, `"4K"`, or explicit dimensions. Set `tools.imageGeneration.defaultImageSize` or `providers.custom.extraBody.size` to a value supported by the selected model.
 For compatibility with the default nanobot setting, the `custom` provider maps `defaultImageSize: "1K"` to `1024x1024`. Other explicit size hints are passed through unchanged.
 ### AIHubMix
--- a/nanobot/providers/image_generation.py
+++ b/nanobot/providers/image_generation.py
@ -1046,7 +1046,13 @@ class CustomImageGenerationClient(ImageGenerationProvider):
    @staticmethod
    def _custom_size(aspect_ratio: str | None, image_size: str | None) -> str:
-        return _openai_size("gpt-image-2", aspect_ratio, image_size)
+        if image_size:
            requested = image_size.strip()
            if requested:
                if requested.lower() == "1k":
                    return "1024x1024"
                return requested
        return _openai_size("gpt-image-2", aspect_ratio, None)
    async def generate(
        self,
--- a/tests/providers/test_image_generation.py
+++ b/tests/providers/test_image_generation.py
@ -843,6 +843,67 @@ async def test_custom_generate_success() -> None:
    assert body["size"] == "1536x1024"
@pytest.mark.asyncio
 async def test_custom_generate_preserves_provider_size_hint() -> None:
    fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    client = CustomImageGenerationClient(
        api_key="sk-custom-test",
        api_base="https://custom.example/v1",
        client=fake,  # type: ignore[arg-type]
    )
    await client.generate(
        prompt="a cat on the moon",
        model="custom-image-model",
        image_size="2K",
    )
    assert fake.calls[0]["json"]["size"] == "2K"
@pytest.mark.asyncio
 async def test_custom_generate_maps_one_k_to_openai_dimension() -> None:
    fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    client = CustomImageGenerationClient(
        api_key="sk-custom-test",
        api_base="https://custom.example/v1",
        client=fake,  # type: ignore[arg-type]
    )
    await client.generate(
        prompt="a cat on the moon",
        model="custom-image-model",
        image_size="1K",
    )
    assert fake.calls[0]["json"]["size"] == "1024x1024"
@pytest.mark.asyncio
 async def test_custom_generate_extra_body_can_override_defaults() -> None:
    fake = FakeClient(FakeResponse({"data": [{"url": "https://images.example/cat.png"}]}))
    fake.get_response = FakeResponse({}, content=PNG_BYTES)
    client = CustomImageGenerationClient(
        api_key="sk-custom-test",
        api_base="https://custom.example/v1",
        extra_body={"response_format": "url", "size": "2K"},
        client=fake,  # type: ignore[arg-type]
    )
    response = await client.generate(
        prompt="a cat on the moon",
        model="custom-image-model",
        image_size="1K",
    )
    expected_data_url = f"data:image/png;base64,{base64.b64encode(PNG_BYTES).decode('ascii')}"
    assert response.images == [expected_data_url]
    assert fake.get_calls[0]["url"] == "https://images.example/cat.png"
    body = fake.calls[0]["json"]
    assert body["response_format"] == "url"
    assert body["size"] == "2K"
@pytest.mark.asyncio
 async def test_custom_generate_without_api_key_omits_authorization() -> None:
    fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))