feat(image): support custom image generation provider

Addresses #4132. Add CustomImageGenerationClient for any OpenAI-compatible image generation API (POST {apiBase}/images/generations). Uses the existing providers.custom config slot. No schema changes required. Tests: 54 passed, ruff clean. Signed-off-by: axelray-dev <110029405+axelray-dev@users.noreply.github.com>
2026-06-13 22:34:06 +00:00 · 2026-06-04 13:13:00 +08:00 · 2026-06-04 13:13:00 +08:00 · 748b28da01
commit 748b28da01
parent c574b028c1
3 changed files with 172 additions and 1 deletions
--- a/docs/image-generation.md
+++ b/docs/image-generation.md
@ -46,7 +46,7 @@ The WebUI hides provider storage details from the user. The agent sees the saved
 | Option | Type | Default | Description |
 |--------|------|---------|-------------|
 | `tools.imageGeneration.enabled` | boolean | `false` | Register the `generate_image` tool |
-| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, `zhipu` |
+| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `custom`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, `zhipu` |
 | `tools.imageGeneration.model` | string | `"openai/gpt-5.4-image-2"` | Provider model name |
 | `tools.imageGeneration.defaultAspectRatio` | string | `"1:1"` | Default ratio when the prompt/tool call does not specify one |
 | `tools.imageGeneration.defaultImageSize` | string | `"1K"` | Default size hint, for example `1K`, `2K`, `4K`, or `1024x1024` |
@ -84,6 +84,32 @@ OpenRouter uses a chat-completions style image response. Configure:
 Use a model that supports image generation and image editing if you want reference-image edits.
 ### Custom (OpenAI-compatible)
 Any OpenAI-compatible image generation API can be used with the `custom` provider. This includes local Stable Diffusion servers, Replicate, Agnes AI, and similar services that expose the `/v1/images/generations` endpoint.
 Configure:
 ```json
 {
  "providers": {
    "custom": {
      "apiKey": "${CUSTOM_IMAGE_API_KEY}",
      "apiBase": "https://api.example.com/v1"
    }
  },
  "tools": {
    "imageGeneration": {
      "enabled": true,
      "provider": "custom",
      "model": "your-model-name"
    }
  }
 }
 ```
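 Both `apiKey` and `apiBase` are required. The provider sends requests to `{apiBase}/images/generations` using the OpenAI Images API format with `response_format: "b64_json"`. Reference images are not supported by the `custom` provider; any that are supplied are ignored and a warning is logged.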
 ### AIHubMix
 AIHubMix `gpt-image-2-free` is supported through AIHubMix's unified predictions API. Internally nanobot calls:
--- a/nanobot/providers/image_generation.py
+++ b/nanobot/providers/image_generation.py
@ -1033,6 +1033,92 @@ class OpenAIImageGenerationClient(ImageGenerationProvider):
        return GeneratedImageResponse(images=images, content="", raw=payload)
 class CustomImageGenerationClient(ImageGenerationProvider):
     """OpenAI-compatible Images API client for user-configured custom providers.

     Issues ``POST {api_base}/images/generations`` with
     ``response_format: "b64_json"`` and turns the returned payload into images
     through ``_openai_images_from_payload``.  The provider has no built-in
     endpoint, so both an API key and an API base must be configured.
     Reference images are not forwarded; they are dropped with a warning.
     """

     provider_name = "custom"
     missing_key_message = (
         "Custom image generation API key is not configured. Set providers.custom.apiKey."
     )
     # Used when no endpoint is configured; this provider has no default URL.
     missing_base_message = (
         "Custom image generation API base is not configured. Set providers.custom.apiBase."
     )

     def _default_base_url(self) -> str:
         """Return an empty base URL; the endpoint must come from configuration."""
         return ""

     @staticmethod
     def _custom_size(aspect_ratio: str | None, image_size: str | None) -> str:
         """Translate the aspect-ratio / size hints into a ``size`` request value.

         Delegates to ``_openai_size`` with the fixed model name
         ``"gpt-image-2"``, independent of the model actually requested.
         """
         # NOTE(review): presumably "gpt-image-2" only selects that model's size
         # table inside _openai_size - confirm against the helper.
         return _openai_size("gpt-image-2", aspect_ratio, image_size)

     async def generate(
         self,
         *,
         prompt: str,
         model: str,
         reference_images: list[str] | None = None,
         aspect_ratio: str | None = None,
         image_size: str | None = None,
     ) -> GeneratedImageResponse:
         """Generate one image through the configured OpenAI-compatible endpoint.

         Args:
             prompt: Text prompt sent as ``prompt``.
             model: Model name sent as ``model``.
             reference_images: Not supported; ignored with a logged warning.
             aspect_ratio: Optional ratio hint used to derive ``size``.
             image_size: Optional size hint used to derive ``size``.

         Returns:
             A ``GeneratedImageResponse`` holding the extracted images, an empty
             ``content`` string and the decoded JSON payload as ``raw``.

         Raises:
             ImageGenerationError: If the API key or API base is missing, the
                 endpoint answers with an HTTP error status, or the response
                 body is not a JSON object.
         """
         if not self.api_key:
             raise ImageGenerationError(self.missing_key_message)
         # Without a base URL the request below would target the relative path
         # "/images/generations" and fail with an opaque transport error.
         if not self.api_base:
             raise ImageGenerationError(self.missing_base_message)
         if reference_images:
             logger.warning(
                 "Custom image generation does not support reference images; "
                 "ignoring {} reference image(s) for {}",
                 len(reference_images),
                 model,
             )

         # extra_headers is spread last so configured headers take precedence.
         headers = {
             "Authorization": f"Bearer {self.api_key}",
             "Content-Type": "application/json",
             **self.extra_headers,
         }
         body: dict[str, Any] = {
             "model": model,
             "prompt": prompt,
             "response_format": "b64_json",
             "n": 1,
             "size": self._custom_size(aspect_ratio, image_size),
         }
         # Configured extra_body entries may override the defaults above.
         body.update(self.extra_body)

         logger.info("Custom Images API request: POST {}/images/generations body={}", self.api_base, body)
         response = await self._http_post(
             f"{self.api_base}/images/generations",
             headers=headers,
             body=body,
         )
         try:
             response.raise_for_status()
         except httpx.HTTPStatusError as exc:
             detail = response.text[:1000]
             logger.error("Custom Images API error ({}): {}", response.status_code, detail)
             raise ImageGenerationError(
                 f"Custom image generation failed (HTTP {response.status_code}): {detail}"
             ) from exc

         # A 2xx status does not guarantee a JSON object (proxies, HTML error
         # pages, bare arrays); surface that as a provider error.
         try:
             payload = response.json()
         except ValueError as exc:
             raise ImageGenerationError(
                 "Custom image generation returned a non-JSON response: "
                 f"{response.text[:1000]}"
             ) from exc
         if not isinstance(payload, dict):
             raise ImageGenerationError(
                 "Custom image generation returned an unexpected payload of type "
                 f"{type(payload).__name__}"
             )

         # "data" carries the base64 image bytes; keep it out of the log.
         logger.info("Custom Images API response ({}): {}", response.status_code,
                        {k: v for k, v in payload.items() if k != "data"})

         # Reuse the injected client when present; otherwise create a temporary
         # one and make sure it is closed afterwards.
         client = self._client
         owns_client = client is None
         if owns_client:
             client = httpx.AsyncClient(timeout=self.timeout)
         try:
             images = await _openai_images_from_payload(client, payload)
         finally:
             if owns_client:
                 await client.aclose()

         self._require_images(images, payload)
         return GeneratedImageResponse(images=images, content="", raw=payload)
 # ---------------------------------------------------------------------------
 # OpenAI Codex image generation
 # ---------------------------------------------------------------------------
@ -1594,6 +1680,7 @@ async def _zhipu_images_from_payload(
 register_image_gen_provider(AIHubMixImageGenerationClient)
 register_image_gen_provider(CodexImageGenerationClient)
 register_image_gen_provider(CustomImageGenerationClient)
 register_image_gen_provider(GeminiImageGenerationClient)
 register_image_gen_provider(OllamaImageGenerationClient)
 register_image_gen_provider(MiniMaxImageGenerationClient)
--- a/tests/providers/test_image_generation.py
+++ b/tests/providers/test_image_generation.py
@ -10,6 +10,7 @@ import pytest
 from nanobot.providers.image_generation import (
    AIHubMixImageGenerationClient,
    CodexImageGenerationClient,
    CustomImageGenerationClient,
    GeminiImageGenerationClient,
    GeneratedImageResponse,
    ImageGenerationError,
@ -806,6 +807,63 @@ async def test_openai_requires_api_key() -> None:
        await client.generate(prompt="draw", model="dall-e-3")
 # ---------------------------------------------------------------------------
 # Custom OpenAI-compatible Images API
 # ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_custom_generate_success() -> None:
    """A successful call posts an OpenAI-style body and returns the decoded image."""
    api_payload = {"data": [{"b64_json": RAW_B64}]}
    transport = FakeClient(FakeResponse(api_payload))
    provider = CustomImageGenerationClient(
        api_key="sk-custom-test",
        api_base="https://custom.example/v1/",
        extra_headers={"X-Test": "1"},
        client=transport,  # type: ignore[arg-type]
    )

    result = await provider.generate(
        prompt="a cat on the moon",
        model="custom-image-model",
        aspect_ratio="16:9",
    )

    # Shape of the returned value.
    assert isinstance(result, GeneratedImageResponse)
    assert result.images == [PNG_DATA_URL]
    assert result.content == ""

    # The single recorded request: URL, headers, then JSON body.
    sent = transport.calls[0]
    assert sent["url"] == "https://custom.example/v1/images/generations"

    sent_headers = sent["headers"]
    assert sent_headers["Authorization"] == "Bearer sk-custom-test"
    assert sent_headers["X-Test"] == "1"

    sent_body = sent["json"]
    expected_fields = {
        "model": "custom-image-model",
        "prompt": "a cat on the moon",
        "response_format": "b64_json",
        "n": 1,
        "size": "1536x1024",
    }
    for field, expected in expected_fields.items():
        assert sent_body[field] == expected
@pytest.mark.asyncio
async def test_custom_generate_no_api_key() -> None:
    """Calling generate without an API key raises the configuration hint."""
    keyless = CustomImageGenerationClient(api_key=None)
    expected_hint = "providers.custom.apiKey"
    with pytest.raises(ImageGenerationError, match=expected_hint):
        await keyless.generate(prompt="draw", model="custom-image-model")
@pytest.mark.asyncio
async def test_custom_generate_http_error() -> None:
    """An HTTP 400 from the endpoint surfaces as ImageGenerationError."""
    failing_response = FakeResponse({"error": "bad request"}, status_code=400)
    transport = FakeClient(failing_response)
    provider = CustomImageGenerationClient(
        api_key="sk-custom-test",
        api_base="https://custom.example/v1",
        client=transport,  # type: ignore[arg-type]
    )

    with pytest.raises(ImageGenerationError, match="HTTP 400"):
        await provider.generate(prompt="draw", model="custom-image-model")
 # ---------------------------------------------------------------------------
 # OpenAI Codex (Responses API)
 # ---------------------------------------------------------------------------