feat(image): support custom image generation provider

Addresses #4132. Add CustomImageGenerationClient for any OpenAI-compatible image generation API (POST {apiBase}/images/generations). Uses the existing providers.custom config slot. No schema changes required. Tests: 54 passed, ruff clean. Signed-off-by: axelray-dev <110029405+axelray-dev@users.noreply.github.com>
2026-06-13 14:23:58 +00:00 · 2026-06-04 13:13:00 +08:00 · 2026-06-04 13:13:00 +08:00 · 748b28da01
commit 748b28da01
parent c574b028c1
3 changed files with 172 additions and 1 deletions
--- a/docs/image-generation.md
+++ b/docs/image-generation.md
@ -46,7 +46,7 @@ The WebUI hides provider storage details from the user. The agent sees the saved
 | Option | Type | Default | Description |
 |--------|------|---------|-------------|
 | `tools.imageGeneration.enabled` | boolean | `false` | Register the `generate_image` tool |
-| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, `zhipu` |
+| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `custom`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, `zhipu` |
 | `tools.imageGeneration.model` | string | `"openai/gpt-5.4-image-2"` | Provider model name |
 | `tools.imageGeneration.defaultAspectRatio` | string | `"1:1"` | Default ratio when the prompt/tool call does not specify one |
 | `tools.imageGeneration.defaultImageSize` | string | `"1K"` | Default size hint, for example `1K`, `2K`, `4K`, or `1024x1024` |
@ -84,6 +84,32 @@ OpenRouter uses a chat-completions style image response. Configure:

 Use a model that supports image generation and image editing if you want reference-image edits.

+### Custom (OpenAI-compatible)
+
+Any service that exposes an OpenAI-compatible image generation endpoint (`POST /v1/images/generations`) can be used with the `custom` provider — for example local Stable Diffusion servers, Replicate, Agnes AI, and similar services.
+
+Configure:
+
+```json
+{
+  "providers": {
+    "custom": {
+      "apiKey": "${CUSTOM_IMAGE_API_KEY}",
+      "apiBase": "https://api.example.com/v1"
+    }
+  },
+  "tools": {
+    "imageGeneration": {
+      "enabled": true,
+      "provider": "custom",
+      "model": "your-model-name"
+    }
+  }
+}
+```
+
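+Both `apiKey` and `apiBase` are required. The provider sends `POST {apiBase}/images/generations` in the OpenAI Images API format with `n: 1` and `response_format: "b64_json"`. Reference images are not supported by this provider; any that are supplied are ignored with a warning.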
+
 ### AIHubMix

 AIHubMix `gpt-image-2-free` is supported through AIHubMix's unified predictions API. Internally nanobot calls:
--- a/nanobot/providers/image_generation.py
+++ b/nanobot/providers/image_generation.py
@ -1033,6 +1033,92 @@ class OpenAIImageGenerationClient(ImageGenerationProvider):
        return GeneratedImageResponse(images=images, content="", raw=payload)


+class CustomImageGenerationClient(ImageGenerationProvider):
+    """OpenAI-compatible Images API for user-configured custom providers."""
+
+    provider_name = "custom"
+    missing_key_message = (
+        "Custom image generation API key is not configured. Set providers.custom.apiKey."
+    )
+
+    def _default_base_url(self) -> str:
+        return ""
+
+    @staticmethod
+    def _custom_size(aspect_ratio: str | None, image_size: str | None) -> str:
+        return _openai_size("gpt-image-2", aspect_ratio, image_size)
+
+    async def generate(
+        self,
+        *,
+        prompt: str,
+        model: str,
+        reference_images: list[str] | None = None,
+        aspect_ratio: str | None = None,
+        image_size: str | None = None,
+    ) -> GeneratedImageResponse:
+        if not self.api_key:
+            raise ImageGenerationError(self.missing_key_message)
+
+        if reference_images:
+            logger.warning(
+                "Custom image generation does not support reference images; "
+                "ignoring {} reference image(s) for {}",
+                len(reference_images),
+                model,
+            )
+
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+            **self.extra_headers,
+        }
+
+        body: dict[str, Any] = {
+            "model": model,
+            "prompt": prompt,
+            "response_format": "b64_json",
+            "n": 1,
+            "size": self._custom_size(aspect_ratio, image_size),
+        }
+        body.update(self.extra_body)
+
+        logger.info("Custom Images API request: POST {}/images/generations body={}", self.api_base, body)
+
+        response = await self._http_post(
+            f"{self.api_base}/images/generations",
+            headers=headers,
+            body=body,
+        )
+
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError as exc:
+            detail = response.text[:1000]
+            logger.error("Custom Images API error ({}): {}", response.status_code, detail)
+            raise ImageGenerationError(
+                f"Custom image generation failed (HTTP {response.status_code}): {detail}"
+            ) from exc
+
+        payload = response.json()
+        logger.info("Custom Images API response ({}): {}", response.status_code,
+                       {k: v for k, v in payload.items() if k != "data"})
+
+        client = self._client
+        owns_client = client is None
+        if owns_client:
+            client = httpx.AsyncClient(timeout=self.timeout)
+        try:
+            images = await _openai_images_from_payload(client, payload)
+        finally:
+            if owns_client:
+                await client.aclose()
+
+        self._require_images(images, payload)
+
+        return GeneratedImageResponse(images=images, content="", raw=payload)
+
+
 # ---------------------------------------------------------------------------
 # OpenAI Codex image generation
 # ---------------------------------------------------------------------------
@ -1594,6 +1680,7 @@ async def _zhipu_images_from_payload(

 register_image_gen_provider(AIHubMixImageGenerationClient)
 register_image_gen_provider(CodexImageGenerationClient)
+register_image_gen_provider(CustomImageGenerationClient)
 register_image_gen_provider(GeminiImageGenerationClient)
 register_image_gen_provider(OllamaImageGenerationClient)
 register_image_gen_provider(MiniMaxImageGenerationClient)
--- a/tests/providers/test_image_generation.py
+++ b/tests/providers/test_image_generation.py
@ -10,6 +10,7 @@ import pytest
 from nanobot.providers.image_generation import (
    AIHubMixImageGenerationClient,
    CodexImageGenerationClient,
+    CustomImageGenerationClient,
    GeminiImageGenerationClient,
    GeneratedImageResponse,
    ImageGenerationError,
@ -806,6 +807,63 @@ async def test_openai_requires_api_key() -> None:
        await client.generate(prompt="draw", model="dall-e-3")


+# ---------------------------------------------------------------------------
+# Custom OpenAI-compatible Images API
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_custom_generate_success() -> None:
+    fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
+    client = CustomImageGenerationClient(
+        api_key="sk-custom-test",
+        api_base="https://custom.example/v1/",
+        extra_headers={"X-Test": "1"},
+        client=fake,  # type: ignore[arg-type]
+    )
+
+    response = await client.generate(
+        prompt="a cat on the moon",
+        model="custom-image-model",
+        aspect_ratio="16:9",
+    )
+
+    assert isinstance(response, GeneratedImageResponse)
+    assert response.images == [PNG_DATA_URL]
+    assert response.content == ""
+    call = fake.calls[0]
+    assert call["url"] == "https://custom.example/v1/images/generations"
+    assert call["headers"]["Authorization"] == "Bearer sk-custom-test"
+    assert call["headers"]["X-Test"] == "1"
+    body = call["json"]
+    assert body["model"] == "custom-image-model"
+    assert body["prompt"] == "a cat on the moon"
+    assert body["response_format"] == "b64_json"
+    assert body["n"] == 1
+    assert body["size"] == "1536x1024"
+
+
+@pytest.mark.asyncio
+async def test_custom_generate_no_api_key() -> None:
+    client = CustomImageGenerationClient(api_key=None)
+
+    with pytest.raises(ImageGenerationError, match="providers.custom.apiKey"):
+        await client.generate(prompt="draw", model="custom-image-model")
+
+
+@pytest.mark.asyncio
+async def test_custom_generate_http_error() -> None:
+    fake = FakeClient(FakeResponse({"error": "bad request"}, status_code=400))
+    client = CustomImageGenerationClient(
+        api_key="sk-custom-test",
+        api_base="https://custom.example/v1",
+        client=fake,  # type: ignore[arg-type]
+    )
+
+    with pytest.raises(ImageGenerationError, match="HTTP 400"):
+        await client.generate(prompt="draw", model="custom-image-model")
+
+
 # ---------------------------------------------------------------------------
 # OpenAI Codex (Responses API)
 # ---------------------------------------------------------------------------