From 748b28da013ea92b0410575b61b898d7e2b73a73 Mon Sep 17 00:00:00 2001 From: axelray-dev <110029405+axelray-dev@users.noreply.github.com> Date: Thu, 4 Jun 2026 13:13:00 +0800 Subject: [PATCH] feat(image): support custom image generation provider Addresses #4132. Add CustomImageGenerationClient for any OpenAI-compatible image generation API (POST {apiBase}/images/generations). Uses the existing providers.custom config slot. No schema changes required. Tests: 54 passed, ruff clean. Signed-off-by: axelray-dev <110029405+axelray-dev@users.noreply.github.com> --- docs/image-generation.md | 28 +++++++- nanobot/providers/image_generation.py | 87 ++++++++++++++++++++++++ tests/providers/test_image_generation.py | 58 ++++++++++++++++ 3 files changed, 172 insertions(+), 1 deletion(-) diff --git a/docs/image-generation.md b/docs/image-generation.md index 77f431dc0..f298042dd 100644 --- a/docs/image-generation.md +++ b/docs/image-generation.md @@ -46,7 +46,7 @@ The WebUI hides provider storage details from the user. The agent sees the saved | Option | Type | Default | Description | |--------|------|---------|-------------| | `tools.imageGeneration.enabled` | boolean | `false` | Register the `generate_image` tool | -| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, `zhipu` | +| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. 
Supported values: `openrouter`, `custom`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, `zhipu` | | `tools.imageGeneration.model` | string | `"openai/gpt-5.4-image-2"` | Provider model name | | `tools.imageGeneration.defaultAspectRatio` | string | `"1:1"` | Default ratio when the prompt/tool call does not specify one | | `tools.imageGeneration.defaultImageSize` | string | `"1K"` | Default size hint, for example `1K`, `2K`, `4K`, or `1024x1024` | @@ -84,6 +84,32 @@ OpenRouter uses a chat-completions style image response. Configure: Use a model that supports image generation and image editing if you want reference-image edits. +### Custom (OpenAI-compatible) + +Any OpenAI-compatible image generation API can be used with the `custom` provider. This includes local Stable Diffusion servers, Replicate, Agnes AI, and similar services that expose the `/v1/images/generations` endpoint. + +Configure: + +```json +{ + "providers": { + "custom": { + "apiKey": "${CUSTOM_IMAGE_API_KEY}", + "apiBase": "https://api.example.com/v1" + } + }, + "tools": { + "imageGeneration": { + "enabled": true, + "provider": "custom", + "model": "your-model-name" + } + } +} +``` + +The `apiBase` is required. The provider sends requests to `{apiBase}/images/generations` using the OpenAI Images API format with `response_format: "b64_json"`. + ### AIHubMix AIHubMix `gpt-image-2-free` is supported through AIHubMix's unified predictions API. 
class CustomImageGenerationClient(ImageGenerationProvider):
    """Image generation client for user-configured OpenAI-compatible endpoints.

    Sends ``POST {api_base}/images/generations`` with an OpenAI Images API
    style JSON body and asks for base64 output (``response_format: "b64_json"``).
    There is no built-in endpoint for this provider, so both the API key and
    the base URL must be supplied through the ``providers.custom`` config slot.
    """

    provider_name = "custom"
    missing_key_message = (
        "Custom image generation API key is not configured. Set providers.custom.apiKey."
    )
    missing_base_message = (
        "Custom image generation API base URL is not configured. "
        "Set providers.custom.apiBase."
    )

    def _default_base_url(self) -> str:
        """Return an empty string: a custom provider has no default endpoint."""
        return ""

    @staticmethod
    def _custom_size(aspect_ratio: str | None, image_size: str | None) -> str:
        """Translate the aspect-ratio / size hints into a ``size`` request value.

        Delegates to ``_openai_size`` with ``"gpt-image-2"`` as its first
        argument, independent of the model the user configured.
        """
        return _openai_size("gpt-image-2", aspect_ratio, image_size)

    async def generate(
        self,
        *,
        prompt: str,
        model: str,
        reference_images: list[str] | None = None,
        aspect_ratio: str | None = None,
        image_size: str | None = None,
    ) -> GeneratedImageResponse:
        """Generate one image through the configured custom endpoint.

        Args:
            prompt: Text description of the image to generate.
            model: Model name forwarded verbatim in the request body.
            reference_images: Not supported by this provider; when given they
                are ignored and a warning is logged.
            aspect_ratio: Optional aspect-ratio hint used to pick ``size``.
            image_size: Optional size hint used to pick ``size``.

        Returns:
            A ``GeneratedImageResponse`` with the extracted images, empty
            ``content`` and the decoded JSON payload as ``raw``.

        Raises:
            ImageGenerationError: If the API key or base URL is missing, the
                server answers with an HTTP error status, or the response body
                is not a JSON object. ``_require_images`` is also called on the
                result (presumably raising when no image was returned --
                confirm against the base class).
        """
        if not self.api_key:
            raise ImageGenerationError(self.missing_key_message)

        # There is no default endpoint (see _default_base_url), so fail with a
        # configuration error instead of posting to a scheme-less relative URL.
        base_url = (self.api_base or "").rstrip("/")
        if not base_url:
            raise ImageGenerationError(self.missing_base_message)
        endpoint = f"{base_url}/images/generations"

        if reference_images:
            logger.warning(
                "Custom image generation does not support reference images; "
                "ignoring {} reference image(s) for {}",
                len(reference_images),
                model,
            )

        # extra_headers is unpacked last, so user configuration can override
        # the default Authorization / Content-Type headers.
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            **self.extra_headers,
        }

        body: dict[str, Any] = {
            "model": model,
            "prompt": prompt,
            "response_format": "b64_json",
            "n": 1,
            "size": self._custom_size(aspect_ratio, image_size),
        }
        # User-supplied extra_body wins over the defaults above.
        body.update(self.extra_body)

        logger.info("Custom Images API request: POST {} body={}", endpoint, body)

        response = await self._http_post(endpoint, headers=headers, body=body)

        try:
            response.raise_for_status()
        except httpx.HTTPStatusError as exc:
            detail = response.text[:1000]
            logger.error("Custom Images API error ({}): {}", response.status_code, detail)
            raise ImageGenerationError(
                f"Custom image generation failed (HTTP {response.status_code}): {detail}"
            ) from exc

        # An arbitrary third-party server may answer 2xx with HTML or plain
        # text; surface that as a provider error rather than a raw ValueError.
        try:
            payload = response.json()
        except ValueError as exc:
            detail = response.text[:1000]
            logger.error("Custom Images API returned a non-JSON body: {}", detail)
            raise ImageGenerationError(
                f"Custom image generation returned a non-JSON response: {detail}"
            ) from exc
        if not isinstance(payload, dict):
            raise ImageGenerationError(
                "Custom image generation returned an unexpected payload type: "
                f"{type(payload).__name__}"
            )

        # Log everything except "data", which carries the image content.
        logger.info(
            "Custom Images API response ({}): {}",
            response.status_code,
            {k: v for k, v in payload.items() if k != "data"},
        )

        # Reuse the injected client when there is one; otherwise create a
        # temporary client for image extraction and always close it.
        client = self._client
        owns_client = client is None
        if owns_client:
            client = httpx.AsyncClient(timeout=self.timeout)
        try:
            images = await _openai_images_from_payload(client, payload)
        finally:
            if owns_client:
                await client.aclose()

        self._require_images(images, payload)

        return GeneratedImageResponse(images=images, content="", raw=payload)
# ---------------------------------------------------------------------------
# Custom OpenAI-compatible Images API
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_custom_generate_success() -> None:
    """A successful call posts an OpenAI-style body and returns the image."""
    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    provider = CustomImageGenerationClient(
        api_key="sk-custom-test",
        api_base="https://custom.example/v1/",
        extra_headers={"X-Test": "1"},
        client=transport,  # type: ignore[arg-type]
    )

    result = await provider.generate(
        prompt="a cat on the moon",
        model="custom-image-model",
        aspect_ratio="16:9",
    )

    # Shape of the returned response.
    assert isinstance(result, GeneratedImageResponse)
    assert result.images == [PNG_DATA_URL]
    assert result.content == ""

    # Shape of the single outgoing request.
    request = transport.calls[0]
    assert request["url"] == "https://custom.example/v1/images/generations"

    sent_headers = request["headers"]
    assert sent_headers["Authorization"] == "Bearer sk-custom-test"
    assert sent_headers["X-Test"] == "1"

    sent_body = request["json"]
    expected_fields = {
        "model": "custom-image-model",
        "prompt": "a cat on the moon",
        "response_format": "b64_json",
        "n": 1,
        "size": "1536x1024",
    }
    for field, expected in expected_fields.items():
        assert sent_body[field] == expected


@pytest.mark.asyncio
async def test_custom_generate_no_api_key() -> None:
    """Without an API key the client raises and names the config setting."""
    provider = CustomImageGenerationClient(api_key=None)

    with pytest.raises(ImageGenerationError, match="providers.custom.apiKey"):
        await provider.generate(prompt="draw", model="custom-image-model")


@pytest.mark.asyncio
async def test_custom_generate_http_error() -> None:
    """An HTTP 400 from the endpoint is surfaced as ImageGenerationError."""
    failing_response = FakeResponse({"error": "bad request"}, status_code=400)
    transport = FakeClient(failing_response)
    provider = CustomImageGenerationClient(
        api_key="sk-custom-test",
        api_base="https://custom.example/v1",
        client=transport,  # type: ignore[arg-type]
    )

    with pytest.raises(ImageGenerationError, match="HTTP 400"):
        await provider.generate(prompt="draw", model="custom-image-model")
--------------------------------------------------------------------------- # OpenAI Codex (Responses API) # ---------------------------------------------------------------------------