feat(image): support custom image generation provider

Addresses #4132.

Add CustomImageGenerationClient for any OpenAI-compatible image generation
API (POST {apiBase}/images/generations). Uses the existing providers.custom
config slot. No schema changes required.

Tests: 54 passed, ruff clean.
Signed-off-by: axelray-dev <110029405+axelray-dev@users.noreply.github.com>
This commit is contained in:
axelray-dev 2026-06-04 13:13:00 +08:00 committed by Xubin Ren
parent c574b028c1
commit 748b28da01
3 changed files with 172 additions and 1 deletions

View File

@ -46,7 +46,7 @@ The WebUI hides provider storage details from the user. The agent sees the saved
| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `tools.imageGeneration.enabled` | boolean | `false` | Register the `generate_image` tool |
| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, `zhipu` |
| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `custom`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, `zhipu` |
| `tools.imageGeneration.model` | string | `"openai/gpt-5.4-image-2"` | Provider model name |
| `tools.imageGeneration.defaultAspectRatio` | string | `"1:1"` | Default ratio when the prompt/tool call does not specify one |
| `tools.imageGeneration.defaultImageSize` | string | `"1K"` | Default size hint, for example `1K`, `2K`, `4K`, or `1024x1024` |
@ -84,6 +84,32 @@ OpenRouter uses a chat-completions style image response. Configure:
Use a model that supports image generation and image editing if you want reference-image edits.
### Custom (OpenAI-compatible)
Any OpenAI-compatible image generation API can be used with the `custom` provider. This includes local Stable Diffusion servers, Replicate, Agnes AI, and similar services that expose the `/v1/images/generations` endpoint.
Configure:
```json
{
"providers": {
"custom": {
"apiKey": "${CUSTOM_IMAGE_API_KEY}",
"apiBase": "https://api.example.com/v1"
}
},
"tools": {
"imageGeneration": {
"enabled": true,
"provider": "custom",
"model": "your-model-name"
}
}
}
```
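The `apiBase` is required and must already include any version prefix (for example `/v1`), because the provider sends requests to `{apiBase}/images/generations`. Requests use the OpenAI Images API format with `response_format: "b64_json"`. Reference images are not supported by the `custom` provider; if any are supplied they are ignored and a warning is logged.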
### AIHubMix
AIHubMix `gpt-image-2-free` is supported through AIHubMix's unified predictions API. Internally nanobot calls:

View File

@ -1033,6 +1033,92 @@ class OpenAIImageGenerationClient(ImageGenerationProvider):
return GeneratedImageResponse(images=images, content="", raw=payload)
class CustomImageGenerationClient(ImageGenerationProvider):
    """OpenAI-compatible Images API for user-configured custom providers.

    Sends ``POST {api_base}/images/generations`` with an OpenAI Images style
    JSON body requesting ``b64_json`` output. There is no built-in endpoint,
    so both the API key and the API base must come from configuration
    (``providers.custom``). Reference images are not forwarded to the
    endpoint; they are dropped with a warning.
    """

    provider_name = "custom"
    missing_key_message = (
        "Custom image generation API key is not configured. Set providers.custom.apiKey."
    )
    missing_base_message = (
        "Custom image generation API base is not configured. Set providers.custom.apiBase."
    )

    def _default_base_url(self) -> str:
        """Return an empty base URL: a custom endpoint has no sensible default."""
        return ""

    @staticmethod
    def _custom_size(aspect_ratio: str | None, image_size: str | None) -> str:
        """Map the aspect ratio / size hint to a ``size`` string for the request.

        The mapping is delegated to ``_openai_size`` using the fixed model name
        ``"gpt-image-2"`` regardless of the configured custom model.
        """
        return _openai_size("gpt-image-2", aspect_ratio, image_size)

    async def generate(
        self,
        *,
        prompt: str,
        model: str,
        reference_images: list[str] | None = None,
        aspect_ratio: str | None = None,
        image_size: str | None = None,
    ) -> GeneratedImageResponse:
        """Generate one image through the configured OpenAI-compatible endpoint.

        Args:
            prompt: Text prompt sent as ``prompt``.
            model: Model name sent as ``model``.
            reference_images: Not supported; ignored with a warning when given.
            aspect_ratio: Optional ratio hint used to derive ``size``.
            image_size: Optional size hint used to derive ``size``.

        Returns:
            A ``GeneratedImageResponse`` with the extracted images, empty
            ``content`` and the decoded JSON payload as ``raw``.

        Raises:
            ImageGenerationError: If the API key or API base is missing, the
                endpoint answers with an HTTP error status, or the response
                body is not a JSON object. ``_require_images`` is also called
                on the result (presumably raising when no image was returned).
        """
        if not self.api_key:
            raise ImageGenerationError(self.missing_key_message)
        # Without a base URL the request would target the relative path
        # "/images/generations" and fail with an opaque transport error.
        if not self.api_base:
            raise ImageGenerationError(self.missing_base_message)
        if reference_images:
            logger.warning(
                "Custom image generation does not support reference images; "
                "ignoring {} reference image(s) for {}",
                len(reference_images),
                model,
            )

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            **self.extra_headers,
        }
        body: dict[str, Any] = {
            "model": model,
            "prompt": prompt,
            "response_format": "b64_json",
            "n": 1,
            "size": self._custom_size(aspect_ratio, image_size),
        }
        # User-supplied extra_body is applied last so it can override defaults.
        body.update(self.extra_body)

        logger.info(
            "Custom Images API request: POST {}/images/generations body={}",
            self.api_base,
            body,
        )
        response = await self._http_post(
            f"{self.api_base}/images/generations",
            headers=headers,
            body=body,
        )
        try:
            response.raise_for_status()
        except httpx.HTTPStatusError as exc:
            detail = response.text[:1000]
            logger.error("Custom Images API error ({}): {}", response.status_code, detail)
            raise ImageGenerationError(
                f"Custom image generation failed (HTTP {response.status_code}): {detail}"
            ) from exc

        # Arbitrary third-party servers may answer 2xx with HTML or plain text;
        # surface that as a provider error instead of a raw decoding exception.
        try:
            payload = response.json()
        except ValueError as exc:
            detail = response.text[:1000]
            logger.error("Custom Images API returned a non-JSON body: {}", detail)
            raise ImageGenerationError(
                f"Custom image generation returned a non-JSON response: {detail}"
            ) from exc
        if not isinstance(payload, dict):
            raise ImageGenerationError(
                "Custom image generation returned an unexpected response: "
                f"expected a JSON object, got {type(payload).__name__}"
            )

        # The "data" entry carries the image payloads, so keep it out of the log.
        logger.info(
            "Custom Images API response ({}): {}",
            response.status_code,
            {k: v for k, v in payload.items() if k != "data"},
        )

        # Reuse the injected client when there is one; otherwise create a
        # temporary client and make sure it is closed afterwards.
        client = self._client
        owns_client = client is None
        if owns_client:
            client = httpx.AsyncClient(timeout=self.timeout)
        try:
            images = await _openai_images_from_payload(client, payload)
        finally:
            if owns_client:
                await client.aclose()

        self._require_images(images, payload)
        return GeneratedImageResponse(images=images, content="", raw=payload)
# ---------------------------------------------------------------------------
# OpenAI Codex image generation
# ---------------------------------------------------------------------------
@ -1594,6 +1680,7 @@ async def _zhipu_images_from_payload(
register_image_gen_provider(AIHubMixImageGenerationClient)
register_image_gen_provider(CodexImageGenerationClient)
register_image_gen_provider(CustomImageGenerationClient)
register_image_gen_provider(GeminiImageGenerationClient)
register_image_gen_provider(OllamaImageGenerationClient)
register_image_gen_provider(MiniMaxImageGenerationClient)

View File

@ -10,6 +10,7 @@ import pytest
from nanobot.providers.image_generation import (
AIHubMixImageGenerationClient,
CodexImageGenerationClient,
CustomImageGenerationClient,
GeminiImageGenerationClient,
GeneratedImageResponse,
ImageGenerationError,
@ -806,6 +807,63 @@ async def test_openai_requires_api_key() -> None:
await client.generate(prompt="draw", model="dall-e-3")
# ---------------------------------------------------------------------------
# Custom OpenAI-compatible Images API
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_custom_generate_success() -> None:
    """A successful call returns the decoded image and sends the expected request."""
    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    provider = CustomImageGenerationClient(
        api_key="sk-custom-test",
        api_base="https://custom.example/v1/",
        extra_headers={"X-Test": "1"},
        client=transport,  # type: ignore[arg-type]
    )

    result = await provider.generate(
        prompt="a cat on the moon",
        model="custom-image-model",
        aspect_ratio="16:9",
    )

    # Response shape.
    assert isinstance(result, GeneratedImageResponse)
    assert result.images == [PNG_DATA_URL]
    assert result.content == ""

    # Outgoing request: URL (trailing slash of api_base collapsed) and headers.
    request = transport.calls[0]
    assert request["url"] == "https://custom.example/v1/images/generations"
    sent_headers = request["headers"]
    assert sent_headers["Authorization"] == "Bearer sk-custom-test"
    assert sent_headers["X-Test"] == "1"

    # Outgoing request: JSON body fields, checked in the original order.
    sent_body = request["json"]
    expected_fields = (
        ("model", "custom-image-model"),
        ("prompt", "a cat on the moon"),
        ("response_format", "b64_json"),
        ("n", 1),
        ("size", "1536x1024"),
    )
    for field, expected in expected_fields:
        assert sent_body[field] == expected
@pytest.mark.asyncio
async def test_custom_generate_no_api_key() -> None:
    """Without an API key, generate() fails with the config hint in the message."""
    keyless_provider = CustomImageGenerationClient(api_key=None)

    with pytest.raises(ImageGenerationError, match="providers.custom.apiKey"):
        await keyless_provider.generate(
            prompt="draw",
            model="custom-image-model",
        )
@pytest.mark.asyncio
async def test_custom_generate_http_error() -> None:
    """An HTTP 400 from the endpoint is reported as ImageGenerationError."""
    failing_response = FakeResponse({"error": "bad request"}, status_code=400)
    transport = FakeClient(failing_response)
    provider = CustomImageGenerationClient(
        api_key="sk-custom-test",
        api_base="https://custom.example/v1",
        client=transport,  # type: ignore[arg-type]
    )

    with pytest.raises(ImageGenerationError, match="HTTP 400"):
        await provider.generate(prompt="draw", model="custom-image-model")
# ---------------------------------------------------------------------------
# OpenAI Codex (Responses API)
# ---------------------------------------------------------------------------