fix: harden custom image provider compatibility

Maintainer edit: preserve provider-specific size hints for custom image generation endpoints while keeping the default 1K mapping compatible. Clarify the custom provider contract in docs and cover response_format/size overrides in tests.
This commit is contained in:
chengyongru 2026-06-05 14:03:23 +08:00 committed by Xubin Ren
parent ae17a79bdf
commit d435cb0b21
3 changed files with 85 additions and 4 deletions

View File

@ -23,7 +23,7 @@ The feature is disabled by default. Enable it in `~/.nanobot/config.json`, confi
} }
``` ```
See [Provider Notes](#provider-notes) for AIHubMix, MiniMax, Gemini, Ollama, StepFun, and Zhipu configuration examples. See [Provider Notes](#provider-notes) for Custom, AIHubMix, MiniMax, Gemini, Ollama, StepFun, and Zhipu configuration examples.
> [!TIP] > [!TIP]
> Prefer environment variables for API keys. nanobot resolves `${VAR_NAME}` values from the environment at startup. > Prefer environment variables for API keys. nanobot resolves `${VAR_NAME}` values from the environment at startup.
@ -86,7 +86,13 @@ Use a model that supports image generation and image editing if you want referen
### Custom (OpenAI-compatible) ### Custom (OpenAI-compatible)
Any OpenAI-compatible image generation API can be used with the `custom` provider. This includes local Stable Diffusion servers, Replicate, Agnes AI, and similar services that expose the `/v1/images/generations` endpoint. Use the `custom` provider for services that implement the synchronous OpenAI Images API:
```text
POST /v1/images/generations
```
The response must include generated images in `data[].b64_json` or `data[].url`. Native prediction APIs, such as Replicate's `/v1/models/{owner}/{model}/predictions`, are not directly compatible unless you put an OpenAI-compatible gateway in front of them.
Configure: Configure:
@ -108,7 +114,15 @@ Configure:
} }
``` ```
The `apiBase` is required. The provider sends requests to `{apiBase}/images/generations` using the OpenAI Images API format with `response_format: "b64_json"`. The `apiKey` is optional for local or unauthenticated endpoints. The `apiBase` is required. The provider sends requests to `{apiBase}/images/generations` using the OpenAI Images API format with `response_format: "b64_json"`. The `apiKey` is optional for local or unauthenticated endpoints. Reference-image edits are not supported by the generic `custom` provider.
`extraBody` can adapt provider-specific quirks because it is merged last into the request body. Examples:
- Agnes AI documents URL responses, so use `"extraBody": {"response_format": "url"}`.
- Together AI documents `"response_format": "base64"`, so override the default with `"extraBody": {"response_format": "base64"}`.
- Volcengine Ark Seedream models may require size hints such as `"2K"`, `"3K"`, `"4K"`, or explicit dimensions. Set `tools.imageGeneration.defaultImageSize` or `providers.custom.extraBody.size` to a value supported by the selected model.
For compatibility with the default nanobot setting, the `custom` provider maps `defaultImageSize: "1K"` (case-insensitive) to `1024x1024`. Any other explicit size hint is passed through to the endpoint unchanged, apart from trimming surrounding whitespace.
### AIHubMix ### AIHubMix

View File

@ -1046,7 +1046,13 @@ class CustomImageGenerationClient(ImageGenerationProvider):
@staticmethod
def _custom_size(aspect_ratio: str | None, image_size: str | None) -> str:
    """Resolve the ``size`` value for a custom image generation request.

    A non-blank ``image_size`` takes precedence: ``"1K"`` (compared
    case-insensitively after stripping surrounding whitespace) becomes
    ``"1024x1024"``, and any other hint is returned stripped but otherwise
    unchanged. When ``image_size`` is ``None``, empty, or whitespace only,
    the result comes from ``_openai_size("gpt-image-2", aspect_ratio, None)``.

    NOTE(review): ``aspect_ratio`` is only consulted on the fallback path;
    whenever a size hint is present it is not used. Confirm this is the
    intended contract for callers that pass both values.
    """
    hint = (image_size or "").strip()
    if not hint:
        # No usable hint: derive the size from the aspect ratio alone.
        return _openai_size("gpt-image-2", aspect_ratio, None)
    # "1K" is translated to explicit pixel dimensions; other hints pass through.
    return "1024x1024" if hint.lower() == "1k" else hint
async def generate( async def generate(
self, self,

View File

@ -843,6 +843,67 @@ async def test_custom_generate_success() -> None:
assert body["size"] == "1536x1024" assert body["size"] == "1536x1024"
@pytest.mark.asyncio
async def test_custom_generate_preserves_provider_size_hint() -> None:
    """An ``image_size`` of "2K" must appear unchanged as the request ``size``."""
    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    provider = CustomImageGenerationClient(
        api_key="sk-custom-test",
        api_base="https://custom.example/v1",
        client=transport,  # type: ignore[arg-type]
    )

    await provider.generate(
        prompt="a cat on the moon",
        model="custom-image-model",
        image_size="2K",
    )

    # Inspect the JSON recorded for the first call made through the fake.
    sent_body = transport.calls[0]["json"]
    assert sent_body["size"] == "2K"
@pytest.mark.asyncio
async def test_custom_generate_maps_one_k_to_openai_dimension() -> None:
    """An ``image_size`` of "1K" must be sent as the ``size`` "1024x1024"."""
    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    provider = CustomImageGenerationClient(
        api_key="sk-custom-test",
        api_base="https://custom.example/v1",
        client=transport,  # type: ignore[arg-type]
    )

    await provider.generate(
        prompt="a cat on the moon",
        model="custom-image-model",
        image_size="1K",
    )

    # Inspect the JSON recorded for the first call made through the fake.
    sent_body = transport.calls[0]["json"]
    assert sent_body["size"] == "1024x1024"
@pytest.mark.asyncio
async def test_custom_generate_extra_body_can_override_defaults() -> None:
    """``extra_body`` values must replace the computed ``size`` and ``response_format``.

    The fake endpoint answers with an image URL; the test also checks that the
    URL is fetched and that the result is exposed as a PNG data URL.
    """
    image_url = "https://images.example/cat.png"
    transport = FakeClient(FakeResponse({"data": [{"url": image_url}]}))
    # Response handed back when the client fetches the image URL.
    transport.get_response = FakeResponse({}, content=PNG_BYTES)
    overrides = {"response_format": "url", "size": "2K"}
    provider = CustomImageGenerationClient(
        api_key="sk-custom-test",
        api_base="https://custom.example/v1",
        extra_body=overrides,
        client=transport,  # type: ignore[arg-type]
    )

    result = await provider.generate(
        prompt="a cat on the moon",
        model="custom-image-model",
        image_size="1K",
    )

    encoded_png = base64.b64encode(PNG_BYTES).decode("ascii")
    assert result.images == [f"data:image/png;base64,{encoded_png}"]
    assert transport.get_calls[0]["url"] == "https://images.example/cat.png"

    # Both overrides must win over the values derived from the call arguments.
    sent_body = transport.calls[0]["json"]
    assert sent_body["response_format"] == "url"
    assert sent_body["size"] == "2K"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_custom_generate_without_api_key_omits_authorization() -> None: async def test_custom_generate_without_api_key_omits_authorization() -> None:
fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]})) fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))