feat: Add Zhipu (智谱) image generation provider

This commit is contained in:
Jiajun Xie 2026-05-23 13:34:35 +08:00 committed by Xubin Ren
parent c0d4f012c8
commit 3e6f9907fe
4 changed files with 294 additions and 3 deletions

View File

@ -23,7 +23,7 @@ The feature is disabled by default. Enable it in `~/.nanobot/config.json`, confi
} }
``` ```
See [Provider Notes](#provider-notes) for AIHubMix, MiniMax, Gemini, Ollama, and StepFun configuration examples. See [Provider Notes](#provider-notes) for AIHubMix, MiniMax, Gemini, Ollama, StepFun, and Zhipu configuration examples.
> [!TIP] > [!TIP]
> Prefer environment variables for API keys. nanobot resolves `${VAR_NAME}` values from the environment at startup. > Prefer environment variables for API keys. nanobot resolves `${VAR_NAME}` values from the environment at startup.
@ -46,7 +46,7 @@ The WebUI hides provider storage details from the user. The agent sees the saved
| Option | Type | Default | Description | | Option | Type | Default | Description |
|--------|------|---------|-------------| |--------|------|---------|-------------|
| `tools.imageGeneration.enabled` | boolean | `false` | Register the `generate_image` tool | | `tools.imageGeneration.enabled` | boolean | `false` | Register the `generate_image` tool |
| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun` | | `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, `zhipu` |
| `tools.imageGeneration.model` | string | `"openai/gpt-5.4-image-2"` | Provider model name | | `tools.imageGeneration.model` | string | `"openai/gpt-5.4-image-2"` | Provider model name |
| `tools.imageGeneration.defaultAspectRatio` | string | `"1:1"` | Default ratio when the prompt/tool call does not specify one | | `tools.imageGeneration.defaultAspectRatio` | string | `"1:1"` | Default ratio when the prompt/tool call does not specify one |
| `tools.imageGeneration.defaultImageSize` | string | `"1K"` | Default size hint, for example `1K`, `2K`, `4K`, or `1024x1024` | | `tools.imageGeneration.defaultImageSize` | string | `"1K"` | Default size hint, for example `1K`, `2K`, `4K`, or `1024x1024` |
@ -245,6 +245,31 @@ StepPlan is StepFun's subscription tier and uses a different API base URL. The i
`apiBase` takes precedence over the registry default, so with the StepPlan base URL configured, image requests are sent to `https://api.stepfun.com/step_plan/v1/images/generations` — the same path prefix used for LLM calls. The API key is shared with the standard StepFun provider. `apiBase` takes precedence over the registry default, so with the StepPlan base URL configured, image requests are sent to `https://api.stepfun.com/step_plan/v1/images/generations` — the same path prefix used for LLM calls. The API key is shared with the standard StepFun provider.
### Zhipu
Zhipu (智谱) `glm-image` model supports text-to-image generation. The API returns temporary image URLs (valid for 30 days); nanobot downloads and re-encodes them as base64 data URLs.
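Supported aspect ratios: `1:1`, `16:9`, `9:16`, `3:4`, `4:3`. An explicit `WIDTHxHEIGHT` image size (e.g. `1280x1280`, `1728x960`) takes precedence; otherwise the size is derived from the aspect ratio preset, falling back to `1280x1280` when the ratio is not one of the supported values. Size hints such as `1K` or `2K` are not `WIDTHxHEIGHT` values, so they are ignored in favor of the aspect ratio preset.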
```json
{
"providers": {
"zhipu": {
"apiKey": "${ZHIPU_API_KEY}"
}
},
"tools": {
"imageGeneration": {
"enabled": true,
"provider": "zhipu",
"model": "glm-image"
}
}
}
```
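Other supported models: `cogview-4`, `cogview-4-250304`, `cogview-3-flash`. Reference images are not supported by this integration: if any are supplied, nanobot logs a warning, ignores them, and generates the image from the prompt alone.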
## Artifacts ## Artifacts
Generated images are stored under the active nanobot instance's media directory: Generated images are stored under the active nanobot instance's media directory:
@ -299,7 +324,7 @@ Use the reference image. Keep the same robot and composition, change the palette
|---------|-------| |---------|-------|
| `generate_image` is not available | Set `tools.imageGeneration.enabled` to `true` and restart the gateway | | `generate_image` is not available | Set `tools.imageGeneration.enabled` to `true` and restart the gateway |
| Missing API key error | Configure `providers.<provider>.apiKey`; if using `${VAR_NAME}`, confirm the environment variable is visible to the gateway process | | Missing API key error | Configure `providers.<provider>.apiKey`; if using `${VAR_NAME}`, confirm the environment variable is visible to the gateway process |
| `unsupported image generation provider` | Use `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, or `stepfun` | | `unsupported image generation provider` | Use `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, or `zhipu` |
| AIHubMix says `Incorrect model ID` | Use `model: "gpt-image-2-free"`; nanobot expands it to the required `openai/gpt-image-2-free` model path internally | | AIHubMix says `Incorrect model ID` | Use `model: "gpt-image-2-free"`; nanobot expands it to the required `openai/gpt-image-2-free` model path internally |
| Generation times out | Try a smaller/default image size, set AIHubMix `extraBody.quality` to `"low"`, or retry later | | Generation times out | Try a smaller/default image size, set AIHubMix `extraBody.quality` to `"low"`, or retry later |
| Reference image rejected | Reference image paths must be inside the workspace or nanobot media directory and must be valid image files | | Reference image rejected | Reference image paths must be inside the workspace or nanobot media directory and must be valid image files |

View File

@ -1445,6 +1445,138 @@ def _stepfun_images_from_payload(payload: dict[str, Any]) -> list[str]:
return images return images
# ---------------------------------------------------------------------------
# Zhipu (智谱) image generation
# ---------------------------------------------------------------------------
# Default request timeout for Zhipu image generation; exposed on the client as
# ``default_timeout`` (presumably seconds, per the ``_S`` suffix — confirm
# against how the base provider builds its httpx timeout).
_ZHIPU_TIMEOUT_S = 300.0

# Aspect-ratio presets mapped to the ``WIDTHxHEIGHT`` string that is sent as the
# request's ``size`` field when the caller gives no explicit size.
_ZHIPU_ASPECT_RATIO_SIZES = {
    "1:1": "1280x1280",
    "16:9": "1728x960",
    "9:16": "960x1728",
    "3:4": "1088x1472",
    "4:3": "1472x1088",
}
class ZhipuImageGenerationClient(ImageGenerationProvider):
    """Async client for the Zhipu (智谱) image generation API.

    Supports:
    - Text-to-image via glm-image, cogview-4, cogview-3-flash, etc.
    - Aspect ratio / explicit size selection (sent as the ``size`` field)
    - Provider-specific request options (for example ``watermark_enabled``)
      passed through ``extra_body``

    Reference images are not supported; they are ignored with a warning.
    """

    provider_name = "zhipu"
    missing_key_message = "Zhipu API key is not configured. Set providers.zhipu.apiKey."
    default_timeout = _ZHIPU_TIMEOUT_S

    def _default_base_url(self) -> str:
        """Return the Zhipu API base URL used when none is configured."""
        return "https://open.bigmodel.cn/api/paas/v4"

    async def generate(
        self,
        *,
        prompt: str,
        model: str,
        reference_images: list[str] | None = None,
        aspect_ratio: str | None = None,
        image_size: str | None = None,
    ) -> GeneratedImageResponse:
        """Generate images from ``prompt`` with the given Zhipu ``model``.

        Args:
            prompt: Text description of the image to generate.
            model: Zhipu model name, sent verbatim as the ``model`` field.
            reference_images: Not supported by this provider; when given they
                are ignored and a warning is logged.
            aspect_ratio: Optional ratio such as ``"16:9"``; resolved to a
                concrete size by ``_zhipu_size``.
            image_size: Optional size hint; resolved by ``_zhipu_size``.

        Returns:
            A ``GeneratedImageResponse`` whose ``images`` are the downloaded
            results, ``content`` is empty and ``raw`` is the decoded API payload.

        Raises:
            ImageGenerationError: If no API key is configured, the request
                times out or fails at the transport level, or the API answers
                with an error status. ``_require_images`` is also invoked on
                the result (expected to raise when no image was returned).
        """
        if not self.api_key:
            raise ImageGenerationError(self.missing_key_message)

        if reference_images:
            logger.warning(
                "Zhipu image generation does not support reference images; "
                "ignoring {} reference image(s) for {}",
                len(reference_images),
                model,
            )

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            **self.extra_headers,
        }
        body: dict[str, Any] = {
            "model": model,
            "prompt": prompt,
            # _zhipu_size always yields a concrete size (it has a default).
            "size": _zhipu_size(aspect_ratio, image_size),
        }
        # Applied last so configured extra_body entries can override the defaults.
        body.update(self.extra_body)

        url = f"{self.api_base}/images/generations"
        client = self._client or httpx.AsyncClient(timeout=self.timeout)
        owns_client = self._client is None
        try:
            try:
                response = await self._http_post(url, headers=headers, body=body, client=client)
            except httpx.TimeoutException as exc:
                raise ImageGenerationError("Zhipu image generation timed out") from exc
            except httpx.RequestError as exc:
                raise ImageGenerationError(f"Zhipu image generation request failed: {exc}") from exc

            try:
                response.raise_for_status()
            except httpx.HTTPStatusError as exc:
                # Truncate the body so a large error page does not flood the message.
                detail = response.text[:500]
                raise ImageGenerationError(f"Zhipu image generation failed: {detail}") from exc

            payload = response.json()
            # The same client is reused to download the returned image URLs.
            images = await _zhipu_images_from_payload(client, payload)
            self._require_images(images, payload)
            return GeneratedImageResponse(images=images, content="", raw=payload)
        finally:
            # Close a client created here on every exit path, including errors,
            # so failed requests do not leak connections. An injected client
            # stays open because its owner manages its lifetime.
            if owns_client:
                await client.aclose()
def _zhipu_size(
aspect_ratio: str | None,
image_size: str | None,
) -> str:
"""Resolve aspect ratio / image_size to Zhipu size string.
Zhipu glm-image model supports: 1280x1280 (default), 1568x1056,
1056x1568, 1472x1088, 1088x1472, 1728x960, 960x1728.
"""
if image_size and "x" in image_size.lower():
return image_size
if aspect_ratio and aspect_ratio in _ZHIPU_ASPECT_RATIO_SIZES:
return _ZHIPU_ASPECT_RATIO_SIZES[aspect_ratio]
return "1280x1280"
async def _zhipu_images_from_payload(
    client: httpx.AsyncClient,
    payload: dict[str, Any],
) -> list[str]:
    """Turn a Zhipu API response into a list of downloaded images.

    Each dict entry under ``payload["data"]`` may carry a ``url``. Zhipu's
    URLs are temporary (they expire after 30 days), so every non-empty string
    URL is fetched through ``_download_image_data_url`` and re-encoded as a
    base64 data URL. Entries that are not dicts, or that have no usable
    ``url``, are skipped. Downloads happen one at a time, in response order.
    """
    entries = payload.get("data") or []
    candidate_links = [
        entry.get("url") for entry in entries if isinstance(entry, dict)
    ]
    return [
        await _download_image_data_url(client, link)
        for link in candidate_links
        if isinstance(link, str) and link
    ]
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Provider registration # Provider registration
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -1457,3 +1589,4 @@ register_image_gen_provider(MiniMaxImageGenerationClient)
register_image_gen_provider(OpenAIImageGenerationClient) register_image_gen_provider(OpenAIImageGenerationClient)
register_image_gen_provider(OpenRouterImageGenerationClient) register_image_gen_provider(OpenRouterImageGenerationClient)
register_image_gen_provider(StepFunImageGenerationClient) register_image_gen_provider(StepFunImageGenerationClient)
# Register the Zhipu client with the image-generation provider registry
# (presumably looked up by its provider_name, "zhipu" — confirm in
# register_image_gen_provider).
register_image_gen_provider(ZhipuImageGenerationClient)

View File

@ -18,6 +18,7 @@ from nanobot.providers.image_generation import (
OpenAIImageGenerationClient, OpenAIImageGenerationClient,
OpenRouterImageGenerationClient, OpenRouterImageGenerationClient,
StepFunImageGenerationClient, StepFunImageGenerationClient,
ZhipuImageGenerationClient,
) )
PNG_BYTES = ( PNG_BYTES = (
@ -1027,3 +1028,102 @@ async def test_openai_no_images_raises() -> None:
with pytest.raises(ImageGenerationError, match="returned no images"): with pytest.raises(ImageGenerationError, match="returned no images"):
await client.generate(prompt="draw", model="dall-e-3") await client.generate(prompt="draw", model="dall-e-3")
# ---------------------------------------------------------------------------
# Zhipu
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_zhipu_image_generation_payload_and_response() -> None:
    """Check the outgoing Zhipu request (URL, headers, body) and the decoded image."""
    http = FakeClient(FakeResponse({"data": [{"url": "https://cdn.example/image.png"}]}))
    # Response served when the client downloads the returned image URL.
    http.get_response = FakeResponse({}, content=PNG_BYTES)
    provider = ZhipuImageGenerationClient(
        api_key="sk-zhipu-test",
        api_base="https://open.bigmodel.cn/api/paas/v4",
        extra_headers={"X-Test": "1"},
        extra_body={"watermark_enabled": False},
        client=http,  # type: ignore[arg-type]
    )

    result = await provider.generate(
        prompt="a sunset over the ocean",
        model="glm-image",
        aspect_ratio="16:9",
        image_size="2K",
    )

    first_image = result.images[0]
    assert first_image.startswith("data:image/png;base64,")

    request = http.calls[0]
    assert request["url"] == "https://open.bigmodel.cn/api/paas/v4/images/generations"

    sent_headers = request["headers"]
    assert sent_headers["Authorization"] == "Bearer sk-zhipu-test"
    assert sent_headers["X-Test"] == "1"

    sent_body = request["json"]
    assert sent_body["model"] == "glm-image"
    assert sent_body["prompt"] == "a sunset over the ocean"
    # "2K" is not a WIDTHxHEIGHT value, so the 16:9 preset decides the size.
    assert sent_body["size"] == "1728x960"
    assert sent_body["watermark_enabled"] is False
@pytest.mark.asyncio
async def test_zhipu_image_generation_with_explicit_size() -> None:
    """An explicit WIDTHxHEIGHT image size is forwarded unchanged as ``size``."""
    http = FakeClient(FakeResponse({"data": [{"url": "https://cdn.example/image.png"}]}))
    http.get_response = FakeResponse({}, content=PNG_BYTES)
    provider = ZhipuImageGenerationClient(
        api_key="sk-zhipu-test",
        client=http,  # type: ignore[arg-type]
    )

    await provider.generate(
        prompt="a cat",
        model="cogview-4",
        image_size="1024x1024",
    )

    sent_body = http.calls[0]["json"]
    assert sent_body["size"] == "1024x1024"
@pytest.mark.asyncio
async def test_zhipu_image_generation_downloads_url_response() -> None:
    """The image URL returned by the API is downloaded and exposed as a data URL."""
    http = FakeClient(FakeResponse({"data": [{"url": "https://cdn.example/image.png"}]}))
    http.get_response = FakeResponse({}, content=PNG_BYTES)
    provider = ZhipuImageGenerationClient(
        api_key="sk-zhipu-test",
        client=http,  # type: ignore[arg-type]
    )

    result = await provider.generate(prompt="draw", model="glm-image")

    assert result.images[0].startswith("data:image/png;base64,")
    download = http.get_calls[0]
    assert download["url"] == "https://cdn.example/image.png"
@pytest.mark.asyncio
async def test_zhipu_image_generation_requires_api_key() -> None:
    """Generating without an API key fails with an API-key error."""
    keyless_provider = ZhipuImageGenerationClient(api_key=None)

    with pytest.raises(ImageGenerationError, match="API key"):
        await keyless_provider.generate(prompt="draw", model="glm-image")
@pytest.mark.asyncio
async def test_zhipu_image_generation_no_images_raises() -> None:
    """A response whose data entries carry no image URL is reported as an error."""
    payload_without_urls = {"data": [{"text": "sorry"}]}
    http = FakeClient(FakeResponse(payload_without_urls))
    provider = ZhipuImageGenerationClient(api_key="sk-zhipu-test", client=http)  # type: ignore[arg-type]

    with pytest.raises(ImageGenerationError, match="returned no images"):
        await provider.generate(prompt="draw", model="glm-image")
@pytest.mark.asyncio
async def test_zhipu_image_generation_rejects_reference_images() -> None:
    """Reference images are dropped, not forwarded, and generation still succeeds.

    The Zhipu client does not raise when reference images are supplied: it logs
    a warning, ignores them and generates from the prompt alone. A fake HTTP
    client is injected so the test never touches the network.
    """
    fake = FakeClient(FakeResponse({"data": [{"url": "https://cdn.example/image.png"}]}))
    fake.get_response = FakeResponse({}, content=PNG_BYTES)
    client = ZhipuImageGenerationClient(
        api_key="sk-zhipu-test",
        client=fake,  # type: ignore[arg-type]
    )

    response = await client.generate(
        prompt="edit this",
        model="glm-image",
        reference_images=["ref.png"],
    )

    assert response.images[0].startswith("data:image/png;base64,")
    body = fake.calls[0]["json"]
    assert body["model"] == "glm-image"
    assert body["prompt"] == "edit this"
    # The reference image must not leak into the request in any field.
    assert "ref.png" not in str(body)

View File

@ -171,6 +171,39 @@ async def test_generate_image_tool_allows_ollama_without_api_key(
assert fake.calls[0]["image_size"] == "1K" assert fake.calls[0]["image_size"] == "1K"
@pytest.mark.asyncio
async def test_generate_image_tool_allows_zhipu_without_api_key(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The tool builds the Zhipu provider with no API key and applies its defaults."""

    def resolve_provider(name):
        # Only the "zhipu" provider is known to this stubbed registry lookup.
        return FakeImageClient if name == "zhipu" else None

    set_config_path(tmp_path / "config.json")
    FakeImageClient.instances = []
    monkeypatch.setattr(
        "nanobot.agent.tools.image_generation.get_image_gen_provider",
        resolve_provider,
    )

    tool_config = ImageGenerationToolConfig(
        enabled=True,
        provider="zhipu",
        model="glm-image",
    )
    zhipu_provider_config = ProviderConfig(api_base="https://open.bigmodel.cn/api/paas/v4")
    tool = ImageGenerationTool(
        workspace=tmp_path,
        config=tool_config,
        provider_configs={"zhipu": zhipu_provider_config},
    )

    raw_result = await tool.execute(prompt="draw a cat")
    payload = json.loads(raw_result)

    assert len(payload["artifacts"]) == 1

    created_client = FakeImageClient.instances[0]
    assert created_client.kwargs["api_key"] is None
    assert created_client.kwargs["api_base"] == "https://open.bigmodel.cn/api/paas/v4"

    generate_call = created_client.calls[0]
    assert generate_call["aspect_ratio"] == "1:1"
    assert generate_call["image_size"] == "1K"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_generate_image_tool_rejects_reference_outside_workspace(tmp_path: Path) -> None: async def test_generate_image_tool_rejects_reference_outside_workspace(tmp_path: Path) -> None:
set_config_path(tmp_path / "config.json") set_config_path(tmp_path / "config.json")