From 3e6f9907fefeab9f470e5a854d4ae12e64f2f4da Mon Sep 17 00:00:00 2001
From: Jiajun Xie
Date: Sat, 23 May 2026 13:34:35 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20Add=20Zhipu=20(=E6=99=BA=E8=B0=B1)=20im?=
 =?UTF-8?q?age=20generation=20provider?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/image-generation.md                  |  31 ++++-
 nanobot/providers/image_generation.py     | 139 ++++++++++++++++++++++
 tests/providers/test_image_generation.py  | 100 ++++++++++++++++
 tests/tools/test_image_generation_tool.py |  33 ++++++
 4 files changed, 300 insertions(+), 3 deletions(-)

diff --git a/docs/image-generation.md b/docs/image-generation.md
index a9d6b620c..374b1cb17 100644
--- a/docs/image-generation.md
+++ b/docs/image-generation.md
@@ -23,7 +23,7 @@ The feature is disabled by default. Enable it in `~/.nanobot/config.json`, confi
 }
 ```
 
-See [Provider Notes](#provider-notes) for AIHubMix, MiniMax, Gemini, Ollama, and StepFun configuration examples.
+See [Provider Notes](#provider-notes) for AIHubMix, MiniMax, Gemini, Ollama, StepFun, and Zhipu configuration examples.
 
 > [!TIP]
 > Prefer environment variables for API keys. nanobot resolves `${VAR_NAME}` values from the environment at startup.
@@ -46,7 +46,7 @@ The WebUI hides provider storage details from the user. The agent sees the saved
 | Option | Type | Default | Description |
 |--------|------|---------|-------------|
 | `tools.imageGeneration.enabled` | boolean | `false` | Register the `generate_image` tool |
-| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun` |
+| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, `zhipu` |
 | `tools.imageGeneration.model` | string | `"openai/gpt-5.4-image-2"` | Provider model name |
 | `tools.imageGeneration.defaultAspectRatio` | string | `"1:1"` | Default ratio when the prompt/tool call does not specify one |
 | `tools.imageGeneration.defaultImageSize` | string | `"1K"` | Default size hint, for example `1K`, `2K`, `4K`, or `1024x1024` |
@@ -245,6 +245,31 @@ StepPlan is StepFun's subscription tier and uses a different API base URL. The i
 
 `apiBase` takes precedence over the registry default, so with the StepPlan base URL configured, image requests are sent to `https://api.stepfun.com/step_plan/v1/images/generations` — the same path prefix used for LLM calls. The API key is shared with the standard StepFun provider.
 
+### Zhipu
+
+Zhipu (智谱) `glm-image` model supports text-to-image generation. The API returns temporary image URLs (valid for 30 days); nanobot downloads and re-encodes them as base64 data URLs.
+
+Supported aspect ratios: `1:1`, `16:9`, `9:16`, `3:4`, `4:3`. Sizes can be specified as `WIDTHxHEIGHT` (e.g. `1280x1280`, `1728x960`) or using aspect ratio presets.
+
+```json
+{
+  "providers": {
+    "zhipu": {
+      "apiKey": "${ZHIPU_API_KEY}"
+    }
+  },
+  "tools": {
+    "imageGeneration": {
+      "enabled": true,
+      "provider": "zhipu",
+      "model": "glm-image"
+    }
+  }
+}
+```
+
+Other supported models: `cogview-4`, `cogview-4-250304`, `cogview-3-flash`. Reference images are not supported by this integration; requests that include them fail with an error.
+
 ## Artifacts
 
 Generated images are stored under the active nanobot instance's media directory:
@@ -299,7 +324,7 @@ Use the reference image. Keep the same robot and composition, change the palette
 |---------|-------|
 | `generate_image` is not available | Set `tools.imageGeneration.enabled` to `true` and restart the gateway |
 | Missing API key error | Configure `providers..apiKey`; if using `${VAR_NAME}`, confirm the environment variable is visible to the gateway process |
-| `unsupported image generation provider` | Use `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, or `stepfun` |
+| `unsupported image generation provider` | Use `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, or `zhipu` |
 | AIHubMix says `Incorrect model ID` | Use `model: "gpt-image-2-free"`; nanobot expands it to the required `openai/gpt-image-2-free` model path internally |
 | Generation times out | Try a smaller/default image size, set AIHubMix `extraBody.quality` to `"low"`, or retry later |
 | Reference image rejected | Reference image paths must be inside the workspace or nanobot media directory and must be valid image files |
diff --git a/nanobot/providers/image_generation.py b/nanobot/providers/image_generation.py
index 1316f1d43..837d46da0 100644
--- a/nanobot/providers/image_generation.py
+++ b/nanobot/providers/image_generation.py
@@ -1445,6 +1445,144 @@ def _stepfun_images_from_payload(payload: dict[str, Any]) -> list[str]:
     return images
 
 
+# ---------------------------------------------------------------------------
+# Zhipu (智谱) image generation
+# ---------------------------------------------------------------------------
+
+_ZHIPU_TIMEOUT_S = 300.0
+
+_ZHIPU_ASPECT_RATIO_SIZES = {
+    "1:1": "1280x1280",
+    "16:9": "1728x960",
+    "9:16": "960x1728",
+    "3:4": "1088x1472",
+    "4:3": "1472x1088",
+}
+
+
+class ZhipuImageGenerationClient(ImageGenerationProvider):
+    """Async client for Zhipu (智谱) image generation API.
+
+    Supports:
+    - Text-to-image via glm-image, cogview-4, cogview-3-flash, etc.
+    - Aspect ratio selection
+    - Watermark control
+    """
+
+    provider_name = "zhipu"
+    missing_key_message = "Zhipu API key is not configured. Set providers.zhipu.apiKey."
+    default_timeout = _ZHIPU_TIMEOUT_S
+
+    def _default_base_url(self) -> str:
+        return "https://open.bigmodel.cn/api/paas/v4"
+
+    async def generate(
+        self,
+        *,
+        prompt: str,
+        model: str,
+        reference_images: list[str] | None = None,
+        aspect_ratio: str | None = None,
+        image_size: str | None = None,
+    ) -> GeneratedImageResponse:
+        """Generate images from a text prompt and return them as data URLs.
+
+        Raises ImageGenerationError when the API key is missing, reference
+        images are supplied, the request fails, or no image is returned.
+        """
+        if not self.api_key:
+            raise ImageGenerationError(self.missing_key_message)
+
+        if reference_images:
+            # Fail fast: silently dropping references would return an image
+            # unrelated to what the caller asked to edit.
+            raise ImageGenerationError(
+                "Zhipu image generation does not support reference images"
+            )
+
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+            **self.extra_headers,
+        }
+
+        body: dict[str, Any] = {
+            "model": model,
+            "prompt": prompt,
+            "size": _zhipu_size(aspect_ratio, image_size),
+        }
+        # extra_body is applied last so configured values can override defaults.
+        body.update(self.extra_body)
+
+        url = f"{self.api_base}/images/generations"
+
+        client = self._client or httpx.AsyncClient(timeout=self.timeout)
+        owns_client = self._client is None
+        try:
+            try:
+                response = await self._http_post(url, headers=headers, body=body, client=client)
+            except httpx.TimeoutException as exc:
+                raise ImageGenerationError("Zhipu image generation timed out") from exc
+            except httpx.RequestError as exc:
+                raise ImageGenerationError(f"Zhipu image generation request failed: {exc}") from exc
+
+            try:
+                response.raise_for_status()
+            except httpx.HTTPStatusError as exc:
+                detail = response.text[:500]
+                raise ImageGenerationError(f"Zhipu image generation failed: {detail}") from exc
+
+            payload = response.json()
+            images = await _zhipu_images_from_payload(client, payload)
+        finally:
+            # Close a client created here on every path, including failures.
+            if owns_client:
+                await client.aclose()
+
+        self._require_images(images, payload)
+        return GeneratedImageResponse(images=images, content="", raw=payload)
+
+
+def _zhipu_size(
+    aspect_ratio: str | None,
+    image_size: str | None,
+) -> str:
+    """Resolve aspect ratio / image_size to a Zhipu size string.
+
+    An explicit ``WIDTHxHEIGHT`` value wins (normalized to lowercase); otherwise
+    a known aspect ratio maps to its preset, falling back to 1280x1280.
+
+    Zhipu glm-image model supports: 1280x1280 (default), 1568x1056,
+    1056x1568, 1472x1088, 1088x1472, 1728x960, 960x1728.
+    """
+    if image_size:
+        width, sep, height = image_size.strip().lower().partition("x")
+        if sep and width.isdigit() and height.isdigit():
+            return f"{width}x{height}"
+    if aspect_ratio and aspect_ratio in _ZHIPU_ASPECT_RATIO_SIZES:
+        return _ZHIPU_ASPECT_RATIO_SIZES[aspect_ratio]
+    return "1280x1280"
+
+
+async def _zhipu_images_from_payload(
+    client: httpx.AsyncClient,
+    payload: dict[str, Any],
+) -> list[str]:
+    """Extract image data URLs from Zhipu API response.
+
+    Zhipu returns images as temporary URLs that expire after 30 days.
+    We download and re-encode as base64 data URLs.
+    """
+    images: list[str] = []
+    for item in payload.get("data") or []:
+        if not isinstance(item, dict):
+            continue
+        url = item.get("url")
+        if isinstance(url, str) and url:
+            images.append(await _download_image_data_url(client, url))
+    return images
+
+
 # ---------------------------------------------------------------------------
 # Provider registration
 # ---------------------------------------------------------------------------
@@ -1457,3 +1595,4 @@ register_image_gen_provider(MiniMaxImageGenerationClient)
 register_image_gen_provider(OpenAIImageGenerationClient)
 register_image_gen_provider(OpenRouterImageGenerationClient)
 register_image_gen_provider(StepFunImageGenerationClient)
+register_image_gen_provider(ZhipuImageGenerationClient)
diff --git a/tests/providers/test_image_generation.py b/tests/providers/test_image_generation.py
index 77025895c..181657620 100644
--- a/tests/providers/test_image_generation.py
+++ b/tests/providers/test_image_generation.py
@@ -18,6 +18,7 @@ from nanobot.providers.image_generation import (
     OpenAIImageGenerationClient,
     OpenRouterImageGenerationClient,
     StepFunImageGenerationClient,
+    ZhipuImageGenerationClient,
 )
 
 PNG_BYTES = (
@@ -1027,3 +1028,102 @@ async def test_openai_no_images_raises() -> None:
 
     with pytest.raises(ImageGenerationError, match="returned no images"):
         await client.generate(prompt="draw", model="dall-e-3")
+
+
+# ---------------------------------------------------------------------------
+# Zhipu
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_zhipu_image_generation_payload_and_response() -> None:
+    fake = FakeClient(FakeResponse({"data": [{"url": "https://cdn.example/image.png"}]}))
+    fake.get_response = FakeResponse({}, content=PNG_BYTES)
+    client = ZhipuImageGenerationClient(
+        api_key="sk-zhipu-test",
+        api_base="https://open.bigmodel.cn/api/paas/v4",
+        extra_headers={"X-Test": "1"},
+        extra_body={"watermark_enabled": False},
+        client=fake,  # type: ignore[arg-type]
+    )
+
+    response = await client.generate(
+        prompt="a sunset over the ocean",
+        model="glm-image",
+        aspect_ratio="16:9",
+        image_size="2K",
+    )
+
+    assert response.images[0].startswith("data:image/png;base64,")
+    call = fake.calls[0]
+    assert call["url"] == "https://open.bigmodel.cn/api/paas/v4/images/generations"
+    assert call["headers"]["Authorization"] == "Bearer sk-zhipu-test"
+    assert call["headers"]["X-Test"] == "1"
+    body = call["json"]
+    assert body["model"] == "glm-image"
+    assert body["prompt"] == "a sunset over the ocean"
+    assert body["size"] == "1728x960"
+    assert body["watermark_enabled"] is False
+
+
+@pytest.mark.asyncio
+async def test_zhipu_image_generation_with_explicit_size() -> None:
+    fake = FakeClient(FakeResponse({"data": [{"url": "https://cdn.example/image.png"}]}))
+    fake.get_response = FakeResponse({}, content=PNG_BYTES)
+    client = ZhipuImageGenerationClient(
+        api_key="sk-zhipu-test",
+        client=fake,  # type: ignore[arg-type]
+    )
+
+    await client.generate(
+        prompt="a cat",
+        model="cogview-4",
+        image_size="1024x1024",
+    )
+
+    body = fake.calls[0]["json"]
+    assert body["size"] == "1024x1024"
+
+
+@pytest.mark.asyncio
+async def test_zhipu_image_generation_downloads_url_response() -> None:
+    fake = FakeClient(FakeResponse({"data": [{"url": "https://cdn.example/image.png"}]}))
+    fake.get_response = FakeResponse({}, content=PNG_BYTES)
+    client = ZhipuImageGenerationClient(
+        api_key="sk-zhipu-test",
+        client=fake,  # type: ignore[arg-type]
+    )
+
+    response = await client.generate(prompt="draw", model="glm-image")
+
+    assert response.images[0].startswith("data:image/png;base64,")
+    assert fake.get_calls[0]["url"] == "https://cdn.example/image.png"
+
+
+@pytest.mark.asyncio
+async def test_zhipu_image_generation_requires_api_key() -> None:
+    client = ZhipuImageGenerationClient(api_key=None)
+
+    with pytest.raises(ImageGenerationError, match="API key"):
+        await client.generate(prompt="draw", model="glm-image")
+
+
+@pytest.mark.asyncio
+async def test_zhipu_image_generation_no_images_raises() -> None:
+    fake = FakeClient(FakeResponse({"data": [{"text": "sorry"}]}))
+    client = ZhipuImageGenerationClient(api_key="sk-zhipu-test", client=fake)  # type: ignore[arg-type]
+
+    with pytest.raises(ImageGenerationError, match="returned no images"):
+        await client.generate(prompt="draw", model="glm-image")
+
+
+@pytest.mark.asyncio
+async def test_zhipu_image_generation_rejects_reference_images() -> None:
+    client = ZhipuImageGenerationClient(api_key="sk-zhipu-test")
+
+    with pytest.raises(ImageGenerationError, match="reference images"):
+        await client.generate(
+            prompt="edit this",
+            model="glm-image",
+            reference_images=["ref.png"],
+        )
diff --git a/tests/tools/test_image_generation_tool.py b/tests/tools/test_image_generation_tool.py
index f5d2d9183..23f207de1 100644
--- a/tests/tools/test_image_generation_tool.py
+++ b/tests/tools/test_image_generation_tool.py
@@ -171,6 +171,39 @@ async def test_generate_image_tool_allows_ollama_without_api_key(
     assert fake.calls[0]["image_size"] == "1K"
 
 
+@pytest.mark.asyncio
+async def test_generate_image_tool_allows_zhipu_without_api_key(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    set_config_path(tmp_path / "config.json")
+    FakeImageClient.instances = []
+    monkeypatch.setattr(
+        "nanobot.agent.tools.image_generation.get_image_gen_provider",
+        lambda name: FakeImageClient if name == "zhipu" else None,
+    )
+    tool = ImageGenerationTool(
+        workspace=tmp_path,
+        config=ImageGenerationToolConfig(
+            enabled=True,
+            provider="zhipu",
+            model="glm-image",
+        ),
+        provider_configs={"zhipu": ProviderConfig(api_base="https://open.bigmodel.cn/api/paas/v4")},
+    )
+
+    result = await tool.execute(prompt="draw a cat")
+
+    payload = json.loads(result)
+    assert len(payload["artifacts"]) == 1
+
+    fake = FakeImageClient.instances[0]
+    assert fake.kwargs["api_key"] is None
+    assert fake.kwargs["api_base"] == "https://open.bigmodel.cn/api/paas/v4"
+    assert fake.calls[0]["aspect_ratio"] == "1:1"
+    assert fake.calls[0]["image_size"] == "1K"
+
+
 @pytest.mark.asyncio
 async def test_generate_image_tool_rejects_reference_outside_workspace(tmp_path: Path) -> None:
     set_config_path(tmp_path / "config.json")