mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-06-13 14:23:58 +00:00
feat: Add Zhipu (智谱) image generation provider
This commit is contained in:
parent
c0d4f012c8
commit
3e6f9907fe
@ -23,7 +23,7 @@ The feature is disabled by default. Enable it in `~/.nanobot/config.json`, confi
|
||||
}
|
||||
```
|
||||
|
||||
See [Provider Notes](#provider-notes) for AIHubMix, MiniMax, Gemini, Ollama, and StepFun configuration examples.
|
||||
See [Provider Notes](#provider-notes) for AIHubMix, MiniMax, Gemini, Ollama, StepFun, and Zhipu configuration examples.
|
||||
|
||||
> [!TIP]
|
||||
> Prefer environment variables for API keys. nanobot resolves `${VAR_NAME}` values from the environment at startup.
|
||||
@ -46,7 +46,7 @@ The WebUI hides provider storage details from the user. The agent sees the saved
|
||||
| Option | Type | Default | Description |
|
||||
|--------|------|---------|-------------|
|
||||
| `tools.imageGeneration.enabled` | boolean | `false` | Register the `generate_image` tool |
|
||||
| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun` |
|
||||
| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, `zhipu` |
|
||||
| `tools.imageGeneration.model` | string | `"openai/gpt-5.4-image-2"` | Provider model name |
|
||||
| `tools.imageGeneration.defaultAspectRatio` | string | `"1:1"` | Default ratio when the prompt/tool call does not specify one |
|
||||
| `tools.imageGeneration.defaultImageSize` | string | `"1K"` | Default size hint, for example `1K`, `2K`, `4K`, or `1024x1024` |
|
||||
@ -245,6 +245,31 @@ StepPlan is StepFun's subscription tier and uses a different API base URL. The i
|
||||
|
||||
`apiBase` takes precedence over the registry default, so with the StepPlan base URL configured, image requests are sent to `https://api.stepfun.com/step_plan/v1/images/generations` — the same path prefix used for LLM calls. The API key is shared with the standard StepFun provider.
|
||||
|
||||
### Zhipu
|
||||
|
||||
Zhipu's (智谱) `glm-image` model supports text-to-image generation. The API returns temporary image URLs (valid for 30 days); nanobot downloads each image and re-encodes it as a base64 data URL.
|
||||
|
||||
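Supported aspect ratios: `1:1`, `16:9`, `9:16`, `3:4`, `4:3`. A size can be given explicitly as `WIDTHxHEIGHT` (e.g. `1280x1280`, `1728x960`) or derived from an aspect-ratio preset. An explicit `WIDTHxHEIGHT` size takes precedence over the aspect ratio; if neither applies, nanobot requests `1280x1280`.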
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"zhipu": {
|
||||
"apiKey": "${ZHIPU_API_KEY}"
|
||||
}
|
||||
},
|
||||
"tools": {
|
||||
"imageGeneration": {
|
||||
"enabled": true,
|
||||
"provider": "zhipu",
|
||||
"model": "glm-image"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Other supported models: `cogview-4`, `cogview-4-250304`, `cogview-3-flash`. Reference images are not supported by this integration; if any are supplied, nanobot logs a warning and ignores them.
|
||||
|
||||
## Artifacts
|
||||
|
||||
Generated images are stored under the active nanobot instance's media directory:
|
||||
@ -299,7 +324,7 @@ Use the reference image. Keep the same robot and composition, change the palette
|
||||
|---------|-------|
|
||||
| `generate_image` is not available | Set `tools.imageGeneration.enabled` to `true` and restart the gateway |
|
||||
| Missing API key error | Configure `providers.<provider>.apiKey`; if using `${VAR_NAME}`, confirm the environment variable is visible to the gateway process |
|
||||
| `unsupported image generation provider` | Use `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, or `stepfun` |
|
||||
| `unsupported image generation provider` | Use `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun`, or `zhipu` |
|
||||
| AIHubMix says `Incorrect model ID` | Use `model: "gpt-image-2-free"`; nanobot expands it to the required `openai/gpt-image-2-free` model path internally |
|
||||
| Generation times out | Try a smaller/default image size, set AIHubMix `extraBody.quality` to `"low"`, or retry later |
|
||||
| Reference image rejected | Reference image paths must be inside the workspace or nanobot media directory and must be valid image files |
|
||||
|
||||
@ -1445,6 +1445,138 @@ def _stepfun_images_from_payload(payload: dict[str, Any]) -> list[str]:
|
||||
return images
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Zhipu (智谱) image generation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_ZHIPU_TIMEOUT_S = 300.0
|
||||
|
||||
_ZHIPU_ASPECT_RATIO_SIZES = {
|
||||
"1:1": "1280x1280",
|
||||
"16:9": "1728x960",
|
||||
"9:16": "960x1728",
|
||||
"3:4": "1088x1472",
|
||||
"4:3": "1472x1088",
|
||||
}
|
||||
|
||||
|
||||
class ZhipuImageGenerationClient(ImageGenerationProvider):
    """Async client for the Zhipu (智谱) image generation API.

    Supports:
    - Text-to-image via glm-image, cogview-4, cogview-3-flash, etc.
    - Aspect ratio selection
    - Watermark control (provider-specific fields are merged from ``extra_body``)

    Reference images are not supported: they are ignored and a warning is logged.
    """

    provider_name = "zhipu"
    missing_key_message = "Zhipu API key is not configured. Set providers.zhipu.apiKey."
    default_timeout = _ZHIPU_TIMEOUT_S

    def _default_base_url(self) -> str:
        """Return the API base URL used when none is configured."""
        return "https://open.bigmodel.cn/api/paas/v4"

    async def generate(
        self,
        *,
        prompt: str,
        model: str,
        reference_images: list[str] | None = None,
        aspect_ratio: str | None = None,
        image_size: str | None = None,
    ) -> GeneratedImageResponse:
        """Generate images with Zhipu and return them as data URLs.

        Args:
            prompt: Text description of the image.
            model: Zhipu model name, sent as-is.
            reference_images: Ignored (with a warning); Zhipu generation here is
                text-to-image only.
            aspect_ratio: Optional ratio preset used to pick a size.
            image_size: Optional size; an explicit ``WIDTHxHEIGHT`` wins over
                ``aspect_ratio``.

        Returns:
            A ``GeneratedImageResponse`` whose ``images`` are the downloaded
            results and whose ``raw`` is the decoded API payload.

        Raises:
            ImageGenerationError: If the API key is missing, the request times
                out or fails, the API answers with an error status or a
                non-JSON body, or no images are returned.
        """
        if not self.api_key:
            raise ImageGenerationError(self.missing_key_message)

        if reference_images:
            logger.warning(
                "Zhipu image generation does not support reference images; "
                "ignoring {} reference image(s) for {}",
                len(reference_images),
                model,
            )

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            **self.extra_headers,
        }

        body: dict[str, Any] = {
            "model": model,
            "prompt": prompt,
        }

        # Resolve size
        size = _zhipu_size(aspect_ratio, image_size)
        if size:
            body["size"] = size

        # Applied last so configured extra fields can override the defaults above.
        body.update(self.extra_body)

        url = f"{self.api_base}/images/generations"

        client = self._client or httpx.AsyncClient(timeout=self.timeout)
        owns_client = self._client is None
        try:
            try:
                response = await self._http_post(url, headers=headers, body=body, client=client)
            except httpx.TimeoutException as exc:
                raise ImageGenerationError("Zhipu image generation timed out") from exc
            except httpx.RequestError as exc:
                raise ImageGenerationError(
                    f"Zhipu image generation request failed: {exc}"
                ) from exc

            try:
                response.raise_for_status()
            except httpx.HTTPStatusError as exc:
                detail = response.text[:500]
                raise ImageGenerationError(f"Zhipu image generation failed: {detail}") from exc

            try:
                payload = response.json()
            except ValueError as exc:
                # A 2xx response with a non-JSON body should surface as a provider
                # error instead of a raw decode exception.
                raise ImageGenerationError(
                    "Zhipu image generation returned a non-JSON response"
                ) from exc

            # The same client is reused to download the returned image URLs.
            images = await _zhipu_images_from_payload(client, payload)

            self._require_images(images, payload)

            return GeneratedImageResponse(images=images, content="", raw=payload)
        finally:
            # Close a client created here on every exit path, including errors;
            # an injected client stays open for its owner.
            if owns_client:
                await client.aclose()
|
||||
|
||||
|
||||
def _zhipu_size(
|
||||
aspect_ratio: str | None,
|
||||
image_size: str | None,
|
||||
) -> str:
|
||||
"""Resolve aspect ratio / image_size to Zhipu size string.
|
||||
|
||||
Zhipu glm-image model supports: 1280x1280 (default), 1568x1056,
|
||||
1056x1568, 1472x1088, 1088x1472, 1728x960, 960x1728.
|
||||
"""
|
||||
if image_size and "x" in image_size.lower():
|
||||
return image_size
|
||||
if aspect_ratio and aspect_ratio in _ZHIPU_ASPECT_RATIO_SIZES:
|
||||
return _ZHIPU_ASPECT_RATIO_SIZES[aspect_ratio]
|
||||
return "1280x1280"
|
||||
|
||||
|
||||
async def _zhipu_images_from_payload(
    client: httpx.AsyncClient,
    payload: dict[str, Any],
) -> list[str]:
    """Collect the images of a Zhipu API response as data URLs.

    Zhipu returns images as temporary URLs that expire after 30 days, so every
    URL found under ``payload["data"]`` is fetched with
    ``_download_image_data_url`` and the results are returned in response order.

    Entries that are not dicts, or whose ``url`` is missing, empty, or not a
    string, are skipped.
    """
    entries = payload.get("data") or []
    candidate_urls = [entry.get("url") for entry in entries if isinstance(entry, dict)]
    # Downloads are awaited one at a time, preserving the response order.
    return [
        await _download_image_data_url(client, link)
        for link in candidate_urls
        if isinstance(link, str) and link
    ]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider registration
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -1457,3 +1589,4 @@ register_image_gen_provider(MiniMaxImageGenerationClient)
|
||||
register_image_gen_provider(OpenAIImageGenerationClient)
|
||||
register_image_gen_provider(OpenRouterImageGenerationClient)
|
||||
register_image_gen_provider(StepFunImageGenerationClient)
|
||||
register_image_gen_provider(ZhipuImageGenerationClient)
|
||||
|
||||
@ -18,6 +18,7 @@ from nanobot.providers.image_generation import (
|
||||
OpenAIImageGenerationClient,
|
||||
OpenRouterImageGenerationClient,
|
||||
StepFunImageGenerationClient,
|
||||
ZhipuImageGenerationClient,
|
||||
)
|
||||
|
||||
PNG_BYTES = (
|
||||
@ -1027,3 +1028,102 @@ async def test_openai_no_images_raises() -> None:
|
||||
|
||||
with pytest.raises(ImageGenerationError, match="returned no images"):
|
||||
await client.generate(prompt="draw", model="dall-e-3")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Zhipu
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_zhipu_image_generation_payload_and_response() -> None:
    """A 16:9 request posts the expected payload and yields a PNG data URL."""
    http = FakeClient(FakeResponse({"data": [{"url": "https://cdn.example/image.png"}]}))
    http.get_response = FakeResponse({}, content=PNG_BYTES)
    provider = ZhipuImageGenerationClient(
        api_key="sk-zhipu-test",
        api_base="https://open.bigmodel.cn/api/paas/v4",
        extra_headers={"X-Test": "1"},
        extra_body={"watermark_enabled": False},
        client=http,  # type: ignore[arg-type]
    )

    # "2K" is not a WIDTHxHEIGHT value, so the 16:9 preset decides the size.
    result = await provider.generate(
        prompt="a sunset over the ocean",
        model="glm-image",
        aspect_ratio="16:9",
        image_size="2K",
    )

    assert result.images[0].startswith("data:image/png;base64,")

    request = http.calls[0]
    assert request["url"] == "https://open.bigmodel.cn/api/paas/v4/images/generations"

    sent_headers = request["headers"]
    assert sent_headers["Authorization"] == "Bearer sk-zhipu-test"
    assert sent_headers["X-Test"] == "1"

    sent_body = request["json"]
    assert sent_body["model"] == "glm-image"
    assert sent_body["prompt"] == "a sunset over the ocean"
    assert sent_body["size"] == "1728x960"
    assert sent_body["watermark_enabled"] is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_zhipu_image_generation_with_explicit_size() -> None:
    """An explicit WIDTHxHEIGHT ``image_size`` is sent unchanged as ``size``."""
    http = FakeClient(FakeResponse({"data": [{"url": "https://cdn.example/image.png"}]}))
    http.get_response = FakeResponse({}, content=PNG_BYTES)
    provider = ZhipuImageGenerationClient(
        api_key="sk-zhipu-test",
        client=http,  # type: ignore[arg-type]
    )

    await provider.generate(
        prompt="a cat",
        model="cogview-4",
        image_size="1024x1024",
    )

    sent_body = http.calls[0]["json"]
    assert sent_body["size"] == "1024x1024"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_zhipu_image_generation_downloads_url_response() -> None:
    """The image URL returned by the API is fetched and turned into a data URL."""
    http = FakeClient(FakeResponse({"data": [{"url": "https://cdn.example/image.png"}]}))
    http.get_response = FakeResponse({}, content=PNG_BYTES)
    provider = ZhipuImageGenerationClient(
        api_key="sk-zhipu-test",
        client=http,  # type: ignore[arg-type]
    )

    result = await provider.generate(prompt="draw", model="glm-image")

    first_download = http.get_calls[0]
    assert result.images[0].startswith("data:image/png;base64,")
    assert first_download["url"] == "https://cdn.example/image.png"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_zhipu_image_generation_requires_api_key() -> None:
    """Generating without an API key fails with an error that mentions the key."""
    provider = ZhipuImageGenerationClient(api_key=None)

    with pytest.raises(ImageGenerationError, match="API key"):
        await provider.generate(prompt="draw", model="glm-image")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_zhipu_image_generation_no_images_raises() -> None:
    """A response whose data entries carry no ``url`` is reported as an error."""
    payload_without_urls = {"data": [{"text": "sorry"}]}
    http = FakeClient(FakeResponse(payload_without_urls))
    provider = ZhipuImageGenerationClient(api_key="sk-zhipu-test", client=http)  # type: ignore[arg-type]

    with pytest.raises(ImageGenerationError, match="returned no images"):
        await provider.generate(prompt="draw", model="glm-image")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_zhipu_image_generation_rejects_reference_images() -> None:
    """Reference images are dropped rather than forwarded to Zhipu.

    Despite the name, the client does not raise for reference images: it logs a
    warning and generates from the prompt alone. A fake HTTP client is injected
    so the test never reaches the network.
    """
    fake = FakeClient(FakeResponse({"data": [{"url": "https://cdn.example/image.png"}]}))
    fake.get_response = FakeResponse({}, content=PNG_BYTES)
    client = ZhipuImageGenerationClient(api_key="sk-zhipu-test", client=fake)  # type: ignore[arg-type]

    response = await client.generate(
        prompt="edit this",
        model="glm-image",
        reference_images=["ref.png"],
    )

    # Generation still succeeds from the prompt...
    assert response.images[0].startswith("data:image/png;base64,")

    # ...and the reference never appears in the request body.
    body = fake.calls[0]["json"]
    assert body["prompt"] == "edit this"
    assert "ref.png" not in str(body)
|
||||
|
||||
@ -171,6 +171,39 @@ async def test_generate_image_tool_allows_ollama_without_api_key(
|
||||
assert fake.calls[0]["image_size"] == "1K"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_generate_image_tool_allows_zhipu_without_api_key(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The tool builds a Zhipu provider from config even when no API key is set.

    NOTE(review): the provider is stubbed with ``FakeImageClient`` here; the real
    Zhipu client raises when ``api_key`` is missing, so this only covers how the
    tool forwards configuration - confirm that is the intended coverage.
    """
    zhipu_base = "https://open.bigmodel.cn/api/paas/v4"

    def lookup_provider(name):
        # Only "zhipu" resolves; every other provider name is unknown.
        return FakeImageClient if name == "zhipu" else None

    set_config_path(tmp_path / "config.json")
    FakeImageClient.instances = []
    monkeypatch.setattr(
        "nanobot.agent.tools.image_generation.get_image_gen_provider",
        lookup_provider,
    )
    tool_config = ImageGenerationToolConfig(
        enabled=True,
        provider="zhipu",
        model="glm-image",
    )
    tool = ImageGenerationTool(
        workspace=tmp_path,
        config=tool_config,
        provider_configs={"zhipu": ProviderConfig(api_base=zhipu_base)},
    )

    raw_result = await tool.execute(prompt="draw a cat")

    parsed = json.loads(raw_result)
    assert len(parsed["artifacts"]) == 1

    created = FakeImageClient.instances[0]
    assert created.kwargs["api_key"] is None
    assert created.kwargs["api_base"] == "https://open.bigmodel.cn/api/paas/v4"

    # No ratio or size was requested, so the tool's defaults are passed through.
    first_call = created.calls[0]
    assert first_call["aspect_ratio"] == "1:1"
    assert first_call["image_size"] == "1K"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_generate_image_tool_rejects_reference_outside_workspace(tmp_path: Path) -> None:
|
||||
set_config_path(tmp_path / "config.json")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user