diff --git a/docs/image-generation.md b/docs/image-generation.md index 6ca049290..f9812a885 100644 --- a/docs/image-generation.md +++ b/docs/image-generation.md @@ -46,7 +46,7 @@ The WebUI hides provider storage details from the user. The agent sees the saved | Option | Type | Default | Description | |--------|------|---------|-------------| | `tools.imageGeneration.enabled` | boolean | `false` | Register the `generate_image` tool | -| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini` | +| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `stepfun` | | `tools.imageGeneration.model` | string | `"openai/gpt-5.4-image-2"` | Provider model name | | `tools.imageGeneration.defaultAspectRatio` | string | `"1:1"` | Default ratio when the prompt/tool call does not specify one | | `tools.imageGeneration.defaultImageSize` | string | `"1K"` | Default size hint, for example `1K`, `2K`, `4K`, or `1024x1024` | @@ -168,6 +168,58 @@ For reference-image edits, use a Gemini Flash image model: Imagen 4 supports the aspect ratios `1:1`, `9:16`, `16:9`, `3:4`, and `4:3`. Unsupported ratios are ignored and the model uses its default. The `defaultImageSize` setting has no effect on Gemini models; sizing is controlled by `defaultAspectRatio` only. Reference images passed with an Imagen model are ignored (with a warning logged). +### StepFun + +StepFun (阶跃星辰) `step-image-edit-2` supports text-to-image generation. The `step-1x-medium` variant additionally supports **style-reference** image edits, where a reference image guides the visual style of the output. + +Supported aspect ratios: `1:1`, `16:9`, `9:16`, `3:4`, `4:3`. Sizes are specified as `WIDTHxHEIGHT` (e.g. `1024x1024`, `1280x800`, `800x1280`). 
+ +```json +{ + "providers": { + "stepfun": { + "apiKey": "${STEPFUN_API_KEY}" + } + }, + "tools": { + "imageGeneration": { + "enabled": true, + "provider": "stepfun", + "model": "step-image-edit-2" + } + } +} +``` + +> [!NOTE] +> The StepFun provider reuses the existing `providers.stepfun` config block (the same one used for StepFun's LLM API). Set `providers.stepfun.apiKey` once and it is shared between text and image generation. +> +> When `step-image-edit-2` is used, `reference_images` are ignored (the model does not support style reference). Switch to `step-1x-medium` to use reference-image-guided generation. + +#### StepPlan (subscription plan) + +StepPlan is StepFun's subscription-based service and uses a different API base URL. The image generation endpoint path is the same, so you only need to override `apiBase`: + +```json +{ + "providers": { + "stepfun": { + "apiKey": "${STEPFUN_API_KEY}", + "apiBase": "https://api.stepfun.com/step_plan/v1" + } + }, + "tools": { + "imageGeneration": { + "enabled": true, + "provider": "stepfun", + "model": "step-image-edit-2" + } + } +} +``` + +`apiBase` takes precedence over the registry default, so once the StepPlan address is configured, image requests go to `https://api.stepfun.com/step_plan/v1/images/generations`, consistent with the LLM call path. The API key is the same one used for regular StepFun. + ## Artifacts Generated images are stored under the active nanobot instance's media directory: @@ -222,7 +274,7 @@ Use the reference image. 
# ---------------------------------------------------------------------------
# StepFun image generation
# ---------------------------------------------------------------------------

# Size sent when neither a usable explicit size nor a known aspect ratio is given.
_STEPFUN_DEFAULT_SIZE = "1024x1024"

# Aspect ratio -> StepFun ``size`` string, written WIDTHxHEIGHT.
#
# "3:4" / "4:3" use 896x1184 / 1184x896. The previous values (768x1360 and
# 1360x768) are 9:16 / 16:9 frames, so a 3:4 request produced a much taller
# image than asked for.
#
# NOTE(review): 1280x800 / 800x1280 are 16:10 frames and are not in the size
# list this module previously documented (1024x1024, 768x1360, 896x1184,
# 1360x768, 1184x896). They are kept because the docs and tests pin them;
# confirm the per-model accepted sizes against the StepFun API reference.
_STEPFUN_ASPECT_RATIO_SIZES = {
    "1:1": "1024x1024",
    "16:9": "1280x800",
    "9:16": "800x1280",
    "3:4": "896x1184",
    "4:3": "1184x896",
}


class StepFunImageGenerationClient(ImageGenerationProvider):
    """Async client for StepFun image generation.

    Issues one JSON POST to ``{api_base}/images/generations`` and decodes the
    ``data[].b64_json`` entries of the reply.

    Supports:
    - Text-to-image via step-image-edit-2 (default model)
    - Reference-image-guided generation via ``style_reference`` for models
      whose name contains ``"1x"`` (e.g. step-1x-medium)
    """

    provider_name = "stepfun"
    missing_key_message = (
        "StepFun API key is not configured. Set providers.stepfun.apiKey."
    )
    default_timeout = 120.0

    def _default_base_url(self) -> str:
        """Return StepFun's public API root URL."""
        return "https://api.stepfun.com/v1"

    async def generate(
        self,
        *,
        prompt: str,
        model: str,
        reference_images: list[str] | None = None,
        aspect_ratio: str | None = None,
        image_size: str | None = None,
    ) -> GeneratedImageResponse:
        """Generate one image with StepFun and return it as a data URL.

        Args:
            prompt: Text prompt, sent as ``prompt``.
            model: StepFun model name, sent as ``model``.
            reference_images: Optional image paths. Only the first entry is
                used, and only when ``model`` contains ``"1x"``; for any other
                model the references are not sent.
            aspect_ratio: Ratio such as ``"16:9"``; translated to a size via
                ``_STEPFUN_ASPECT_RATIO_SIZES``.
            image_size: Explicit ``WIDTHxHEIGHT``; wins over ``aspect_ratio``.

        Returns:
            A ``GeneratedImageResponse`` holding the decoded images, an empty
            ``content`` string and the raw JSON payload.

        Raises:
            ImageGenerationError: If no API key is configured, the API answers
                with an HTTP error status, or the body is not valid JSON.
                ``_require_images`` is also called on the result and is
                expected to raise when no image was returned.
        """
        if not self.api_key:
            raise ImageGenerationError(self.missing_key_message)

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            **self.extra_headers,
        }

        # _stepfun_size always yields a non-empty string, so "size" is
        # unconditionally part of the request.
        body: dict[str, Any] = {
            "model": model,
            "prompt": prompt,
            "response_format": "b64_json",
            "n": 1,
            "size": _stepfun_size(aspect_ratio, image_size),
        }

        # Only the "1x" model family takes a style reference; other models get
        # a plain text-to-image request and the references are dropped.
        refs = list(reference_images or [])
        if refs and "1x" in model:
            body["style_reference"] = {
                "source_url": image_path_to_data_url(refs[0]),
            }

        # Applied last so configured extra_body keys override the defaults.
        body.update(self.extra_body)

        # Tolerate a trailing slash on a user-supplied apiBase override.
        endpoint = f"{self.api_base.rstrip('/')}/images/generations"
        response = await self._http_post(endpoint, headers=headers, body=body)

        try:
            response.raise_for_status()
        except httpx.HTTPStatusError as exc:
            detail = response.text[:500]
            raise ImageGenerationError(
                f"StepFun image generation failed: {detail}"
            ) from exc

        # A 2xx reply with a non-JSON body (proxy page, truncated stream)
        # should surface as a provider error, not a raw decode exception.
        try:
            payload = response.json()
        except ValueError as exc:
            raise ImageGenerationError(
                "StepFun image generation returned a non-JSON response"
            ) from exc

        images = _stepfun_images_from_payload(payload)
        self._require_images(images, payload)

        return GeneratedImageResponse(images=images, content="", raw=payload)


def _stepfun_size(
    aspect_ratio: str | None,
    image_size: str | None,
) -> str:
    """Resolve aspect ratio / image_size to a StepFun ``WIDTHxHEIGHT`` string.

    Precedence:
    1. ``image_size`` when it really is ``<digits>x<digits>``. Case and
       surrounding whitespace are normalised, so ``"1024X1024"`` becomes
       ``"1024x1024"``. Size hints such as ``"1K"`` and stray strings that
       merely contain an "x" are not forwarded.
    2. ``aspect_ratio`` when it is a key of ``_STEPFUN_ASPECT_RATIO_SIZES``.
    3. ``1024x1024``.

    Returns:
        A non-empty size string; this function never returns ``""``.
    """
    if image_size:
        width, sep, height = image_size.strip().lower().partition("x")
        width, height = width.strip(), height.strip()
        if (
            sep
            and width.isascii()
            and width.isdigit()
            and height.isascii()
            and height.isdigit()
        ):
            return f"{width}x{height}"

    if aspect_ratio:
        mapped = _STEPFUN_ASPECT_RATIO_SIZES.get(aspect_ratio.strip())
        if mapped:
            return mapped

    return _STEPFUN_DEFAULT_SIZE


def _stepfun_images_from_payload(payload: dict[str, Any]) -> list[str]:
    """Extract base64 images from a StepFun API response.

    StepFun returns images in ``data[].b64_json`` (base64 strings). Each
    non-empty string is passed through ``_b64_image_data_url``. A payload that
    is not a dict, a ``data`` value that is not a list, and entries without a
    usable ``b64_json`` string are skipped instead of raising, so the caller
    can report "no images" uniformly.
    """
    data = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(data, list):
        return []

    images: list[str] = []
    for item in data:
        if not isinstance(item, dict):
            continue
        b64 = item.get("b64_json")
        if isinstance(b64, str) and b64:
            images.append(_b64_image_data_url(b64))
    return images


# ---------------------------------------------------------------------------
# Provider registration
# ---------------------------------------------------------------------------

register_image_gen_provider(OpenRouterImageGenerationClient)
register_image_gen_provider(AIHubMixImageGenerationClient)
register_image_gen_provider(GeminiImageGenerationClient)
register_image_gen_provider(MiniMaxImageGenerationClient)
register_image_gen_provider(StepFunImageGenerationClient)
# StepFun provider tests. Every test drives the client through a FakeClient
# that records outgoing requests, so no network access is needed.


def _single_image_fake() -> FakeClient:
    """Return a fake HTTP client whose reply carries exactly one base64 image."""
    return FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))


def _stepfun_client(fake: FakeClient, **options: object) -> StepFunImageGenerationClient:
    """Build a StepFun client on the public API base, wired to ``fake``."""
    return StepFunImageGenerationClient(
        api_key="sk-sf-test",
        api_base="https://api.stepfun.com/v1",
        client=fake,  # type: ignore[arg-type]
        **options,
    )


@pytest.mark.asyncio
async def test_stepfun_payload_and_response_with_aspect_ratio() -> None:
    """A 16:9 request hits the right URL, headers and body, and decodes the image."""
    fake = _single_image_fake()
    client = _stepfun_client(fake, extra_headers={"X-Test": "1"})

    result = await client.generate(
        prompt="a cat on the moon",
        model="step-image-edit-2",
        aspect_ratio="16:9",
    )

    assert result.images == [PNG_DATA_URL]

    request = fake.calls[0]
    assert request["url"] == "https://api.stepfun.com/v1/images/generations"

    sent_headers = request["headers"]
    assert sent_headers["Authorization"] == "Bearer sk-sf-test"
    assert sent_headers["X-Test"] == "1"

    sent_body = request["json"]
    assert sent_body["model"] == "step-image-edit-2"
    assert sent_body["prompt"] == "a cat on the moon"
    assert sent_body["response_format"] == "b64_json"
    assert sent_body["n"] == 1
    assert sent_body["size"] == "1280x800"


@pytest.mark.asyncio
async def test_stepfun_default_size_when_no_aspect_ratio() -> None:
    """Without ratio or size the request falls back to the square default."""
    fake = _single_image_fake()

    await _stepfun_client(fake).generate(prompt="a dog", model="step-image-edit-2")

    assert fake.calls[0]["json"]["size"] == "1024x1024"


@pytest.mark.asyncio
async def test_stepfun_uses_explicit_image_size() -> None:
    """An explicit WIDTHxHEIGHT image_size is forwarded as the request size."""
    fake = _single_image_fake()

    await _stepfun_client(fake).generate(
        prompt="a bird",
        model="step-image-edit-2",
        image_size="1024x1024",
    )

    assert fake.calls[0]["json"]["size"] == "1024x1024"


@pytest.mark.asyncio
async def test_stepfun_style_reference_on_1x_model(tmp_path: Path) -> None:
    """step-1x-medium sends the first reference image as a style_reference data URL."""
    reference_path = tmp_path / "ref.png"
    reference_path.write_bytes(PNG_BYTES)
    fake = _single_image_fake()

    await _stepfun_client(fake).generate(
        prompt="in this style",
        model="step-1x-medium",
        reference_images=[str(reference_path)],
    )

    sent_body = fake.calls[0]["json"]
    assert "style_reference" in sent_body
    source_url = sent_body["style_reference"]["source_url"]
    assert source_url.startswith("data:image/png;base64,")


@pytest.mark.asyncio
async def test_stepfun_no_style_reference_on_non_1x_model() -> None:
    """step-image-edit-2 ignores reference images; no style_reference is sent."""
    fake = _single_image_fake()

    await _stepfun_client(fake).generate(
        prompt="a flower",
        model="step-image-edit-2",
        reference_images=["/tmp/ref.png"],
    )

    assert "style_reference" not in fake.calls[0]["json"]


@pytest.mark.asyncio
async def test_stepfun_requires_api_key() -> None:
    """Calling generate without an API key fails before any request is made."""
    keyless = StepFunImageGenerationClient(api_key=None)

    with pytest.raises(ImageGenerationError, match="API key"):
        await keyless.generate(prompt="draw", model="step-image-edit-2")


@pytest.mark.asyncio
async def test_stepfun_no_images_raises() -> None:
    """A reply whose data entries carry no b64_json is reported as 'no images'."""
    text_only = FakeClient(FakeResponse({"data": [{"text": "sorry"}]}))
    client = StepFunImageGenerationClient(api_key="sk-sf-test", client=text_only)  # type: ignore[arg-type]

    with pytest.raises(ImageGenerationError, match="returned no images"):
        await client.generate(prompt="draw", model="step-image-edit-2")