mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-19 16:12:30 +00:00
feat(image-generation): add StepFun provider support and StepPlan docs
- Add StepFunImageGenerationClient with step-image-edit-2 / step-1x-medium support - Map aspect ratios to StepFun size strings (WxH order) - Add style_reference for step-1x-medium reference-image generation - Register in image gen provider registry (auto-discovered by nanobot.py) - Add 7 unit tests: payload, default size, explicit size, style_reference (1x/non-1x), missing key, no-images - Add StepFun section to docs/image-generation.md with provider config - Add StepPlan (订阅制) subsection with apiBase override example
This commit is contained in:
parent
3f321179eb
commit
2d302a006e
@ -46,7 +46,7 @@ The WebUI hides provider storage details from the user. The agent sees the saved
|
||||
| Option | Type | Default | Description |
|
||||
|--------|------|---------|-------------|
|
||||
| `tools.imageGeneration.enabled` | boolean | `false` | Register the `generate_image` tool |
|
||||
| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini` |
|
||||
| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `stepfun` |
|
||||
| `tools.imageGeneration.model` | string | `"openai/gpt-5.4-image-2"` | Provider model name |
|
||||
| `tools.imageGeneration.defaultAspectRatio` | string | `"1:1"` | Default ratio when the prompt/tool call does not specify one |
|
||||
| `tools.imageGeneration.defaultImageSize` | string | `"1K"` | Default size hint, for example `1K`, `2K`, `4K`, or `1024x1024` |
|
||||
@ -168,6 +168,58 @@ For reference-image edits, use a Gemini Flash image model:
|
||||
|
||||
Imagen 4 supports the aspect ratios `1:1`, `9:16`, `16:9`, `3:4`, and `4:3`. Unsupported ratios are ignored and the model uses its default. The `defaultImageSize` setting has no effect on Gemini models; sizing is controlled by `defaultAspectRatio` only. Reference images passed with an Imagen model are ignored (with a warning logged).
|
||||
|
||||
### StepFun
|
||||
|
||||
StepFun (阶跃星辰) `step-image-edit-2` supports text-to-image generation. The `step-1x-medium` variant additionally supports **style-reference** image edits, where a reference image guides the visual style of the output.
|
||||
|
||||
Supported aspect ratios: `1:1`, `16:9`, `9:16`, `3:4`, `4:3`. Sizes are specified as `WIDTHxHEIGHT` (e.g. `1024x1024`, `1280x800`, `800x1280`).
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"stepfun": {
|
||||
"apiKey": "${STEPFUN_API_KEY}"
|
||||
}
|
||||
},
|
||||
"tools": {
|
||||
"imageGeneration": {
|
||||
"enabled": true,
|
||||
"provider": "stepfun",
|
||||
"model": "step-image-edit-2"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> The StepFun provider reuses the existing `providers.stepfun` config block (the same one used for StepFun's LLM API). Set `providers.stepfun.apiKey` once and it is shared between text and image generation.
|
||||
>
|
||||
> When `step-image-edit-2` is used, `reference_images` are ignored (the model does not support style reference). Switch to `step-1x-medium` to use reference-image-guided generation.
|
||||
|
||||
#### StepPlan (订阅制)
|
||||
|
||||
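StepPlan is StepFun's subscription-based service and uses a different API base URL. The image generation endpoint path is the same, so you only need to override `apiBase`: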
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"stepfun": {
|
||||
"apiKey": "${STEPFUN_API_KEY}",
|
||||
"apiBase": "https://api.stepfun.com/step_plan/v1"
|
||||
}
|
||||
},
|
||||
"tools": {
|
||||
"imageGeneration": {
|
||||
"enabled": true,
|
||||
"provider": "stepfun",
|
||||
"model": "step-image-edit-2"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
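`apiBase` takes precedence over the registry default, so once the StepPlan address is configured, image requests go to `https://api.stepfun.com/step_plan/v1/images/generations`, consistent with the LLM call path. The API key is the same one used for regular StepFun.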
|
||||
|
||||
## Artifacts
|
||||
|
||||
Generated images are stored under the active nanobot instance's media directory:
|
||||
@ -222,7 +274,7 @@ Use the reference image. Keep the same robot and composition, change the palette
|
||||
|---------|-------|
|
||||
| `generate_image` is not available | Set `tools.imageGeneration.enabled` to `true` and restart the gateway |
|
||||
| Missing API key error | Configure `providers.<provider>.apiKey`; if using `${VAR_NAME}`, confirm the environment variable is visible to the gateway process |
|
||||
| `unsupported image generation provider` | Use `openrouter`, `aihubmix`, `minimax`, or `gemini` |
|
||||
| `unsupported image generation provider` | Use `openrouter`, `aihubmix`, `minimax`, `gemini`, or `stepfun` |
|
||||
| AIHubMix says `Incorrect model ID` | Use `model: "gpt-image-2-free"`; nanobot expands it to the required `openai/gpt-image-2-free` model path internally |
|
||||
| Generation times out | Try a smaller/default image size, set AIHubMix `extraBody.quality` to `"low"`, or retry later |
|
||||
| Reference image rejected | Reference image paths must be inside the workspace or nanobot media directory and must be valid image files |
|
||||
|
||||
@ -756,6 +756,129 @@ def _minimax_images_from_payload(payload: dict[str, Any]) -> list[str]:
|
||||
return images
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# StepFun (阶跃星辰) image generation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_STEPFUN_ASPECT_RATIO_SIZES = {
|
||||
"1:1": "1024x1024",
|
||||
"16:9": "1280x800",
|
||||
"9:16": "800x1280",
|
||||
"3:4": "768x1360",
|
||||
"4:3": "1360x768",
|
||||
}
|
||||
|
||||
|
||||
class StepFunImageGenerationClient(ImageGenerationProvider):
    """Async client for StepFun image generation.

    Sends a single ``POST {api_base}/images/generations`` request asking for
    one base64-encoded image. When reference images are supplied and the
    model name contains ``"1x"`` (e.g. ``step-1x-medium``), the first
    reference image is attached as ``style_reference``; for any other model
    name the reference images are not sent.
    """

    provider_name = "stepfun"
    missing_key_message = (
        "StepFun API key is not configured. Set providers.stepfun.apiKey."
    )
    default_timeout = 120.0

    def _default_base_url(self) -> str:
        """Return the default StepFun API root."""
        return "https://api.stepfun.com/v1"

    async def generate(
        self,
        *,
        prompt: str,
        model: str,
        reference_images: list[str] | None = None,
        aspect_ratio: str | None = None,
        image_size: str | None = None,
    ) -> GeneratedImageResponse:
        """Generate one image and return it as a data URL.

        Args:
            prompt: Text prompt forwarded as ``prompt``.
            model: StepFun model name forwarded as ``model``.
            reference_images: Optional image paths. Only the first one is
                used, and only when ``"1x"`` occurs in ``model``.
            aspect_ratio: Optional ratio such as ``"16:9"``; resolved to a
                size string by ``_stepfun_size``.
            image_size: Optional explicit size hint; resolved by
                ``_stepfun_size``.

        Returns:
            A ``GeneratedImageResponse`` with the extracted images, empty
            ``content`` and the decoded response payload as ``raw``.

        Raises:
            ImageGenerationError: If no API key is set, the HTTP status
                indicates failure, or the response body is not a JSON
                object. ``self._require_images`` is also called on the
                result (presumably raising when no images were returned;
                it is defined on the base class).
        """
        if not self.api_key:
            raise ImageGenerationError(self.missing_key_message)

        # extra_headers is unpacked last so configured headers win.
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            **self.extra_headers,
        }

        body: dict[str, Any] = {
            "model": model,
            "prompt": prompt,
            "response_format": "b64_json",
            "n": 1,
        }

        # Map aspect ratio / image_size to a StepFun size string.
        size = _stepfun_size(aspect_ratio, image_size)
        if size:
            body["size"] = size

        # Only "1x" models get style_reference; other models ignore refs.
        refs = list(reference_images or [])
        if refs and "1x" in model:
            body["style_reference"] = {
                "source_url": image_path_to_data_url(refs[0]),
            }

        # extra_body is applied last so it can override any field above.
        body.update(self.extra_body)

        response = await self._http_post(
            f"{self.api_base}/images/generations",
            headers=headers,
            body=body,
        )

        try:
            response.raise_for_status()
        except httpx.HTTPStatusError as exc:
            detail = response.text[:500]
            raise ImageGenerationError(
                f"StepFun image generation failed: {detail}"
            ) from exc

        # A 2xx response with a non-JSON body (e.g. a proxy HTML page) must
        # surface as ImageGenerationError rather than a raw decode error.
        try:
            payload = response.json()
        except ValueError as exc:
            raise ImageGenerationError(
                "StepFun image generation failed: response was not valid JSON"
            ) from exc

        # The extractor calls payload.get(), so reject arrays/scalars here.
        if not isinstance(payload, dict):
            raise ImageGenerationError(
                "StepFun image generation failed: unexpected response payload"
            )

        images = _stepfun_images_from_payload(payload)
        self._require_images(images, payload)

        return GeneratedImageResponse(images=images, content="", raw=payload)
|
||||
|
||||
|
||||
def _stepfun_size(
|
||||
aspect_ratio: str | None,
|
||||
image_size: str | None,
|
||||
) -> str:
|
||||
"""Resolve aspect ratio / image_size to StepFun size string.
|
||||
|
||||
StepFun expects ``WIDTHxHEIGHT`` (note: width x height, not the more
|
||||
common ``HxW`` order used by other providers). The accepted sizes are
|
||||
``1024x1024``, ``768x1360``, ``896x1184``, ``1360x768``, ``1184x896``.
|
||||
"""
|
||||
if image_size and "x" in image_size.lower():
|
||||
return image_size
|
||||
if aspect_ratio and aspect_ratio in _STEPFUN_ASPECT_RATIO_SIZES:
|
||||
return _STEPFUN_ASPECT_RATIO_SIZES[aspect_ratio]
|
||||
return "1024x1024"
|
||||
|
||||
|
||||
def _stepfun_images_from_payload(payload: dict[str, Any]) -> list[str]:
|
||||
"""Extract base64 images from StepFun API response.
|
||||
|
||||
StepFun returns images in ``data[].b64_json`` (base64 strings).
|
||||
"""
|
||||
images: list[str] = []
|
||||
for item in payload.get("data") or []:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
b64 = item.get("b64_json")
|
||||
if isinstance(b64, str) and b64:
|
||||
images.append(_b64_image_data_url(b64))
|
||||
return images
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider registration
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -764,3 +887,4 @@ register_image_gen_provider(OpenRouterImageGenerationClient)
|
||||
register_image_gen_provider(AIHubMixImageGenerationClient)
|
||||
register_image_gen_provider(GeminiImageGenerationClient)
|
||||
register_image_gen_provider(MiniMaxImageGenerationClient)
|
||||
register_image_gen_provider(StepFunImageGenerationClient)
|
||||
|
||||
@ -14,6 +14,7 @@ from nanobot.providers.image_generation import (
|
||||
ImageGenerationError,
|
||||
MiniMaxImageGenerationClient,
|
||||
OpenRouterImageGenerationClient,
|
||||
StepFunImageGenerationClient,
|
||||
)
|
||||
|
||||
PNG_BYTES = (
|
||||
@ -387,3 +388,130 @@ async def test_minimax_payload_and_response_with_reference_image(tmp_path: Path)
|
||||
assert body["aspect_ratio"] == "21:9"
|
||||
assert body["subject_reference"][0]["type"] == "character"
|
||||
assert body["subject_reference"][0]["image_file"].startswith("data:image/png;base64,")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# StepFun (阶跃星辰)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.asyncio
async def test_stepfun_payload_and_response_with_aspect_ratio() -> None:
    """A 16:9 request posts the expected URL, headers and JSON body."""
    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    provider = StepFunImageGenerationClient(
        api_key="sk-sf-test",
        api_base="https://api.stepfun.com/v1",
        extra_headers={"X-Test": "1"},
        client=transport,  # type: ignore[arg-type]
    )

    result = await provider.generate(
        prompt="a cat on the moon",
        model="step-image-edit-2",
        aspect_ratio="16:9",
    )

    assert result.images == [PNG_DATA_URL]

    request = transport.calls[0]
    assert request["url"] == "https://api.stepfun.com/v1/images/generations"

    sent_headers = request["headers"]
    assert sent_headers["Authorization"] == "Bearer sk-sf-test"
    assert sent_headers["X-Test"] == "1"

    sent_body = request["json"]
    expected_fields = {
        "model": "step-image-edit-2",
        "prompt": "a cat on the moon",
        "response_format": "b64_json",
        "n": 1,
        "size": "1280x800",
    }
    for field, expected in expected_fields.items():
        assert sent_body[field] == expected
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stepfun_default_size_when_no_aspect_ratio() -> None:
    """Without an aspect ratio or size hint the request uses 1024x1024."""
    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    provider = StepFunImageGenerationClient(
        api_key="sk-sf-test",
        api_base="https://api.stepfun.com/v1",
        client=transport,  # type: ignore[arg-type]
    )

    await provider.generate(prompt="a dog", model="step-image-edit-2")

    sent_body = transport.calls[0]["json"]
    assert sent_body["size"] == "1024x1024"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stepfun_uses_explicit_image_size() -> None:
    """An explicit WIDTHxHEIGHT image_size is sent as-is and beats the ratio.

    The size used here differs from both the 1024x1024 fallback and the size
    mapped from the supplied aspect ratio, so the assertion fails if the
    explicit value is ever ignored.
    """
    fake = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    client = StepFunImageGenerationClient(
        api_key="sk-sf-test",
        api_base="https://api.stepfun.com/v1",
        client=fake,  # type: ignore[arg-type]
    )

    await client.generate(
        prompt="a bird",
        model="step-image-edit-2",
        aspect_ratio="9:16",
        image_size="1360x768",
    )

    body = fake.calls[0]["json"]
    assert body["size"] == "1360x768"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stepfun_style_reference_on_1x_model(tmp_path: Path) -> None:
    """With step-1x-medium the reference image is sent as style_reference."""
    reference_path = tmp_path / "ref.png"
    reference_path.write_bytes(PNG_BYTES)

    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    provider = StepFunImageGenerationClient(
        api_key="sk-sf-test",
        api_base="https://api.stepfun.com/v1",
        client=transport,  # type: ignore[arg-type]
    )

    await provider.generate(
        prompt="in this style",
        model="step-1x-medium",
        reference_images=[str(reference_path)],
    )

    sent_body = transport.calls[0]["json"]
    assert "style_reference" in sent_body
    source_url = sent_body["style_reference"]["source_url"]
    assert source_url.startswith("data:image/png;base64,")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stepfun_no_style_reference_on_non_1x_model() -> None:
    """With step-image-edit-2 the reference images are dropped from the body."""
    transport = FakeClient(FakeResponse({"data": [{"b64_json": RAW_B64}]}))
    provider = StepFunImageGenerationClient(
        api_key="sk-sf-test",
        api_base="https://api.stepfun.com/v1",
        client=transport,  # type: ignore[arg-type]
    )

    # The path need not exist: for this model the client never reads it.
    await provider.generate(
        prompt="a flower",
        model="step-image-edit-2",
        reference_images=["/tmp/ref.png"],
    )

    sent_body = transport.calls[0]["json"]
    assert "style_reference" not in sent_body
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stepfun_requires_api_key() -> None:
    """generate() raises ImageGenerationError when no API key is configured."""
    provider = StepFunImageGenerationClient(api_key=None)

    with pytest.raises(ImageGenerationError, match="API key"):
        await provider.generate(prompt="draw", model="step-image-edit-2")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stepfun_no_images_raises() -> None:
    """A response whose data entries carry no b64_json raises an error."""
    transport = FakeClient(FakeResponse({"data": [{"text": "sorry"}]}))
    provider = StepFunImageGenerationClient(
        api_key="sk-sf-test",
        client=transport,  # type: ignore[arg-type]
    )

    with pytest.raises(ImageGenerationError, match="returned no images"):
        await provider.generate(prompt="draw", model="step-image-edit-2")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user