Add Ollama image generation support

2026-05-23 18:12:32 +00:00 · 2026-05-21 12:06:08 +05:00 · 2026-05-21 12:06:08 +05:00 · 84603f4cf2
commit 84603f4cf2
parent eae51333ad
5 changed files with 259 additions and 4 deletions
--- a/docs/image-generation.md
+++ b/docs/image-generation.md
@ -23,7 +23,7 @@ The feature is disabled by default. Enable it in `~/.nanobot/config.json`, confi
 }
 ```

-See [Provider Notes](#provider-notes) for AIHubMix, MiniMax, and Gemini configuration examples.
+See [Provider Notes](#provider-notes) for AIHubMix, MiniMax, Gemini, Ollama, and StepFun configuration examples.

 > [!TIP]
 > Prefer environment variables for API keys. nanobot resolves `${VAR_NAME}` values from the environment at startup.
@ -46,7 +46,7 @@ The WebUI hides provider storage details from the user. The agent sees the saved
 | Option | Type | Default | Description |
 |--------|------|---------|-------------|
 | `tools.imageGeneration.enabled` | boolean | `false` | Register the `generate_image` tool |
-| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `stepfun` |
+| `tools.imageGeneration.provider` | string | `"openrouter"` | Image provider name. Supported values: `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, `stepfun` |
 | `tools.imageGeneration.model` | string | `"openai/gpt-5.4-image-2"` | Provider model name |
 | `tools.imageGeneration.defaultAspectRatio` | string | `"1:1"` | Default ratio when the prompt/tool call does not specify one |
 | `tools.imageGeneration.defaultImageSize` | string | `"1K"` | Default size hint, for example `1K`, `2K`, `4K`, or `1024x1024` |
@ -168,6 +168,31 @@ For reference-image edits, use a Gemini Flash image model:

 Imagen 4 supports the aspect ratios `1:1`, `9:16`, `16:9`, `3:4`, and `4:3`. Unsupported ratios are ignored and the model uses its default. The `defaultImageSize` setting has no effect on Gemini models; sizing is controlled by `defaultAspectRatio` only. Reference images passed with an Imagen model are ignored (with a warning logged).

+### Ollama
+
+Ollama's experimental native image generation API works with local servers and hosted ollama.com models. Local access at `http://localhost:11434/api` does not require an API key; set `providers.ollama.apiKey` only when targeting `https://ollama.com/api`.
+
+```json
+{
+  "providers": {
+    "ollama": {
+      "apiBase": "http://localhost:11434/api"
+    }
+  },
+  "tools": {
+    "imageGeneration": {
+      "enabled": true,
+      "provider": "ollama",
+      "model": "x/z-image-turbo",
+      "defaultAspectRatio": "16:9",
+      "defaultImageSize": "2K"
+    }
+  }
+}
+```
+
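+For Ollama, nanobot converts `defaultAspectRatio` and `defaultImageSize` into native `width` and `height` values: `1K`, `2K`, and `4K` set the long side to 1024, 2048, and 4096 pixels, and the short side is derived from the aspect ratio. An explicit `WIDTHxHEIGHT` size is sent as-is, and the aspect ratio is then ignored. Reference images are not supported by this integration.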
+
 ### StepFun

 StepFun (阶跃星辰) `step-image-edit-2` supports text-to-image generation.  The `step-1x-medium` variant additionally supports **style-reference** image edits, where a reference image guides the visual style of the output.
@ -274,7 +299,7 @@ Use the reference image. Keep the same robot and composition, change the palette
 |---------|-------|
 | `generate_image` is not available | Set `tools.imageGeneration.enabled` to `true` and restart the gateway |
 | Missing API key error | Configure `providers.<provider>.apiKey`; if using `${VAR_NAME}`, confirm the environment variable is visible to the gateway process |
-| `unsupported image generation provider` | Use `openrouter`, `aihubmix`, `minimax`, `gemini`, or `stepfun` |
+| `unsupported image generation provider` | Use `openrouter`, `aihubmix`, `minimax`, `gemini`, `ollama`, or `stepfun` |
 | AIHubMix says `Incorrect model ID` | Use `model: "gpt-image-2-free"`; nanobot expands it to the required `openai/gpt-image-2-free` model path internally |
 | Generation times out | Try a smaller/default image size, set AIHubMix `extraBody.quality` to `"low"`, or retry later |
 | Reference image rejected | Reference image paths must be inside the workspace or nanobot media directory and must be valid image files |
--- a/nanobot/agent/tools/image_generation.py
+++ b/nanobot/agent/tools/image_generation.py
@ -21,6 +21,7 @@ from nanobot.providers.image_generation import (
    ImageGenerationProvider,
    get_image_gen_provider,
 )
+from nanobot.providers.registry import find_by_name
 from nanobot.utils.artifacts import (
    ArtifactError,
    generated_image_tool_result,
@ -117,6 +118,10 @@ class ImageGenerationTool(Tool):
    def _provider_config(self) -> ProviderConfig | None:
        return self.provider_configs.get(self.config.provider)

+    def _provider_allows_missing_api_key(self) -> bool:  # True when the configured provider may be used without an API key
+        spec = find_by_name(self.config.provider)  # registry lookup; a falsy result (no such spec) makes this method return False
+        return bool(spec and (spec.is_local or spec.is_direct or spec.is_oauth))  # local, direct, or OAuth specs are exempt from the key requirement
+
    def _provider_client(self) -> ImageGenerationProvider | None:
        provider = self._provider_config()
        cls = get_image_gen_provider(self.config.provider)
@ -174,7 +179,7 @@ class ImageGenerationTool(Tool):
        if client is None:
            return f"Error: unsupported image generation provider '{self.config.provider}'"
        provider = self._provider_config()
-        if not provider or not provider.api_key:
+        if not self._provider_allows_missing_api_key() and (not provider or not provider.api_key):
            return self._missing_api_key_error()

        requested = count or 1
--- a/nanobot/providers/image_generation.py
+++ b/nanobot/providers/image_generation.py
@ -4,6 +4,7 @@ from __future__ import annotations

 import base64
 import binascii
+import re
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from pathlib import Path
@ -31,6 +32,14 @@ _AIHUBMIX_ASPECT_RATIO_SIZES = {
 }
 _GEMINI_DEFAULT_TIMEOUT_S = 120.0
 _GEMINI_IMAGEN_ASPECT_RATIOS = {"1:1", "9:16", "16:9", "3:4", "4:3"}
+_OLLAMA_DEFAULT_SIDE = 1024  # long-side pixels used when image_size is missing or not a known preset
+_OLLAMA_SIZE_PRESETS = {  # preset name (looked up upper-cased) -> long-side pixels
+    "1K": 1024,
+    "2K": 2048,
+    "4K": 4096,
+}
+_OLLAMA_EXPLICIT_SIZE_RE = re.compile(r"^\s*(\d+)\s*[xX]\s*(\d+)\s*$")  # explicit "WIDTHxHEIGHT", e.g. "1024x768"; groups are width, height
+_OLLAMA_ASPECT_RATIO_RE = re.compile(r"^\s*(\d+)\s*:\s*(\d+)\s*$")  # aspect ratio "W:H", e.g. "16:9"; groups are width term, height term


 class ImageGenerationError(RuntimeError):
@ -429,6 +438,139 @@ def _http_error_detail(response: httpx.Response) -> str:
    return response.text[:500] or "<empty response body>"


+def _round_to_multiple(value: float, multiple: int = 8) -> int:  # snap `value` to the nearest multiple of `multiple`
+    rounded = int(round(value / multiple) * multiple)  # built-in round() resolves exact .5 ties to the even quotient
+    return max(multiple, rounded)  # floor the result at one whole `multiple`
+
+
+def _ollama_dimensions(aspect_ratio: str | None, image_size: str | None) -> tuple[int, int]:  # returns (width, height) in pixels
+    if image_size:
+        size = image_size.strip()
+        explicit = _OLLAMA_EXPLICIT_SIZE_RE.fullmatch(size)
+        if explicit:
+            return int(explicit.group(1)), int(explicit.group(2))  # explicit "WxH" wins outright: aspect_ratio is ignored and the values are neither rounded nor clamped (so "0x0" passes through)
+        long_side = _OLLAMA_SIZE_PRESETS.get(size.upper(), _OLLAMA_DEFAULT_SIDE)  # unrecognized size strings silently fall back to the default side
+    else:
+        long_side = _OLLAMA_DEFAULT_SIDE
+
+    if not aspect_ratio:
+        return long_side, long_side  # no ratio given -> square image
+
+    ratio = _OLLAMA_ASPECT_RATIO_RE.fullmatch(aspect_ratio.strip())
+    if ratio is None:
+        return long_side, long_side  # malformed ratio -> square image rather than an error
+
+    width_ratio = int(ratio.group(1))
+    height_ratio = int(ratio.group(2))
+    if width_ratio <= 0 or height_ratio <= 0:
+        return long_side, long_side  # a zero term (e.g. "0:1") would divide by zero below
+
+    if width_ratio >= height_ratio:  # landscape or square: width is the long side
+        width = long_side
+        height = _round_to_multiple(long_side * height_ratio / width_ratio)  # short side scaled by the ratio, snapped to a multiple of 8
+    else:  # portrait: height is the long side
+        height = long_side
+        width = _round_to_multiple(long_side * width_ratio / height_ratio)  # short side scaled by the ratio, snapped to a multiple of 8
+    return max(8, width), max(8, height)  # final guard: neither side below 8 pixels
+
+
+def _ollama_image_data_url(value: str) -> str:  # normalize one image string from an Ollama response
+    if value.startswith("data:image/"):
+        return value  # already a data URL: pass through unchanged
+    return _b64_image_data_url(value)  # anything else is handed to _b64_image_data_url (presumably raw base64 -> data URL; helper defined elsewhere in this module)
+
+
+def _ollama_images_from_payload(payload: dict[str, Any]) -> list[str]:  # gather every image string found under "image" and "images"
+    images: list[str] = []
+
+    def collect(value: Any) -> None:  # recursive walker: appends strings, descends into lists, ignores everything else
+        if isinstance(value, str) and value:  # empty strings are skipped
+            images.append(_ollama_image_data_url(value))
+        elif isinstance(value, list):
+            for item in value:
+                collect(item)  # nested lists are flattened in order
+
+    collect(payload.get("image"))  # singular key first, so its image(s) come first in the result
+    collect(payload.get("images"))  # a missing key yields None, which collect() ignores
+    return images
+
+
+class OllamaImageGenerationClient(ImageGenerationProvider):
+    """Async client for Ollama native image generation models."""
+
+    provider_name = "ollama"  # name this client is selected by (matches `tools.imageGeneration.provider`)
+    default_timeout = 300.0  # NOTE(review): presumably seconds, consumed by the base class — confirm
+
+    def _default_base_url(self) -> str:  # base URL used when no api_base is configured
+        return "http://localhost:11434/api"
+
+    def _resolve_base_url(self, api_base: str | None) -> str:  # normalize a configured base URL, falling back to the local default
+        if api_base:
+            base = api_base.rstrip("/")  # drop trailing slashes so path joining stays predictable
+            if base.endswith("/v1"):
+                return f"{base[:-3]}/api"  # a ".../v1" base is rewritten to the native ".../api" root
+            return base
+        return self._default_base_url()  # None or empty string -> default
+
+    async def generate(  # POST a text-to-image request to `{api_base}/generate` and return the decoded images
+        self,
+        *,
+        prompt: str,
+        model: str,
+        reference_images: list[str] | None = None,
+        aspect_ratio: str | None = None,
+        image_size: str | None = None,
+    ) -> GeneratedImageResponse:
+        if reference_images:  # rejected up front, before any HTTP request is made
+            raise ImageGenerationError(
+                "Ollama image generation does not support reference images"
+            )
+
+        width, height = _ollama_dimensions(aspect_ratio, image_size)
+        body: dict[str, Any] = {
+            "model": model,
+            "prompt": prompt,
+            "width": width,
+            "height": height,
+            "steps": 0,  # NOTE(review): presumably 0 defers to the model's default step count — confirm against the Ollama API
+        }
+        body.update(self.extra_body)  # configured extra_body entries may override any field set above
+        body["stream"] = False  # applied after the merge, so extra_body cannot turn streaming on
+
+        headers = {
+            "Content-Type": "application/json",
+            **self.extra_headers,  # configured headers may override Content-Type
+        }
+        if self.api_key:  # Authorization is only sent when a key is configured
+            headers["Authorization"] = f"Bearer {self.api_key}"  # set last, so it replaces any Authorization from extra_headers
+
+        url = f"{self.api_base}/generate"
+        response = await self._http_post(url, headers=headers, body=body)
+
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError as exc:
+            detail = _http_error_detail(response)  # error text extracted from the response body
+            logger.error(
+                "Ollama image generation failed (HTTP {}): {}",
+                response.status_code,
+                detail,
+            )
+            raise ImageGenerationError(
+                f"Ollama image generation failed (HTTP {response.status_code}): {detail}"
+            ) from exc  # keep the httpx error as the cause
+
+        data = response.json()  # NOTE(review): a non-JSON success body would raise here as a decode error, not ImageGenerationError — confirm this is acceptable
+        images = _ollama_images_from_payload(data)
+
+        self._require_images(images, data)  # NOTE(review): presumably raises when `images` is empty — defined outside this view, confirm
+
+        response_text = data.get("response")
+        content = response_text if isinstance(response_text, str) else ""  # missing or non-string "response" becomes ""
+
+        return GeneratedImageResponse(images=images, content=content, raw=data)
+
+
 class GeminiImageGenerationClient(ImageGenerationProvider):
    """Async client for Gemini/Imagen image generation via the Generative Language API."""

@ -886,5 +1028,6 @@ def _stepfun_images_from_payload(payload: dict[str, Any]) -> list[str]:
 register_image_gen_provider(OpenRouterImageGenerationClient)
 register_image_gen_provider(AIHubMixImageGenerationClient)
 register_image_gen_provider(GeminiImageGenerationClient)
+register_image_gen_provider(OllamaImageGenerationClient)
 register_image_gen_provider(MiniMaxImageGenerationClient)
 register_image_gen_provider(StepFunImageGenerationClient)
--- a/tests/providers/test_image_generation.py
+++ b/tests/providers/test_image_generation.py
@ -13,6 +13,7 @@ from nanobot.providers.image_generation import (
    GeneratedImageResponse,
    ImageGenerationError,
    MiniMaxImageGenerationClient,
+    OllamaImageGenerationClient,
    OpenRouterImageGenerationClient,
    StepFunImageGenerationClient,
 )
@ -133,6 +134,54 @@ async def test_openrouter_image_generation_requires_api_key() -> None:
        await client.generate(prompt="draw", model="model")


+@pytest.mark.asyncio
+async def test_ollama_image_generation_payload_and_response() -> None:  # checks the request Ollama receives and how its reply is decoded
+    raw_b64 = PNG_DATA_URL.removeprefix("data:image/png;base64,")  # bare base64, as returned under the "image" key
+    fake = FakeClient(FakeResponse({"image": raw_b64}))
+    client = OllamaImageGenerationClient(
+        api_key="ollama-test",
+        api_base="http://localhost:11434/v1/",  # ".../v1/" base: expected to be rewritten to the native ".../api" root
+        extra_headers={"X-Test": "1"},
+        extra_body={"seed": 123},
+        client=fake,  # type: ignore[arg-type]
+    )
+
+    response = await client.generate(
+        prompt="a sunset",
+        model="x/z-image-turbo",
+        aspect_ratio="16:9",
+        image_size="1K",
+    )
+
+    assert response.images == [PNG_DATA_URL]  # bare base64 comes back as a full data URL
+    assert response.content == ""  # the fake payload has no "response" text
+
+    call = fake.calls[0]
+    assert call["url"] == "http://localhost:11434/api/generate"
+    assert call["headers"]["Authorization"] == "Bearer ollama-test"
+    assert call["headers"]["X-Test"] == "1"
+    body = call["json"]
+    assert body["model"] == "x/z-image-turbo"
+    assert body["prompt"] == "a sunset"
+    assert body["width"] == 1024  # "1K" preset -> 1024 on the long side
+    assert body["height"] == 576  # 1024 * 9 / 16 = 576, already a multiple of 8
+    assert body["steps"] == 0
+    assert body["stream"] is False
+    assert body["seed"] == 123  # extra_body entries are merged into the request
+
+
+@pytest.mark.asyncio
+async def test_ollama_image_generation_rejects_reference_images() -> None:  # reference images must be refused with ImageGenerationError
+    client = OllamaImageGenerationClient(api_key=None)  # no key and no fake HTTP client are supplied
+
+    with pytest.raises(ImageGenerationError, match="reference images"):
+        await client.generate(
+            prompt="edit this",
+            model="x/z-image-turbo",
+            reference_images=["ref.png"],  # any non-empty list triggers the rejection
+        )
+
+
@pytest.mark.asyncio
 async def test_aihubmix_image_generation_payload_and_response() -> None:
    raw_b64 = PNG_DATA_URL.removeprefix("data:image/png;base64,")
--- a/tests/tools/test_image_generation_tool.py
+++ b/tests/tools/test_image_generation_tool.py
@ -138,6 +138,39 @@ async def test_generate_image_tool_reports_missing_aihubmix_key(tmp_path: Path)
    assert result.startswith("Error: AIHubMix API key is not configured")


+@pytest.mark.asyncio
+async def test_generate_image_tool_allows_ollama_without_api_key(  # the tool must not report a missing-key error for the ollama provider
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    set_config_path(tmp_path / "config.json")
+    FakeImageClient.instances = []  # reset the shared instance list so index 0 below is this test's client
+    monkeypatch.setattr(
+        "nanobot.agent.tools.image_generation.get_image_gen_provider",
+        lambda name: FakeImageClient if name == "ollama" else None,  # only "ollama" resolves to the fake client
+    )
+    tool = ImageGenerationTool(
+        workspace=tmp_path,
+        config=ImageGenerationToolConfig(
+            enabled=True,
+            provider="ollama",
+            model="x/z-image-turbo",
+        ),
+        provider_configs={"ollama": ProviderConfig(api_base="http://localhost:11434/v1")},  # deliberately no api_key
+    )
+
+    result = await tool.execute(prompt="draw a cat")
+
+    payload = json.loads(result)  # a JSON result (not an "Error: ..." string) means the key check was skipped
+    assert len(payload["artifacts"]) == 1
+
+    fake = FakeImageClient.instances[0]
+    assert fake.kwargs["api_key"] is None
+    assert fake.kwargs["api_base"] == "http://localhost:11434/v1"  # the tool passes the configured base through unchanged
+    assert fake.calls[0]["aspect_ratio"] == "1:1"  # config default, since the call gave no ratio
+    assert fake.calls[0]["image_size"] == "1K"  # config default, since the call gave no size
+
+
@pytest.mark.asyncio
 async def test_generate_image_tool_rejects_reference_outside_workspace(tmp_path: Path) -> None:
    set_config_path(tmp_path / "config.json")