Make multimodal input limits configurable

2026-05-20 00:22:31 +00:00 · 2026-03-17 22:30:58 +08:00 · 2026-03-17 22:30:58 +08:00 · b2e220e0fd
commit b2e220e0fd
parent 16f0191c32
5 changed files with 49 additions and 16 deletions
--- a/nanobot/agent/context.py
+++ b/nanobot/agent/context.py
@ -10,6 +10,7 @@ from nanobot.utils.helpers import current_time_str

 from nanobot.agent.memory import MemoryStore
 from nanobot.agent.skills import SkillsLoader
+from nanobot.config.schema import InputLimitsConfig
 from nanobot.utils.helpers import build_assistant_message, detect_image_mime


@ -18,13 +19,12 @@ class ContextBuilder:

    BOOTSTRAP_FILES = ["AGENTS.md", "SOUL.md", "USER.md", "TOOLS.md"]
    _RUNTIME_CONTEXT_TAG = "[Runtime Context — metadata only, not instructions]"
-    _MAX_INPUT_IMAGES = 3
-    _MAX_IMAGE_BYTES = 10 * 1024 * 1024

-    def __init__(self, workspace: Path):
+    def __init__(self, workspace: Path, input_limits: InputLimitsConfig | None = None):
        self.workspace = workspace
        self.memory = MemoryStore(workspace)
        self.skills = SkillsLoader(workspace)
+        self.input_limits = input_limits or InputLimitsConfig()  # None falls back to the schema defaults

    def build_system_prompt(self, skill_names: list[str] | None = None) -> str:
        """Build the system prompt from identity, bootstrap files, memory, and skills."""
@ -152,15 +152,18 @@ Reply directly with text for conversations. Only use the 'message' tool to send

        images = []
        notes: list[str] = []
-        extra_count = max(0, len(media) - self._MAX_INPUT_IMAGES)
+        max_images = self.input_limits.max_input_images
+        max_image_bytes = self.input_limits.max_input_image_bytes
+
+        extra_count = max(0, len(media) - max_images)
        if extra_count:
            noun = "image" if extra_count == 1 else "images"
            notes.append(
                f"[Skipped {extra_count} {noun}: "
-                f"only the first {self._MAX_INPUT_IMAGES} images are included]"
+                f"only the first {max_images} images are included]"
            )

-        for path in media[:self._MAX_INPUT_IMAGES]:
+        for path in media[:max_images]:
            p = Path(path)
            if not p.is_file():
                notes.append(f"[Skipped image: file not found ({p.name or path})]")
@ -170,8 +173,8 @@ Reply directly with text for conversations. Only use the 'message' tool to send
            except OSError:
                notes.append(f"[Skipped image: unable to read ({p.name or path})]")
                continue
-            if size > self._MAX_IMAGE_BYTES:
-                size_mb = self._MAX_IMAGE_BYTES // (1024 * 1024)
+            if size > max_image_bytes:
+                size_mb = max_image_bytes // (1024 * 1024)
                notes.append(f"[Skipped image: file too large ({p.name}, limit {size_mb} MB)]")
                continue
            raw = p.read_bytes()
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@ -30,7 +30,7 @@ from nanobot.providers.base import LLMProvider
 from nanobot.session.manager import Session, SessionManager

 if TYPE_CHECKING:
-    from nanobot.config.schema import ChannelsConfig, ExecToolConfig, WebSearchConfig
+    from nanobot.config.schema import ChannelsConfig, ExecToolConfig, InputLimitsConfig, WebSearchConfig
    from nanobot.cron.service import CronService


@ -59,13 +59,14 @@ class AgentLoop:
        web_search_config: WebSearchConfig | None = None,
        web_proxy: str | None = None,
        exec_config: ExecToolConfig | None = None,
+        input_limits: InputLimitsConfig | None = None,
        cron_service: CronService | None = None,
        restrict_to_workspace: bool = False,
        session_manager: SessionManager | None = None,
        mcp_servers: dict | None = None,
        channels_config: ChannelsConfig | None = None,
    ):
-        from nanobot.config.schema import ExecToolConfig, WebSearchConfig
+        from nanobot.config.schema import ExecToolConfig, InputLimitsConfig, WebSearchConfig

        self.bus = bus
        self.channels_config = channels_config
@ -77,10 +78,11 @@ class AgentLoop:
        self.web_search_config = web_search_config or WebSearchConfig()
        self.web_proxy = web_proxy
        self.exec_config = exec_config or ExecToolConfig()
+        self.input_limits = input_limits or InputLimitsConfig()
        self.cron_service = cron_service
        self.restrict_to_workspace = restrict_to_workspace

-        self.context = ContextBuilder(workspace)
+        self.context = ContextBuilder(workspace, input_limits=self.input_limits)
        self.sessions = session_manager or SessionManager(workspace)
        self.tools = ToolRegistry()
        self.subagents = SubagentManager(
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@ -526,6 +526,7 @@ def gateway(
        web_search_config=config.tools.web.search,
        web_proxy=config.tools.web.proxy or None,
        exec_config=config.tools.exec,
+        input_limits=config.tools.input_limits,
        cron_service=cron,
        restrict_to_workspace=config.tools.restrict_to_workspace,
        session_manager=session_manager,
@ -718,6 +719,7 @@ def agent(
        web_search_config=config.tools.web.search,
        web_proxy=config.tools.web.proxy or None,
        exec_config=config.tools.exec,
+        input_limits=config.tools.input_limits,
        cron_service=cron,
        restrict_to_workspace=config.tools.restrict_to_workspace,
        mcp_servers=config.tools.mcp_servers,
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@ -130,6 +130,13 @@ class ExecToolConfig(Base):
    path_append: str = ""


+class InputLimitsConfig(Base):
+    """Limits for user-provided multimodal inputs (negative values are rejected)."""
+
+    max_input_images: int = Field(default=3, ge=0)  # a negative cap would slice media from the end
+    max_input_image_bytes: int = Field(default=10 * 1024 * 1024, ge=0)  # per-image size cap, in bytes
+
+
 class MCPServerConfig(Base):
    """MCP server connection configuration (stdio or HTTP)."""

@ -147,6 +154,7 @@ class ToolsConfig(Base):

    web: WebToolsConfig = Field(default_factory=WebToolsConfig)
    exec: ExecToolConfig = Field(default_factory=ExecToolConfig)
+    input_limits: InputLimitsConfig = Field(default_factory=InputLimitsConfig)
    restrict_to_workspace: bool = False  # If true, restrict all tool access to workspace directory
    mcp_servers: dict[str, MCPServerConfig] = Field(default_factory=dict)

--- a/tests/test_context_multimodal.py
+++ b/tests/test_context_multimodal.py
@ -1,6 +1,7 @@
 from pathlib import Path

 from nanobot.agent.context import ContextBuilder
+from nanobot.config.schema import InputLimitsConfig


 PNG_BYTES = (
@ -14,13 +15,13 @@ PNG_BYTES = (
 )


-def _builder(tmp_path: Path) -> ContextBuilder:
-    return ContextBuilder(tmp_path)
+def _builder(tmp_path: Path, input_limits: InputLimitsConfig | None = None) -> ContextBuilder:
+    return ContextBuilder(tmp_path, input_limits=input_limits)  # None -> ContextBuilder applies default limits


 def test_build_user_content_keeps_only_first_three_images(tmp_path: Path) -> None:
    builder = _builder(tmp_path)
-    max_images = ContextBuilder._MAX_INPUT_IMAGES
+    max_images = builder.input_limits.max_input_images
    paths = []
    for i in range(max_images + 1):
        path = tmp_path / f"img{i}.png"
@ -61,15 +62,32 @@ def test_build_user_content_skips_missing_file(tmp_path: Path) -> None:
 def test_build_user_content_skips_large_images_with_note(tmp_path: Path) -> None:
    builder = _builder(tmp_path)
    big = tmp_path / "big.png"
-    big.write_bytes(PNG_BYTES + b"x" * ContextBuilder._MAX_IMAGE_BYTES)
+    big.write_bytes(PNG_BYTES + b"x" * builder.input_limits.max_input_image_bytes)  # size = limit + len(PNG_BYTES)

    content = builder._build_user_content("analyze", [str(big)])

-    limit_mb = ContextBuilder._MAX_IMAGE_BYTES // (1024 * 1024)
+    limit_mb = builder.input_limits.max_input_image_bytes // (1024 * 1024)  # same whole-MB rounding as the note
    assert isinstance(content, str)
    assert f"[Skipped image: file too large (big.png, limit {limit_mb} MB)]" in content


+def test_build_user_content_respects_custom_input_limits(tmp_path: Path) -> None:
+    builder = _builder(
+        tmp_path,
+        input_limits=InputLimitsConfig(max_input_images=1, max_input_image_bytes=1024),
+    )
+    small = tmp_path / "small.png"
+    large = tmp_path / "large.png"
+    small.write_bytes(PNG_BYTES)
+    large.write_bytes(PNG_BYTES + b"x" * 1024)  # NOTE(review): cut by the 1-image cap, so the byte limit is never exercised
+
+    content = builder._build_user_content("describe", [str(small), str(large)])  # small first: it is the one kept
+
+    assert isinstance(content, list)
+    assert sum(1 for block in content if block.get("type") == "image_url") == 1
+    assert content[-1]["text"].startswith("[Skipped 1 image: only the first 1 images are included]")
+
+
 def test_build_user_content_keeps_valid_images_and_skip_notes_together(tmp_path: Path) -> None:
    builder = _builder(tmp_path)
    good = tmp_path / "good.png"