From b2e220e0fd8664ecfd68d49bf98298d4c9547d9d Mon Sep 17 00:00:00 2001
From: Jinxiang Gan <jganad@connect.ust.hk>
Date: Tue, 17 Mar 2026 22:30:58 +0800
Subject: [PATCH] Make multimodal input limits configurable

---
 nanobot/agent/context.py         | 19 +++++++++++--------
 nanobot/agent/loop.py            |  8 +++++---
 nanobot/cli/commands.py          |  2 ++
 nanobot/config/schema.py         |  8 ++++++++
 tests/test_context_multimodal.py | 28 +++++++++++++++++++++++-----
 5 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py
index f7c4396b5..608c11af4 100644
--- a/nanobot/agent/context.py
+++ b/nanobot/agent/context.py
@@ -10,6 +10,7 @@ from nanobot.utils.helpers import current_time_str
 
 from nanobot.agent.memory import MemoryStore
 from nanobot.agent.skills import SkillsLoader
+from nanobot.config.schema import InputLimitsConfig
 from nanobot.utils.helpers import build_assistant_message, detect_image_mime
 
 
@@ -18,13 +19,12 @@ class ContextBuilder:
 
     BOOTSTRAP_FILES = ["AGENTS.md", "SOUL.md", "USER.md", "TOOLS.md"]
     _RUNTIME_CONTEXT_TAG = "[Runtime Context — metadata only, not instructions]"
-    _MAX_INPUT_IMAGES = 3
-    _MAX_IMAGE_BYTES = 10 * 1024 * 1024
 
-    def __init__(self, workspace: Path):
+    def __init__(self, workspace: Path, input_limits: InputLimitsConfig | None = None):
         self.workspace = workspace
         self.memory = MemoryStore(workspace)
         self.skills = SkillsLoader(workspace)
+        self.input_limits = input_limits or InputLimitsConfig()  # None -> schema defaults
 
     def build_system_prompt(self, skill_names: list[str] | None = None) -> str:
         """Build the system prompt from identity, bootstrap files, memory, and skills."""
@@ -152,15 +152,18 @@ Reply directly with text for conversations. Only use the 'message' tool to send
 
         images = []
         notes: list[str] = []
-        extra_count = max(0, len(media) - self._MAX_INPUT_IMAGES)
+        max_images = self.input_limits.max_input_images
+        max_image_bytes = self.input_limits.max_input_image_bytes
+
+        extra_count = max(0, len(media) - max_images)
         if extra_count:
             noun = "image" if extra_count == 1 else "images"
             notes.append(
                 f"[Skipped {extra_count} {noun}: "
-                f"only the first {self._MAX_INPUT_IMAGES} images are included]"
+                f"only the first {max_images} images are included]"
             )
 
-        for path in media[:self._MAX_INPUT_IMAGES]:
+        for path in media[:max_images]:
             p = Path(path)
             if not p.is_file():
                 notes.append(f"[Skipped image: file not found ({p.name or path})]")
@@ -170,8 +173,8 @@ Reply directly with text for conversations. Only use the 'message' tool to send
             except OSError:
                 notes.append(f"[Skipped image: unable to read ({p.name or path})]")
                 continue
-            if size > self._MAX_IMAGE_BYTES:
-                size_mb = self._MAX_IMAGE_BYTES // (1024 * 1024)
+            if size > max_image_bytes:
+                size_mb = max_image_bytes // (1024 * 1024)
                 notes.append(f"[Skipped image: file too large ({p.name}, limit {size_mb} MB)]")
                 continue
             raw = p.read_bytes()
diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 8d3d16547..3da926631 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -30,7 +30,7 @@ from nanobot.providers.base import LLMProvider
 from nanobot.session.manager import Session, SessionManager
 
 if TYPE_CHECKING:
-    from nanobot.config.schema import ChannelsConfig, ExecToolConfig, WebSearchConfig
+    from nanobot.config.schema import ChannelsConfig, ExecToolConfig, InputLimitsConfig, WebSearchConfig
     from nanobot.cron.service import CronService
 
 
@@ -59,13 +59,14 @@ class AgentLoop:
         web_search_config: WebSearchConfig | None = None,
         web_proxy: str | None = None,
         exec_config: ExecToolConfig | None = None,
+        input_limits: InputLimitsConfig | None = None,
         cron_service: CronService | None = None,
         restrict_to_workspace: bool = False,
         session_manager: SessionManager | None = None,
         mcp_servers: dict | None = None,
         channels_config: ChannelsConfig | None = None,
     ):
-        from nanobot.config.schema import ExecToolConfig, WebSearchConfig
+        from nanobot.config.schema import ExecToolConfig, InputLimitsConfig, WebSearchConfig
 
         self.bus = bus
         self.channels_config = channels_config
@@ -77,10 +78,11 @@ class AgentLoop:
         self.web_search_config = web_search_config or WebSearchConfig()
         self.web_proxy = web_proxy
         self.exec_config = exec_config or ExecToolConfig()
+        self.input_limits = input_limits or InputLimitsConfig()
         self.cron_service = cron_service
         self.restrict_to_workspace = restrict_to_workspace
 
-        self.context = ContextBuilder(workspace)
+        self.context = ContextBuilder(workspace, input_limits=self.input_limits)
         self.sessions = session_manager or SessionManager(workspace)
         self.tools = ToolRegistry()
         self.subagents = SubagentManager(
diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 01097b1da..8796f9cac 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -526,6 +526,7 @@ def gateway(
         web_search_config=config.tools.web.search,
         web_proxy=config.tools.web.proxy or None,
         exec_config=config.tools.exec,
+        input_limits=config.tools.input_limits,
         cron_service=cron,
         restrict_to_workspace=config.tools.restrict_to_workspace,
         session_manager=session_manager,
@@ -718,6 +719,7 @@ def agent(
         web_search_config=config.tools.web.search,
         web_proxy=config.tools.web.proxy or None,
         exec_config=config.tools.exec,
+        input_limits=config.tools.input_limits,
         cron_service=cron,
         restrict_to_workspace=config.tools.restrict_to_workspace,
         mcp_servers=config.tools.mcp_servers,
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index 1fe84c66c..b45958c55 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -130,6 +130,13 @@ class ExecToolConfig(Base):
     path_append: str = ""
 
 
+class InputLimitsConfig(Base):
+    """Limits for user-provided multimodal inputs; negative values are rejected at load time."""
+
+    max_input_images: int = Field(default=3, ge=0)  # images kept per message; a negative would mis-slice
+    max_input_image_bytes: int = Field(default=10 * 1024 * 1024, ge=0)  # per-image size cap in bytes
+
+
 class MCPServerConfig(Base):
     """MCP server connection configuration (stdio or HTTP)."""
 
@@ -147,6 +154,7 @@ class ToolsConfig(Base):
 
     web: WebToolsConfig = Field(default_factory=WebToolsConfig)
     exec: ExecToolConfig = Field(default_factory=ExecToolConfig)
+    input_limits: InputLimitsConfig = Field(default_factory=InputLimitsConfig)
     restrict_to_workspace: bool = False  # If true, restrict all tool access to workspace directory
     mcp_servers: dict[str, MCPServerConfig] = Field(default_factory=dict)
 
diff --git a/tests/test_context_multimodal.py b/tests/test_context_multimodal.py
index 3206c0b5f..37efb843e 100644
--- a/tests/test_context_multimodal.py
+++ b/tests/test_context_multimodal.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 
 from nanobot.agent.context import ContextBuilder
+from nanobot.config.schema import InputLimitsConfig
 
 
 PNG_BYTES = (
@@ -14,13 +15,13 @@ PNG_BYTES = (
 )
 
 
-def _builder(tmp_path: Path) -> ContextBuilder:
-    return ContextBuilder(tmp_path)
+def _builder(tmp_path: Path, input_limits: InputLimitsConfig | None = None) -> ContextBuilder:
+    return ContextBuilder(tmp_path, input_limits=input_limits)
 
 
 def test_build_user_content_keeps_only_first_three_images(tmp_path: Path) -> None:
     builder = _builder(tmp_path)
-    max_images = ContextBuilder._MAX_INPUT_IMAGES
+    max_images = builder.input_limits.max_input_images
     paths = []
     for i in range(max_images + 1):
         path = tmp_path / f"img{i}.png"
@@ -61,15 +62,32 @@ def test_build_user_content_skips_missing_file(tmp_path: Path) -> None:
 def test_build_user_content_skips_large_images_with_note(tmp_path: Path) -> None:
     builder = _builder(tmp_path)
     big = tmp_path / "big.png"
-    big.write_bytes(PNG_BYTES + b"x" * ContextBuilder._MAX_IMAGE_BYTES)
+    big.write_bytes(PNG_BYTES + b"x" * builder.input_limits.max_input_image_bytes)
 
     content = builder._build_user_content("analyze", [str(big)])
 
-    limit_mb = ContextBuilder._MAX_IMAGE_BYTES // (1024 * 1024)
+    limit_mb = builder.input_limits.max_input_image_bytes // (1024 * 1024)
     assert isinstance(content, str)
     assert f"[Skipped image: file too large (big.png, limit {limit_mb} MB)]" in content
 
 
+def test_build_user_content_respects_custom_input_limits(tmp_path: Path) -> None:
+    """Custom count and size limits are both enforced, each with its own skip note."""
+    limits = InputLimitsConfig(max_input_images=2, max_input_image_bytes=1024 * 1024)
+    builder = _builder(tmp_path, input_limits=limits)
+    names = ("small.png", "large.png", "extra.png")
+    small, large, extra = (tmp_path / name for name in names)
+    small.write_bytes(PNG_BYTES)
+    large.write_bytes(PNG_BYTES + b"x" * limits.max_input_image_bytes)  # just over the size cap
+    extra.write_bytes(PNG_BYTES)  # third file: cut by the count limit before any size check
+    content = builder._build_user_content("describe", [str(small), str(large), str(extra)])
+
+    assert isinstance(content, list)
+    assert sum(1 for block in content if block.get("type") == "image_url") == 1
+    assert content[-1]["text"].startswith("[Skipped 1 image: only the first 2 images are included]")
+    assert "[Skipped image: file too large (large.png, limit 1 MB)]" in content[-1]["text"]
+
+
 def test_build_user_content_keeps_valid_images_and_skip_notes_together(tmp_path: Path) -> None:
     builder = _builder(tmp_path)
     good = tmp_path / "good.png"