From b2e220e0fd8664ecfd68d49bf98298d4c9547d9d Mon Sep 17 00:00:00 2001 From: Jinxiang Gan Date: Tue, 17 Mar 2026 22:30:58 +0800 Subject: [PATCH] Make multimodal input limits configurable --- nanobot/agent/context.py | 19 +++++++++++-------- nanobot/agent/loop.py | 8 +++++--- nanobot/cli/commands.py | 2 ++ nanobot/config/schema.py | 8 ++++++++ tests/test_context_multimodal.py | 28 +++++++++++++++++++++++----- 5 files changed, 49 insertions(+), 16 deletions(-) diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py index f7c4396b5..608c11af4 100644 --- a/nanobot/agent/context.py +++ b/nanobot/agent/context.py @@ -10,6 +10,7 @@ from nanobot.utils.helpers import current_time_str from nanobot.agent.memory import MemoryStore from nanobot.agent.skills import SkillsLoader +from nanobot.config.schema import InputLimitsConfig from nanobot.utils.helpers import build_assistant_message, detect_image_mime @@ -18,13 +19,12 @@ class ContextBuilder: BOOTSTRAP_FILES = ["AGENTS.md", "SOUL.md", "USER.md", "TOOLS.md"] _RUNTIME_CONTEXT_TAG = "[Runtime Context — metadata only, not instructions]" - _MAX_INPUT_IMAGES = 3 - _MAX_IMAGE_BYTES = 10 * 1024 * 1024 - def __init__(self, workspace: Path): + def __init__(self, workspace: Path, input_limits: InputLimitsConfig | None = None): self.workspace = workspace self.memory = MemoryStore(workspace) self.skills = SkillsLoader(workspace) + self.input_limits = input_limits or InputLimitsConfig() def build_system_prompt(self, skill_names: list[str] | None = None) -> str: """Build the system prompt from identity, bootstrap files, memory, and skills.""" @@ -152,15 +152,18 @@ Reply directly with text for conversations. Only use the 'message' tool to send images = [] notes: list[str] = [] - extra_count = max(0, len(media) - self._MAX_INPUT_IMAGES) + max_images = self.input_limits.max_input_images + max_image_bytes = self.input_limits.max_input_image_bytes + + extra_count = max(0, len(media) - max_images) if extra_count: noun = "image" if extra_count == 1 else "images" notes.append( f"[Skipped {extra_count} {noun}: " - f"only the first {self._MAX_INPUT_IMAGES} images are included]" + f"only the first {max_images} images are included]" ) - for path in media[:self._MAX_INPUT_IMAGES]: + for path in media[:max_images]: p = Path(path) if not p.is_file(): notes.append(f"[Skipped image: file not found ({p.name or path})]") @@ -170,8 +173,8 @@ Reply directly with text for conversations. Only use the 'message' tool to send except OSError: notes.append(f"[Skipped image: unable to read ({p.name or path})]") continue - if size > self._MAX_IMAGE_BYTES: - size_mb = self._MAX_IMAGE_BYTES // (1024 * 1024) + if size > max_image_bytes: + size_mb = max_image_bytes // (1024 * 1024) notes.append(f"[Skipped image: file too large ({p.name}, limit {size_mb} MB)]") continue raw = p.read_bytes() diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 8d3d16547..3da926631 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -30,7 +30,7 @@ from nanobot.providers.base import LLMProvider from nanobot.session.manager import Session, SessionManager if TYPE_CHECKING: - from nanobot.config.schema import ChannelsConfig, ExecToolConfig, WebSearchConfig + from nanobot.config.schema import ChannelsConfig, ExecToolConfig, InputLimitsConfig, WebSearchConfig from nanobot.cron.service import CronService @@ -59,13 +59,14 @@ class AgentLoop: web_search_config: WebSearchConfig | None = None, web_proxy: str | None = None, exec_config: ExecToolConfig | None = None, + input_limits: InputLimitsConfig | None = None, cron_service: CronService | None = None, restrict_to_workspace: bool = False, session_manager: SessionManager | None = None, mcp_servers: dict | None = None, channels_config: ChannelsConfig | None = None, ): - from nanobot.config.schema import ExecToolConfig, WebSearchConfig + from nanobot.config.schema import ExecToolConfig, InputLimitsConfig, WebSearchConfig self.bus = bus self.channels_config = channels_config @@ -77,10 +78,11 @@ class AgentLoop: self.web_search_config = web_search_config or WebSearchConfig() self.web_proxy = web_proxy self.exec_config = exec_config or ExecToolConfig() + self.input_limits = input_limits or InputLimitsConfig() self.cron_service = cron_service self.restrict_to_workspace = restrict_to_workspace - self.context = ContextBuilder(workspace) + self.context = ContextBuilder(workspace, input_limits=self.input_limits) self.sessions = session_manager or SessionManager(workspace) self.tools = ToolRegistry() self.subagents = SubagentManager( diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 01097b1da..8796f9cac 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -526,6 +526,7 @@ def gateway( web_search_config=config.tools.web.search, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, + input_limits=config.tools.input_limits, cron_service=cron, restrict_to_workspace=config.tools.restrict_to_workspace, session_manager=session_manager, @@ -718,6 +719,7 @@ def agent( web_search_config=config.tools.web.search, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, + input_limits=config.tools.input_limits, cron_service=cron, restrict_to_workspace=config.tools.restrict_to_workspace, mcp_servers=config.tools.mcp_servers, diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 1fe84c66c..b45958c55 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -130,6 +130,13 @@ class ExecToolConfig(Base): path_append: str = "" +class InputLimitsConfig(Base): + """Limits for user-provided multimodal inputs.""" + + max_input_images: int = 3 + max_input_image_bytes: int = 10 * 1024 * 1024 + + class MCPServerConfig(Base): """MCP server connection configuration (stdio or HTTP).""" @@ -147,6 +154,7 @@ class ToolsConfig(Base): web: WebToolsConfig = Field(default_factory=WebToolsConfig) exec: ExecToolConfig = Field(default_factory=ExecToolConfig) + input_limits: InputLimitsConfig = Field(default_factory=InputLimitsConfig) restrict_to_workspace: bool = False # If true, restrict all tool access to workspace directory mcp_servers: dict[str, MCPServerConfig] = Field(default_factory=dict) diff --git a/tests/test_context_multimodal.py b/tests/test_context_multimodal.py index 3206c0b5f..37efb843e 100644 --- a/tests/test_context_multimodal.py +++ b/tests/test_context_multimodal.py @@ -1,6 +1,7 @@ from pathlib import Path from nanobot.agent.context import ContextBuilder +from nanobot.config.schema import InputLimitsConfig PNG_BYTES = ( @@ -14,13 +15,13 @@ PNG_BYTES = ( ) -def _builder(tmp_path: Path) -> ContextBuilder: - return ContextBuilder(tmp_path) +def _builder(tmp_path: Path, input_limits: InputLimitsConfig | None = None) -> ContextBuilder: + return ContextBuilder(tmp_path, input_limits=input_limits) def test_build_user_content_keeps_only_first_three_images(tmp_path: Path) -> None: builder = _builder(tmp_path) - max_images = ContextBuilder._MAX_INPUT_IMAGES + max_images = builder.input_limits.max_input_images paths = [] for i in range(max_images + 1): path = tmp_path / f"img{i}.png" @@ -61,15 +62,32 @@ def test_build_user_content_skips_missing_file(tmp_path: Path) -> None: def test_build_user_content_skips_large_images_with_note(tmp_path: Path) -> None: builder = _builder(tmp_path) big = tmp_path / "big.png" - big.write_bytes(PNG_BYTES + b"x" * ContextBuilder._MAX_IMAGE_BYTES) + big.write_bytes(PNG_BYTES + b"x" * builder.input_limits.max_input_image_bytes) content = builder._build_user_content("analyze", [str(big)]) - limit_mb = ContextBuilder._MAX_IMAGE_BYTES // (1024 * 1024) + limit_mb = builder.input_limits.max_input_image_bytes // (1024 * 1024) assert isinstance(content, str) assert f"[Skipped image: file too large (big.png, limit {limit_mb} MB)]" in content +def test_build_user_content_respects_custom_input_limits(tmp_path: Path) -> None: + builder = _builder( + tmp_path, + input_limits=InputLimitsConfig(max_input_images=1, max_input_image_bytes=1024), + ) + small = tmp_path / "small.png" + large = tmp_path / "large.png" + small.write_bytes(PNG_BYTES) + large.write_bytes(PNG_BYTES + b"x" * 1024) + + content = builder._build_user_content("describe", [str(small), str(large)]) + + assert isinstance(content, list) + assert sum(1 for block in content if block.get("type") == "image_url") == 1 + assert content[-1]["text"].startswith("[Skipped 1 image: only the first 1 images are included]") + + def test_build_user_content_keeps_valid_images_and_skip_notes_together(tmp_path: Path) -> None: builder = _builder(tmp_path) good = tmp_path / "good.png"