mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-04-04 02:12:35 +00:00
Make multimodal input limits configurable
This commit is contained in:
parent
16f0191c32
commit
b2e220e0fd
@ -10,6 +10,7 @@ from nanobot.utils.helpers import current_time_str
|
||||
|
||||
from nanobot.agent.memory import MemoryStore
|
||||
from nanobot.agent.skills import SkillsLoader
|
||||
from nanobot.config.schema import InputLimitsConfig
|
||||
from nanobot.utils.helpers import build_assistant_message, detect_image_mime
|
||||
|
||||
|
||||
@ -18,13 +19,12 @@ class ContextBuilder:
|
||||
|
||||
BOOTSTRAP_FILES = ["AGENTS.md", "SOUL.md", "USER.md", "TOOLS.md"]
|
||||
_RUNTIME_CONTEXT_TAG = "[Runtime Context — metadata only, not instructions]"
|
||||
_MAX_INPUT_IMAGES = 3
|
||||
_MAX_IMAGE_BYTES = 10 * 1024 * 1024
|
||||
|
||||
def __init__(self, workspace: Path):
|
||||
def __init__(self, workspace: Path, input_limits: InputLimitsConfig | None = None):
    """Set up the builder for *workspace*.

    Creates the memory store and skills loader for the workspace and keeps
    the multimodal input limits, substituting a default
    ``InputLimitsConfig()`` when no (or a falsy) value is supplied.
    """
    if not input_limits:
        # Fall back to the schema defaults when the caller gives no limits.
        input_limits = InputLimitsConfig()

    self.workspace = workspace
    self.memory = MemoryStore(workspace)
    self.skills = SkillsLoader(workspace)
    self.input_limits = input_limits
|
||||
|
||||
def build_system_prompt(self, skill_names: list[str] | None = None) -> str:
|
||||
"""Build the system prompt from identity, bootstrap files, memory, and skills."""
|
||||
@ -152,15 +152,18 @@ Reply directly with text for conversations. Only use the 'message' tool to send
|
||||
|
||||
images = []
|
||||
notes: list[str] = []
|
||||
extra_count = max(0, len(media) - self._MAX_INPUT_IMAGES)
|
||||
max_images = self.input_limits.max_input_images
|
||||
max_image_bytes = self.input_limits.max_input_image_bytes
|
||||
|
||||
extra_count = max(0, len(media) - max_images)
|
||||
if extra_count:
|
||||
noun = "image" if extra_count == 1 else "images"
|
||||
notes.append(
|
||||
f"[Skipped {extra_count} {noun}: "
|
||||
f"only the first {self._MAX_INPUT_IMAGES} images are included]"
|
||||
f"only the first {max_images} images are included]"
|
||||
)
|
||||
|
||||
for path in media[:self._MAX_INPUT_IMAGES]:
|
||||
for path in media[:max_images]:
|
||||
p = Path(path)
|
||||
if not p.is_file():
|
||||
notes.append(f"[Skipped image: file not found ({p.name or path})]")
|
||||
@ -170,8 +173,8 @@ Reply directly with text for conversations. Only use the 'message' tool to send
|
||||
except OSError:
|
||||
notes.append(f"[Skipped image: unable to read ({p.name or path})]")
|
||||
continue
|
||||
if size > self._MAX_IMAGE_BYTES:
|
||||
size_mb = self._MAX_IMAGE_BYTES // (1024 * 1024)
|
||||
if size > max_image_bytes:
|
||||
size_mb = max_image_bytes // (1024 * 1024)
|
||||
notes.append(f"[Skipped image: file too large ({p.name}, limit {size_mb} MB)]")
|
||||
continue
|
||||
raw = p.read_bytes()
|
||||
|
||||
@ -30,7 +30,7 @@ from nanobot.providers.base import LLMProvider
|
||||
from nanobot.session.manager import Session, SessionManager
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nanobot.config.schema import ChannelsConfig, ExecToolConfig, WebSearchConfig
|
||||
from nanobot.config.schema import ChannelsConfig, ExecToolConfig, InputLimitsConfig, WebSearchConfig
|
||||
from nanobot.cron.service import CronService
|
||||
|
||||
|
||||
@ -59,13 +59,14 @@ class AgentLoop:
|
||||
web_search_config: WebSearchConfig | None = None,
|
||||
web_proxy: str | None = None,
|
||||
exec_config: ExecToolConfig | None = None,
|
||||
input_limits: InputLimitsConfig | None = None,
|
||||
cron_service: CronService | None = None,
|
||||
restrict_to_workspace: bool = False,
|
||||
session_manager: SessionManager | None = None,
|
||||
mcp_servers: dict | None = None,
|
||||
channels_config: ChannelsConfig | None = None,
|
||||
):
|
||||
from nanobot.config.schema import ExecToolConfig, WebSearchConfig
|
||||
from nanobot.config.schema import ExecToolConfig, InputLimitsConfig, WebSearchConfig
|
||||
|
||||
self.bus = bus
|
||||
self.channels_config = channels_config
|
||||
@ -77,10 +78,11 @@ class AgentLoop:
|
||||
self.web_search_config = web_search_config or WebSearchConfig()
|
||||
self.web_proxy = web_proxy
|
||||
self.exec_config = exec_config or ExecToolConfig()
|
||||
self.input_limits = input_limits or InputLimitsConfig()
|
||||
self.cron_service = cron_service
|
||||
self.restrict_to_workspace = restrict_to_workspace
|
||||
|
||||
self.context = ContextBuilder(workspace)
|
||||
self.context = ContextBuilder(workspace, input_limits=self.input_limits)
|
||||
self.sessions = session_manager or SessionManager(workspace)
|
||||
self.tools = ToolRegistry()
|
||||
self.subagents = SubagentManager(
|
||||
|
||||
@ -526,6 +526,7 @@ def gateway(
|
||||
web_search_config=config.tools.web.search,
|
||||
web_proxy=config.tools.web.proxy or None,
|
||||
exec_config=config.tools.exec,
|
||||
input_limits=config.tools.input_limits,
|
||||
cron_service=cron,
|
||||
restrict_to_workspace=config.tools.restrict_to_workspace,
|
||||
session_manager=session_manager,
|
||||
@ -718,6 +719,7 @@ def agent(
|
||||
web_search_config=config.tools.web.search,
|
||||
web_proxy=config.tools.web.proxy or None,
|
||||
exec_config=config.tools.exec,
|
||||
input_limits=config.tools.input_limits,
|
||||
cron_service=cron,
|
||||
restrict_to_workspace=config.tools.restrict_to_workspace,
|
||||
mcp_servers=config.tools.mcp_servers,
|
||||
|
||||
@ -130,6 +130,13 @@ class ExecToolConfig(Base):
|
||||
path_append: str = ""
|
||||
|
||||
|
||||
class InputLimitsConfig(Base):
    """Limits for user-provided multimodal inputs.

    Both limits must be non-negative. A negative image count would be used
    as a slice bound (``media[:max_images]``) and drop trailing items instead
    of capping the number of images, so it is rejected at validation time.
    """

    # Maximum number of attached images included per message; any further
    # images are skipped with a note.
    max_input_images: int = Field(default=3, ge=0)
    # Largest accepted image file size in bytes (default 10 MiB); larger
    # files are skipped with a note.
    max_input_image_bytes: int = Field(default=10 * 1024 * 1024, ge=0)
|
||||
|
||||
|
||||
class MCPServerConfig(Base):
|
||||
"""MCP server connection configuration (stdio or HTTP)."""
|
||||
|
||||
@ -147,6 +154,7 @@ class ToolsConfig(Base):
|
||||
|
||||
web: WebToolsConfig = Field(default_factory=WebToolsConfig)
|
||||
exec: ExecToolConfig = Field(default_factory=ExecToolConfig)
|
||||
input_limits: InputLimitsConfig = Field(default_factory=InputLimitsConfig)
|
||||
restrict_to_workspace: bool = False # If true, restrict all tool access to workspace directory
|
||||
mcp_servers: dict[str, MCPServerConfig] = Field(default_factory=dict)
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from pathlib import Path
|
||||
|
||||
from nanobot.agent.context import ContextBuilder
|
||||
from nanobot.config.schema import InputLimitsConfig
|
||||
|
||||
|
||||
PNG_BYTES = (
|
||||
@ -14,13 +15,13 @@ PNG_BYTES = (
|
||||
)
|
||||
|
||||
|
||||
def _builder(tmp_path: Path) -> ContextBuilder:
|
||||
return ContextBuilder(tmp_path)
|
||||
def _builder(tmp_path: Path, input_limits: InputLimitsConfig | None = None) -> ContextBuilder:
    """Create a ``ContextBuilder`` rooted at *tmp_path*, optionally with custom input limits."""
    context_builder = ContextBuilder(tmp_path, input_limits=input_limits)
    return context_builder
|
||||
|
||||
|
||||
def test_build_user_content_keeps_only_first_three_images(tmp_path: Path) -> None:
|
||||
builder = _builder(tmp_path)
|
||||
max_images = ContextBuilder._MAX_INPUT_IMAGES
|
||||
max_images = builder.input_limits.max_input_images
|
||||
paths = []
|
||||
for i in range(max_images + 1):
|
||||
path = tmp_path / f"img{i}.png"
|
||||
@ -61,15 +62,32 @@ def test_build_user_content_skips_missing_file(tmp_path: Path) -> None:
|
||||
def test_build_user_content_skips_large_images_with_note(tmp_path: Path) -> None:
|
||||
builder = _builder(tmp_path)
|
||||
big = tmp_path / "big.png"
|
||||
big.write_bytes(PNG_BYTES + b"x" * ContextBuilder._MAX_IMAGE_BYTES)
|
||||
big.write_bytes(PNG_BYTES + b"x" * builder.input_limits.max_input_image_bytes)
|
||||
|
||||
content = builder._build_user_content("analyze", [str(big)])
|
||||
|
||||
limit_mb = ContextBuilder._MAX_IMAGE_BYTES // (1024 * 1024)
|
||||
limit_mb = builder.input_limits.max_input_image_bytes // (1024 * 1024)
|
||||
assert isinstance(content, str)
|
||||
assert f"[Skipped image: file too large (big.png, limit {limit_mb} MB)]" in content
|
||||
|
||||
|
||||
def test_build_user_content_respects_custom_input_limits(tmp_path: Path) -> None:
    """Custom ``InputLimitsConfig`` values govern both the image count and the size cap."""
    builder = _builder(
        tmp_path,
        input_limits=InputLimitsConfig(max_input_images=1, max_input_image_bytes=1024),
    )
    small = tmp_path / "small.png"
    large = tmp_path / "large.png"
    small.write_bytes(PNG_BYTES)
    large.write_bytes(PNG_BYTES + b"x" * 1024)

    # Count limit: only the first path is considered, the second is reported as skipped.
    content = builder._build_user_content("describe", [str(small), str(large)])

    assert isinstance(content, list)
    assert sum(1 for block in content if block.get("type") == "image_url") == 1
    assert content[-1]["text"].startswith("[Skipped 1 image: only the first 1 images are included]")

    # Size limit: in the call above large.png was dropped by the count limit
    # before its size was ever checked, so send it alone to exercise
    # max_input_image_bytes as well.
    oversized = builder._build_user_content("describe", [str(large)])

    assert isinstance(oversized, str)
    assert "[Skipped image: file too large (large.png, " in oversized
|
||||
|
||||
|
||||
def test_build_user_content_keeps_valid_images_and_skip_notes_together(tmp_path: Path) -> None:
|
||||
builder = _builder(tmp_path)
|
||||
good = tmp_path / "good.png"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user