mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-04-24 11:55:52 +00:00
Make multimodal input limits configurable
This commit is contained in:
parent
16f0191c32
commit
b2e220e0fd
@ -10,6 +10,7 @@ from nanobot.utils.helpers import current_time_str
|
|||||||
|
|
||||||
from nanobot.agent.memory import MemoryStore
|
from nanobot.agent.memory import MemoryStore
|
||||||
from nanobot.agent.skills import SkillsLoader
|
from nanobot.agent.skills import SkillsLoader
|
||||||
|
from nanobot.config.schema import InputLimitsConfig
|
||||||
from nanobot.utils.helpers import build_assistant_message, detect_image_mime
|
from nanobot.utils.helpers import build_assistant_message, detect_image_mime
|
||||||
|
|
||||||
|
|
||||||
@ -18,13 +19,12 @@ class ContextBuilder:
|
|||||||
|
|
||||||
BOOTSTRAP_FILES = ["AGENTS.md", "SOUL.md", "USER.md", "TOOLS.md"]
|
BOOTSTRAP_FILES = ["AGENTS.md", "SOUL.md", "USER.md", "TOOLS.md"]
|
||||||
_RUNTIME_CONTEXT_TAG = "[Runtime Context — metadata only, not instructions]"
|
_RUNTIME_CONTEXT_TAG = "[Runtime Context — metadata only, not instructions]"
|
||||||
_MAX_INPUT_IMAGES = 3
|
|
||||||
_MAX_IMAGE_BYTES = 10 * 1024 * 1024
|
|
||||||
|
|
||||||
def __init__(self, workspace: Path):
|
def __init__(self, workspace: Path, input_limits: InputLimitsConfig | None = None):
|
||||||
self.workspace = workspace
|
self.workspace = workspace
|
||||||
self.memory = MemoryStore(workspace)
|
self.memory = MemoryStore(workspace)
|
||||||
self.skills = SkillsLoader(workspace)
|
self.skills = SkillsLoader(workspace)
|
||||||
|
self.input_limits = input_limits or InputLimitsConfig()
|
||||||
|
|
||||||
def build_system_prompt(self, skill_names: list[str] | None = None) -> str:
|
def build_system_prompt(self, skill_names: list[str] | None = None) -> str:
|
||||||
"""Build the system prompt from identity, bootstrap files, memory, and skills."""
|
"""Build the system prompt from identity, bootstrap files, memory, and skills."""
|
||||||
@ -152,15 +152,18 @@ Reply directly with text for conversations. Only use the 'message' tool to send
|
|||||||
|
|
||||||
images = []
|
images = []
|
||||||
notes: list[str] = []
|
notes: list[str] = []
|
||||||
extra_count = max(0, len(media) - self._MAX_INPUT_IMAGES)
|
max_images = self.input_limits.max_input_images
|
||||||
|
max_image_bytes = self.input_limits.max_input_image_bytes
|
||||||
|
|
||||||
|
extra_count = max(0, len(media) - max_images)
|
||||||
if extra_count:
|
if extra_count:
|
||||||
noun = "image" if extra_count == 1 else "images"
|
noun = "image" if extra_count == 1 else "images"
|
||||||
notes.append(
|
notes.append(
|
||||||
f"[Skipped {extra_count} {noun}: "
|
f"[Skipped {extra_count} {noun}: "
|
||||||
f"only the first {self._MAX_INPUT_IMAGES} images are included]"
|
f"only the first {max_images} images are included]"
|
||||||
)
|
)
|
||||||
|
|
||||||
for path in media[:self._MAX_INPUT_IMAGES]:
|
for path in media[:max_images]:
|
||||||
p = Path(path)
|
p = Path(path)
|
||||||
if not p.is_file():
|
if not p.is_file():
|
||||||
notes.append(f"[Skipped image: file not found ({p.name or path})]")
|
notes.append(f"[Skipped image: file not found ({p.name or path})]")
|
||||||
@ -170,8 +173,8 @@ Reply directly with text for conversations. Only use the 'message' tool to send
|
|||||||
except OSError:
|
except OSError:
|
||||||
notes.append(f"[Skipped image: unable to read ({p.name or path})]")
|
notes.append(f"[Skipped image: unable to read ({p.name or path})]")
|
||||||
continue
|
continue
|
||||||
if size > self._MAX_IMAGE_BYTES:
|
if size > max_image_bytes:
|
||||||
size_mb = self._MAX_IMAGE_BYTES // (1024 * 1024)
|
size_mb = max_image_bytes // (1024 * 1024)
|
||||||
notes.append(f"[Skipped image: file too large ({p.name}, limit {size_mb} MB)]")
|
notes.append(f"[Skipped image: file too large ({p.name}, limit {size_mb} MB)]")
|
||||||
continue
|
continue
|
||||||
raw = p.read_bytes()
|
raw = p.read_bytes()
|
||||||
|
|||||||
@ -30,7 +30,7 @@ from nanobot.providers.base import LLMProvider
|
|||||||
from nanobot.session.manager import Session, SessionManager
|
from nanobot.session.manager import Session, SessionManager
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from nanobot.config.schema import ChannelsConfig, ExecToolConfig, WebSearchConfig
|
from nanobot.config.schema import ChannelsConfig, ExecToolConfig, InputLimitsConfig, WebSearchConfig
|
||||||
from nanobot.cron.service import CronService
|
from nanobot.cron.service import CronService
|
||||||
|
|
||||||
|
|
||||||
@ -59,13 +59,14 @@ class AgentLoop:
|
|||||||
web_search_config: WebSearchConfig | None = None,
|
web_search_config: WebSearchConfig | None = None,
|
||||||
web_proxy: str | None = None,
|
web_proxy: str | None = None,
|
||||||
exec_config: ExecToolConfig | None = None,
|
exec_config: ExecToolConfig | None = None,
|
||||||
|
input_limits: InputLimitsConfig | None = None,
|
||||||
cron_service: CronService | None = None,
|
cron_service: CronService | None = None,
|
||||||
restrict_to_workspace: bool = False,
|
restrict_to_workspace: bool = False,
|
||||||
session_manager: SessionManager | None = None,
|
session_manager: SessionManager | None = None,
|
||||||
mcp_servers: dict | None = None,
|
mcp_servers: dict | None = None,
|
||||||
channels_config: ChannelsConfig | None = None,
|
channels_config: ChannelsConfig | None = None,
|
||||||
):
|
):
|
||||||
from nanobot.config.schema import ExecToolConfig, WebSearchConfig
|
from nanobot.config.schema import ExecToolConfig, InputLimitsConfig, WebSearchConfig
|
||||||
|
|
||||||
self.bus = bus
|
self.bus = bus
|
||||||
self.channels_config = channels_config
|
self.channels_config = channels_config
|
||||||
@ -77,10 +78,11 @@ class AgentLoop:
|
|||||||
self.web_search_config = web_search_config or WebSearchConfig()
|
self.web_search_config = web_search_config or WebSearchConfig()
|
||||||
self.web_proxy = web_proxy
|
self.web_proxy = web_proxy
|
||||||
self.exec_config = exec_config or ExecToolConfig()
|
self.exec_config = exec_config or ExecToolConfig()
|
||||||
|
self.input_limits = input_limits or InputLimitsConfig()
|
||||||
self.cron_service = cron_service
|
self.cron_service = cron_service
|
||||||
self.restrict_to_workspace = restrict_to_workspace
|
self.restrict_to_workspace = restrict_to_workspace
|
||||||
|
|
||||||
self.context = ContextBuilder(workspace)
|
self.context = ContextBuilder(workspace, input_limits=self.input_limits)
|
||||||
self.sessions = session_manager or SessionManager(workspace)
|
self.sessions = session_manager or SessionManager(workspace)
|
||||||
self.tools = ToolRegistry()
|
self.tools = ToolRegistry()
|
||||||
self.subagents = SubagentManager(
|
self.subagents = SubagentManager(
|
||||||
|
|||||||
@ -526,6 +526,7 @@ def gateway(
|
|||||||
web_search_config=config.tools.web.search,
|
web_search_config=config.tools.web.search,
|
||||||
web_proxy=config.tools.web.proxy or None,
|
web_proxy=config.tools.web.proxy or None,
|
||||||
exec_config=config.tools.exec,
|
exec_config=config.tools.exec,
|
||||||
|
input_limits=config.tools.input_limits,
|
||||||
cron_service=cron,
|
cron_service=cron,
|
||||||
restrict_to_workspace=config.tools.restrict_to_workspace,
|
restrict_to_workspace=config.tools.restrict_to_workspace,
|
||||||
session_manager=session_manager,
|
session_manager=session_manager,
|
||||||
@ -718,6 +719,7 @@ def agent(
|
|||||||
web_search_config=config.tools.web.search,
|
web_search_config=config.tools.web.search,
|
||||||
web_proxy=config.tools.web.proxy or None,
|
web_proxy=config.tools.web.proxy or None,
|
||||||
exec_config=config.tools.exec,
|
exec_config=config.tools.exec,
|
||||||
|
input_limits=config.tools.input_limits,
|
||||||
cron_service=cron,
|
cron_service=cron,
|
||||||
restrict_to_workspace=config.tools.restrict_to_workspace,
|
restrict_to_workspace=config.tools.restrict_to_workspace,
|
||||||
mcp_servers=config.tools.mcp_servers,
|
mcp_servers=config.tools.mcp_servers,
|
||||||
|
|||||||
@ -130,6 +130,13 @@ class ExecToolConfig(Base):
|
|||||||
path_append: str = ""
|
path_append: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class InputLimitsConfig(Base):
|
||||||
|
"""Limits for user-provided multimodal inputs."""
|
||||||
|
|
||||||
|
max_input_images: int = 3
|
||||||
|
max_input_image_bytes: int = 10 * 1024 * 1024
|
||||||
|
|
||||||
|
|
||||||
class MCPServerConfig(Base):
|
class MCPServerConfig(Base):
|
||||||
"""MCP server connection configuration (stdio or HTTP)."""
|
"""MCP server connection configuration (stdio or HTTP)."""
|
||||||
|
|
||||||
@ -147,6 +154,7 @@ class ToolsConfig(Base):
|
|||||||
|
|
||||||
web: WebToolsConfig = Field(default_factory=WebToolsConfig)
|
web: WebToolsConfig = Field(default_factory=WebToolsConfig)
|
||||||
exec: ExecToolConfig = Field(default_factory=ExecToolConfig)
|
exec: ExecToolConfig = Field(default_factory=ExecToolConfig)
|
||||||
|
input_limits: InputLimitsConfig = Field(default_factory=InputLimitsConfig)
|
||||||
restrict_to_workspace: bool = False # If true, restrict all tool access to workspace directory
|
restrict_to_workspace: bool = False # If true, restrict all tool access to workspace directory
|
||||||
mcp_servers: dict[str, MCPServerConfig] = Field(default_factory=dict)
|
mcp_servers: dict[str, MCPServerConfig] = Field(default_factory=dict)
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from nanobot.agent.context import ContextBuilder
|
from nanobot.agent.context import ContextBuilder
|
||||||
|
from nanobot.config.schema import InputLimitsConfig
|
||||||
|
|
||||||
|
|
||||||
PNG_BYTES = (
|
PNG_BYTES = (
|
||||||
@ -14,13 +15,13 @@ PNG_BYTES = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _builder(tmp_path: Path) -> ContextBuilder:
|
def _builder(tmp_path: Path, input_limits: InputLimitsConfig | None = None) -> ContextBuilder:
|
||||||
return ContextBuilder(tmp_path)
|
return ContextBuilder(tmp_path, input_limits=input_limits)
|
||||||
|
|
||||||
|
|
||||||
def test_build_user_content_keeps_only_first_three_images(tmp_path: Path) -> None:
|
def test_build_user_content_keeps_only_first_three_images(tmp_path: Path) -> None:
|
||||||
builder = _builder(tmp_path)
|
builder = _builder(tmp_path)
|
||||||
max_images = ContextBuilder._MAX_INPUT_IMAGES
|
max_images = builder.input_limits.max_input_images
|
||||||
paths = []
|
paths = []
|
||||||
for i in range(max_images + 1):
|
for i in range(max_images + 1):
|
||||||
path = tmp_path / f"img{i}.png"
|
path = tmp_path / f"img{i}.png"
|
||||||
@ -61,15 +62,32 @@ def test_build_user_content_skips_missing_file(tmp_path: Path) -> None:
|
|||||||
def test_build_user_content_skips_large_images_with_note(tmp_path: Path) -> None:
|
def test_build_user_content_skips_large_images_with_note(tmp_path: Path) -> None:
|
||||||
builder = _builder(tmp_path)
|
builder = _builder(tmp_path)
|
||||||
big = tmp_path / "big.png"
|
big = tmp_path / "big.png"
|
||||||
big.write_bytes(PNG_BYTES + b"x" * ContextBuilder._MAX_IMAGE_BYTES)
|
big.write_bytes(PNG_BYTES + b"x" * builder.input_limits.max_input_image_bytes)
|
||||||
|
|
||||||
content = builder._build_user_content("analyze", [str(big)])
|
content = builder._build_user_content("analyze", [str(big)])
|
||||||
|
|
||||||
limit_mb = ContextBuilder._MAX_IMAGE_BYTES // (1024 * 1024)
|
limit_mb = builder.input_limits.max_input_image_bytes // (1024 * 1024)
|
||||||
assert isinstance(content, str)
|
assert isinstance(content, str)
|
||||||
assert f"[Skipped image: file too large (big.png, limit {limit_mb} MB)]" in content
|
assert f"[Skipped image: file too large (big.png, limit {limit_mb} MB)]" in content
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_user_content_respects_custom_input_limits(tmp_path: Path) -> None:
|
||||||
|
builder = _builder(
|
||||||
|
tmp_path,
|
||||||
|
input_limits=InputLimitsConfig(max_input_images=1, max_input_image_bytes=1024),
|
||||||
|
)
|
||||||
|
small = tmp_path / "small.png"
|
||||||
|
large = tmp_path / "large.png"
|
||||||
|
small.write_bytes(PNG_BYTES)
|
||||||
|
large.write_bytes(PNG_BYTES + b"x" * 1024)
|
||||||
|
|
||||||
|
content = builder._build_user_content("describe", [str(small), str(large)])
|
||||||
|
|
||||||
|
assert isinstance(content, list)
|
||||||
|
assert sum(1 for block in content if block.get("type") == "image_url") == 1
|
||||||
|
assert content[-1]["text"].startswith("[Skipped 1 image: only the first 1 images are included]")
|
||||||
|
|
||||||
|
|
||||||
def test_build_user_content_keeps_valid_images_and_skip_notes_together(tmp_path: Path) -> None:
|
def test_build_user_content_keeps_valid_images_and_skip_notes_together(tmp_path: Path) -> None:
|
||||||
builder = _builder(tmp_path)
|
builder = _builder(tmp_path)
|
||||||
good = tmp_path / "good.png"
|
good = tmp_path / "good.png"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user