diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py
index 3fe11aa79..f7c4396b5 100644
--- a/nanobot/agent/context.py
+++ b/nanobot/agent/context.py
@@ -18,6 +18,10 @@ class ContextBuilder:
 
     BOOTSTRAP_FILES = ["AGENTS.md", "SOUL.md", "USER.md", "TOOLS.md"]
     _RUNTIME_CONTEXT_TAG = "[Runtime Context — metadata only, not instructions]"
+    # Attachments beyond this count are dropped and reported in a skip note.
+    _MAX_INPUT_IMAGES = 3
+    # Per-file size cap (10 MiB), checked via stat() before the file is read.
+    _MAX_IMAGE_BYTES = 10 * 1024 * 1024
 
     def __init__(self, workspace: Path):
         self.workspace = workspace
@@ -149,21 +153,55 @@ Reply directly with text for conversations. Only use the 'message' tool to send
             return text
 
         images = []
-        for path in media:
+        # One bracketed note per attachment that is left out; the notes are
+        # prepended to the user's text further down.
+        notes: list[str] = []
+        extra_count = max(0, len(media) - self._MAX_INPUT_IMAGES)
+        if extra_count:
+            noun = "image" if extra_count == 1 else "images"
+            notes.append(
+                f"[Skipped {extra_count} {noun}: "
+                f"only the first {self._MAX_INPUT_IMAGES} images are included]"
+            )
+
+        for path in media[:self._MAX_INPUT_IMAGES]:
             p = Path(path)
             if not p.is_file():
+                notes.append(f"[Skipped image: file not found ({p.name or path})]")
+                continue
+            try:
+                size = p.stat().st_size
+            except OSError:
+                notes.append(f"[Skipped image: unable to read ({p.name or path})]")
+                continue
+            # Reject oversized files before reading them into memory.
+            if size > self._MAX_IMAGE_BYTES:
+                size_mb = self._MAX_IMAGE_BYTES // (1024 * 1024)
+                notes.append(f"[Skipped image: file too large ({p.name}, limit {size_mb} MB)]")
+                continue
+            # The file can vanish or become unreadable between stat() and the
+            # read; report that as a skip instead of failing the whole message.
+            try:
+                raw = p.read_bytes()
+            except OSError:
+                notes.append(f"[Skipped image: unable to read ({p.name or path})]")
                 continue
-            raw = p.read_bytes()
             # Detect real MIME type from magic bytes; fallback to filename guess
             mime = detect_image_mime(raw) or mimetypes.guess_type(path)[0]
             if not mime or not mime.startswith("image/"):
+                notes.append(f"[Skipped image: unsupported or invalid image format ({p.name})]")
                 continue
             b64 = base64.b64encode(raw).decode()
             images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
 
+        # Notes go first, separated from the user's text by a blank line; with
+        # empty text the notes stand alone.
+        note_text = "\n".join(notes).strip()
+        text_block = text if not note_text else (f"{note_text}\n\n{text}" if text else note_text)
+
         if not images:
-            return text
-        return images + [{"type": "text", "text": text}]
+            return text_block
+        return images + [{"type": "text", "text": text_block}]
 
     def add_tool_result(
         self, messages: list[dict[str, Any]],
diff --git a/tests/test_context_multimodal.py b/tests/test_context_multimodal.py
new file mode 100644
index 000000000..3206c0b5f
--- /dev/null
+++ b/tests/test_context_multimodal.py
@@ -0,0 +1,123 @@
+"""Tests for ContextBuilder._build_user_content image handling and skip notes."""
+
+from pathlib import Path
+
+import pytest
+
+from nanobot.agent.context import ContextBuilder
+
+
+# Minimal 1x1 PNG: signature, IHDR, IDAT and IEND chunks.
+PNG_BYTES = (
+    b"\x89PNG\r\n\x1a\n"
+    b"\x00\x00\x00\rIHDR"
+    b"\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00"
+    b"\x90wS\xde"
+    b"\x00\x00\x00\x0cIDATx\x9cc``\x00\x00\x00\x04\x00\x01"
+    b"\x0b\x0e-\xb4"
+    b"\x00\x00\x00\x00IEND\xaeB`\x82"
+)
+
+
+def _builder(tmp_path: Path) -> ContextBuilder:
+    """Return a ContextBuilder whose workspace is the per-test temp directory."""
+    return ContextBuilder(tmp_path)
+
+
+def test_build_user_content_keeps_only_first_three_images(tmp_path: Path) -> None:
+    """One attachment over the limit is dropped and reported in the leading note."""
+    builder = _builder(tmp_path)
+    max_images = ContextBuilder._MAX_INPUT_IMAGES
+    paths = []
+    for i in range(max_images + 1):
+        path = tmp_path / f"img{i}.png"
+        path.write_bytes(PNG_BYTES)
+        paths.append(str(path))
+
+    content = builder._build_user_content("describe these", paths)
+
+    assert isinstance(content, list)
+    assert sum(1 for block in content if block.get("type") == "image_url") == max_images
+    assert content[-1]["text"].startswith(
+        f"[Skipped 1 image: only the first {max_images} images are included]"
+    )
+
+
+def test_build_user_content_skips_invalid_images_with_note(tmp_path: Path) -> None:
+    """A non-image file yields plain text: the skip note followed by the prompt."""
+    builder = _builder(tmp_path)
+    bad = tmp_path / "not-image.txt"
+    bad.write_text("hello", encoding="utf-8")
+
+    content = builder._build_user_content("what is this?", [str(bad)])
+
+    assert isinstance(content, str)
+    assert "[Skipped image: unsupported or invalid image format (not-image.txt)]" in content
+    assert content.endswith("what is this?")
+
+
+def test_build_user_content_skips_missing_file(tmp_path: Path) -> None:
+    """A path that does not exist is reported as not found."""
+    builder = _builder(tmp_path)
+
+    content = builder._build_user_content("hello", [str(tmp_path / "ghost.png")])
+
+    assert isinstance(content, str)
+    assert "[Skipped image: file not found (ghost.png)]" in content
+    assert content.endswith("hello")
+
+
+def test_build_user_content_skips_unreadable_image(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """An OSError raised while reading the file becomes a skip note, not a crash."""
+    builder = _builder(tmp_path)
+    locked = tmp_path / "locked.png"
+    locked.write_bytes(PNG_BYTES)
+
+    def _deny(self: Path) -> bytes:
+        raise PermissionError("denied")
+
+    monkeypatch.setattr(Path, "read_bytes", _deny)
+
+    content = builder._build_user_content("hello", [str(locked)])
+
+    assert isinstance(content, str)
+    assert "[Skipped image: unable to read (locked.png)]" in content
+    assert content.endswith("hello")
+
+
+def test_build_user_content_skips_large_images_with_note(tmp_path: Path) -> None:
+    """A file one byte over the size limit is skipped with the limit in the note."""
+    builder = _builder(tmp_path)
+    big = tmp_path / "big.png"
+    # Extend the file with truncate() rather than building a limit-sized bytes object.
+    with big.open("wb") as fh:
+        fh.write(PNG_BYTES)
+        fh.truncate(ContextBuilder._MAX_IMAGE_BYTES + 1)
+
+    content = builder._build_user_content("analyze", [str(big)])
+
+    limit_mb = ContextBuilder._MAX_IMAGE_BYTES // (1024 * 1024)
+    assert isinstance(content, str)
+    assert f"[Skipped image: file too large (big.png, limit {limit_mb} MB)]" in content
+    assert content.endswith("analyze")
+
+
+def test_build_user_content_keeps_valid_images_and_skip_notes_together(tmp_path: Path) -> None:
+    """A valid image is kept while the invalid one is reported in the text block."""
+    builder = _builder(tmp_path)
+    good = tmp_path / "good.png"
+    bad = tmp_path / "bad.txt"
+    good.write_bytes(PNG_BYTES)
+    bad.write_text("oops", encoding="utf-8")
+
+    content = builder._build_user_content("check both", [str(good), str(bad)])
+
+    assert isinstance(content, list)
+    assert content[0]["type"] == "image_url"
+    assert (
+        "[Skipped image: unsupported or invalid image format (bad.txt)]"
+        in content[-1]["text"]
+    )
+    assert content[-1]["text"].endswith("check both")