diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index b866c942a..1573ce769 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -4,7 +4,6 @@ from __future__ import annotations import asyncio import dataclasses -import mimetypes import os import time from contextlib import AsyncExitStack, nullcontext, suppress @@ -24,22 +23,22 @@ from nanobot.agent.memory import Consolidator, Dream from nanobot.agent.progress_hook import AgentProgressHook from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN, AgentRunner, AgentRunSpec from nanobot.agent.subagent import SubagentManager +from nanobot.agent.tools.context import RequestContext, bind_request_context, reset_request_context from nanobot.agent.tools.file_state import FileStateStore, bind_file_states, reset_file_states from nanobot.agent.tools.message import MessageTool -from nanobot.agent.tools.context import RequestContext, bind_request_context, reset_request_context from nanobot.agent.tools.registry import ToolRegistry from nanobot.agent.tools.self import MyTool -from nanobot.security.workspace_access import ( - WorkspaceScopeResolver, - bind_workspace_scope, - reset_workspace_scope, -) from nanobot.bus.events import InboundMessage, OutboundMessage from nanobot.bus.queue import MessageBus from nanobot.command import CommandContext, CommandRouter, register_builtin_commands from nanobot.config.schema import AgentDefaults, ModelPresetConfig from nanobot.providers.base import LLMProvider from nanobot.providers.factory import ProviderSnapshot +from nanobot.security.workspace_access import ( + WorkspaceScopeResolver, + bind_workspace_scope, + reset_workspace_scope, +) from nanobot.session.goal_state import ( goal_state_runtime_lines, runner_wall_llm_timeout_s, @@ -51,8 +50,8 @@ from nanobot.session.webui_turns import ( build_bus_progress_callback, mark_webui_session, ) -from nanobot.utils.document import extract_documents -from nanobot.utils.helpers import detect_image_mime, image_placeholder_text +from nanobot.utils.document import extract_documents, reference_non_image_attachments +from nanobot.utils.helpers import image_placeholder_text from nanobot.utils.helpers import truncate_text as truncate_text_fn from nanobot.utils.image_generation_intent import image_generation_prompt from nanobot.utils.llm_runtime import LLMRuntime @@ -1296,45 +1295,12 @@ class AgentLoop: def _prepare_message_media(self, content: str, media: list[str]) -> tuple[str, list[str]]: if self._should_extract_document_text(): return extract_documents(content, media) - return self._reference_non_image_attachments(content, media) + return reference_non_image_attachments(content, media) def _should_extract_document_text(self) -> bool: - cfg = self.channels_config - if cfg is None: + if self.channels_config is None: return True - if isinstance(cfg, dict): - value = cfg.get("extract_document_text", cfg.get("extractDocumentText", True)) - else: - value = getattr(cfg, "extract_document_text", True) - return value is not False - - @staticmethod - def _reference_non_image_attachments(content: str, media: list[str]) -> tuple[str, list[str]]: - image_paths: list[str] = [] - attachment_refs: list[str] = [] - for path in media: - if AgentLoop._looks_like_image(path): - image_paths.append(path) - else: - attachment_refs.append(f"[Attachment: {path}]") - if attachment_refs: - suffix = "\n".join(attachment_refs) - content = f"{content}\n\n{suffix}" if content else suffix - return content, image_paths - - @staticmethod - def _looks_like_image(path: str) -> bool: - p = Path(path) - mime: str | None = None - if p.is_file(): - try: - with p.open("rb") as f: - mime = detect_image_mime(f.read(16)) - except OSError: - mime = None - if not mime: - mime = mimetypes.guess_type(path)[0] - return bool(mime and mime.startswith("image/")) + return self.channels_config.extract_document_text async def _state_compact(self, ctx: TurnContext) -> str: ctx.session, pending = self.auto_compact.prepare_session(ctx.session, ctx.session_key) diff --git a/nanobot/utils/document.py b/nanobot/utils/document.py index 53039e97f..07e102dbb 100644 --- a/nanobot/utils/document.py +++ b/nanobot/utils/document.py @@ -7,7 +7,6 @@ from loguru import logger from nanobot.utils.helpers import detect_image_mime - # Supported file extensions for text extraction SUPPORTED_EXTENSIONS: set[str] = { # Document formats @@ -232,6 +231,46 @@ def _is_text_extension(ext: str) -> bool: _MAX_EXTRACT_FILE_SIZE = 50 * 1024 * 1024 # 50 MB +def is_image_file(path: str) -> bool: + """Check whether *path* looks like an image file. + + Uses magic-byte detection (reads first 16 bytes) with a ``mimetypes`` + extension-based fallback. + """ + p = Path(path) + mime: str | None = None + if p.is_file(): + try: + with p.open("rb") as f: + mime = detect_image_mime(f.read(16)) + except OSError: + mime = None + if not mime: + mime = mimetypes.guess_type(path)[0] + return bool(mime and mime.startswith("image/")) + + +def reference_non_image_attachments( + content: str, media: list[str], +) -> tuple[str, list[str]]: + """Separate images from non-image attachments without reading file content. + + Image paths are preserved for downstream vision-block construction. + Non-image paths are appended as ``[Attachment: path]`` references. + """ + image_paths: list[str] = [] + attachment_refs: list[str] = [] + for path in media: + if is_image_file(path): + image_paths.append(path) + else: + attachment_refs.append(f"[Attachment: {path}]") + if attachment_refs: + suffix = "\n".join(attachment_refs) + content = f"{content}\n\n{suffix}" if content else suffix + return content, image_paths + + def extract_documents( text: str, media_paths: list[str], @@ -267,10 +306,7 @@ def extract_documents( ) continue - with open(p, "rb") as f: - header = f.read(16) - mime = detect_image_mime(header) or mimetypes.guess_type(path_str)[0] - if mime and mime.startswith("image/"): + if is_image_file(path_str): image_paths.append(path_str) else: extracted = extract_text(p) diff --git a/tests/agent/test_document_extraction_toggle.py b/tests/agent/test_document_extraction_toggle.py index 429e95a18..67e566cf5 100644 --- a/tests/agent/test_document_extraction_toggle.py +++ b/tests/agent/test_document_extraction_toggle.py @@ -10,6 +10,7 @@ from nanobot.bus.events import InboundMessage from nanobot.bus.queue import MessageBus from nanobot.config.schema import ChannelsConfig from nanobot.providers.base import LLMResponse +from nanobot.utils.document import reference_non_image_attachments def _make_loop(tmp_path: Path, channels_config: ChannelsConfig | None = None) -> AgentLoop: @@ -159,7 +160,7 @@ def test_document_extraction_disabled_still_preserves_images(tmp_path: Path) -> doc_path = tmp_path / "report.txt" doc_path.write_text("manual extraction target", encoding="utf-8") - content, media = AgentLoop._reference_non_image_attachments( + content, media = reference_non_image_attachments( "review these", [str(image_path), str(doc_path)], )