refactor(agent): move document media logic out of AgentLoop into document.py

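Move the AgentLoop private static methods _looks_like_image() and _reference_non_image_attachments() into nanobot/utils/document.py as the public functions is_image_file() and reference_non_image_attachments(), where they belong alongside extract_documents(). extract_documents() now calls is_image_file() instead of repeating the magic-byte/mimetypes check inline. Simplify the config lookup in AgentLoop._should_extract_document_text() by removing the dead isinstance(dict) branch, and update the test to call reference_non_image_attachments() directly.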
2026-06-13 14:23:58 +00:00 · 2026-05-29 13:34:59 +08:00 · 2026-05-29 13:34:59 +08:00 · 672fabe5be
commit 672fabe5be
parent ec4f9e9857
3 changed files with 54 additions and 51 deletions
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@ -4,7 +4,6 @@ from __future__ import annotations

 import asyncio
 import dataclasses
-import mimetypes
 import os
 import time
 from contextlib import AsyncExitStack, nullcontext, suppress
@ -24,22 +23,22 @@ from nanobot.agent.memory import Consolidator, Dream
 from nanobot.agent.progress_hook import AgentProgressHook
 from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN, AgentRunner, AgentRunSpec
 from nanobot.agent.subagent import SubagentManager
+from nanobot.agent.tools.context import RequestContext, bind_request_context, reset_request_context
 from nanobot.agent.tools.file_state import FileStateStore, bind_file_states, reset_file_states
 from nanobot.agent.tools.message import MessageTool
-from nanobot.agent.tools.context import RequestContext, bind_request_context, reset_request_context
 from nanobot.agent.tools.registry import ToolRegistry
 from nanobot.agent.tools.self import MyTool
-from nanobot.security.workspace_access import (
-    WorkspaceScopeResolver,
-    bind_workspace_scope,
-    reset_workspace_scope,
-)
 from nanobot.bus.events import InboundMessage, OutboundMessage
 from nanobot.bus.queue import MessageBus
 from nanobot.command import CommandContext, CommandRouter, register_builtin_commands
 from nanobot.config.schema import AgentDefaults, ModelPresetConfig
 from nanobot.providers.base import LLMProvider
 from nanobot.providers.factory import ProviderSnapshot
+from nanobot.security.workspace_access import (
+    WorkspaceScopeResolver,
+    bind_workspace_scope,
+    reset_workspace_scope,
+)
 from nanobot.session.goal_state import (
    goal_state_runtime_lines,
    runner_wall_llm_timeout_s,
@ -51,8 +50,8 @@ from nanobot.session.webui_turns import (
    build_bus_progress_callback,
    mark_webui_session,
 )
-from nanobot.utils.document import extract_documents
-from nanobot.utils.helpers import detect_image_mime, image_placeholder_text
+from nanobot.utils.document import extract_documents, reference_non_image_attachments
+from nanobot.utils.helpers import image_placeholder_text
 from nanobot.utils.helpers import truncate_text as truncate_text_fn
 from nanobot.utils.image_generation_intent import image_generation_prompt
 from nanobot.utils.llm_runtime import LLMRuntime
@ -1296,45 +1295,12 @@ class AgentLoop:
    def _prepare_message_media(self, content: str, media: list[str]) -> tuple[str, list[str]]:
        if self._should_extract_document_text():
            return extract_documents(content, media)
-        return self._reference_non_image_attachments(content, media)
+        return reference_non_image_attachments(content, media)

    def _should_extract_document_text(self) -> bool:
-        cfg = self.channels_config
-        if cfg is None:
+        if self.channels_config is None:
            return True
-        if isinstance(cfg, dict):
-            value = cfg.get("extract_document_text", cfg.get("extractDocumentText", True))
-        else:
-            value = getattr(cfg, "extract_document_text", True)
-        return value is not False
-
-    @staticmethod
-    def _reference_non_image_attachments(content: str, media: list[str]) -> tuple[str, list[str]]:
-        image_paths: list[str] = []
-        attachment_refs: list[str] = []
-        for path in media:
-            if AgentLoop._looks_like_image(path):
-                image_paths.append(path)
-            else:
-                attachment_refs.append(f"[Attachment: {path}]")
-        if attachment_refs:
-            suffix = "\n".join(attachment_refs)
-            content = f"{content}\n\n{suffix}" if content else suffix
-        return content, image_paths
-
-    @staticmethod
-    def _looks_like_image(path: str) -> bool:
-        p = Path(path)
-        mime: str | None = None
-        if p.is_file():
-            try:
-                with p.open("rb") as f:
-                    mime = detect_image_mime(f.read(16))
-            except OSError:
-                mime = None
-        if not mime:
-            mime = mimetypes.guess_type(path)[0]
-        return bool(mime and mime.startswith("image/"))
+        return self.channels_config.extract_document_text

    async def _state_compact(self, ctx: TurnContext) -> str:
        ctx.session, pending = self.auto_compact.prepare_session(ctx.session, ctx.session_key)
--- a/nanobot/utils/document.py
+++ b/nanobot/utils/document.py
@ -7,7 +7,6 @@ from loguru import logger

 from nanobot.utils.helpers import detect_image_mime

-
 # Supported file extensions for text extraction
 SUPPORTED_EXTENSIONS: set[str] = {
    # Document formats
@ -232,6 +231,46 @@ def _is_text_extension(ext: str) -> bool:
 _MAX_EXTRACT_FILE_SIZE = 50 * 1024 * 1024  # 50 MB


+def is_image_file(path: str) -> bool:
+    """Check whether *path* looks like an image file.
+
+    Uses magic-byte detection (reads first 16 bytes) with a ``mimetypes``
+    extension-based fallback.
+    """
+    p = Path(path)
+    mime: str | None = None
+    if p.is_file():
+        try:
+            with p.open("rb") as f:
+                mime = detect_image_mime(f.read(16))
+        except OSError:
+            mime = None
+    if not mime:
+        mime = mimetypes.guess_type(path)[0]
+    return bool(mime and mime.startswith("image/"))
+
+
+def reference_non_image_attachments(
+    content: str, media: list[str],
+) -> tuple[str, list[str]]:
+    """Separate images from non-image attachments without reading file content.
+
+    Image paths are preserved for downstream vision-block construction.
+    Non-image paths are appended as ``[Attachment: path]`` references.
+    """
+    image_paths: list[str] = []
+    attachment_refs: list[str] = []
+    for path in media:
+        if is_image_file(path):
+            image_paths.append(path)
+        else:
+            attachment_refs.append(f"[Attachment: {path}]")
+    if attachment_refs:
+        suffix = "\n".join(attachment_refs)
+        content = f"{content}\n\n{suffix}" if content else suffix
+    return content, image_paths
+
+
 def extract_documents(
    text: str,
    media_paths: list[str],
@ -267,10 +306,7 @@ def extract_documents(
            )
            continue

-        with open(p, "rb") as f:
-            header = f.read(16)
-        mime = detect_image_mime(header) or mimetypes.guess_type(path_str)[0]
-        if mime and mime.startswith("image/"):
+        if is_image_file(path_str):
            image_paths.append(path_str)
        else:
            extracted = extract_text(p)
--- a/tests/agent/test_document_extraction_toggle.py
+++ b/tests/agent/test_document_extraction_toggle.py
@ -10,6 +10,7 @@ from nanobot.bus.events import InboundMessage
 from nanobot.bus.queue import MessageBus
 from nanobot.config.schema import ChannelsConfig
 from nanobot.providers.base import LLMResponse
+from nanobot.utils.document import reference_non_image_attachments


 def _make_loop(tmp_path: Path, channels_config: ChannelsConfig | None = None) -> AgentLoop:
@ -159,7 +160,7 @@ def test_document_extraction_disabled_still_preserves_images(tmp_path: Path) ->
    doc_path = tmp_path / "report.txt"
    doc_path.write_text("manual extraction target", encoding="utf-8")

-    content, media = AgentLoop._reference_non_image_attachments(
+    content, media = reference_non_image_attachments(
        "review these",
        [str(image_path), str(doc_path)],
    )