refactor(agent): move document media logic out of AgentLoop into document.py

Extract is_image_file() and reference_non_image_attachments() from
AgentLoop private static methods into nanobot/utils/document.py where
they belong alongside extract_documents(). extract_documents() now calls
is_image_file() instead of repeating the image check inline. Simplify
config lookup by removing the dead isinstance(dict) branch.
This commit is contained in:
chengyongru 2026-05-29 13:34:59 +08:00 committed by Xubin Ren
parent ec4f9e9857
commit 672fabe5be
3 changed files with 54 additions and 51 deletions

View File

@ -4,7 +4,6 @@ from __future__ import annotations
import asyncio import asyncio
import dataclasses import dataclasses
import mimetypes
import os import os
import time import time
from contextlib import AsyncExitStack, nullcontext, suppress from contextlib import AsyncExitStack, nullcontext, suppress
@ -24,22 +23,22 @@ from nanobot.agent.memory import Consolidator, Dream
from nanobot.agent.progress_hook import AgentProgressHook from nanobot.agent.progress_hook import AgentProgressHook
from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN, AgentRunner, AgentRunSpec from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN, AgentRunner, AgentRunSpec
from nanobot.agent.subagent import SubagentManager from nanobot.agent.subagent import SubagentManager
from nanobot.agent.tools.context import RequestContext, bind_request_context, reset_request_context
from nanobot.agent.tools.file_state import FileStateStore, bind_file_states, reset_file_states from nanobot.agent.tools.file_state import FileStateStore, bind_file_states, reset_file_states
from nanobot.agent.tools.message import MessageTool from nanobot.agent.tools.message import MessageTool
from nanobot.agent.tools.context import RequestContext, bind_request_context, reset_request_context
from nanobot.agent.tools.registry import ToolRegistry from nanobot.agent.tools.registry import ToolRegistry
from nanobot.agent.tools.self import MyTool from nanobot.agent.tools.self import MyTool
from nanobot.security.workspace_access import (
WorkspaceScopeResolver,
bind_workspace_scope,
reset_workspace_scope,
)
from nanobot.bus.events import InboundMessage, OutboundMessage from nanobot.bus.events import InboundMessage, OutboundMessage
from nanobot.bus.queue import MessageBus from nanobot.bus.queue import MessageBus
from nanobot.command import CommandContext, CommandRouter, register_builtin_commands from nanobot.command import CommandContext, CommandRouter, register_builtin_commands
from nanobot.config.schema import AgentDefaults, ModelPresetConfig from nanobot.config.schema import AgentDefaults, ModelPresetConfig
from nanobot.providers.base import LLMProvider from nanobot.providers.base import LLMProvider
from nanobot.providers.factory import ProviderSnapshot from nanobot.providers.factory import ProviderSnapshot
from nanobot.security.workspace_access import (
WorkspaceScopeResolver,
bind_workspace_scope,
reset_workspace_scope,
)
from nanobot.session.goal_state import ( from nanobot.session.goal_state import (
goal_state_runtime_lines, goal_state_runtime_lines,
runner_wall_llm_timeout_s, runner_wall_llm_timeout_s,
@ -51,8 +50,8 @@ from nanobot.session.webui_turns import (
build_bus_progress_callback, build_bus_progress_callback,
mark_webui_session, mark_webui_session,
) )
from nanobot.utils.document import extract_documents from nanobot.utils.document import extract_documents, reference_non_image_attachments
from nanobot.utils.helpers import detect_image_mime, image_placeholder_text from nanobot.utils.helpers import image_placeholder_text
from nanobot.utils.helpers import truncate_text as truncate_text_fn from nanobot.utils.helpers import truncate_text as truncate_text_fn
from nanobot.utils.image_generation_intent import image_generation_prompt from nanobot.utils.image_generation_intent import image_generation_prompt
from nanobot.utils.llm_runtime import LLMRuntime from nanobot.utils.llm_runtime import LLMRuntime
@ -1296,45 +1295,12 @@ class AgentLoop:
def _prepare_message_media(self, content: str, media: list[str]) -> tuple[str, list[str]]: def _prepare_message_media(self, content: str, media: list[str]) -> tuple[str, list[str]]:
if self._should_extract_document_text(): if self._should_extract_document_text():
return extract_documents(content, media) return extract_documents(content, media)
return self._reference_non_image_attachments(content, media) return reference_non_image_attachments(content, media)
def _should_extract_document_text(self) -> bool: def _should_extract_document_text(self) -> bool:
cfg = self.channels_config if self.channels_config is None:
if cfg is None:
return True return True
if isinstance(cfg, dict): return self.channels_config.extract_document_text
value = cfg.get("extract_document_text", cfg.get("extractDocumentText", True))
else:
value = getattr(cfg, "extract_document_text", True)
return value is not False
@staticmethod
def _reference_non_image_attachments(content: str, media: list[str]) -> tuple[str, list[str]]:
image_paths: list[str] = []
attachment_refs: list[str] = []
for path in media:
if AgentLoop._looks_like_image(path):
image_paths.append(path)
else:
attachment_refs.append(f"[Attachment: {path}]")
if attachment_refs:
suffix = "\n".join(attachment_refs)
content = f"{content}\n\n{suffix}" if content else suffix
return content, image_paths
@staticmethod
def _looks_like_image(path: str) -> bool:
p = Path(path)
mime: str | None = None
if p.is_file():
try:
with p.open("rb") as f:
mime = detect_image_mime(f.read(16))
except OSError:
mime = None
if not mime:
mime = mimetypes.guess_type(path)[0]
return bool(mime and mime.startswith("image/"))
async def _state_compact(self, ctx: TurnContext) -> str: async def _state_compact(self, ctx: TurnContext) -> str:
ctx.session, pending = self.auto_compact.prepare_session(ctx.session, ctx.session_key) ctx.session, pending = self.auto_compact.prepare_session(ctx.session, ctx.session_key)

View File

@ -7,7 +7,6 @@ from loguru import logger
from nanobot.utils.helpers import detect_image_mime from nanobot.utils.helpers import detect_image_mime
# Supported file extensions for text extraction # Supported file extensions for text extraction
SUPPORTED_EXTENSIONS: set[str] = { SUPPORTED_EXTENSIONS: set[str] = {
# Document formats # Document formats
@ -232,6 +231,46 @@ def _is_text_extension(ext: str) -> bool:
_MAX_EXTRACT_FILE_SIZE = 50 * 1024 * 1024 # 50 MB _MAX_EXTRACT_FILE_SIZE = 50 * 1024 * 1024 # 50 MB
def is_image_file(path: str) -> bool:
    """Return ``True`` when *path* appears to refer to an image.

    If *path* is an existing regular file, its first 16 bytes are passed to
    ``detect_image_mime``. When that gives no result (the path is not a
    file, the read raises ``OSError``, or nothing is detected), the MIME
    type guessed by ``mimetypes`` from the path is used instead.
    """
    sniffed: str | None = None
    candidate = Path(path)
    if candidate.is_file():
        try:
            with candidate.open("rb") as handle:
                sniffed = detect_image_mime(handle.read(16))
        except OSError:
            # Unreadable file: fall through to the name-based guess.
            sniffed = None
    mime = sniffed or mimetypes.guess_type(path)[0]
    if not mime:
        return False
    return mime.startswith("image/")
def reference_non_image_attachments(
    content: str, media: list[str],
) -> tuple[str, list[str]]:
    """Split *media* into image paths and textual attachment references.

    Each path is classified with ``is_image_file``. Image paths are
    collected, in order, into the returned list. Every other path becomes
    an ``[Attachment: path]`` line; those lines are joined with newlines
    and appended to *content* after a blank line, or returned as the whole
    content when *content* is empty.

    Returns:
        A ``(content, image_paths)`` tuple.
    """
    images: list[str] = []
    references: list[str] = []
    for item in media:
        if is_image_file(item):
            images.append(item)
            continue
        references.append(f"[Attachment: {item}]")

    if not references:
        return content, images

    block = "\n".join(references)
    if content:
        content = f"{content}\n\n{block}"
    else:
        content = block
    return content, images
def extract_documents( def extract_documents(
text: str, text: str,
media_paths: list[str], media_paths: list[str],
@ -267,10 +306,7 @@ def extract_documents(
) )
continue continue
with open(p, "rb") as f: if is_image_file(path_str):
header = f.read(16)
mime = detect_image_mime(header) or mimetypes.guess_type(path_str)[0]
if mime and mime.startswith("image/"):
image_paths.append(path_str) image_paths.append(path_str)
else: else:
extracted = extract_text(p) extracted = extract_text(p)

View File

@ -10,6 +10,7 @@ from nanobot.bus.events import InboundMessage
from nanobot.bus.queue import MessageBus from nanobot.bus.queue import MessageBus
from nanobot.config.schema import ChannelsConfig from nanobot.config.schema import ChannelsConfig
from nanobot.providers.base import LLMResponse from nanobot.providers.base import LLMResponse
from nanobot.utils.document import reference_non_image_attachments
def _make_loop(tmp_path: Path, channels_config: ChannelsConfig | None = None) -> AgentLoop: def _make_loop(tmp_path: Path, channels_config: ChannelsConfig | None = None) -> AgentLoop:
@ -159,7 +160,7 @@ def test_document_extraction_disabled_still_preserves_images(tmp_path: Path) ->
doc_path = tmp_path / "report.txt" doc_path = tmp_path / "report.txt"
doc_path.write_text("manual extraction target", encoding="utf-8") doc_path.write_text("manual extraction target", encoding="utf-8")
content, media = AgentLoop._reference_non_image_attachments( content, media = reference_non_image_attachments(
"review these", "review these",
[str(image_path), str(doc_path)], [str(image_path), str(doc_path)],
) )