mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-06-13 22:34:06 +00:00
refactor(agent): move document media logic out of AgentLoop into document.py
Extract is_image_file() and reference_non_image_attachments() from AgentLoop private static methods into nanobot/utils/document.py where they belong alongside extract_documents(). Simplify config lookup by removing dead isinstance(dict) branch.
This commit is contained in:
parent
ec4f9e9857
commit
672fabe5be
@ -4,7 +4,6 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import mimetypes
|
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from contextlib import AsyncExitStack, nullcontext, suppress
|
from contextlib import AsyncExitStack, nullcontext, suppress
|
||||||
@ -24,22 +23,22 @@ from nanobot.agent.memory import Consolidator, Dream
|
|||||||
from nanobot.agent.progress_hook import AgentProgressHook
|
from nanobot.agent.progress_hook import AgentProgressHook
|
||||||
from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN, AgentRunner, AgentRunSpec
|
from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN, AgentRunner, AgentRunSpec
|
||||||
from nanobot.agent.subagent import SubagentManager
|
from nanobot.agent.subagent import SubagentManager
|
||||||
|
from nanobot.agent.tools.context import RequestContext, bind_request_context, reset_request_context
|
||||||
from nanobot.agent.tools.file_state import FileStateStore, bind_file_states, reset_file_states
|
from nanobot.agent.tools.file_state import FileStateStore, bind_file_states, reset_file_states
|
||||||
from nanobot.agent.tools.message import MessageTool
|
from nanobot.agent.tools.message import MessageTool
|
||||||
from nanobot.agent.tools.context import RequestContext, bind_request_context, reset_request_context
|
|
||||||
from nanobot.agent.tools.registry import ToolRegistry
|
from nanobot.agent.tools.registry import ToolRegistry
|
||||||
from nanobot.agent.tools.self import MyTool
|
from nanobot.agent.tools.self import MyTool
|
||||||
from nanobot.security.workspace_access import (
|
|
||||||
WorkspaceScopeResolver,
|
|
||||||
bind_workspace_scope,
|
|
||||||
reset_workspace_scope,
|
|
||||||
)
|
|
||||||
from nanobot.bus.events import InboundMessage, OutboundMessage
|
from nanobot.bus.events import InboundMessage, OutboundMessage
|
||||||
from nanobot.bus.queue import MessageBus
|
from nanobot.bus.queue import MessageBus
|
||||||
from nanobot.command import CommandContext, CommandRouter, register_builtin_commands
|
from nanobot.command import CommandContext, CommandRouter, register_builtin_commands
|
||||||
from nanobot.config.schema import AgentDefaults, ModelPresetConfig
|
from nanobot.config.schema import AgentDefaults, ModelPresetConfig
|
||||||
from nanobot.providers.base import LLMProvider
|
from nanobot.providers.base import LLMProvider
|
||||||
from nanobot.providers.factory import ProviderSnapshot
|
from nanobot.providers.factory import ProviderSnapshot
|
||||||
|
from nanobot.security.workspace_access import (
|
||||||
|
WorkspaceScopeResolver,
|
||||||
|
bind_workspace_scope,
|
||||||
|
reset_workspace_scope,
|
||||||
|
)
|
||||||
from nanobot.session.goal_state import (
|
from nanobot.session.goal_state import (
|
||||||
goal_state_runtime_lines,
|
goal_state_runtime_lines,
|
||||||
runner_wall_llm_timeout_s,
|
runner_wall_llm_timeout_s,
|
||||||
@ -51,8 +50,8 @@ from nanobot.session.webui_turns import (
|
|||||||
build_bus_progress_callback,
|
build_bus_progress_callback,
|
||||||
mark_webui_session,
|
mark_webui_session,
|
||||||
)
|
)
|
||||||
from nanobot.utils.document import extract_documents
|
from nanobot.utils.document import extract_documents, reference_non_image_attachments
|
||||||
from nanobot.utils.helpers import detect_image_mime, image_placeholder_text
|
from nanobot.utils.helpers import image_placeholder_text
|
||||||
from nanobot.utils.helpers import truncate_text as truncate_text_fn
|
from nanobot.utils.helpers import truncate_text as truncate_text_fn
|
||||||
from nanobot.utils.image_generation_intent import image_generation_prompt
|
from nanobot.utils.image_generation_intent import image_generation_prompt
|
||||||
from nanobot.utils.llm_runtime import LLMRuntime
|
from nanobot.utils.llm_runtime import LLMRuntime
|
||||||
@ -1296,45 +1295,12 @@ class AgentLoop:
|
|||||||
def _prepare_message_media(self, content: str, media: list[str]) -> tuple[str, list[str]]:
|
def _prepare_message_media(self, content: str, media: list[str]) -> tuple[str, list[str]]:
|
||||||
if self._should_extract_document_text():
|
if self._should_extract_document_text():
|
||||||
return extract_documents(content, media)
|
return extract_documents(content, media)
|
||||||
return self._reference_non_image_attachments(content, media)
|
return reference_non_image_attachments(content, media)
|
||||||
|
|
||||||
def _should_extract_document_text(self) -> bool:
|
def _should_extract_document_text(self) -> bool:
|
||||||
cfg = self.channels_config
|
if self.channels_config is None:
|
||||||
if cfg is None:
|
|
||||||
return True
|
return True
|
||||||
if isinstance(cfg, dict):
|
return self.channels_config.extract_document_text
|
||||||
value = cfg.get("extract_document_text", cfg.get("extractDocumentText", True))
|
|
||||||
else:
|
|
||||||
value = getattr(cfg, "extract_document_text", True)
|
|
||||||
return value is not False
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _reference_non_image_attachments(content: str, media: list[str]) -> tuple[str, list[str]]:
|
|
||||||
image_paths: list[str] = []
|
|
||||||
attachment_refs: list[str] = []
|
|
||||||
for path in media:
|
|
||||||
if AgentLoop._looks_like_image(path):
|
|
||||||
image_paths.append(path)
|
|
||||||
else:
|
|
||||||
attachment_refs.append(f"[Attachment: {path}]")
|
|
||||||
if attachment_refs:
|
|
||||||
suffix = "\n".join(attachment_refs)
|
|
||||||
content = f"{content}\n\n{suffix}" if content else suffix
|
|
||||||
return content, image_paths
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _looks_like_image(path: str) -> bool:
|
|
||||||
p = Path(path)
|
|
||||||
mime: str | None = None
|
|
||||||
if p.is_file():
|
|
||||||
try:
|
|
||||||
with p.open("rb") as f:
|
|
||||||
mime = detect_image_mime(f.read(16))
|
|
||||||
except OSError:
|
|
||||||
mime = None
|
|
||||||
if not mime:
|
|
||||||
mime = mimetypes.guess_type(path)[0]
|
|
||||||
return bool(mime and mime.startswith("image/"))
|
|
||||||
|
|
||||||
async def _state_compact(self, ctx: TurnContext) -> str:
|
async def _state_compact(self, ctx: TurnContext) -> str:
|
||||||
ctx.session, pending = self.auto_compact.prepare_session(ctx.session, ctx.session_key)
|
ctx.session, pending = self.auto_compact.prepare_session(ctx.session, ctx.session_key)
|
||||||
|
|||||||
@ -7,7 +7,6 @@ from loguru import logger
|
|||||||
|
|
||||||
from nanobot.utils.helpers import detect_image_mime
|
from nanobot.utils.helpers import detect_image_mime
|
||||||
|
|
||||||
|
|
||||||
# Supported file extensions for text extraction
|
# Supported file extensions for text extraction
|
||||||
SUPPORTED_EXTENSIONS: set[str] = {
|
SUPPORTED_EXTENSIONS: set[str] = {
|
||||||
# Document formats
|
# Document formats
|
||||||
@ -232,6 +231,46 @@ def _is_text_extension(ext: str) -> bool:
|
|||||||
_MAX_EXTRACT_FILE_SIZE = 50 * 1024 * 1024 # 50 MB
|
_MAX_EXTRACT_FILE_SIZE = 50 * 1024 * 1024 # 50 MB
|
||||||
|
|
||||||
|
|
||||||
|
def is_image_file(path: str) -> bool:
    """Return ``True`` when *path* appears to refer to an image.

    When *path* is an existing regular file, its first 16 bytes are passed
    to ``detect_image_mime``. If that produces nothing (or the file cannot
    be read, or *path* is not a regular file), the MIME type is guessed from
    the file name with ``mimetypes.guess_type`` instead.
    """
    candidate = Path(path)
    sniffed: str | None = None
    if candidate.is_file():
        try:
            with candidate.open("rb") as handle:
                sniffed = detect_image_mime(handle.read(16))
        except OSError:
            # Unreadable file: not an error here, fall back to the name-based guess.
            sniffed = None
    # Content sniffing wins; the extension is only consulted when it found nothing.
    resolved = sniffed or mimetypes.guess_type(path)[0]
    if not resolved:
        return False
    return resolved.startswith("image/")
|
||||||
|
|
||||||
|
|
||||||
|
def reference_non_image_attachments(
    content: str, media: list[str],
) -> tuple[str, list[str]]:
    """Split *media* into image paths and textual attachment references.

    Paths for which ``is_image_file`` is true are returned in the second
    tuple element, in their original order. Every other path is rendered as
    an ``[Attachment: path]`` line; those lines are appended to *content*
    after a blank line, or become the content outright when *content* is
    empty. This function does not itself read any attachment.
    """
    images: list[str] = []
    references: list[str] = []
    for item in media:
        if is_image_file(item):
            images.append(item)
            continue
        references.append(f"[Attachment: {item}]")

    # Nothing to reference: hand the message text back untouched.
    if not references:
        return content, images

    block = "\n".join(references)
    if content:
        return f"{content}\n\n{block}", images
    return block, images
|
||||||
|
|
||||||
|
|
||||||
def extract_documents(
|
def extract_documents(
|
||||||
text: str,
|
text: str,
|
||||||
media_paths: list[str],
|
media_paths: list[str],
|
||||||
@ -267,10 +306,7 @@ def extract_documents(
|
|||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
with open(p, "rb") as f:
|
if is_image_file(path_str):
|
||||||
header = f.read(16)
|
|
||||||
mime = detect_image_mime(header) or mimetypes.guess_type(path_str)[0]
|
|
||||||
if mime and mime.startswith("image/"):
|
|
||||||
image_paths.append(path_str)
|
image_paths.append(path_str)
|
||||||
else:
|
else:
|
||||||
extracted = extract_text(p)
|
extracted = extract_text(p)
|
||||||
|
|||||||
@ -10,6 +10,7 @@ from nanobot.bus.events import InboundMessage
|
|||||||
from nanobot.bus.queue import MessageBus
|
from nanobot.bus.queue import MessageBus
|
||||||
from nanobot.config.schema import ChannelsConfig
|
from nanobot.config.schema import ChannelsConfig
|
||||||
from nanobot.providers.base import LLMResponse
|
from nanobot.providers.base import LLMResponse
|
||||||
|
from nanobot.utils.document import reference_non_image_attachments
|
||||||
|
|
||||||
|
|
||||||
def _make_loop(tmp_path: Path, channels_config: ChannelsConfig | None = None) -> AgentLoop:
|
def _make_loop(tmp_path: Path, channels_config: ChannelsConfig | None = None) -> AgentLoop:
|
||||||
@ -159,7 +160,7 @@ def test_document_extraction_disabled_still_preserves_images(tmp_path: Path) ->
|
|||||||
doc_path = tmp_path / "report.txt"
|
doc_path = tmp_path / "report.txt"
|
||||||
doc_path.write_text("manual extraction target", encoding="utf-8")
|
doc_path.write_text("manual extraction target", encoding="utf-8")
|
||||||
|
|
||||||
content, media = AgentLoop._reference_non_image_attachments(
|
content, media = reference_non_image_attachments(
|
||||||
"review these",
|
"review these",
|
||||||
[str(image_path), str(doc_path)],
|
[str(image_path), str(doc_path)],
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user