mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-04-30 06:45:55 +00:00
Keep the API file upload branch current with main, enforce the documented JSON base64 per-file limit, and avoid leaking document extraction error strings into user prompts. Made-with: Cursor
82 lines
2.8 KiB
Python
82 lines
2.8 KiB
Python
"""Tests for context builder document handling."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
from nanobot.agent.context import ContextBuilder
|
|
|
|
|
|
def _make_builder(tmp_path: Path) -> ContextBuilder:
    """Return a ContextBuilder whose workspace is ``tmp_path`` and whose timezone is UTC."""
    builder = ContextBuilder(workspace=tmp_path, timezone="UTC")
    return builder
|
|
|
|
|
|
def test_build_user_content_with_no_media_returns_string(tmp_path: Path) -> None:
    """With ``None`` passed as media, the user text comes back unchanged as a string."""
    content = _make_builder(tmp_path)._build_user_content("hello", None)

    assert content == "hello"
|
|
|
|
|
|
def test_build_user_content_with_image_returns_list(tmp_path: Path) -> None:
    """An image attachment should yield a list holding both image_url and text blocks."""
    context_builder = _make_builder(tmp_path)

    # Minimal stand-in image: the PNG signature followed by zero padding.
    image_path = tmp_path / "test.png"
    image_path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 100)

    content = context_builder._build_user_content("describe this", [str(image_path)])

    assert isinstance(content, list)
    block_types = {block["type"] for block in content}
    assert "image_url" in block_types
    assert "text" in block_types
|
|
|
|
|
|
def test_build_user_content_with_docx_includes_extracted_text(tmp_path: Path) -> None:
    """A lone .docx attachment should keep the content a string that contains the document text."""
    from docx import Document

    report_path = tmp_path / "report.docx"
    document = Document()
    document.add_paragraph("Quarterly revenue is $5M")
    document.save(report_path)

    context_builder = _make_builder(tmp_path)
    content = context_builder._build_user_content("summarize this", [str(report_path)])

    assert isinstance(content, str)
    assert "Quarterly revenue" in content
|
|
|
|
|
|
def test_build_user_content_mixed_image_and_document(tmp_path: Path) -> None:
    """Image plus document: expect an image_url block and a text block carrying the document text."""
    from docx import Document

    # Minimal stand-in image: the PNG signature followed by zero padding.
    image_path = tmp_path / "chart.png"
    image_path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 100)

    report_path = tmp_path / "report.docx"
    document = Document()
    document.add_paragraph("Report text here")
    document.save(report_path)

    context_builder = _make_builder(tmp_path)
    attachments = [str(image_path), str(report_path)]
    content = context_builder._build_user_content("analyze both", attachments)

    assert isinstance(content, list)
    assert "image_url" in (block["type"] for block in content)

    # Collect the text of every text-typed block before searching it.
    text_segments = []
    for block in content:
        if block.get("type") == "text":
            text_segments.append(block.get("text", ""))
    assert any("Report text here" in segment for segment in text_segments)
|
|
|
|
|
|
def test_build_user_content_skips_document_extraction_errors(tmp_path: Path, monkeypatch) -> None:
    """When extraction returns an error marker, the content should be just the user text."""

    def _failing_extract(_path):
        # Stand-in extractor that always reports a bracketed error string.
        return "[error: failed to extract DOCX: boom]"

    broken_path = tmp_path / "broken.docx"
    broken_path.write_text("not a real docx", encoding="utf-8")

    context_builder = _make_builder(tmp_path)

    # NOTE(review): this patches the attribute on nanobot.utils.document; it assumes
    # the builder looks up extract_text through that module at call time — confirm.
    monkeypatch.setattr("nanobot.utils.document.extract_text", _failing_extract)

    content = context_builder._build_user_content("summarize this", [str(broken_path)])

    assert content == "summarize this"
|