diff --git a/tests/agent/conftest.py b/tests/agent/conftest.py
new file mode 100644
index 000000000..57f678aa9
--- /dev/null
+++ b/tests/agent/conftest.py
@@ -0,0 +1,93 @@
+"""Shared fixtures and helpers for agent tests."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from nanobot.agent.loop import AgentLoop
+from nanobot.bus.queue import MessageBus
+from nanobot.providers.base import LLMProvider
+
+
+def make_provider(
+    default_model: str = "test-model",
+    *,
+    max_tokens: int = 4096,
+    spec: bool = True,
+) -> MagicMock:
+    """Create an LLM provider mock; restricted to the LLMProvider spec unless spec=False."""
+    # spec=False yields an unconstrained MagicMock; either way it is configured in place below.
+    provider = MagicMock(spec=LLMProvider) if spec else MagicMock()
+    provider.get_default_model.return_value = default_model
+    provider.generation = SimpleNamespace(
+        max_tokens=max_tokens,
+        temperature=0.1,
+        reasoning_effort=None,
+    )
+    provider.estimate_prompt_tokens.return_value = (10_000, "test")  # fixed stub estimate
+    return provider
+
+
+def make_loop(
+    tmp_path: Path,
+    *,
+    model: str = "test-model",
+    context_window_tokens: int = 128_000,
+    session_ttl_minutes: int = 0,
+    max_messages: int = 120,
+    unified_session: bool = False,
+    mcp_servers: dict | None = None,
+    tools_config=None,
+    model_presets: dict | None = None,
+    hooks: list | None = None,
+    provider: MagicMock | None = None,
+    patch_deps: bool = False,
+) -> AgentLoop:
+    """Build an actual AgentLoop on a new MessageBus, using tmp_path as its workspace.
+
+    Args:
+        patch_deps: When True, ContextBuilder/SessionManager/SubagentManager are
+            patched while the loop is constructed (for workspaces without real files).
+    """
+    if provider is None:
+        provider = make_provider(default_model=model)
+
+    loop_kwargs = {
+        "bus": MessageBus(),
+        "provider": provider,
+        "workspace": tmp_path,
+        "model": model,
+        "context_window_tokens": context_window_tokens,
+        "session_ttl_minutes": session_ttl_minutes,
+        "max_messages": max_messages,
+        "unified_session": unified_session,
+    }
+    # Optional settings are forwarded only when the caller actually supplied them.
+    optional = {
+        "mcp_servers": mcp_servers,
+        "tools_config": tools_config,
+        "model_presets": model_presets,
+        "hooks": hooks,
+    }
+    loop_kwargs.update({name: value for name, value in optional.items() if value is not None})
+
+    if not patch_deps:
+        return AgentLoop(**loop_kwargs)
+
+    with patch("nanobot.agent.loop.ContextBuilder"), \
+         patch("nanobot.agent.loop.SessionManager"), \
+         patch("nanobot.agent.loop.SubagentManager") as sub_mgr_cls:
+        sub_mgr_cls.return_value.cancel_by_session = AsyncMock(return_value=0)
+        return AgentLoop(**loop_kwargs)
+
+
+@pytest.fixture
+def loop_factory(tmp_path):
+    """Hand the test a make_loop callable already bound to its tmp_path."""
+    def build(**overrides):
+        return make_loop(tmp_path, **overrides)
+    return build
diff --git a/tests/agent/test_autocompact_unit.py b/tests/agent/test_autocompact_unit.py
new file mode 100644
index 000000000..d501770dd
--- /dev/null
+++ b/tests/agent/test_autocompact_unit.py
@@ -0,0 +1,554 @@
+"""Direct unit tests for AutoCompact class methods in isolation."""
+
+from datetime import datetime, timedelta
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.agent.autocompact import AutoCompact
+from nanobot.session.manager import Session, SessionManager
+
+
+def _make_session(
+    key: str = "cli:test",
+    messages: list | None = None,
+    last_consolidated: int = 0,
+    updated_at: datetime | None = None,
+    metadata: dict | None = None,
+) -> Session:
+    """Build a Session for tests; updated_at is overridden only when one is given."""
+    built = Session(
+        key=key,
+        last_consolidated=last_consolidated,
+        messages=messages if messages else [],
+        metadata=metadata if metadata else {},
+    )
+    if updated_at is not None:
+        built.updated_at = updated_at
+    return built
+
+
+def _make_autocompact(
+    ttl: int = 15,
+    sessions: SessionManager | None = None,
+    consolidator: MagicMock | None = None,
+) -> AutoCompact:
+    """Build an AutoCompact, substituting mocks for any dependency not supplied."""
+    if consolidator is None:
+        # Default consolidator: archive() is awaitable and resolves to "Summary.".
+        consolidator = MagicMock()
+        consolidator.archive = AsyncMock(return_value="Summary.")
+    session_store = MagicMock(spec=SessionManager) if sessions is None else sessions
+    return AutoCompact(
+        sessions=session_store,
+        consolidator=consolidator,
+        session_ttl_minutes=ttl,
+    )
+
+
+def _add_turns(session: Session, turns: int, *, prefix: str = "msg") -> None:
+    """Add `turns` user-then-assistant message pairs to the session."""
+    for turn in range(turns):
+        for role in ("user", "assistant"):
+            session.add_message(role, f"{prefix} {role} {turn}")
+
+
+# ---------------------------------------------------------------------------
+# __init__
+# ---------------------------------------------------------------------------
+
+
+class TestInit:
+    """AutoCompact.__init__: constructor arguments are stored and internal state starts empty."""
+
+    def test_stores_ttl(self):
+        """The session_ttl_minutes constructor argument is exposed as _ttl."""
+        ac = _make_autocompact(ttl=30)
+        assert ac._ttl == 30
+
+    def test_default_ttl_is_zero(self):
+        """An explicit TTL of 0 is stored as-is. NOTE(review): the constructor's own default is not exercised."""
+        ac = _make_autocompact(ttl=0)
+        assert ac._ttl == 0
+
+    def test_archiving_set_is_empty(self):
+        """_archiving compares equal to an empty set right after construction."""
+        ac = _make_autocompact()
+        assert ac._archiving == set()
+
+    def test_summaries_dict_is_empty(self):
+        """_summaries compares equal to an empty dict right after construction."""
+        ac = _make_autocompact()
+        assert ac._summaries == {}
+
+    def test_stores_sessions_reference(self):
+        """The sessions attribute is the very object passed in (identity, not a copy)."""
+        mock_sm = MagicMock(spec=SessionManager)
+        ac = _make_autocompact(sessions=mock_sm)
+        assert ac.sessions is mock_sm
+
+    def test_stores_consolidator_reference(self):
+        """The consolidator attribute is the very object passed in (identity, not a copy)."""
+        mock_c = MagicMock()
+        ac = _make_autocompact(consolidator=mock_c)
+        assert ac.consolidator is mock_c
+
+
+# ---------------------------------------------------------------------------
+# _is_expired
+# ---------------------------------------------------------------------------
+
+
+class TestIsExpired:
+    """AutoCompact._is_expired: disabled TTL, falsy timestamps, boundary and parsing cases."""
+
+    def test_ttl_zero_always_false(self):
+        """With ttl=0, even a year-old timestamp is reported as not expired."""
+        ac = _make_autocompact(ttl=0)
+        old = datetime.now() - timedelta(days=365)
+        assert ac._is_expired(old) is False
+
+    def test_none_timestamp_returns_false(self):
+        """A None timestamp is reported as not expired."""
+        ac = _make_autocompact(ttl=15)
+        assert ac._is_expired(None) is False
+
+    def test_empty_string_timestamp_returns_false(self):
+        """An empty-string timestamp is reported as not expired."""
+        ac = _make_autocompact(ttl=15)
+        assert ac._is_expired("") is False
+
+    def test_exactly_at_boundary_is_expired(self):
+        """An age of exactly ttl minutes counts as expired (inclusive boundary)."""
+        ac = _make_autocompact(ttl=15)
+        now = datetime(2026, 1, 1, 12, 0, 0)
+        ts = now - timedelta(minutes=15)
+        assert ac._is_expired(ts, now=now) is True
+
+    def test_just_under_boundary_not_expired(self):
+        """An age one second short of ttl minutes is not expired."""
+        ac = _make_autocompact(ttl=15)
+        now = datetime(2026, 1, 1, 12, 0, 0)
+        ts = now - timedelta(minutes=14, seconds=59)
+        assert ac._is_expired(ts, now=now) is False
+
+    def test_iso_string_parses_correctly(self):
+        """A timestamp given as an isoformat() string is accepted and evaluated."""
+        ac = _make_autocompact(ttl=15)
+        now = datetime(2026, 1, 1, 12, 0, 0)
+        ts = (now - timedelta(minutes=20)).isoformat()
+        assert ac._is_expired(ts, now=now) is True
+
+    def test_custom_now_parameter(self):
+        """The result follows the supplied 'now' value, checked on both sides of the TTL."""
+        ac = _make_autocompact(ttl=10)
+        ts = datetime(2026, 1, 1, 10, 0, 0)
+        # 9 minutes after ts with ttl=10: not expired
+        now_under = datetime(2026, 1, 1, 10, 9, 0)
+        assert ac._is_expired(ts, now=now_under) is False
+        # exactly 10 minutes after ts with ttl=10: expired
+        now_over = datetime(2026, 1, 1, 10, 10, 0)
+        assert ac._is_expired(ts, now=now_over) is True
+
+
+# ---------------------------------------------------------------------------
+# _format_summary
+# ---------------------------------------------------------------------------
+
+
+class TestFormatSummary:
+    """AutoCompact._format_summary, called on the class without an instance."""
+
+    def test_contains_isoformat_timestamp(self):
+        """The ISO-8601 rendering of last_active appears in the output."""
+        last_active = datetime(2026, 5, 13, 14, 30, 0)
+        result = AutoCompact._format_summary("Some text", last_active)
+        assert "2026-05-13T14:30:00" in result
+
+    def test_contains_summary_text(self):
+        """The summary text appears unchanged in the output."""
+        last_active = datetime(2026, 1, 1)
+        result = AutoCompact._format_summary("User discussed Python.", last_active)
+        assert "User discussed Python." in result
+
+    def test_output_starts_with_label(self):
+        """The output begins with the fixed "Previous conversation summary (last active " label."""
+        last_active = datetime(2026, 1, 1)
+        result = AutoCompact._format_summary("text", last_active)
+        assert result.startswith("Previous conversation summary (last active ")
+
+
+# ---------------------------------------------------------------------------
+# _split_unconsolidated
+# ---------------------------------------------------------------------------
+
+
+class TestSplitUnconsolidated:
+    """AutoCompact._split_unconsolidated: how a session is divided into (archive, kept)."""
+
+    def test_empty_session_returns_both_empty(self):
+        """A session without messages yields ([], [])."""
+        ac = _make_autocompact()
+        session = _make_session(messages=[])
+        archive, kept = ac._split_unconsolidated(session)
+        assert archive == []
+        assert kept == []
+
+    def test_all_messages_archivable_when_more_than_suffix(self):
+        """With 20 messages, something is archived and kept stays within the suffix limit."""
+        ac = _make_autocompact()
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        archive, kept = ac._split_unconsolidated(session)
+        assert len(archive) > 0
+        assert len(kept) <= AutoCompact._RECENT_SUFFIX_MESSAGES
+
+    def test_fewer_messages_than_suffix_returns_empty_archive(self):
+        """With only 3 messages nothing is archived and all 3 are kept."""
+        ac = _make_autocompact()
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(3)]
+        session = _make_session(messages=msgs)
+        archive, kept = ac._split_unconsolidated(session)
+        assert archive == []
+        assert len(kept) == len(msgs)
+
+    def test_respects_last_consolidated_offset(self):
+        """No message before last_consolidated may appear in either half (vacuous for an empty half)."""
+        ac = _make_autocompact()
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        # First 10 are already consolidated
+        session = _make_session(messages=msgs, last_consolidated=10)
+        archive, kept = ac._split_unconsolidated(session)
+        tail = {f"u{i}" for i in range(10, 20)}  # contents of the 10 unconsolidated messages, built once
+        assert all(m["content"] in tail for m in kept)
+        assert all(m["content"] in tail for m in archive)
+
+    def test_retain_recent_legal_suffix_keeps_last_n(self):
+        """kept stays within _RECENT_SUFFIX_MESSAGES and archive holds every remaining message."""
+        ac = _make_autocompact()
+        # 20 user messages = 20 messages total, all after last_consolidated=0
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        archive, kept = ac._split_unconsolidated(session)
+        assert len(kept) <= AutoCompact._RECENT_SUFFIX_MESSAGES
+        assert len(archive) == len(msgs) - len(kept)
+
+
+# ---------------------------------------------------------------------------
+# check_expired
+# ---------------------------------------------------------------------------
+
+
+class TestCheckExpired:
+    """AutoCompact.check_expired: which listed sessions cause the scheduler callable to be invoked."""
+
+    def test_empty_sessions_list(self):
+        """With no sessions listed, the scheduler callable is never invoked."""
+        ac = _make_autocompact(ttl=15)
+        mock_sm = MagicMock(spec=SessionManager)
+        mock_sm.list_sessions.return_value = []
+        ac.sessions = mock_sm
+        scheduler = MagicMock()
+        ac.check_expired(scheduler)
+        scheduler.assert_not_called()
+
+    def test_expired_session_schedules_background(self):
+        """A session 20 minutes old (ttl=15) is scheduled once and its key lands in _archiving."""
+        ac = _make_autocompact(ttl=15)
+        mock_sm = MagicMock(spec=SessionManager)
+        old_ts = (datetime.now() - timedelta(minutes=20)).isoformat()
+        mock_sm.list_sessions.return_value = [{"key": "cli:old", "updated_at": old_ts}]
+        ac.sessions = mock_sm
+        scheduler = MagicMock()
+        ac.check_expired(scheduler)  # NOTE(review): if a coroutine is handed to scheduler it is never awaited here - confirm
+        scheduler.assert_called_once()
+        assert "cli:old" in ac._archiving
+
+    def test_active_session_key_skips(self):
+        """An expired session whose key is in active_session_keys is not scheduled."""
+        ac = _make_autocompact(ttl=15)
+        mock_sm = MagicMock(spec=SessionManager)
+        old_ts = (datetime.now() - timedelta(minutes=20)).isoformat()
+        mock_sm.list_sessions.return_value = [{"key": "cli:busy", "updated_at": old_ts}]
+        ac.sessions = mock_sm
+        scheduler = MagicMock()
+        ac.check_expired(scheduler, active_session_keys={"cli:busy"})
+        scheduler.assert_not_called()
+
+    def test_session_already_in_archiving_skips(self):
+        """An expired session whose key is already in _archiving is not scheduled again."""
+        ac = _make_autocompact(ttl=15)
+        mock_sm = MagicMock(spec=SessionManager)
+        old_ts = (datetime.now() - timedelta(minutes=20)).isoformat()
+        mock_sm.list_sessions.return_value = [{"key": "cli:dup", "updated_at": old_ts}]
+        ac.sessions = mock_sm
+        ac._archiving.add("cli:dup")
+        scheduler = MagicMock()
+        ac.check_expired(scheduler)
+        scheduler.assert_not_called()
+
+    def test_session_with_no_key_skips(self):
+        """A session entry whose key is the empty string is not scheduled."""
+        ac = _make_autocompact(ttl=15)
+        mock_sm = MagicMock(spec=SessionManager)
+        mock_sm.list_sessions.return_value = [{"key": "", "updated_at": "old"}]
+        ac.sessions = mock_sm
+        scheduler = MagicMock()
+        ac.check_expired(scheduler)
+        scheduler.assert_not_called()
+
+    def test_session_with_missing_key_field_skips(self):
+        """A session entry with no 'key' field at all is not scheduled."""
+        ac = _make_autocompact(ttl=15)
+        mock_sm = MagicMock(spec=SessionManager)
+        mock_sm.list_sessions.return_value = [{"updated_at": "old"}]
+        ac.sessions = mock_sm
+        scheduler = MagicMock()
+        ac.check_expired(scheduler)
+        scheduler.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# _archive
+# ---------------------------------------------------------------------------
+
+
+class TestArchive:
+    """AutoCompact._archive: summary storage, error handling and _archiving cleanup."""
+
+    @pytest.mark.asyncio
+    async def test_empty_session_updates_timestamp_no_archive_call(self):
+        """An empty session gets a fresh updated_at and is saved without calling consolidator.archive."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        empty_session = _make_session(messages=[], updated_at=datetime.now() - timedelta(hours=1))
+        mock_sm.get_or_create.return_value = empty_session
+        ac.sessions = mock_sm
+        ac.consolidator.archive = AsyncMock(return_value="Summary.")
+
+        await ac._archive("cli:test")
+
+        ac.consolidator.archive.assert_not_called()
+        mock_sm.save.assert_called_once_with(empty_session)
+        # The session started an hour stale, so this only holds if _archive refreshed it.
+        assert empty_session.updated_at > datetime.now() - timedelta(seconds=5)
+
+    @pytest.mark.asyncio
+    async def test_archive_returns_empty_string_no_summary_stored(self):
+        """When consolidator.archive resolves to "", nothing is recorded in _summaries."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        mock_sm.get_or_create.return_value = session
+        ac.sessions = mock_sm
+        ac.consolidator.archive = AsyncMock(return_value="")
+
+        await ac._archive("cli:test")
+
+        assert "cli:test" not in ac._summaries
+
+    @pytest.mark.asyncio
+    async def test_archive_returns_nothing_no_summary_stored(self):
+        """When consolidator.archive resolves to '(nothing)', nothing is recorded in _summaries."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        mock_sm.get_or_create.return_value = session
+        ac.sessions = mock_sm
+        ac.consolidator.archive = AsyncMock(side_effect=None, return_value="(nothing)")
+
+        await ac._archive("cli:test")
+
+        assert "cli:test" not in ac._summaries
+
+    @pytest.mark.asyncio
+    async def test_archive_exception_caught_key_removed_from_archiving(self):
+        """A failing consolidator.archive does not propagate, and the key leaves _archiving."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        mock_sm.get_or_create.return_value = session
+        ac.sessions = mock_sm
+        ac.consolidator.archive = AsyncMock(side_effect=RuntimeError("LLM down"))
+
+        ac._archiving.add("cli:test")  # seed the key so the final assertion is not vacuous
+        await ac._archive("cli:test")
+
+        assert "cli:test" not in ac._archiving
+
+    @pytest.mark.asyncio
+    async def test_successful_archive_stores_summary_in_summaries_and_metadata(self):
+        """A non-empty summary is recorded in _summaries and in session.metadata["_last_summary"]."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        last_active = datetime(2026, 5, 13, 10, 0, 0)
+        session = _make_session(messages=msgs, updated_at=last_active)
+        mock_sm.get_or_create.return_value = session
+        ac.sessions = mock_sm
+        ac.consolidator.archive = AsyncMock(return_value="User discussed AI.")
+
+        await ac._archive("cli:test")
+
+        # _summaries entry is (summary text, the session's pre-archive updated_at)
+        entry = ac._summaries.get("cli:test")
+        assert entry is not None
+        assert entry[0] == "User discussed AI."
+        assert entry[1] == last_active
+        # metadata copy carries the same text plus a last_active field
+        meta = session.metadata.get("_last_summary")
+        assert meta is not None
+        assert meta["text"] == "User discussed AI."
+        assert "last_active" in meta
+
+    @pytest.mark.asyncio
+    async def test_finally_block_always_removes_from_archiving(self):
+        """A key seeded into _archiving is gone after _archive even when archiving fails."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        mock_sm.get_or_create.return_value = session
+        ac.sessions = mock_sm
+        ac.consolidator.archive = AsyncMock(side_effect=RuntimeError("fail"))
+
+        # Pre-add key to archiving to verify it gets removed
+        ac._archiving.add("cli:test")
+        await ac._archive("cli:test")
+        assert "cli:test" not in ac._archiving
+
+    @pytest.mark.asyncio
+    async def test_finally_removes_from_archiving_on_success(self):
+        """A key seeded into _archiving is gone after a successful _archive as well."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        mock_sm.get_or_create.return_value = session
+        ac.sessions = mock_sm
+        ac.consolidator.archive = AsyncMock(return_value="Summary.")
+
+        ac._archiving.add("cli:test")
+        await ac._archive("cli:test")
+        assert "cli:test" not in ac._archiving
+
+
+# ---------------------------------------------------------------------------
+# prepare_session
+# ---------------------------------------------------------------------------
+
+
+class TestPrepareSession:
+    """AutoCompact.prepare_session: session reloads and where the returned summary comes from."""
+
+    def test_key_in_archiving_reloads_session(self):
+        """While the key is in _archiving, the session returned is the one from get_or_create."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        reloaded = _make_session(key="cli:test")
+        mock_sm.get_or_create.return_value = reloaded
+        ac.sessions = mock_sm
+        ac._archiving.add("cli:test")
+
+        original_session = _make_session()
+        result_session, _ = ac.prepare_session(original_session, "cli:test")
+
+        mock_sm.get_or_create.assert_called_once_with("cli:test")
+        assert result_session is reloaded
+
+    def test_expired_session_reloads(self):
+        """A session 20 minutes old (ttl=15) is replaced by the one from get_or_create."""
+        ac = _make_autocompact(ttl=15)
+        mock_sm = MagicMock(spec=SessionManager)
+        reloaded = _make_session(key="cli:test", updated_at=datetime.now())
+        mock_sm.get_or_create.return_value = reloaded
+        ac.sessions = mock_sm
+
+        old_session = _make_session(updated_at=datetime.now() - timedelta(minutes=20))
+        result_session, _ = ac.prepare_session(old_session, "cli:test")
+
+        mock_sm.get_or_create.assert_called_once_with("cli:test")
+        assert result_session is reloaded
+
+    def test_hot_path_summary_from_summaries(self):
+        """An entry in _summaries (hot path) is returned as a formatted summary; the session is unchanged."""
+        ac = _make_autocompact()
+        session = _make_session()
+        last_active = datetime(2026, 5, 13, 14, 0, 0)
+        ac._summaries["cli:test"] = ("Hot summary.", last_active)
+
+        result_session, summary = ac.prepare_session(session, "cli:test")
+
+        assert result_session is session
+        assert summary is not None
+        assert "Hot summary." in summary
+        assert "Previous conversation summary" in summary
+
+    def test_hot_path_pops_summary_one_shot(self):
+        """The _summaries entry is consumed: a second call for the same key returns None."""
+        ac = _make_autocompact()
+        session = _make_session()
+        last_active = datetime(2026, 1, 1)
+        ac._summaries["cli:test"] = ("One-shot.", last_active)
+
+        _, summary1 = ac.prepare_session(session, "cli:test")
+        assert summary1 is not None
+        # Second call: the in-memory entry is gone and this session has no metadata copy
+        _, summary2 = ac.prepare_session(session, "cli:test")
+        assert summary2 is None
+
+    def test_cold_path_summary_from_metadata(self):
+        """With _summaries empty, the summary is taken from metadata["_last_summary"] (cold path)."""
+        ac = _make_autocompact()
+        last_active = datetime(2026, 5, 13, 14, 0, 0)
+        session = _make_session(metadata={
+            "_last_summary": {
+                "text": "Cold summary.",
+                "last_active": last_active.isoformat(),
+            },
+        })
+
+        result_session, summary = ac.prepare_session(session, "cli:test")
+
+        assert result_session is session
+        assert summary is not None
+        assert "Cold summary." in summary
+
+    def test_no_summary_available_returns_none(self):
+        """With neither a _summaries entry nor metadata, the result is (session, None)."""
+        ac = _make_autocompact()
+        session = _make_session()
+
+        result_session, summary = ac.prepare_session(session, "cli:test")
+
+        assert result_session is session
+        assert summary is None
+
+    def test_cold_path_metadata_not_dict_returns_none(self):
+        """A metadata["_last_summary"] value that is not a dict produces no summary."""
+        ac = _make_autocompact()
+        session = _make_session(metadata={"_last_summary": "not a dict"})
+
+        result_session, summary = ac.prepare_session(session, "cli:test")
+
+        assert result_session is session
+        assert summary is None
+
+    def test_hot_path_takes_priority_over_metadata(self):
+        """When both sources exist, the _summaries entry wins over the metadata copy."""
+        ac = _make_autocompact()
+        session = _make_session(metadata={
+            "_last_summary": {
+                "text": "Cold summary.",
+                "last_active": datetime(2026, 1, 1).isoformat(),
+            },
+        })
+        last_active = datetime(2026, 5, 13, 14, 0, 0)
+        ac._summaries["cli:test"] = ("Hot summary.", last_active)
+
+        _, summary = ac.prepare_session(session, "cli:test")
+        assert "Hot summary." in summary
+        # NOTE(review): what a follow-up call returns (metadata copy kept or dropped) is not asserted here.
diff --git a/tests/agent/test_context_builder.py b/tests/agent/test_context_builder.py
new file mode 100644
index 000000000..862f1ff2b
--- /dev/null
+++ b/tests/agent/test_context_builder.py
@@ -0,0 +1,333 @@
+"""Tests for ContextBuilder — system prompt and message assembly."""
+
+import base64
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from nanobot.agent.context import ContextBuilder
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _builder(tmp_path: Path, **kw) -> ContextBuilder:
+    return ContextBuilder(workspace=tmp_path, **kw)  # workspace is always tmp_path; other kwargs pass straight through
+
+
+# ---------------------------------------------------------------------------
+# _build_runtime_context (static)
+# ---------------------------------------------------------------------------
+
+
+class TestBuildRuntimeContext:  # ContextBuilder._build_runtime_context, called on the class without an instance
+    def test_time_only(self):  # no channel/chat id: markers and Current Time present, no Channel line
+        ctx = ContextBuilder._build_runtime_context(None, None)
+        assert "[Runtime Context" in ctx
+        assert "[/Runtime Context]" in ctx
+        assert "Current Time:" in ctx
+        assert "Channel:" not in ctx
+
+    def test_with_channel_and_chat_id(self):  # both values are rendered as labelled lines
+        ctx = ContextBuilder._build_runtime_context("telegram", "chat123")
+        assert "Channel: telegram" in ctx
+        assert "Chat ID: chat123" in ctx
+
+    def test_with_sender_id(self):  # sender_id keyword is rendered as "Sender ID: ..."
+        ctx = ContextBuilder._build_runtime_context("cli", "direct", sender_id="user1")
+        assert "Sender ID: user1" in ctx
+
+    def test_with_timezone(self):  # NOTE(review): only checks Current Time is present; the timezone's effect is not asserted
+        ctx = ContextBuilder._build_runtime_context(None, None, timezone="Asia/Shanghai")
+        assert "Current Time:" in ctx
+
+    def test_no_channel_no_chat_id_omits_both(self):  # neither label appears when both inputs are None
+        ctx = ContextBuilder._build_runtime_context(None, None)
+        assert "Channel:" not in ctx
+        assert "Chat ID:" not in ctx
+
+    def test_no_sender_id_omits(self):  # no sender_id given: no Sender ID label
+        ctx = ContextBuilder._build_runtime_context("cli", "direct")
+        assert "Sender ID:" not in ctx
+
+
+# ---------------------------------------------------------------------------
+# _merge_message_content (static)
+# ---------------------------------------------------------------------------
+
+
+class TestMergeMessageContent:  # ContextBuilder._merge_message_content for each str/list/None pairing
+    def test_str_plus_str(self):  # two strings are joined with a blank line between them
+        result = ContextBuilder._merge_message_content("hello", "world")
+        assert result == "hello\n\nworld"
+
+    def test_empty_left_plus_str(self):  # an empty left string adds no separator
+        result = ContextBuilder._merge_message_content("", "world")
+        assert result == "world"
+
+    def test_list_plus_list(self):  # two block lists are concatenated in order
+        left = [{"type": "text", "text": "a"}]
+        right = [{"type": "text", "text": "b"}]
+        result = ContextBuilder._merge_message_content(left, right)
+        assert len(result) == 2
+        assert result[0]["text"] == "a"
+        assert result[1]["text"] == "b"
+
+    def test_str_plus_list(self):  # the string becomes a leading block ahead of the list's blocks
+        right = [{"type": "text", "text": "b"}]
+        result = ContextBuilder._merge_message_content("hello", right)
+        assert len(result) == 2
+        assert result[0]["text"] == "hello"
+        assert result[1]["text"] == "b"
+
+    def test_list_plus_str(self):  # the string becomes a trailing block after the list's blocks
+        left = [{"type": "text", "text": "a"}]
+        result = ContextBuilder._merge_message_content(left, "world")
+        assert len(result) == 2
+        assert result[0]["text"] == "a"
+        assert result[1]["text"] == "world"
+
+    def test_none_plus_str(self):  # None on the left: result is a one-element list with a text block
+        result = ContextBuilder._merge_message_content(None, "hello")
+        assert result == [{"type": "text", "text": "hello"}]
+
+    def test_str_plus_none(self):  # None on the right: same one-element text-block list
+        result = ContextBuilder._merge_message_content("hello", None)
+        assert result == [{"type": "text", "text": "hello"}]
+
+    def test_none_plus_none(self):  # nothing on either side: empty list
+        result = ContextBuilder._merge_message_content(None, None)
+        assert result == []
+
+    def test_list_items_not_dicts_wrapped(self):  # a bare string inside a list is wrapped as a text block
+        result = ContextBuilder._merge_message_content(["raw_item"], None)
+        assert result == [{"type": "text", "text": "raw_item"}]
+
+
+# ---------------------------------------------------------------------------
+# _load_bootstrap_files
+# ---------------------------------------------------------------------------
+
+
+class TestLoadBootstrapFiles:  # ContextBuilder._load_bootstrap_files against files placed in the workspace
+    def test_no_bootstrap_files(self, tmp_path):  # empty workspace: empty string
+        builder = _builder(tmp_path)
+        assert builder._load_bootstrap_files() == ""
+
+    def test_agents_md(self, tmp_path):  # one file: a "## AGENTS.md" heading plus the file's content
+        (tmp_path / "AGENTS.md").write_text("Be helpful.", encoding="utf-8")
+        builder = _builder(tmp_path)
+        result = builder._load_bootstrap_files()
+        assert "## AGENTS.md" in result
+        assert "Be helpful." in result
+
+    def test_multiple_bootstrap_files(self, tmp_path):  # each present file contributes its heading and content
+        (tmp_path / "AGENTS.md").write_text("Rules.", encoding="utf-8")
+        (tmp_path / "SOUL.md").write_text("Soul.", encoding="utf-8")
+        builder = _builder(tmp_path)
+        result = builder._load_bootstrap_files()
+        assert "## AGENTS.md" in result
+        assert "## SOUL.md" in result
+        assert "Rules." in result
+        assert "Soul." in result
+
+    def test_all_bootstrap_files(self, tmp_path):  # every name in BOOTSTRAP_FILES gets a heading when present
+        for name in ContextBuilder.BOOTSTRAP_FILES:
+            (tmp_path / name).write_text(f"Content of {name}", encoding="utf-8")
+        builder = _builder(tmp_path)
+        result = builder._load_bootstrap_files()
+        for name in ContextBuilder.BOOTSTRAP_FILES:
+            assert f"## {name}" in result
+
+    def test_utf8_content(self, tmp_path):  # non-ASCII text written as UTF-8 comes back intact
+        (tmp_path / "AGENTS.md").write_text("用中文回复", encoding="utf-8")
+        builder = _builder(tmp_path)
+        result = builder._load_bootstrap_files()
+        assert "用中文回复" in result
+
+
+# ---------------------------------------------------------------------------
+# _is_template_content (static)
+# ---------------------------------------------------------------------------
+
+
+class TestIsTemplateContent:  # ContextBuilder._is_template_content against the packaged MEMORY.md template
+    def test_nonexistent_template_returns_false(self):  # an unknown template path yields False
+        assert ContextBuilder._is_template_content("anything", "nonexistent/path.md") is False
+
+    def test_content_matching_template(self):  # the template's own text is recognised as template content
+        from importlib.resources import files as pkg_files
+        tpl = pkg_files("nanobot") / "templates" / "memory" / "MEMORY.md"
+        if not tpl.is_file():
+            pytest.skip("MEMORY.md template not bundled")  # skipped when the package ships without the template
+        original = tpl.read_text(encoding="utf-8")
+        assert ContextBuilder._is_template_content(original, "memory/MEMORY.md") is True
+
+    def test_modified_content_returns_false(self):  # unrelated text is not recognised as the template
+        from importlib.resources import files as pkg_files
+        tpl = pkg_files("nanobot") / "templates" / "memory" / "MEMORY.md"
+        if not tpl.is_file():
+            pytest.skip("MEMORY.md template not bundled")  # skipped when the package ships without the template
+        assert ContextBuilder._is_template_content("totally different", "memory/MEMORY.md") is False
+
+
+# ---------------------------------------------------------------------------
+# _build_user_content
+# ---------------------------------------------------------------------------
+
+
+class TestBuildUserContent:  # ContextBuilder._build_user_content: plain string vs. list of content blocks
+    def test_no_media_returns_string(self, tmp_path):  # media=None: the text is returned unchanged
+        builder = _builder(tmp_path)
+        result = builder._build_user_content("hello", None)
+        assert result == "hello"
+
+    def test_empty_media_returns_string(self, tmp_path):  # media=[]: the text is returned unchanged
+        builder = _builder(tmp_path)
+        result = builder._build_user_content("hello", [])
+        assert result == "hello"
+
+    def test_nonexistent_media_file_returns_string(self, tmp_path):  # a missing file is ignored
+        builder = _builder(tmp_path)
+        result = builder._build_user_content("hello", ["/nonexistent/image.png"])
+        assert result == "hello"
+
+    def test_non_image_file_returns_string(self, tmp_path):  # an existing .txt file is ignored
+        txt = tmp_path / "doc.txt"
+        txt.write_text("not an image", encoding="utf-8")
+        builder = _builder(tmp_path)
+        result = builder._build_user_content("hello", [str(txt)])
+        assert result == "hello"
+
+    def test_valid_image_returns_list(self, tmp_path):  # image block first (base64 data URL), then the text block
+        png = tmp_path / "test.png"
+        png.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 16)  # PNG signature followed by 16 zero bytes
+        builder = _builder(tmp_path)
+        result = builder._build_user_content("hello", [str(png)])
+        assert isinstance(result, list)
+        assert len(result) == 2
+        assert result[0]["type"] == "image_url"
+        assert result[0]["image_url"]["url"].startswith("data:image/png;base64,")
+        assert result[1]["type"] == "text"
+        assert result[1]["text"] == "hello"
+
+    def test_image_meta_includes_path(self, tmp_path):  # the image block carries a _meta dict with a path key
+        png = tmp_path / "test.png"
+        png.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 16)  # PNG signature followed by 16 zero bytes
+        builder = _builder(tmp_path)
+        result = builder._build_user_content("hello", [str(png)])
+        assert "_meta" in result[0]
+        assert "path" in result[0]["_meta"]
+
+
+# ---------------------------------------------------------------------------
+# build_system_prompt
+# ---------------------------------------------------------------------------
+
+
+class TestBuildSystemPrompt:  # ContextBuilder.build_system_prompt with and without bootstrap files / summary
+    def test_returns_nonempty_string(self, tmp_path):  # even an empty workspace yields a non-empty prompt
+        builder = _builder(tmp_path)
+        result = builder.build_system_prompt()
+        assert isinstance(result, str)
+        assert len(result) > 0
+
+    def test_includes_identity_section(self, tmp_path):  # NOTE(review): loose check - passes if either word appears anywhere
+        builder = _builder(tmp_path)
+        result = builder.build_system_prompt()
+        assert "workspace" in result.lower() or "python" in result.lower()
+
+    def test_includes_bootstrap_files(self, tmp_path):  # AGENTS.md content is carried into the prompt
+        (tmp_path / "AGENTS.md").write_text("Be helpful and concise.", encoding="utf-8")
+        builder = _builder(tmp_path)
+        result = builder.build_system_prompt()
+        assert "Be helpful and concise." in result
+
+    def test_includes_session_summary(self, tmp_path):  # the summary appears together with its section label
+        builder = _builder(tmp_path)
+        result = builder.build_system_prompt(session_summary="Previous chat about Python.")
+        assert "Previous chat about Python." in result
+        assert "[Archived Context Summary]" in result
+
+    def test_sections_separated_by_separator(self, tmp_path):  # a "---" rule with blank lines appears in the prompt
+        (tmp_path / "AGENTS.md").write_text("Rules.", encoding="utf-8")
+        builder = _builder(tmp_path)
+        result = builder.build_system_prompt(session_summary="Summary.")
+        assert "\n\n---\n\n" in result
+
+    def test_no_bootstrap_no_summary(self, tmp_path):  # neither optional section shows up when its input is absent
+        builder = _builder(tmp_path)
+        result = builder.build_system_prompt()
+        assert "## AGENTS.md" not in result
+        assert "[Archived Context Summary]" not in result
+
+
+# ---------------------------------------------------------------------------
+# build_messages
+# ---------------------------------------------------------------------------
+
+
+class TestBuildMessages:  # ContextBuilder.build_messages: system prompt + history + the new user message
+    def test_basic_empty_history(self, tmp_path):  # no history: exactly a system and a user message
+        builder = _builder(tmp_path)
+        messages = builder.build_messages([], "hello")
+        assert len(messages) == 2
+        assert messages[0]["role"] == "system"
+        assert messages[1]["role"] == "user"
+        assert "hello" in str(messages[1]["content"])
+
+    def test_runtime_context_injected(self, tmp_path):  # the runtime-context block lands in the final user message
+        builder = _builder(tmp_path)
+        messages = builder.build_messages([], "hello", channel="cli", chat_id="direct")
+        user_msg = str(messages[-1]["content"])
+        assert "[Runtime Context" in user_msg
+        assert "hello" in user_msg
+
+    def test_consecutive_same_role_merged(self, tmp_path):  # history ending in a user turn is merged with the new one
+        builder = _builder(tmp_path)
+        history = [{"role": "user", "content": "previous user message"}]
+        messages = builder.build_messages(history, "new message")
+        assert len(messages) == 2  # system + merged user
+        assert "previous user message" in str(messages[1]["content"])
+        assert "new message" in str(messages[1]["content"])
+
+    def test_different_role_appended(self, tmp_path):  # history ending in an assistant turn: new message is appended
+        builder = _builder(tmp_path)
+        history = [{"role": "assistant", "content": "previous response"}]
+        messages = builder.build_messages(history, "new message")
+        assert len(messages) == 3  # system + assistant + user
+
+    def test_media_with_history(self, tmp_path):  # with an image attached, the user content is a list with an image block
+        png = tmp_path / "img.png"
+        png.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 16)  # PNG signature followed by 16 zero bytes
+        builder = _builder(tmp_path)
+        history = [{"role": "assistant", "content": "see this"}]
+        messages = builder.build_messages(history, "check image", media=[str(png)])
+        user_msg = messages[-1]["content"]
+        assert isinstance(user_msg, list)
+        assert any(b.get("type") == "image_url" for b in user_msg)
+
+
+# ---------------------------------------------------------------------------
+# add_tool_result
+# ---------------------------------------------------------------------------
+
+
+class TestAddToolResult:  # ContextBuilder.add_tool_result(messages, tool_call_id, name, content)
+    def test_appends_tool_message(self, tmp_path):  # a role="tool" message with id, name and content is appended
+        builder = _builder(tmp_path)
+        msgs = [{"role": "user", "content": "hello"}]
+        result = builder.add_tool_result(msgs, "call_123", "read_file", "file content")
+        assert len(result) == 2
+        assert result[1]["role"] == "tool"
+        assert result[1]["tool_call_id"] == "call_123"
+        assert result[1]["name"] == "read_file"
+        assert result[1]["content"] == "file content"
+
+    def test_returns_same_list(self, tmp_path):  # the list passed in is returned itself, not a copy
+        builder = _builder(tmp_path)
+        msgs = []
+        result = builder.add_tool_result(msgs, "id", "tool", "ok")
+        assert result is msgs
diff --git a/tests/agent/test_loop_runner_integration.py b/tests/agent/test_loop_runner_integration.py
new file mode 100644
index 000000000..3cfe07f41
--- /dev/null
+++ b/tests/agent/test_loop_runner_integration.py
@@ -0,0 +1,301 @@
+"""Tests for AgentLoop integration with AgentRunner: streaming, think-filter, error handling, subagent."""
+
+from __future__ import annotations
+
+import asyncio
+import time
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+
+def _make_loop(tmp_path):
+    from nanobot.agent.loop import AgentLoop  # imported inside the helper rather than at module top
+    from nanobot.bus.queue import MessageBus
+
+    bus = MessageBus()
+    provider = MagicMock()  # unspecced mock standing in for the LLM provider
+    provider.get_default_model.return_value = "test-model"
+
+    with patch("nanobot.agent.loop.ContextBuilder"), \
+         patch("nanobot.agent.loop.SessionManager"), \
+         patch("nanobot.agent.loop.SubagentManager") as MockSubMgr:
+        MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0)  # awaitable stub that resolves to 0
+        loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path)  # built while the three collaborators are patched
+    return loop
+
+@pytest.mark.asyncio
+async def test_loop_max_iterations_message_stays_stable(tmp_path):
+    loop = _make_loop(tmp_path)
+    loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(  # every reply requests another tool call
+        content="working",
+        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})],
+    ))
+    loop.tools.get_definitions = MagicMock(return_value=[])
+    loop.tools.execute = AsyncMock(return_value="ok")
+    loop.max_iterations = 2  # the same number appears in the expected fallback text below
+
+    final_content, _, _, _, _ = await loop._run_agent_loop([])  # only the first of the five returned values is checked
+
+    assert final_content == (  # the exact fallback wording is pinned by this test
+        "I reached the maximum number of tool call iterations (2) "
+        "without completing the task. You can try breaking the task into smaller steps."
+    )
+
+
+@pytest.mark.asyncio
+async def test_loop_stream_filter_handles_think_only_prefix_without_crashing(tmp_path):
+    loop = _make_loop(tmp_path)
+    deltas: list[str] = []
+    endings: list[bool] = []
+
+    async def chat_stream_with_retry(*, on_content_delta, **kwargs):
+        await on_content_delta("<think>hidden</think>")  # think-only chunk; NOTE(review): markup reconstructed, confirm chunking
+        await on_content_delta("Hello")
+        return LLMResponse(content="<think>hidden</think>Hello", tool_calls=[], usage={})
+
+    loop.provider.chat_stream_with_retry = chat_stream_with_retry
+
+    async def on_stream(delta: str) -> None:
+        deltas.append(delta)
+
+    async def on_stream_end(*, resuming: bool = False) -> None:
+        endings.append(resuming)
+
+    final_content, _, _, _, _ = await loop._run_agent_loop(
+        [],
+        on_stream=on_stream,
+        on_stream_end=on_stream_end,
+    )
+
+    assert final_content == "Hello"  # reasoning block is absent from the final text
+    assert deltas == ["Hello"]  # nothing was forwarded for the think-only chunk
+    assert endings == [False]  # on_stream_end fired exactly once, with resuming=False
+
+
+@pytest.mark.asyncio
+async def test_loop_stream_filter_hides_partial_trailing_think_prefix(tmp_path):
+    loop = _make_loop(tmp_path)
+    deltas: list[str] = []
+
+    async def chat_stream_with_retry(*, on_content_delta, **kwargs):
+        await on_content_delta("Hello <thi")  # chunk ends mid-tag; NOTE(review): split point reconstructed, confirm
+        await on_content_delta("nk>hidden</think>World")  # completes the tag, closes the block, then visible text
+        return LLMResponse(content="Hello <think>hidden</think>World", tool_calls=[], usage={})
+
+    loop.provider.chat_stream_with_retry = chat_stream_with_retry
+
+    async def on_stream(delta: str) -> None:
+        deltas.append(delta)
+
+    final_content, _, _, _, _ = await loop._run_agent_loop([], on_stream=on_stream)
+
+    assert final_content == "Hello World"  # reasoning block removed from the final text
+    assert deltas == ["Hello", " World"]  # the partial "<thi" prefix never reached the stream callback
+
+
+@pytest.mark.asyncio
+async def test_loop_stream_filter_hides_complete_trailing_think_tag(tmp_path):
+    loop = _make_loop(tmp_path)
+    deltas: list[str] = []
+
+    async def chat_stream_with_retry(*, on_content_delta, **kwargs):
+        await on_content_delta("Hello <think>")  # chunk ends with a full opening tag; NOTE(review): reconstructed, confirm
+        await on_content_delta("hidden</think>World")  # reasoning text, closing tag, then visible text
+        return LLMResponse(content="Hello <think>hidden</think>World", tool_calls=[], usage={})
+
+    loop.provider.chat_stream_with_retry = chat_stream_with_retry
+
+    async def on_stream(delta: str) -> None:
+        deltas.append(delta)
+
+    final_content, _, _, _, _ = await loop._run_agent_loop([], on_stream=on_stream)
+
+    assert final_content == "Hello World"  # reasoning block removed from the final text
+    assert deltas == ["Hello", " World"]  # neither the tag nor the hidden text was streamed
+
+
+@pytest.mark.asyncio
+async def test_loop_retries_think_only_final_response(tmp_path):
+    loop = _make_loop(tmp_path)
+    call_count = {"n": 0}  # mutable counter shared with the stub below
+
+    async def chat_with_retry(**kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:
+            return LLMResponse(content="<think>hidden</think>", tool_calls=[], usage={})  # NOTE(review): markup reconstructed
+        return LLMResponse(content="Recovered answer", tool_calls=[], usage={})
+
+    loop.provider.chat_with_retry = chat_with_retry
+
+    final_content, _, _, _, _ = await loop._run_agent_loop([])
+
+    assert final_content == "Recovered answer"  # the second response is the one returned
+    assert call_count["n"] == 2  # the provider was called again after the think-only reply
+
+
+@pytest.mark.asyncio
+async def test_streamed_flag_not_set_on_llm_error(tmp_path):
+    """When LLM errors during a streaming-capable channel interaction,
+    _streamed must NOT be set so ChannelManager delivers the error."""
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.bus.events import InboundMessage
+    from nanobot.bus.queue import MessageBus
+
+    bus = MessageBus()
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")  # built without patching collaborators
+    error_resp = LLMResponse(  # finish_reason="error" marks this response as a failed call
+        content="503 service unavailable", finish_reason="error", tool_calls=[], usage={},
+    )
+    loop.provider.chat_with_retry = AsyncMock(return_value=error_resp)  # same error from the non-streaming entry point
+    loop.provider.chat_stream_with_retry = AsyncMock(return_value=error_resp)  # ...and from the streaming one
+    loop.tools.get_definitions = MagicMock(return_value=[])
+
+    msg = InboundMessage(
+        channel="feishu", sender_id="u1", chat_id="c1", content="hi",
+    )
+    result = await loop._process_message(
+        msg,
+        on_stream=AsyncMock(),  # stream callbacks are supplied, as in the streaming case the docstring describes
+        on_stream_end=AsyncMock(),
+    )
+
+    assert result is not None
+    assert "503" in result.content  # the error text is what comes back
+    assert not result.metadata.get("_streamed"), \
+        "_streamed must not be set when stop_reason is error"
+
+
+@pytest.mark.asyncio
+async def test_ssrf_soft_block_can_finalize_after_streamed_tool_call(tmp_path):
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.bus.events import InboundMessage
+    from nanobot.bus.queue import MessageBus
+
+    bus = MessageBus()
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    tool_call_resp = LLMResponse(  # first reply: requests an exec call against a link-local metadata URL
+        content="checking metadata",
+        tool_calls=[ToolCallRequest(
+            id="call_ssrf",
+            name="exec",
+            arguments={"command": "curl http://169.254.169.254/latest/meta-data/"},
+        )],
+        usage={},
+    )
+    provider.chat_stream_with_retry = AsyncMock(side_effect=[  # replies are consumed in order, one per call
+        tool_call_resp,
+        LLMResponse(
+            content="I cannot access private URLs. Please share the local file.",
+            tool_calls=[],
+            usage={},
+        ),
+    ])
+
+    loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
+    loop.tools.get_definitions = MagicMock(return_value=[])
+    loop.tools.prepare_call = MagicMock(return_value=(None, {}, None))  # NOTE(review): tuple meaning not visible here — confirm
+    loop.tools.execute = AsyncMock(return_value=(  # the tool returns the guard's error text instead of raising
+        "Error: Command blocked by safety guard (internal/private URL detected)"
+    ))
+
+    result = await loop._process_message(
+        InboundMessage(channel="telegram", sender_id="u1", chat_id="c1", content="hi"),
+        on_stream=AsyncMock(),
+        on_stream_end=AsyncMock(),
+    )
+
+    assert result is not None
+    assert result.content == "I cannot access private URLs. Please share the local file."  # second reply is the final answer
+    assert result.metadata.get("_streamed") is True  # the turn is still marked as streamed
+
+
+@pytest.mark.asyncio
+async def test_next_turn_after_llm_error_keeps_turn_boundary(tmp_path):
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.agent.runner import _PERSISTED_MODEL_ERROR_PLACEHOLDER
+    from nanobot.bus.events import InboundMessage
+    from nanobot.bus.queue import MessageBus
+
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    provider.chat_with_retry = AsyncMock(side_effect=[  # first call fails, second call succeeds
+        LLMResponse(content="429 rate limit exceeded", finish_reason="error", tool_calls=[], usage={}),
+        LLMResponse(content="Recovered answer", tool_calls=[], usage={}),
+    ])
+
+    loop = AgentLoop(bus=MessageBus(), provider=provider, workspace=tmp_path, model="test-model")
+    loop.tools.get_definitions = MagicMock(return_value=[])
+    loop.consolidator.maybe_consolidate_by_tokens = AsyncMock(return_value=False)  # type: ignore[method-assign]
+
+    first = await loop._process_message(
+        InboundMessage(channel="cli", sender_id="user", chat_id="test", content="first question")
+    )
+    assert first is not None
+    assert first.content == "429 rate limit exceeded"  # the caller sees the raw error text
+
+    session = loop.sessions.get_or_create("cli:test")  # key combines channel="cli" and chat_id="test" used above
+    assert [  # compare only role/content, ignoring any other stored keys
+        {key: value for key, value in message.items() if key in {"role", "content"}}
+        for message in session.messages
+    ] == [
+        {"role": "user", "content": "first question"},
+        {"role": "assistant", "content": _PERSISTED_MODEL_ERROR_PLACEHOLDER},  # stored history holds the placeholder
+    ]
+
+    second = await loop._process_message(
+        InboundMessage(channel="cli", sender_id="user", chat_id="test", content="second question")
+    )
+    assert second is not None
+    assert second.content == "Recovered answer"
+
+    request_messages = provider.chat_with_retry.await_args_list[1].kwargs["messages"]  # messages of the second provider call
+    non_system = [message for message in request_messages if message.get("role") != "system"]
+    assert non_system[0]["role"] == "user"
+    assert "first question" in non_system[0]["content"]
+    assert non_system[1]["role"] == "assistant"
+    assert _PERSISTED_MODEL_ERROR_PLACEHOLDER in non_system[1]["content"]
+    assert non_system[2]["role"] == "user"  # the second question is its own user turn, after the placeholder
+    assert "second question" in non_system[2]["content"]
+
+
+@pytest.mark.asyncio
+async def test_subagent_max_iterations_announces_existing_fallback(tmp_path, monkeypatch):
+    from nanobot.agent.subagent import SubagentManager, SubagentStatus
+    from nanobot.bus.queue import MessageBus
+
+    bus = MessageBus()
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(  # every reply requests another tool call
+        content="working",
+        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
+    ))
+    mgr = SubagentManager(
+        provider=provider,
+        workspace=tmp_path,
+        bus=bus,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    )
+    mgr._announce_result = AsyncMock()  # replaced so the call arguments can be inspected below
+
+    async def fake_execute(self, **kwargs):
+        return "tool result"
+
+    monkeypatch.setattr("nanobot.agent.tools.filesystem.ListDirTool.execute", fake_execute)  # stubbed on the class itself
+
+    status = SubagentStatus(task_id="sub-1", label="label", task_description="do task", started_at=time.monotonic())
+    await mgr._run_subagent("sub-1", "do task", "label", {"channel": "test", "chat_id": "c1"}, status)
+
+    mgr._announce_result.assert_awaited_once()
+    args = mgr._announce_result.await_args.args  # positional arguments of the single awaited call
+    assert args[3] == "Task completed but no final response was generated."  # NOTE(review): index 3 presumably the result text
+    assert args[5] == "ok"  # NOTE(review): index 5 presumably the status — confirm against _announce_result
diff --git a/tests/agent/test_runner.py b/tests/agent/test_runner.py
deleted file mode 100644
index b821d9bab..000000000
--- a/tests/agent/test_runner.py
+++ /dev/null
@@ -1,3313 +0,0 @@
-"""Tests for the shared agent runner and its integration contracts."""
-
-from __future__ import annotations
-
-import asyncio
-import base64
-import os
-import time
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-from nanobot.config.schema import AgentDefaults
-from nanobot.agent.tools.base import Tool
-from nanobot.agent.tools.registry import ToolRegistry
-from nanobot.providers.base import LLMResponse, ToolCallRequest
-
-_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
-
-
-def _make_injection_callback(queue: asyncio.Queue):
- """Return an async callback that drains *queue* into a list of dicts."""
- async def inject_cb():
- items = []
- while not queue.empty():
- items.append(await queue.get())
- return items
- return inject_cb
-
-
-def _make_loop(tmp_path):
- from nanobot.agent.loop import AgentLoop
- from nanobot.bus.queue import MessageBus
-
- bus = MessageBus()
- provider = MagicMock()
- provider.get_default_model.return_value = "test-model"
-
- with patch("nanobot.agent.loop.ContextBuilder"), \
- patch("nanobot.agent.loop.SessionManager"), \
- patch("nanobot.agent.loop.SubagentManager") as MockSubMgr:
- MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0)
- loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path)
- return loop
-
-
-@pytest.mark.asyncio
-async def test_runner_preserves_reasoning_fields_and_tool_results():
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- captured_second_call: list[dict] = []
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- if call_count["n"] == 1:
- return LLMResponse(
- content="thinking",
- tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
- reasoning_content="hidden reasoning",
- thinking_blocks=[{"type": "thinking", "thinking": "step"}],
- usage={"prompt_tokens": 5, "completion_tokens": 3},
- )
- captured_second_call[:] = messages
- return LLMResponse(content="done", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(return_value="tool result")
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[
- {"role": "system", "content": "system"},
- {"role": "user", "content": "do task"},
- ],
- tools=tools,
- model="test-model",
- max_iterations=3,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert result.final_content == "done"
- assert result.tools_used == ["list_dir"]
- assert result.tool_events == [
- {"name": "list_dir", "status": "ok", "detail": "tool result"}
- ]
-
- assistant_messages = [
- msg for msg in captured_second_call
- if msg.get("role") == "assistant" and msg.get("tool_calls")
- ]
- assert len(assistant_messages) == 1
- assert assistant_messages[0]["reasoning_content"] == "hidden reasoning"
- assert assistant_messages[0]["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}]
- assert any(
- msg.get("role") == "tool" and msg.get("content") == "tool result"
- for msg in captured_second_call
- )
-
-
-@pytest.mark.asyncio
-async def test_runner_calls_hooks_in_order():
- from nanobot.agent.hook import AgentHook, AgentHookContext
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- call_count = {"n": 0}
- events: list[tuple] = []
-
- async def chat_with_retry(**kwargs):
- call_count["n"] += 1
- if call_count["n"] == 1:
- return LLMResponse(
- content="thinking",
- tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
- )
- return LLMResponse(content="done", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(return_value="tool result")
-
- class RecordingHook(AgentHook):
- async def before_iteration(self, context: AgentHookContext) -> None:
- events.append(("before_iteration", context.iteration))
-
- async def before_execute_tools(self, context: AgentHookContext) -> None:
- events.append((
- "before_execute_tools",
- context.iteration,
- [tc.name for tc in context.tool_calls],
- ))
-
- async def after_iteration(self, context: AgentHookContext) -> None:
- events.append((
- "after_iteration",
- context.iteration,
- context.final_content,
- list(context.tool_results),
- list(context.tool_events),
- context.stop_reason,
- ))
-
- def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None:
- events.append(("finalize_content", context.iteration, content))
- return content.upper() if content else content
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[],
- tools=tools,
- model="test-model",
- max_iterations=3,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- hook=RecordingHook(),
- ))
-
- assert result.final_content == "DONE"
- assert events == [
- ("before_iteration", 0),
- ("before_execute_tools", 0, ["list_dir"]),
- (
- "after_iteration",
- 0,
- None,
- ["tool result"],
- [{"name": "list_dir", "status": "ok", "detail": "tool result"}],
- None,
- ),
- ("before_iteration", 1),
- ("finalize_content", 1, "done"),
- ("after_iteration", 1, "DONE", [], [], "completed"),
- ]
-
-
-@pytest.mark.asyncio
-async def test_runner_streaming_hook_receives_deltas_and_end_signal():
- from nanobot.agent.hook import AgentHook, AgentHookContext
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- streamed: list[str] = []
- endings: list[bool] = []
-
- async def chat_stream_with_retry(*, on_content_delta, **kwargs):
- await on_content_delta("he")
- await on_content_delta("llo")
- return LLMResponse(content="hello", tool_calls=[], usage={})
-
- provider.chat_stream_with_retry = chat_stream_with_retry
- provider.chat_with_retry = AsyncMock()
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- class StreamingHook(AgentHook):
- def wants_streaming(self) -> bool:
- return True
-
- async def on_stream(self, context: AgentHookContext, delta: str) -> None:
- streamed.append(delta)
-
- async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None:
- endings.append(resuming)
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[],
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- hook=StreamingHook(),
- ))
-
- assert result.final_content == "hello"
- assert streamed == ["he", "llo"]
- assert endings == [False]
- provider.chat_with_retry.assert_not_awaited()
-
-
-@pytest.mark.asyncio
-async def test_runner_returns_max_iterations_fallback():
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
- content="still working",
- tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
- ))
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(return_value="tool result")
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[],
- tools=tools,
- model="test-model",
- max_iterations=2,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert result.stop_reason == "max_iterations"
- assert result.final_content == (
- "I reached the maximum number of tool call iterations (2) "
- "without completing the task. You can try breaking the task into smaller steps."
- )
- assert result.messages[-1]["role"] == "assistant"
- assert result.messages[-1]["content"] == result.final_content
-
-
-@pytest.mark.asyncio
-async def test_runner_times_out_hung_llm_request():
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
-
- async def chat_with_retry(**kwargs):
- await asyncio.sleep(3600)
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- runner = AgentRunner(provider)
- started = time.monotonic()
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "hello"}],
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- llm_timeout_s=0.05,
- ))
-
- assert (time.monotonic() - started) < 1.0
- assert result.stop_reason == "error"
- assert "timed out" in (result.final_content or "").lower()
-
-@pytest.mark.asyncio
-async def test_runner_returns_structured_tool_error():
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
- content="working",
- tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})],
- ))
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(side_effect=RuntimeError("boom"))
-
- runner = AgentRunner(provider)
-
- result = await runner.run(AgentRunSpec(
- initial_messages=[],
- tools=tools,
- model="test-model",
- max_iterations=2,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- fail_on_tool_error=True,
- ))
-
- assert result.stop_reason == "tool_error"
- assert result.error == "Error: RuntimeError: boom"
- assert result.tool_events == [
- {"name": "list_dir", "status": "error", "detail": "boom"}
- ]
-
-
-@pytest.mark.asyncio
-async def test_runner_does_not_abort_on_workspace_violation_anymore():
- """v2 behavior: workspace-bound rejections are *soft* tool errors.
-
- Previously (PR #3493) any workspace boundary error became a fatal
- RuntimeError that aborted the turn. That silently killed legitimate
- workspace commands once the heuristic guard misfired (#3599 #3605), so
- we now hand the error back to the LLM as a recoverable tool result and
- rely on ``repeated_workspace_violation_error`` to throttle bypass loops.
- """
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- provider.chat_with_retry = AsyncMock(side_effect=[
- LLMResponse(
- content="trying outside",
- tool_calls=[ToolCallRequest(
- id="call_1", name="read_file", arguments={"path": "/tmp/outside.md"},
- )],
- ),
- LLMResponse(content="ok, telling the user instead", tool_calls=[]),
- ])
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(
- side_effect=PermissionError(
- "Path /tmp/outside.md is outside allowed directory /workspace"
- )
- )
-
- runner = AgentRunner(provider)
-
- result = await runner.run(AgentRunSpec(
- initial_messages=[],
- tools=tools,
- model="test-model",
- max_iterations=3,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert provider.chat_with_retry.await_count == 2, (
- "workspace violation must NOT short-circuit the loop"
- )
- assert result.stop_reason != "tool_error"
- assert result.error is None
- assert result.final_content == "ok, telling the user instead"
- assert result.tool_events and result.tool_events[0]["status"] == "error"
- # Detail still carries the workspace_violation breadcrumb for telemetry,
- # but the runner did not raise.
- assert "workspace_violation" in result.tool_events[0]["detail"]
-
-
-def test_is_ssrf_violation_recognizes_private_url_blocks():
- """SSRF rejections are classified separately from workspace boundaries."""
- from nanobot.agent.runner import AgentRunner
-
- ssrf_msg = "Error: Command blocked by safety guard (internal/private URL detected)"
- assert AgentRunner._is_ssrf_violation(ssrf_msg) is True
- assert AgentRunner._is_ssrf_violation(
- "URL validation failed: Blocked: host resolves to private/internal address 192.168.1.2"
- ) is True
-
- # Workspace-bound markers are NOT classified as SSRF.
- assert AgentRunner._is_ssrf_violation(
- "Error: Command blocked by safety guard (path outside working dir)"
- ) is False
- assert AgentRunner._is_ssrf_violation(
- "Path /tmp/x is outside allowed directory /ws"
- ) is False
- # Deny / allowlist filter messages stay non-fatal too.
- assert AgentRunner._is_ssrf_violation(
- "Error: Command blocked by deny pattern filter"
- ) is False
-
-
-@pytest.mark.asyncio
-async def test_runner_returns_non_retryable_hint_on_ssrf_violation():
- """SSRF stays blocked, but the runtime gives the LLM a final chance to recover."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- provider.chat_with_retry = AsyncMock(side_effect=[
- LLMResponse(
- content="curl-ing metadata",
- tool_calls=[ToolCallRequest(
- id="call_ssrf",
- name="exec",
- arguments={"command": "curl http://169.254.169.254"},
- )],
- ),
- LLMResponse(
- content="I cannot access that private URL. Please share local files.",
- tool_calls=[],
- ),
- ])
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(return_value=(
- "Error: Command blocked by safety guard (internal/private URL detected)"
- ))
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[],
- tools=tools,
- model="test-model",
- max_iterations=3,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert provider.chat_with_retry.await_count == 2
- assert result.stop_reason == "completed"
- assert result.error is None
- assert result.final_content == "I cannot access that private URL. Please share local files."
- assert result.tool_events and result.tool_events[0]["detail"].startswith("ssrf_violation:")
- tool_messages = [m for m in result.messages if m.get("role") == "tool"]
- assert tool_messages
- assert "non-bypassable security boundary" in tool_messages[0]["content"]
- assert "Do not retry" in tool_messages[0]["content"]
- assert "tools.ssrfWhitelist" in tool_messages[0]["content"]
-
-
-@pytest.mark.asyncio
-async def test_runner_lets_llm_recover_from_shell_guard_path_outside():
- """Reporter scenario for #3599 / #3605 -- guard hit, agent recovers.
-
- The shell `_guard_command` heuristic fires on `2>/dev/null`-style
- redirects and other shell idioms. Before v2 that abort'd the whole
- turn (silent hang on Telegram per #3605); now the LLM gets the soft
- error back and can finalize on the next iteration.
- """
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- captured_second_call: list[dict] = []
-
- async def chat_with_retry(*, messages, **kwargs):
- if provider.chat_with_retry.await_count == 1:
- return LLMResponse(
- content="trying noisy cleanup",
- tool_calls=[ToolCallRequest(
- id="call_blocked",
- name="exec",
- arguments={"command": "rm scratch.txt 2>/dev/null"},
- )],
- )
- captured_second_call[:] = list(messages)
- return LLMResponse(content="recovered final answer", tool_calls=[])
-
- provider.chat_with_retry = AsyncMock(side_effect=chat_with_retry)
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(
- return_value="Error: Command blocked by safety guard (path outside working dir)"
- )
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[],
- tools=tools,
- model="test-model",
- max_iterations=3,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert provider.chat_with_retry.await_count == 2, (
- "guard hit must NOT short-circuit the loop -- LLM should get a second turn"
- )
- assert result.stop_reason != "tool_error"
- assert result.error is None
- assert result.final_content == "recovered final answer"
- assert result.tool_events and result.tool_events[0]["status"] == "error"
- # v2: detail keeps the breadcrumb but the runner did not raise.
- assert "workspace_violation" in result.tool_events[0]["detail"]
-
-
-@pytest.mark.asyncio
-async def test_runner_throttles_repeated_workspace_bypass_attempts():
- """#3493 motivation: stop the LLM bypass loop without aborting the turn.
-
- LLM keeps switching tools (read_file -> exec cat -> python -c open(...))
- against the same outside path. After the soft retry budget is exhausted
- the runner replaces the tool result with a hard "stop trying" message
- so the model finally gives up and surfaces the boundary to the user.
- """
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- bypass_attempts = [
- ToolCallRequest(
- id=f"a{i}", name="exec",
- arguments={"command": f"cat /Users/x/Downloads/01.md # try {i}"},
- )
- for i in range(4)
- ]
- responses: list[LLMResponse] = [
- LLMResponse(content=f"try {i}", tool_calls=[bypass_attempts[i]])
- for i in range(4)
- ]
- responses.append(LLMResponse(content="ok telling user", tool_calls=[]))
-
- provider = MagicMock()
- provider.chat_with_retry = AsyncMock(side_effect=responses)
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(
- return_value="Error: Command blocked by safety guard (path outside working dir)"
- )
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[],
- tools=tools,
- model="test-model",
- max_iterations=10,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- # All 4 bypass attempts surface to the LLM (no fatal abort), and the
- # runner finally completes once the LLM stops asking.
- assert result.stop_reason != "tool_error"
- assert result.error is None
- assert result.final_content == "ok telling user"
- # The third+ attempts must have been escalated -- look at the events.
- escalated = [
- ev for ev in result.tool_events
- if ev["status"] == "error"
- and ev["detail"].startswith("workspace_violation_escalated:")
- ]
- assert escalated, (
- "expected at least one escalated workspace_violation event, got: "
- f"{result.tool_events}"
- )
-
-
-@pytest.mark.asyncio
-async def test_runner_persists_large_tool_results_for_follow_up_calls(tmp_path):
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- captured_second_call: list[dict] = []
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- if call_count["n"] == 1:
- return LLMResponse(
- content="working",
- tool_calls=[ToolCallRequest(id="call_big", name="list_dir", arguments={"path": "."})],
- usage={"prompt_tokens": 5, "completion_tokens": 3},
- )
- captured_second_call[:] = messages
- return LLMResponse(content="done", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(return_value="x" * 20_000)
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "do task"}],
- tools=tools,
- model="test-model",
- max_iterations=2,
- workspace=tmp_path,
- session_key="test:runner",
- max_tool_result_chars=2048,
- ))
-
- assert result.final_content == "done"
- tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool")
- assert "[tool output persisted]" in tool_message["content"]
- assert "tool-results" in tool_message["content"]
- assert (tmp_path / ".nanobot" / "tool-results" / "test_runner" / "call_big.txt").exists()
-
-
-def test_persist_tool_result_prunes_old_session_buckets(tmp_path):
- from nanobot.utils.helpers import maybe_persist_tool_result
-
- root = tmp_path / ".nanobot" / "tool-results"
- old_bucket = root / "old_session"
- recent_bucket = root / "recent_session"
- old_bucket.mkdir(parents=True)
- recent_bucket.mkdir(parents=True)
- (old_bucket / "old.txt").write_text("old", encoding="utf-8")
- (recent_bucket / "recent.txt").write_text("recent", encoding="utf-8")
-
- stale = time.time() - (8 * 24 * 60 * 60)
- os.utime(old_bucket, (stale, stale))
- os.utime(old_bucket / "old.txt", (stale, stale))
-
- persisted = maybe_persist_tool_result(
- tmp_path,
- "current:session",
- "call_big",
- "x" * 5000,
- max_chars=64,
- )
-
- assert "[tool output persisted]" in persisted
- assert not old_bucket.exists()
- assert recent_bucket.exists()
- assert (root / "current_session" / "call_big.txt").exists()
-
-
-def test_persist_tool_result_leaves_no_temp_files(tmp_path):
- from nanobot.utils.helpers import maybe_persist_tool_result
-
- root = tmp_path / ".nanobot" / "tool-results"
- maybe_persist_tool_result(
- tmp_path,
- "current:session",
- "call_big",
- "x" * 5000,
- max_chars=64,
- )
-
- assert (root / "current_session" / "call_big.txt").exists()
- assert list((root / "current_session").glob("*.tmp")) == []
-
-
-def test_persist_tool_result_logs_cleanup_failures(monkeypatch, tmp_path):
- from nanobot.utils.helpers import maybe_persist_tool_result
-
- warnings: list[str] = []
-
- monkeypatch.setattr(
- "nanobot.utils.helpers._cleanup_tool_result_buckets",
- lambda *_args, **_kwargs: (_ for _ in ()).throw(OSError("busy")),
- )
- monkeypatch.setattr(
- "nanobot.utils.helpers.logger.exception",
- lambda message, *args: warnings.append(message.format(*args)),
- )
-
- persisted = maybe_persist_tool_result(
- tmp_path,
- "current:session",
- "call_big",
- "x" * 5000,
- max_chars=64,
- )
-
- assert "[tool output persisted]" in persisted
- assert warnings and "Failed to clean stale tool result buckets" in warnings[0]
-
-
-@pytest.mark.asyncio
-async def test_runner_replaces_empty_tool_result_with_marker():
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- captured_second_call: list[dict] = []
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- if call_count["n"] == 1:
- return LLMResponse(
- content="working",
- tool_calls=[ToolCallRequest(id="call_1", name="noop", arguments={})],
- usage={},
- )
- captured_second_call[:] = messages
- return LLMResponse(content="done", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(return_value="")
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "do task"}],
- tools=tools,
- model="test-model",
- max_iterations=2,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert result.final_content == "done"
- tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool")
- assert tool_message["content"] == "(noop completed with no output)"
-
-
-@pytest.mark.asyncio
-async def test_runner_uses_raw_messages_when_context_governance_fails():
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- captured_messages: list[dict] = []
-
- async def chat_with_retry(*, messages, **kwargs):
- captured_messages[:] = messages
- return LLMResponse(content="done", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
- initial_messages = [
- {"role": "system", "content": "system"},
- {"role": "user", "content": "hello"},
- ]
-
- runner = AgentRunner(provider)
- runner._snip_history = MagicMock(side_effect=RuntimeError("boom")) # type: ignore[method-assign]
- result = await runner.run(AgentRunSpec(
- initial_messages=initial_messages,
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert result.final_content == "done"
- assert captured_messages == initial_messages
-
-
-@pytest.mark.asyncio
-async def test_runner_retries_empty_final_response_with_summary_prompt():
- """Empty responses get 2 silent retries before finalization kicks in."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- calls: list[dict] = []
-
- async def chat_with_retry(*, messages, tools=None, **kwargs):
- calls.append({"messages": messages, "tools": tools})
- if len(calls) <= 2:
- return LLMResponse(
- content=None,
- tool_calls=[],
- usage={"prompt_tokens": 5, "completion_tokens": 1},
- )
- return LLMResponse(
- content="final answer",
- tool_calls=[],
- usage={"prompt_tokens": 3, "completion_tokens": 7},
- )
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "do task"}],
- tools=tools,
- model="test-model",
- max_iterations=3,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert result.final_content == "final answer"
- # 2 silent retries (iterations 0,1) + finalization on iteration 1
- assert len(calls) == 3
- assert calls[0]["tools"] is not None
- assert calls[1]["tools"] is not None
- assert calls[2]["tools"] is None
- assert result.usage["prompt_tokens"] == 13
- assert result.usage["completion_tokens"] == 9
-
-
-@pytest.mark.asyncio
-async def test_runner_uses_specific_message_after_empty_finalization_retry():
- """After silent retries + finalization all return empty, stop_reason is empty_final_response."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
- from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE
-
- provider = MagicMock()
-
- async def chat_with_retry(*, messages, **kwargs):
- return LLMResponse(content=None, tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "do task"}],
- tools=tools,
- model="test-model",
- max_iterations=3,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert result.final_content == EMPTY_FINAL_RESPONSE_MESSAGE
- assert result.stop_reason == "empty_final_response"
-
-
-@pytest.mark.asyncio
-async def test_runner_empty_response_does_not_break_tool_chain():
- """An empty intermediate response must not kill an ongoing tool chain.
-
- Sequence: tool_call → empty → tool_call → final text.
- The runner should recover via silent retry and complete normally.
- """
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- call_count = 0
-
- async def chat_with_retry(*, messages, tools=None, **kwargs):
- nonlocal call_count
- call_count += 1
- if call_count == 1:
- return LLMResponse(
- content=None,
- tool_calls=[ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a.txt"})],
- usage={"prompt_tokens": 10, "completion_tokens": 5},
- )
- if call_count == 2:
- return LLMResponse(content=None, tool_calls=[], usage={"prompt_tokens": 10, "completion_tokens": 1})
- if call_count == 3:
- return LLMResponse(
- content=None,
- tool_calls=[ToolCallRequest(id="tc2", name="read_file", arguments={"path": "b.txt"})],
- usage={"prompt_tokens": 10, "completion_tokens": 5},
- )
- return LLMResponse(
- content="Here are the results.",
- tool_calls=[],
- usage={"prompt_tokens": 10, "completion_tokens": 10},
- )
-
- provider.chat_with_retry = chat_with_retry
- provider.chat_stream_with_retry = chat_with_retry
-
- async def fake_tool(name, args, **kw):
- return "file content"
-
- tool_registry = MagicMock()
- tool_registry.get_definitions.return_value = [{"type": "function", "function": {"name": "read_file"}}]
- tool_registry.execute = AsyncMock(side_effect=fake_tool)
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "read both files"}],
- tools=tool_registry,
- model="test-model",
- max_iterations=10,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert result.final_content == "Here are the results."
- assert result.stop_reason == "completed"
- assert call_count == 4
- assert "read_file" in result.tools_used
-
-
-def test_snip_history_drops_orphaned_tool_results_from_trimmed_slice(monkeypatch):
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- tools = MagicMock()
- tools.get_definitions.return_value = []
- runner = AgentRunner(provider)
- messages = [
- {"role": "system", "content": "system"},
- {"role": "user", "content": "old user"},
- {
- "role": "assistant",
- "content": "tool call",
- "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "ls", "arguments": "{}"}}],
- },
- {"role": "tool", "tool_call_id": "call_1", "content": "tool output"},
- {"role": "assistant", "content": "after tool"},
- ]
- spec = AgentRunSpec(
- initial_messages=messages,
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- context_window_tokens=2000,
- context_block_limit=100,
- )
-
- monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_args, **_kwargs: (500, None))
- token_sizes = {
- "old user": 120,
- "tool call": 120,
- "tool output": 40,
- "after tool": 40,
- "system": 0,
- }
- monkeypatch.setattr(
- "nanobot.agent.runner.estimate_message_tokens",
- lambda msg: token_sizes.get(str(msg.get("content")), 40),
- )
-
- trimmed = runner._snip_history(spec, messages)
-
- # After the fix, the user message is recovered so the sequence is valid
- # for providers that require system → user (e.g. GLM error 1214).
- assert trimmed[0]["role"] == "system"
- non_system = [m for m in trimmed if m["role"] != "system"]
- assert non_system[0]["role"] == "user", f"Expected user after system, got {non_system[0]['role']}"
-
-
-@pytest.mark.asyncio
-async def test_runner_keeps_going_when_tool_result_persistence_fails():
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- captured_second_call: list[dict] = []
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- if call_count["n"] == 1:
- return LLMResponse(
- content="working",
- tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
- usage={"prompt_tokens": 5, "completion_tokens": 3},
- )
- captured_second_call[:] = messages
- return LLMResponse(content="done", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(return_value="tool result")
-
- runner = AgentRunner(provider)
- with patch("nanobot.agent.runner.maybe_persist_tool_result", side_effect=RuntimeError("disk full")):
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "do task"}],
- tools=tools,
- model="test-model",
- max_iterations=2,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert result.final_content == "done"
- tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool")
- assert tool_message["content"] == "tool result"
-
-
-class _DelayTool(Tool):
- def __init__(
- self,
- name: str,
- *,
- delay: float,
- read_only: bool,
- shared_events: list[str],
- exclusive: bool = False,
- ):
- self._name = name
- self._delay = delay
- self._read_only = read_only
- self._shared_events = shared_events
- self._exclusive = exclusive
-
- @property
- def name(self) -> str:
- return self._name
-
- @property
- def description(self) -> str:
- return self._name
-
- @property
- def parameters(self) -> dict:
- return {"type": "object", "properties": {}, "required": []}
-
- @property
- def read_only(self) -> bool:
- return self._read_only
-
- @property
- def exclusive(self) -> bool:
- return self._exclusive
-
- async def execute(self, **kwargs):
- self._shared_events.append(f"start:{self._name}")
- await asyncio.sleep(self._delay)
- self._shared_events.append(f"end:{self._name}")
- return self._name
-
-
-@pytest.mark.asyncio
-async def test_runner_batches_read_only_tools_before_exclusive_work():
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- tools = ToolRegistry()
- shared_events: list[str] = []
- read_a = _DelayTool("read_a", delay=0.05, read_only=True, shared_events=shared_events)
- read_b = _DelayTool("read_b", delay=0.05, read_only=True, shared_events=shared_events)
- write_a = _DelayTool("write_a", delay=0.01, read_only=False, shared_events=shared_events)
- tools.register(read_a)
- tools.register(read_b)
- tools.register(write_a)
-
- runner = AgentRunner(MagicMock())
- await runner._execute_tools(
- AgentRunSpec(
- initial_messages=[],
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- concurrent_tools=True,
- ),
- [
- ToolCallRequest(id="ro1", name="read_a", arguments={}),
- ToolCallRequest(id="ro2", name="read_b", arguments={}),
- ToolCallRequest(id="rw1", name="write_a", arguments={}),
- ],
- {},
- {},
- )
-
- assert shared_events[0:2] == ["start:read_a", "start:read_b"]
- assert "end:read_a" in shared_events and "end:read_b" in shared_events
- assert shared_events.index("end:read_a") < shared_events.index("start:write_a")
- assert shared_events.index("end:read_b") < shared_events.index("start:write_a")
- assert shared_events[-2:] == ["start:write_a", "end:write_a"]
-
-
-@pytest.mark.asyncio
-async def test_runner_does_not_batch_exclusive_read_only_tools():
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- tools = ToolRegistry()
- shared_events: list[str] = []
- read_a = _DelayTool("read_a", delay=0.03, read_only=True, shared_events=shared_events)
- read_b = _DelayTool("read_b", delay=0.03, read_only=True, shared_events=shared_events)
- ddg_like = _DelayTool(
- "ddg_like",
- delay=0.01,
- read_only=True,
- shared_events=shared_events,
- exclusive=True,
- )
- tools.register(read_a)
- tools.register(ddg_like)
- tools.register(read_b)
-
- runner = AgentRunner(MagicMock())
- await runner._execute_tools(
- AgentRunSpec(
- initial_messages=[],
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- concurrent_tools=True,
- ),
- [
- ToolCallRequest(id="ro1", name="read_a", arguments={}),
- ToolCallRequest(id="ddg1", name="ddg_like", arguments={}),
- ToolCallRequest(id="ro2", name="read_b", arguments={}),
- ],
- {},
- {},
- )
-
- assert shared_events[0] == "start:read_a"
- assert shared_events.index("end:read_a") < shared_events.index("start:ddg_like")
- assert shared_events.index("end:ddg_like") < shared_events.index("start:read_b")
-
-
-@pytest.mark.asyncio
-async def test_runner_blocks_repeated_external_fetches():
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- captured_final_call: list[dict] = []
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- if call_count["n"] <= 3:
- return LLMResponse(
- content="working",
- tool_calls=[ToolCallRequest(id=f"call_{call_count['n']}", name="web_fetch", arguments={"url": "https://example.com"})],
- usage={},
- )
- captured_final_call[:] = messages
- return LLMResponse(content="done", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(return_value="page content")
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "research task"}],
- tools=tools,
- model="test-model",
- max_iterations=4,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert result.final_content == "done"
- assert tools.execute.await_count == 2
- blocked_tool_message = [
- msg for msg in captured_final_call
- if msg.get("role") == "tool" and msg.get("tool_call_id") == "call_3"
- ][0]
- assert "repeated external lookup blocked" in blocked_tool_message["content"]
-
-
-@pytest.mark.asyncio
-async def test_loop_max_iterations_message_stays_stable(tmp_path):
- loop = _make_loop(tmp_path)
- loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
- content="working",
- tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})],
- ))
- loop.tools.get_definitions = MagicMock(return_value=[])
- loop.tools.execute = AsyncMock(return_value="ok")
- loop.max_iterations = 2
-
- final_content, _, _, _, _ = await loop._run_agent_loop([])
-
- assert final_content == (
- "I reached the maximum number of tool call iterations (2) "
- "without completing the task. You can try breaking the task into smaller steps."
- )
-
-
-@pytest.mark.asyncio
-async def test_loop_stream_filter_handles_think_only_prefix_without_crashing(tmp_path):
- loop = _make_loop(tmp_path)
- deltas: list[str] = []
- endings: list[bool] = []
-
- async def chat_stream_with_retry(*, on_content_delta, **kwargs):
- await on_content_delta("hidden")
- await on_content_delta("Hello")
- return LLMResponse(content="hiddenHello", tool_calls=[], usage={})
-
- loop.provider.chat_stream_with_retry = chat_stream_with_retry
-
- async def on_stream(delta: str) -> None:
- deltas.append(delta)
-
- async def on_stream_end(*, resuming: bool = False) -> None:
- endings.append(resuming)
-
- final_content, _, _, _, _ = await loop._run_agent_loop(
- [],
- on_stream=on_stream,
- on_stream_end=on_stream_end,
- )
-
- assert final_content == "Hello"
- assert deltas == ["Hello"]
- assert endings == [False]
-
-
-@pytest.mark.asyncio
-async def test_loop_stream_filter_hides_partial_trailing_think_prefix(tmp_path):
- loop = _make_loop(tmp_path)
- deltas: list[str] = []
-
- async def chat_stream_with_retry(*, on_content_delta, **kwargs):
- await on_content_delta("Hello hiddenWorld")
- return LLMResponse(content="Hello hiddenWorld", tool_calls=[], usage={})
-
- loop.provider.chat_stream_with_retry = chat_stream_with_retry
-
- async def on_stream(delta: str) -> None:
- deltas.append(delta)
-
- final_content, _, _, _, _ = await loop._run_agent_loop([], on_stream=on_stream)
-
- assert final_content == "Hello World"
- assert deltas == ["Hello", " World"]
-
-
-@pytest.mark.asyncio
-async def test_loop_stream_filter_hides_complete_trailing_think_tag(tmp_path):
- loop = _make_loop(tmp_path)
- deltas: list[str] = []
-
- async def chat_stream_with_retry(*, on_content_delta, **kwargs):
- await on_content_delta("Hello ")
- await on_content_delta("hiddenWorld")
- return LLMResponse(content="Hello hiddenWorld", tool_calls=[], usage={})
-
- loop.provider.chat_stream_with_retry = chat_stream_with_retry
-
- async def on_stream(delta: str) -> None:
- deltas.append(delta)
-
- final_content, _, _, _, _ = await loop._run_agent_loop([], on_stream=on_stream)
-
- assert final_content == "Hello World"
- assert deltas == ["Hello", " World"]
-
-
-@pytest.mark.asyncio
-async def test_loop_retries_think_only_final_response(tmp_path):
- loop = _make_loop(tmp_path)
- call_count = {"n": 0}
-
- async def chat_with_retry(**kwargs):
- call_count["n"] += 1
- if call_count["n"] == 1:
- return LLMResponse(content="hidden", tool_calls=[], usage={})
- return LLMResponse(content="Recovered answer", tool_calls=[], usage={})
-
- loop.provider.chat_with_retry = chat_with_retry
-
- final_content, _, _, _, _ = await loop._run_agent_loop([])
-
- assert final_content == "Recovered answer"
- assert call_count["n"] == 2
-
-
-@pytest.mark.asyncio
-async def test_llm_error_not_appended_to_session_messages():
- """When LLM returns finish_reason='error', the error content must NOT be
- appended to the messages list (prevents polluting session history)."""
- from nanobot.agent.runner import (
- AgentRunSpec,
- AgentRunner,
- _PERSISTED_MODEL_ERROR_PLACEHOLDER,
- )
-
- provider = MagicMock()
- provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
- content="429 rate limit exceeded", finish_reason="error", tool_calls=[], usage={},
- ))
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "hello"}],
- tools=tools,
- model="test-model",
- max_iterations=5,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert result.stop_reason == "error"
- assert result.final_content == "429 rate limit exceeded"
- assistant_msgs = [m for m in result.messages if m.get("role") == "assistant"]
- assert all("429" not in (m.get("content") or "") for m in assistant_msgs), \
- "Error content should not appear in session messages"
- assert assistant_msgs[-1]["content"] == _PERSISTED_MODEL_ERROR_PLACEHOLDER
-
-
-@pytest.mark.asyncio
-async def test_streamed_flag_not_set_on_llm_error(tmp_path):
- """When LLM errors during a streaming-capable channel interaction,
- _streamed must NOT be set so ChannelManager delivers the error."""
- from nanobot.agent.loop import AgentLoop
- from nanobot.bus.events import InboundMessage
- from nanobot.bus.queue import MessageBus
-
- bus = MessageBus()
- provider = MagicMock()
- provider.get_default_model.return_value = "test-model"
- loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
- error_resp = LLMResponse(
- content="503 service unavailable", finish_reason="error", tool_calls=[], usage={},
- )
- loop.provider.chat_with_retry = AsyncMock(return_value=error_resp)
- loop.provider.chat_stream_with_retry = AsyncMock(return_value=error_resp)
- loop.tools.get_definitions = MagicMock(return_value=[])
-
- msg = InboundMessage(
- channel="feishu", sender_id="u1", chat_id="c1", content="hi",
- )
- result = await loop._process_message(
- msg,
- on_stream=AsyncMock(),
- on_stream_end=AsyncMock(),
- )
-
- assert result is not None
- assert "503" in result.content
- assert not result.metadata.get("_streamed"), \
- "_streamed must not be set when stop_reason is error"
-
-
-@pytest.mark.asyncio
-async def test_ssrf_soft_block_can_finalize_after_streamed_tool_call(tmp_path):
- from nanobot.agent.loop import AgentLoop
- from nanobot.bus.events import InboundMessage
- from nanobot.bus.queue import MessageBus
-
- bus = MessageBus()
- provider = MagicMock()
- provider.get_default_model.return_value = "test-model"
- tool_call_resp = LLMResponse(
- content="checking metadata",
- tool_calls=[ToolCallRequest(
- id="call_ssrf",
- name="exec",
- arguments={"command": "curl http://169.254.169.254/latest/meta-data/"},
- )],
- usage={},
- )
- provider.chat_stream_with_retry = AsyncMock(side_effect=[
- tool_call_resp,
- LLMResponse(
- content="I cannot access private URLs. Please share the local file.",
- tool_calls=[],
- usage={},
- ),
- ])
-
- loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
- loop.tools.get_definitions = MagicMock(return_value=[])
- loop.tools.prepare_call = MagicMock(return_value=(None, {}, None))
- loop.tools.execute = AsyncMock(return_value=(
- "Error: Command blocked by safety guard (internal/private URL detected)"
- ))
-
- result = await loop._process_message(
- InboundMessage(channel="telegram", sender_id="u1", chat_id="c1", content="hi"),
- on_stream=AsyncMock(),
- on_stream_end=AsyncMock(),
- )
-
- assert result is not None
- assert result.content == "I cannot access private URLs. Please share the local file."
- assert result.metadata.get("_streamed") is True
-
-
-@pytest.mark.asyncio
-async def test_next_turn_after_llm_error_keeps_turn_boundary(tmp_path):
- from nanobot.agent.loop import AgentLoop
- from nanobot.agent.runner import _PERSISTED_MODEL_ERROR_PLACEHOLDER
- from nanobot.bus.events import InboundMessage
- from nanobot.bus.queue import MessageBus
-
- provider = MagicMock()
- provider.get_default_model.return_value = "test-model"
- provider.chat_with_retry = AsyncMock(side_effect=[
- LLMResponse(content="429 rate limit exceeded", finish_reason="error", tool_calls=[], usage={}),
- LLMResponse(content="Recovered answer", tool_calls=[], usage={}),
- ])
-
- loop = AgentLoop(bus=MessageBus(), provider=provider, workspace=tmp_path, model="test-model")
- loop.tools.get_definitions = MagicMock(return_value=[])
- loop.consolidator.maybe_consolidate_by_tokens = AsyncMock(return_value=False) # type: ignore[method-assign]
-
- first = await loop._process_message(
- InboundMessage(channel="cli", sender_id="user", chat_id="test", content="first question")
- )
- assert first is not None
- assert first.content == "429 rate limit exceeded"
-
- session = loop.sessions.get_or_create("cli:test")
- assert [
- {key: value for key, value in message.items() if key in {"role", "content"}}
- for message in session.messages
- ] == [
- {"role": "user", "content": "first question"},
- {"role": "assistant", "content": _PERSISTED_MODEL_ERROR_PLACEHOLDER},
- ]
-
- second = await loop._process_message(
- InboundMessage(channel="cli", sender_id="user", chat_id="test", content="second question")
- )
- assert second is not None
- assert second.content == "Recovered answer"
-
- request_messages = provider.chat_with_retry.await_args_list[1].kwargs["messages"]
- non_system = [message for message in request_messages if message.get("role") != "system"]
- assert non_system[0]["role"] == "user"
- assert "first question" in non_system[0]["content"]
- assert non_system[1]["role"] == "assistant"
- assert _PERSISTED_MODEL_ERROR_PLACEHOLDER in non_system[1]["content"]
- assert non_system[2]["role"] == "user"
- assert "second question" in non_system[2]["content"]
-
-
-@pytest.mark.asyncio
-async def test_runner_tool_error_sets_final_content():
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
-
- async def chat_with_retry(*, messages, **kwargs):
- return LLMResponse(
- content="working",
- tool_calls=[ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})],
- usage={},
- )
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(side_effect=RuntimeError("boom"))
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "do task"}],
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- fail_on_tool_error=True,
- ))
-
- assert result.final_content == "Error: RuntimeError: boom"
- assert result.stop_reason == "tool_error"
-
-
-@pytest.mark.asyncio
-async def test_subagent_max_iterations_announces_existing_fallback(tmp_path, monkeypatch):
- from nanobot.agent.subagent import SubagentManager, SubagentStatus
- from nanobot.bus.queue import MessageBus
-
- bus = MessageBus()
- provider = MagicMock()
- provider.get_default_model.return_value = "test-model"
- provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
- content="working",
- tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
- ))
- mgr = SubagentManager(
- provider=provider,
- workspace=tmp_path,
- bus=bus,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- )
- mgr._announce_result = AsyncMock()
-
- async def fake_execute(self, **kwargs):
- return "tool result"
-
- monkeypatch.setattr("nanobot.agent.tools.filesystem.ListDirTool.execute", fake_execute)
-
- status = SubagentStatus(task_id="sub-1", label="label", task_description="do task", started_at=time.monotonic())
- await mgr._run_subagent("sub-1", "do task", "label", {"channel": "test", "chat_id": "c1"}, status)
-
- mgr._announce_result.assert_awaited_once()
- args = mgr._announce_result.await_args.args
- assert args[3] == "Task completed but no final response was generated."
- assert args[5] == "ok"
-
-
-@pytest.mark.asyncio
-async def test_runner_accumulates_usage_and_preserves_cached_tokens():
- """Runner should accumulate prompt/completion tokens across iterations
- and preserve cached_tokens from provider responses."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- if call_count["n"] == 1:
- return LLMResponse(
- content="thinking",
- tool_calls=[ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})],
- usage={"prompt_tokens": 100, "completion_tokens": 10, "cached_tokens": 80},
- )
- return LLMResponse(
- content="done",
- tool_calls=[],
- usage={"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150},
- )
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(return_value="file content")
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "do task"}],
- tools=tools,
- model="test-model",
- max_iterations=3,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- # Usage should be accumulated across iterations
- assert result.usage["prompt_tokens"] == 300 # 100 + 200
- assert result.usage["completion_tokens"] == 30 # 10 + 20
- assert result.usage["cached_tokens"] == 230 # 80 + 150
-
-
-@pytest.mark.asyncio
-async def test_runner_passes_cached_tokens_to_hook_context():
- """Hook context.usage should contain cached_tokens."""
- from nanobot.agent.hook import AgentHook, AgentHookContext
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- captured_usage: list[dict] = []
-
- class UsageHook(AgentHook):
- async def after_iteration(self, context: AgentHookContext) -> None:
- captured_usage.append(dict(context.usage))
-
- async def chat_with_retry(**kwargs):
- return LLMResponse(
- content="done",
- tool_calls=[],
- usage={"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150},
- )
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- runner = AgentRunner(provider)
- await runner.run(AgentRunSpec(
- initial_messages=[],
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- hook=UsageHook(),
- ))
-
- assert len(captured_usage) == 1
- assert captured_usage[0]["cached_tokens"] == 150
-
-
-# ---------------------------------------------------------------------------
-# Length recovery (auto-continue on finish_reason == "length")
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.asyncio
-async def test_length_recovery_continues_from_truncated_output():
- """When finish_reason is 'length', runner should insert a continuation
- prompt and retry, stitching partial outputs into the final result."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- if call_count["n"] <= 2:
- return LLMResponse(
- content=f"part{call_count['n']} ",
- finish_reason="length",
- usage={},
- )
- return LLMResponse(content="final", finish_reason="stop", usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "write a long essay"}],
- tools=tools,
- model="test-model",
- max_iterations=10,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert result.stop_reason == "completed"
- assert result.final_content == "final"
- assert call_count["n"] == 3
- roles = [m["role"] for m in result.messages if m["role"] == "user"]
- assert len(roles) >= 3 # original + 2 recovery prompts
-
-
-@pytest.mark.asyncio
-async def test_length_recovery_streaming_calls_on_stream_end_with_resuming():
- """During length recovery with streaming, on_stream_end should be called
- with resuming=True so the hook knows the conversation is continuing."""
- from nanobot.agent.hook import AgentHook, AgentHookContext
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- call_count = {"n": 0}
- stream_end_calls: list[bool] = []
-
- class StreamHook(AgentHook):
- def wants_streaming(self) -> bool:
- return True
-
- async def on_stream(self, context: AgentHookContext, delta: str) -> None:
- pass
-
- async def on_stream_end(self, context: AgentHookContext, resuming: bool = False) -> None:
- stream_end_calls.append(resuming)
-
- async def chat_stream_with_retry(*, messages, on_content_delta=None, **kwargs):
- call_count["n"] += 1
- if call_count["n"] == 1:
- return LLMResponse(content="partial ", finish_reason="length", usage={})
- return LLMResponse(content="done", finish_reason="stop", usage={})
-
- provider.chat_stream_with_retry = chat_stream_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- runner = AgentRunner(provider)
- await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "go"}],
- tools=tools,
- model="test-model",
- max_iterations=10,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- hook=StreamHook(),
- ))
-
- assert len(stream_end_calls) == 2
- assert stream_end_calls[0] is True # length recovery: resuming
- assert stream_end_calls[1] is False # final response: done
-
-
-@pytest.mark.asyncio
-async def test_length_recovery_gives_up_after_max_retries():
- """After _MAX_LENGTH_RECOVERIES attempts the runner should stop retrying."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_LENGTH_RECOVERIES
-
- provider = MagicMock()
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- return LLMResponse(
- content=f"chunk{call_count['n']}",
- finish_reason="length",
- usage={},
- )
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "go"}],
- tools=tools,
- model="test-model",
- max_iterations=20,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert call_count["n"] == _MAX_LENGTH_RECOVERIES + 1
- assert result.final_content is not None
-
-
-# ---------------------------------------------------------------------------
-# Backfill missing tool_results
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.asyncio
-async def test_backfill_missing_tool_results_inserts_error():
- """Orphaned tool_use (no matching tool_result) should get a synthetic error."""
- from nanobot.agent.runner import AgentRunner, _BACKFILL_CONTENT
-
- messages = [
- {"role": "user", "content": "hi"},
- {
- "role": "assistant",
- "content": "",
- "tool_calls": [
- {"id": "call_a", "type": "function", "function": {"name": "exec", "arguments": "{}"}},
- {"id": "call_b", "type": "function", "function": {"name": "read_file", "arguments": "{}"}},
- ],
- },
- {"role": "tool", "tool_call_id": "call_a", "name": "exec", "content": "ok"},
- ]
- result = AgentRunner._backfill_missing_tool_results(messages)
- tool_msgs = [m for m in result if m.get("role") == "tool"]
- assert len(tool_msgs) == 2
- backfilled = [m for m in tool_msgs if m.get("tool_call_id") == "call_b"]
- assert len(backfilled) == 1
- assert backfilled[0]["content"] == _BACKFILL_CONTENT
- assert backfilled[0]["name"] == "read_file"
-
-
-def test_drop_orphan_tool_results_removes_unmatched_tool_messages():
- from nanobot.agent.runner import AgentRunner
-
- messages = [
- {"role": "system", "content": "system"},
- {"role": "user", "content": "old user"},
- {
- "role": "assistant",
- "content": "",
- "tool_calls": [
- {"id": "call_ok", "type": "function", "function": {"name": "read_file", "arguments": "{}"}},
- ],
- },
- {"role": "tool", "tool_call_id": "call_ok", "name": "read_file", "content": "ok"},
- {"role": "tool", "tool_call_id": "call_orphan", "name": "exec", "content": "stale"},
- {"role": "assistant", "content": "after tool"},
- ]
-
- cleaned = AgentRunner._drop_orphan_tool_results(messages)
-
- assert cleaned == [
- {"role": "system", "content": "system"},
- {"role": "user", "content": "old user"},
- {
- "role": "assistant",
- "content": "",
- "tool_calls": [
- {"id": "call_ok", "type": "function", "function": {"name": "read_file", "arguments": "{}"}},
- ],
- },
- {"role": "tool", "tool_call_id": "call_ok", "name": "read_file", "content": "ok"},
- {"role": "assistant", "content": "after tool"},
- ]
-
-
-@pytest.mark.asyncio
-async def test_backfill_noop_when_complete():
- """Complete message chains should not be modified."""
- from nanobot.agent.runner import AgentRunner
-
- messages = [
- {"role": "user", "content": "hi"},
- {
- "role": "assistant",
- "content": "",
- "tool_calls": [
- {"id": "call_x", "type": "function", "function": {"name": "exec", "arguments": "{}"}},
- ],
- },
- {"role": "tool", "tool_call_id": "call_x", "name": "exec", "content": "done"},
- {"role": "assistant", "content": "all good"},
- ]
- result = AgentRunner._backfill_missing_tool_results(messages)
- assert result is messages # same object — no copy
-
-
-@pytest.mark.asyncio
-async def test_runner_drops_orphan_tool_results_before_model_request():
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- captured_messages: list[dict] = []
-
- async def chat_with_retry(*, messages, **kwargs):
- captured_messages[:] = messages
- return LLMResponse(content="done", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[
- {"role": "system", "content": "system"},
- {"role": "user", "content": "old user"},
- {"role": "tool", "tool_call_id": "call_orphan", "name": "exec", "content": "stale"},
- {"role": "assistant", "content": "after orphan"},
- {"role": "user", "content": "new prompt"},
- ],
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert all(
- message.get("tool_call_id") != "call_orphan"
- for message in captured_messages
- if message.get("role") == "tool"
- )
- assert result.messages[2]["tool_call_id"] == "call_orphan"
- assert result.final_content == "done"
-
-
-@pytest.mark.asyncio
-async def test_backfill_repairs_model_context_without_shifting_save_turn_boundary(tmp_path):
- """Historical backfill should not duplicate old tail messages on persist."""
- from nanobot.agent.loop import AgentLoop
- from nanobot.agent.runner import _BACKFILL_CONTENT
- from nanobot.bus.events import InboundMessage
- from nanobot.bus.queue import MessageBus
-
- provider = MagicMock()
- provider.get_default_model.return_value = "test-model"
- response = LLMResponse(content="new answer", tool_calls=[], usage={})
- provider.chat_with_retry = AsyncMock(return_value=response)
- provider.chat_stream_with_retry = AsyncMock(return_value=response)
-
- loop = AgentLoop(
- bus=MessageBus(),
- provider=provider,
- workspace=tmp_path,
- model="test-model",
- )
- loop.tools.get_definitions = MagicMock(return_value=[])
- loop.consolidator.maybe_consolidate_by_tokens = AsyncMock(return_value=False) # type: ignore[method-assign]
-
- session = loop.sessions.get_or_create("cli:test")
- session.messages = [
- {"role": "user", "content": "old user", "timestamp": "2026-01-01T00:00:00"},
- {
- "role": "assistant",
- "content": "",
- "tool_calls": [
- {
- "id": "call_missing",
- "type": "function",
- "function": {"name": "read_file", "arguments": "{}"},
- }
- ],
- "timestamp": "2026-01-01T00:00:01",
- },
- {"role": "assistant", "content": "old tail", "timestamp": "2026-01-01T00:00:02"},
- ]
- loop.sessions.save(session)
-
- result = await loop._process_message(
- InboundMessage(channel="cli", sender_id="user", chat_id="test", content="new prompt")
- )
-
- assert result is not None
- assert result.content == "new answer"
-
- request_messages = provider.chat_with_retry.await_args.kwargs["messages"]
- synthetic = [
- message
- for message in request_messages
- if message.get("role") == "tool" and message.get("tool_call_id") == "call_missing"
- ]
- assert len(synthetic) == 1
- assert synthetic[0]["content"] == _BACKFILL_CONTENT
-
- session_after = loop.sessions.get_or_create("cli:test")
- assert [
- {
- key: value
- for key, value in message.items()
- if key in {"role", "content", "tool_call_id", "name", "tool_calls"}
- }
- for message in session_after.messages
- ] == [
- {"role": "user", "content": "old user"},
- {
- "role": "assistant",
- "content": "",
- "tool_calls": [
- {
- "id": "call_missing",
- "type": "function",
- "function": {"name": "read_file", "arguments": "{}"},
- }
- ],
- },
- {"role": "assistant", "content": "old tail"},
- {"role": "user", "content": "new prompt"},
- {"role": "assistant", "content": "new answer"},
- ]
-
-
-@pytest.mark.asyncio
-async def test_runner_backfill_only_mutates_model_context_not_returned_messages():
- """Runner should repair orphaned tool calls for the model without rewriting result.messages."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner, _BACKFILL_CONTENT
-
- provider = MagicMock()
- captured_messages: list[dict] = []
-
- async def chat_with_retry(*, messages, **kwargs):
- captured_messages[:] = messages
- return LLMResponse(content="done", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- initial_messages = [
- {"role": "system", "content": "system"},
- {"role": "user", "content": "old user"},
- {
- "role": "assistant",
- "content": "",
- "tool_calls": [
- {
- "id": "call_missing",
- "type": "function",
- "function": {"name": "read_file", "arguments": "{}"},
- }
- ],
- },
- {"role": "assistant", "content": "old tail"},
- {"role": "user", "content": "new prompt"},
- ]
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=initial_messages,
- tools=tools,
- model="test-model",
- max_iterations=3,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- synthetic = [
- message
- for message in captured_messages
- if message.get("role") == "tool" and message.get("tool_call_id") == "call_missing"
- ]
- assert len(synthetic) == 1
- assert synthetic[0]["content"] == _BACKFILL_CONTENT
-
- assert [
- {
- key: value
- for key, value in message.items()
- if key in {"role", "content", "tool_call_id", "name", "tool_calls"}
- }
- for message in result.messages
- ] == [
- {"role": "system", "content": "system"},
- {"role": "user", "content": "old user"},
- {
- "role": "assistant",
- "content": "",
- "tool_calls": [
- {
- "id": "call_missing",
- "type": "function",
- "function": {"name": "read_file", "arguments": "{}"},
- }
- ],
- },
- {"role": "assistant", "content": "old tail"},
- {"role": "user", "content": "new prompt"},
- {"role": "assistant", "content": "done"},
- ]
-
-
-# ---------------------------------------------------------------------------
-# Microcompact (stale tool result compaction)
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.asyncio
-async def test_microcompact_replaces_old_tool_results():
- """Tool results beyond _MICROCOMPACT_KEEP_RECENT should be summarized."""
- from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT
-
- total = _MICROCOMPACT_KEEP_RECENT + 5
- long_content = "x" * 600
- messages: list[dict] = [{"role": "system", "content": "sys"}]
- for i in range(total):
- messages.append({
- "role": "assistant",
- "content": "",
- "tool_calls": [{"id": f"c{i}", "type": "function", "function": {"name": "read_file", "arguments": "{}"}}],
- })
- messages.append({
- "role": "tool", "tool_call_id": f"c{i}", "name": "read_file",
- "content": long_content,
- })
-
- result = AgentRunner._microcompact(messages)
- tool_msgs = [m for m in result if m.get("role") == "tool"]
- stale_count = total - _MICROCOMPACT_KEEP_RECENT
- compacted = [m for m in tool_msgs if "omitted from context" in str(m.get("content", ""))]
- preserved = [m for m in tool_msgs if m.get("content") == long_content]
- assert len(compacted) == stale_count
- assert len(preserved) == _MICROCOMPACT_KEEP_RECENT
-
-
-@pytest.mark.asyncio
-async def test_microcompact_preserves_short_results():
- """Short tool results (< _MICROCOMPACT_MIN_CHARS) should not be replaced."""
- from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT
-
- total = _MICROCOMPACT_KEEP_RECENT + 5
- messages: list[dict] = []
- for i in range(total):
- messages.append({
- "role": "assistant",
- "content": "",
- "tool_calls": [{"id": f"c{i}", "type": "function", "function": {"name": "exec", "arguments": "{}"}}],
- })
- messages.append({
- "role": "tool", "tool_call_id": f"c{i}", "name": "exec",
- "content": "short",
- })
-
- result = AgentRunner._microcompact(messages)
- assert result is messages # no copy needed — all stale results are short
-
-
-@pytest.mark.asyncio
-async def test_microcompact_skips_non_compactable_tools():
- """Non-compactable tools (e.g. 'message') should never be replaced."""
- from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT
-
- total = _MICROCOMPACT_KEEP_RECENT + 5
- long_content = "y" * 1000
- messages: list[dict] = []
- for i in range(total):
- messages.append({
- "role": "assistant",
- "content": "",
- "tool_calls": [{"id": f"c{i}", "type": "function", "function": {"name": "message", "arguments": "{}"}}],
- })
- messages.append({
- "role": "tool", "tool_call_id": f"c{i}", "name": "message",
- "content": long_content,
- })
-
- result = AgentRunner._microcompact(messages)
- assert result is messages # no compactable tools found
-
-
-@pytest.mark.asyncio
-async def test_runner_tool_error_preserves_tool_results_in_messages():
- """When a tool raises a fatal error, its results must still be appended
- to messages so the session never contains orphan tool_calls (#2943)."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
-
- async def chat_with_retry(*, messages, **kwargs):
- return LLMResponse(
- content=None,
- tool_calls=[
- ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a"}),
- ToolCallRequest(id="tc2", name="exec", arguments={"cmd": "bad"}),
- ],
- usage={},
- )
-
- provider.chat_with_retry = chat_with_retry
- provider.chat_stream_with_retry = chat_with_retry
-
- call_idx = 0
-
- async def fake_execute(name, args, **kw):
- nonlocal call_idx
- call_idx += 1
- if call_idx == 2:
- raise RuntimeError("boom")
- return "file content"
-
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(side_effect=fake_execute)
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "do stuff"}],
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- fail_on_tool_error=True,
- ))
-
- assert result.stop_reason == "tool_error"
- # Both tool results must be in messages even though tc2 had a fatal error.
- tool_msgs = [m for m in result.messages if m.get("role") == "tool"]
- assert len(tool_msgs) == 2
- assert tool_msgs[0]["tool_call_id"] == "tc1"
- assert tool_msgs[1]["tool_call_id"] == "tc2"
- # The assistant message with tool_calls must precede the tool results.
- asst_tc_idx = next(
- i for i, m in enumerate(result.messages)
- if m.get("role") == "assistant" and m.get("tool_calls")
- )
- tool_indices = [
- i for i, m in enumerate(result.messages) if m.get("role") == "tool"
- ]
- assert all(ti > asst_tc_idx for ti in tool_indices)
-
-
-def test_governance_repairs_orphans_after_snip():
- """After _snip_history clips an assistant+tool_calls, the second
- _drop_orphan_tool_results pass must clean up the resulting orphans."""
- from nanobot.agent.runner import AgentRunner
-
- messages = [
- {"role": "system", "content": "system"},
- {"role": "user", "content": "old msg"},
- {"role": "assistant", "content": None,
- "tool_calls": [{"id": "tc_old", "type": "function",
- "function": {"name": "search", "arguments": "{}"}}]},
- {"role": "tool", "tool_call_id": "tc_old", "name": "search",
- "content": "old result"},
- {"role": "assistant", "content": "old answer"},
- {"role": "user", "content": "new msg"},
- ]
-
- # Simulate snipping that keeps only the tail: drop the assistant with
- # tool_calls but keep its tool result (orphan).
- snipped = [
- {"role": "system", "content": "system"},
- {"role": "tool", "tool_call_id": "tc_old", "name": "search",
- "content": "old result"},
- {"role": "assistant", "content": "old answer"},
- {"role": "user", "content": "new msg"},
- ]
-
- cleaned = AgentRunner._drop_orphan_tool_results(snipped)
- # The orphan tool result should be removed.
- assert not any(
- m.get("role") == "tool" and m.get("tool_call_id") == "tc_old"
- for m in cleaned
- )
-
-
-def test_governance_fallback_still_repairs_orphans():
- """When full governance fails, the fallback must still run
- _drop_orphan_tool_results and _backfill_missing_tool_results."""
- from nanobot.agent.runner import AgentRunner
-
- # Messages with an orphan tool result (no matching assistant tool_call).
- messages = [
- {"role": "user", "content": "hello"},
- {"role": "tool", "tool_call_id": "orphan_tc", "name": "read",
- "content": "stale"},
- {"role": "assistant", "content": "hi"},
- ]
-
- repaired = AgentRunner._drop_orphan_tool_results(messages)
- repaired = AgentRunner._backfill_missing_tool_results(repaired)
- # Orphan tool result should be gone.
- assert not any(m.get("tool_call_id") == "orphan_tc" for m in repaired)
-# ── Mid-turn injection tests ──────────────────────────────────────────────
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_returns_empty_when_no_callback():
- """No injection_callback → empty list."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- runner = AgentRunner(provider)
- tools = MagicMock()
- tools.get_definitions.return_value = []
- spec = AgentRunSpec(
- initial_messages=[], tools=tools, model="m",
- max_iterations=1, max_tool_result_chars=1000,
- injection_callback=None,
- )
- result = await runner._drain_injections(spec)
- assert result == []
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_extracts_content_from_inbound_messages():
- """Should extract .content from InboundMessage objects."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
- from nanobot.bus.events import InboundMessage
-
- provider = MagicMock()
- runner = AgentRunner(provider)
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- msgs = [
- InboundMessage(channel="cli", sender_id="u", chat_id="c", content="hello"),
- InboundMessage(channel="cli", sender_id="u", chat_id="c", content="world"),
- ]
-
- async def cb():
- return msgs
-
- spec = AgentRunSpec(
- initial_messages=[], tools=tools, model="m",
- max_iterations=1, max_tool_result_chars=1000,
- injection_callback=cb,
- )
- result = await runner._drain_injections(spec)
- assert result == [
- {"role": "user", "content": "hello"},
- {"role": "user", "content": "world"},
- ]
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_passes_limit_to_callback_when_supported():
- """Limit-aware callbacks can preserve overflow in their own queue."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTIONS_PER_TURN
- from nanobot.bus.events import InboundMessage
-
- provider = MagicMock()
- runner = AgentRunner(provider)
- tools = MagicMock()
- tools.get_definitions.return_value = []
- seen_limits: list[int] = []
-
- msgs = [
- InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg{i}")
- for i in range(_MAX_INJECTIONS_PER_TURN + 3)
- ]
-
- async def cb(*, limit: int):
- seen_limits.append(limit)
- return msgs[:limit]
-
- spec = AgentRunSpec(
- initial_messages=[], tools=tools, model="m",
- max_iterations=1, max_tool_result_chars=1000,
- injection_callback=cb,
- )
- result = await runner._drain_injections(spec)
- assert seen_limits == [_MAX_INJECTIONS_PER_TURN]
- assert result == [
- {"role": "user", "content": "msg0"},
- {"role": "user", "content": "msg1"},
- {"role": "user", "content": "msg2"},
- ]
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_skips_empty_content():
- """Messages with blank content should be filtered out."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
- from nanobot.bus.events import InboundMessage
-
- provider = MagicMock()
- runner = AgentRunner(provider)
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- msgs = [
- InboundMessage(channel="cli", sender_id="u", chat_id="c", content=""),
- InboundMessage(channel="cli", sender_id="u", chat_id="c", content=" "),
- InboundMessage(channel="cli", sender_id="u", chat_id="c", content="valid"),
- ]
-
- async def cb():
- return msgs
-
- spec = AgentRunSpec(
- initial_messages=[], tools=tools, model="m",
- max_iterations=1, max_tool_result_chars=1000,
- injection_callback=cb,
- )
- result = await runner._drain_injections(spec)
- assert result == [{"role": "user", "content": "valid"}]
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_handles_callback_exception():
- """If the callback raises, return empty list (error is logged)."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- runner = AgentRunner(provider)
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- async def cb():
- raise RuntimeError("boom")
-
- spec = AgentRunSpec(
- initial_messages=[], tools=tools, model="m",
- max_iterations=1, max_tool_result_chars=1000,
- injection_callback=cb,
- )
- result = await runner._drain_injections(spec)
- assert result == []
-
-
-@pytest.mark.asyncio
-async def test_checkpoint1_injects_after_tool_execution():
- """Follow-up messages are injected after tool execution, before next LLM call."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
- from nanobot.bus.events import InboundMessage
-
- provider = MagicMock()
- call_count = {"n": 0}
- captured_messages = []
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- captured_messages.append(list(messages))
- if call_count["n"] == 1:
- return LLMResponse(
- content="using tool",
- tool_calls=[ToolCallRequest(id="c1", name="read_file", arguments={"path": "x"})],
- usage={},
- )
- return LLMResponse(content="final answer", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(return_value="file content")
-
- injection_queue = asyncio.Queue()
- inject_cb = _make_injection_callback(injection_queue)
-
- # Put a follow-up message in the queue before the run starts
- await injection_queue.put(
- InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up question")
- )
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "hello"}],
- tools=tools,
- model="test-model",
- max_iterations=5,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- injection_callback=inject_cb,
- ))
-
- assert result.had_injections is True
- assert result.final_content == "final answer"
- # The second call should have the injected user message
- assert call_count["n"] == 2
- last_messages = captured_messages[-1]
- injected = [m for m in last_messages if m.get("role") == "user" and m.get("content") == "follow-up question"]
- assert len(injected) == 1
-
-
-@pytest.mark.asyncio
-async def test_checkpoint2_injects_after_final_response_with_resuming_stream():
- """After final response, if injections exist, stream_end should get resuming=True."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
- from nanobot.agent.hook import AgentHook, AgentHookContext
- from nanobot.bus.events import InboundMessage
-
- provider = MagicMock()
- call_count = {"n": 0}
- stream_end_calls = []
-
- class TrackingHook(AgentHook):
- def wants_streaming(self) -> bool:
- return True
-
- async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None:
- stream_end_calls.append(resuming)
-
- def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None:
- return content
-
- async def chat_stream_with_retry(*, messages, on_content_delta=None, **kwargs):
- call_count["n"] += 1
- if call_count["n"] == 1:
- return LLMResponse(content="first answer", tool_calls=[], usage={})
- return LLMResponse(content="second answer", tool_calls=[], usage={})
-
- provider.chat_stream_with_retry = chat_stream_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- injection_queue = asyncio.Queue()
- inject_cb = _make_injection_callback(injection_queue)
-
- # Inject a follow-up that arrives during the first response
- await injection_queue.put(
- InboundMessage(channel="cli", sender_id="u", chat_id="c", content="quick follow-up")
- )
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "hello"}],
- tools=tools,
- model="test-model",
- max_iterations=5,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- hook=TrackingHook(),
- injection_callback=inject_cb,
- ))
-
- assert result.had_injections is True
- assert result.final_content == "second answer"
- assert call_count["n"] == 2
- # First stream_end should have resuming=True (because injections found)
- assert stream_end_calls[0] is True
- # Second (final) stream_end should have resuming=False
- assert stream_end_calls[-1] is False
-
-
-@pytest.mark.asyncio
-async def test_checkpoint2_preserves_final_response_in_history_before_followup():
- """A follow-up injected after a final answer must still see that answer in history."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
- from nanobot.bus.events import InboundMessage
-
- provider = MagicMock()
- call_count = {"n": 0}
- captured_messages = []
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- captured_messages.append([dict(message) for message in messages])
- if call_count["n"] == 1:
- return LLMResponse(content="first answer", tool_calls=[], usage={})
- return LLMResponse(content="second answer", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- injection_queue = asyncio.Queue()
- inject_cb = _make_injection_callback(injection_queue)
-
- await injection_queue.put(
- InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up question")
- )
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "hello"}],
- tools=tools,
- model="test-model",
- max_iterations=5,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- injection_callback=inject_cb,
- ))
-
- assert result.final_content == "second answer"
- assert call_count["n"] == 2
- assert captured_messages[-1] == [
- {"role": "user", "content": "hello"},
- {"role": "assistant", "content": "first answer"},
- {"role": "user", "content": "follow-up question"},
- ]
- assert [
- {"role": message["role"], "content": message["content"]}
- for message in result.messages
- if message.get("role") == "assistant"
- ] == [
- {"role": "assistant", "content": "first answer"},
- {"role": "assistant", "content": "second answer"},
- ]
-
-
-@pytest.mark.asyncio
-async def test_loop_injected_followup_preserves_image_media(tmp_path):
- """Mid-turn follow-ups with images should keep multimodal content."""
- from nanobot.agent.loop import AgentLoop
- from nanobot.bus.events import InboundMessage
- from nanobot.bus.queue import MessageBus
-
- image_path = tmp_path / "followup.png"
- image_path.write_bytes(base64.b64decode(
- "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+yF9kAAAAASUVORK5CYII="
- ))
-
- bus = MessageBus()
- provider = MagicMock()
- provider.get_default_model.return_value = "test-model"
- captured_messages: list[list[dict]] = []
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- captured_messages.append(list(messages))
- if call_count["n"] == 1:
- return LLMResponse(content="first answer", tool_calls=[], usage={})
- return LLMResponse(content="second answer", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
- loop.tools.get_definitions = MagicMock(return_value=[])
-
- pending_queue = asyncio.Queue()
- await pending_queue.put(InboundMessage(
- channel="cli",
- sender_id="u",
- chat_id="c",
- content="",
- media=[str(image_path)],
- ))
-
- final_content, _, _, _, had_injections = await loop._run_agent_loop(
- [{"role": "user", "content": "hello"}],
- channel="cli",
- chat_id="c",
- pending_queue=pending_queue,
- )
-
- assert final_content == "second answer"
- assert had_injections is True
- assert call_count["n"] == 2
- injected_user_messages = [
- message for message in captured_messages[-1]
- if message.get("role") == "user" and isinstance(message.get("content"), list)
- ]
- assert injected_user_messages
- assert any(
- block.get("type") == "image_url"
- for block in injected_user_messages[-1]["content"]
- if isinstance(block, dict)
- )
-
-
-@pytest.mark.asyncio
-async def test_runner_merges_multiple_injected_user_messages_without_losing_media():
- """Multiple injected follow-ups should not create lossy consecutive user messages."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- call_count = {"n": 0}
- captured_messages = []
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- captured_messages.append([dict(message) for message in messages])
- if call_count["n"] == 1:
- return LLMResponse(content="first answer", tool_calls=[], usage={})
- return LLMResponse(content="second answer", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- async def inject_cb():
- if call_count["n"] == 1:
- return [
- {
- "role": "user",
- "content": [
- {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
- {"type": "text", "text": "look at this"},
- ],
- },
- {"role": "user", "content": "and answer briefly"},
- ]
- return []
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "hello"}],
- tools=tools,
- model="test-model",
- max_iterations=5,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- injection_callback=inject_cb,
- ))
-
- assert result.final_content == "second answer"
- assert call_count["n"] == 2
- second_call = captured_messages[-1]
- user_messages = [message for message in second_call if message.get("role") == "user"]
- assert len(user_messages) == 2
- injected = user_messages[-1]
- assert isinstance(injected["content"], list)
- assert any(
- block.get("type") == "image_url"
- for block in injected["content"]
- if isinstance(block, dict)
- )
- assert any(
- block.get("type") == "text" and block.get("text") == "and answer briefly"
- for block in injected["content"]
- if isinstance(block, dict)
- )
-
-
-@pytest.mark.asyncio
-async def test_injection_cycles_capped_at_max():
- """Injection cycles should be capped at _MAX_INJECTION_CYCLES."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES
- from nanobot.bus.events import InboundMessage
-
- provider = MagicMock()
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- return LLMResponse(content=f"answer-{call_count['n']}", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- drain_count = {"n": 0}
-
- async def inject_cb():
- drain_count["n"] += 1
- # Only inject for the first _MAX_INJECTION_CYCLES drains
- if drain_count["n"] <= _MAX_INJECTION_CYCLES:
- return [InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg-{drain_count['n']}")]
- return []
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "start"}],
- tools=tools,
- model="test-model",
- max_iterations=20,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- injection_callback=inject_cb,
- ))
-
- assert result.had_injections is True
- # Should be capped: _MAX_INJECTION_CYCLES injection rounds + 1 final round
- assert call_count["n"] == _MAX_INJECTION_CYCLES + 1
-
-
-@pytest.mark.asyncio
-async def test_no_injections_flag_is_false_by_default():
- """had_injections should be False when no injection callback or no messages."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
-
- async def chat_with_retry(**kwargs):
- return LLMResponse(content="done", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "hi"}],
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- ))
-
- assert result.had_injections is False
-
-
-@pytest.mark.asyncio
-async def test_pending_queue_cleanup_on_dispatch(tmp_path):
- """_pending_queues should be cleaned up after _dispatch completes."""
- loop = _make_loop(tmp_path)
-
- async def chat_with_retry(**kwargs):
- return LLMResponse(content="done", tool_calls=[], usage={})
-
- loop.provider.chat_with_retry = chat_with_retry
-
- from nanobot.bus.events import InboundMessage
-
- msg = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="hello")
- # The queue should not exist before dispatch
- assert msg.session_key not in loop._pending_queues
-
- await loop._dispatch(msg)
-
- # The queue should be cleaned up after dispatch
- assert msg.session_key not in loop._pending_queues
-
-
-@pytest.mark.asyncio
-async def test_followup_routed_to_pending_queue(tmp_path):
- """Unified-session follow-ups should route into the active pending queue."""
- from nanobot.agent.loop import UNIFIED_SESSION_KEY
- from nanobot.bus.events import InboundMessage
-
- loop = _make_loop(tmp_path)
- loop._unified_session = True
- loop._dispatch = AsyncMock() # type: ignore[method-assign]
-
- pending = asyncio.Queue(maxsize=20)
- loop._pending_queues[UNIFIED_SESSION_KEY] = pending
-
- run_task = asyncio.create_task(loop.run())
- msg = InboundMessage(channel="discord", sender_id="u", chat_id="c", content="follow-up")
- await loop.bus.publish_inbound(msg)
-
- deadline = time.time() + 2
- while pending.empty() and time.time() < deadline:
- await asyncio.sleep(0.01)
-
- loop.stop()
- await asyncio.wait_for(run_task, timeout=2)
-
- assert loop._dispatch.await_count == 0
- assert not pending.empty()
- queued_msg = pending.get_nowait()
- assert queued_msg.content == "follow-up"
- assert queued_msg.session_key == UNIFIED_SESSION_KEY
-
-
-@pytest.mark.asyncio
-async def test_pending_queue_preserves_overflow_for_next_injection_cycle(tmp_path):
- """Pending queue should leave overflow messages queued for later drains."""
- from nanobot.agent.loop import AgentLoop
- from nanobot.bus.events import InboundMessage
- from nanobot.bus.queue import MessageBus
- from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN
-
- bus = MessageBus()
- provider = MagicMock()
- provider.get_default_model.return_value = "test-model"
- captured_messages: list[list[dict]] = []
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- captured_messages.append([dict(message) for message in messages])
- return LLMResponse(content=f"answer-{call_count['n']}", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
- loop.tools.get_definitions = MagicMock(return_value=[])
-
- pending_queue = asyncio.Queue()
- total_followups = _MAX_INJECTIONS_PER_TURN + 2
- for idx in range(total_followups):
- await pending_queue.put(InboundMessage(
- channel="cli",
- sender_id="u",
- chat_id="c",
- content=f"follow-up-{idx}",
- ))
-
- final_content, _, _, _, had_injections = await loop._run_agent_loop(
- [{"role": "user", "content": "hello"}],
- channel="cli",
- chat_id="c",
- pending_queue=pending_queue,
- )
-
- assert final_content == "answer-3"
- assert had_injections is True
- assert call_count["n"] == 3
- flattened_user_content = "\n".join(
- message["content"]
- for message in captured_messages[-1]
- if message.get("role") == "user" and isinstance(message.get("content"), str)
- )
- for idx in range(total_followups):
- assert f"follow-up-{idx}" in flattened_user_content
- assert pending_queue.empty()
-
-
-@pytest.mark.asyncio
-async def test_pending_queue_full_falls_back_to_queued_task(tmp_path):
- """QueueFull should preserve the message by dispatching a queued task."""
- from nanobot.bus.events import InboundMessage
-
- loop = _make_loop(tmp_path)
- loop._dispatch = AsyncMock() # type: ignore[method-assign]
-
- pending = asyncio.Queue(maxsize=1)
- pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="already queued"))
- loop._pending_queues["cli:c"] = pending
-
- run_task = asyncio.create_task(loop.run())
- msg = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up")
- await loop.bus.publish_inbound(msg)
-
- deadline = time.time() + 2
- while loop._dispatch.await_count == 0 and time.time() < deadline:
- await asyncio.sleep(0.01)
-
- loop.stop()
- await asyncio.wait_for(run_task, timeout=2)
-
- assert loop._dispatch.await_count == 1
- dispatched_msg = loop._dispatch.await_args.args[0]
- assert dispatched_msg.content == "follow-up"
- assert pending.qsize() == 1
-
-
-@pytest.mark.asyncio
-async def test_dispatch_republishes_leftover_queue_messages(tmp_path):
- """Messages left in the pending queue after _dispatch are re-published to the bus.
-
- This tests the finally-block cleanup that prevents message loss when
- the runner exits early (e.g., max_iterations, tool_error) with messages
- still in the queue.
- """
- from nanobot.bus.events import InboundMessage
-
- loop = _make_loop(tmp_path)
- bus = loop.bus
-
- # Simulate a completed dispatch by manually registering a queue
- # with leftover messages, then running the cleanup logic directly.
- pending = asyncio.Queue(maxsize=20)
- session_key = "cli:c"
- loop._pending_queues[session_key] = pending
- pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="leftover-1"))
- pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="leftover-2"))
-
- # Execute the cleanup logic from the finally block
- queue = loop._pending_queues.pop(session_key, None)
- assert queue is not None
- leftover = 0
- while True:
- try:
- item = queue.get_nowait()
- except asyncio.QueueEmpty:
- break
- await bus.publish_inbound(item)
- leftover += 1
-
- assert leftover == 2
-
- # Verify the messages are now on the bus
- msgs = []
- while not bus.inbound.empty():
- msgs.append(await asyncio.wait_for(bus.consume_inbound(), timeout=0.5))
- contents = [m.content for m in msgs]
- assert "leftover-1" in contents
- assert "leftover-2" in contents
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_on_fatal_tool_error():
- """Pending injections should be drained even when a fatal tool error occurs."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
- from nanobot.bus.events import InboundMessage
-
- provider = MagicMock()
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- if call_count["n"] == 1:
- return LLMResponse(
- content="",
- tool_calls=[ToolCallRequest(id="c1", name="exec", arguments={"cmd": "bad"})],
- usage={},
- )
- # Second call: respond normally to the injected follow-up
- return LLMResponse(content="reply to follow-up", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(side_effect=RuntimeError("tool exploded"))
-
- injection_queue = asyncio.Queue()
- inject_cb = _make_injection_callback(injection_queue)
-
- await injection_queue.put(
- InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after error")
- )
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "hello"}],
- tools=tools,
- model="test-model",
- max_iterations=5,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- fail_on_tool_error=True,
- injection_callback=inject_cb,
- ))
-
- assert result.had_injections is True
- assert result.final_content == "reply to follow-up"
- # The injection should be in the messages history
- injected = [
- m for m in result.messages
- if m.get("role") == "user" and m.get("content") == "follow-up after error"
- ]
- assert len(injected) == 1
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_on_llm_error():
- """Pending injections should be drained when the LLM returns an error finish_reason."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
- from nanobot.bus.events import InboundMessage
-
- provider = MagicMock()
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- if call_count["n"] == 1:
- return LLMResponse(
- content=None,
- tool_calls=[],
- finish_reason="error",
- usage={},
- )
- # Second call: respond normally to the injected follow-up
- return LLMResponse(content="recovered answer", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- injection_queue = asyncio.Queue()
- inject_cb = _make_injection_callback(injection_queue)
-
- await injection_queue.put(
- InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after LLM error")
- )
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[
- {"role": "user", "content": "hello"},
- {"role": "assistant", "content": "previous response"},
- {"role": "user", "content": "trigger error"},
- ],
- tools=tools,
- model="test-model",
- max_iterations=5,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- injection_callback=inject_cb,
- ))
-
- assert result.had_injections is True
- assert result.final_content == "recovered answer"
- injected = [
- m for m in result.messages
- if m.get("role") == "user" and "follow-up after LLM error" in str(m.get("content", ""))
- ]
- assert len(injected) == 1
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_on_empty_final_response():
- """Pending injections should be drained when the runner exits due to empty response."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_EMPTY_RETRIES
- from nanobot.bus.events import InboundMessage
-
- provider = MagicMock()
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- if call_count["n"] <= _MAX_EMPTY_RETRIES + 1:
- return LLMResponse(content="", tool_calls=[], usage={})
- # After retries exhausted + injection drain, respond normally
- return LLMResponse(content="answer after empty", tool_calls=[], usage={})
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- injection_queue = asyncio.Queue()
- inject_cb = _make_injection_callback(injection_queue)
-
- await injection_queue.put(
- InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after empty")
- )
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[
- {"role": "user", "content": "hello"},
- {"role": "assistant", "content": "previous response"},
- {"role": "user", "content": "trigger empty"},
- ],
- tools=tools,
- model="test-model",
- max_iterations=10,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- injection_callback=inject_cb,
- ))
-
- assert result.had_injections is True
- assert result.final_content == "answer after empty"
- injected = [
- m for m in result.messages
- if m.get("role") == "user" and "follow-up after empty" in str(m.get("content", ""))
- ]
- assert len(injected) == 1
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_on_max_iterations():
- """Pending injections should be drained when the runner hits max_iterations.
-
- Unlike other error paths, max_iterations cannot continue the loop, so
- injections are appended to messages but not processed by the LLM.
- The key point is they are consumed from the queue to prevent re-publish.
- """
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
- from nanobot.bus.events import InboundMessage
-
- provider = MagicMock()
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- return LLMResponse(
- content="",
- tool_calls=[ToolCallRequest(id=f"c{call_count['n']}", name="read_file", arguments={"path": "x"})],
- usage={},
- )
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(return_value="file content")
-
- injection_queue = asyncio.Queue()
- inject_cb = _make_injection_callback(injection_queue)
-
- await injection_queue.put(
- InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after max iters")
- )
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "hello"}],
- tools=tools,
- model="test-model",
- max_iterations=2,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- injection_callback=inject_cb,
- ))
-
- assert result.stop_reason == "max_iterations"
- assert result.had_injections is True
- # The injection was consumed from the queue (preventing re-publish)
- assert injection_queue.empty()
- # The injection message is appended to conversation history
- injected = [
- m for m in result.messages
- if m.get("role") == "user" and m.get("content") == "follow-up after max iters"
- ]
- assert len(injected) == 1
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_set_flag_when_followup_arrives_after_last_iteration():
- """Late follow-ups drained in max_iterations should still flip had_injections."""
- from nanobot.agent.hook import AgentHook
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
- from nanobot.bus.events import InboundMessage
-
- provider = MagicMock()
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- return LLMResponse(
- content="",
- tool_calls=[ToolCallRequest(id=f"c{call_count['n']}", name="read_file", arguments={"path": "x"})],
- usage={},
- )
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
- tools.execute = AsyncMock(return_value="file content")
-
- injection_queue = asyncio.Queue()
- inject_cb = _make_injection_callback(injection_queue)
-
- class InjectOnLastAfterIterationHook(AgentHook):
- def __init__(self) -> None:
- self.after_iteration_calls = 0
-
- async def after_iteration(self, context) -> None:
- self.after_iteration_calls += 1
- if self.after_iteration_calls == 2:
- await injection_queue.put(
- InboundMessage(
- channel="cli",
- sender_id="u",
- chat_id="c",
- content="late follow-up after max iters",
- )
- )
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[{"role": "user", "content": "hello"}],
- tools=tools,
- model="test-model",
- max_iterations=2,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- injection_callback=inject_cb,
- hook=InjectOnLastAfterIterationHook(),
- ))
-
- assert result.stop_reason == "max_iterations"
- assert result.had_injections is True
- assert injection_queue.empty()
- injected = [
- m for m in result.messages
- if m.get("role") == "user" and m.get("content") == "late follow-up after max iters"
- ]
- assert len(injected) == 1
-
-
-@pytest.mark.asyncio
-async def test_injection_cycle_cap_on_error_path():
- """Injection cycles should be capped even when every iteration hits an LLM error."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES
- from nanobot.bus.events import InboundMessage
-
- provider = MagicMock()
- call_count = {"n": 0}
-
- async def chat_with_retry(*, messages, **kwargs):
- call_count["n"] += 1
- return LLMResponse(
- content=None,
- tool_calls=[],
- finish_reason="error",
- usage={},
- )
-
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- drain_count = {"n": 0}
-
- async def inject_cb():
- drain_count["n"] += 1
- if drain_count["n"] <= _MAX_INJECTION_CYCLES:
- return [InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg-{drain_count['n']}")]
- return []
-
- runner = AgentRunner(provider)
- result = await runner.run(AgentRunSpec(
- initial_messages=[
- {"role": "user", "content": "hello"},
- {"role": "assistant", "content": "previous"},
- {"role": "user", "content": "trigger error"},
- ],
- tools=tools,
- model="test-model",
- max_iterations=20,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- injection_callback=inject_cb,
- ))
-
- assert result.had_injections is True
- # Should cap: _MAX_INJECTION_CYCLES drained rounds + 1 final round that breaks
- assert call_count["n"] == _MAX_INJECTION_CYCLES + 1
-
-
-# ---------------------------------------------------------------------------
-# Regression tests for GLM-1214: _snip_history must preserve a user message
-# ---------------------------------------------------------------------------
-
-
-def test_snip_history_preserves_user_message_after_truncation(monkeypatch):
- """When _snip_history truncates messages and the only user message ends up
- outside the kept window, the method must recover the nearest user message
- so the resulting sequence is valid for providers like GLM (which reject
- system→assistant with error 1214).
-
- This reproduces the exact scenario from the bug report:
- - Normal interaction: user asks, assistant calls tool, tool returns,
- assistant replies.
- - Injection adds a phantom user message, triggering more tool calls.
- - _snip_history activates, keeping only recent assistant/tool pairs.
- - The injected user message is in the truncated prefix and gets lost.
- """
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- tools = MagicMock()
- tools.get_definitions.return_value = []
- runner = AgentRunner(provider)
-
- messages = [
- {"role": "system", "content": "system"},
- {"role": "assistant", "content": "previous reply"},
- {"role": "user", "content": ".nanobot的同目录"},
- {
- "role": "assistant",
- "content": None,
- "tool_calls": [{"id": "tc_1", "type": "function", "function": {"name": "exec", "arguments": "{}"}}],
- },
- {"role": "tool", "tool_call_id": "tc_1", "content": "tool output 1"},
- {
- "role": "assistant",
- "content": None,
- "tool_calls": [{"id": "tc_2", "type": "function", "function": {"name": "exec", "arguments": "{}"}}],
- },
- {"role": "tool", "tool_call_id": "tc_2", "content": "tool output 2"},
- ]
-
- spec = AgentRunSpec(
- initial_messages=messages,
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- context_window_tokens=2000,
- context_block_limit=100,
- )
-
- # Make estimate_prompt_tokens_chain report above budget so _snip_history activates.
- monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_a, **_kw: (500, None))
- # Make kept window small: only the last 2 messages fit the budget.
- token_sizes = {
- "system": 0,
- "previous reply": 200,
- ".nanobot的同目录": 80,
- "tool output 1": 80,
- "tool output 2": 80,
- }
- monkeypatch.setattr(
- "nanobot.agent.runner.estimate_message_tokens",
- lambda msg: token_sizes.get(str(msg.get("content")), 100),
- )
-
- trimmed = runner._snip_history(spec, messages)
-
- # The first non-system message MUST be user (not assistant).
- non_system = [m for m in trimmed if m.get("role") != "system"]
- assert non_system, "trimmed should contain at least one non-system message"
- assert non_system[0]["role"] == "user", (
- f"First non-system message must be 'user', got '{non_system[0]['role']}'. "
- f"Roles: {[m['role'] for m in trimmed]}"
- )
-
-
-def test_snip_history_no_user_at_all_falls_back_gracefully(monkeypatch):
- """Edge case: if non_system has zero user messages, _snip_history should
- still return a valid sequence (not crash or produce system→assistant)."""
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- provider = MagicMock()
- tools = MagicMock()
- tools.get_definitions.return_value = []
- runner = AgentRunner(provider)
-
- messages = [
- {"role": "system", "content": "system"},
- {"role": "assistant", "content": "reply"},
- {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
- {"role": "assistant", "content": "reply 2"},
- {"role": "tool", "tool_call_id": "tc_2", "content": "result 2"},
- ]
-
- spec = AgentRunSpec(
- initial_messages=messages,
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- context_window_tokens=2000,
- context_block_limit=100,
- )
-
- monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_a, **_kw: (500, None))
- monkeypatch.setattr(
- "nanobot.agent.runner.estimate_message_tokens",
- lambda msg: 100,
- )
-
- trimmed = runner._snip_history(spec, messages)
-
- # Should not crash. The result should still be a valid list.
- assert isinstance(trimmed, list)
- # Must have at least system.
- assert any(m.get("role") == "system" for m in trimmed)
- # The _enforce_role_alternation safety net must be able to fix whatever
- # _snip_history returns here — verify it produces a valid sequence.
- from nanobot.providers.base import LLMProvider
- fixed = LLMProvider._enforce_role_alternation(trimmed)
- non_system = [m for m in fixed if m["role"] != "system"]
- if non_system:
- assert non_system[0]["role"] in ("user", "tool"), (
- f"Safety net should ensure first non-system is user/tool, got {non_system[0]['role']}"
- )
-
-
-@pytest.mark.asyncio
-async def test_runner_binds_on_retry_wait_to_retry_callback_not_progress():
- """Regression: provider retry heartbeats must route through
- ``retry_wait_callback``, not ``progress_callback``. Binding them to
- the progress callback (as an earlier runtime refactor did) caused
- internal retry diagnostics like "Model request failed, retry in 1s"
- to leak to end-user channels as normal progress updates.
- """
- from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
- captured: dict = {}
-
- async def chat_with_retry(**kwargs):
- captured.update(kwargs)
- return LLMResponse(content="done", tool_calls=[], usage={})
-
- provider = MagicMock()
- provider.chat_with_retry = chat_with_retry
- tools = MagicMock()
- tools.get_definitions.return_value = []
-
- progress_cb = AsyncMock()
- retry_wait_cb = AsyncMock()
-
- runner = AgentRunner(provider)
- await runner.run(AgentRunSpec(
- initial_messages=[
- {"role": "system", "content": "system"},
- {"role": "user", "content": "hi"},
- ],
- tools=tools,
- model="test-model",
- max_iterations=1,
- max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
- progress_callback=progress_cb,
- retry_wait_callback=retry_wait_cb,
- ))
-
- assert captured["on_retry_wait"] is retry_wait_cb
- assert captured["on_retry_wait"] is not progress_cb
diff --git a/tests/agent/test_runner_core.py b/tests/agent/test_runner_core.py
new file mode 100644
index 000000000..dd28fa1cc
--- /dev/null
+++ b/tests/agent/test_runner_core.py
@@ -0,0 +1,481 @@
+"""Tests for core AgentRunner behavior: message passing, iteration limits,
+timeouts, empty-response handling, usage accumulation, and config passthrough."""
+
+from __future__ import annotations
+
+import asyncio
+import time
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.agent.tools.registry import ToolRegistry
+from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars  # read from AgentDefaults; passed to every AgentRunSpec below
+
+
+@pytest.mark.asyncio
+async def test_runner_preserves_reasoning_fields_and_tool_results():  # reasoning fields + tool output must reach LLM call 2
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    captured_second_call: list[dict] = []
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:  # first call: request one tool and attach reasoning metadata
+            return LLMResponse(
+                content="thinking",
+                tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
+                reasoning_content="hidden reasoning",
+                thinking_blocks=[{"type": "thinking", "thinking": "step"}],
+                usage={"prompt_tokens": 5, "completion_tokens": 3},
+            )
+        captured_second_call[:] = messages  # record the messages passed on the follow-up call
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="tool result")
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[
+            {"role": "system", "content": "system"},
+            {"role": "user", "content": "do task"},
+        ],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.final_content == "done"
+    assert result.tools_used == ["list_dir"]
+    assert result.tool_events == [
+        {"name": "list_dir", "status": "ok", "detail": "tool result"}
+    ]
+
+    assistant_messages = [  # assistant turns in the second request that carry tool calls
+        msg for msg in captured_second_call
+        if msg.get("role") == "assistant" and msg.get("tool_calls")
+    ]
+    assert len(assistant_messages) == 1
+    assert assistant_messages[0]["reasoning_content"] == "hidden reasoning"
+    assert assistant_messages[0]["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}]
+    assert any(  # the tool output must also be part of the second request
+        msg.get("role") == "tool" and msg.get("content") == "tool result"
+        for msg in captured_second_call
+    )
+
+
+@pytest.mark.asyncio
+async def test_runner_returns_max_iterations_fallback():  # endless tool calls must end with the max_iterations fallback text
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(  # every call asks for another tool run
+        content="still working",
+        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
+    ))
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="tool result")
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,  # this value is echoed as "(2)" in the expected fallback text
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.stop_reason == "max_iterations"
+    assert result.final_content == (
+        "I reached the maximum number of tool call iterations (2) "
+        "without completing the task. You can try breaking the task into smaller steps."
+    )
+    assert result.messages[-1]["role"] == "assistant"  # fallback is also the last message in the history
+    assert result.messages[-1]["content"] == result.final_content
+
+
+@pytest.mark.asyncio
+async def test_runner_times_out_hung_llm_request():  # a hung provider call must end in an error result, not a hang
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+
+    async def chat_with_retry(**kwargs):
+        await asyncio.sleep(3600)  # far longer than the 1 s bound asserted below
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    runner = AgentRunner(provider)
+    started = time.monotonic()
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hello"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        llm_timeout_s=0.05,
+    ))
+
+    assert (time.monotonic() - started) < 1.0  # run() must return long before the sleep would finish
+    assert result.stop_reason == "error"
+    assert "timed out" in (result.final_content or "").lower()  # `or ""` tolerates final_content being None
+
+
+@pytest.mark.asyncio
+async def test_runner_replaces_empty_tool_result_with_marker():  # "" from a tool must reach the model as a marker string
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    captured_second_call: list[dict] = []
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:  # first call: request the "noop" tool
+            return LLMResponse(
+                content="working",
+                tool_calls=[ToolCallRequest(id="call_1", name="noop", arguments={})],
+                usage={},
+            )
+        captured_second_call[:] = messages  # record the messages passed on the follow-up call
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="")  # the tool produces an empty string
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.final_content == "done"
+    tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool")  # first tool-role message
+    assert tool_message["content"] == "(noop completed with no output)"
+
+
+@pytest.mark.asyncio
+async def test_runner_retries_empty_final_response_with_summary_prompt():
+    """Two empty replies are followed by a tool-less finalization call whose text is returned."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    calls: list[dict] = []
+
+    async def chat_with_retry(*, messages, tools=None, **kwargs):
+        calls.append({"messages": messages, "tools": tools})  # keep `tools` so the test can see when it is dropped
+        if len(calls) <= 2:  # calls 1 and 2: empty content, no tool calls
+            return LLMResponse(
+                content=None,
+                tool_calls=[],
+                usage={"prompt_tokens": 5, "completion_tokens": 1},
+            )
+        return LLMResponse(
+            content="final answer",
+            tool_calls=[],
+            usage={"prompt_tokens": 3, "completion_tokens": 7},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.final_content == "final answer"
+    # Calls 1-2 come back empty with tools still passed; call 3 is the tool-less finalization.
+    assert len(calls) == 3
+    assert calls[0]["tools"] is not None
+    assert calls[1]["tools"] is not None
+    assert calls[2]["tools"] is None
+    assert result.usage["prompt_tokens"] == 13  # 5 + 5 + 3
+    assert result.usage["completion_tokens"] == 9  # 1 + 1 + 7
+
+
+@pytest.mark.asyncio
+async def test_runner_uses_specific_message_after_empty_finalization_retry():
+    """After silent retries + finalization all return empty, stop_reason is empty_final_response."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE
+
+    provider = MagicMock(spec=LLMProvider)
+
+    async def chat_with_retry(*, messages, **kwargs):
+        return LLMResponse(content=None, tool_calls=[], usage={})  # every LLM call comes back with no content
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.final_content == EMPTY_FINAL_RESPONSE_MESSAGE  # the dedicated message constant is used as the reply
+    assert result.stop_reason == "empty_final_response"
+
+
+@pytest.mark.asyncio
+async def test_runner_empty_response_does_not_break_tool_chain():
+    """An empty intermediate response must not kill an ongoing tool chain.
+
+    Sequence: tool_call -> empty -> tool_call -> final text.
+    The runner should recover via silent retry and complete normally.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    call_count = 0
+
+    async def chat_with_retry(*, messages, tools=None, **kwargs):
+        nonlocal call_count
+        call_count += 1
+        if call_count == 1:  # call 1: read a.txt
+            return LLMResponse(
+                content=None,
+                tool_calls=[ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a.txt"})],
+                usage={"prompt_tokens": 10, "completion_tokens": 5},
+            )
+        if call_count == 2:  # call 2: empty response in the middle of the chain
+            return LLMResponse(content=None, tool_calls=[], usage={"prompt_tokens": 10, "completion_tokens": 1})
+        if call_count == 3:  # call 3: read b.txt
+            return LLMResponse(
+                content=None,
+                tool_calls=[ToolCallRequest(id="tc2", name="read_file", arguments={"path": "b.txt"})],
+                usage={"prompt_tokens": 10, "completion_tokens": 5},
+            )
+        return LLMResponse(  # call 4 and later: final text answer
+            content="Here are the results.",
+            tool_calls=[],
+            usage={"prompt_tokens": 10, "completion_tokens": 10},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    provider.chat_stream_with_retry = chat_with_retry  # reuse the scripted fake for the stream-named method too
+
+    async def fake_tool(name, args, **kw):
+        return "file content"
+
+    tool_registry = MagicMock()
+    tool_registry.get_definitions.return_value = [{"type": "function", "function": {"name": "read_file"}}]
+    tool_registry.execute = AsyncMock(side_effect=fake_tool)
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "read both files"}],
+        tools=tool_registry,
+        model="test-model",
+        max_iterations=10,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.final_content == "Here are the results."
+    assert result.stop_reason == "completed"
+    assert call_count == 4  # tool_call, empty, tool_call, final text
+    assert "read_file" in result.tools_used
+
+
+@pytest.mark.asyncio
+async def test_runner_accumulates_usage_and_preserves_cached_tokens():
+    """Runner should sum prompt/completion tokens across iterations
+    and keep cached_tokens from provider responses, summed the same way."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:  # first call: tool request with its own usage figures
+            return LLMResponse(
+                content="thinking",
+                tool_calls=[ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})],
+                usage={"prompt_tokens": 100, "completion_tokens": 10, "cached_tokens": 80},
+            )
+        return LLMResponse(  # later calls: final answer with a second set of usage figures
+            content="done",
+            tool_calls=[],
+            usage={"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="file content")
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    # Each usage counter in the result is the sum over both LLM calls.
+    assert result.usage["prompt_tokens"] == 300  # 100 + 200
+    assert result.usage["completion_tokens"] == 30  # 10 + 20
+    assert result.usage["cached_tokens"] == 230  # 80 + 150
+
+
+@pytest.mark.asyncio
+async def test_runner_binds_on_retry_wait_to_retry_callback_not_progress():
+    """Regression: ``on_retry_wait`` must be the spec's ``retry_wait_callback``.
+
+    An earlier runtime refactor bound it to ``progress_callback`` instead, so
+    internal retry diagnostics such as "Model request failed, retry in 1s"
+    leaked to end-user channels as normal progress updates.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider_kwargs: dict = {}
+
+    async def chat_with_retry(**kwargs):
+        provider_kwargs.update(kwargs)
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    progress_cb = AsyncMock()
+    retry_wait_cb = AsyncMock()
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = chat_with_retry
+
+    spec = AgentRunSpec(
+        initial_messages=[
+            {"role": "system", "content": "system"},
+            {"role": "user", "content": "hi"},
+        ],
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        progress_callback=progress_cb,
+        retry_wait_callback=retry_wait_cb,
+    )
+    await AgentRunner(provider).run(spec)
+
+    bound = provider_kwargs["on_retry_wait"]
+    assert bound is retry_wait_cb
+    assert bound is not progress_cb
+
+
+# ---------------------------------------------------------------------------
+# Config passthrough tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_runner_passes_temperature_to_provider():
+    """``AgentRunSpec.temperature`` is forwarded as the ``temperature`` kwarg."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    forwarded: dict = {}
+
+    async def chat_with_retry(**kwargs):
+        forwarded.update(kwargs)
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = chat_with_retry
+
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hi"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        temperature=0.7,
+    )
+    await AgentRunner(provider).run(spec)
+
+    assert forwarded["temperature"] == 0.7
+
+
+@pytest.mark.asyncio
+async def test_runner_passes_max_tokens_to_provider():
+    """``AgentRunSpec.max_tokens`` is forwarded as the ``max_tokens`` kwarg."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    forwarded: dict = {}
+
+    async def chat_with_retry(**kwargs):
+        forwarded.update(kwargs)
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = chat_with_retry
+
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hi"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        max_tokens=8192,
+    )
+    await AgentRunner(provider).run(spec)
+
+    assert forwarded["max_tokens"] == 8192
+
+
+@pytest.mark.asyncio
+async def test_runner_passes_reasoning_effort_to_provider():
+    """``AgentRunSpec.reasoning_effort`` is forwarded as ``reasoning_effort``."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    forwarded: dict = {}
+
+    async def chat_with_retry(**kwargs):
+        forwarded.update(kwargs)
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = chat_with_retry
+
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hi"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        reasoning_effort="high",
+    )
+    await AgentRunner(provider).run(spec)
+
+    assert forwarded["reasoning_effort"] == "high"
diff --git a/tests/agent/test_runner_errors.py b/tests/agent/test_runner_errors.py
new file mode 100644
index 000000000..8df7ad8f3
--- /dev/null
+++ b/tests/agent/test_runner_errors.py
@@ -0,0 +1,171 @@
+"""Tests for AgentRunner error handling: tool errors, LLM errors,
+session message isolation, and tool result preservation."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+
+@pytest.mark.asyncio
+async def test_runner_returns_structured_tool_error():
+    """A raising tool with ``fail_on_tool_error`` yields a ``tool_error`` result."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    failing_call = LLMResponse(
+        content="working",
+        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})],
+    )
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=failing_call)
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(side_effect=RuntimeError("boom"))
+
+    spec = AgentRunSpec(
+        initial_messages=[],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        fail_on_tool_error=True,
+    )
+    result = await AgentRunner(provider).run(spec)
+
+    assert result.stop_reason == "tool_error"
+    assert result.error == "Error: RuntimeError: boom"
+    expected_event = {"name": "list_dir", "status": "error", "detail": "boom"}
+    assert result.tool_events == [expected_event]
+
+
+@pytest.mark.asyncio
+async def test_llm_error_not_appended_to_session_messages():
+    """A provider reply with ``finish_reason="error"`` is surfaced through
+    ``final_content`` only; ``result.messages`` carries a placeholder instead."""
+    from nanobot.agent.runner import (
+        _PERSISTED_MODEL_ERROR_PLACEHOLDER,
+        AgentRunner,
+        AgentRunSpec,
+    )
+
+    error_reply = LLMResponse(content="429 rate limit exceeded", finish_reason="error", tool_calls=[], usage={})
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=error_reply)
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hello"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=5,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    )
+    result = await AgentRunner(provider).run(spec)
+
+    assert result.stop_reason == "error"
+    assert result.final_content == "429 rate limit exceeded"
+    # Session history must hold the placeholder, never the raw provider error.
+    assistant_msgs = [msg for msg in result.messages if msg.get("role") == "assistant"]
+    assert not any("429" in (msg.get("content") or "") for msg in assistant_msgs), \
+        "Error content should not appear in session messages"
+    assert assistant_msgs[-1]["content"] == _PERSISTED_MODEL_ERROR_PLACEHOLDER
+
+
+@pytest.mark.asyncio
+async def test_runner_tool_error_sets_final_content():
+    """A fatal tool error is reported through ``final_content`` as well."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    async def chat_with_retry(*, messages, **kwargs):
+        return LLMResponse(
+            content="working",
+            tool_calls=[ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})],
+            usage={},
+        )
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(side_effect=RuntimeError("boom"))
+
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        fail_on_tool_error=True,
+    )
+    result = await AgentRunner(provider).run(spec)
+
+    assert result.stop_reason == "tool_error"
+    assert result.final_content == "Error: RuntimeError: boom"
+
+
+@pytest.mark.asyncio
+async def test_runner_tool_error_preserves_tool_results_in_messages():
+    """A fatal tool error must not leave orphan tool_calls behind (#2943).
+
+    Both tool results are expected in ``result.messages`` even though the
+    second tool execution raised.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    async def chat_with_retry(*, messages, **kwargs):
+        return LLMResponse(
+            content=None,
+            tool_calls=[
+                ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a"}),
+                ToolCallRequest(id="tc2", name="exec", arguments={"cmd": "bad"}),
+            ],
+            usage={},
+        )
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = chat_with_retry
+    provider.chat_stream_with_retry = chat_with_retry
+
+    executions = 0
+
+    async def fake_execute(name, args, **kw):
+        nonlocal executions
+        executions += 1
+        if executions != 2:
+            return "file content"
+        raise RuntimeError("boom")  # only the second execution fails
+
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(side_effect=fake_execute)
+
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do stuff"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        fail_on_tool_error=True,
+    )
+    result = await AgentRunner(provider).run(spec)
+
+    assert result.stop_reason == "tool_error"
+    # Both tool results must be in messages even though tc2 had a fatal error.
+    tool_msgs = [m for m in result.messages if m.get("role") == "tool"]
+    assert [m["tool_call_id"] for m in tool_msgs] == ["tc1", "tc2"]
+    # The assistant message with tool_calls must precede the tool results.
+    first_call_at = next(
+        pos for pos, m in enumerate(result.messages)
+        if m.get("role") == "assistant" and m.get("tool_calls")
+    )
+    tool_positions = [
+        pos for pos, m in enumerate(result.messages) if m.get("role") == "tool"
+    ]
+    assert min(tool_positions) > first_call_at
diff --git a/tests/agent/test_runner_governance.py b/tests/agent/test_runner_governance.py
new file mode 100644
index 000000000..50e882ca6
--- /dev/null
+++ b/tests/agent/test_runner_governance.py
@@ -0,0 +1,643 @@
+"""Tests for AgentRunner context governance: backfill, orphan cleanup, microcompact, snip_history."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+
+def _make_loop(tmp_path):
+    """Build an AgentLoop on *tmp_path* while three loop collaborators are patched."""
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.bus.queue import MessageBus
+
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+
+    with patch("nanobot.agent.loop.ContextBuilder"), \
+         patch("nanobot.agent.loop.SessionManager"), \
+         patch("nanobot.agent.loop.SubagentManager") as sub_mgr_cls:
+        sub_mgr_cls.return_value.cancel_by_session = AsyncMock(return_value=0)
+        loop = AgentLoop(bus=MessageBus(), provider=provider, workspace=tmp_path)
+    return loop
+
+@pytest.mark.asyncio
+async def test_runner_uses_raw_messages_when_context_governance_fails():
+    """If ``_snip_history`` raises, the provider still gets the untouched messages."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    captured_messages: list[dict] = []
+    async def chat_with_retry(*, messages, **kwargs):
+        captured_messages[:] = messages
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider = MagicMock()
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    initial_messages = [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "hello"},
+    ]
+    runner = AgentRunner(provider)
+    runner._snip_history = MagicMock(side_effect=RuntimeError("boom"))  # type: ignore[method-assign]
+
+    result = await runner.run(AgentRunSpec(
+        initial_messages=initial_messages,
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+    assert result.final_content == "done"
+    assert captured_messages == initial_messages
+def test_snip_history_drops_orphaned_tool_results_from_trimmed_slice(monkeypatch):
+    """Snipped history must still start with system followed by a user turn."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    runner = AgentRunner(MagicMock())
+
+    tool_call = {"id": "call_1", "type": "function", "function": {"name": "ls", "arguments": "{}"}}
+    messages = [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "old user"},
+        {"role": "assistant", "content": "tool call", "tool_calls": [tool_call]},
+        {"role": "tool", "tool_call_id": "call_1", "content": "tool output"},
+        {"role": "assistant", "content": "after tool"},
+    ]
+
+    spec = AgentRunSpec(
+        initial_messages=messages,
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        context_window_tokens=2000,
+        context_block_limit=100,
+    )
+
+    # Stubbed estimators: a fixed prompt estimate plus per-message sizes keyed by content.
+    token_sizes = {
+        "system": 0,
+        "old user": 120,
+        "tool call": 120,
+        "tool output": 40,
+        "after tool": 40,
+    }
+    monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_args, **_kwargs: (500, None))
+    monkeypatch.setattr(
+        "nanobot.agent.runner.estimate_message_tokens",
+        lambda msg: token_sizes.get(str(msg.get("content")), 40),
+    )
+
+    trimmed = runner._snip_history(spec, messages)
+
+    # After the fix, the user message is recovered so the sequence is valid
+    # for providers that require system → user (e.g. GLM error 1214).
+    assert trimmed[0]["role"] == "system"
+    non_system = [entry for entry in trimmed if entry["role"] != "system"]
+    assert non_system[0]["role"] == "user", f"Expected user after system, got {non_system[0]['role']}"
+def test_backfill_missing_tool_results_inserts_error():
+    """Orphaned tool_use (no matching tool_result) should get a synthetic error."""
+    from nanobot.agent.runner import AgentRunner, _BACKFILL_CONTENT
+
+    messages = [
+        {"role": "user", "content": "hi"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {"id": "call_a", "type": "function", "function": {"name": "exec", "arguments": "{}"}},
+                {"id": "call_b", "type": "function", "function": {"name": "read_file", "arguments": "{}"}},
+            ],
+        },
+        {"role": "tool", "tool_call_id": "call_a", "name": "exec", "content": "ok"},
+    ]
+    result = AgentRunner._backfill_missing_tool_results(messages)
+    tool_msgs = [m for m in result if m.get("role") == "tool"]
+    assert len(tool_msgs) == 2
+    backfilled = [m for m in tool_msgs if m.get("tool_call_id") == "call_b"]
+    assert len(backfilled) == 1
+    assert backfilled[0]["content"] == _BACKFILL_CONTENT
+    assert backfilled[0]["name"] == "read_file"
+
+
+def test_drop_orphan_tool_results_removes_unmatched_tool_messages():
+    """A tool message whose id matches no assistant tool_call is removed; the rest stays."""
+    from nanobot.agent.runner import AgentRunner
+
+    history = [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "old user"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {"id": "call_ok", "type": "function", "function": {"name": "read_file", "arguments": "{}"}},
+            ],
+        },
+        {"role": "tool", "tool_call_id": "call_ok", "name": "read_file", "content": "ok"},
+        {"role": "tool", "tool_call_id": "call_orphan", "name": "exec", "content": "stale"},
+        {"role": "assistant", "content": "after tool"},
+    ]
+
+    cleaned = AgentRunner._drop_orphan_tool_results(history)
+    assert cleaned == [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "old user"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {"id": "call_ok", "type": "function", "function": {"name": "read_file", "arguments": "{}"}},
+            ],
+        },
+        {"role": "tool", "tool_call_id": "call_ok", "name": "read_file", "content": "ok"},
+        {"role": "assistant", "content": "after tool"},
+    ]
+
+
+@pytest.mark.asyncio
+async def test_backfill_noop_when_complete():
+    """When every tool call already has its result, the input list is returned as-is."""
+    from nanobot.agent.runner import AgentRunner
+
+    exec_call = {"id": "call_x", "type": "function", "function": {"name": "exec", "arguments": "{}"}}
+    complete_chain = [
+        {"role": "user", "content": "hi"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [exec_call],
+        },
+        {"role": "tool", "tool_call_id": "call_x", "name": "exec", "content": "done"},
+        {"role": "assistant", "content": "all good"},
+    ]
+    outcome = AgentRunner._backfill_missing_tool_results(complete_chain)
+    # Identity, not equality: a complete chain must not even be copied.
+    assert outcome is complete_chain
+
+
+@pytest.mark.asyncio
+async def test_runner_drops_orphan_tool_results_before_model_request():
+    """The orphan tool result is hidden from the provider but kept in ``result.messages``."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    sent_to_model: list[dict] = []
+
+    async def chat_with_retry(*, messages, **kwargs):
+        sent_to_model[:] = messages
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider = MagicMock()
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    history = [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "old user"},
+        {"role": "tool", "tool_call_id": "call_orphan", "name": "exec", "content": "stale"},
+        {"role": "assistant", "content": "after orphan"},
+        {"role": "user", "content": "new prompt"},
+    ]
+
+    result = await AgentRunner(provider).run(AgentRunSpec(
+        initial_messages=history,
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    # Model request: no orphan. Returned transcript: orphan still at index 2.
+    sent_tool_ids = [m.get("tool_call_id") for m in sent_to_model if m.get("role") == "tool"]
+    assert "call_orphan" not in sent_tool_ids
+    assert result.messages[2]["tool_call_id"] == "call_orphan"
+    assert result.final_content == "done"
+
+
+@pytest.mark.asyncio
+async def test_backfill_repairs_model_context_without_shifting_save_turn_boundary(tmp_path):
+    """Backfill applies to the model request only; persisting must not duplicate old tail messages."""
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.agent.runner import _BACKFILL_CONTENT
+    from nanobot.bus.events import InboundMessage
+    from nanobot.bus.queue import MessageBus
+
+    response = LLMResponse(content="new answer", tool_calls=[], usage={})
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    provider.chat_with_retry = AsyncMock(return_value=response)
+    provider.chat_stream_with_retry = AsyncMock(return_value=response)
+
+    loop = AgentLoop(
+        bus=MessageBus(),
+        provider=provider,
+        workspace=tmp_path,
+        model="test-model",
+    )
+    loop.tools.get_definitions = MagicMock(return_value=[])
+    loop.consolidator.maybe_consolidate_by_tokens = AsyncMock(return_value=False)  # type: ignore[method-assign]
+
+    # Stored history: an assistant tool call that never received its tool result.
+    session = loop.sessions.get_or_create("cli:test")
+    session.messages = [
+        {"role": "user", "content": "old user", "timestamp": "2026-01-01T00:00:00"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_missing",
+                    "type": "function",
+                    "function": {"name": "read_file", "arguments": "{}"},
+                }
+            ],
+            "timestamp": "2026-01-01T00:00:01",
+        },
+        {"role": "assistant", "content": "old tail", "timestamp": "2026-01-01T00:00:02"},
+    ]
+    loop.sessions.save(session)
+
+    inbound = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="new prompt")
+    result = await loop._process_message(inbound)
+
+    assert result is not None
+    assert result.content == "new answer"
+
+    # The provider request carries exactly one synthetic result for the orphaned call.
+    request_messages = provider.chat_with_retry.await_args.kwargs["messages"]
+    synthetic = [
+        msg
+        for msg in request_messages
+        if msg.get("role") == "tool" and msg.get("tool_call_id") == "call_missing"
+    ]
+    assert len(synthetic) == 1
+    assert synthetic[0]["content"] == _BACKFILL_CONTENT
+
+    # Persisted history: old messages unchanged, the new turn appended once.
+    compared_keys = {"role", "content", "tool_call_id", "name", "tool_calls"}
+    session_after = loop.sessions.get_or_create("cli:test")
+    persisted = [
+        {key: value for key, value in msg.items() if key in compared_keys}
+        for msg in session_after.messages
+    ]
+    assert persisted == [
+        {"role": "user", "content": "old user"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_missing",
+                    "type": "function",
+                    "function": {"name": "read_file", "arguments": "{}"},
+                }
+            ],
+        },
+        {"role": "assistant", "content": "old tail"},
+        {"role": "user", "content": "new prompt"},
+        {"role": "assistant", "content": "new answer"},
+    ]
+
+
+@pytest.mark.asyncio
+async def test_runner_backfill_only_mutates_model_context_not_returned_messages():
+    """The synthetic tool result is sent to the model but absent from ``result.messages``."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _BACKFILL_CONTENT
+
+    sent_to_model: list[dict] = []
+
+    async def chat_with_retry(*, messages, **kwargs):
+        sent_to_model[:] = messages
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider = MagicMock()
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    initial_messages = [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "old user"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_missing",
+                    "type": "function",
+                    "function": {"name": "read_file", "arguments": "{}"},
+                }
+            ],
+        },
+        {"role": "assistant", "content": "old tail"},
+        {"role": "user", "content": "new prompt"},
+    ]
+
+    spec = AgentRunSpec(
+        initial_messages=initial_messages,
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    )
+    result = await AgentRunner(provider).run(spec)
+
+    # Model request: exactly one synthetic result answers the orphaned call.
+    synthetic = [
+        msg
+        for msg in sent_to_model
+        if msg.get("role") == "tool" and msg.get("tool_call_id") == "call_missing"
+    ]
+    assert len(synthetic) == 1
+    assert synthetic[0]["content"] == _BACKFILL_CONTENT
+
+    # Returned transcript: the original history plus only the final reply.
+    compared_keys = {"role", "content", "tool_call_id", "name", "tool_calls"}
+    returned = [
+        {key: value for key, value in msg.items() if key in compared_keys}
+        for msg in result.messages
+    ]
+    assert returned == [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "old user"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_missing",
+                    "type": "function",
+                    "function": {"name": "read_file", "arguments": "{}"},
+                }
+            ],
+        },
+        {"role": "assistant", "content": "old tail"},
+        {"role": "user", "content": "new prompt"},
+        {"role": "assistant", "content": "done"},
+    ]
+
+
+# ---------------------------------------------------------------------------
+# Microcompact (stale tool result compaction)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_microcompact_replaces_old_tool_results():
+    """Tool results beyond the newest ``_MICROCOMPACT_KEEP_RECENT`` get an "omitted" stub."""
+    from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT
+
+    stale_count = 5
+    total = _MICROCOMPACT_KEEP_RECENT + stale_count
+    long_content = "x" * 600
+    messages: list[dict] = [{"role": "system", "content": "sys"}]
+    # One assistant tool call plus its 600-character result per round.
+    for i in range(total):
+        call_id = f"c{i}"
+        call = {"id": call_id, "type": "function", "function": {"name": "read_file", "arguments": "{}"}}
+        messages += [
+            {"role": "assistant", "content": "", "tool_calls": [call]},
+            {"role": "tool", "tool_call_id": call_id, "name": "read_file", "content": long_content},
+        ]
+
+    result = AgentRunner._microcompact(messages)
+
+    # Split the tool results into stubbed ones and ones left verbatim.
+    tool_contents = [m.get("content") for m in result if m.get("role") == "tool"]
+    compacted = [c for c in tool_contents if "omitted from context" in str(c)]
+    preserved = [c for c in tool_contents if c == long_content]
+    assert len(compacted) == stale_count
+    assert len(preserved) == _MICROCOMPACT_KEEP_RECENT
+
+
+@pytest.mark.asyncio
+async def test_microcompact_preserves_short_results():
+    """Short tool results (< _MICROCOMPACT_MIN_CHARS) are kept; the same list comes back."""
+    from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT
+
+    messages: list[dict] = []
+    for i in range(_MICROCOMPACT_KEEP_RECENT + 5):
+        call = {"id": f"c{i}", "type": "function", "function": {"name": "exec", "arguments": "{}"}}
+        messages += [
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [call],
+            },
+            {"role": "tool", "tool_call_id": f"c{i}", "name": "exec", "content": "short"},
+        ]
+
+    result = AgentRunner._microcompact(messages)
+
+    assert result is messages  # no copy needed — all stale results are short
+
+
+@pytest.mark.asyncio
+async def test_microcompact_skips_non_compactable_tools():
+    """Results of the 'message' tool are never replaced, however long or old they are."""
+    from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT
+
+    long_content = "y" * 1000
+    messages: list[dict] = []
+    for i in range(_MICROCOMPACT_KEEP_RECENT + 5):
+        call = {"id": f"c{i}", "type": "function", "function": {"name": "message", "arguments": "{}"}}
+        messages += [
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [call],
+            },
+            {"role": "tool", "tool_call_id": f"c{i}", "name": "message", "content": long_content},
+        ]
+
+    result = AgentRunner._microcompact(messages)
+
+    assert result is messages  # no compactable tools found
+
+
+def test_governance_repairs_orphans_after_snip():
+    """After _snip_history clips an assistant+tool_calls, the second
+    _drop_orphan_tool_results pass must clean up the resulting orphans.
+
+    The snip is simulated by slicing the full history rather than by calling
+    ``_snip_history`` itself.
+    """
+    from nanobot.agent.runner import AgentRunner
+
+    messages = [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "old msg"},
+        {"role": "assistant", "content": None,
+         "tool_calls": [{"id": "tc_old", "type": "function",
+                         "function": {"name": "search", "arguments": "{}"}}]},
+        {"role": "tool", "tool_call_id": "tc_old", "name": "search",
+         "content": "old result"},
+        {"role": "assistant", "content": "old answer"},
+        {"role": "user", "content": "new msg"},
+    ]
+
+    # Simulate snipping that keeps only the tail: drop the old user turn and
+    # the assistant with tool_calls, but keep its tool result (orphan).
+    # Sliced from ``messages`` so the fixture is not duplicated by hand.
+    snipped = [messages[0], *messages[3:]]
+    assert [m["role"] for m in snipped] == ["system", "tool", "assistant", "user"]
+
+    cleaned = AgentRunner._drop_orphan_tool_results(snipped)
+    # The orphan tool result should be removed.
+    assert not any(
+        m.get("role") == "tool" and m.get("tool_call_id") == "tc_old"
+        for m in cleaned
+    )
+
+
+def test_governance_fallback_still_repairs_orphans():
+    """Running ``_drop_orphan_tool_results`` then ``_backfill_missing_tool_results``
+    (the repairs expected of the governance fallback) removes an orphan tool result."""
+    from nanobot.agent.runner import AgentRunner
+
+    orphan = {"role": "tool", "tool_call_id": "orphan_tc", "name": "read", "content": "stale"}
+    # No assistant message announces ``orphan_tc``, so ``orphan`` is unmatched.
+    history = [
+        {"role": "user", "content": "hello"},
+        orphan,
+        {"role": "assistant", "content": "hi"},
+    ]
+
+    without_orphans = AgentRunner._drop_orphan_tool_results(history)
+    repaired = AgentRunner._backfill_missing_tool_results(without_orphans)
+
+    assert all(m.get("tool_call_id") != "orphan_tc" for m in repaired)
+def test_snip_history_preserves_user_message_after_truncation(monkeypatch):
+    """Truncation must never leave system→assistant at the head of the history.
+
+    If the only user message falls outside the window ``_snip_history`` keeps,
+    the nearest user message has to be recovered; providers like GLM reject
+    system→assistant with error 1214.
+
+    Scenario from the bug report:
+    - Normal interaction: user asks, assistant calls tool, tool returns,
+      assistant replies.
+    - Injection adds a phantom user message, triggering more tool calls.
+    - _snip_history activates, keeping only recent assistant/tool pairs.
+    - The injected user message is in the truncated prefix and gets lost.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    # Each assistant turn gets its own freshly built tool_call dict.
+    def exec_call(call_id):
+        return {
+            "id": call_id,
+            "type": "function",
+            "function": {"name": "exec", "arguments": "{}"},
+        }
+
+    messages = [
+        {"role": "system", "content": "system"},
+        {"role": "assistant", "content": "previous reply"},
+        {"role": "user", "content": ".nanobot的同目录"},
+        {"role": "assistant", "content": None, "tool_calls": [exec_call("tc_1")]},
+        {"role": "tool", "tool_call_id": "tc_1", "content": "tool output 1"},
+        {"role": "assistant", "content": None, "tool_calls": [exec_call("tc_2")]},
+        {"role": "tool", "tool_call_id": "tc_2", "content": "tool output 2"},
+    ]
+
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    runner = AgentRunner(MagicMock())
+
+    spec = AgentRunSpec(
+        initial_messages=messages,
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        context_window_tokens=2000,
+        context_block_limit=100,
+    )
+
+    # Make estimate_prompt_tokens_chain report above budget so _snip_history activates.
+    monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_a, **_kw: (500, None))
+    # Make kept window small: only the last 2 messages fit the budget.
+    token_sizes = {
+        "system": 0,
+        "previous reply": 200,
+        ".nanobot的同目录": 80,
+        "tool output 1": 80,
+        "tool output 2": 80,
+    }
+    monkeypatch.setattr(
+        "nanobot.agent.runner.estimate_message_tokens",
+        lambda msg: token_sizes.get(str(msg.get("content")), 100),
+    )
+
+    trimmed = runner._snip_history(spec, messages)
+
+    # The first non-system message MUST be user (not assistant).
+    non_system = [m for m in trimmed if m.get("role") != "system"]
+    assert non_system, "trimmed should contain at least one non-system message"
+    assert non_system[0]["role"] == "user", (
+        f"First non-system message must be 'user', got '{non_system[0]['role']}'. "
+        f"Roles: {[m['role'] for m in trimmed]}"
+    )
+
+
+def test_snip_history_no_user_at_all_falls_back_gracefully(monkeypatch):
+    """Edge case: a history without any user message must not crash ``_snip_history``,
+    and ``LLMProvider._enforce_role_alternation`` must be able to repair its output."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.providers.base import LLMProvider
+
+    provider = MagicMock()
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    runner = AgentRunner(provider)
+
+    messages = [
+        {"role": "system", "content": "system"},
+        {"role": "assistant", "content": "reply"},
+        {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
+        {"role": "assistant", "content": "reply 2"},
+        {"role": "tool", "tool_call_id": "tc_2", "content": "result 2"},
+    ]
+
+    spec = AgentRunSpec(
+        initial_messages=messages,
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        context_window_tokens=2000,
+        context_block_limit=100,
+    )
+
+    monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_a, **_kw: (500, None))
+    monkeypatch.setattr(
+        "nanobot.agent.runner.estimate_message_tokens",
+        lambda msg: 100,
+    )
+
+    trimmed = runner._snip_history(spec, messages)
+
+    # Should not crash. The result should still be a valid list.
+    assert isinstance(trimmed, list)
+    # Must have at least system.
+    assert "system" in [m.get("role") for m in trimmed]
+    # The _enforce_role_alternation safety net must be able to fix whatever
+    # _snip_history returns here — verify it produces a valid sequence.
+    fixed = LLMProvider._enforce_role_alternation(trimmed)
+    non_system = [m for m in fixed if m["role"] != "system"]
+    if non_system:
+        assert non_system[0]["role"] in ("user", "tool"), (
+            f"Safety net should ensure first non-system is user/tool, got {non_system[0]['role']}"
+        )
diff --git a/tests/agent/test_runner_hooks.py b/tests/agent/test_runner_hooks.py
new file mode 100644
index 000000000..7718eee20
--- /dev/null
+++ b/tests/agent/test_runner_hooks.py
@@ -0,0 +1,172 @@
+"""Tests for AgentRunner hook lifecycle: ordering, streaming deltas,
+cached-token propagation, and hook context."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars  # config default, passed as max_tool_result_chars to the specs below
+
+
+@pytest.mark.asyncio
+async def test_runner_calls_hooks_in_order():
+ from nanobot.agent.hook import AgentHook, AgentHookContext
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ provider = MagicMock(spec=LLMProvider)
+ call_count = {"n": 0}
+ events: list[tuple] = []
+
+ async def chat_with_retry(**kwargs):
+ call_count["n"] += 1
+ if call_count["n"] == 1:
+ return LLMResponse(
+ content="thinking",
+ tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
+ )
+ return LLMResponse(content="done", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ tools.execute = AsyncMock(return_value="tool result")
+
+ class RecordingHook(AgentHook):
+ async def before_iteration(self, context: AgentHookContext) -> None:
+ events.append(("before_iteration", context.iteration))
+
+ async def before_execute_tools(self, context: AgentHookContext) -> None:
+ events.append((
+ "before_execute_tools",
+ context.iteration,
+ [tc.name for tc in context.tool_calls],
+ ))
+
+ async def after_iteration(self, context: AgentHookContext) -> None:
+ events.append((
+ "after_iteration",
+ context.iteration,
+ context.final_content,
+ list(context.tool_results),
+ list(context.tool_events),
+ context.stop_reason,
+ ))
+
+ def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None:
+ events.append(("finalize_content", context.iteration, content))
+ return content.upper() if content else content  # upper-casing makes the hook's effect visible in final_content
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[],
+ tools=tools,
+ model="test-model",
+ max_iterations=3,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ hook=RecordingHook(),
+ ))
+
+ assert result.final_content == "DONE"
+ assert events == [
+ ("before_iteration", 0),
+ ("before_execute_tools", 0, ["list_dir"]),
+ (
+ "after_iteration",
+ 0,
+ None,
+ ["tool result"],
+ [{"name": "list_dir", "status": "ok", "detail": "tool result"}],
+ None,
+ ),
+ ("before_iteration", 1),
+ ("finalize_content", 1, "done"),
+ ("after_iteration", 1, "DONE", [], [], "completed"),
+ ]
+
+
+@pytest.mark.asyncio
+async def test_runner_streaming_hook_receives_deltas_and_end_signal():
+ from nanobot.agent.hook import AgentHook, AgentHookContext
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ provider = MagicMock(spec=LLMProvider)
+ streamed: list[str] = []
+ endings: list[bool] = []
+
+ async def chat_stream_with_retry(*, on_content_delta, **kwargs):
+ await on_content_delta("he")
+ await on_content_delta("llo")
+ return LLMResponse(content="hello", tool_calls=[], usage={})
+
+ provider.chat_stream_with_retry = chat_stream_with_retry
+ provider.chat_with_retry = AsyncMock()  # asserted un-awaited below: the streaming call must be used instead
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+
+ class StreamingHook(AgentHook):
+ def wants_streaming(self) -> bool:
+ return True
+
+ async def on_stream(self, context: AgentHookContext, delta: str) -> None:
+ streamed.append(delta)
+
+ async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None:
+ endings.append(resuming)
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[],
+ tools=tools,
+ model="test-model",
+ max_iterations=1,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ hook=StreamingHook(),
+ ))
+
+ assert result.final_content == "hello"
+ assert streamed == ["he", "llo"]
+ assert endings == [False]
+ provider.chat_with_retry.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_runner_passes_cached_tokens_to_hook_context():
+ """after_iteration should see the provider's cached_tokens in context.usage."""
+ from nanobot.agent.hook import AgentHook, AgentHookContext
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ provider = MagicMock(spec=LLMProvider)
+ captured_usage: list[dict] = []
+
+ class UsageHook(AgentHook):
+ async def after_iteration(self, context: AgentHookContext) -> None:
+ captured_usage.append(dict(context.usage))
+
+ async def chat_with_retry(**kwargs):
+ return LLMResponse(
+ content="done",
+ tool_calls=[],
+ usage={"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150},
+ )
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+
+ runner = AgentRunner(provider)
+ await runner.run(AgentRunSpec(
+ initial_messages=[],
+ tools=tools,
+ model="test-model",
+ max_iterations=1,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ hook=UsageHook(),
+ ))
+
+ assert len(captured_usage) == 1
+ assert captured_usage[0]["cached_tokens"] == 150
diff --git a/tests/agent/test_runner_injections.py b/tests/agent/test_runner_injections.py
new file mode 100644
index 000000000..1aa504e32
--- /dev/null
+++ b/tests/agent/test_runner_injections.py
@@ -0,0 +1,1038 @@
+"""Tests for the mid-turn injection system: drain, checkpoints, pending queues, error paths."""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import time
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars  # config default used as max_tool_result_chars by the run() tests below
+
+
+def _make_injection_callback(queue: asyncio.Queue):
+ """Return an async callback that empties *queue* and returns its items as a list."""
+ async def inject_cb():
+ items = []
+ while not queue.empty():
+ items.append(await queue.get())
+ return items
+ return inject_cb
+
+
+def _make_loop(tmp_path):  # AgentLoop on a mock provider, with ContextBuilder/SessionManager/SubagentManager patched
+ from nanobot.agent.loop import AgentLoop
+ from nanobot.bus.queue import MessageBus
+
+ bus = MessageBus()
+ provider = MagicMock()
+ provider.get_default_model.return_value = "test-model"
+
+ with patch("nanobot.agent.loop.ContextBuilder"), \
+ patch("nanobot.agent.loop.SessionManager"), \
+ patch("nanobot.agent.loop.SubagentManager") as MockSubMgr:
+ MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0)
+ loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path)
+ return loop
+
+@pytest.mark.asyncio
+async def test_drain_injections_returns_empty_when_no_callback():
+ """_drain_injections returns [] when the spec has no injection_callback."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ provider = MagicMock()
+ runner = AgentRunner(provider)
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ spec = AgentRunSpec(
+ initial_messages=[], tools=tools, model="m",
+ max_iterations=1, max_tool_result_chars=1000,
+ injection_callback=None,
+ )
+ result = await runner._drain_injections(spec)
+ assert result == []
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_extracts_content_from_inbound_messages():
+ """Each InboundMessage from the callback becomes a user-role dict holding its .content."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+ from nanobot.bus.events import InboundMessage
+
+ provider = MagicMock()
+ runner = AgentRunner(provider)
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+
+ msgs = [
+ InboundMessage(channel="cli", sender_id="u", chat_id="c", content="hello"),
+ InboundMessage(channel="cli", sender_id="u", chat_id="c", content="world"),
+ ]
+
+ async def cb():
+ return msgs
+
+ spec = AgentRunSpec(
+ initial_messages=[], tools=tools, model="m",
+ max_iterations=1, max_tool_result_chars=1000,
+ injection_callback=cb,
+ )
+ result = await runner._drain_injections(spec)
+ assert result == [
+ {"role": "user", "content": "hello"},
+ {"role": "user", "content": "world"},
+ ]
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_passes_limit_to_callback_when_supported():
+ """Limit-aware callbacks can preserve overflow in their own queue."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTIONS_PER_TURN
+ from nanobot.bus.events import InboundMessage
+
+ provider = MagicMock()
+ runner = AgentRunner(provider)
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ seen_limits: list[int] = []
+
+ msgs = [
+ InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg{i}")
+ for i in range(_MAX_INJECTIONS_PER_TURN + 3)
+ ]
+
+ async def cb(*, limit: int):
+ seen_limits.append(limit)
+ return msgs[:limit]
+
+ spec = AgentRunSpec(
+ initial_messages=[], tools=tools, model="m",
+ max_iterations=1, max_tool_result_chars=1000,
+ injection_callback=cb,
+ )
+ result = await runner._drain_injections(spec)
+ assert seen_limits == [_MAX_INJECTIONS_PER_TURN]
+ assert result == [
+ {"role": "user", "content": "msg0"},
+ {"role": "user", "content": "msg1"},
+ {"role": "user", "content": "msg2"},
+ ]
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_skips_empty_content():
+ """Messages whose content is empty or whitespace-only should be filtered out."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+ from nanobot.bus.events import InboundMessage
+
+ provider = MagicMock()
+ runner = AgentRunner(provider)
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+
+ msgs = [
+ InboundMessage(channel="cli", sender_id="u", chat_id="c", content=""),
+ InboundMessage(channel="cli", sender_id="u", chat_id="c", content=" "),
+ InboundMessage(channel="cli", sender_id="u", chat_id="c", content="valid"),
+ ]
+
+ async def cb():
+ return msgs
+
+ spec = AgentRunSpec(
+ initial_messages=[], tools=tools, model="m",
+ max_iterations=1, max_tool_result_chars=1000,
+ injection_callback=cb,
+ )
+ result = await runner._drain_injections(spec)
+ assert result == [{"role": "user", "content": "valid"}]
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_handles_callback_exception():
+ """If the callback raises, _drain_injections must not propagate it and returns []."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ provider = MagicMock()
+ runner = AgentRunner(provider)
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+
+ async def cb():
+ raise RuntimeError("boom")
+
+ spec = AgentRunSpec(
+ initial_messages=[], tools=tools, model="m",
+ max_iterations=1, max_tool_result_chars=1000,
+ injection_callback=cb,
+ )
+ result = await runner._drain_injections(spec)
+ assert result == []
+
+
+@pytest.mark.asyncio
+async def test_checkpoint1_injects_after_tool_execution():
+ """A queued follow-up is injected after tool execution and is present in the second LLM call."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+ from nanobot.bus.events import InboundMessage
+
+ provider = MagicMock()
+ call_count = {"n": 0}
+ captured_messages = []
+
+ async def chat_with_retry(*, messages, **kwargs):
+ call_count["n"] += 1
+ captured_messages.append(list(messages))
+ if call_count["n"] == 1:
+ return LLMResponse(
+ content="using tool",
+ tool_calls=[ToolCallRequest(id="c1", name="read_file", arguments={"path": "x"})],
+ usage={},
+ )
+ return LLMResponse(content="final answer", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ tools.execute = AsyncMock(return_value="file content")
+
+ injection_queue = asyncio.Queue()
+ inject_cb = _make_injection_callback(injection_queue)
+
+ # Put a follow-up message in the queue before the run starts
+ await injection_queue.put(
+ InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up question")
+ )
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[{"role": "user", "content": "hello"}],
+ tools=tools,
+ model="test-model",
+ max_iterations=5,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ injection_callback=inject_cb,
+ ))
+
+ assert result.had_injections is True
+ assert result.final_content == "final answer"
+ # The second call should have the injected user message
+ assert call_count["n"] == 2
+ last_messages = captured_messages[-1]
+ injected = [m for m in last_messages if m.get("role") == "user" and m.get("content") == "follow-up question"]
+ assert len(injected) == 1
+
+
+@pytest.mark.asyncio
+async def test_checkpoint2_injects_after_final_response_with_resuming_stream():
+ """on_stream_end gets resuming=True while a follow-up is pending and False on the last response."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+ from nanobot.agent.hook import AgentHook, AgentHookContext
+ from nanobot.bus.events import InboundMessage
+
+ provider = MagicMock()
+ call_count = {"n": 0}
+ stream_end_calls = []
+
+ class TrackingHook(AgentHook):
+ def wants_streaming(self) -> bool:
+ return True
+
+ async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None:
+ stream_end_calls.append(resuming)
+
+ def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None:
+ return content
+
+ async def chat_stream_with_retry(*, messages, on_content_delta=None, **kwargs):
+ call_count["n"] += 1
+ if call_count["n"] == 1:
+ return LLMResponse(content="first answer", tool_calls=[], usage={})
+ return LLMResponse(content="second answer", tool_calls=[], usage={})
+
+ provider.chat_stream_with_retry = chat_stream_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+
+ injection_queue = asyncio.Queue()
+ inject_cb = _make_injection_callback(injection_queue)
+
+ # Queue a follow-up before the run so it is already pending when the first response ends
+ await injection_queue.put(
+ InboundMessage(channel="cli", sender_id="u", chat_id="c", content="quick follow-up")
+ )
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[{"role": "user", "content": "hello"}],
+ tools=tools,
+ model="test-model",
+ max_iterations=5,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ hook=TrackingHook(),
+ injection_callback=inject_cb,
+ ))
+
+ assert result.had_injections is True
+ assert result.final_content == "second answer"
+ assert call_count["n"] == 2
+ # First stream_end should have resuming=True (because injections found)
+ assert stream_end_calls[0] is True
+ # Second (final) stream_end should have resuming=False
+ assert stream_end_calls[-1] is False
+
+
+@pytest.mark.asyncio
+async def test_checkpoint2_preserves_final_response_in_history_before_followup():
+ """A follow-up injected after a final answer must still see that answer in history."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+ from nanobot.bus.events import InboundMessage
+
+ provider = MagicMock()
+ call_count = {"n": 0}
+ captured_messages = []
+
+ async def chat_with_retry(*, messages, **kwargs):
+ call_count["n"] += 1
+ captured_messages.append([dict(message) for message in messages])  # shallow-copy each message for the snapshot
+ if call_count["n"] == 1:
+ return LLMResponse(content="first answer", tool_calls=[], usage={})
+ return LLMResponse(content="second answer", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+
+ injection_queue = asyncio.Queue()
+ inject_cb = _make_injection_callback(injection_queue)
+
+ await injection_queue.put(
+ InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up question")
+ )
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[{"role": "user", "content": "hello"}],
+ tools=tools,
+ model="test-model",
+ max_iterations=5,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ injection_callback=inject_cb,
+ ))
+
+ assert result.final_content == "second answer"
+ assert call_count["n"] == 2
+ assert captured_messages[-1] == [
+ {"role": "user", "content": "hello"},
+ {"role": "assistant", "content": "first answer"},
+ {"role": "user", "content": "follow-up question"},
+ ]
+ assert [
+ {"role": message["role"], "content": message["content"]}
+ for message in result.messages
+ if message.get("role") == "assistant"
+ ] == [
+ {"role": "assistant", "content": "first answer"},
+ {"role": "assistant", "content": "second answer"},
+ ]
+
+
+@pytest.mark.asyncio
+async def test_loop_injected_followup_preserves_image_media(tmp_path):
+ """Mid-turn follow-ups with images should keep multimodal content."""
+ from nanobot.agent.loop import AgentLoop
+ from nanobot.bus.events import InboundMessage
+ from nanobot.bus.queue import MessageBus
+
+ image_path = tmp_path / "followup.png"
+ image_path.write_bytes(base64.b64decode(  # tiny base64-encoded PNG used as the image fixture
+ "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+yF9kAAAAASUVORK5CYII="
+ ))
+
+ bus = MessageBus()
+ provider = MagicMock()
+ provider.get_default_model.return_value = "test-model"
+ captured_messages: list[list[dict]] = []
+ call_count = {"n": 0}
+
+ async def chat_with_retry(*, messages, **kwargs):
+ call_count["n"] += 1
+ captured_messages.append(list(messages))
+ if call_count["n"] == 1:
+ return LLMResponse(content="first answer", tool_calls=[], usage={})
+ return LLMResponse(content="second answer", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
+ loop.tools.get_definitions = MagicMock(return_value=[])
+
+ pending_queue = asyncio.Queue()
+ await pending_queue.put(InboundMessage(
+ channel="cli",
+ sender_id="u",
+ chat_id="c",
+ content="",  # no text: the follow-up carries only the image
+ media=[str(image_path)],
+ ))
+
+ final_content, _, _, _, had_injections = await loop._run_agent_loop(
+ [{"role": "user", "content": "hello"}],
+ channel="cli",
+ chat_id="c",
+ pending_queue=pending_queue,
+ )
+
+ assert final_content == "second answer"
+ assert had_injections is True
+ assert call_count["n"] == 2
+ injected_user_messages = [
+ message for message in captured_messages[-1]
+ if message.get("role") == "user" and isinstance(message.get("content"), list)
+ ]
+ assert injected_user_messages
+ assert any(
+ block.get("type") == "image_url"
+ for block in injected_user_messages[-1]["content"]
+ if isinstance(block, dict)
+ )
+
+
+@pytest.mark.asyncio
+async def test_runner_merges_multiple_injected_user_messages_without_losing_media():
+ """Multiple injected follow-ups should not create lossy consecutive user messages."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ provider = MagicMock()
+ call_count = {"n": 0}
+ captured_messages = []
+
+ async def chat_with_retry(*, messages, **kwargs):
+ call_count["n"] += 1
+ captured_messages.append([dict(message) for message in messages])
+ if call_count["n"] == 1:
+ return LLMResponse(content="first answer", tool_calls=[], usage={})
+ return LLMResponse(content="second answer", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+
+ async def inject_cb():
+ if call_count["n"] == 1:
+ return [
+ {
+ "role": "user",
+ "content": [
+ {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
+ {"type": "text", "text": "look at this"},
+ ],
+ },
+ {"role": "user", "content": "and answer briefly"},
+ ]
+ return []
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[{"role": "user", "content": "hello"}],
+ tools=tools,
+ model="test-model",
+ max_iterations=5,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ injection_callback=inject_cb,
+ ))
+
+ assert result.final_content == "second answer"
+ assert call_count["n"] == 2
+ second_call = captured_messages[-1]
+ user_messages = [message for message in second_call if message.get("role") == "user"]
+ assert len(user_messages) == 2  # the initial "hello" plus one message merged from both injections
+ injected = user_messages[-1]
+ assert isinstance(injected["content"], list)
+ assert any(
+ block.get("type") == "image_url"
+ for block in injected["content"]
+ if isinstance(block, dict)
+ )
+ assert any(
+ block.get("type") == "text" and block.get("text") == "and answer briefly"
+ for block in injected["content"]
+ if isinstance(block, dict)
+ )
+
+
+@pytest.mark.asyncio
+async def test_injection_cycles_capped_at_max():
+ """Injection cycles should be capped at _MAX_INJECTION_CYCLES."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES
+ from nanobot.bus.events import InboundMessage
+
+ provider = MagicMock()
+ call_count = {"n": 0}
+
+ async def chat_with_retry(*, messages, **kwargs):
+ call_count["n"] += 1
+ return LLMResponse(content=f"answer-{call_count['n']}", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+
+ drain_count = {"n": 0}
+
+ async def inject_cb():
+ drain_count["n"] += 1
+ # NOTE(review): the callback itself stops injecting after _MAX_INJECTION_CYCLES drains, so this may pass without a runner-side cap — confirm
+ if drain_count["n"] <= _MAX_INJECTION_CYCLES:
+ return [InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg-{drain_count['n']}")]
+ return []
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[{"role": "user", "content": "start"}],
+ tools=tools,
+ model="test-model",
+ max_iterations=20,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ injection_callback=inject_cb,
+ ))
+
+ assert result.had_injections is True
+ # Should be capped: _MAX_INJECTION_CYCLES injection rounds + 1 final round
+ assert call_count["n"] == _MAX_INJECTION_CYCLES + 1
+
+
+@pytest.mark.asyncio
+async def test_no_injections_flag_is_false_by_default():
+ """had_injections should be False when the spec has no injection callback."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ provider = MagicMock()
+
+ async def chat_with_retry(**kwargs):
+ return LLMResponse(content="done", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[{"role": "user", "content": "hi"}],
+ tools=tools,
+ model="test-model",
+ max_iterations=1,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ ))
+
+ assert result.had_injections is False
+
+
+@pytest.mark.asyncio
+async def test_pending_queue_cleanup_on_dispatch(tmp_path):
+ """_dispatch must not leave an entry for the message's session in _pending_queues."""
+ loop = _make_loop(tmp_path)
+
+ async def chat_with_retry(**kwargs):
+ return LLMResponse(content="done", tool_calls=[], usage={})
+
+ loop.provider.chat_with_retry = chat_with_retry
+
+ from nanobot.bus.events import InboundMessage
+
+ msg = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="hello")
+ # The queue should not exist before dispatch
+ assert msg.session_key not in loop._pending_queues
+
+ await loop._dispatch(msg)
+
+ # The queue should be cleaned up after dispatch
+ assert msg.session_key not in loop._pending_queues
+
+
+@pytest.mark.asyncio
+async def test_followup_routed_to_pending_queue(tmp_path):
+ """Unified-session follow-ups should route into the active pending queue."""
+ from nanobot.agent.loop import UNIFIED_SESSION_KEY
+ from nanobot.bus.events import InboundMessage
+
+ loop = _make_loop(tmp_path)
+ loop._unified_session = True
+ loop._dispatch = AsyncMock() # type: ignore[method-assign]
+
+ pending = asyncio.Queue(maxsize=20)
+ loop._pending_queues[UNIFIED_SESSION_KEY] = pending
+
+ run_task = asyncio.create_task(loop.run())
+ msg = InboundMessage(channel="discord", sender_id="u", chat_id="c", content="follow-up")
+ await loop.bus.publish_inbound(msg)
+
+ deadline = time.time() + 2
+ while pending.empty() and time.time() < deadline:
+ await asyncio.sleep(0.01)
+
+ loop.stop()
+ await asyncio.wait_for(run_task, timeout=2)
+
+ assert loop._dispatch.await_count == 0
+ assert not pending.empty()
+ queued_msg = pending.get_nowait()
+ assert queued_msg.content == "follow-up"
+ assert queued_msg.session_key == UNIFIED_SESSION_KEY
+
+
+@pytest.mark.asyncio
+async def test_pending_queue_preserves_overflow_for_next_injection_cycle(tmp_path):
+ """Pending queue should leave overflow messages queued for later drains."""
+ from nanobot.agent.loop import AgentLoop
+ from nanobot.bus.events import InboundMessage
+ from nanobot.bus.queue import MessageBus
+ from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN
+
+ bus = MessageBus()
+ provider = MagicMock()
+ provider.get_default_model.return_value = "test-model"
+ captured_messages: list[list[dict]] = []
+ call_count = {"n": 0}
+
+ async def chat_with_retry(*, messages, **kwargs):
+ call_count["n"] += 1
+ captured_messages.append([dict(message) for message in messages])
+ return LLMResponse(content=f"answer-{call_count['n']}", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
+ loop.tools.get_definitions = MagicMock(return_value=[])
+
+ pending_queue = asyncio.Queue()
+ total_followups = _MAX_INJECTIONS_PER_TURN + 2
+ for idx in range(total_followups):
+ await pending_queue.put(InboundMessage(
+ channel="cli",
+ sender_id="u",
+ chat_id="c",
+ content=f"follow-up-{idx}",
+ ))
+
+ final_content, _, _, _, had_injections = await loop._run_agent_loop(
+ [{"role": "user", "content": "hello"}],
+ channel="cli",
+ chat_id="c",
+ pending_queue=pending_queue,
+ )
+
+ assert final_content == "answer-3"
+ assert had_injections is True
+ assert call_count["n"] == 3  # initial turn plus, presumably, one full batch and one overflow batch — confirm
+ flattened_user_content = "\n".join(
+ message["content"]
+ for message in captured_messages[-1]
+ if message.get("role") == "user" and isinstance(message.get("content"), str)
+ )
+ for idx in range(total_followups):
+ assert f"follow-up-{idx}" in flattened_user_content
+ assert pending_queue.empty()
+
+
+@pytest.mark.asyncio
+async def test_pending_queue_full_falls_back_to_queued_task(tmp_path):
+ """QueueFull should preserve the message by dispatching a queued task."""
+ from nanobot.bus.events import InboundMessage
+
+ loop = _make_loop(tmp_path)
+ loop._dispatch = AsyncMock() # type: ignore[method-assign]
+
+ pending = asyncio.Queue(maxsize=1)
+ pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="already queued"))
+ loop._pending_queues["cli:c"] = pending
+
+ run_task = asyncio.create_task(loop.run())
+ msg = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up")
+ await loop.bus.publish_inbound(msg)
+
+ deadline = time.time() + 2
+ while loop._dispatch.await_count == 0 and time.time() < deadline:
+ await asyncio.sleep(0.01)
+
+ loop.stop()
+ await asyncio.wait_for(run_task, timeout=2)
+
+ assert loop._dispatch.await_count == 1
+ dispatched_msg = loop._dispatch.await_args.args[0]
+ assert dispatched_msg.content == "follow-up"
+ assert pending.qsize() == 1
+
+
+@pytest.mark.asyncio
+async def test_dispatch_republishes_leftover_queue_messages(tmp_path):
+ """Messages left in the pending queue after _dispatch are re-published to the bus.
+
+ This mirrors the finally-block cleanup that prevents message loss when
+ the runner exits early (e.g., max_iterations, tool_error) with messages
+ still in the queue.
+ """
+ from nanobot.bus.events import InboundMessage
+
+ loop = _make_loop(tmp_path)
+ bus = loop.bus
+
+ # Simulate a completed dispatch by manually registering a queue
+ # with leftover messages, then running the cleanup logic directly.
+ pending = asyncio.Queue(maxsize=20)
+ session_key = "cli:c"
+ loop._pending_queues[session_key] = pending
+ pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="leftover-1"))
+ pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="leftover-2"))
+
+ # NOTE(review): this re-implements the finally-block cleanup here instead of invoking _dispatch — keep it in sync with the loop
+ queue = loop._pending_queues.pop(session_key, None)
+ assert queue is not None
+ leftover = 0
+ while True:
+ try:
+ item = queue.get_nowait()
+ except asyncio.QueueEmpty:
+ break
+ await bus.publish_inbound(item)
+ leftover += 1
+
+ assert leftover == 2
+
+ # Verify the messages are now on the bus
+ msgs = []
+ while not bus.inbound.empty():
+ msgs.append(await asyncio.wait_for(bus.consume_inbound(), timeout=0.5))
+ contents = [m.content for m in msgs]
+ assert "leftover-1" in contents
+ assert "leftover-2" in contents
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_on_fatal_tool_error():
+ """Pending injections should be drained even when a fatal tool error occurs."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+ from nanobot.bus.events import InboundMessage
+
+ provider = MagicMock()
+ call_count = {"n": 0}
+
+ async def chat_with_retry(*, messages, **kwargs):
+ call_count["n"] += 1
+ if call_count["n"] == 1:
+ return LLMResponse(
+ content="",
+ tool_calls=[ToolCallRequest(id="c1", name="exec", arguments={"cmd": "bad"})],
+ usage={},
+ )
+ # Second call: respond normally to the injected follow-up
+ return LLMResponse(content="reply to follow-up", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ tools.execute = AsyncMock(side_effect=RuntimeError("tool exploded"))  # every tool execution raises
+
+ injection_queue = asyncio.Queue()
+ inject_cb = _make_injection_callback(injection_queue)
+
+ await injection_queue.put(
+ InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after error")
+ )
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[{"role": "user", "content": "hello"}],
+ tools=tools,
+ model="test-model",
+ max_iterations=5,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ fail_on_tool_error=True,
+ injection_callback=inject_cb,
+ ))
+
+ assert result.had_injections is True
+ assert result.final_content == "reply to follow-up"
+ # The injection should be in the messages history
+ injected = [
+ m for m in result.messages
+ if m.get("role") == "user" and m.get("content") == "follow-up after error"
+ ]
+ assert len(injected) == 1
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_on_llm_error():
+ """Pending injections should be drained when the LLM returns an error finish_reason."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+ from nanobot.bus.events import InboundMessage
+
+ provider = MagicMock()
+ call_count = {"n": 0}
+
+ async def chat_with_retry(*, messages, **kwargs):
+ call_count["n"] += 1
+ if call_count["n"] == 1:
+ return LLMResponse(
+ content=None,
+ tool_calls=[],
+ finish_reason="error",  # first call only: simulated error response
+ usage={},
+ )
+ # Second call: respond normally to the injected follow-up
+ return LLMResponse(content="recovered answer", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+
+ injection_queue = asyncio.Queue()
+ inject_cb = _make_injection_callback(injection_queue)
+
+ await injection_queue.put(
+ InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after LLM error")
+ )
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[
+ {"role": "user", "content": "hello"},
+ {"role": "assistant", "content": "previous response"},
+ {"role": "user", "content": "trigger error"},
+ ],
+ tools=tools,
+ model="test-model",
+ max_iterations=5,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ injection_callback=inject_cb,
+ ))
+
+ assert result.had_injections is True
+ assert result.final_content == "recovered answer"
+ injected = [
+ m for m in result.messages
+ if m.get("role") == "user" and "follow-up after LLM error" in str(m.get("content", ""))
+ ]
+ assert len(injected) == 1
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_on_empty_final_response():
+ """Pending injections should be drained when the runner exits due to empty response."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_EMPTY_RETRIES
+ from nanobot.bus.events import InboundMessage
+
+ provider = MagicMock()
+ call_count = {"n": 0}
+
+ async def chat_with_retry(*, messages, **kwargs):
+ call_count["n"] += 1
+ if call_count["n"] <= _MAX_EMPTY_RETRIES + 1:  # the first _MAX_EMPTY_RETRIES + 1 calls return empty content
+ return LLMResponse(content="", tool_calls=[], usage={})
+ # After retries exhausted + injection drain, respond normally
+ return LLMResponse(content="answer after empty", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+
+ injection_queue = asyncio.Queue()
+ inject_cb = _make_injection_callback(injection_queue)
+
+ await injection_queue.put(
+ InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after empty")
+ )
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[
+ {"role": "user", "content": "hello"},
+ {"role": "assistant", "content": "previous response"},
+ {"role": "user", "content": "trigger empty"},
+ ],
+ tools=tools,
+ model="test-model",
+ max_iterations=10,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ injection_callback=inject_cb,
+ ))
+
+ assert result.had_injections is True
+ assert result.final_content == "answer after empty"
+ injected = [
+ m for m in result.messages
+ if m.get("role") == "user" and "follow-up after empty" in str(m.get("content", ""))
+ ]
+ assert len(injected) == 1
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_on_max_iterations():
+ """Pending injections should be drained when the runner hits max_iterations.
+
+ Unlike other error paths, max_iterations cannot continue the loop, so
+ injections are appended to messages but not processed by the LLM.
+ The key point is they are consumed from the queue to prevent re-publish.
+ """
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+ from nanobot.bus.events import InboundMessage
+
+ provider = MagicMock()
+ # A dict (rather than an int) lets the nested coroutine mutate the counter in place.
+ call_count = {"n": 0}
+
+ async def chat_with_retry(*, messages, **kwargs):
+ # Every response requests another tool call (with a unique id), so the
+ # model never produces a final answer and the iteration cap is reached.
+ call_count["n"] += 1
+ return LLMResponse(
+ content="",
+ tool_calls=[ToolCallRequest(id=f"c{call_count['n']}", name="read_file", arguments={"path": "x"})],
+ usage={},
+ )
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ tools.execute = AsyncMock(return_value="file content")
+
+ injection_queue = asyncio.Queue()
+ inject_cb = _make_injection_callback(injection_queue)
+
+ # Enqueue the follow-up before the run starts.
+ await injection_queue.put(
+ InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after max iters")
+ )
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[{"role": "user", "content": "hello"}],
+ tools=tools,
+ model="test-model",
+ max_iterations=2,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ injection_callback=inject_cb,
+ ))
+
+ assert result.stop_reason == "max_iterations"
+ assert result.had_injections is True
+ # The injection was consumed from the queue (preventing re-publish)
+ assert injection_queue.empty()
+ # The injection message is appended to conversation history
+ injected = [
+ m for m in result.messages
+ if m.get("role") == "user" and m.get("content") == "follow-up after max iters"
+ ]
+ assert len(injected) == 1
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_set_flag_when_followup_arrives_after_last_iteration():
+ """Late follow-ups drained in max_iterations should still flip had_injections."""
+ from nanobot.agent.hook import AgentHook
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+ from nanobot.bus.events import InboundMessage
+
+ provider = MagicMock()
+ # A dict (rather than an int) lets the nested coroutine mutate the counter in place.
+ call_count = {"n": 0}
+
+ async def chat_with_retry(*, messages, **kwargs):
+ # Always request another tool call so the run can only end via the iteration cap.
+ call_count["n"] += 1
+ return LLMResponse(
+ content="",
+ tool_calls=[ToolCallRequest(id=f"c{call_count['n']}", name="read_file", arguments={"path": "x"})],
+ usage={},
+ )
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ tools.execute = AsyncMock(return_value="file content")
+
+ # The queue starts empty; the hook below is the only producer.
+ injection_queue = asyncio.Queue()
+ inject_cb = _make_injection_callback(injection_queue)
+
+ class InjectOnLastAfterIterationHook(AgentHook):
+ """Hook that enqueues a follow-up on its second after_iteration call."""
+
+ def __init__(self) -> None:
+ self.after_iteration_calls = 0
+
+ async def after_iteration(self, context) -> None:
+ self.after_iteration_calls += 1
+ # With max_iterations=2 below, the second call corresponds to the last iteration.
+ if self.after_iteration_calls == 2:
+ await injection_queue.put(
+ InboundMessage(
+ channel="cli",
+ sender_id="u",
+ chat_id="c",
+ content="late follow-up after max iters",
+ )
+ )
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[{"role": "user", "content": "hello"}],
+ tools=tools,
+ model="test-model",
+ max_iterations=2,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ injection_callback=inject_cb,
+ hook=InjectOnLastAfterIterationHook(),
+ ))
+
+ assert result.stop_reason == "max_iterations"
+ assert result.had_injections is True
+ # The late message must have been consumed from the queue ...
+ assert injection_queue.empty()
+ # ... and recorded exactly once as a user message in the history.
+ injected = [
+ m for m in result.messages
+ if m.get("role") == "user" and m.get("content") == "late follow-up after max iters"
+ ]
+ assert len(injected) == 1
+
+
+@pytest.mark.asyncio
+async def test_injection_cycle_cap_on_error_path():
+ """Injection cycles should be capped even when every iteration hits an LLM error."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES
+ from nanobot.bus.events import InboundMessage
+
+ provider = MagicMock()
+ # A dict (rather than an int) lets the nested coroutine mutate the counter in place.
+ call_count = {"n": 0}
+
+ async def chat_with_retry(*, messages, **kwargs):
+ # Every LLM call fails, so only the injection mechanism can keep the loop going.
+ call_count["n"] += 1
+ return LLMResponse(
+ content=None,
+ tool_calls=[],
+ finish_reason="error",
+ usage={},
+ )
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+
+ drain_count = {"n": 0}
+
+ async def inject_cb():
+ # Hand-written callback (no queue): yields one new message for each of the
+ # first _MAX_INJECTION_CYCLES drains, then nothing.
+ drain_count["n"] += 1
+ if drain_count["n"] <= _MAX_INJECTION_CYCLES:
+ return [InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg-{drain_count['n']}")]
+ return []
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[
+ {"role": "user", "content": "hello"},
+ {"role": "assistant", "content": "previous"},
+ {"role": "user", "content": "trigger error"},
+ ],
+ tools=tools,
+ model="test-model",
+ # Well above the expected call count, so the injection cap (not the
+ # iteration cap) is what ends the run.
+ max_iterations=20,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ injection_callback=inject_cb,
+ ))
+
+ assert result.had_injections is True
+ # Should cap: _MAX_INJECTION_CYCLES drained rounds + 1 final round that breaks
+ assert call_count["n"] == _MAX_INJECTION_CYCLES + 1
+
diff --git a/tests/agent/test_runner_persistence.py b/tests/agent/test_runner_persistence.py
new file mode 100644
index 000000000..d2bcfa9d4
--- /dev/null
+++ b/tests/agent/test_runner_persistence.py
@@ -0,0 +1,161 @@
+"""Tests for tool result persistence: large results, pruning, temp files, cleanup."""
+
+from __future__ import annotations
+
+import os
+import time
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+
+@pytest.mark.asyncio
+async def test_runner_persists_large_tool_results_for_follow_up_calls(tmp_path):
+ """An oversized tool result is persisted to disk and referenced in the follow-up call.
+
+ The tool returns 20,000 characters while ``max_tool_result_chars`` is 2048;
+ the tool message seen by the second LLM call must carry the persistence
+ marker, and the result file must exist under the workspace.
+ """
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ provider = MagicMock()
+ # Snapshot of the messages passed to the second LLM call.
+ captured_second_call: list[dict] = []
+ # A dict (rather than an int) lets the nested coroutine mutate the counter in place.
+ call_count = {"n": 0}
+
+ async def chat_with_retry(*, messages, **kwargs):
+ call_count["n"] += 1
+ if call_count["n"] == 1:
+ # First call: request a single tool call whose result will be oversized.
+ return LLMResponse(
+ content="working",
+ tool_calls=[ToolCallRequest(id="call_big", name="list_dir", arguments={"path": "."})],
+ usage={"prompt_tokens": 5, "completion_tokens": 3},
+ )
+ # Second call: record what the runner sent, then finish.
+ captured_second_call[:] = messages
+ return LLMResponse(content="done", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ tools.execute = AsyncMock(return_value="x" * 20_000)
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[{"role": "user", "content": "do task"}],
+ tools=tools,
+ model="test-model",
+ max_iterations=2,
+ workspace=tmp_path,
+ session_key="test:runner",
+ max_tool_result_chars=2048,
+ ))
+
+ assert result.final_content == "done"
+ tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool")
+ assert "[tool output persisted]" in tool_message["content"]
+ assert "tool-results" in tool_message["content"]
+ # Session key "test:runner" is expected on disk as the bucket directory "test_runner".
+ assert (tmp_path / ".nanobot" / "tool-results" / "test_runner" / "call_big.txt").exists()
+
+
+def test_persist_tool_result_prunes_old_session_buckets(tmp_path):
+ """Persisting a result should remove a stale session bucket but keep a recent one."""
+ from nanobot.utils.helpers import maybe_persist_tool_result
+
+ # Create two pre-existing session buckets, each holding one file.
+ root = tmp_path / ".nanobot" / "tool-results"
+ old_bucket = root / "old_session"
+ recent_bucket = root / "recent_session"
+ old_bucket.mkdir(parents=True)
+ recent_bucket.mkdir(parents=True)
+ (old_bucket / "old.txt").write_text("old", encoding="utf-8")
+ (recent_bucket / "recent.txt").write_text("recent", encoding="utf-8")
+
+ # Backdate the old bucket and its file by 8 days.
+ # NOTE(review): presumably this exceeds the helper's retention window -- confirm against helpers.
+ stale = time.time() - (8 * 24 * 60 * 60)
+ os.utime(old_bucket, (stale, stale))
+ os.utime(old_bucket / "old.txt", (stale, stale))
+
+ # 5000 chars against max_chars=64 forces the persistence path.
+ persisted = maybe_persist_tool_result(
+ tmp_path,
+ "current:session",
+ "call_big",
+ "x" * 5000,
+ max_chars=64,
+ )
+
+ assert "[tool output persisted]" in persisted
+ assert not old_bucket.exists()
+ assert recent_bucket.exists()
+ assert (root / "current_session" / "call_big.txt").exists()
+
+
+def test_persist_tool_result_leaves_no_temp_files(tmp_path):
+ """After persisting, the bucket holds the result file and no ``*.tmp`` leftovers."""
+ from nanobot.utils.helpers import maybe_persist_tool_result
+
+ root = tmp_path / ".nanobot" / "tool-results"
+ # 5000 chars against max_chars=64 forces the persistence path.
+ maybe_persist_tool_result(
+ tmp_path,
+ "current:session",
+ "call_big",
+ "x" * 5000,
+ max_chars=64,
+ )
+
+ assert (root / "current_session" / "call_big.txt").exists()
+ assert list((root / "current_session").glob("*.tmp")) == []
+
+
+def test_persist_tool_result_logs_cleanup_failures(monkeypatch, tmp_path):
+ """A failing bucket cleanup is logged and does not prevent the result from being persisted."""
+ from nanobot.utils.helpers import maybe_persist_tool_result
+
+ # Collects the formatted messages passed to the patched logger.exception.
+ warnings: list[str] = []
+
+ # A lambda cannot contain a ``raise`` statement; throwing into an empty
+ # generator is an expression that raises OSError("busy") when called.
+ monkeypatch.setattr(
+ "nanobot.utils.helpers._cleanup_tool_result_buckets",
+ lambda *_args, **_kwargs: (_ for _ in ()).throw(OSError("busy")),
+ )
+ # Replace logger.exception with a recorder that formats the message with str.format.
+ monkeypatch.setattr(
+ "nanobot.utils.helpers.logger.exception",
+ lambda message, *args: warnings.append(message.format(*args)),
+ )
+
+ persisted = maybe_persist_tool_result(
+ tmp_path,
+ "current:session",
+ "call_big",
+ "x" * 5000,
+ max_chars=64,
+ )
+
+ assert "[tool output persisted]" in persisted
+ assert warnings and "Failed to clean stale tool result buckets" in warnings[0]
+
+
+@pytest.mark.asyncio
+async def test_runner_keeps_going_when_tool_result_persistence_fails():
+ """If persisting a tool result raises, the run continues with the raw tool result."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ provider = MagicMock()
+ # Snapshot of the messages passed to the second LLM call.
+ captured_second_call: list[dict] = []
+ # A dict (rather than an int) lets the nested coroutine mutate the counter in place.
+ call_count = {"n": 0}
+
+ async def chat_with_retry(*, messages, **kwargs):
+ call_count["n"] += 1
+ if call_count["n"] == 1:
+ # First call: request a single tool call.
+ return LLMResponse(
+ content="working",
+ tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
+ usage={"prompt_tokens": 5, "completion_tokens": 3},
+ )
+ # Second call: record what the runner sent, then finish.
+ captured_second_call[:] = messages
+ return LLMResponse(content="done", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ tools.execute = AsyncMock(return_value="tool result")
+
+ runner = AgentRunner(provider)
+ # Make the persistence helper (as referenced from the runner module) fail on every call.
+ with patch("nanobot.agent.runner.maybe_persist_tool_result", side_effect=RuntimeError("disk full")):
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[{"role": "user", "content": "do task"}],
+ tools=tools,
+ model="test-model",
+ max_iterations=2,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ ))
+
+ assert result.final_content == "done"
+ # The tool message must carry the unmodified tool output.
+ tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool")
+ assert tool_message["content"] == "tool result"
diff --git a/tests/agent/test_runner_safety.py b/tests/agent/test_runner_safety.py
new file mode 100644
index 000000000..14565e203
--- /dev/null
+++ b/tests/agent/test_runner_safety.py
@@ -0,0 +1,244 @@
+"""Tests for AgentRunner security: workspace violations, SSRF, shell guard, throttling."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+
+@pytest.mark.asyncio
+async def test_runner_does_not_abort_on_workspace_violation_anymore():
+ """v2 behavior: workspace-bound rejections are *soft* tool errors.
+
+ Previously (PR #3493) any workspace boundary error became a fatal
+ RuntimeError that aborted the turn. That silently killed legitimate
+ workspace commands once the heuristic guard misfired (#3599 #3605), so
+ we now hand the error back to the LLM as a recoverable tool result and
+ rely on ``repeated_workspace_violation_error`` to throttle bypass loops.
+ """
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ provider = MagicMock()
+ # Scripted LLM: first a tool call targeting a path outside the workspace,
+ # then a plain final answer.
+ provider.chat_with_retry = AsyncMock(side_effect=[
+ LLMResponse(
+ content="trying outside",
+ tool_calls=[ToolCallRequest(
+ id="call_1", name="read_file", arguments={"path": "/tmp/outside.md"},
+ )],
+ ),
+ LLMResponse(content="ok, telling the user instead", tool_calls=[]),
+ ])
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ # The tool raises (rather than returning an error string) on the boundary violation.
+ tools.execute = AsyncMock(
+ side_effect=PermissionError(
+ "Path /tmp/outside.md is outside allowed directory /workspace"
+ )
+ )
+
+ runner = AgentRunner(provider)
+
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[],
+ tools=tools,
+ model="test-model",
+ max_iterations=3,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ ))
+
+ assert provider.chat_with_retry.await_count == 2, (
+ "workspace violation must NOT short-circuit the loop"
+ )
+ assert result.stop_reason != "tool_error"
+ assert result.error is None
+ assert result.final_content == "ok, telling the user instead"
+ assert result.tool_events and result.tool_events[0]["status"] == "error"
+ # Detail still carries the workspace_violation breadcrumb for telemetry,
+ # but the runner did not raise.
+ assert "workspace_violation" in result.tool_events[0]["detail"]
+
+
+def test_is_ssrf_violation_recognizes_private_url_blocks():
+ """SSRF rejections are classified separately from workspace boundaries."""
+ from nanobot.agent.runner import AgentRunner
+
+ # Positive cases: the shell-guard message and the URL-validation message.
+ ssrf_msg = "Error: Command blocked by safety guard (internal/private URL detected)"
+ assert AgentRunner._is_ssrf_violation(ssrf_msg) is True
+ assert AgentRunner._is_ssrf_violation(
+ "URL validation failed: Blocked: host resolves to private/internal address 192.168.1.2"
+ ) is True
+
+ # Workspace-bound markers are NOT classified as SSRF.
+ assert AgentRunner._is_ssrf_violation(
+ "Error: Command blocked by safety guard (path outside working dir)"
+ ) is False
+ assert AgentRunner._is_ssrf_violation(
+ "Path /tmp/x is outside allowed directory /ws"
+ ) is False
+ # Deny / allowlist filter messages stay non-fatal too.
+ assert AgentRunner._is_ssrf_violation(
+ "Error: Command blocked by deny pattern filter"
+ ) is False
+
+
+@pytest.mark.asyncio
+async def test_runner_returns_non_retryable_hint_on_ssrf_violation():
+ """SSRF stays blocked, but the runtime gives the LLM a final chance to recover."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ provider = MagicMock()
+ # Scripted LLM: first an exec call against a link-local address,
+ # then a plain final answer.
+ provider.chat_with_retry = AsyncMock(side_effect=[
+ LLMResponse(
+ content="curl-ing metadata",
+ tool_calls=[ToolCallRequest(
+ id="call_ssrf",
+ name="exec",
+ arguments={"command": "curl http://169.254.169.254"},
+ )],
+ ),
+ LLMResponse(
+ content="I cannot access that private URL. Please share local files.",
+ tool_calls=[],
+ ),
+ ])
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ # The tool reports the block as a returned error string (it does not raise).
+ tools.execute = AsyncMock(return_value=(
+ "Error: Command blocked by safety guard (internal/private URL detected)"
+ ))
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[],
+ tools=tools,
+ model="test-model",
+ max_iterations=3,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ ))
+
+ # The run completes normally on the second LLM call.
+ assert provider.chat_with_retry.await_count == 2
+ assert result.stop_reason == "completed"
+ assert result.error is None
+ assert result.final_content == "I cannot access that private URL. Please share local files."
+ assert result.tool_events and result.tool_events[0]["detail"].startswith("ssrf_violation:")
+ # The tool message handed back to the LLM must contain the do-not-retry guidance.
+ tool_messages = [m for m in result.messages if m.get("role") == "tool"]
+ assert tool_messages
+ assert "non-bypassable security boundary" in tool_messages[0]["content"]
+ assert "Do not retry" in tool_messages[0]["content"]
+ assert "tools.ssrfWhitelist" in tool_messages[0]["content"]
+
+
+@pytest.mark.asyncio
+async def test_runner_lets_llm_recover_from_shell_guard_path_outside():
+ """Reporter scenario for #3599 / #3605 -- guard hit, agent recovers.
+
+ The shell `_guard_command` heuristic fires on `2>/dev/null`-style
+ redirects and other shell idioms. Before v2 that abort'd the whole
+ turn (silent hang on Telegram per #3605); now the LLM gets the soft
+ error back and can finalize on the next iteration.
+ """
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ provider = MagicMock()
+ # Snapshot of the messages passed to the second LLM call.
+ captured_second_call: list[dict] = []
+
+ async def chat_with_retry(*, messages, **kwargs):
+ # ``provider.chat_with_retry`` is rebound below to the AsyncMock wrapping
+ # this function, so its await_count identifies the current call.
+ if provider.chat_with_retry.await_count == 1:
+ return LLMResponse(
+ content="trying noisy cleanup",
+ tool_calls=[ToolCallRequest(
+ id="call_blocked",
+ name="exec",
+ arguments={"command": "rm scratch.txt 2>/dev/null"},
+ )],
+ )
+ captured_second_call[:] = list(messages)
+ return LLMResponse(content="recovered final answer", tool_calls=[])
+
+ provider.chat_with_retry = AsyncMock(side_effect=chat_with_retry)
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ # The tool reports the guard hit as a returned error string (it does not raise).
+ tools.execute = AsyncMock(
+ return_value="Error: Command blocked by safety guard (path outside working dir)"
+ )
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[],
+ tools=tools,
+ model="test-model",
+ max_iterations=3,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ ))
+
+ assert provider.chat_with_retry.await_count == 2, (
+ "guard hit must NOT short-circuit the loop -- LLM should get a second turn"
+ )
+ assert result.stop_reason != "tool_error"
+ assert result.error is None
+ assert result.final_content == "recovered final answer"
+ assert result.tool_events and result.tool_events[0]["status"] == "error"
+ # v2: detail keeps the breadcrumb but the runner did not raise.
+ assert "workspace_violation" in result.tool_events[0]["detail"]
+
+
+@pytest.mark.asyncio
+async def test_runner_throttles_repeated_workspace_bypass_attempts():
+ """#3493 motivation: stop the LLM bypass loop without aborting the turn.
+
+ LLM keeps switching tools (read_file -> exec cat -> python -c open(...))
+ against the same outside path. After the soft retry budget is exhausted
+ the runner replaces the tool result with a hard "stop trying" message
+ so the model finally gives up and surfaces the boundary to the user.
+ """
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ # Four exec calls against the same outside path; only the trailing
+ # "# try {i}" suffix differs between the commands.
+ bypass_attempts = [
+ ToolCallRequest(
+ id=f"a{i}", name="exec",
+ arguments={"command": f"cat /Users/x/Downloads/01.md # try {i}"},
+ )
+ for i in range(4)
+ ]
+ # One LLM response per bypass attempt, followed by a final plain answer.
+ responses: list[LLMResponse] = [
+ LLMResponse(content=f"try {i}", tool_calls=[bypass_attempts[i]])
+ for i in range(4)
+ ]
+ responses.append(LLMResponse(content="ok telling user", tool_calls=[]))
+
+ provider = MagicMock()
+ provider.chat_with_retry = AsyncMock(side_effect=responses)
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ # Every tool execution is rejected with the same guard message.
+ tools.execute = AsyncMock(
+ return_value="Error: Command blocked by safety guard (path outside working dir)"
+ )
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[],
+ tools=tools,
+ model="test-model",
+ max_iterations=10,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ ))
+
+ # All 4 bypass attempts surface to the LLM (no fatal abort), and the
+ # runner finally completes once the LLM stops asking.
+ assert result.stop_reason != "tool_error"
+ assert result.error is None
+ assert result.final_content == "ok telling user"
+ # The third+ attempts must have been escalated -- look at the events.
+ escalated = [
+ ev for ev in result.tool_events
+ if ev["status"] == "error"
+ and ev["detail"].startswith("workspace_violation_escalated:")
+ ]
+ assert escalated, (
+ "expected at least one escalated workspace_violation event, got: "
+ f"{result.tool_events}"
+ )
diff --git a/tests/agent/test_runner_tool_execution.py b/tests/agent/test_runner_tool_execution.py
new file mode 100644
index 000000000..a0380e871
--- /dev/null
+++ b/tests/agent/test_runner_tool_execution.py
@@ -0,0 +1,181 @@
+"""Tests for AgentRunner tool execution: batching, concurrency, exclusive tools."""
+
+from __future__ import annotations
+
+import asyncio
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.agent.tools.base import Tool
+from nanobot.agent.tools.registry import ToolRegistry
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+class _DelayTool(Tool):
+ """Test tool that sleeps for a fixed delay and records start/end markers.
+
+ Each ``execute`` call appends ``start:<name>`` and ``end:<name>`` to the
+ ``shared_events`` list supplied at construction, so tests can assert on
+ the relative ordering of several tools' executions.
+ """
+
+ def __init__(
+ self,
+ name: str,
+ *,
+ delay: float,
+ read_only: bool,
+ shared_events: list[str],
+ exclusive: bool = False,
+ ):
+ # Values are stored privately and exposed through the properties below.
+ self._name = name
+ self._delay = delay
+ self._read_only = read_only
+ self._shared_events = shared_events
+ self._exclusive = exclusive
+
+ @property
+ def name(self) -> str:
+ return self._name
+
+ @property
+ def description(self) -> str:
+ # The tool name doubles as its description.
+ return self._name
+
+ @property
+ def parameters(self) -> dict:
+ # Object schema with no properties: the tool takes no arguments.
+ return {"type": "object", "properties": {}, "required": []}
+
+ @property
+ def read_only(self) -> bool:
+ return self._read_only
+
+ @property
+ def exclusive(self) -> bool:
+ return self._exclusive
+
+ async def execute(self, **kwargs):
+ # Record start, sleep for the configured delay, record end, return the name.
+ self._shared_events.append(f"start:{self._name}")
+ await asyncio.sleep(self._delay)
+ self._shared_events.append(f"end:{self._name}")
+ return self._name
+
+
+@pytest.mark.asyncio
+async def test_runner_batches_read_only_tools_before_exclusive_work():
+ """Two read-only tools start together; the non-read-only tool runs after both finish."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ tools = ToolRegistry()
+ # Shared log of start/end markers appended by every _DelayTool.
+ shared_events: list[str] = []
+ read_a = _DelayTool("read_a", delay=0.05, read_only=True, shared_events=shared_events)
+ read_b = _DelayTool("read_b", delay=0.05, read_only=True, shared_events=shared_events)
+ write_a = _DelayTool("write_a", delay=0.01, read_only=False, shared_events=shared_events)
+ tools.register(read_a)
+ tools.register(read_b)
+ tools.register(write_a)
+
+ # The provider is never used: _execute_tools is called directly.
+ runner = AgentRunner(MagicMock())
+ await runner._execute_tools(
+ AgentRunSpec(
+ initial_messages=[],
+ tools=tools,
+ model="test-model",
+ max_iterations=1,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ concurrent_tools=True,
+ ),
+ [
+ ToolCallRequest(id="ro1", name="read_a", arguments={}),
+ ToolCallRequest(id="ro2", name="read_b", arguments={}),
+ ToolCallRequest(id="rw1", name="write_a", arguments={}),
+ ],
+ {},
+ {},
+ )
+
+ # Both reads start before either one ends (i.e. they overlap).
+ assert shared_events[0:2] == ["start:read_a", "start:read_b"]
+ assert "end:read_a" in shared_events and "end:read_b" in shared_events
+ # The write starts only after both reads have ended, and is the last thing to run.
+ assert shared_events.index("end:read_a") < shared_events.index("start:write_a")
+ assert shared_events.index("end:read_b") < shared_events.index("start:write_a")
+ assert shared_events[-2:] == ["start:write_a", "end:write_a"]
+
+
+@pytest.mark.asyncio
+async def test_runner_does_not_batch_exclusive_read_only_tools():
+ """An exclusive tool runs alone, even when it and its neighbours are read-only."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ tools = ToolRegistry()
+ # Shared log of start/end markers appended by every _DelayTool.
+ shared_events: list[str] = []
+ read_a = _DelayTool("read_a", delay=0.03, read_only=True, shared_events=shared_events)
+ read_b = _DelayTool("read_b", delay=0.03, read_only=True, shared_events=shared_events)
+ # Read-only but flagged exclusive.
+ ddg_like = _DelayTool(
+ "ddg_like",
+ delay=0.01,
+ read_only=True,
+ shared_events=shared_events,
+ exclusive=True,
+ )
+ tools.register(read_a)
+ tools.register(ddg_like)
+ tools.register(read_b)
+
+ # The provider is never used: _execute_tools is called directly.
+ runner = AgentRunner(MagicMock())
+ await runner._execute_tools(
+ AgentRunSpec(
+ initial_messages=[],
+ tools=tools,
+ model="test-model",
+ max_iterations=1,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ concurrent_tools=True,
+ ),
+ [
+ ToolCallRequest(id="ro1", name="read_a", arguments={}),
+ ToolCallRequest(id="ddg1", name="ddg_like", arguments={}),
+ ToolCallRequest(id="ro2", name="read_b", arguments={}),
+ ],
+ {},
+ {},
+ )
+
+ # Strictly sequential: read_a finishes before ddg_like starts, which
+ # finishes before read_b starts.
+ assert shared_events[0] == "start:read_a"
+ assert shared_events.index("end:read_a") < shared_events.index("start:ddg_like")
+ assert shared_events.index("end:ddg_like") < shared_events.index("start:read_b")
+
+
+@pytest.mark.asyncio
+async def test_runner_blocks_repeated_external_fetches():
+ """The third identical web_fetch is blocked instead of being executed."""
+ from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+ provider = MagicMock()
+ # Snapshot of the messages passed to the final (fourth) LLM call.
+ captured_final_call: list[dict] = []
+ # A dict (rather than an int) lets the nested coroutine mutate the counter in place.
+ call_count = {"n": 0}
+
+ async def chat_with_retry(*, messages, **kwargs):
+ call_count["n"] += 1
+ # Calls 1-3: request the same URL each time, with ids call_1..call_3.
+ if call_count["n"] <= 3:
+ return LLMResponse(
+ content="working",
+ tool_calls=[ToolCallRequest(id=f"call_{call_count['n']}", name="web_fetch", arguments={"url": "https://example.com"})],
+ usage={},
+ )
+ # Call 4: record what the runner sent, then finish.
+ captured_final_call[:] = messages
+ return LLMResponse(content="done", tool_calls=[], usage={})
+
+ provider.chat_with_retry = chat_with_retry
+ tools = MagicMock()
+ tools.get_definitions.return_value = []
+ tools.execute = AsyncMock(return_value="page content")
+
+ runner = AgentRunner(provider)
+ result = await runner.run(AgentRunSpec(
+ initial_messages=[{"role": "user", "content": "research task"}],
+ tools=tools,
+ model="test-model",
+ max_iterations=4,
+ max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+ ))
+
+ assert result.final_content == "done"
+ # Only the first two fetches reached the tool.
+ assert tools.execute.await_count == 2
+ # The tool message answering the third request carries the block notice.
+ blocked_tool_message = [
+ msg for msg in captured_final_call
+ if msg.get("role") == "tool" and msg.get("tool_call_id") == "call_3"
+ ][0]
+ assert "repeated external lookup blocked" in blocked_tool_message["content"]
diff --git a/tests/agent/test_stop_preserves_context.py b/tests/agent/test_stop_preserves_context.py
index 2a082850f..c7e766be1 100644
--- a/tests/agent/test_stop_preserves_context.py
+++ b/tests/agent/test_stop_preserves_context.py
@@ -10,6 +10,7 @@ See: https://github.com/HKUDS/nanobot/issues/2966
from __future__ import annotations
import asyncio
+from pathlib import Path
from types import SimpleNamespace
from typing import Any
from unittest.mock import MagicMock, patch, AsyncMock
@@ -17,42 +18,47 @@ from unittest.mock import MagicMock, patch, AsyncMock
import pytest
from nanobot.agent.loop import AgentLoop
+from nanobot.bus.queue import MessageBus
+from nanobot.providers.base import LLMProvider
-@pytest.fixture
-def mock_loop():
- """Create a minimal AgentLoop with mocked dependencies."""
- with patch.object(AgentLoop, "__init__", lambda self: None):
- loop = AgentLoop()
- loop.sessions = MagicMock()
- loop._pending_queues = {}
- loop._session_locks = {}
- loop._active_tasks = {}
- loop._concurrency_gate = None
- loop._RUNTIME_CHECKPOINT_KEY = "runtime_checkpoint"
- loop._PENDING_USER_TURN_KEY = "pending_user_turn"
- loop.bus = MagicMock()
- loop.bus.publish_outbound = AsyncMock()
- loop.bus.publish_inbound = AsyncMock()
- loop.commands = MagicMock()
- loop.commands.dispatch_priority = AsyncMock(return_value=None)
- return loop
+def _make_provider():
+ """Create an LLM provider mock with required attributes."""
+ # NOTE(review): SimpleNamespace is also imported at module level in this
+ # file, so this function-local import looks redundant.
+ from types import SimpleNamespace
+ # Unconstrained MagicMock (no spec); only the attributes set below are pinned.
+ provider = MagicMock()
+ provider.get_default_model.return_value = "test-model"
+ provider.generation = SimpleNamespace(max_tokens=4096, temperature=0.1, reasoning_effort=None)
+ # Fixed (value, label) tuple returned for any token-estimate call.
+ provider.estimate_prompt_tokens.return_value = (10_000, "test")
+ return provider
+
+
+def _make_loop(tmp_path: Path) -> AgentLoop:
+ """Create a real AgentLoop with mocked provider — avoids patching __init__."""
+ bus = MessageBus()
+ provider = _make_provider()
+ # The three collaborators are patched in nanobot.agent.loop only while the
+ # loop is constructed; the loop is built against the mocks.
+ with patch("nanobot.agent.loop.ContextBuilder"), \
+ patch("nanobot.agent.loop.SessionManager"), \
+ patch("nanobot.agent.loop.SubagentManager") as MockSubMgr:
+ # cancel_by_session must be awaitable; it reports 0.
+ MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0)
+ return AgentLoop(bus=bus, provider=provider, workspace=tmp_path)
class TestStopPreservesContext:
"""Verify that /stop restores partial context via checkpoint."""
- def test_restore_checkpoint_method_exists(self, mock_loop):
+ def test_restore_checkpoint_method_exists(self, tmp_path):
"""AgentLoop should have _restore_runtime_checkpoint."""
- assert hasattr(mock_loop, "_restore_runtime_checkpoint")
+ loop = _make_loop(tmp_path)
+ assert hasattr(loop, "_restore_runtime_checkpoint")
- def test_checkpoint_key_constant(self, mock_loop):
+ def test_checkpoint_key_constant(self, tmp_path):
"""The runtime checkpoint key should be defined."""
- assert mock_loop._RUNTIME_CHECKPOINT_KEY == "runtime_checkpoint"
+ loop = _make_loop(tmp_path)
+ assert loop._RUNTIME_CHECKPOINT_KEY == "runtime_checkpoint"
- def test_cancel_dispatch_restores_checkpoint(self, mock_loop):
+ def test_cancel_dispatch_restores_checkpoint(self, tmp_path):
"""When a task is cancelled, the checkpoint should be restored."""
- # Create a mock session with a checkpoint
+ loop = _make_loop(tmp_path)
session = MagicMock()
session.metadata = {
"runtime_checkpoint": {
@@ -74,14 +80,11 @@ class TestStopPreservesContext:
session.messages = [
{"role": "user", "content": "Search for something"},
]
- mock_loop.sessions.get_or_create.return_value = session
+ loop.sessions.get_or_create.return_value = session
- # The restore method should add checkpoint messages to session history
- restored = mock_loop._restore_runtime_checkpoint(session)
+ restored = loop._restore_runtime_checkpoint(session)
assert restored is True
- # After restore, session should have more messages
assert len(session.messages) > 1
- # The checkpoint should be cleared
assert "runtime_checkpoint" not in session.metadata
diff --git a/tests/agent/test_subagent_lifecycle.py b/tests/agent/test_subagent_lifecycle.py
new file mode 100644
index 000000000..bf3564f28
--- /dev/null
+++ b/tests/agent/test_subagent_lifecycle.py
@@ -0,0 +1,558 @@
+"""Tests for SubagentManager lifecycle — spawn, run, announce, cancel."""
+
+import asyncio
+import time
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from nanobot.agent.hook import AgentHookContext
+from nanobot.agent.runner import AgentRunResult
+from nanobot.agent.subagent import (
+ SubagentManager,
+ SubagentStatus,
+ _SubagentHook,
+)
+from nanobot.bus.queue import MessageBus
+from nanobot.providers.base import LLMProvider
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _manager(tmp_path: Path, **kw) -> SubagentManager:  # test factory; **kw overrides the defaults below
+    provider = MagicMock(spec=LLMProvider)  # spec-limited mock: attributes not on LLMProvider raise AttributeError
+    provider.get_default_model.return_value = "test-model"
+    defaults = dict(
+        provider=provider,
+        workspace=tmp_path,
+        bus=MessageBus(),
+        model="test-model",
+        max_tool_result_chars=16_000,
+    )
+    defaults.update(kw)  # caller-supplied keyword arguments win over the defaults
+    return SubagentManager(**defaults)
+
+
+def _make_hook_context(**overrides) -> AgentHookContext:  # test factory; **overrides replaces individual fields
+    defaults = dict(
+        iteration=1,
+        tool_calls=[],
+        tool_events=[],
+        messages=[],
+        usage={},
+        error=None,
+        stop_reason="completed",
+        final_content="ok",
+    )
+    defaults.update(overrides)  # fresh dict/list literals per call, so contexts never share state
+    return AgentHookContext(**defaults)
+
+
+# ---------------------------------------------------------------------------
+# SubagentStatus defaults
+# ---------------------------------------------------------------------------
+
+
+class TestSubagentStatus:  # checks the defaults of the fields not passed to the constructor
+    def test_defaults(self):
+        s = SubagentStatus(
+            task_id="abc", label="test", task_description="do stuff",
+            started_at=time.monotonic(),
+        )
+        assert s.phase == "initializing"
+        assert s.iteration == 0
+        assert s.tool_events == []
+        assert s.usage == {}
+        assert s.stop_reason is None
+        assert s.error is None
+
+
+# ---------------------------------------------------------------------------
+# set_provider
+# ---------------------------------------------------------------------------
+
+
+class TestSetProvider:  # set_provider must update the manager, its model, and its runner
+    def test_updates_provider_model_runner(self, tmp_path):
+        sm = _manager(tmp_path)
+        new_provider = MagicMock(spec=LLMProvider)
+        sm.set_provider(new_provider, "new-model")
+        assert sm.provider is new_provider  # identity, not equality: the very same mock object
+        assert sm.model == "new-model"
+        assert sm.runner.provider is new_provider  # the swap must reach the runner too
+
+
+# ---------------------------------------------------------------------------
+# spawn
+# ---------------------------------------------------------------------------
+
+
+class TestSpawn:  # spawn(): return message, bookkeeping in the private registries, and cleanup
+    @pytest.mark.asyncio
+    async def test_returns_string_with_task_id(self, tmp_path):
+        sm = _manager(tmp_path)
+        sm.runner.run = AsyncMock(return_value=AgentRunResult(
+            final_content="done", messages=[], stop_reason="completed",
+        ))
+        result = await sm.spawn("do something")
+        assert "started" in result
+        assert "id:" in result  # only the marker is checked, not the id value itself
+
+    @pytest.mark.asyncio
+    async def test_creates_task_in_running_tasks(self, tmp_path):
+        sm = _manager(tmp_path)
+        block = asyncio.Event()  # gate: the fake run below stays pending until this is set
+        async def _slow_run(spec):
+            await block.wait()
+            return AgentRunResult(final_content="done", messages=[], stop_reason="completed")
+        sm.runner.run = _slow_run
+
+        await sm.spawn("task", session_key="s1")
+        assert len(sm._running_tasks) == 1
+
+        block.set()
+        await asyncio.sleep(0.1)  # timing-based wait for the released task to finish
+        assert len(sm._running_tasks) == 0
+
+    @pytest.mark.asyncio
+    async def test_creates_status(self, tmp_path):
+        sm = _manager(tmp_path)
+        sm.runner.run = AsyncMock(return_value=AgentRunResult(
+            final_content="done", messages=[], stop_reason="completed",
+        ))
+        await sm.spawn("my task")
+        await asyncio.sleep(0.1)  # timing-based wait for the background task to finish
+        # NOTE(review): despite the name, this only checks the status is removed after completion.
+        assert len(sm._task_statuses) == 0
+
+    @pytest.mark.asyncio
+    async def test_registers_in_session_tasks(self, tmp_path):
+        sm = _manager(tmp_path)
+        block = asyncio.Event()  # gate that keeps the fake run pending
+        async def _slow_run(spec):
+            await block.wait()
+            return AgentRunResult(final_content="done", messages=[], stop_reason="completed")
+        sm.runner.run = _slow_run
+
+        await sm.spawn("task", session_key="s1")
+        assert "s1" in sm._session_tasks
+        assert len(sm._session_tasks["s1"]) == 1
+
+        block.set()
+        await asyncio.sleep(0.1)  # timing-based wait for the released task to finish
+        assert "s1" not in sm._session_tasks  # the session key itself is dropped, not left empty
+
+    @pytest.mark.asyncio
+    async def test_no_session_key_no_registration(self, tmp_path):
+        sm = _manager(tmp_path)
+        block = asyncio.Event()  # gate that keeps the fake run pending
+        async def _slow_run(spec):
+            await block.wait()
+            return AgentRunResult(final_content="done", messages=[], stop_reason="completed")
+        sm.runner.run = _slow_run
+
+        await sm.spawn("task")  # no session_key passed
+        assert len(sm._session_tasks) == 0
+
+        block.set()
+        await asyncio.sleep(0.1)  # let the released task finish before the test returns
+
+    @pytest.mark.asyncio
+    async def test_label_defaults_to_truncated_task(self, tmp_path):
+        sm = _manager(tmp_path)
+        block = asyncio.Event()  # gate: keeps the task pending so its status can be inspected
+        async def _slow_run(spec):
+            await block.wait()
+            return AgentRunResult(final_content="done", messages=[], stop_reason="completed")
+        sm.runner.run = _slow_run
+
+        long_task = "A" * 50
+        await sm.spawn(long_task, session_key="s1")
+        status = next(iter(sm._task_statuses.values()))  # exactly one task was spawned
+        assert status.label == long_task[:30] + "..."  # first 30 characters plus an ellipsis
+
+        block.set()
+        await asyncio.sleep(0.1)  # let the released task finish before the test returns
+
+    @pytest.mark.asyncio
+    async def test_custom_label(self, tmp_path):
+        sm = _manager(tmp_path)
+        block = asyncio.Event()  # gate: keeps the task pending so its status can be inspected
+        async def _slow_run(spec):
+            await block.wait()
+            return AgentRunResult(final_content="done", messages=[], stop_reason="completed")
+        sm.runner.run = _slow_run
+
+        await sm.spawn("task", label="Custom Label", session_key="s1")
+        status = next(iter(sm._task_statuses.values()))  # exactly one task was spawned
+        assert status.label == "Custom Label"
+
+        block.set()
+        await asyncio.sleep(0.1)  # let the released task finish before the test returns
+
+    @pytest.mark.asyncio
+    async def test_cleanup_callback_removes_all_entries(self, tmp_path):
+        sm = _manager(tmp_path)
+        sm.runner.run = AsyncMock(return_value=AgentRunResult(
+            final_content="done", messages=[], stop_reason="completed",
+        ))
+        await sm.spawn("task", session_key="s1")
+        await asyncio.sleep(0.1)  # timing-based wait; presumably cleanup runs in a done-callback (confirm)
+        assert len(sm._running_tasks) == 0
+        assert len(sm._task_statuses) == 0
+        assert len(sm._session_tasks) == 0
+
+
+# ---------------------------------------------------------------------------
+# _run_subagent
+# ---------------------------------------------------------------------------
+
+
+class TestRunSubagent:  # calls _run_subagent directly with _announce_result patched out
+    @pytest.mark.asyncio
+    async def test_successful_run(self, tmp_path):
+        sm = _manager(tmp_path)
+        sm.runner.run = AsyncMock(return_value=AgentRunResult(
+            final_content="Task done!", messages=[], stop_reason="completed",
+        ))
+        with patch.object(sm, "_announce_result", new_callable=AsyncMock) as mock_announce:
+            await sm._run_subagent(
+                "t1", "do task", "label",
+                {"channel": "cli", "chat_id": "direct"},
+                SubagentStatus(task_id="t1", label="label", task_description="do task", started_at=time.monotonic()),
+            )
+        mock_announce.assert_called_once()
+        assert mock_announce.call_args.args[-2] == "ok"  # status is second-to-last positional arg; presumably followed by origin_message_id (confirm)
+
+    @pytest.mark.asyncio
+    async def test_tool_error_run(self, tmp_path):
+        sm = _manager(tmp_path)
+        sm.runner.run = AsyncMock(return_value=AgentRunResult(
+            final_content=None, messages=[], stop_reason="tool_error",
+            tool_events=[{"name": "read_file", "status": "error", "detail": "not found"}],
+        ))
+        status = SubagentStatus(task_id="t1", label="label", task_description="do task", started_at=time.monotonic())
+        with patch.object(sm, "_announce_result", new_callable=AsyncMock) as mock_announce:
+            await sm._run_subagent(
+                "t1", "do task", "label",
+                {"channel": "cli", "chat_id": "direct"}, status,
+            )
+        assert mock_announce.call_args.args[-2] == "error"  # NOTE(review): no assert_called_once() here, unlike test_successful_run
+
+    @pytest.mark.asyncio
+    async def test_exception_run(self, tmp_path):
+        sm = _manager(tmp_path)
+        sm.runner.run = AsyncMock(side_effect=RuntimeError("LLM down"))  # the runner itself raises
+        status = SubagentStatus(task_id="t1", label="label", task_description="do task", started_at=time.monotonic())
+        with patch.object(sm, "_announce_result", new_callable=AsyncMock) as mock_announce:
+            await sm._run_subagent(
+                "t1", "do task", "label",
+                {"channel": "cli", "chat_id": "direct"}, status,
+            )
+        assert status.phase == "error"  # reaching this line means the exception was not propagated
+        assert "LLM down" in status.error
+        assert mock_announce.call_args.args[-2] == "error"
+
+    @pytest.mark.asyncio
+    async def test_status_updated_on_success(self, tmp_path):
+        sm = _manager(tmp_path)
+        sm.runner.run = AsyncMock(return_value=AgentRunResult(
+            final_content="ok", messages=[], stop_reason="completed",
+        ))
+        status = SubagentStatus(task_id="t1", label="label", task_description="do task", started_at=time.monotonic())
+        with patch.object(sm, "_announce_result", new_callable=AsyncMock):
+            await sm._run_subagent(
+                "t1", "do task", "label",
+                {"channel": "cli", "chat_id": "direct"}, status,
+            )
+        assert status.phase == "done"
+        assert status.stop_reason == "completed"  # copied from the run result's stop_reason
+
+
+# ---------------------------------------------------------------------------
+# _announce_result
+# ---------------------------------------------------------------------------
+
+
+class TestAnnounceResult:  # captures what _announce_result publishes via bus.publish_inbound
+    @pytest.mark.asyncio
+    async def test_publishes_inbound_message(self, tmp_path):
+        sm = _manager(tmp_path)
+        published = []  # collects every message handed to publish_inbound
+        sm.bus.publish_inbound = AsyncMock(side_effect=lambda msg: published.append(msg))
+
+        await sm._announce_result(
+            "t1", "label", "task", "result text",
+            {"channel": "cli", "chat_id": "direct"}, "ok",
+        )
+
+        assert len(published) == 1
+        msg = published[0]
+        assert msg.channel == "system"  # not the origin channel ("cli")
+        assert msg.sender_id == "subagent"
+        assert msg.metadata["injected_event"] == "subagent_result"
+        assert msg.metadata["subagent_task_id"] == "t1"
+
+    @pytest.mark.asyncio
+    async def test_session_key_override(self, tmp_path):
+        sm = _manager(tmp_path)
+        published = []  # collects every message handed to publish_inbound
+        sm.bus.publish_inbound = AsyncMock(side_effect=lambda msg: published.append(msg))
+
+        await sm._announce_result(
+            "t1", "label", "task", "result",
+            {"channel": "telegram", "chat_id": "123", "session_key": "s1"}, "ok",
+        )
+
+        assert published[0].session_key_override == "s1"  # an explicit session_key in origin wins
+
+    @pytest.mark.asyncio
+    async def test_session_key_override_fallback(self, tmp_path):
+        sm = _manager(tmp_path)
+        published = []  # collects every message handed to publish_inbound
+        sm.bus.publish_inbound = AsyncMock(side_effect=lambda msg: published.append(msg))
+
+        await sm._announce_result(
+            "t1", "label", "task", "result",
+            {"channel": "telegram", "chat_id": "123"}, "ok",
+        )
+
+        assert published[0].session_key_override == "telegram:123"  # "<channel>:<chat_id>" when origin has no session_key
+
+    @pytest.mark.asyncio
+    async def test_ok_status_text(self, tmp_path):
+        sm = _manager(tmp_path)
+        published = []  # collects every message handed to publish_inbound
+        sm.bus.publish_inbound = AsyncMock(side_effect=lambda msg: published.append(msg))
+
+        await sm._announce_result(
+            "t1", "label", "task", "result",
+            {"channel": "cli", "chat_id": "direct"}, "ok",
+        )
+
+        assert "completed successfully" in published[0].content  # wording expected for status "ok"
+
+    @pytest.mark.asyncio
+    async def test_error_status_text(self, tmp_path):
+        sm = _manager(tmp_path)
+        published = []  # collects every message handed to publish_inbound
+        sm.bus.publish_inbound = AsyncMock(side_effect=lambda msg: published.append(msg))
+
+        await sm._announce_result(
+            "t1", "label", "task", "error details",
+            {"channel": "cli", "chat_id": "direct"}, "error",
+        )
+
+        assert "failed" in published[0].content  # wording expected for status "error"
+
+    @pytest.mark.asyncio
+    async def test_origin_message_id_in_metadata(self, tmp_path):
+        sm = _manager(tmp_path)
+        published = []  # collects every message handed to publish_inbound
+        sm.bus.publish_inbound = AsyncMock(side_effect=lambda msg: published.append(msg))
+
+        await sm._announce_result(
+            "t1", "label", "task", "result",
+            {"channel": "cli", "chat_id": "direct"}, "ok",
+            origin_message_id="msg-123",
+        )
+
+        assert published[0].metadata["origin_message_id"] == "msg-123"  # passed through unchanged
+
+
+# ---------------------------------------------------------------------------
+# _format_partial_progress
+# ---------------------------------------------------------------------------
+
+
+class TestFormatPartialProgress:  # _format_partial_progress is called on the class, without an instance
+    def _make_result(self, tool_events=None, error=None):  # stand-in result exposing only tool_events and error
+        return MagicMock(tool_events=tool_events or [], error=error)
+
+    def test_completed_only(self):
+        result = self._make_result(tool_events=[
+            {"name": "read_file", "status": "ok", "detail": "file content"},
+            {"name": "exec", "status": "ok", "detail": "output"},
+        ])
+        text = SubagentManager._format_partial_progress(result)
+        assert "Completed steps:" in text
+        assert "read_file" in text
+        assert "exec" in text
+
+    def test_failure_only(self):
+        result = self._make_result(tool_events=[
+            {"name": "read_file", "status": "error", "detail": "not found"},
+        ])
+        text = SubagentManager._format_partial_progress(result)
+        assert "Failure:" in text
+        assert "not found" in text  # the event's detail string is included
+
+    def test_completed_and_failure(self):
+        result = self._make_result(tool_events=[
+            {"name": "read_file", "status": "ok", "detail": "content"},
+            {"name": "exec", "status": "error", "detail": "timeout"},
+        ])
+        text = SubagentManager._format_partial_progress(result)
+        assert "Completed steps:" in text
+        assert "Failure:" in text
+
+    def test_limited_to_last_three(self):
+        result = self._make_result(tool_events=[
+            {"name": f"tool_{i}", "status": "ok", "detail": f"result_{i}"}
+            for i in range(5)  # five completed events: tool_0 .. tool_4
+        ])
+        text = SubagentManager._format_partial_progress(result)
+        assert "tool_2" in text
+        assert "tool_3" in text
+        assert "tool_4" in text
+        assert "tool_0" not in text  # the two oldest events are dropped
+        assert "tool_1" not in text
+
+    def test_error_without_failure_event(self):
+        result = self._make_result(
+            tool_events=[{"name": "read_file", "status": "ok", "detail": "ok"}],
+            error="Something went wrong",
+        )
+        text = SubagentManager._format_partial_progress(result)
+        assert "Something went wrong" in text  # result.error is used when no event has status "error"
+
+    def test_empty_events_with_error(self):
+        result = self._make_result(error="Total failure")
+        text = SubagentManager._format_partial_progress(result)
+        assert "Total failure" in text
+
+    def test_empty_no_error_returns_fallback(self):
+        result = self._make_result()  # no events and error=None
+        text = SubagentManager._format_partial_progress(result)
+        assert "Error" in text  # only checks that the fallback text mentions "Error"
+
+
+# ---------------------------------------------------------------------------
+# cancel_by_session
+# ---------------------------------------------------------------------------
+
+
+class TestCancelBySession:  # cancel_by_session returns how many tasks it cancelled
+    @pytest.mark.asyncio
+    async def test_cancels_running_tasks(self, tmp_path):
+        sm = _manager(tmp_path)
+        block = asyncio.Event()  # gate: keeps both fake runs pending so there is something to cancel
+        async def _slow_run(spec):
+            await block.wait()
+            return AgentRunResult(final_content="done", messages=[], stop_reason="completed")
+        sm.runner.run = _slow_run
+
+        await sm.spawn("task1", session_key="s1")
+        await sm.spawn("task2", session_key="s1")
+        assert len(sm._session_tasks.get("s1", set())) == 2
+
+        count = await sm.cancel_by_session("s1")
+        assert count == 2
+        block.set()  # release the gate in case anything is still waiting on it
+        await asyncio.sleep(0.1)  # give the loop time to settle before the test returns
+
+    @pytest.mark.asyncio
+    async def test_no_tasks_returns_zero(self, tmp_path):
+        sm = _manager(tmp_path)
+        count = await sm.cancel_by_session("nonexistent")  # unknown session key must not raise
+        assert count == 0
+
+    @pytest.mark.asyncio
+    async def test_already_done_not_counted(self, tmp_path):
+        sm = _manager(tmp_path)
+        sm.runner.run = AsyncMock(return_value=AgentRunResult(
+            final_content="done", messages=[], stop_reason="completed",
+        ))
+        await sm.spawn("task1", session_key="s1")
+        await asyncio.sleep(0.1)  # Timing-based wait for the task to complete
+
+        count = await sm.cancel_by_session("s1")
+        assert count == 0
+
+
+# ---------------------------------------------------------------------------
+# get_running_count / get_running_count_by_session
+# ---------------------------------------------------------------------------
+
+
+class TestRunningCounts:  # get_running_count / get_running_count_by_session
+    @pytest.mark.asyncio
+    async def test_running_count_zero(self, tmp_path):
+        sm = _manager(tmp_path)
+        assert sm.get_running_count() == 0  # nothing spawned yet
+
+    @pytest.mark.asyncio
+    async def test_running_count_tracks_tasks(self, tmp_path):
+        sm = _manager(tmp_path)
+        block = asyncio.Event()  # gate: keeps both fake runs pending while counts are checked
+        async def _slow_run(spec):
+            await block.wait()
+            return AgentRunResult(final_content="done", messages=[], stop_reason="completed")
+        sm.runner.run = _slow_run
+
+        await sm.spawn("t1", session_key="s1")
+        await sm.spawn("t2", session_key="s1")
+        assert sm.get_running_count() == 2
+        assert sm.get_running_count_by_session("s1") == 2
+
+        block.set()
+        await asyncio.sleep(0.1)  # timing-based wait for the released tasks to finish
+        assert sm.get_running_count() == 0
+
+    @pytest.mark.asyncio
+    async def test_running_count_by_session_nonexistent(self, tmp_path):
+        sm = _manager(tmp_path)
+        assert sm.get_running_count_by_session("nonexistent") == 0  # unknown session key must not raise
+
+
+# ---------------------------------------------------------------------------
+# _SubagentHook
+# ---------------------------------------------------------------------------
+
+
+class TestSubagentHook:  # _SubagentHook callbacks, with and without an attached SubagentStatus
+    @pytest.mark.asyncio
+    async def test_before_execute_tools_logs(self, tmp_path):  # NOTE(review): tmp_path is requested but unused
+        hook = _SubagentHook("t1")
+        tool_call = MagicMock()
+        tool_call.name = "read_file"  # set after construction: MagicMock(name=...) would name the mock instead
+        tool_call.arguments = {"path": "/tmp/test"}
+        ctx = _make_hook_context(tool_calls=[tool_call])
+        # Smoke test: passes as long as the call does not raise; no log output is asserted
+        await hook.before_execute_tools(ctx)
+
+    @pytest.mark.asyncio
+    async def test_after_iteration_updates_status(self):
+        status = SubagentStatus(
+            task_id="t1", label="test", task_description="do", started_at=time.monotonic(),
+        )
+        hook = _SubagentHook("t1", status)
+        ctx = _make_hook_context(
+            iteration=3,
+            tool_events=[{"name": "read_file", "status": "ok", "detail": ""}],
+            usage={"prompt_tokens": 100},
+        )
+        await hook.after_iteration(ctx)
+        assert status.iteration == 3  # context fields are mirrored onto the status object
+        assert len(status.tool_events) == 1
+        assert status.usage == {"prompt_tokens": 100}
+
+    @pytest.mark.asyncio
+    async def test_after_iteration_no_status_noop(self):
+        hook = _SubagentHook("t1", status=None)
+        ctx = _make_hook_context(iteration=5)
+        # Smoke test: with no status attached the call must simply not raise
+        await hook.after_iteration(ctx)
+
+    @pytest.mark.asyncio
+    async def test_after_iteration_sets_error(self):
+        status = SubagentStatus(
+            task_id="t1", label="test", task_description="do", started_at=time.monotonic(),
+        )
+        hook = _SubagentHook("t1", status)
+        ctx = _make_hook_context(error="something broke")
+        await hook.after_iteration(ctx)
+        assert status.error == "something broke"  # the context error is copied to the status