diff --git a/tests/agent/conftest.py b/tests/agent/conftest.py new file mode 100644 index 000000000..57f678aa9 --- /dev/null +++ b/tests/agent/conftest.py @@ -0,0 +1,93 @@ +"""Shared fixtures and helpers for agent tests.""" + +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from nanobot.agent.loop import AgentLoop +from nanobot.bus.queue import MessageBus +from nanobot.providers.base import LLMProvider + + +def make_provider( + default_model: str = "test-model", + *, + max_tokens: int = 4096, + spec: bool = True, +) -> MagicMock: + """Create a spec-limited LLM provider mock.""" + mock_type = MagicMock(spec=LLMProvider) if spec else MagicMock() + provider = mock_type + provider.get_default_model.return_value = default_model + provider.generation = SimpleNamespace( + max_tokens=max_tokens, + temperature=0.1, + reasoning_effort=None, + ) + provider.estimate_prompt_tokens.return_value = (10_000, "test") + return provider + + +def make_loop( + tmp_path: Path, + *, + model: str = "test-model", + context_window_tokens: int = 128_000, + session_ttl_minutes: int = 0, + max_messages: int = 120, + unified_session: bool = False, + mcp_servers: dict | None = None, + tools_config=None, + model_presets: dict | None = None, + hooks: list | None = None, + provider: MagicMock | None = None, + patch_deps: bool = False, +) -> AgentLoop: + """Create a real AgentLoop for testing. + + Args: + patch_deps: If True, patch ContextBuilder/SessionManager/SubagentManager + during construction (needed when workspace has no real files). + """ + bus = MessageBus() + if provider is None: + provider = make_provider(default_model=model) + + kwargs = dict( + bus=bus, + provider=provider, + workspace=tmp_path, + model=model, + context_window_tokens=context_window_tokens, + session_ttl_minutes=session_ttl_minutes, + max_messages=max_messages, + unified_session=unified_session, + ) + if mcp_servers is not None: + kwargs["mcp_servers"] = mcp_servers + if tools_config is not None: + kwargs["tools_config"] = tools_config + if model_presets is not None: + kwargs["model_presets"] = model_presets + if hooks is not None: + kwargs["hooks"] = hooks + + if patch_deps: + with patch("nanobot.agent.loop.ContextBuilder"), \ + patch("nanobot.agent.loop.SessionManager"), \ + patch("nanobot.agent.loop.SubagentManager") as MockSubMgr: + MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0) + return AgentLoop(**kwargs) + return AgentLoop(**kwargs) + + +@pytest.fixture +def loop_factory(tmp_path): + """Fixture providing a factory for creating AgentLoop instances.""" + def _factory(**kwargs): + return make_loop(tmp_path, **kwargs) + return _factory diff --git a/tests/agent/test_autocompact_unit.py b/tests/agent/test_autocompact_unit.py new file mode 100644 index 000000000..d501770dd --- /dev/null +++ b/tests/agent/test_autocompact_unit.py @@ -0,0 +1,554 @@ +"""Direct unit tests for AutoCompact class methods in isolation.""" + +from datetime import datetime, timedelta +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from nanobot.agent.autocompact import AutoCompact +from nanobot.session.manager import Session, SessionManager + + +def _make_session( + key: str = "cli:test", + messages: list | None = None, + last_consolidated: int = 0, + updated_at: datetime | None = None, + metadata: dict | None = None, +) -> Session: + """Create a Session with sensible defaults for testing.""" + session = Session( + key=key, + messages=messages or [], + metadata=metadata or {}, + last_consolidated=last_consolidated, + ) + if updated_at is not None: + session.updated_at = updated_at + return session + + +def _make_autocompact( + ttl: int = 15, + sessions: SessionManager | None = None, + consolidator: MagicMock | None = None, +) -> AutoCompact: + """Create an AutoCompact with mock dependencies.""" + if sessions is None: + sessions = MagicMock(spec=SessionManager) + if consolidator is None: + consolidator = MagicMock() + consolidator.archive = AsyncMock(return_value="Summary.") + return AutoCompact( + sessions=sessions, + consolidator=consolidator, + session_ttl_minutes=ttl, + ) + + +def _add_turns(session: Session, turns: int, *, prefix: str = "msg") -> None: + """Append simple user/assistant turns to a session.""" + for i in range(turns): + session.add_message("user", f"{prefix} user {i}") + session.add_message("assistant", f"{prefix} assistant {i}") + + +# --------------------------------------------------------------------------- +# __init__ +# --------------------------------------------------------------------------- + + +class TestInit: + """Test AutoCompact.__init__ stores constructor arguments correctly.""" + + def test_stores_ttl(self): + """_ttl should match session_ttl_minutes argument.""" + ac = _make_autocompact(ttl=30) + assert ac._ttl == 30 + + def test_default_ttl_is_zero(self): + """Default TTL should be 0.""" + ac = _make_autocompact(ttl=0) + assert ac._ttl == 0 + + def test_archiving_set_is_empty(self): + """_archiving should start as an empty set.""" + ac = _make_autocompact() + assert ac._archiving == set() + + def test_summaries_dict_is_empty(self): + """_summaries should start as an empty dict.""" + ac = _make_autocompact() + assert ac._summaries == {} + + def test_stores_sessions_reference(self): + """sessions attribute should reference the passed SessionManager.""" + mock_sm = MagicMock(spec=SessionManager) + ac = _make_autocompact(sessions=mock_sm) + assert ac.sessions is mock_sm + + def test_stores_consolidator_reference(self): + """consolidator attribute should reference the passed Consolidator.""" + mock_c = MagicMock() + ac = _make_autocompact(consolidator=mock_c) + assert ac.consolidator is mock_c + + +# --------------------------------------------------------------------------- +# _is_expired +# --------------------------------------------------------------------------- + + +class TestIsExpired: + """Test AutoCompact._is_expired edge cases.""" + + def test_ttl_zero_always_false(self): + """TTL=0 means auto-compact is disabled; always returns False.""" + ac = _make_autocompact(ttl=0) + old = datetime.now() - timedelta(days=365) + assert ac._is_expired(old) is False + + def test_none_timestamp_returns_false(self): + """None timestamp should return False.""" + ac = _make_autocompact(ttl=15) + assert ac._is_expired(None) is False + + def test_empty_string_timestamp_returns_false(self): + """Empty string timestamp should return False (falsy).""" + ac = _make_autocompact(ttl=15) + assert ac._is_expired("") is False + + def test_exactly_at_boundary_is_expired(self): + """Timestamp exactly at TTL boundary should be expired (>=).""" + ac = _make_autocompact(ttl=15) + now = datetime(2026, 1, 1, 12, 0, 0) + ts = now - timedelta(minutes=15) + assert ac._is_expired(ts, now=now) is True + + def test_just_under_boundary_not_expired(self): + """Timestamp just under TTL boundary should NOT be expired.""" + ac = _make_autocompact(ttl=15) + now = datetime(2026, 1, 1, 12, 0, 0) + ts = now - timedelta(minutes=14, seconds=59) + assert ac._is_expired(ts, now=now) is False + + def test_iso_string_parses_correctly(self): + """ISO format string timestamp should be parsed and evaluated.""" + ac = _make_autocompact(ttl=15) + now = datetime(2026, 1, 1, 12, 0, 0) + ts = (now - timedelta(minutes=20)).isoformat() + assert ac._is_expired(ts, now=now) is True + + def test_custom_now_parameter(self): + """Custom 'now' parameter should override datetime.now().""" + ac = _make_autocompact(ttl=10) + ts = datetime(2026, 1, 1, 10, 0, 0) + # 9 minutes later → not expired + now_under = datetime(2026, 1, 1, 10, 9, 0) + assert ac._is_expired(ts, now=now_under) is False + # 10 minutes later → expired + now_over = datetime(2026, 1, 1, 10, 10, 0) + assert ac._is_expired(ts, now=now_over) is True + + +# --------------------------------------------------------------------------- +# _format_summary +# --------------------------------------------------------------------------- + + +class TestFormatSummary: + """Test AutoCompact._format_summary static method.""" + + def test_contains_isoformat_timestamp(self): + """Output should contain last_active as isoformat.""" + last_active = datetime(2026, 5, 13, 14, 30, 0) + result = AutoCompact._format_summary("Some text", last_active) + assert "2026-05-13T14:30:00" in result + + def test_contains_summary_text(self): + """Output should contain the provided text verbatim.""" + last_active = datetime(2026, 1, 1) + result = AutoCompact._format_summary("User discussed Python.", last_active) + assert "User discussed Python." in result + + def test_output_starts_with_label(self): + """Output should start with the standard prefix.""" + last_active = datetime(2026, 1, 1) + result = AutoCompact._format_summary("text", last_active) + assert result.startswith("Previous conversation summary (last active ") + + +# --------------------------------------------------------------------------- +# _split_unconsolidated +# --------------------------------------------------------------------------- + + +class TestSplitUnconsolidated: + """Test AutoCompact._split_unconsolidated splitting logic.""" + + def test_empty_session_returns_both_empty(self): + """Empty session should return ([], []).""" + ac = _make_autocompact() + session = _make_session(messages=[]) + archive, kept = ac._split_unconsolidated(session) + assert archive == [] + assert kept == [] + + def test_all_messages_archivable_when_more_than_suffix(self): + """Session with many messages should archive a prefix and keep suffix.""" + ac = _make_autocompact() + msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] + session = _make_session(messages=msgs) + archive, kept = ac._split_unconsolidated(session) + assert len(archive) > 0 + assert len(kept) <= AutoCompact._RECENT_SUFFIX_MESSAGES + + def test_fewer_messages_than_suffix_returns_empty_archive(self): + """Session with fewer messages than suffix should have empty archive.""" + ac = _make_autocompact() + msgs = [{"role": "user", "content": f"u{i}"} for i in range(3)] + session = _make_session(messages=msgs) + archive, kept = ac._split_unconsolidated(session) + assert archive == [] + assert len(kept) == len(msgs) + + def test_respects_last_consolidated_offset(self): + """Only messages after last_consolidated should be considered.""" + ac = _make_autocompact() + msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] + # First 10 are already consolidated + session = _make_session(messages=msgs, last_consolidated=10) + archive, kept = ac._split_unconsolidated(session) + # Only the tail of 10 messages is considered for splitting + assert all(m["content"] in [f"u{i}" for i in range(10, 20)] for m in kept) + assert all(m["content"] in [f"u{i}" for i in range(10, 20)] for m in archive) + + def test_retain_recent_legal_suffix_keeps_last_n(self): + """The kept suffix should be at most _RECENT_SUFFIX_MESSAGES long.""" + ac = _make_autocompact() + # 20 user messages = 20 messages total, all after last_consolidated=0 + msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] + session = _make_session(messages=msgs) + archive, kept = ac._split_unconsolidated(session) + assert len(kept) <= AutoCompact._RECENT_SUFFIX_MESSAGES + assert len(archive) == len(msgs) - len(kept) + + +# --------------------------------------------------------------------------- +# check_expired +# --------------------------------------------------------------------------- + + +class TestCheckExpired: + """Test AutoCompact.check_expired scheduling logic.""" + + def test_empty_sessions_list(self): + """No sessions → schedule_background should never be called.""" + ac = _make_autocompact(ttl=15) + mock_sm = MagicMock(spec=SessionManager) + mock_sm.list_sessions.return_value = [] + ac.sessions = mock_sm + scheduler = MagicMock() + ac.check_expired(scheduler) + scheduler.assert_not_called() + + def test_expired_session_schedules_background(self): + """Expired session should trigger schedule_background.""" + ac = _make_autocompact(ttl=15) + mock_sm = MagicMock(spec=SessionManager) + old_ts = (datetime.now() - timedelta(minutes=20)).isoformat() + mock_sm.list_sessions.return_value = [{"key": "cli:old", "updated_at": old_ts}] + ac.sessions = mock_sm + scheduler = MagicMock() + ac.check_expired(scheduler) + scheduler.assert_called_once() + assert "cli:old" in ac._archiving + + def test_active_session_key_skips(self): + """Session in active_session_keys should be skipped.""" + ac = _make_autocompact(ttl=15) + mock_sm = MagicMock(spec=SessionManager) + old_ts = (datetime.now() - timedelta(minutes=20)).isoformat() + mock_sm.list_sessions.return_value = [{"key": "cli:busy", "updated_at": old_ts}] + ac.sessions = mock_sm + scheduler = MagicMock() + ac.check_expired(scheduler, active_session_keys={"cli:busy"}) + scheduler.assert_not_called() + + def test_session_already_in_archiving_skips(self): + """Session already in _archiving set should be skipped.""" + ac = _make_autocompact(ttl=15) + mock_sm = MagicMock(spec=SessionManager) + old_ts = (datetime.now() - timedelta(minutes=20)).isoformat() + mock_sm.list_sessions.return_value = [{"key": "cli:dup", "updated_at": old_ts}] + ac.sessions = mock_sm + ac._archiving.add("cli:dup") + scheduler = MagicMock() + ac.check_expired(scheduler) + scheduler.assert_not_called() + + def test_session_with_no_key_skips(self): + """Session info with empty/missing key should be skipped.""" + ac = _make_autocompact(ttl=15) + mock_sm = MagicMock(spec=SessionManager) + mock_sm.list_sessions.return_value = [{"key": "", "updated_at": "old"}] + ac.sessions = mock_sm + scheduler = MagicMock() + ac.check_expired(scheduler) + scheduler.assert_not_called() + + def test_session_with_missing_key_field_skips(self): + """Session info dict without 'key' field should be skipped.""" + ac = _make_autocompact(ttl=15) + mock_sm = MagicMock(spec=SessionManager) + mock_sm.list_sessions.return_value = [{"updated_at": "old"}] + ac.sessions = mock_sm + scheduler = MagicMock() + ac.check_expired(scheduler) + scheduler.assert_not_called() + + +# --------------------------------------------------------------------------- +# _archive +# --------------------------------------------------------------------------- + + +class TestArchive: + """Test AutoCompact._archive async method.""" + + @pytest.mark.asyncio + async def test_empty_session_updates_timestamp_no_archive_call(self): + """Empty session should refresh updated_at and not call consolidator.archive.""" + ac = _make_autocompact() + mock_sm = MagicMock(spec=SessionManager) + empty_session = _make_session(messages=[]) + mock_sm.get_or_create.return_value = empty_session + ac.sessions = mock_sm + ac.consolidator.archive = AsyncMock(return_value="Summary.") + + await ac._archive("cli:test") + + ac.consolidator.archive.assert_not_called() + mock_sm.save.assert_called_once_with(empty_session) + # updated_at was refreshed + assert empty_session.updated_at > datetime.now() - timedelta(seconds=5) + + @pytest.mark.asyncio + async def test_archive_returns_empty_string_no_summary_stored(self): + """If archive returns empty string, no summary should be stored.""" + ac = _make_autocompact() + mock_sm = MagicMock(spec=SessionManager) + msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] + session = _make_session(messages=msgs) + mock_sm.get_or_create.return_value = session + ac.sessions = mock_sm + ac.consolidator.archive = AsyncMock(return_value="") + + await ac._archive("cli:test") + + assert "cli:test" not in ac._summaries + + @pytest.mark.asyncio + async def test_archive_returns_nothing_no_summary_stored(self): + """If archive returns '(nothing)', no summary should be stored.""" + ac = _make_autocompact() + mock_sm = MagicMock(spec=SessionManager) + msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] + session = _make_session(messages=msgs) + mock_sm.get_or_create.return_value = session + ac.sessions = mock_sm + ac.consolidator.archive = AsyncMock(return_value="(nothing)") + + await ac._archive("cli:test") + + assert "cli:test" not in ac._summaries + + @pytest.mark.asyncio + async def test_archive_exception_caught_key_removed_from_archiving(self): + """If archive raises, exception is caught and key removed from _archiving.""" + ac = _make_autocompact() + mock_sm = MagicMock(spec=SessionManager) + msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] + session = _make_session(messages=msgs) + mock_sm.get_or_create.return_value = session + ac.sessions = mock_sm + ac.consolidator.archive = AsyncMock(side_effect=RuntimeError("LLM down")) + + # Should not raise + await ac._archive("cli:test") + + assert "cli:test" not in ac._archiving + + @pytest.mark.asyncio + async def test_successful_archive_stores_summary_in_summaries_and_metadata(self): + """Successful archive should store summary in _summaries dict and metadata.""" + ac = _make_autocompact() + mock_sm = MagicMock(spec=SessionManager) + msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] + last_active = datetime(2026, 5, 13, 10, 0, 0) + session = _make_session(messages=msgs, updated_at=last_active) + mock_sm.get_or_create.return_value = session + ac.sessions = mock_sm + ac.consolidator.archive = AsyncMock(return_value="User discussed AI.") + + await ac._archive("cli:test") + + # _summaries + entry = ac._summaries.get("cli:test") + assert entry is not None + assert entry[0] == "User discussed AI." + assert entry[1] == last_active + # metadata + meta = session.metadata.get("_last_summary") + assert meta is not None + assert meta["text"] == "User discussed AI." + assert "last_active" in meta + + @pytest.mark.asyncio + async def test_finally_block_always_removes_from_archiving(self): + """Finally block should always remove key from _archiving, even on error.""" + ac = _make_autocompact() + mock_sm = MagicMock(spec=SessionManager) + msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] + session = _make_session(messages=msgs) + mock_sm.get_or_create.return_value = session + ac.sessions = mock_sm + ac.consolidator.archive = AsyncMock(side_effect=RuntimeError("fail")) + + # Pre-add key to archiving to verify it gets removed + ac._archiving.add("cli:test") + await ac._archive("cli:test") + assert "cli:test" not in ac._archiving + + @pytest.mark.asyncio + async def test_finally_removes_from_archiving_on_success(self): + """Finally block should remove key from _archiving on success too.""" + ac = _make_autocompact() + mock_sm = MagicMock(spec=SessionManager) + msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] + session = _make_session(messages=msgs) + mock_sm.get_or_create.return_value = session + ac.sessions = mock_sm + ac.consolidator.archive = AsyncMock(return_value="Summary.") + + ac._archiving.add("cli:test") + await ac._archive("cli:test") + assert "cli:test" not in ac._archiving + + +# --------------------------------------------------------------------------- +# prepare_session +# --------------------------------------------------------------------------- + + +class TestPrepareSession: + """Test AutoCompact.prepare_session logic.""" + + def test_key_in_archiving_reloads_session(self): + """If key is in _archiving, session should be reloaded via get_or_create.""" + ac = _make_autocompact() + mock_sm = MagicMock(spec=SessionManager) + reloaded = _make_session(key="cli:test") + mock_sm.get_or_create.return_value = reloaded + ac.sessions = mock_sm + ac._archiving.add("cli:test") + + original_session = _make_session() + result_session, summary = ac.prepare_session(original_session, "cli:test") + + mock_sm.get_or_create.assert_called_once_with("cli:test") + assert result_session is reloaded + + def test_expired_session_reloads(self): + """If session is expired, it should be reloaded via get_or_create.""" + ac = _make_autocompact(ttl=15) + mock_sm = MagicMock(spec=SessionManager) + reloaded = _make_session(key="cli:test", updated_at=datetime.now()) + mock_sm.get_or_create.return_value = reloaded + ac.sessions = mock_sm + + old_session = _make_session(updated_at=datetime.now() - timedelta(minutes=20)) + result_session, summary = ac.prepare_session(old_session, "cli:test") + + mock_sm.get_or_create.assert_called_once_with("cli:test") + assert result_session is reloaded + + def test_hot_path_summary_from_summaries(self): + """Summary from _summaries dict should be returned (hot path).""" + ac = _make_autocompact() + session = _make_session() + last_active = datetime(2026, 5, 13, 14, 0, 0) + ac._summaries["cli:test"] = ("Hot summary.", last_active) + + result_session, summary = ac.prepare_session(session, "cli:test") + + assert result_session is session + assert summary is not None + assert "Hot summary." in summary + assert "Previous conversation summary" in summary + + def test_hot_path_pops_summary_one_shot(self): + """Hot path should pop the summary (one-shot; second call returns None).""" + ac = _make_autocompact() + session = _make_session() + last_active = datetime(2026, 1, 1) + ac._summaries["cli:test"] = ("One-shot.", last_active) + + _, summary1 = ac.prepare_session(session, "cli:test") + assert summary1 is not None + # Second call: hot path entry was popped + _, summary2 = ac.prepare_session(session, "cli:test") + assert summary2 is None + + def test_cold_path_summary_from_metadata(self): + """When _summaries is empty, summary should come from metadata (cold path).""" + ac = _make_autocompact() + last_active = datetime(2026, 5, 13, 14, 0, 0) + session = _make_session(metadata={ + "_last_summary": { + "text": "Cold summary.", + "last_active": last_active.isoformat(), + }, + }) + + result_session, summary = ac.prepare_session(session, "cli:test") + + assert result_session is session + assert summary is not None + assert "Cold summary." in summary + + def test_no_summary_available_returns_none(self): + """When no summary is available, should return (session, None).""" + ac = _make_autocompact() + session = _make_session() + + result_session, summary = ac.prepare_session(session, "cli:test") + + assert result_session is session + assert summary is None + + def test_cold_path_metadata_not_dict_returns_none(self): + """If metadata _last_summary is not a dict, should return None summary.""" + ac = _make_autocompact() + session = _make_session(metadata={"_last_summary": "not a dict"}) + + result_session, summary = ac.prepare_session(session, "cli:test") + + assert result_session is session + assert summary is None + + def test_hot_path_takes_priority_over_metadata(self): + """Hot path (_summaries) should take priority over metadata.""" + ac = _make_autocompact() + session = _make_session(metadata={ + "_last_summary": { + "text": "Cold summary.", + "last_active": datetime(2026, 1, 1).isoformat(), + }, + }) + last_active = datetime(2026, 5, 13, 14, 0, 0) + ac._summaries["cli:test"] = ("Hot summary.", last_active) + + _, summary = ac.prepare_session(session, "cli:test") + assert "Hot summary." in summary + # After hot path pops, cold path would kick in on next call diff --git a/tests/agent/test_context_builder.py b/tests/agent/test_context_builder.py new file mode 100644 index 000000000..862f1ff2b --- /dev/null +++ b/tests/agent/test_context_builder.py @@ -0,0 +1,333 @@ +"""Tests for ContextBuilder — system prompt and message assembly.""" + +import base64 +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from nanobot.agent.context import ContextBuilder + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _builder(tmp_path: Path, **kw) -> ContextBuilder: + return ContextBuilder(workspace=tmp_path, **kw) + + +# --------------------------------------------------------------------------- +# _build_runtime_context (static) +# --------------------------------------------------------------------------- + + +class TestBuildRuntimeContext: + def test_time_only(self): + ctx = ContextBuilder._build_runtime_context(None, None) + assert "[Runtime Context" in ctx + assert "[/Runtime Context]" in ctx + assert "Current Time:" in ctx + assert "Channel:" not in ctx + + def test_with_channel_and_chat_id(self): + ctx = ContextBuilder._build_runtime_context("telegram", "chat123") + assert "Channel: telegram" in ctx + assert "Chat ID: chat123" in ctx + + def test_with_sender_id(self): + ctx = ContextBuilder._build_runtime_context("cli", "direct", sender_id="user1") + assert "Sender ID: user1" in ctx + + def test_with_timezone(self): + ctx = ContextBuilder._build_runtime_context(None, None, timezone="Asia/Shanghai") + assert "Current Time:" in ctx + + def test_no_channel_no_chat_id_omits_both(self): + ctx = ContextBuilder._build_runtime_context(None, None) + assert "Channel:" not in ctx + assert "Chat ID:" not in ctx + + def test_no_sender_id_omits(self): + ctx = ContextBuilder._build_runtime_context("cli", "direct") + assert "Sender ID:" not in ctx + + +# --------------------------------------------------------------------------- +# _merge_message_content (static) +# --------------------------------------------------------------------------- + + +class TestMergeMessageContent: + def test_str_plus_str(self): + result = ContextBuilder._merge_message_content("hello", "world") + assert result == "hello\n\nworld" + + def test_empty_left_plus_str(self): + result = ContextBuilder._merge_message_content("", "world") + assert result == "world" + + def test_list_plus_list(self): + left = [{"type": "text", "text": "a"}] + right = [{"type": "text", "text": "b"}] + result = ContextBuilder._merge_message_content(left, right) + assert len(result) == 2 + assert result[0]["text"] == "a" + assert result[1]["text"] == "b" + + def test_str_plus_list(self): + right = [{"type": "text", "text": "b"}] + result = ContextBuilder._merge_message_content("hello", right) + assert len(result) == 2 + assert result[0]["text"] == "hello" + assert result[1]["text"] == "b" + + def test_list_plus_str(self): + left = [{"type": "text", "text": "a"}] + result = ContextBuilder._merge_message_content(left, "world") + assert len(result) == 2 + assert result[0]["text"] == "a" + assert result[1]["text"] == "world" + + def test_none_plus_str(self): + result = ContextBuilder._merge_message_content(None, "hello") + assert result == [{"type": "text", "text": "hello"}] + + def test_str_plus_none(self): + result = ContextBuilder._merge_message_content("hello", None) + assert result == [{"type": "text", "text": "hello"}] + + def test_none_plus_none(self): + result = ContextBuilder._merge_message_content(None, None) + assert result == [] + + def test_list_items_not_dicts_wrapped(self): + result = ContextBuilder._merge_message_content(["raw_item"], None) + assert result == [{"type": "text", "text": "raw_item"}] + + +# --------------------------------------------------------------------------- +# _load_bootstrap_files +# --------------------------------------------------------------------------- + + +class TestLoadBootstrapFiles: + def test_no_bootstrap_files(self, tmp_path): + builder = _builder(tmp_path) + assert builder._load_bootstrap_files() == "" + + def test_agents_md(self, tmp_path): + (tmp_path / "AGENTS.md").write_text("Be helpful.", encoding="utf-8") + builder = _builder(tmp_path) + result = builder._load_bootstrap_files() + assert "## AGENTS.md" in result + assert "Be helpful." in result + + def test_multiple_bootstrap_files(self, tmp_path): + (tmp_path / "AGENTS.md").write_text("Rules.", encoding="utf-8") + (tmp_path / "SOUL.md").write_text("Soul.", encoding="utf-8") + builder = _builder(tmp_path) + result = builder._load_bootstrap_files() + assert "## AGENTS.md" in result + assert "## SOUL.md" in result + assert "Rules." in result + assert "Soul." in result + + def test_all_bootstrap_files(self, tmp_path): + for name in ContextBuilder.BOOTSTRAP_FILES: + (tmp_path / name).write_text(f"Content of {name}", encoding="utf-8") + builder = _builder(tmp_path) + result = builder._load_bootstrap_files() + for name in ContextBuilder.BOOTSTRAP_FILES: + assert f"## {name}" in result + + def test_utf8_content(self, tmp_path): + (tmp_path / "AGENTS.md").write_text("用中文回复", encoding="utf-8") + builder = _builder(tmp_path) + result = builder._load_bootstrap_files() + assert "用中文回复" in result + + +# --------------------------------------------------------------------------- +# _is_template_content (static) +# --------------------------------------------------------------------------- + + +class TestIsTemplateContent: + def test_nonexistent_template_returns_false(self): + assert ContextBuilder._is_template_content("anything", "nonexistent/path.md") is False + + def test_content_matching_template(self): + from importlib.resources import files as pkg_files + tpl = pkg_files("nanobot") / "templates" / "memory" / "MEMORY.md" + if not tpl.is_file(): + pytest.skip("MEMORY.md template not bundled") + original = tpl.read_text(encoding="utf-8") + assert ContextBuilder._is_template_content(original, "memory/MEMORY.md") is True + + def test_modified_content_returns_false(self): + from importlib.resources import files as pkg_files + tpl = pkg_files("nanobot") / "templates" / "memory" / "MEMORY.md" + if not tpl.is_file(): + pytest.skip("MEMORY.md template not bundled") + assert ContextBuilder._is_template_content("totally different", "memory/MEMORY.md") is False + + +# --------------------------------------------------------------------------- +# _build_user_content +# --------------------------------------------------------------------------- + + +class TestBuildUserContent: + def test_no_media_returns_string(self, tmp_path): + builder = _builder(tmp_path) + result = builder._build_user_content("hello", None) + assert result == "hello" + + def test_empty_media_returns_string(self, tmp_path): + builder = _builder(tmp_path) + result = builder._build_user_content("hello", []) + assert result == "hello" + + def test_nonexistent_media_file_returns_string(self, tmp_path): + builder = _builder(tmp_path) + result = builder._build_user_content("hello", ["/nonexistent/image.png"]) + assert result == "hello" + + def test_non_image_file_returns_string(self, tmp_path): + txt = tmp_path / "doc.txt" + txt.write_text("not an image", encoding="utf-8") + builder = _builder(tmp_path) + result = builder._build_user_content("hello", [str(txt)]) + assert result == "hello" + + def test_valid_image_returns_list(self, tmp_path): + png = tmp_path / "test.png" + png.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 16) + builder = _builder(tmp_path) + result = builder._build_user_content("hello", [str(png)]) + assert isinstance(result, list) + assert len(result) == 2 + assert result[0]["type"] == "image_url" + assert result[0]["image_url"]["url"].startswith("data:image/png;base64,") + assert result[1]["type"] == "text" + assert result[1]["text"] == "hello" + + def test_image_meta_includes_path(self, tmp_path): + png = tmp_path / "test.png" + png.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 16) + builder = _builder(tmp_path) + result = builder._build_user_content("hello", [str(png)]) + assert "_meta" in result[0] + assert "path" in result[0]["_meta"] + + +# --------------------------------------------------------------------------- +# build_system_prompt +# --------------------------------------------------------------------------- + + +class TestBuildSystemPrompt: + def test_returns_nonempty_string(self, tmp_path): + builder = _builder(tmp_path) + result = builder.build_system_prompt() + assert isinstance(result, str) + assert len(result) > 0 + + def test_includes_identity_section(self, tmp_path): + builder = _builder(tmp_path) + result = builder.build_system_prompt() + assert "workspace" in result.lower() or "python" in result.lower() + + def test_includes_bootstrap_files(self, tmp_path): + (tmp_path / "AGENTS.md").write_text("Be helpful and concise.", encoding="utf-8") + builder = _builder(tmp_path) + result = builder.build_system_prompt() + assert "Be helpful and concise." in result + + def test_includes_session_summary(self, tmp_path): + builder = _builder(tmp_path) + result = builder.build_system_prompt(session_summary="Previous chat about Python.") + assert "Previous chat about Python." in result + assert "[Archived Context Summary]" in result + + def test_sections_separated_by_separator(self, tmp_path): + (tmp_path / "AGENTS.md").write_text("Rules.", encoding="utf-8") + builder = _builder(tmp_path) + result = builder.build_system_prompt(session_summary="Summary.") + assert "\n\n---\n\n" in result + + def test_no_bootstrap_no_summary(self, tmp_path): + builder = _builder(tmp_path) + result = builder.build_system_prompt() + assert "## AGENTS.md" not in result + assert "[Archived Context Summary]" not in result + + +# --------------------------------------------------------------------------- +# build_messages +# --------------------------------------------------------------------------- + + +class TestBuildMessages: + def test_basic_empty_history(self, tmp_path): + builder = _builder(tmp_path) + messages = builder.build_messages([], "hello") + assert len(messages) == 2 + assert messages[0]["role"] == "system" + assert messages[1]["role"] == "user" + assert "hello" in str(messages[1]["content"]) + + def test_runtime_context_injected(self, tmp_path): + builder = _builder(tmp_path) + messages = builder.build_messages([], "hello", channel="cli", chat_id="direct") + user_msg = str(messages[-1]["content"]) + assert "[Runtime Context" in user_msg + assert "hello" in user_msg + + def test_consecutive_same_role_merged(self, tmp_path): + builder = _builder(tmp_path) + history = [{"role": "user", "content": "previous user message"}] + messages = builder.build_messages(history, "new message") + assert len(messages) == 2 # system + merged user + assert "previous user message" in str(messages[1]["content"]) + assert "new message" in str(messages[1]["content"]) + + def test_different_role_appended(self, tmp_path): + builder = _builder(tmp_path) + history = [{"role": "assistant", "content": "previous response"}] + messages = builder.build_messages(history, "new message") + assert len(messages) == 3 # system + assistant + user + + def test_media_with_history(self, tmp_path): + png = tmp_path / "img.png" + png.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 16) + builder = _builder(tmp_path) + history = [{"role": "assistant", "content": "see this"}] + messages = builder.build_messages(history, "check image", media=[str(png)]) + user_msg = messages[-1]["content"] + assert isinstance(user_msg, list) + assert any(b.get("type") == "image_url" for b in user_msg) + + +# --------------------------------------------------------------------------- +# add_tool_result +# --------------------------------------------------------------------------- + + +class TestAddToolResult: + def test_appends_tool_message(self, tmp_path): + builder = _builder(tmp_path) + msgs = [{"role": "user", "content": "hello"}] + result = builder.add_tool_result(msgs, "call_123", "read_file", "file content") + assert len(result) == 2 + assert result[1]["role"] == "tool" + assert result[1]["tool_call_id"] == "call_123" + assert result[1]["name"] == "read_file" + assert result[1]["content"] == "file content" + + def test_returns_same_list(self, tmp_path): + builder = _builder(tmp_path) + msgs = [] + result = builder.add_tool_result(msgs, "id", "tool", "ok") + assert result is msgs diff --git a/tests/agent/test_loop_runner_integration.py b/tests/agent/test_loop_runner_integration.py new file mode 100644 index 000000000..3cfe07f41 --- /dev/null +++ b/tests/agent/test_loop_runner_integration.py @@ -0,0 +1,301 @@ +"""Tests for AgentLoop integration with AgentRunner: streaming, think-filter, error handling, subagent.""" + +from __future__ import annotations + +import asyncio +import time +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from nanobot.config.schema import AgentDefaults +from nanobot.providers.base import LLMResponse, ToolCallRequest + +_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars + + +def _make_loop(tmp_path): + from nanobot.agent.loop import AgentLoop + from nanobot.bus.queue import MessageBus + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + + with patch("nanobot.agent.loop.ContextBuilder"), \ + patch("nanobot.agent.loop.SessionManager"), \ + patch("nanobot.agent.loop.SubagentManager") as MockSubMgr: + MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0) + loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path) + return loop + +@pytest.mark.asyncio +async def test_loop_max_iterations_message_stays_stable(tmp_path): + loop = _make_loop(tmp_path) + loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse( + content="working", + tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})], + )) + loop.tools.get_definitions = MagicMock(return_value=[]) + loop.tools.execute = AsyncMock(return_value="ok") + loop.max_iterations = 2 + + final_content, _, _, _, _ = await loop._run_agent_loop([]) + + assert final_content == ( + "I reached the maximum number of tool call iterations (2) " + "without completing the task. You can try breaking the task into smaller steps." + ) + + +@pytest.mark.asyncio +async def test_loop_stream_filter_handles_think_only_prefix_without_crashing(tmp_path): + loop = _make_loop(tmp_path) + deltas: list[str] = [] + endings: list[bool] = [] + + async def chat_stream_with_retry(*, on_content_delta, **kwargs): + await on_content_delta("hidden") + await on_content_delta("Hello") + return LLMResponse(content="hiddenHello", tool_calls=[], usage={}) + + loop.provider.chat_stream_with_retry = chat_stream_with_retry + + async def on_stream(delta: str) -> None: + deltas.append(delta) + + async def on_stream_end(*, resuming: bool = False) -> None: + endings.append(resuming) + + final_content, _, _, _, _ = await loop._run_agent_loop( + [], + on_stream=on_stream, + on_stream_end=on_stream_end, + ) + + assert final_content == "Hello" + assert deltas == ["Hello"] + assert endings == [False] + + +@pytest.mark.asyncio +async def test_loop_stream_filter_hides_partial_trailing_think_prefix(tmp_path): + loop = _make_loop(tmp_path) + deltas: list[str] = [] + + async def chat_stream_with_retry(*, on_content_delta, **kwargs): + await on_content_delta("Hello hiddenWorld") + return LLMResponse(content="Hello hiddenWorld", tool_calls=[], usage={}) + + loop.provider.chat_stream_with_retry = chat_stream_with_retry + + async def on_stream(delta: str) -> None: + deltas.append(delta) + + final_content, _, _, _, _ = await loop._run_agent_loop([], on_stream=on_stream) + + assert final_content == "Hello World" + assert deltas == ["Hello", " World"] + + +@pytest.mark.asyncio +async def test_loop_stream_filter_hides_complete_trailing_think_tag(tmp_path): + loop = _make_loop(tmp_path) + deltas: list[str] = [] + + async def chat_stream_with_retry(*, on_content_delta, **kwargs): + await on_content_delta("Hello ") + await on_content_delta("hiddenWorld") + return LLMResponse(content="Hello hiddenWorld", tool_calls=[], usage={}) + + loop.provider.chat_stream_with_retry = chat_stream_with_retry + + async def on_stream(delta: str) -> None: + deltas.append(delta) + + final_content, _, _, _, _ = await loop._run_agent_loop([], on_stream=on_stream) + + assert final_content == "Hello World" + assert deltas == ["Hello", " World"] + + +@pytest.mark.asyncio +async def test_loop_retries_think_only_final_response(tmp_path): + loop = _make_loop(tmp_path) + call_count = {"n": 0} + + async def chat_with_retry(**kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + return LLMResponse(content="hidden", tool_calls=[], usage={}) + return LLMResponse(content="Recovered answer", tool_calls=[], usage={}) + + loop.provider.chat_with_retry = chat_with_retry + + final_content, _, _, _, _ = await loop._run_agent_loop([]) + + assert final_content == "Recovered answer" + assert call_count["n"] == 2 + + +@pytest.mark.asyncio +async def test_streamed_flag_not_set_on_llm_error(tmp_path): + """When LLM errors during a streaming-capable channel interaction, + _streamed must NOT be set so ChannelManager delivers the error.""" + from nanobot.agent.loop import AgentLoop + from nanobot.bus.events import InboundMessage + from nanobot.bus.queue import MessageBus + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model") + error_resp = LLMResponse( + content="503 service unavailable", finish_reason="error", tool_calls=[], usage={}, + ) + loop.provider.chat_with_retry = AsyncMock(return_value=error_resp) + loop.provider.chat_stream_with_retry = AsyncMock(return_value=error_resp) + loop.tools.get_definitions = MagicMock(return_value=[]) + + msg = InboundMessage( + channel="feishu", sender_id="u1", chat_id="c1", content="hi", + ) + result = await loop._process_message( + msg, + on_stream=AsyncMock(), + on_stream_end=AsyncMock(), + ) + + assert result is not None + assert "503" in result.content + assert not result.metadata.get("_streamed"), \ + "_streamed must not be set when stop_reason is error" + + +@pytest.mark.asyncio +async def test_ssrf_soft_block_can_finalize_after_streamed_tool_call(tmp_path): + from nanobot.agent.loop import AgentLoop + from nanobot.bus.events import InboundMessage + from nanobot.bus.queue import MessageBus + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + tool_call_resp = LLMResponse( + content="checking metadata", + tool_calls=[ToolCallRequest( + id="call_ssrf", + name="exec", + arguments={"command": "curl http://169.254.169.254/latest/meta-data/"}, + )], + usage={}, + ) + provider.chat_stream_with_retry = AsyncMock(side_effect=[ + tool_call_resp, + LLMResponse( + content="I cannot access private URLs. Please share the local file.", + tool_calls=[], + usage={}, + ), + ]) + + loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model") + loop.tools.get_definitions = MagicMock(return_value=[]) + loop.tools.prepare_call = MagicMock(return_value=(None, {}, None)) + loop.tools.execute = AsyncMock(return_value=( + "Error: Command blocked by safety guard (internal/private URL detected)" + )) + + result = await loop._process_message( + InboundMessage(channel="telegram", sender_id="u1", chat_id="c1", content="hi"), + on_stream=AsyncMock(), + on_stream_end=AsyncMock(), + ) + + assert result is not None + assert result.content == "I cannot access private URLs. Please share the local file." + assert result.metadata.get("_streamed") is True + + +@pytest.mark.asyncio +async def test_next_turn_after_llm_error_keeps_turn_boundary(tmp_path): + from nanobot.agent.loop import AgentLoop + from nanobot.agent.runner import _PERSISTED_MODEL_ERROR_PLACEHOLDER + from nanobot.bus.events import InboundMessage + from nanobot.bus.queue import MessageBus + + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + provider.chat_with_retry = AsyncMock(side_effect=[ + LLMResponse(content="429 rate limit exceeded", finish_reason="error", tool_calls=[], usage={}), + LLMResponse(content="Recovered answer", tool_calls=[], usage={}), + ]) + + loop = AgentLoop(bus=MessageBus(), provider=provider, workspace=tmp_path, model="test-model") + loop.tools.get_definitions = MagicMock(return_value=[]) + loop.consolidator.maybe_consolidate_by_tokens = AsyncMock(return_value=False) # type: ignore[method-assign] + + first = await loop._process_message( + InboundMessage(channel="cli", sender_id="user", chat_id="test", content="first question") + ) + assert first is not None + assert first.content == "429 rate limit exceeded" + + session = loop.sessions.get_or_create("cli:test") + assert [ + {key: value for key, value in message.items() if key in {"role", "content"}} + for message in session.messages + ] == [ + {"role": "user", "content": "first question"}, + {"role": "assistant", "content": _PERSISTED_MODEL_ERROR_PLACEHOLDER}, + ] + + second = await loop._process_message( + InboundMessage(channel="cli", sender_id="user", chat_id="test", content="second question") + ) + assert second is not None + assert second.content == "Recovered answer" + + request_messages = provider.chat_with_retry.await_args_list[1].kwargs["messages"] + non_system = [message for message in request_messages if message.get("role") != "system"] + assert non_system[0]["role"] == "user" + assert "first question" in non_system[0]["content"] + assert non_system[1]["role"] == "assistant" + assert _PERSISTED_MODEL_ERROR_PLACEHOLDER in non_system[1]["content"] + assert non_system[2]["role"] == "user" + assert "second question" in non_system[2]["content"] + + +@pytest.mark.asyncio +async def test_subagent_max_iterations_announces_existing_fallback(tmp_path, monkeypatch): + from nanobot.agent.subagent import SubagentManager, SubagentStatus + from nanobot.bus.queue import MessageBus + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + provider.chat_with_retry = AsyncMock(return_value=LLMResponse( + content="working", + tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})], + )) + mgr = SubagentManager( + provider=provider, + workspace=tmp_path, + bus=bus, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + ) + mgr._announce_result = AsyncMock() + + async def fake_execute(self, **kwargs): + return "tool result" + + monkeypatch.setattr("nanobot.agent.tools.filesystem.ListDirTool.execute", fake_execute) + + status = SubagentStatus(task_id="sub-1", label="label", task_description="do task", started_at=time.monotonic()) + await mgr._run_subagent("sub-1", "do task", "label", {"channel": "test", "chat_id": "c1"}, status) + + mgr._announce_result.assert_awaited_once() + args = mgr._announce_result.await_args.args + assert args[3] == "Task completed but no final response was generated." + assert args[5] == "ok" diff --git a/tests/agent/test_runner.py b/tests/agent/test_runner.py deleted file mode 100644 index b821d9bab..000000000 --- a/tests/agent/test_runner.py +++ /dev/null @@ -1,3313 +0,0 @@ -"""Tests for the shared agent runner and its integration contracts.""" - -from __future__ import annotations - -import asyncio -import base64 -import os -import time -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from nanobot.config.schema import AgentDefaults -from nanobot.agent.tools.base import Tool -from nanobot.agent.tools.registry import ToolRegistry -from nanobot.providers.base import LLMResponse, ToolCallRequest - -_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars - - -def _make_injection_callback(queue: asyncio.Queue): - """Return an async callback that drains *queue* into a list of dicts.""" - async def inject_cb(): - items = [] - while not queue.empty(): - items.append(await queue.get()) - return items - return inject_cb - - -def _make_loop(tmp_path): - from nanobot.agent.loop import AgentLoop - from nanobot.bus.queue import MessageBus - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - - with patch("nanobot.agent.loop.ContextBuilder"), \ - patch("nanobot.agent.loop.SessionManager"), \ - patch("nanobot.agent.loop.SubagentManager") as MockSubMgr: - MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0) - loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path) - return loop - - -@pytest.mark.asyncio -async def test_runner_preserves_reasoning_fields_and_tool_results(): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - captured_second_call: list[dict] = [] - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - if call_count["n"] == 1: - return LLMResponse( - content="thinking", - tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})], - reasoning_content="hidden reasoning", - thinking_blocks=[{"type": "thinking", "thinking": "step"}], - usage={"prompt_tokens": 5, "completion_tokens": 3}, - ) - captured_second_call[:] = messages - return LLMResponse(content="done", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(return_value="tool result") - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[ - {"role": "system", "content": "system"}, - {"role": "user", "content": "do task"}, - ], - tools=tools, - model="test-model", - max_iterations=3, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert result.final_content == "done" - assert result.tools_used == ["list_dir"] - assert result.tool_events == [ - {"name": "list_dir", "status": "ok", "detail": "tool result"} - ] - - assistant_messages = [ - msg for msg in captured_second_call - if msg.get("role") == "assistant" and msg.get("tool_calls") - ] - assert len(assistant_messages) == 1 - assert assistant_messages[0]["reasoning_content"] == "hidden reasoning" - assert assistant_messages[0]["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}] - assert any( - msg.get("role") == "tool" and msg.get("content") == "tool result" - for msg in captured_second_call - ) - - -@pytest.mark.asyncio -async def test_runner_calls_hooks_in_order(): - from nanobot.agent.hook import AgentHook, AgentHookContext - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - call_count = {"n": 0} - events: list[tuple] = [] - - async def chat_with_retry(**kwargs): - call_count["n"] += 1 - if call_count["n"] == 1: - return LLMResponse( - content="thinking", - tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})], - ) - return LLMResponse(content="done", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(return_value="tool result") - - class RecordingHook(AgentHook): - async def before_iteration(self, context: AgentHookContext) -> None: - events.append(("before_iteration", context.iteration)) - - async def before_execute_tools(self, context: AgentHookContext) -> None: - events.append(( - "before_execute_tools", - context.iteration, - [tc.name for tc in context.tool_calls], - )) - - async def after_iteration(self, context: AgentHookContext) -> None: - events.append(( - "after_iteration", - context.iteration, - context.final_content, - list(context.tool_results), - list(context.tool_events), - context.stop_reason, - )) - - def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None: - events.append(("finalize_content", context.iteration, content)) - return content.upper() if content else content - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[], - tools=tools, - model="test-model", - max_iterations=3, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - hook=RecordingHook(), - )) - - assert result.final_content == "DONE" - assert events == [ - ("before_iteration", 0), - ("before_execute_tools", 0, ["list_dir"]), - ( - "after_iteration", - 0, - None, - ["tool result"], - [{"name": "list_dir", "status": "ok", "detail": "tool result"}], - None, - ), - ("before_iteration", 1), - ("finalize_content", 1, "done"), - ("after_iteration", 1, "DONE", [], [], "completed"), - ] - - -@pytest.mark.asyncio -async def test_runner_streaming_hook_receives_deltas_and_end_signal(): - from nanobot.agent.hook import AgentHook, AgentHookContext - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - streamed: list[str] = [] - endings: list[bool] = [] - - async def chat_stream_with_retry(*, on_content_delta, **kwargs): - await on_content_delta("he") - await on_content_delta("llo") - return LLMResponse(content="hello", tool_calls=[], usage={}) - - provider.chat_stream_with_retry = chat_stream_with_retry - provider.chat_with_retry = AsyncMock() - tools = MagicMock() - tools.get_definitions.return_value = [] - - class StreamingHook(AgentHook): - def wants_streaming(self) -> bool: - return True - - async def on_stream(self, context: AgentHookContext, delta: str) -> None: - streamed.append(delta) - - async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None: - endings.append(resuming) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[], - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - hook=StreamingHook(), - )) - - assert result.final_content == "hello" - assert streamed == ["he", "llo"] - assert endings == [False] - provider.chat_with_retry.assert_not_awaited() - - -@pytest.mark.asyncio -async def test_runner_returns_max_iterations_fallback(): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - provider.chat_with_retry = AsyncMock(return_value=LLMResponse( - content="still working", - tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})], - )) - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(return_value="tool result") - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[], - tools=tools, - model="test-model", - max_iterations=2, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert result.stop_reason == "max_iterations" - assert result.final_content == ( - "I reached the maximum number of tool call iterations (2) " - "without completing the task. You can try breaking the task into smaller steps." - ) - assert result.messages[-1]["role"] == "assistant" - assert result.messages[-1]["content"] == result.final_content - - -@pytest.mark.asyncio -async def test_runner_times_out_hung_llm_request(): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - - async def chat_with_retry(**kwargs): - await asyncio.sleep(3600) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - runner = AgentRunner(provider) - started = time.monotonic() - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "hello"}], - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - llm_timeout_s=0.05, - )) - - assert (time.monotonic() - started) < 1.0 - assert result.stop_reason == "error" - assert "timed out" in (result.final_content or "").lower() - -@pytest.mark.asyncio -async def test_runner_returns_structured_tool_error(): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - provider.chat_with_retry = AsyncMock(return_value=LLMResponse( - content="working", - tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})], - )) - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(side_effect=RuntimeError("boom")) - - runner = AgentRunner(provider) - - result = await runner.run(AgentRunSpec( - initial_messages=[], - tools=tools, - model="test-model", - max_iterations=2, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - fail_on_tool_error=True, - )) - - assert result.stop_reason == "tool_error" - assert result.error == "Error: RuntimeError: boom" - assert result.tool_events == [ - {"name": "list_dir", "status": "error", "detail": "boom"} - ] - - -@pytest.mark.asyncio -async def test_runner_does_not_abort_on_workspace_violation_anymore(): - """v2 behavior: workspace-bound rejections are *soft* tool errors. - - Previously (PR #3493) any workspace boundary error became a fatal - RuntimeError that aborted the turn. That silently killed legitimate - workspace commands once the heuristic guard misfired (#3599 #3605), so - we now hand the error back to the LLM as a recoverable tool result and - rely on ``repeated_workspace_violation_error`` to throttle bypass loops. - """ - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - provider.chat_with_retry = AsyncMock(side_effect=[ - LLMResponse( - content="trying outside", - tool_calls=[ToolCallRequest( - id="call_1", name="read_file", arguments={"path": "/tmp/outside.md"}, - )], - ), - LLMResponse(content="ok, telling the user instead", tool_calls=[]), - ]) - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock( - side_effect=PermissionError( - "Path /tmp/outside.md is outside allowed directory /workspace" - ) - ) - - runner = AgentRunner(provider) - - result = await runner.run(AgentRunSpec( - initial_messages=[], - tools=tools, - model="test-model", - max_iterations=3, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert provider.chat_with_retry.await_count == 2, ( - "workspace violation must NOT short-circuit the loop" - ) - assert result.stop_reason != "tool_error" - assert result.error is None - assert result.final_content == "ok, telling the user instead" - assert result.tool_events and result.tool_events[0]["status"] == "error" - # Detail still carries the workspace_violation breadcrumb for telemetry, - # but the runner did not raise. - assert "workspace_violation" in result.tool_events[0]["detail"] - - -def test_is_ssrf_violation_recognizes_private_url_blocks(): - """SSRF rejections are classified separately from workspace boundaries.""" - from nanobot.agent.runner import AgentRunner - - ssrf_msg = "Error: Command blocked by safety guard (internal/private URL detected)" - assert AgentRunner._is_ssrf_violation(ssrf_msg) is True - assert AgentRunner._is_ssrf_violation( - "URL validation failed: Blocked: host resolves to private/internal address 192.168.1.2" - ) is True - - # Workspace-bound markers are NOT classified as SSRF. - assert AgentRunner._is_ssrf_violation( - "Error: Command blocked by safety guard (path outside working dir)" - ) is False - assert AgentRunner._is_ssrf_violation( - "Path /tmp/x is outside allowed directory /ws" - ) is False - # Deny / allowlist filter messages stay non-fatal too. - assert AgentRunner._is_ssrf_violation( - "Error: Command blocked by deny pattern filter" - ) is False - - -@pytest.mark.asyncio -async def test_runner_returns_non_retryable_hint_on_ssrf_violation(): - """SSRF stays blocked, but the runtime gives the LLM a final chance to recover.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - provider.chat_with_retry = AsyncMock(side_effect=[ - LLMResponse( - content="curl-ing metadata", - tool_calls=[ToolCallRequest( - id="call_ssrf", - name="exec", - arguments={"command": "curl http://169.254.169.254"}, - )], - ), - LLMResponse( - content="I cannot access that private URL. Please share local files.", - tool_calls=[], - ), - ]) - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(return_value=( - "Error: Command blocked by safety guard (internal/private URL detected)" - )) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[], - tools=tools, - model="test-model", - max_iterations=3, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert provider.chat_with_retry.await_count == 2 - assert result.stop_reason == "completed" - assert result.error is None - assert result.final_content == "I cannot access that private URL. Please share local files." - assert result.tool_events and result.tool_events[0]["detail"].startswith("ssrf_violation:") - tool_messages = [m for m in result.messages if m.get("role") == "tool"] - assert tool_messages - assert "non-bypassable security boundary" in tool_messages[0]["content"] - assert "Do not retry" in tool_messages[0]["content"] - assert "tools.ssrfWhitelist" in tool_messages[0]["content"] - - -@pytest.mark.asyncio -async def test_runner_lets_llm_recover_from_shell_guard_path_outside(): - """Reporter scenario for #3599 / #3605 -- guard hit, agent recovers. - - The shell `_guard_command` heuristic fires on `2>/dev/null`-style - redirects and other shell idioms. Before v2 that abort'd the whole - turn (silent hang on Telegram per #3605); now the LLM gets the soft - error back and can finalize on the next iteration. - """ - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - captured_second_call: list[dict] = [] - - async def chat_with_retry(*, messages, **kwargs): - if provider.chat_with_retry.await_count == 1: - return LLMResponse( - content="trying noisy cleanup", - tool_calls=[ToolCallRequest( - id="call_blocked", - name="exec", - arguments={"command": "rm scratch.txt 2>/dev/null"}, - )], - ) - captured_second_call[:] = list(messages) - return LLMResponse(content="recovered final answer", tool_calls=[]) - - provider.chat_with_retry = AsyncMock(side_effect=chat_with_retry) - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock( - return_value="Error: Command blocked by safety guard (path outside working dir)" - ) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[], - tools=tools, - model="test-model", - max_iterations=3, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert provider.chat_with_retry.await_count == 2, ( - "guard hit must NOT short-circuit the loop -- LLM should get a second turn" - ) - assert result.stop_reason != "tool_error" - assert result.error is None - assert result.final_content == "recovered final answer" - assert result.tool_events and result.tool_events[0]["status"] == "error" - # v2: detail keeps the breadcrumb but the runner did not raise. - assert "workspace_violation" in result.tool_events[0]["detail"] - - -@pytest.mark.asyncio -async def test_runner_throttles_repeated_workspace_bypass_attempts(): - """#3493 motivation: stop the LLM bypass loop without aborting the turn. - - LLM keeps switching tools (read_file -> exec cat -> python -c open(...)) - against the same outside path. After the soft retry budget is exhausted - the runner replaces the tool result with a hard "stop trying" message - so the model finally gives up and surfaces the boundary to the user. - """ - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - bypass_attempts = [ - ToolCallRequest( - id=f"a{i}", name="exec", - arguments={"command": f"cat /Users/x/Downloads/01.md # try {i}"}, - ) - for i in range(4) - ] - responses: list[LLMResponse] = [ - LLMResponse(content=f"try {i}", tool_calls=[bypass_attempts[i]]) - for i in range(4) - ] - responses.append(LLMResponse(content="ok telling user", tool_calls=[])) - - provider = MagicMock() - provider.chat_with_retry = AsyncMock(side_effect=responses) - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock( - return_value="Error: Command blocked by safety guard (path outside working dir)" - ) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[], - tools=tools, - model="test-model", - max_iterations=10, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - # All 4 bypass attempts surface to the LLM (no fatal abort), and the - # runner finally completes once the LLM stops asking. - assert result.stop_reason != "tool_error" - assert result.error is None - assert result.final_content == "ok telling user" - # The third+ attempts must have been escalated -- look at the events. - escalated = [ - ev for ev in result.tool_events - if ev["status"] == "error" - and ev["detail"].startswith("workspace_violation_escalated:") - ] - assert escalated, ( - "expected at least one escalated workspace_violation event, got: " - f"{result.tool_events}" - ) - - -@pytest.mark.asyncio -async def test_runner_persists_large_tool_results_for_follow_up_calls(tmp_path): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - captured_second_call: list[dict] = [] - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - if call_count["n"] == 1: - return LLMResponse( - content="working", - tool_calls=[ToolCallRequest(id="call_big", name="list_dir", arguments={"path": "."})], - usage={"prompt_tokens": 5, "completion_tokens": 3}, - ) - captured_second_call[:] = messages - return LLMResponse(content="done", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(return_value="x" * 20_000) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "do task"}], - tools=tools, - model="test-model", - max_iterations=2, - workspace=tmp_path, - session_key="test:runner", - max_tool_result_chars=2048, - )) - - assert result.final_content == "done" - tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool") - assert "[tool output persisted]" in tool_message["content"] - assert "tool-results" in tool_message["content"] - assert (tmp_path / ".nanobot" / "tool-results" / "test_runner" / "call_big.txt").exists() - - -def test_persist_tool_result_prunes_old_session_buckets(tmp_path): - from nanobot.utils.helpers import maybe_persist_tool_result - - root = tmp_path / ".nanobot" / "tool-results" - old_bucket = root / "old_session" - recent_bucket = root / "recent_session" - old_bucket.mkdir(parents=True) - recent_bucket.mkdir(parents=True) - (old_bucket / "old.txt").write_text("old", encoding="utf-8") - (recent_bucket / "recent.txt").write_text("recent", encoding="utf-8") - - stale = time.time() - (8 * 24 * 60 * 60) - os.utime(old_bucket, (stale, stale)) - os.utime(old_bucket / "old.txt", (stale, stale)) - - persisted = maybe_persist_tool_result( - tmp_path, - "current:session", - "call_big", - "x" * 5000, - max_chars=64, - ) - - assert "[tool output persisted]" in persisted - assert not old_bucket.exists() - assert recent_bucket.exists() - assert (root / "current_session" / "call_big.txt").exists() - - -def test_persist_tool_result_leaves_no_temp_files(tmp_path): - from nanobot.utils.helpers import maybe_persist_tool_result - - root = tmp_path / ".nanobot" / "tool-results" - maybe_persist_tool_result( - tmp_path, - "current:session", - "call_big", - "x" * 5000, - max_chars=64, - ) - - assert (root / "current_session" / "call_big.txt").exists() - assert list((root / "current_session").glob("*.tmp")) == [] - - -def test_persist_tool_result_logs_cleanup_failures(monkeypatch, tmp_path): - from nanobot.utils.helpers import maybe_persist_tool_result - - warnings: list[str] = [] - - monkeypatch.setattr( - "nanobot.utils.helpers._cleanup_tool_result_buckets", - lambda *_args, **_kwargs: (_ for _ in ()).throw(OSError("busy")), - ) - monkeypatch.setattr( - "nanobot.utils.helpers.logger.exception", - lambda message, *args: warnings.append(message.format(*args)), - ) - - persisted = maybe_persist_tool_result( - tmp_path, - "current:session", - "call_big", - "x" * 5000, - max_chars=64, - ) - - assert "[tool output persisted]" in persisted - assert warnings and "Failed to clean stale tool result buckets" in warnings[0] - - -@pytest.mark.asyncio -async def test_runner_replaces_empty_tool_result_with_marker(): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - captured_second_call: list[dict] = [] - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - if call_count["n"] == 1: - return LLMResponse( - content="working", - tool_calls=[ToolCallRequest(id="call_1", name="noop", arguments={})], - usage={}, - ) - captured_second_call[:] = messages - return LLMResponse(content="done", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(return_value="") - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "do task"}], - tools=tools, - model="test-model", - max_iterations=2, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert result.final_content == "done" - tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool") - assert tool_message["content"] == "(noop completed with no output)" - - -@pytest.mark.asyncio -async def test_runner_uses_raw_messages_when_context_governance_fails(): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - captured_messages: list[dict] = [] - - async def chat_with_retry(*, messages, **kwargs): - captured_messages[:] = messages - return LLMResponse(content="done", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - initial_messages = [ - {"role": "system", "content": "system"}, - {"role": "user", "content": "hello"}, - ] - - runner = AgentRunner(provider) - runner._snip_history = MagicMock(side_effect=RuntimeError("boom")) # type: ignore[method-assign] - result = await runner.run(AgentRunSpec( - initial_messages=initial_messages, - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert result.final_content == "done" - assert captured_messages == initial_messages - - -@pytest.mark.asyncio -async def test_runner_retries_empty_final_response_with_summary_prompt(): - """Empty responses get 2 silent retries before finalization kicks in.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - calls: list[dict] = [] - - async def chat_with_retry(*, messages, tools=None, **kwargs): - calls.append({"messages": messages, "tools": tools}) - if len(calls) <= 2: - return LLMResponse( - content=None, - tool_calls=[], - usage={"prompt_tokens": 5, "completion_tokens": 1}, - ) - return LLMResponse( - content="final answer", - tool_calls=[], - usage={"prompt_tokens": 3, "completion_tokens": 7}, - ) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "do task"}], - tools=tools, - model="test-model", - max_iterations=3, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert result.final_content == "final answer" - # 2 silent retries (iterations 0,1) + finalization on iteration 1 - assert len(calls) == 3 - assert calls[0]["tools"] is not None - assert calls[1]["tools"] is not None - assert calls[2]["tools"] is None - assert result.usage["prompt_tokens"] == 13 - assert result.usage["completion_tokens"] == 9 - - -@pytest.mark.asyncio -async def test_runner_uses_specific_message_after_empty_finalization_retry(): - """After silent retries + finalization all return empty, stop_reason is empty_final_response.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE - - provider = MagicMock() - - async def chat_with_retry(*, messages, **kwargs): - return LLMResponse(content=None, tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "do task"}], - tools=tools, - model="test-model", - max_iterations=3, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert result.final_content == EMPTY_FINAL_RESPONSE_MESSAGE - assert result.stop_reason == "empty_final_response" - - -@pytest.mark.asyncio -async def test_runner_empty_response_does_not_break_tool_chain(): - """An empty intermediate response must not kill an ongoing tool chain. - - Sequence: tool_call → empty → tool_call → final text. - The runner should recover via silent retry and complete normally. - """ - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - call_count = 0 - - async def chat_with_retry(*, messages, tools=None, **kwargs): - nonlocal call_count - call_count += 1 - if call_count == 1: - return LLMResponse( - content=None, - tool_calls=[ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a.txt"})], - usage={"prompt_tokens": 10, "completion_tokens": 5}, - ) - if call_count == 2: - return LLMResponse(content=None, tool_calls=[], usage={"prompt_tokens": 10, "completion_tokens": 1}) - if call_count == 3: - return LLMResponse( - content=None, - tool_calls=[ToolCallRequest(id="tc2", name="read_file", arguments={"path": "b.txt"})], - usage={"prompt_tokens": 10, "completion_tokens": 5}, - ) - return LLMResponse( - content="Here are the results.", - tool_calls=[], - usage={"prompt_tokens": 10, "completion_tokens": 10}, - ) - - provider.chat_with_retry = chat_with_retry - provider.chat_stream_with_retry = chat_with_retry - - async def fake_tool(name, args, **kw): - return "file content" - - tool_registry = MagicMock() - tool_registry.get_definitions.return_value = [{"type": "function", "function": {"name": "read_file"}}] - tool_registry.execute = AsyncMock(side_effect=fake_tool) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "read both files"}], - tools=tool_registry, - model="test-model", - max_iterations=10, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert result.final_content == "Here are the results." - assert result.stop_reason == "completed" - assert call_count == 4 - assert "read_file" in result.tools_used - - -def test_snip_history_drops_orphaned_tool_results_from_trimmed_slice(monkeypatch): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - tools = MagicMock() - tools.get_definitions.return_value = [] - runner = AgentRunner(provider) - messages = [ - {"role": "system", "content": "system"}, - {"role": "user", "content": "old user"}, - { - "role": "assistant", - "content": "tool call", - "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "ls", "arguments": "{}"}}], - }, - {"role": "tool", "tool_call_id": "call_1", "content": "tool output"}, - {"role": "assistant", "content": "after tool"}, - ] - spec = AgentRunSpec( - initial_messages=messages, - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - context_window_tokens=2000, - context_block_limit=100, - ) - - monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_args, **_kwargs: (500, None)) - token_sizes = { - "old user": 120, - "tool call": 120, - "tool output": 40, - "after tool": 40, - "system": 0, - } - monkeypatch.setattr( - "nanobot.agent.runner.estimate_message_tokens", - lambda msg: token_sizes.get(str(msg.get("content")), 40), - ) - - trimmed = runner._snip_history(spec, messages) - - # After the fix, the user message is recovered so the sequence is valid - # for providers that require system → user (e.g. GLM error 1214). - assert trimmed[0]["role"] == "system" - non_system = [m for m in trimmed if m["role"] != "system"] - assert non_system[0]["role"] == "user", f"Expected user after system, got {non_system[0]['role']}" - - -@pytest.mark.asyncio -async def test_runner_keeps_going_when_tool_result_persistence_fails(): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - captured_second_call: list[dict] = [] - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - if call_count["n"] == 1: - return LLMResponse( - content="working", - tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})], - usage={"prompt_tokens": 5, "completion_tokens": 3}, - ) - captured_second_call[:] = messages - return LLMResponse(content="done", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(return_value="tool result") - - runner = AgentRunner(provider) - with patch("nanobot.agent.runner.maybe_persist_tool_result", side_effect=RuntimeError("disk full")): - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "do task"}], - tools=tools, - model="test-model", - max_iterations=2, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert result.final_content == "done" - tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool") - assert tool_message["content"] == "tool result" - - -class _DelayTool(Tool): - def __init__( - self, - name: str, - *, - delay: float, - read_only: bool, - shared_events: list[str], - exclusive: bool = False, - ): - self._name = name - self._delay = delay - self._read_only = read_only - self._shared_events = shared_events - self._exclusive = exclusive - - @property - def name(self) -> str: - return self._name - - @property - def description(self) -> str: - return self._name - - @property - def parameters(self) -> dict: - return {"type": "object", "properties": {}, "required": []} - - @property - def read_only(self) -> bool: - return self._read_only - - @property - def exclusive(self) -> bool: - return self._exclusive - - async def execute(self, **kwargs): - self._shared_events.append(f"start:{self._name}") - await asyncio.sleep(self._delay) - self._shared_events.append(f"end:{self._name}") - return self._name - - -@pytest.mark.asyncio -async def test_runner_batches_read_only_tools_before_exclusive_work(): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - tools = ToolRegistry() - shared_events: list[str] = [] - read_a = _DelayTool("read_a", delay=0.05, read_only=True, shared_events=shared_events) - read_b = _DelayTool("read_b", delay=0.05, read_only=True, shared_events=shared_events) - write_a = _DelayTool("write_a", delay=0.01, read_only=False, shared_events=shared_events) - tools.register(read_a) - tools.register(read_b) - tools.register(write_a) - - runner = AgentRunner(MagicMock()) - await runner._execute_tools( - AgentRunSpec( - initial_messages=[], - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - concurrent_tools=True, - ), - [ - ToolCallRequest(id="ro1", name="read_a", arguments={}), - ToolCallRequest(id="ro2", name="read_b", arguments={}), - ToolCallRequest(id="rw1", name="write_a", arguments={}), - ], - {}, - {}, - ) - - assert shared_events[0:2] == ["start:read_a", "start:read_b"] - assert "end:read_a" in shared_events and "end:read_b" in shared_events - assert shared_events.index("end:read_a") < shared_events.index("start:write_a") - assert shared_events.index("end:read_b") < shared_events.index("start:write_a") - assert shared_events[-2:] == ["start:write_a", "end:write_a"] - - -@pytest.mark.asyncio -async def test_runner_does_not_batch_exclusive_read_only_tools(): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - tools = ToolRegistry() - shared_events: list[str] = [] - read_a = _DelayTool("read_a", delay=0.03, read_only=True, shared_events=shared_events) - read_b = _DelayTool("read_b", delay=0.03, read_only=True, shared_events=shared_events) - ddg_like = _DelayTool( - "ddg_like", - delay=0.01, - read_only=True, - shared_events=shared_events, - exclusive=True, - ) - tools.register(read_a) - tools.register(ddg_like) - tools.register(read_b) - - runner = AgentRunner(MagicMock()) - await runner._execute_tools( - AgentRunSpec( - initial_messages=[], - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - concurrent_tools=True, - ), - [ - ToolCallRequest(id="ro1", name="read_a", arguments={}), - ToolCallRequest(id="ddg1", name="ddg_like", arguments={}), - ToolCallRequest(id="ro2", name="read_b", arguments={}), - ], - {}, - {}, - ) - - assert shared_events[0] == "start:read_a" - assert shared_events.index("end:read_a") < shared_events.index("start:ddg_like") - assert shared_events.index("end:ddg_like") < shared_events.index("start:read_b") - - -@pytest.mark.asyncio -async def test_runner_blocks_repeated_external_fetches(): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - captured_final_call: list[dict] = [] - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - if call_count["n"] <= 3: - return LLMResponse( - content="working", - tool_calls=[ToolCallRequest(id=f"call_{call_count['n']}", name="web_fetch", arguments={"url": "https://example.com"})], - usage={}, - ) - captured_final_call[:] = messages - return LLMResponse(content="done", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(return_value="page content") - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "research task"}], - tools=tools, - model="test-model", - max_iterations=4, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert result.final_content == "done" - assert tools.execute.await_count == 2 - blocked_tool_message = [ - msg for msg in captured_final_call - if msg.get("role") == "tool" and msg.get("tool_call_id") == "call_3" - ][0] - assert "repeated external lookup blocked" in blocked_tool_message["content"] - - -@pytest.mark.asyncio -async def test_loop_max_iterations_message_stays_stable(tmp_path): - loop = _make_loop(tmp_path) - loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse( - content="working", - tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})], - )) - loop.tools.get_definitions = MagicMock(return_value=[]) - loop.tools.execute = AsyncMock(return_value="ok") - loop.max_iterations = 2 - - final_content, _, _, _, _ = await loop._run_agent_loop([]) - - assert final_content == ( - "I reached the maximum number of tool call iterations (2) " - "without completing the task. You can try breaking the task into smaller steps." - ) - - -@pytest.mark.asyncio -async def test_loop_stream_filter_handles_think_only_prefix_without_crashing(tmp_path): - loop = _make_loop(tmp_path) - deltas: list[str] = [] - endings: list[bool] = [] - - async def chat_stream_with_retry(*, on_content_delta, **kwargs): - await on_content_delta("hidden") - await on_content_delta("Hello") - return LLMResponse(content="hiddenHello", tool_calls=[], usage={}) - - loop.provider.chat_stream_with_retry = chat_stream_with_retry - - async def on_stream(delta: str) -> None: - deltas.append(delta) - - async def on_stream_end(*, resuming: bool = False) -> None: - endings.append(resuming) - - final_content, _, _, _, _ = await loop._run_agent_loop( - [], - on_stream=on_stream, - on_stream_end=on_stream_end, - ) - - assert final_content == "Hello" - assert deltas == ["Hello"] - assert endings == [False] - - -@pytest.mark.asyncio -async def test_loop_stream_filter_hides_partial_trailing_think_prefix(tmp_path): - loop = _make_loop(tmp_path) - deltas: list[str] = [] - - async def chat_stream_with_retry(*, on_content_delta, **kwargs): - await on_content_delta("Hello hiddenWorld") - return LLMResponse(content="Hello hiddenWorld", tool_calls=[], usage={}) - - loop.provider.chat_stream_with_retry = chat_stream_with_retry - - async def on_stream(delta: str) -> None: - deltas.append(delta) - - final_content, _, _, _, _ = await loop._run_agent_loop([], on_stream=on_stream) - - assert final_content == "Hello World" - assert deltas == ["Hello", " World"] - - -@pytest.mark.asyncio -async def test_loop_stream_filter_hides_complete_trailing_think_tag(tmp_path): - loop = _make_loop(tmp_path) - deltas: list[str] = [] - - async def chat_stream_with_retry(*, on_content_delta, **kwargs): - await on_content_delta("Hello ") - await on_content_delta("hiddenWorld") - return LLMResponse(content="Hello hiddenWorld", tool_calls=[], usage={}) - - loop.provider.chat_stream_with_retry = chat_stream_with_retry - - async def on_stream(delta: str) -> None: - deltas.append(delta) - - final_content, _, _, _, _ = await loop._run_agent_loop([], on_stream=on_stream) - - assert final_content == "Hello World" - assert deltas == ["Hello", " World"] - - -@pytest.mark.asyncio -async def test_loop_retries_think_only_final_response(tmp_path): - loop = _make_loop(tmp_path) - call_count = {"n": 0} - - async def chat_with_retry(**kwargs): - call_count["n"] += 1 - if call_count["n"] == 1: - return LLMResponse(content="hidden", tool_calls=[], usage={}) - return LLMResponse(content="Recovered answer", tool_calls=[], usage={}) - - loop.provider.chat_with_retry = chat_with_retry - - final_content, _, _, _, _ = await loop._run_agent_loop([]) - - assert final_content == "Recovered answer" - assert call_count["n"] == 2 - - -@pytest.mark.asyncio -async def test_llm_error_not_appended_to_session_messages(): - """When LLM returns finish_reason='error', the error content must NOT be - appended to the messages list (prevents polluting session history).""" - from nanobot.agent.runner import ( - AgentRunSpec, - AgentRunner, - _PERSISTED_MODEL_ERROR_PLACEHOLDER, - ) - - provider = MagicMock() - provider.chat_with_retry = AsyncMock(return_value=LLMResponse( - content="429 rate limit exceeded", finish_reason="error", tool_calls=[], usage={}, - )) - tools = MagicMock() - tools.get_definitions.return_value = [] - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "hello"}], - tools=tools, - model="test-model", - max_iterations=5, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert result.stop_reason == "error" - assert result.final_content == "429 rate limit exceeded" - assistant_msgs = [m for m in result.messages if m.get("role") == "assistant"] - assert all("429" not in (m.get("content") or "") for m in assistant_msgs), \ - "Error content should not appear in session messages" - assert assistant_msgs[-1]["content"] == _PERSISTED_MODEL_ERROR_PLACEHOLDER - - -@pytest.mark.asyncio -async def test_streamed_flag_not_set_on_llm_error(tmp_path): - """When LLM errors during a streaming-capable channel interaction, - _streamed must NOT be set so ChannelManager delivers the error.""" - from nanobot.agent.loop import AgentLoop - from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model") - error_resp = LLMResponse( - content="503 service unavailable", finish_reason="error", tool_calls=[], usage={}, - ) - loop.provider.chat_with_retry = AsyncMock(return_value=error_resp) - loop.provider.chat_stream_with_retry = AsyncMock(return_value=error_resp) - loop.tools.get_definitions = MagicMock(return_value=[]) - - msg = InboundMessage( - channel="feishu", sender_id="u1", chat_id="c1", content="hi", - ) - result = await loop._process_message( - msg, - on_stream=AsyncMock(), - on_stream_end=AsyncMock(), - ) - - assert result is not None - assert "503" in result.content - assert not result.metadata.get("_streamed"), \ - "_streamed must not be set when stop_reason is error" - - -@pytest.mark.asyncio -async def test_ssrf_soft_block_can_finalize_after_streamed_tool_call(tmp_path): - from nanobot.agent.loop import AgentLoop - from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - tool_call_resp = LLMResponse( - content="checking metadata", - tool_calls=[ToolCallRequest( - id="call_ssrf", - name="exec", - arguments={"command": "curl http://169.254.169.254/latest/meta-data/"}, - )], - usage={}, - ) - provider.chat_stream_with_retry = AsyncMock(side_effect=[ - tool_call_resp, - LLMResponse( - content="I cannot access private URLs. Please share the local file.", - tool_calls=[], - usage={}, - ), - ]) - - loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model") - loop.tools.get_definitions = MagicMock(return_value=[]) - loop.tools.prepare_call = MagicMock(return_value=(None, {}, None)) - loop.tools.execute = AsyncMock(return_value=( - "Error: Command blocked by safety guard (internal/private URL detected)" - )) - - result = await loop._process_message( - InboundMessage(channel="telegram", sender_id="u1", chat_id="c1", content="hi"), - on_stream=AsyncMock(), - on_stream_end=AsyncMock(), - ) - - assert result is not None - assert result.content == "I cannot access private URLs. Please share the local file." - assert result.metadata.get("_streamed") is True - - -@pytest.mark.asyncio -async def test_next_turn_after_llm_error_keeps_turn_boundary(tmp_path): - from nanobot.agent.loop import AgentLoop - from nanobot.agent.runner import _PERSISTED_MODEL_ERROR_PLACEHOLDER - from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - provider.chat_with_retry = AsyncMock(side_effect=[ - LLMResponse(content="429 rate limit exceeded", finish_reason="error", tool_calls=[], usage={}), - LLMResponse(content="Recovered answer", tool_calls=[], usage={}), - ]) - - loop = AgentLoop(bus=MessageBus(), provider=provider, workspace=tmp_path, model="test-model") - loop.tools.get_definitions = MagicMock(return_value=[]) - loop.consolidator.maybe_consolidate_by_tokens = AsyncMock(return_value=False) # type: ignore[method-assign] - - first = await loop._process_message( - InboundMessage(channel="cli", sender_id="user", chat_id="test", content="first question") - ) - assert first is not None - assert first.content == "429 rate limit exceeded" - - session = loop.sessions.get_or_create("cli:test") - assert [ - {key: value for key, value in message.items() if key in {"role", "content"}} - for message in session.messages - ] == [ - {"role": "user", "content": "first question"}, - {"role": "assistant", "content": _PERSISTED_MODEL_ERROR_PLACEHOLDER}, - ] - - second = await loop._process_message( - InboundMessage(channel="cli", sender_id="user", chat_id="test", content="second question") - ) - assert second is not None - assert second.content == "Recovered answer" - - request_messages = provider.chat_with_retry.await_args_list[1].kwargs["messages"] - non_system = [message for message in request_messages if message.get("role") != "system"] - assert non_system[0]["role"] == "user" - assert "first question" in non_system[0]["content"] - assert non_system[1]["role"] == "assistant" - assert _PERSISTED_MODEL_ERROR_PLACEHOLDER in non_system[1]["content"] - assert non_system[2]["role"] == "user" - assert "second question" in non_system[2]["content"] - - -@pytest.mark.asyncio -async def test_runner_tool_error_sets_final_content(): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - - async def chat_with_retry(*, messages, **kwargs): - return LLMResponse( - content="working", - tool_calls=[ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})], - usage={}, - ) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(side_effect=RuntimeError("boom")) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "do task"}], - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - fail_on_tool_error=True, - )) - - assert result.final_content == "Error: RuntimeError: boom" - assert result.stop_reason == "tool_error" - - -@pytest.mark.asyncio -async def test_subagent_max_iterations_announces_existing_fallback(tmp_path, monkeypatch): - from nanobot.agent.subagent import SubagentManager, SubagentStatus - from nanobot.bus.queue import MessageBus - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - provider.chat_with_retry = AsyncMock(return_value=LLMResponse( - content="working", - tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})], - )) - mgr = SubagentManager( - provider=provider, - workspace=tmp_path, - bus=bus, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - ) - mgr._announce_result = AsyncMock() - - async def fake_execute(self, **kwargs): - return "tool result" - - monkeypatch.setattr("nanobot.agent.tools.filesystem.ListDirTool.execute", fake_execute) - - status = SubagentStatus(task_id="sub-1", label="label", task_description="do task", started_at=time.monotonic()) - await mgr._run_subagent("sub-1", "do task", "label", {"channel": "test", "chat_id": "c1"}, status) - - mgr._announce_result.assert_awaited_once() - args = mgr._announce_result.await_args.args - assert args[3] == "Task completed but no final response was generated." - assert args[5] == "ok" - - -@pytest.mark.asyncio -async def test_runner_accumulates_usage_and_preserves_cached_tokens(): - """Runner should accumulate prompt/completion tokens across iterations - and preserve cached_tokens from provider responses.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - if call_count["n"] == 1: - return LLMResponse( - content="thinking", - tool_calls=[ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})], - usage={"prompt_tokens": 100, "completion_tokens": 10, "cached_tokens": 80}, - ) - return LLMResponse( - content="done", - tool_calls=[], - usage={"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150}, - ) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(return_value="file content") - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "do task"}], - tools=tools, - model="test-model", - max_iterations=3, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - # Usage should be accumulated across iterations - assert result.usage["prompt_tokens"] == 300 # 100 + 200 - assert result.usage["completion_tokens"] == 30 # 10 + 20 - assert result.usage["cached_tokens"] == 230 # 80 + 150 - - -@pytest.mark.asyncio -async def test_runner_passes_cached_tokens_to_hook_context(): - """Hook context.usage should contain cached_tokens.""" - from nanobot.agent.hook import AgentHook, AgentHookContext - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - captured_usage: list[dict] = [] - - class UsageHook(AgentHook): - async def after_iteration(self, context: AgentHookContext) -> None: - captured_usage.append(dict(context.usage)) - - async def chat_with_retry(**kwargs): - return LLMResponse( - content="done", - tool_calls=[], - usage={"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150}, - ) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - runner = AgentRunner(provider) - await runner.run(AgentRunSpec( - initial_messages=[], - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - hook=UsageHook(), - )) - - assert len(captured_usage) == 1 - assert captured_usage[0]["cached_tokens"] == 150 - - -# --------------------------------------------------------------------------- -# Length recovery (auto-continue on finish_reason == "length") -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_length_recovery_continues_from_truncated_output(): - """When finish_reason is 'length', runner should insert a continuation - prompt and retry, stitching partial outputs into the final result.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - if call_count["n"] <= 2: - return LLMResponse( - content=f"part{call_count['n']} ", - finish_reason="length", - usage={}, - ) - return LLMResponse(content="final", finish_reason="stop", usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "write a long essay"}], - tools=tools, - model="test-model", - max_iterations=10, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert result.stop_reason == "completed" - assert result.final_content == "final" - assert call_count["n"] == 3 - roles = [m["role"] for m in result.messages if m["role"] == "user"] - assert len(roles) >= 3 # original + 2 recovery prompts - - -@pytest.mark.asyncio -async def test_length_recovery_streaming_calls_on_stream_end_with_resuming(): - """During length recovery with streaming, on_stream_end should be called - with resuming=True so the hook knows the conversation is continuing.""" - from nanobot.agent.hook import AgentHook, AgentHookContext - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - call_count = {"n": 0} - stream_end_calls: list[bool] = [] - - class StreamHook(AgentHook): - def wants_streaming(self) -> bool: - return True - - async def on_stream(self, context: AgentHookContext, delta: str) -> None: - pass - - async def on_stream_end(self, context: AgentHookContext, resuming: bool = False) -> None: - stream_end_calls.append(resuming) - - async def chat_stream_with_retry(*, messages, on_content_delta=None, **kwargs): - call_count["n"] += 1 - if call_count["n"] == 1: - return LLMResponse(content="partial ", finish_reason="length", usage={}) - return LLMResponse(content="done", finish_reason="stop", usage={}) - - provider.chat_stream_with_retry = chat_stream_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - runner = AgentRunner(provider) - await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "go"}], - tools=tools, - model="test-model", - max_iterations=10, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - hook=StreamHook(), - )) - - assert len(stream_end_calls) == 2 - assert stream_end_calls[0] is True # length recovery: resuming - assert stream_end_calls[1] is False # final response: done - - -@pytest.mark.asyncio -async def test_length_recovery_gives_up_after_max_retries(): - """After _MAX_LENGTH_RECOVERIES attempts the runner should stop retrying.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_LENGTH_RECOVERIES - - provider = MagicMock() - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - return LLMResponse( - content=f"chunk{call_count['n']}", - finish_reason="length", - usage={}, - ) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "go"}], - tools=tools, - model="test-model", - max_iterations=20, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert call_count["n"] == _MAX_LENGTH_RECOVERIES + 1 - assert result.final_content is not None - - -# --------------------------------------------------------------------------- -# Backfill missing tool_results -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_backfill_missing_tool_results_inserts_error(): - """Orphaned tool_use (no matching tool_result) should get a synthetic error.""" - from nanobot.agent.runner import AgentRunner, _BACKFILL_CONTENT - - messages = [ - {"role": "user", "content": "hi"}, - { - "role": "assistant", - "content": "", - "tool_calls": [ - {"id": "call_a", "type": "function", "function": {"name": "exec", "arguments": "{}"}}, - {"id": "call_b", "type": "function", "function": {"name": "read_file", "arguments": "{}"}}, - ], - }, - {"role": "tool", "tool_call_id": "call_a", "name": "exec", "content": "ok"}, - ] - result = AgentRunner._backfill_missing_tool_results(messages) - tool_msgs = [m for m in result if m.get("role") == "tool"] - assert len(tool_msgs) == 2 - backfilled = [m for m in tool_msgs if m.get("tool_call_id") == "call_b"] - assert len(backfilled) == 1 - assert backfilled[0]["content"] == _BACKFILL_CONTENT - assert backfilled[0]["name"] == "read_file" - - -def test_drop_orphan_tool_results_removes_unmatched_tool_messages(): - from nanobot.agent.runner import AgentRunner - - messages = [ - {"role": "system", "content": "system"}, - {"role": "user", "content": "old user"}, - { - "role": "assistant", - "content": "", - "tool_calls": [ - {"id": "call_ok", "type": "function", "function": {"name": "read_file", "arguments": "{}"}}, - ], - }, - {"role": "tool", "tool_call_id": "call_ok", "name": "read_file", "content": "ok"}, - {"role": "tool", "tool_call_id": "call_orphan", "name": "exec", "content": "stale"}, - {"role": "assistant", "content": "after tool"}, - ] - - cleaned = AgentRunner._drop_orphan_tool_results(messages) - - assert cleaned == [ - {"role": "system", "content": "system"}, - {"role": "user", "content": "old user"}, - { - "role": "assistant", - "content": "", - "tool_calls": [ - {"id": "call_ok", "type": "function", "function": {"name": "read_file", "arguments": "{}"}}, - ], - }, - {"role": "tool", "tool_call_id": "call_ok", "name": "read_file", "content": "ok"}, - {"role": "assistant", "content": "after tool"}, - ] - - -@pytest.mark.asyncio -async def test_backfill_noop_when_complete(): - """Complete message chains should not be modified.""" - from nanobot.agent.runner import AgentRunner - - messages = [ - {"role": "user", "content": "hi"}, - { - "role": "assistant", - "content": "", - "tool_calls": [ - {"id": "call_x", "type": "function", "function": {"name": "exec", "arguments": "{}"}}, - ], - }, - {"role": "tool", "tool_call_id": "call_x", "name": "exec", "content": "done"}, - {"role": "assistant", "content": "all good"}, - ] - result = AgentRunner._backfill_missing_tool_results(messages) - assert result is messages # same object — no copy - - -@pytest.mark.asyncio -async def test_runner_drops_orphan_tool_results_before_model_request(): - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - captured_messages: list[dict] = [] - - async def chat_with_retry(*, messages, **kwargs): - captured_messages[:] = messages - return LLMResponse(content="done", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[ - {"role": "system", "content": "system"}, - {"role": "user", "content": "old user"}, - {"role": "tool", "tool_call_id": "call_orphan", "name": "exec", "content": "stale"}, - {"role": "assistant", "content": "after orphan"}, - {"role": "user", "content": "new prompt"}, - ], - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert all( - message.get("tool_call_id") != "call_orphan" - for message in captured_messages - if message.get("role") == "tool" - ) - assert result.messages[2]["tool_call_id"] == "call_orphan" - assert result.final_content == "done" - - -@pytest.mark.asyncio -async def test_backfill_repairs_model_context_without_shifting_save_turn_boundary(tmp_path): - """Historical backfill should not duplicate old tail messages on persist.""" - from nanobot.agent.loop import AgentLoop - from nanobot.agent.runner import _BACKFILL_CONTENT - from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - response = LLMResponse(content="new answer", tool_calls=[], usage={}) - provider.chat_with_retry = AsyncMock(return_value=response) - provider.chat_stream_with_retry = AsyncMock(return_value=response) - - loop = AgentLoop( - bus=MessageBus(), - provider=provider, - workspace=tmp_path, - model="test-model", - ) - loop.tools.get_definitions = MagicMock(return_value=[]) - loop.consolidator.maybe_consolidate_by_tokens = AsyncMock(return_value=False) # type: ignore[method-assign] - - session = loop.sessions.get_or_create("cli:test") - session.messages = [ - {"role": "user", "content": "old user", "timestamp": "2026-01-01T00:00:00"}, - { - "role": "assistant", - "content": "", - "tool_calls": [ - { - "id": "call_missing", - "type": "function", - "function": {"name": "read_file", "arguments": "{}"}, - } - ], - "timestamp": "2026-01-01T00:00:01", - }, - {"role": "assistant", "content": "old tail", "timestamp": "2026-01-01T00:00:02"}, - ] - loop.sessions.save(session) - - result = await loop._process_message( - InboundMessage(channel="cli", sender_id="user", chat_id="test", content="new prompt") - ) - - assert result is not None - assert result.content == "new answer" - - request_messages = provider.chat_with_retry.await_args.kwargs["messages"] - synthetic = [ - message - for message in request_messages - if message.get("role") == "tool" and message.get("tool_call_id") == "call_missing" - ] - assert len(synthetic) == 1 - assert synthetic[0]["content"] == _BACKFILL_CONTENT - - session_after = loop.sessions.get_or_create("cli:test") - assert [ - { - key: value - for key, value in message.items() - if key in {"role", "content", "tool_call_id", "name", "tool_calls"} - } - for message in session_after.messages - ] == [ - {"role": "user", "content": "old user"}, - { - "role": "assistant", - "content": "", - "tool_calls": [ - { - "id": "call_missing", - "type": "function", - "function": {"name": "read_file", "arguments": "{}"}, - } - ], - }, - {"role": "assistant", "content": "old tail"}, - {"role": "user", "content": "new prompt"}, - {"role": "assistant", "content": "new answer"}, - ] - - -@pytest.mark.asyncio -async def test_runner_backfill_only_mutates_model_context_not_returned_messages(): - """Runner should repair orphaned tool calls for the model without rewriting result.messages.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner, _BACKFILL_CONTENT - - provider = MagicMock() - captured_messages: list[dict] = [] - - async def chat_with_retry(*, messages, **kwargs): - captured_messages[:] = messages - return LLMResponse(content="done", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - initial_messages = [ - {"role": "system", "content": "system"}, - {"role": "user", "content": "old user"}, - { - "role": "assistant", - "content": "", - "tool_calls": [ - { - "id": "call_missing", - "type": "function", - "function": {"name": "read_file", "arguments": "{}"}, - } - ], - }, - {"role": "assistant", "content": "old tail"}, - {"role": "user", "content": "new prompt"}, - ] - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=initial_messages, - tools=tools, - model="test-model", - max_iterations=3, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - synthetic = [ - message - for message in captured_messages - if message.get("role") == "tool" and message.get("tool_call_id") == "call_missing" - ] - assert len(synthetic) == 1 - assert synthetic[0]["content"] == _BACKFILL_CONTENT - - assert [ - { - key: value - for key, value in message.items() - if key in {"role", "content", "tool_call_id", "name", "tool_calls"} - } - for message in result.messages - ] == [ - {"role": "system", "content": "system"}, - {"role": "user", "content": "old user"}, - { - "role": "assistant", - "content": "", - "tool_calls": [ - { - "id": "call_missing", - "type": "function", - "function": {"name": "read_file", "arguments": "{}"}, - } - ], - }, - {"role": "assistant", "content": "old tail"}, - {"role": "user", "content": "new prompt"}, - {"role": "assistant", "content": "done"}, - ] - - -# --------------------------------------------------------------------------- -# Microcompact (stale tool result compaction) -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_microcompact_replaces_old_tool_results(): - """Tool results beyond _MICROCOMPACT_KEEP_RECENT should be summarized.""" - from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT - - total = _MICROCOMPACT_KEEP_RECENT + 5 - long_content = "x" * 600 - messages: list[dict] = [{"role": "system", "content": "sys"}] - for i in range(total): - messages.append({ - "role": "assistant", - "content": "", - "tool_calls": [{"id": f"c{i}", "type": "function", "function": {"name": "read_file", "arguments": "{}"}}], - }) - messages.append({ - "role": "tool", "tool_call_id": f"c{i}", "name": "read_file", - "content": long_content, - }) - - result = AgentRunner._microcompact(messages) - tool_msgs = [m for m in result if m.get("role") == "tool"] - stale_count = total - _MICROCOMPACT_KEEP_RECENT - compacted = [m for m in tool_msgs if "omitted from context" in str(m.get("content", ""))] - preserved = [m for m in tool_msgs if m.get("content") == long_content] - assert len(compacted) == stale_count - assert len(preserved) == _MICROCOMPACT_KEEP_RECENT - - -@pytest.mark.asyncio -async def test_microcompact_preserves_short_results(): - """Short tool results (< _MICROCOMPACT_MIN_CHARS) should not be replaced.""" - from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT - - total = _MICROCOMPACT_KEEP_RECENT + 5 - messages: list[dict] = [] - for i in range(total): - messages.append({ - "role": "assistant", - "content": "", - "tool_calls": [{"id": f"c{i}", "type": "function", "function": {"name": "exec", "arguments": "{}"}}], - }) - messages.append({ - "role": "tool", "tool_call_id": f"c{i}", "name": "exec", - "content": "short", - }) - - result = AgentRunner._microcompact(messages) - assert result is messages # no copy needed — all stale results are short - - -@pytest.mark.asyncio -async def test_microcompact_skips_non_compactable_tools(): - """Non-compactable tools (e.g. 'message') should never be replaced.""" - from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT - - total = _MICROCOMPACT_KEEP_RECENT + 5 - long_content = "y" * 1000 - messages: list[dict] = [] - for i in range(total): - messages.append({ - "role": "assistant", - "content": "", - "tool_calls": [{"id": f"c{i}", "type": "function", "function": {"name": "message", "arguments": "{}"}}], - }) - messages.append({ - "role": "tool", "tool_call_id": f"c{i}", "name": "message", - "content": long_content, - }) - - result = AgentRunner._microcompact(messages) - assert result is messages # no compactable tools found - - -@pytest.mark.asyncio -async def test_runner_tool_error_preserves_tool_results_in_messages(): - """When a tool raises a fatal error, its results must still be appended - to messages so the session never contains orphan tool_calls (#2943).""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - - async def chat_with_retry(*, messages, **kwargs): - return LLMResponse( - content=None, - tool_calls=[ - ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a"}), - ToolCallRequest(id="tc2", name="exec", arguments={"cmd": "bad"}), - ], - usage={}, - ) - - provider.chat_with_retry = chat_with_retry - provider.chat_stream_with_retry = chat_with_retry - - call_idx = 0 - - async def fake_execute(name, args, **kw): - nonlocal call_idx - call_idx += 1 - if call_idx == 2: - raise RuntimeError("boom") - return "file content" - - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(side_effect=fake_execute) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "do stuff"}], - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - fail_on_tool_error=True, - )) - - assert result.stop_reason == "tool_error" - # Both tool results must be in messages even though tc2 had a fatal error. - tool_msgs = [m for m in result.messages if m.get("role") == "tool"] - assert len(tool_msgs) == 2 - assert tool_msgs[0]["tool_call_id"] == "tc1" - assert tool_msgs[1]["tool_call_id"] == "tc2" - # The assistant message with tool_calls must precede the tool results. - asst_tc_idx = next( - i for i, m in enumerate(result.messages) - if m.get("role") == "assistant" and m.get("tool_calls") - ) - tool_indices = [ - i for i, m in enumerate(result.messages) if m.get("role") == "tool" - ] - assert all(ti > asst_tc_idx for ti in tool_indices) - - -def test_governance_repairs_orphans_after_snip(): - """After _snip_history clips an assistant+tool_calls, the second - _drop_orphan_tool_results pass must clean up the resulting orphans.""" - from nanobot.agent.runner import AgentRunner - - messages = [ - {"role": "system", "content": "system"}, - {"role": "user", "content": "old msg"}, - {"role": "assistant", "content": None, - "tool_calls": [{"id": "tc_old", "type": "function", - "function": {"name": "search", "arguments": "{}"}}]}, - {"role": "tool", "tool_call_id": "tc_old", "name": "search", - "content": "old result"}, - {"role": "assistant", "content": "old answer"}, - {"role": "user", "content": "new msg"}, - ] - - # Simulate snipping that keeps only the tail: drop the assistant with - # tool_calls but keep its tool result (orphan). - snipped = [ - {"role": "system", "content": "system"}, - {"role": "tool", "tool_call_id": "tc_old", "name": "search", - "content": "old result"}, - {"role": "assistant", "content": "old answer"}, - {"role": "user", "content": "new msg"}, - ] - - cleaned = AgentRunner._drop_orphan_tool_results(snipped) - # The orphan tool result should be removed. - assert not any( - m.get("role") == "tool" and m.get("tool_call_id") == "tc_old" - for m in cleaned - ) - - -def test_governance_fallback_still_repairs_orphans(): - """When full governance fails, the fallback must still run - _drop_orphan_tool_results and _backfill_missing_tool_results.""" - from nanobot.agent.runner import AgentRunner - - # Messages with an orphan tool result (no matching assistant tool_call). - messages = [ - {"role": "user", "content": "hello"}, - {"role": "tool", "tool_call_id": "orphan_tc", "name": "read", - "content": "stale"}, - {"role": "assistant", "content": "hi"}, - ] - - repaired = AgentRunner._drop_orphan_tool_results(messages) - repaired = AgentRunner._backfill_missing_tool_results(repaired) - # Orphan tool result should be gone. - assert not any(m.get("tool_call_id") == "orphan_tc" for m in repaired) -# ── Mid-turn injection tests ────────────────────────────────────────────── - - -@pytest.mark.asyncio -async def test_drain_injections_returns_empty_when_no_callback(): - """No injection_callback → empty list.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - runner = AgentRunner(provider) - tools = MagicMock() - tools.get_definitions.return_value = [] - spec = AgentRunSpec( - initial_messages=[], tools=tools, model="m", - max_iterations=1, max_tool_result_chars=1000, - injection_callback=None, - ) - result = await runner._drain_injections(spec) - assert result == [] - - -@pytest.mark.asyncio -async def test_drain_injections_extracts_content_from_inbound_messages(): - """Should extract .content from InboundMessage objects.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - from nanobot.bus.events import InboundMessage - - provider = MagicMock() - runner = AgentRunner(provider) - tools = MagicMock() - tools.get_definitions.return_value = [] - - msgs = [ - InboundMessage(channel="cli", sender_id="u", chat_id="c", content="hello"), - InboundMessage(channel="cli", sender_id="u", chat_id="c", content="world"), - ] - - async def cb(): - return msgs - - spec = AgentRunSpec( - initial_messages=[], tools=tools, model="m", - max_iterations=1, max_tool_result_chars=1000, - injection_callback=cb, - ) - result = await runner._drain_injections(spec) - assert result == [ - {"role": "user", "content": "hello"}, - {"role": "user", "content": "world"}, - ] - - -@pytest.mark.asyncio -async def test_drain_injections_passes_limit_to_callback_when_supported(): - """Limit-aware callbacks can preserve overflow in their own queue.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTIONS_PER_TURN - from nanobot.bus.events import InboundMessage - - provider = MagicMock() - runner = AgentRunner(provider) - tools = MagicMock() - tools.get_definitions.return_value = [] - seen_limits: list[int] = [] - - msgs = [ - InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg{i}") - for i in range(_MAX_INJECTIONS_PER_TURN + 3) - ] - - async def cb(*, limit: int): - seen_limits.append(limit) - return msgs[:limit] - - spec = AgentRunSpec( - initial_messages=[], tools=tools, model="m", - max_iterations=1, max_tool_result_chars=1000, - injection_callback=cb, - ) - result = await runner._drain_injections(spec) - assert seen_limits == [_MAX_INJECTIONS_PER_TURN] - assert result == [ - {"role": "user", "content": "msg0"}, - {"role": "user", "content": "msg1"}, - {"role": "user", "content": "msg2"}, - ] - - -@pytest.mark.asyncio -async def test_drain_injections_skips_empty_content(): - """Messages with blank content should be filtered out.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - from nanobot.bus.events import InboundMessage - - provider = MagicMock() - runner = AgentRunner(provider) - tools = MagicMock() - tools.get_definitions.return_value = [] - - msgs = [ - InboundMessage(channel="cli", sender_id="u", chat_id="c", content=""), - InboundMessage(channel="cli", sender_id="u", chat_id="c", content=" "), - InboundMessage(channel="cli", sender_id="u", chat_id="c", content="valid"), - ] - - async def cb(): - return msgs - - spec = AgentRunSpec( - initial_messages=[], tools=tools, model="m", - max_iterations=1, max_tool_result_chars=1000, - injection_callback=cb, - ) - result = await runner._drain_injections(spec) - assert result == [{"role": "user", "content": "valid"}] - - -@pytest.mark.asyncio -async def test_drain_injections_handles_callback_exception(): - """If the callback raises, return empty list (error is logged).""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - runner = AgentRunner(provider) - tools = MagicMock() - tools.get_definitions.return_value = [] - - async def cb(): - raise RuntimeError("boom") - - spec = AgentRunSpec( - initial_messages=[], tools=tools, model="m", - max_iterations=1, max_tool_result_chars=1000, - injection_callback=cb, - ) - result = await runner._drain_injections(spec) - assert result == [] - - -@pytest.mark.asyncio -async def test_checkpoint1_injects_after_tool_execution(): - """Follow-up messages are injected after tool execution, before next LLM call.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - from nanobot.bus.events import InboundMessage - - provider = MagicMock() - call_count = {"n": 0} - captured_messages = [] - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - captured_messages.append(list(messages)) - if call_count["n"] == 1: - return LLMResponse( - content="using tool", - tool_calls=[ToolCallRequest(id="c1", name="read_file", arguments={"path": "x"})], - usage={}, - ) - return LLMResponse(content="final answer", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(return_value="file content") - - injection_queue = asyncio.Queue() - inject_cb = _make_injection_callback(injection_queue) - - # Put a follow-up message in the queue before the run starts - await injection_queue.put( - InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up question") - ) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "hello"}], - tools=tools, - model="test-model", - max_iterations=5, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - injection_callback=inject_cb, - )) - - assert result.had_injections is True - assert result.final_content == "final answer" - # The second call should have the injected user message - assert call_count["n"] == 2 - last_messages = captured_messages[-1] - injected = [m for m in last_messages if m.get("role") == "user" and m.get("content") == "follow-up question"] - assert len(injected) == 1 - - -@pytest.mark.asyncio -async def test_checkpoint2_injects_after_final_response_with_resuming_stream(): - """After final response, if injections exist, stream_end should get resuming=True.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - from nanobot.agent.hook import AgentHook, AgentHookContext - from nanobot.bus.events import InboundMessage - - provider = MagicMock() - call_count = {"n": 0} - stream_end_calls = [] - - class TrackingHook(AgentHook): - def wants_streaming(self) -> bool: - return True - - async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None: - stream_end_calls.append(resuming) - - def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None: - return content - - async def chat_stream_with_retry(*, messages, on_content_delta=None, **kwargs): - call_count["n"] += 1 - if call_count["n"] == 1: - return LLMResponse(content="first answer", tool_calls=[], usage={}) - return LLMResponse(content="second answer", tool_calls=[], usage={}) - - provider.chat_stream_with_retry = chat_stream_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - injection_queue = asyncio.Queue() - inject_cb = _make_injection_callback(injection_queue) - - # Inject a follow-up that arrives during the first response - await injection_queue.put( - InboundMessage(channel="cli", sender_id="u", chat_id="c", content="quick follow-up") - ) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "hello"}], - tools=tools, - model="test-model", - max_iterations=5, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - hook=TrackingHook(), - injection_callback=inject_cb, - )) - - assert result.had_injections is True - assert result.final_content == "second answer" - assert call_count["n"] == 2 - # First stream_end should have resuming=True (because injections found) - assert stream_end_calls[0] is True - # Second (final) stream_end should have resuming=False - assert stream_end_calls[-1] is False - - -@pytest.mark.asyncio -async def test_checkpoint2_preserves_final_response_in_history_before_followup(): - """A follow-up injected after a final answer must still see that answer in history.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - from nanobot.bus.events import InboundMessage - - provider = MagicMock() - call_count = {"n": 0} - captured_messages = [] - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - captured_messages.append([dict(message) for message in messages]) - if call_count["n"] == 1: - return LLMResponse(content="first answer", tool_calls=[], usage={}) - return LLMResponse(content="second answer", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - injection_queue = asyncio.Queue() - inject_cb = _make_injection_callback(injection_queue) - - await injection_queue.put( - InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up question") - ) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "hello"}], - tools=tools, - model="test-model", - max_iterations=5, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - injection_callback=inject_cb, - )) - - assert result.final_content == "second answer" - assert call_count["n"] == 2 - assert captured_messages[-1] == [ - {"role": "user", "content": "hello"}, - {"role": "assistant", "content": "first answer"}, - {"role": "user", "content": "follow-up question"}, - ] - assert [ - {"role": message["role"], "content": message["content"]} - for message in result.messages - if message.get("role") == "assistant" - ] == [ - {"role": "assistant", "content": "first answer"}, - {"role": "assistant", "content": "second answer"}, - ] - - -@pytest.mark.asyncio -async def test_loop_injected_followup_preserves_image_media(tmp_path): - """Mid-turn follow-ups with images should keep multimodal content.""" - from nanobot.agent.loop import AgentLoop - from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - - image_path = tmp_path / "followup.png" - image_path.write_bytes(base64.b64decode( - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+yF9kAAAAASUVORK5CYII=" - )) - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - captured_messages: list[list[dict]] = [] - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - captured_messages.append(list(messages)) - if call_count["n"] == 1: - return LLMResponse(content="first answer", tool_calls=[], usage={}) - return LLMResponse(content="second answer", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model") - loop.tools.get_definitions = MagicMock(return_value=[]) - - pending_queue = asyncio.Queue() - await pending_queue.put(InboundMessage( - channel="cli", - sender_id="u", - chat_id="c", - content="", - media=[str(image_path)], - )) - - final_content, _, _, _, had_injections = await loop._run_agent_loop( - [{"role": "user", "content": "hello"}], - channel="cli", - chat_id="c", - pending_queue=pending_queue, - ) - - assert final_content == "second answer" - assert had_injections is True - assert call_count["n"] == 2 - injected_user_messages = [ - message for message in captured_messages[-1] - if message.get("role") == "user" and isinstance(message.get("content"), list) - ] - assert injected_user_messages - assert any( - block.get("type") == "image_url" - for block in injected_user_messages[-1]["content"] - if isinstance(block, dict) - ) - - -@pytest.mark.asyncio -async def test_runner_merges_multiple_injected_user_messages_without_losing_media(): - """Multiple injected follow-ups should not create lossy consecutive user messages.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - call_count = {"n": 0} - captured_messages = [] - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - captured_messages.append([dict(message) for message in messages]) - if call_count["n"] == 1: - return LLMResponse(content="first answer", tool_calls=[], usage={}) - return LLMResponse(content="second answer", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - async def inject_cb(): - if call_count["n"] == 1: - return [ - { - "role": "user", - "content": [ - {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}}, - {"type": "text", "text": "look at this"}, - ], - }, - {"role": "user", "content": "and answer briefly"}, - ] - return [] - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "hello"}], - tools=tools, - model="test-model", - max_iterations=5, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - injection_callback=inject_cb, - )) - - assert result.final_content == "second answer" - assert call_count["n"] == 2 - second_call = captured_messages[-1] - user_messages = [message for message in second_call if message.get("role") == "user"] - assert len(user_messages) == 2 - injected = user_messages[-1] - assert isinstance(injected["content"], list) - assert any( - block.get("type") == "image_url" - for block in injected["content"] - if isinstance(block, dict) - ) - assert any( - block.get("type") == "text" and block.get("text") == "and answer briefly" - for block in injected["content"] - if isinstance(block, dict) - ) - - -@pytest.mark.asyncio -async def test_injection_cycles_capped_at_max(): - """Injection cycles should be capped at _MAX_INJECTION_CYCLES.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES - from nanobot.bus.events import InboundMessage - - provider = MagicMock() - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - return LLMResponse(content=f"answer-{call_count['n']}", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - drain_count = {"n": 0} - - async def inject_cb(): - drain_count["n"] += 1 - # Only inject for the first _MAX_INJECTION_CYCLES drains - if drain_count["n"] <= _MAX_INJECTION_CYCLES: - return [InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg-{drain_count['n']}")] - return [] - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "start"}], - tools=tools, - model="test-model", - max_iterations=20, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - injection_callback=inject_cb, - )) - - assert result.had_injections is True - # Should be capped: _MAX_INJECTION_CYCLES injection rounds + 1 final round - assert call_count["n"] == _MAX_INJECTION_CYCLES + 1 - - -@pytest.mark.asyncio -async def test_no_injections_flag_is_false_by_default(): - """had_injections should be False when no injection callback or no messages.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - - async def chat_with_retry(**kwargs): - return LLMResponse(content="done", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "hi"}], - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - )) - - assert result.had_injections is False - - -@pytest.mark.asyncio -async def test_pending_queue_cleanup_on_dispatch(tmp_path): - """_pending_queues should be cleaned up after _dispatch completes.""" - loop = _make_loop(tmp_path) - - async def chat_with_retry(**kwargs): - return LLMResponse(content="done", tool_calls=[], usage={}) - - loop.provider.chat_with_retry = chat_with_retry - - from nanobot.bus.events import InboundMessage - - msg = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="hello") - # The queue should not exist before dispatch - assert msg.session_key not in loop._pending_queues - - await loop._dispatch(msg) - - # The queue should be cleaned up after dispatch - assert msg.session_key not in loop._pending_queues - - -@pytest.mark.asyncio -async def test_followup_routed_to_pending_queue(tmp_path): - """Unified-session follow-ups should route into the active pending queue.""" - from nanobot.agent.loop import UNIFIED_SESSION_KEY - from nanobot.bus.events import InboundMessage - - loop = _make_loop(tmp_path) - loop._unified_session = True - loop._dispatch = AsyncMock() # type: ignore[method-assign] - - pending = asyncio.Queue(maxsize=20) - loop._pending_queues[UNIFIED_SESSION_KEY] = pending - - run_task = asyncio.create_task(loop.run()) - msg = InboundMessage(channel="discord", sender_id="u", chat_id="c", content="follow-up") - await loop.bus.publish_inbound(msg) - - deadline = time.time() + 2 - while pending.empty() and time.time() < deadline: - await asyncio.sleep(0.01) - - loop.stop() - await asyncio.wait_for(run_task, timeout=2) - - assert loop._dispatch.await_count == 0 - assert not pending.empty() - queued_msg = pending.get_nowait() - assert queued_msg.content == "follow-up" - assert queued_msg.session_key == UNIFIED_SESSION_KEY - - -@pytest.mark.asyncio -async def test_pending_queue_preserves_overflow_for_next_injection_cycle(tmp_path): - """Pending queue should leave overflow messages queued for later drains.""" - from nanobot.agent.loop import AgentLoop - from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - captured_messages: list[list[dict]] = [] - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - captured_messages.append([dict(message) for message in messages]) - return LLMResponse(content=f"answer-{call_count['n']}", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model") - loop.tools.get_definitions = MagicMock(return_value=[]) - - pending_queue = asyncio.Queue() - total_followups = _MAX_INJECTIONS_PER_TURN + 2 - for idx in range(total_followups): - await pending_queue.put(InboundMessage( - channel="cli", - sender_id="u", - chat_id="c", - content=f"follow-up-{idx}", - )) - - final_content, _, _, _, had_injections = await loop._run_agent_loop( - [{"role": "user", "content": "hello"}], - channel="cli", - chat_id="c", - pending_queue=pending_queue, - ) - - assert final_content == "answer-3" - assert had_injections is True - assert call_count["n"] == 3 - flattened_user_content = "\n".join( - message["content"] - for message in captured_messages[-1] - if message.get("role") == "user" and isinstance(message.get("content"), str) - ) - for idx in range(total_followups): - assert f"follow-up-{idx}" in flattened_user_content - assert pending_queue.empty() - - -@pytest.mark.asyncio -async def test_pending_queue_full_falls_back_to_queued_task(tmp_path): - """QueueFull should preserve the message by dispatching a queued task.""" - from nanobot.bus.events import InboundMessage - - loop = _make_loop(tmp_path) - loop._dispatch = AsyncMock() # type: ignore[method-assign] - - pending = asyncio.Queue(maxsize=1) - pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="already queued")) - loop._pending_queues["cli:c"] = pending - - run_task = asyncio.create_task(loop.run()) - msg = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up") - await loop.bus.publish_inbound(msg) - - deadline = time.time() + 2 - while loop._dispatch.await_count == 0 and time.time() < deadline: - await asyncio.sleep(0.01) - - loop.stop() - await asyncio.wait_for(run_task, timeout=2) - - assert loop._dispatch.await_count == 1 - dispatched_msg = loop._dispatch.await_args.args[0] - assert dispatched_msg.content == "follow-up" - assert pending.qsize() == 1 - - -@pytest.mark.asyncio -async def test_dispatch_republishes_leftover_queue_messages(tmp_path): - """Messages left in the pending queue after _dispatch are re-published to the bus. - - This tests the finally-block cleanup that prevents message loss when - the runner exits early (e.g., max_iterations, tool_error) with messages - still in the queue. - """ - from nanobot.bus.events import InboundMessage - - loop = _make_loop(tmp_path) - bus = loop.bus - - # Simulate a completed dispatch by manually registering a queue - # with leftover messages, then running the cleanup logic directly. - pending = asyncio.Queue(maxsize=20) - session_key = "cli:c" - loop._pending_queues[session_key] = pending - pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="leftover-1")) - pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="leftover-2")) - - # Execute the cleanup logic from the finally block - queue = loop._pending_queues.pop(session_key, None) - assert queue is not None - leftover = 0 - while True: - try: - item = queue.get_nowait() - except asyncio.QueueEmpty: - break - await bus.publish_inbound(item) - leftover += 1 - - assert leftover == 2 - - # Verify the messages are now on the bus - msgs = [] - while not bus.inbound.empty(): - msgs.append(await asyncio.wait_for(bus.consume_inbound(), timeout=0.5)) - contents = [m.content for m in msgs] - assert "leftover-1" in contents - assert "leftover-2" in contents - - -@pytest.mark.asyncio -async def test_drain_injections_on_fatal_tool_error(): - """Pending injections should be drained even when a fatal tool error occurs.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - from nanobot.bus.events import InboundMessage - - provider = MagicMock() - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - if call_count["n"] == 1: - return LLMResponse( - content="", - tool_calls=[ToolCallRequest(id="c1", name="exec", arguments={"cmd": "bad"})], - usage={}, - ) - # Second call: respond normally to the injected follow-up - return LLMResponse(content="reply to follow-up", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(side_effect=RuntimeError("tool exploded")) - - injection_queue = asyncio.Queue() - inject_cb = _make_injection_callback(injection_queue) - - await injection_queue.put( - InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after error") - ) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "hello"}], - tools=tools, - model="test-model", - max_iterations=5, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - fail_on_tool_error=True, - injection_callback=inject_cb, - )) - - assert result.had_injections is True - assert result.final_content == "reply to follow-up" - # The injection should be in the messages history - injected = [ - m for m in result.messages - if m.get("role") == "user" and m.get("content") == "follow-up after error" - ] - assert len(injected) == 1 - - -@pytest.mark.asyncio -async def test_drain_injections_on_llm_error(): - """Pending injections should be drained when the LLM returns an error finish_reason.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - from nanobot.bus.events import InboundMessage - - provider = MagicMock() - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - if call_count["n"] == 1: - return LLMResponse( - content=None, - tool_calls=[], - finish_reason="error", - usage={}, - ) - # Second call: respond normally to the injected follow-up - return LLMResponse(content="recovered answer", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - injection_queue = asyncio.Queue() - inject_cb = _make_injection_callback(injection_queue) - - await injection_queue.put( - InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after LLM error") - ) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[ - {"role": "user", "content": "hello"}, - {"role": "assistant", "content": "previous response"}, - {"role": "user", "content": "trigger error"}, - ], - tools=tools, - model="test-model", - max_iterations=5, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - injection_callback=inject_cb, - )) - - assert result.had_injections is True - assert result.final_content == "recovered answer" - injected = [ - m for m in result.messages - if m.get("role") == "user" and "follow-up after LLM error" in str(m.get("content", "")) - ] - assert len(injected) == 1 - - -@pytest.mark.asyncio -async def test_drain_injections_on_empty_final_response(): - """Pending injections should be drained when the runner exits due to empty response.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_EMPTY_RETRIES - from nanobot.bus.events import InboundMessage - - provider = MagicMock() - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - if call_count["n"] <= _MAX_EMPTY_RETRIES + 1: - return LLMResponse(content="", tool_calls=[], usage={}) - # After retries exhausted + injection drain, respond normally - return LLMResponse(content="answer after empty", tool_calls=[], usage={}) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - injection_queue = asyncio.Queue() - inject_cb = _make_injection_callback(injection_queue) - - await injection_queue.put( - InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after empty") - ) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[ - {"role": "user", "content": "hello"}, - {"role": "assistant", "content": "previous response"}, - {"role": "user", "content": "trigger empty"}, - ], - tools=tools, - model="test-model", - max_iterations=10, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - injection_callback=inject_cb, - )) - - assert result.had_injections is True - assert result.final_content == "answer after empty" - injected = [ - m for m in result.messages - if m.get("role") == "user" and "follow-up after empty" in str(m.get("content", "")) - ] - assert len(injected) == 1 - - -@pytest.mark.asyncio -async def test_drain_injections_on_max_iterations(): - """Pending injections should be drained when the runner hits max_iterations. - - Unlike other error paths, max_iterations cannot continue the loop, so - injections are appended to messages but not processed by the LLM. - The key point is they are consumed from the queue to prevent re-publish. - """ - from nanobot.agent.runner import AgentRunSpec, AgentRunner - from nanobot.bus.events import InboundMessage - - provider = MagicMock() - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - return LLMResponse( - content="", - tool_calls=[ToolCallRequest(id=f"c{call_count['n']}", name="read_file", arguments={"path": "x"})], - usage={}, - ) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(return_value="file content") - - injection_queue = asyncio.Queue() - inject_cb = _make_injection_callback(injection_queue) - - await injection_queue.put( - InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after max iters") - ) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "hello"}], - tools=tools, - model="test-model", - max_iterations=2, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - injection_callback=inject_cb, - )) - - assert result.stop_reason == "max_iterations" - assert result.had_injections is True - # The injection was consumed from the queue (preventing re-publish) - assert injection_queue.empty() - # The injection message is appended to conversation history - injected = [ - m for m in result.messages - if m.get("role") == "user" and m.get("content") == "follow-up after max iters" - ] - assert len(injected) == 1 - - -@pytest.mark.asyncio -async def test_drain_injections_set_flag_when_followup_arrives_after_last_iteration(): - """Late follow-ups drained in max_iterations should still flip had_injections.""" - from nanobot.agent.hook import AgentHook - from nanobot.agent.runner import AgentRunSpec, AgentRunner - from nanobot.bus.events import InboundMessage - - provider = MagicMock() - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - return LLMResponse( - content="", - tool_calls=[ToolCallRequest(id=f"c{call_count['n']}", name="read_file", arguments={"path": "x"})], - usage={}, - ) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - tools.execute = AsyncMock(return_value="file content") - - injection_queue = asyncio.Queue() - inject_cb = _make_injection_callback(injection_queue) - - class InjectOnLastAfterIterationHook(AgentHook): - def __init__(self) -> None: - self.after_iteration_calls = 0 - - async def after_iteration(self, context) -> None: - self.after_iteration_calls += 1 - if self.after_iteration_calls == 2: - await injection_queue.put( - InboundMessage( - channel="cli", - sender_id="u", - chat_id="c", - content="late follow-up after max iters", - ) - ) - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[{"role": "user", "content": "hello"}], - tools=tools, - model="test-model", - max_iterations=2, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - injection_callback=inject_cb, - hook=InjectOnLastAfterIterationHook(), - )) - - assert result.stop_reason == "max_iterations" - assert result.had_injections is True - assert injection_queue.empty() - injected = [ - m for m in result.messages - if m.get("role") == "user" and m.get("content") == "late follow-up after max iters" - ] - assert len(injected) == 1 - - -@pytest.mark.asyncio -async def test_injection_cycle_cap_on_error_path(): - """Injection cycles should be capped even when every iteration hits an LLM error.""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES - from nanobot.bus.events import InboundMessage - - provider = MagicMock() - call_count = {"n": 0} - - async def chat_with_retry(*, messages, **kwargs): - call_count["n"] += 1 - return LLMResponse( - content=None, - tool_calls=[], - finish_reason="error", - usage={}, - ) - - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - drain_count = {"n": 0} - - async def inject_cb(): - drain_count["n"] += 1 - if drain_count["n"] <= _MAX_INJECTION_CYCLES: - return [InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg-{drain_count['n']}")] - return [] - - runner = AgentRunner(provider) - result = await runner.run(AgentRunSpec( - initial_messages=[ - {"role": "user", "content": "hello"}, - {"role": "assistant", "content": "previous"}, - {"role": "user", "content": "trigger error"}, - ], - tools=tools, - model="test-model", - max_iterations=20, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - injection_callback=inject_cb, - )) - - assert result.had_injections is True - # Should cap: _MAX_INJECTION_CYCLES drained rounds + 1 final round that breaks - assert call_count["n"] == _MAX_INJECTION_CYCLES + 1 - - -# --------------------------------------------------------------------------- -# Regression tests for GLM-1214: _snip_history must preserve a user message -# --------------------------------------------------------------------------- - - -def test_snip_history_preserves_user_message_after_truncation(monkeypatch): - """When _snip_history truncates messages and the only user message ends up - outside the kept window, the method must recover the nearest user message - so the resulting sequence is valid for providers like GLM (which reject - system→assistant with error 1214). - - This reproduces the exact scenario from the bug report: - - Normal interaction: user asks, assistant calls tool, tool returns, - assistant replies. - - Injection adds a phantom user message, triggering more tool calls. - - _snip_history activates, keeping only recent assistant/tool pairs. - - The injected user message is in the truncated prefix and gets lost. - """ - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - tools = MagicMock() - tools.get_definitions.return_value = [] - runner = AgentRunner(provider) - - messages = [ - {"role": "system", "content": "system"}, - {"role": "assistant", "content": "previous reply"}, - {"role": "user", "content": ".nanobot的同目录"}, - { - "role": "assistant", - "content": None, - "tool_calls": [{"id": "tc_1", "type": "function", "function": {"name": "exec", "arguments": "{}"}}], - }, - {"role": "tool", "tool_call_id": "tc_1", "content": "tool output 1"}, - { - "role": "assistant", - "content": None, - "tool_calls": [{"id": "tc_2", "type": "function", "function": {"name": "exec", "arguments": "{}"}}], - }, - {"role": "tool", "tool_call_id": "tc_2", "content": "tool output 2"}, - ] - - spec = AgentRunSpec( - initial_messages=messages, - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - context_window_tokens=2000, - context_block_limit=100, - ) - - # Make estimate_prompt_tokens_chain report above budget so _snip_history activates. - monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_a, **_kw: (500, None)) - # Make kept window small: only the last 2 messages fit the budget. - token_sizes = { - "system": 0, - "previous reply": 200, - ".nanobot的同目录": 80, - "tool output 1": 80, - "tool output 2": 80, - } - monkeypatch.setattr( - "nanobot.agent.runner.estimate_message_tokens", - lambda msg: token_sizes.get(str(msg.get("content")), 100), - ) - - trimmed = runner._snip_history(spec, messages) - - # The first non-system message MUST be user (not assistant). - non_system = [m for m in trimmed if m.get("role") != "system"] - assert non_system, "trimmed should contain at least one non-system message" - assert non_system[0]["role"] == "user", ( - f"First non-system message must be 'user', got '{non_system[0]['role']}'. " - f"Roles: {[m['role'] for m in trimmed]}" - ) - - -def test_snip_history_no_user_at_all_falls_back_gracefully(monkeypatch): - """Edge case: if non_system has zero user messages, _snip_history should - still return a valid sequence (not crash or produce system→assistant).""" - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - provider = MagicMock() - tools = MagicMock() - tools.get_definitions.return_value = [] - runner = AgentRunner(provider) - - messages = [ - {"role": "system", "content": "system"}, - {"role": "assistant", "content": "reply"}, - {"role": "tool", "tool_call_id": "tc_1", "content": "result"}, - {"role": "assistant", "content": "reply 2"}, - {"role": "tool", "tool_call_id": "tc_2", "content": "result 2"}, - ] - - spec = AgentRunSpec( - initial_messages=messages, - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - context_window_tokens=2000, - context_block_limit=100, - ) - - monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_a, **_kw: (500, None)) - monkeypatch.setattr( - "nanobot.agent.runner.estimate_message_tokens", - lambda msg: 100, - ) - - trimmed = runner._snip_history(spec, messages) - - # Should not crash. The result should still be a valid list. - assert isinstance(trimmed, list) - # Must have at least system. - assert any(m.get("role") == "system" for m in trimmed) - # The _enforce_role_alternation safety net must be able to fix whatever - # _snip_history returns here — verify it produces a valid sequence. - from nanobot.providers.base import LLMProvider - fixed = LLMProvider._enforce_role_alternation(trimmed) - non_system = [m for m in fixed if m["role"] != "system"] - if non_system: - assert non_system[0]["role"] in ("user", "tool"), ( - f"Safety net should ensure first non-system is user/tool, got {non_system[0]['role']}" - ) - - -@pytest.mark.asyncio -async def test_runner_binds_on_retry_wait_to_retry_callback_not_progress(): - """Regression: provider retry heartbeats must route through - ``retry_wait_callback``, not ``progress_callback``. Binding them to - the progress callback (as an earlier runtime refactor did) caused - internal retry diagnostics like "Model request failed, retry in 1s" - to leak to end-user channels as normal progress updates. - """ - from nanobot.agent.runner import AgentRunSpec, AgentRunner - - captured: dict = {} - - async def chat_with_retry(**kwargs): - captured.update(kwargs) - return LLMResponse(content="done", tool_calls=[], usage={}) - - provider = MagicMock() - provider.chat_with_retry = chat_with_retry - tools = MagicMock() - tools.get_definitions.return_value = [] - - progress_cb = AsyncMock() - retry_wait_cb = AsyncMock() - - runner = AgentRunner(provider) - await runner.run(AgentRunSpec( - initial_messages=[ - {"role": "system", "content": "system"}, - {"role": "user", "content": "hi"}, - ], - tools=tools, - model="test-model", - max_iterations=1, - max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, - progress_callback=progress_cb, - retry_wait_callback=retry_wait_cb, - )) - - assert captured["on_retry_wait"] is retry_wait_cb - assert captured["on_retry_wait"] is not progress_cb diff --git a/tests/agent/test_runner_core.py b/tests/agent/test_runner_core.py new file mode 100644 index 000000000..dd28fa1cc --- /dev/null +++ b/tests/agent/test_runner_core.py @@ -0,0 +1,481 @@ +"""Tests for core AgentRunner behavior: message passing, iteration limits, +timeouts, empty-response handling, usage accumulation, and config passthrough.""" + +from __future__ import annotations + +import asyncio +import time +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from nanobot.config.schema import AgentDefaults +from nanobot.agent.tools.registry import ToolRegistry +from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest + +_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars + + +@pytest.mark.asyncio +async def test_runner_preserves_reasoning_fields_and_tool_results(): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock(spec=LLMProvider) + captured_second_call: list[dict] = [] + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + return LLMResponse( + content="thinking", + tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})], + reasoning_content="hidden reasoning", + thinking_blocks=[{"type": "thinking", "thinking": "step"}], + usage={"prompt_tokens": 5, "completion_tokens": 3}, + ) + captured_second_call[:] = messages + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(return_value="tool result") + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[ + {"role": "system", "content": "system"}, + {"role": "user", "content": "do task"}, + ], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert result.final_content == "done" + assert result.tools_used == ["list_dir"] + assert result.tool_events == [ + {"name": "list_dir", "status": "ok", "detail": "tool result"} + ] + + assistant_messages = [ + msg for msg in captured_second_call + if msg.get("role") == "assistant" and msg.get("tool_calls") + ] + assert len(assistant_messages) == 1 + assert assistant_messages[0]["reasoning_content"] == "hidden reasoning" + assert assistant_messages[0]["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}] + assert any( + msg.get("role") == "tool" and msg.get("content") == "tool result" + for msg in captured_second_call + ) + + +@pytest.mark.asyncio +async def test_runner_returns_max_iterations_fallback(): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock(spec=LLMProvider) + provider.chat_with_retry = AsyncMock(return_value=LLMResponse( + content="still working", + tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})], + )) + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(return_value="tool result") + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[], + tools=tools, + model="test-model", + max_iterations=2, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert result.stop_reason == "max_iterations" + assert result.final_content == ( + "I reached the maximum number of tool call iterations (2) " + "without completing the task. You can try breaking the task into smaller steps." + ) + assert result.messages[-1]["role"] == "assistant" + assert result.messages[-1]["content"] == result.final_content + + +@pytest.mark.asyncio +async def test_runner_times_out_hung_llm_request(): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock(spec=LLMProvider) + + async def chat_with_retry(**kwargs): + await asyncio.sleep(3600) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + runner = AgentRunner(provider) + started = time.monotonic() + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hello"}], + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + llm_timeout_s=0.05, + )) + + assert (time.monotonic() - started) < 1.0 + assert result.stop_reason == "error" + assert "timed out" in (result.final_content or "").lower() + + +@pytest.mark.asyncio +async def test_runner_replaces_empty_tool_result_with_marker(): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock(spec=LLMProvider) + captured_second_call: list[dict] = [] + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + return LLMResponse( + content="working", + tool_calls=[ToolCallRequest(id="call_1", name="noop", arguments={})], + usage={}, + ) + captured_second_call[:] = messages + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(return_value="") + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "do task"}], + tools=tools, + model="test-model", + max_iterations=2, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert result.final_content == "done" + tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool") + assert tool_message["content"] == "(noop completed with no output)" + + +@pytest.mark.asyncio +async def test_runner_retries_empty_final_response_with_summary_prompt(): + """Empty responses get 2 silent retries before finalization kicks in.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock(spec=LLMProvider) + calls: list[dict] = [] + + async def chat_with_retry(*, messages, tools=None, **kwargs): + calls.append({"messages": messages, "tools": tools}) + if len(calls) <= 2: + return LLMResponse( + content=None, + tool_calls=[], + usage={"prompt_tokens": 5, "completion_tokens": 1}, + ) + return LLMResponse( + content="final answer", + tool_calls=[], + usage={"prompt_tokens": 3, "completion_tokens": 7}, + ) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "do task"}], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert result.final_content == "final answer" + # 2 silent retries (iterations 0,1) + finalization on iteration 1 + assert len(calls) == 3 + assert calls[0]["tools"] is not None + assert calls[1]["tools"] is not None + assert calls[2]["tools"] is None + assert result.usage["prompt_tokens"] == 13 + assert result.usage["completion_tokens"] == 9 + + +@pytest.mark.asyncio +async def test_runner_uses_specific_message_after_empty_finalization_retry(): + """After silent retries + finalization all return empty, stop_reason is empty_final_response.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE + + provider = MagicMock(spec=LLMProvider) + + async def chat_with_retry(*, messages, **kwargs): + return LLMResponse(content=None, tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "do task"}], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert result.final_content == EMPTY_FINAL_RESPONSE_MESSAGE + assert result.stop_reason == "empty_final_response" + + +@pytest.mark.asyncio +async def test_runner_empty_response_does_not_break_tool_chain(): + """An empty intermediate response must not kill an ongoing tool chain. + + Sequence: tool_call -> empty -> tool_call -> final text. + The runner should recover via silent retry and complete normally. + """ + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock(spec=LLMProvider) + call_count = 0 + + async def chat_with_retry(*, messages, tools=None, **kwargs): + nonlocal call_count + call_count += 1 + if call_count == 1: + return LLMResponse( + content=None, + tool_calls=[ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a.txt"})], + usage={"prompt_tokens": 10, "completion_tokens": 5}, + ) + if call_count == 2: + return LLMResponse(content=None, tool_calls=[], usage={"prompt_tokens": 10, "completion_tokens": 1}) + if call_count == 3: + return LLMResponse( + content=None, + tool_calls=[ToolCallRequest(id="tc2", name="read_file", arguments={"path": "b.txt"})], + usage={"prompt_tokens": 10, "completion_tokens": 5}, + ) + return LLMResponse( + content="Here are the results.", + tool_calls=[], + usage={"prompt_tokens": 10, "completion_tokens": 10}, + ) + + provider.chat_with_retry = chat_with_retry + provider.chat_stream_with_retry = chat_with_retry + + async def fake_tool(name, args, **kw): + return "file content" + + tool_registry = MagicMock() + tool_registry.get_definitions.return_value = [{"type": "function", "function": {"name": "read_file"}}] + tool_registry.execute = AsyncMock(side_effect=fake_tool) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "read both files"}], + tools=tool_registry, + model="test-model", + max_iterations=10, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert result.final_content == "Here are the results." + assert result.stop_reason == "completed" + assert call_count == 4 + assert "read_file" in result.tools_used + + +@pytest.mark.asyncio +async def test_runner_accumulates_usage_and_preserves_cached_tokens(): + """Runner should accumulate prompt/completion tokens across iterations + and preserve cached_tokens from provider responses.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock(spec=LLMProvider) + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + return LLMResponse( + content="thinking", + tool_calls=[ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})], + usage={"prompt_tokens": 100, "completion_tokens": 10, "cached_tokens": 80}, + ) + return LLMResponse( + content="done", + tool_calls=[], + usage={"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150}, + ) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(return_value="file content") + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "do task"}], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + # Usage should be accumulated across iterations + assert result.usage["prompt_tokens"] == 300 # 100 + 200 + assert result.usage["completion_tokens"] == 30 # 10 + 20 + assert result.usage["cached_tokens"] == 230 # 80 + 150 + + +@pytest.mark.asyncio +async def test_runner_binds_on_retry_wait_to_retry_callback_not_progress(): + """Regression: provider retry heartbeats must route through + ``retry_wait_callback``, not ``progress_callback``. Binding them to + the progress callback (as an earlier runtime refactor did) caused + internal retry diagnostics like "Model request failed, retry in 1s" + to leak to end-user channels as normal progress updates. + """ + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + captured: dict = {} + + async def chat_with_retry(**kwargs): + captured.update(kwargs) + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider = MagicMock(spec=LLMProvider) + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + progress_cb = AsyncMock() + retry_wait_cb = AsyncMock() + + runner = AgentRunner(provider) + await runner.run(AgentRunSpec( + initial_messages=[ + {"role": "system", "content": "system"}, + {"role": "user", "content": "hi"}, + ], + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + progress_callback=progress_cb, + retry_wait_callback=retry_wait_cb, + )) + + assert captured["on_retry_wait"] is retry_wait_cb + assert captured["on_retry_wait"] is not progress_cb + + +# --------------------------------------------------------------------------- +# Config passthrough tests +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_runner_passes_temperature_to_provider(): + """temperature from AgentRunSpec should reach provider.chat_with_retry.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + captured: dict = {} + + async def chat_with_retry(**kwargs): + captured.update(kwargs) + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider = MagicMock(spec=LLMProvider) + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + runner = AgentRunner(provider) + await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hi"}], + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + temperature=0.7, + )) + + assert captured["temperature"] == 0.7 + + +@pytest.mark.asyncio +async def test_runner_passes_max_tokens_to_provider(): + """max_tokens from AgentRunSpec should reach provider.chat_with_retry.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + captured: dict = {} + + async def chat_with_retry(**kwargs): + captured.update(kwargs) + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider = MagicMock(spec=LLMProvider) + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + runner = AgentRunner(provider) + await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hi"}], + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + max_tokens=8192, + )) + + assert captured["max_tokens"] == 8192 + + +@pytest.mark.asyncio +async def test_runner_passes_reasoning_effort_to_provider(): + """reasoning_effort from AgentRunSpec should reach provider.chat_with_retry.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + captured: dict = {} + + async def chat_with_retry(**kwargs): + captured.update(kwargs) + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider = MagicMock(spec=LLMProvider) + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + runner = AgentRunner(provider) + await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hi"}], + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + reasoning_effort="high", + )) + + assert captured["reasoning_effort"] == "high" diff --git a/tests/agent/test_runner_errors.py b/tests/agent/test_runner_errors.py new file mode 100644 index 000000000..8df7ad8f3 --- /dev/null +++ b/tests/agent/test_runner_errors.py @@ -0,0 +1,171 @@ +"""Tests for AgentRunner error handling: tool errors, LLM errors, +session message isolation, and tool result preservation.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from nanobot.config.schema import AgentDefaults +from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest + +_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars + + +@pytest.mark.asyncio +async def test_runner_returns_structured_tool_error(): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock(spec=LLMProvider) + provider.chat_with_retry = AsyncMock(return_value=LLMResponse( + content="working", + tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})], + )) + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(side_effect=RuntimeError("boom")) + + runner = AgentRunner(provider) + + result = await runner.run(AgentRunSpec( + initial_messages=[], + tools=tools, + model="test-model", + max_iterations=2, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + fail_on_tool_error=True, + )) + + assert result.stop_reason == "tool_error" + assert result.error == "Error: RuntimeError: boom" + assert result.tool_events == [ + {"name": "list_dir", "status": "error", "detail": "boom"} + ] + + +@pytest.mark.asyncio +async def test_llm_error_not_appended_to_session_messages(): + """When LLM returns finish_reason='error', the error content must NOT be + appended to the messages list (prevents polluting session history).""" + from nanobot.agent.runner import ( + AgentRunSpec, + AgentRunner, + _PERSISTED_MODEL_ERROR_PLACEHOLDER, + ) + + provider = MagicMock(spec=LLMProvider) + provider.chat_with_retry = AsyncMock(return_value=LLMResponse( + content="429 rate limit exceeded", finish_reason="error", tool_calls=[], usage={}, + )) + tools = MagicMock() + tools.get_definitions.return_value = [] + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hello"}], + tools=tools, + model="test-model", + max_iterations=5, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert result.stop_reason == "error" + assert result.final_content == "429 rate limit exceeded" + assistant_msgs = [m for m in result.messages if m.get("role") == "assistant"] + assert all("429" not in (m.get("content") or "") for m in assistant_msgs), \ + "Error content should not appear in session messages" + assert assistant_msgs[-1]["content"] == _PERSISTED_MODEL_ERROR_PLACEHOLDER + + +@pytest.mark.asyncio +async def test_runner_tool_error_sets_final_content(): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock(spec=LLMProvider) + + async def chat_with_retry(*, messages, **kwargs): + return LLMResponse( + content="working", + tool_calls=[ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})], + usage={}, + ) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(side_effect=RuntimeError("boom")) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "do task"}], + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + fail_on_tool_error=True, + )) + + assert result.final_content == "Error: RuntimeError: boom" + assert result.stop_reason == "tool_error" + + +@pytest.mark.asyncio +async def test_runner_tool_error_preserves_tool_results_in_messages(): + """When a tool raises a fatal error, its results must still be appended + to messages so the session never contains orphan tool_calls (#2943).""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock(spec=LLMProvider) + + async def chat_with_retry(*, messages, **kwargs): + return LLMResponse( + content=None, + tool_calls=[ + ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a"}), + ToolCallRequest(id="tc2", name="exec", arguments={"cmd": "bad"}), + ], + usage={}, + ) + + provider.chat_with_retry = chat_with_retry + provider.chat_stream_with_retry = chat_with_retry + + call_idx = 0 + + async def fake_execute(name, args, **kw): + nonlocal call_idx + call_idx += 1 + if call_idx == 2: + raise RuntimeError("boom") + return "file content" + + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(side_effect=fake_execute) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "do stuff"}], + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + fail_on_tool_error=True, + )) + + assert result.stop_reason == "tool_error" + # Both tool results must be in messages even though tc2 had a fatal error. + tool_msgs = [m for m in result.messages if m.get("role") == "tool"] + assert len(tool_msgs) == 2 + assert tool_msgs[0]["tool_call_id"] == "tc1" + assert tool_msgs[1]["tool_call_id"] == "tc2" + # The assistant message with tool_calls must precede the tool results. + asst_tc_idx = next( + i for i, m in enumerate(result.messages) + if m.get("role") == "assistant" and m.get("tool_calls") + ) + tool_indices = [ + i for i, m in enumerate(result.messages) if m.get("role") == "tool" + ] + assert all(ti > asst_tc_idx for ti in tool_indices) diff --git a/tests/agent/test_runner_governance.py b/tests/agent/test_runner_governance.py new file mode 100644 index 000000000..50e882ca6 --- /dev/null +++ b/tests/agent/test_runner_governance.py @@ -0,0 +1,643 @@ +"""Tests for AgentRunner context governance: backfill, orphan cleanup, microcompact, snip_history.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from nanobot.config.schema import AgentDefaults +from nanobot.providers.base import LLMResponse, ToolCallRequest + +_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars + + +def _make_loop(tmp_path): + from nanobot.agent.loop import AgentLoop + from nanobot.bus.queue import MessageBus + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + + with patch("nanobot.agent.loop.ContextBuilder"), \ + patch("nanobot.agent.loop.SessionManager"), \ + patch("nanobot.agent.loop.SubagentManager") as MockSubMgr: + MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0) + loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path) + return loop + +async def test_runner_uses_raw_messages_when_context_governance_fails(): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + captured_messages: list[dict] = [] + + async def chat_with_retry(*, messages, **kwargs): + captured_messages[:] = messages + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + initial_messages = [ + {"role": "system", "content": "system"}, + {"role": "user", "content": "hello"}, + ] + + runner = AgentRunner(provider) + runner._snip_history = MagicMock(side_effect=RuntimeError("boom")) # type: ignore[method-assign] + result = await runner.run(AgentRunSpec( + initial_messages=initial_messages, + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert result.final_content == "done" + assert captured_messages == initial_messages +def test_snip_history_drops_orphaned_tool_results_from_trimmed_slice(monkeypatch): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + tools = MagicMock() + tools.get_definitions.return_value = [] + runner = AgentRunner(provider) + messages = [ + {"role": "system", "content": "system"}, + {"role": "user", "content": "old user"}, + { + "role": "assistant", + "content": "tool call", + "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "ls", "arguments": "{}"}}], + }, + {"role": "tool", "tool_call_id": "call_1", "content": "tool output"}, + {"role": "assistant", "content": "after tool"}, + ] + spec = AgentRunSpec( + initial_messages=messages, + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + context_window_tokens=2000, + context_block_limit=100, + ) + + monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_args, **_kwargs: (500, None)) + token_sizes = { + "old user": 120, + "tool call": 120, + "tool output": 40, + "after tool": 40, + "system": 0, + } + monkeypatch.setattr( + "nanobot.agent.runner.estimate_message_tokens", + lambda msg: token_sizes.get(str(msg.get("content")), 40), + ) + + trimmed = runner._snip_history(spec, messages) + + # After the fix, the user message is recovered so the sequence is valid + # for providers that require system → user (e.g. GLM error 1214). + assert trimmed[0]["role"] == "system" + non_system = [m for m in trimmed if m["role"] != "system"] + assert non_system[0]["role"] == "user", f"Expected user after system, got {non_system[0]['role']}" +async def test_backfill_missing_tool_results_inserts_error(): + """Orphaned tool_use (no matching tool_result) should get a synthetic error.""" + from nanobot.agent.runner import AgentRunner, _BACKFILL_CONTENT + + messages = [ + {"role": "user", "content": "hi"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"id": "call_a", "type": "function", "function": {"name": "exec", "arguments": "{}"}}, + {"id": "call_b", "type": "function", "function": {"name": "read_file", "arguments": "{}"}}, + ], + }, + {"role": "tool", "tool_call_id": "call_a", "name": "exec", "content": "ok"}, + ] + result = AgentRunner._backfill_missing_tool_results(messages) + tool_msgs = [m for m in result if m.get("role") == "tool"] + assert len(tool_msgs) == 2 + backfilled = [m for m in tool_msgs if m.get("tool_call_id") == "call_b"] + assert len(backfilled) == 1 + assert backfilled[0]["content"] == _BACKFILL_CONTENT + assert backfilled[0]["name"] == "read_file" + + +def test_drop_orphan_tool_results_removes_unmatched_tool_messages(): + from nanobot.agent.runner import AgentRunner + + messages = [ + {"role": "system", "content": "system"}, + {"role": "user", "content": "old user"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"id": "call_ok", "type": "function", "function": {"name": "read_file", "arguments": "{}"}}, + ], + }, + {"role": "tool", "tool_call_id": "call_ok", "name": "read_file", "content": "ok"}, + {"role": "tool", "tool_call_id": "call_orphan", "name": "exec", "content": "stale"}, + {"role": "assistant", "content": "after tool"}, + ] + + cleaned = AgentRunner._drop_orphan_tool_results(messages) + + assert cleaned == [ + {"role": "system", "content": "system"}, + {"role": "user", "content": "old user"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"id": "call_ok", "type": "function", "function": {"name": "read_file", "arguments": "{}"}}, + ], + }, + {"role": "tool", "tool_call_id": "call_ok", "name": "read_file", "content": "ok"}, + {"role": "assistant", "content": "after tool"}, + ] + + +@pytest.mark.asyncio +async def test_backfill_noop_when_complete(): + """Complete message chains should not be modified.""" + from nanobot.agent.runner import AgentRunner + + messages = [ + {"role": "user", "content": "hi"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"id": "call_x", "type": "function", "function": {"name": "exec", "arguments": "{}"}}, + ], + }, + {"role": "tool", "tool_call_id": "call_x", "name": "exec", "content": "done"}, + {"role": "assistant", "content": "all good"}, + ] + result = AgentRunner._backfill_missing_tool_results(messages) + assert result is messages # same object — no copy + + +@pytest.mark.asyncio +async def test_runner_drops_orphan_tool_results_before_model_request(): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + captured_messages: list[dict] = [] + + async def chat_with_retry(*, messages, **kwargs): + captured_messages[:] = messages + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[ + {"role": "system", "content": "system"}, + {"role": "user", "content": "old user"}, + {"role": "tool", "tool_call_id": "call_orphan", "name": "exec", "content": "stale"}, + {"role": "assistant", "content": "after orphan"}, + {"role": "user", "content": "new prompt"}, + ], + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert all( + message.get("tool_call_id") != "call_orphan" + for message in captured_messages + if message.get("role") == "tool" + ) + assert result.messages[2]["tool_call_id"] == "call_orphan" + assert result.final_content == "done" + + +@pytest.mark.asyncio +async def test_backfill_repairs_model_context_without_shifting_save_turn_boundary(tmp_path): + """Historical backfill should not duplicate old tail messages on persist.""" + from nanobot.agent.loop import AgentLoop + from nanobot.agent.runner import _BACKFILL_CONTENT + from nanobot.bus.events import InboundMessage + from nanobot.bus.queue import MessageBus + + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + response = LLMResponse(content="new answer", tool_calls=[], usage={}) + provider.chat_with_retry = AsyncMock(return_value=response) + provider.chat_stream_with_retry = AsyncMock(return_value=response) + + loop = AgentLoop( + bus=MessageBus(), + provider=provider, + workspace=tmp_path, + model="test-model", + ) + loop.tools.get_definitions = MagicMock(return_value=[]) + loop.consolidator.maybe_consolidate_by_tokens = AsyncMock(return_value=False) # type: ignore[method-assign] + + session = loop.sessions.get_or_create("cli:test") + session.messages = [ + {"role": "user", "content": "old user", "timestamp": "2026-01-01T00:00:00"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_missing", + "type": "function", + "function": {"name": "read_file", "arguments": "{}"}, + } + ], + "timestamp": "2026-01-01T00:00:01", + }, + {"role": "assistant", "content": "old tail", "timestamp": "2026-01-01T00:00:02"}, + ] + loop.sessions.save(session) + + result = await loop._process_message( + InboundMessage(channel="cli", sender_id="user", chat_id="test", content="new prompt") + ) + + assert result is not None + assert result.content == "new answer" + + request_messages = provider.chat_with_retry.await_args.kwargs["messages"] + synthetic = [ + message + for message in request_messages + if message.get("role") == "tool" and message.get("tool_call_id") == "call_missing" + ] + assert len(synthetic) == 1 + assert synthetic[0]["content"] == _BACKFILL_CONTENT + + session_after = loop.sessions.get_or_create("cli:test") + assert [ + { + key: value + for key, value in message.items() + if key in {"role", "content", "tool_call_id", "name", "tool_calls"} + } + for message in session_after.messages + ] == [ + {"role": "user", "content": "old user"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_missing", + "type": "function", + "function": {"name": "read_file", "arguments": "{}"}, + } + ], + }, + {"role": "assistant", "content": "old tail"}, + {"role": "user", "content": "new prompt"}, + {"role": "assistant", "content": "new answer"}, + ] + + +@pytest.mark.asyncio +async def test_runner_backfill_only_mutates_model_context_not_returned_messages(): + """Runner should repair orphaned tool calls for the model without rewriting result.messages.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner, _BACKFILL_CONTENT + + provider = MagicMock() + captured_messages: list[dict] = [] + + async def chat_with_retry(*, messages, **kwargs): + captured_messages[:] = messages + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + initial_messages = [ + {"role": "system", "content": "system"}, + {"role": "user", "content": "old user"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_missing", + "type": "function", + "function": {"name": "read_file", "arguments": "{}"}, + } + ], + }, + {"role": "assistant", "content": "old tail"}, + {"role": "user", "content": "new prompt"}, + ] + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=initial_messages, + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + synthetic = [ + message + for message in captured_messages + if message.get("role") == "tool" and message.get("tool_call_id") == "call_missing" + ] + assert len(synthetic) == 1 + assert synthetic[0]["content"] == _BACKFILL_CONTENT + + assert [ + { + key: value + for key, value in message.items() + if key in {"role", "content", "tool_call_id", "name", "tool_calls"} + } + for message in result.messages + ] == [ + {"role": "system", "content": "system"}, + {"role": "user", "content": "old user"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_missing", + "type": "function", + "function": {"name": "read_file", "arguments": "{}"}, + } + ], + }, + {"role": "assistant", "content": "old tail"}, + {"role": "user", "content": "new prompt"}, + {"role": "assistant", "content": "done"}, + ] + + +# --------------------------------------------------------------------------- +# Microcompact (stale tool result compaction) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_microcompact_replaces_old_tool_results(): + """Tool results beyond _MICROCOMPACT_KEEP_RECENT should be summarized.""" + from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT + + total = _MICROCOMPACT_KEEP_RECENT + 5 + long_content = "x" * 600 + messages: list[dict] = [{"role": "system", "content": "sys"}] + for i in range(total): + messages.append({ + "role": "assistant", + "content": "", + "tool_calls": [{"id": f"c{i}", "type": "function", "function": {"name": "read_file", "arguments": "{}"}}], + }) + messages.append({ + "role": "tool", "tool_call_id": f"c{i}", "name": "read_file", + "content": long_content, + }) + + result = AgentRunner._microcompact(messages) + tool_msgs = [m for m in result if m.get("role") == "tool"] + stale_count = total - _MICROCOMPACT_KEEP_RECENT + compacted = [m for m in tool_msgs if "omitted from context" in str(m.get("content", ""))] + preserved = [m for m in tool_msgs if m.get("content") == long_content] + assert len(compacted) == stale_count + assert len(preserved) == _MICROCOMPACT_KEEP_RECENT + + +@pytest.mark.asyncio +async def test_microcompact_preserves_short_results(): + """Short tool results (< _MICROCOMPACT_MIN_CHARS) should not be replaced.""" + from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT + + total = _MICROCOMPACT_KEEP_RECENT + 5 + messages: list[dict] = [] + for i in range(total): + messages.append({ + "role": "assistant", + "content": "", + "tool_calls": [{"id": f"c{i}", "type": "function", "function": {"name": "exec", "arguments": "{}"}}], + }) + messages.append({ + "role": "tool", "tool_call_id": f"c{i}", "name": "exec", + "content": "short", + }) + + result = AgentRunner._microcompact(messages) + assert result is messages # no copy needed — all stale results are short + + +@pytest.mark.asyncio +async def test_microcompact_skips_non_compactable_tools(): + """Non-compactable tools (e.g. 'message') should never be replaced.""" + from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT + + total = _MICROCOMPACT_KEEP_RECENT + 5 + long_content = "y" * 1000 + messages: list[dict] = [] + for i in range(total): + messages.append({ + "role": "assistant", + "content": "", + "tool_calls": [{"id": f"c{i}", "type": "function", "function": {"name": "message", "arguments": "{}"}}], + }) + messages.append({ + "role": "tool", "tool_call_id": f"c{i}", "name": "message", + "content": long_content, + }) + + result = AgentRunner._microcompact(messages) + assert result is messages # no compactable tools found + + +def test_governance_repairs_orphans_after_snip(): + """After _snip_history clips an assistant+tool_calls, the second + _drop_orphan_tool_results pass must clean up the resulting orphans.""" + from nanobot.agent.runner import AgentRunner + + messages = [ + {"role": "system", "content": "system"}, + {"role": "user", "content": "old msg"}, + {"role": "assistant", "content": None, + "tool_calls": [{"id": "tc_old", "type": "function", + "function": {"name": "search", "arguments": "{}"}}]}, + {"role": "tool", "tool_call_id": "tc_old", "name": "search", + "content": "old result"}, + {"role": "assistant", "content": "old answer"}, + {"role": "user", "content": "new msg"}, + ] + + # Simulate snipping that keeps only the tail: drop the assistant with + # tool_calls but keep its tool result (orphan). + snipped = [ + {"role": "system", "content": "system"}, + {"role": "tool", "tool_call_id": "tc_old", "name": "search", + "content": "old result"}, + {"role": "assistant", "content": "old answer"}, + {"role": "user", "content": "new msg"}, + ] + + cleaned = AgentRunner._drop_orphan_tool_results(snipped) + # The orphan tool result should be removed. + assert not any( + m.get("role") == "tool" and m.get("tool_call_id") == "tc_old" + for m in cleaned + ) + + +def test_governance_fallback_still_repairs_orphans(): + """When full governance fails, the fallback must still run + _drop_orphan_tool_results and _backfill_missing_tool_results.""" + from nanobot.agent.runner import AgentRunner + + # Messages with an orphan tool result (no matching assistant tool_call). + messages = [ + {"role": "user", "content": "hello"}, + {"role": "tool", "tool_call_id": "orphan_tc", "name": "read", + "content": "stale"}, + {"role": "assistant", "content": "hi"}, + ] + + repaired = AgentRunner._drop_orphan_tool_results(messages) + repaired = AgentRunner._backfill_missing_tool_results(repaired) + # Orphan tool result should be gone. + assert not any(m.get("tool_call_id") == "orphan_tc" for m in repaired) +def test_snip_history_preserves_user_message_after_truncation(monkeypatch): + """When _snip_history truncates messages and the only user message ends up + outside the kept window, the method must recover the nearest user message + so the resulting sequence is valid for providers like GLM (which reject + system→assistant with error 1214). + + This reproduces the exact scenario from the bug report: + - Normal interaction: user asks, assistant calls tool, tool returns, + assistant replies. + - Injection adds a phantom user message, triggering more tool calls. + - _snip_history activates, keeping only recent assistant/tool pairs. + - The injected user message is in the truncated prefix and gets lost. + """ + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + tools = MagicMock() + tools.get_definitions.return_value = [] + runner = AgentRunner(provider) + + messages = [ + {"role": "system", "content": "system"}, + {"role": "assistant", "content": "previous reply"}, + {"role": "user", "content": ".nanobot的同目录"}, + { + "role": "assistant", + "content": None, + "tool_calls": [{"id": "tc_1", "type": "function", "function": {"name": "exec", "arguments": "{}"}}], + }, + {"role": "tool", "tool_call_id": "tc_1", "content": "tool output 1"}, + { + "role": "assistant", + "content": None, + "tool_calls": [{"id": "tc_2", "type": "function", "function": {"name": "exec", "arguments": "{}"}}], + }, + {"role": "tool", "tool_call_id": "tc_2", "content": "tool output 2"}, + ] + + spec = AgentRunSpec( + initial_messages=messages, + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + context_window_tokens=2000, + context_block_limit=100, + ) + + # Make estimate_prompt_tokens_chain report above budget so _snip_history activates. + monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_a, **_kw: (500, None)) + # Make kept window small: only the last 2 messages fit the budget. + token_sizes = { + "system": 0, + "previous reply": 200, + ".nanobot的同目录": 80, + "tool output 1": 80, + "tool output 2": 80, + } + monkeypatch.setattr( + "nanobot.agent.runner.estimate_message_tokens", + lambda msg: token_sizes.get(str(msg.get("content")), 100), + ) + + trimmed = runner._snip_history(spec, messages) + + # The first non-system message MUST be user (not assistant). + non_system = [m for m in trimmed if m.get("role") != "system"] + assert non_system, "trimmed should contain at least one non-system message" + assert non_system[0]["role"] == "user", ( + f"First non-system message must be 'user', got '{non_system[0]['role']}'. " + f"Roles: {[m['role'] for m in trimmed]}" + ) + + +def test_snip_history_no_user_at_all_falls_back_gracefully(monkeypatch): + """Edge case: if non_system has zero user messages, _snip_history should + still return a valid sequence (not crash or produce system→assistant).""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + tools = MagicMock() + tools.get_definitions.return_value = [] + runner = AgentRunner(provider) + + messages = [ + {"role": "system", "content": "system"}, + {"role": "assistant", "content": "reply"}, + {"role": "tool", "tool_call_id": "tc_1", "content": "result"}, + {"role": "assistant", "content": "reply 2"}, + {"role": "tool", "tool_call_id": "tc_2", "content": "result 2"}, + ] + + spec = AgentRunSpec( + initial_messages=messages, + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + context_window_tokens=2000, + context_block_limit=100, + ) + + monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_a, **_kw: (500, None)) + monkeypatch.setattr( + "nanobot.agent.runner.estimate_message_tokens", + lambda msg: 100, + ) + + trimmed = runner._snip_history(spec, messages) + + # Should not crash. The result should still be a valid list. + assert isinstance(trimmed, list) + # Must have at least system. + assert any(m.get("role") == "system" for m in trimmed) + # The _enforce_role_alternation safety net must be able to fix whatever + # _snip_history returns here — verify it produces a valid sequence. + from nanobot.providers.base import LLMProvider + fixed = LLMProvider._enforce_role_alternation(trimmed) + non_system = [m for m in fixed if m["role"] != "system"] + if non_system: + assert non_system[0]["role"] in ("user", "tool"), ( + f"Safety net should ensure first non-system is user/tool, got {non_system[0]['role']}" + ) diff --git a/tests/agent/test_runner_hooks.py b/tests/agent/test_runner_hooks.py new file mode 100644 index 000000000..7718eee20 --- /dev/null +++ b/tests/agent/test_runner_hooks.py @@ -0,0 +1,172 @@ +"""Tests for AgentRunner hook lifecycle: ordering, streaming deltas, +cached-token propagation, and hook context.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from nanobot.config.schema import AgentDefaults +from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest + +_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars + + +@pytest.mark.asyncio +async def test_runner_calls_hooks_in_order(): + from nanobot.agent.hook import AgentHook, AgentHookContext + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock(spec=LLMProvider) + call_count = {"n": 0} + events: list[tuple] = [] + + async def chat_with_retry(**kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + return LLMResponse( + content="thinking", + tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})], + ) + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(return_value="tool result") + + class RecordingHook(AgentHook): + async def before_iteration(self, context: AgentHookContext) -> None: + events.append(("before_iteration", context.iteration)) + + async def before_execute_tools(self, context: AgentHookContext) -> None: + events.append(( + "before_execute_tools", + context.iteration, + [tc.name for tc in context.tool_calls], + )) + + async def after_iteration(self, context: AgentHookContext) -> None: + events.append(( + "after_iteration", + context.iteration, + context.final_content, + list(context.tool_results), + list(context.tool_events), + context.stop_reason, + )) + + def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None: + events.append(("finalize_content", context.iteration, content)) + return content.upper() if content else content + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + hook=RecordingHook(), + )) + + assert result.final_content == "DONE" + assert events == [ + ("before_iteration", 0), + ("before_execute_tools", 0, ["list_dir"]), + ( + "after_iteration", + 0, + None, + ["tool result"], + [{"name": "list_dir", "status": "ok", "detail": "tool result"}], + None, + ), + ("before_iteration", 1), + ("finalize_content", 1, "done"), + ("after_iteration", 1, "DONE", [], [], "completed"), + ] + + +@pytest.mark.asyncio +async def test_runner_streaming_hook_receives_deltas_and_end_signal(): + from nanobot.agent.hook import AgentHook, AgentHookContext + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock(spec=LLMProvider) + streamed: list[str] = [] + endings: list[bool] = [] + + async def chat_stream_with_retry(*, on_content_delta, **kwargs): + await on_content_delta("he") + await on_content_delta("llo") + return LLMResponse(content="hello", tool_calls=[], usage={}) + + provider.chat_stream_with_retry = chat_stream_with_retry + provider.chat_with_retry = AsyncMock() + tools = MagicMock() + tools.get_definitions.return_value = [] + + class StreamingHook(AgentHook): + def wants_streaming(self) -> bool: + return True + + async def on_stream(self, context: AgentHookContext, delta: str) -> None: + streamed.append(delta) + + async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None: + endings.append(resuming) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[], + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + hook=StreamingHook(), + )) + + assert result.final_content == "hello" + assert streamed == ["he", "llo"] + assert endings == [False] + provider.chat_with_retry.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_runner_passes_cached_tokens_to_hook_context(): + """Hook context.usage should contain cached_tokens.""" + from nanobot.agent.hook import AgentHook, AgentHookContext + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock(spec=LLMProvider) + captured_usage: list[dict] = [] + + class UsageHook(AgentHook): + async def after_iteration(self, context: AgentHookContext) -> None: + captured_usage.append(dict(context.usage)) + + async def chat_with_retry(**kwargs): + return LLMResponse( + content="done", + tool_calls=[], + usage={"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150}, + ) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + runner = AgentRunner(provider) + await runner.run(AgentRunSpec( + initial_messages=[], + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + hook=UsageHook(), + )) + + assert len(captured_usage) == 1 + assert captured_usage[0]["cached_tokens"] == 150 diff --git a/tests/agent/test_runner_injections.py b/tests/agent/test_runner_injections.py new file mode 100644 index 000000000..1aa504e32 --- /dev/null +++ b/tests/agent/test_runner_injections.py @@ -0,0 +1,1038 @@ +"""Tests for the mid-turn injection system: drain, checkpoints, pending queues, error paths.""" + +from __future__ import annotations + +import asyncio +import base64 +import time +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from nanobot.config.schema import AgentDefaults +from nanobot.providers.base import LLMResponse, ToolCallRequest + +_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars + + +def _make_injection_callback(queue: asyncio.Queue): + """Return an async callback that drains *queue* into a list of dicts.""" + async def inject_cb(): + items = [] + while not queue.empty(): + items.append(await queue.get()) + return items + return inject_cb + + +def _make_loop(tmp_path): + from nanobot.agent.loop import AgentLoop + from nanobot.bus.queue import MessageBus + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + + with patch("nanobot.agent.loop.ContextBuilder"), \ + patch("nanobot.agent.loop.SessionManager"), \ + patch("nanobot.agent.loop.SubagentManager") as MockSubMgr: + MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0) + loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path) + return loop + +@pytest.mark.asyncio +async def test_drain_injections_returns_empty_when_no_callback(): + """No injection_callback → empty list.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + runner = AgentRunner(provider) + tools = MagicMock() + tools.get_definitions.return_value = [] + spec = AgentRunSpec( + initial_messages=[], tools=tools, model="m", + max_iterations=1, max_tool_result_chars=1000, + injection_callback=None, + ) + result = await runner._drain_injections(spec) + assert result == [] + + +@pytest.mark.asyncio +async def test_drain_injections_extracts_content_from_inbound_messages(): + """Should extract .content from InboundMessage objects.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + from nanobot.bus.events import InboundMessage + + provider = MagicMock() + runner = AgentRunner(provider) + tools = MagicMock() + tools.get_definitions.return_value = [] + + msgs = [ + InboundMessage(channel="cli", sender_id="u", chat_id="c", content="hello"), + InboundMessage(channel="cli", sender_id="u", chat_id="c", content="world"), + ] + + async def cb(): + return msgs + + spec = AgentRunSpec( + initial_messages=[], tools=tools, model="m", + max_iterations=1, max_tool_result_chars=1000, + injection_callback=cb, + ) + result = await runner._drain_injections(spec) + assert result == [ + {"role": "user", "content": "hello"}, + {"role": "user", "content": "world"}, + ] + + +@pytest.mark.asyncio +async def test_drain_injections_passes_limit_to_callback_when_supported(): + """Limit-aware callbacks can preserve overflow in their own queue.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTIONS_PER_TURN + from nanobot.bus.events import InboundMessage + + provider = MagicMock() + runner = AgentRunner(provider) + tools = MagicMock() + tools.get_definitions.return_value = [] + seen_limits: list[int] = [] + + msgs = [ + InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg{i}") + for i in range(_MAX_INJECTIONS_PER_TURN + 3) + ] + + async def cb(*, limit: int): + seen_limits.append(limit) + return msgs[:limit] + + spec = AgentRunSpec( + initial_messages=[], tools=tools, model="m", + max_iterations=1, max_tool_result_chars=1000, + injection_callback=cb, + ) + result = await runner._drain_injections(spec) + assert seen_limits == [_MAX_INJECTIONS_PER_TURN] + assert result == [ + {"role": "user", "content": "msg0"}, + {"role": "user", "content": "msg1"}, + {"role": "user", "content": "msg2"}, + ] + + +@pytest.mark.asyncio +async def test_drain_injections_skips_empty_content(): + """Messages with blank content should be filtered out.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + from nanobot.bus.events import InboundMessage + + provider = MagicMock() + runner = AgentRunner(provider) + tools = MagicMock() + tools.get_definitions.return_value = [] + + msgs = [ + InboundMessage(channel="cli", sender_id="u", chat_id="c", content=""), + InboundMessage(channel="cli", sender_id="u", chat_id="c", content=" "), + InboundMessage(channel="cli", sender_id="u", chat_id="c", content="valid"), + ] + + async def cb(): + return msgs + + spec = AgentRunSpec( + initial_messages=[], tools=tools, model="m", + max_iterations=1, max_tool_result_chars=1000, + injection_callback=cb, + ) + result = await runner._drain_injections(spec) + assert result == [{"role": "user", "content": "valid"}] + + +@pytest.mark.asyncio +async def test_drain_injections_handles_callback_exception(): + """If the callback raises, return empty list (error is logged).""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + runner = AgentRunner(provider) + tools = MagicMock() + tools.get_definitions.return_value = [] + + async def cb(): + raise RuntimeError("boom") + + spec = AgentRunSpec( + initial_messages=[], tools=tools, model="m", + max_iterations=1, max_tool_result_chars=1000, + injection_callback=cb, + ) + result = await runner._drain_injections(spec) + assert result == [] + + +@pytest.mark.asyncio +async def test_checkpoint1_injects_after_tool_execution(): + """Follow-up messages are injected after tool execution, before next LLM call.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + from nanobot.bus.events import InboundMessage + + provider = MagicMock() + call_count = {"n": 0} + captured_messages = [] + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + captured_messages.append(list(messages)) + if call_count["n"] == 1: + return LLMResponse( + content="using tool", + tool_calls=[ToolCallRequest(id="c1", name="read_file", arguments={"path": "x"})], + usage={}, + ) + return LLMResponse(content="final answer", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(return_value="file content") + + injection_queue = asyncio.Queue() + inject_cb = _make_injection_callback(injection_queue) + + # Put a follow-up message in the queue before the run starts + await injection_queue.put( + InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up question") + ) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hello"}], + tools=tools, + model="test-model", + max_iterations=5, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + injection_callback=inject_cb, + )) + + assert result.had_injections is True + assert result.final_content == "final answer" + # The second call should have the injected user message + assert call_count["n"] == 2 + last_messages = captured_messages[-1] + injected = [m for m in last_messages if m.get("role") == "user" and m.get("content") == "follow-up question"] + assert len(injected) == 1 + + +@pytest.mark.asyncio +async def test_checkpoint2_injects_after_final_response_with_resuming_stream(): + """After final response, if injections exist, stream_end should get resuming=True.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + from nanobot.agent.hook import AgentHook, AgentHookContext + from nanobot.bus.events import InboundMessage + + provider = MagicMock() + call_count = {"n": 0} + stream_end_calls = [] + + class TrackingHook(AgentHook): + def wants_streaming(self) -> bool: + return True + + async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None: + stream_end_calls.append(resuming) + + def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None: + return content + + async def chat_stream_with_retry(*, messages, on_content_delta=None, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + return LLMResponse(content="first answer", tool_calls=[], usage={}) + return LLMResponse(content="second answer", tool_calls=[], usage={}) + + provider.chat_stream_with_retry = chat_stream_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + injection_queue = asyncio.Queue() + inject_cb = _make_injection_callback(injection_queue) + + # Inject a follow-up that arrives during the first response + await injection_queue.put( + InboundMessage(channel="cli", sender_id="u", chat_id="c", content="quick follow-up") + ) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hello"}], + tools=tools, + model="test-model", + max_iterations=5, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + hook=TrackingHook(), + injection_callback=inject_cb, + )) + + assert result.had_injections is True + assert result.final_content == "second answer" + assert call_count["n"] == 2 + # First stream_end should have resuming=True (because injections found) + assert stream_end_calls[0] is True + # Second (final) stream_end should have resuming=False + assert stream_end_calls[-1] is False + + +@pytest.mark.asyncio +async def test_checkpoint2_preserves_final_response_in_history_before_followup(): + """A follow-up injected after a final answer must still see that answer in history.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + from nanobot.bus.events import InboundMessage + + provider = MagicMock() + call_count = {"n": 0} + captured_messages = [] + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + captured_messages.append([dict(message) for message in messages]) + if call_count["n"] == 1: + return LLMResponse(content="first answer", tool_calls=[], usage={}) + return LLMResponse(content="second answer", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + injection_queue = asyncio.Queue() + inject_cb = _make_injection_callback(injection_queue) + + await injection_queue.put( + InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up question") + ) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hello"}], + tools=tools, + model="test-model", + max_iterations=5, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + injection_callback=inject_cb, + )) + + assert result.final_content == "second answer" + assert call_count["n"] == 2 + assert captured_messages[-1] == [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "first answer"}, + {"role": "user", "content": "follow-up question"}, + ] + assert [ + {"role": message["role"], "content": message["content"]} + for message in result.messages + if message.get("role") == "assistant" + ] == [ + {"role": "assistant", "content": "first answer"}, + {"role": "assistant", "content": "second answer"}, + ] + + +@pytest.mark.asyncio +async def test_loop_injected_followup_preserves_image_media(tmp_path): + """Mid-turn follow-ups with images should keep multimodal content.""" + from nanobot.agent.loop import AgentLoop + from nanobot.bus.events import InboundMessage + from nanobot.bus.queue import MessageBus + + image_path = tmp_path / "followup.png" + image_path.write_bytes(base64.b64decode( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+yF9kAAAAASUVORK5CYII=" + )) + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + captured_messages: list[list[dict]] = [] + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + captured_messages.append(list(messages)) + if call_count["n"] == 1: + return LLMResponse(content="first answer", tool_calls=[], usage={}) + return LLMResponse(content="second answer", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model") + loop.tools.get_definitions = MagicMock(return_value=[]) + + pending_queue = asyncio.Queue() + await pending_queue.put(InboundMessage( + channel="cli", + sender_id="u", + chat_id="c", + content="", + media=[str(image_path)], + )) + + final_content, _, _, _, had_injections = await loop._run_agent_loop( + [{"role": "user", "content": "hello"}], + channel="cli", + chat_id="c", + pending_queue=pending_queue, + ) + + assert final_content == "second answer" + assert had_injections is True + assert call_count["n"] == 2 + injected_user_messages = [ + message for message in captured_messages[-1] + if message.get("role") == "user" and isinstance(message.get("content"), list) + ] + assert injected_user_messages + assert any( + block.get("type") == "image_url" + for block in injected_user_messages[-1]["content"] + if isinstance(block, dict) + ) + + +@pytest.mark.asyncio +async def test_runner_merges_multiple_injected_user_messages_without_losing_media(): + """Multiple injected follow-ups should not create lossy consecutive user messages.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + call_count = {"n": 0} + captured_messages = [] + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + captured_messages.append([dict(message) for message in messages]) + if call_count["n"] == 1: + return LLMResponse(content="first answer", tool_calls=[], usage={}) + return LLMResponse(content="second answer", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + async def inject_cb(): + if call_count["n"] == 1: + return [ + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}}, + {"type": "text", "text": "look at this"}, + ], + }, + {"role": "user", "content": "and answer briefly"}, + ] + return [] + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hello"}], + tools=tools, + model="test-model", + max_iterations=5, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + injection_callback=inject_cb, + )) + + assert result.final_content == "second answer" + assert call_count["n"] == 2 + second_call = captured_messages[-1] + user_messages = [message for message in second_call if message.get("role") == "user"] + assert len(user_messages) == 2 + injected = user_messages[-1] + assert isinstance(injected["content"], list) + assert any( + block.get("type") == "image_url" + for block in injected["content"] + if isinstance(block, dict) + ) + assert any( + block.get("type") == "text" and block.get("text") == "and answer briefly" + for block in injected["content"] + if isinstance(block, dict) + ) + + +@pytest.mark.asyncio +async def test_injection_cycles_capped_at_max(): + """Injection cycles should be capped at _MAX_INJECTION_CYCLES.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES + from nanobot.bus.events import InboundMessage + + provider = MagicMock() + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + return LLMResponse(content=f"answer-{call_count['n']}", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + drain_count = {"n": 0} + + async def inject_cb(): + drain_count["n"] += 1 + # Only inject for the first _MAX_INJECTION_CYCLES drains + if drain_count["n"] <= _MAX_INJECTION_CYCLES: + return [InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg-{drain_count['n']}")] + return [] + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "start"}], + tools=tools, + model="test-model", + max_iterations=20, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + injection_callback=inject_cb, + )) + + assert result.had_injections is True + # Should be capped: _MAX_INJECTION_CYCLES injection rounds + 1 final round + assert call_count["n"] == _MAX_INJECTION_CYCLES + 1 + + +@pytest.mark.asyncio +async def test_no_injections_flag_is_false_by_default(): + """had_injections should be False when no injection callback or no messages.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + + async def chat_with_retry(**kwargs): + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hi"}], + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert result.had_injections is False + + +@pytest.mark.asyncio +async def test_pending_queue_cleanup_on_dispatch(tmp_path): + """_pending_queues should be cleaned up after _dispatch completes.""" + loop = _make_loop(tmp_path) + + async def chat_with_retry(**kwargs): + return LLMResponse(content="done", tool_calls=[], usage={}) + + loop.provider.chat_with_retry = chat_with_retry + + from nanobot.bus.events import InboundMessage + + msg = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="hello") + # The queue should not exist before dispatch + assert msg.session_key not in loop._pending_queues + + await loop._dispatch(msg) + + # The queue should be cleaned up after dispatch + assert msg.session_key not in loop._pending_queues + + +@pytest.mark.asyncio +async def test_followup_routed_to_pending_queue(tmp_path): + """Unified-session follow-ups should route into the active pending queue.""" + from nanobot.agent.loop import UNIFIED_SESSION_KEY + from nanobot.bus.events import InboundMessage + + loop = _make_loop(tmp_path) + loop._unified_session = True + loop._dispatch = AsyncMock() # type: ignore[method-assign] + + pending = asyncio.Queue(maxsize=20) + loop._pending_queues[UNIFIED_SESSION_KEY] = pending + + run_task = asyncio.create_task(loop.run()) + msg = InboundMessage(channel="discord", sender_id="u", chat_id="c", content="follow-up") + await loop.bus.publish_inbound(msg) + + deadline = time.time() + 2 + while pending.empty() and time.time() < deadline: + await asyncio.sleep(0.01) + + loop.stop() + await asyncio.wait_for(run_task, timeout=2) + + assert loop._dispatch.await_count == 0 + assert not pending.empty() + queued_msg = pending.get_nowait() + assert queued_msg.content == "follow-up" + assert queued_msg.session_key == UNIFIED_SESSION_KEY + + +@pytest.mark.asyncio +async def test_pending_queue_preserves_overflow_for_next_injection_cycle(tmp_path): + """Pending queue should leave overflow messages queued for later drains.""" + from nanobot.agent.loop import AgentLoop + from nanobot.bus.events import InboundMessage + from nanobot.bus.queue import MessageBus + from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + captured_messages: list[list[dict]] = [] + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + captured_messages.append([dict(message) for message in messages]) + return LLMResponse(content=f"answer-{call_count['n']}", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model") + loop.tools.get_definitions = MagicMock(return_value=[]) + + pending_queue = asyncio.Queue() + total_followups = _MAX_INJECTIONS_PER_TURN + 2 + for idx in range(total_followups): + await pending_queue.put(InboundMessage( + channel="cli", + sender_id="u", + chat_id="c", + content=f"follow-up-{idx}", + )) + + final_content, _, _, _, had_injections = await loop._run_agent_loop( + [{"role": "user", "content": "hello"}], + channel="cli", + chat_id="c", + pending_queue=pending_queue, + ) + + assert final_content == "answer-3" + assert had_injections is True + assert call_count["n"] == 3 + flattened_user_content = "\n".join( + message["content"] + for message in captured_messages[-1] + if message.get("role") == "user" and isinstance(message.get("content"), str) + ) + for idx in range(total_followups): + assert f"follow-up-{idx}" in flattened_user_content + assert pending_queue.empty() + + +@pytest.mark.asyncio +async def test_pending_queue_full_falls_back_to_queued_task(tmp_path): + """QueueFull should preserve the message by dispatching a queued task.""" + from nanobot.bus.events import InboundMessage + + loop = _make_loop(tmp_path) + loop._dispatch = AsyncMock() # type: ignore[method-assign] + + pending = asyncio.Queue(maxsize=1) + pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="already queued")) + loop._pending_queues["cli:c"] = pending + + run_task = asyncio.create_task(loop.run()) + msg = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up") + await loop.bus.publish_inbound(msg) + + deadline = time.time() + 2 + while loop._dispatch.await_count == 0 and time.time() < deadline: + await asyncio.sleep(0.01) + + loop.stop() + await asyncio.wait_for(run_task, timeout=2) + + assert loop._dispatch.await_count == 1 + dispatched_msg = loop._dispatch.await_args.args[0] + assert dispatched_msg.content == "follow-up" + assert pending.qsize() == 1 + + +@pytest.mark.asyncio +async def test_dispatch_republishes_leftover_queue_messages(tmp_path): + """Messages left in the pending queue after _dispatch are re-published to the bus. + + This tests the finally-block cleanup that prevents message loss when + the runner exits early (e.g., max_iterations, tool_error) with messages + still in the queue. + """ + from nanobot.bus.events import InboundMessage + + loop = _make_loop(tmp_path) + bus = loop.bus + + # Simulate a completed dispatch by manually registering a queue + # with leftover messages, then running the cleanup logic directly. + pending = asyncio.Queue(maxsize=20) + session_key = "cli:c" + loop._pending_queues[session_key] = pending + pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="leftover-1")) + pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="leftover-2")) + + # Execute the cleanup logic from the finally block + queue = loop._pending_queues.pop(session_key, None) + assert queue is not None + leftover = 0 + while True: + try: + item = queue.get_nowait() + except asyncio.QueueEmpty: + break + await bus.publish_inbound(item) + leftover += 1 + + assert leftover == 2 + + # Verify the messages are now on the bus + msgs = [] + while not bus.inbound.empty(): + msgs.append(await asyncio.wait_for(bus.consume_inbound(), timeout=0.5)) + contents = [m.content for m in msgs] + assert "leftover-1" in contents + assert "leftover-2" in contents + + +@pytest.mark.asyncio +async def test_drain_injections_on_fatal_tool_error(): + """Pending injections should be drained even when a fatal tool error occurs.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + from nanobot.bus.events import InboundMessage + + provider = MagicMock() + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + return LLMResponse( + content="", + tool_calls=[ToolCallRequest(id="c1", name="exec", arguments={"cmd": "bad"})], + usage={}, + ) + # Second call: respond normally to the injected follow-up + return LLMResponse(content="reply to follow-up", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(side_effect=RuntimeError("tool exploded")) + + injection_queue = asyncio.Queue() + inject_cb = _make_injection_callback(injection_queue) + + await injection_queue.put( + InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after error") + ) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hello"}], + tools=tools, + model="test-model", + max_iterations=5, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + fail_on_tool_error=True, + injection_callback=inject_cb, + )) + + assert result.had_injections is True + assert result.final_content == "reply to follow-up" + # The injection should be in the messages history + injected = [ + m for m in result.messages + if m.get("role") == "user" and m.get("content") == "follow-up after error" + ] + assert len(injected) == 1 + + +@pytest.mark.asyncio +async def test_drain_injections_on_llm_error(): + """Pending injections should be drained when the LLM returns an error finish_reason.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + from nanobot.bus.events import InboundMessage + + provider = MagicMock() + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + return LLMResponse( + content=None, + tool_calls=[], + finish_reason="error", + usage={}, + ) + # Second call: respond normally to the injected follow-up + return LLMResponse(content="recovered answer", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + injection_queue = asyncio.Queue() + inject_cb = _make_injection_callback(injection_queue) + + await injection_queue.put( + InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after LLM error") + ) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "previous response"}, + {"role": "user", "content": "trigger error"}, + ], + tools=tools, + model="test-model", + max_iterations=5, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + injection_callback=inject_cb, + )) + + assert result.had_injections is True + assert result.final_content == "recovered answer" + injected = [ + m for m in result.messages + if m.get("role") == "user" and "follow-up after LLM error" in str(m.get("content", "")) + ] + assert len(injected) == 1 + + +@pytest.mark.asyncio +async def test_drain_injections_on_empty_final_response(): + """Pending injections should be drained when the runner exits due to empty response.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_EMPTY_RETRIES + from nanobot.bus.events import InboundMessage + + provider = MagicMock() + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + if call_count["n"] <= _MAX_EMPTY_RETRIES + 1: + return LLMResponse(content="", tool_calls=[], usage={}) + # After retries exhausted + injection drain, respond normally + return LLMResponse(content="answer after empty", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + injection_queue = asyncio.Queue() + inject_cb = _make_injection_callback(injection_queue) + + await injection_queue.put( + InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after empty") + ) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "previous response"}, + {"role": "user", "content": "trigger empty"}, + ], + tools=tools, + model="test-model", + max_iterations=10, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + injection_callback=inject_cb, + )) + + assert result.had_injections is True + assert result.final_content == "answer after empty" + injected = [ + m for m in result.messages + if m.get("role") == "user" and "follow-up after empty" in str(m.get("content", "")) + ] + assert len(injected) == 1 + + +@pytest.mark.asyncio +async def test_drain_injections_on_max_iterations(): + """Pending injections should be drained when the runner hits max_iterations. + + Unlike other error paths, max_iterations cannot continue the loop, so + injections are appended to messages but not processed by the LLM. + The key point is they are consumed from the queue to prevent re-publish. + """ + from nanobot.agent.runner import AgentRunSpec, AgentRunner + from nanobot.bus.events import InboundMessage + + provider = MagicMock() + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + return LLMResponse( + content="", + tool_calls=[ToolCallRequest(id=f"c{call_count['n']}", name="read_file", arguments={"path": "x"})], + usage={}, + ) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(return_value="file content") + + injection_queue = asyncio.Queue() + inject_cb = _make_injection_callback(injection_queue) + + await injection_queue.put( + InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after max iters") + ) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hello"}], + tools=tools, + model="test-model", + max_iterations=2, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + injection_callback=inject_cb, + )) + + assert result.stop_reason == "max_iterations" + assert result.had_injections is True + # The injection was consumed from the queue (preventing re-publish) + assert injection_queue.empty() + # The injection message is appended to conversation history + injected = [ + m for m in result.messages + if m.get("role") == "user" and m.get("content") == "follow-up after max iters" + ] + assert len(injected) == 1 + + +@pytest.mark.asyncio +async def test_drain_injections_set_flag_when_followup_arrives_after_last_iteration(): + """Late follow-ups drained in max_iterations should still flip had_injections.""" + from nanobot.agent.hook import AgentHook + from nanobot.agent.runner import AgentRunSpec, AgentRunner + from nanobot.bus.events import InboundMessage + + provider = MagicMock() + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + return LLMResponse( + content="", + tool_calls=[ToolCallRequest(id=f"c{call_count['n']}", name="read_file", arguments={"path": "x"})], + usage={}, + ) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(return_value="file content") + + injection_queue = asyncio.Queue() + inject_cb = _make_injection_callback(injection_queue) + + class InjectOnLastAfterIterationHook(AgentHook): + def __init__(self) -> None: + self.after_iteration_calls = 0 + + async def after_iteration(self, context) -> None: + self.after_iteration_calls += 1 + if self.after_iteration_calls == 2: + await injection_queue.put( + InboundMessage( + channel="cli", + sender_id="u", + chat_id="c", + content="late follow-up after max iters", + ) + ) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hello"}], + tools=tools, + model="test-model", + max_iterations=2, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + injection_callback=inject_cb, + hook=InjectOnLastAfterIterationHook(), + )) + + assert result.stop_reason == "max_iterations" + assert result.had_injections is True + assert injection_queue.empty() + injected = [ + m for m in result.messages + if m.get("role") == "user" and m.get("content") == "late follow-up after max iters" + ] + assert len(injected) == 1 + + +@pytest.mark.asyncio +async def test_injection_cycle_cap_on_error_path(): + """Injection cycles should be capped even when every iteration hits an LLM error.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES + from nanobot.bus.events import InboundMessage + + provider = MagicMock() + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + return LLMResponse( + content=None, + tool_calls=[], + finish_reason="error", + usage={}, + ) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + drain_count = {"n": 0} + + async def inject_cb(): + drain_count["n"] += 1 + if drain_count["n"] <= _MAX_INJECTION_CYCLES: + return [InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg-{drain_count['n']}")] + return [] + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "previous"}, + {"role": "user", "content": "trigger error"}, + ], + tools=tools, + model="test-model", + max_iterations=20, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + injection_callback=inject_cb, + )) + + assert result.had_injections is True + # Should cap: _MAX_INJECTION_CYCLES drained rounds + 1 final round that breaks + assert call_count["n"] == _MAX_INJECTION_CYCLES + 1 + diff --git a/tests/agent/test_runner_persistence.py b/tests/agent/test_runner_persistence.py new file mode 100644 index 000000000..d2bcfa9d4 --- /dev/null +++ b/tests/agent/test_runner_persistence.py @@ -0,0 +1,161 @@ +"""Tests for tool result persistence: large results, pruning, temp files, cleanup.""" + +from __future__ import annotations + +import os +import time +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from nanobot.config.schema import AgentDefaults +from nanobot.providers.base import LLMResponse, ToolCallRequest + +_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars + +async def test_runner_persists_large_tool_results_for_follow_up_calls(tmp_path): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + captured_second_call: list[dict] = [] + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + return LLMResponse( + content="working", + tool_calls=[ToolCallRequest(id="call_big", name="list_dir", arguments={"path": "."})], + usage={"prompt_tokens": 5, "completion_tokens": 3}, + ) + captured_second_call[:] = messages + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(return_value="x" * 20_000) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "do task"}], + tools=tools, + model="test-model", + max_iterations=2, + workspace=tmp_path, + session_key="test:runner", + max_tool_result_chars=2048, + )) + + assert result.final_content == "done" + tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool") + assert "[tool output persisted]" in tool_message["content"] + assert "tool-results" in tool_message["content"] + assert (tmp_path / ".nanobot" / "tool-results" / "test_runner" / "call_big.txt").exists() + + +def test_persist_tool_result_prunes_old_session_buckets(tmp_path): + from nanobot.utils.helpers import maybe_persist_tool_result + + root = tmp_path / ".nanobot" / "tool-results" + old_bucket = root / "old_session" + recent_bucket = root / "recent_session" + old_bucket.mkdir(parents=True) + recent_bucket.mkdir(parents=True) + (old_bucket / "old.txt").write_text("old", encoding="utf-8") + (recent_bucket / "recent.txt").write_text("recent", encoding="utf-8") + + stale = time.time() - (8 * 24 * 60 * 60) + os.utime(old_bucket, (stale, stale)) + os.utime(old_bucket / "old.txt", (stale, stale)) + + persisted = maybe_persist_tool_result( + tmp_path, + "current:session", + "call_big", + "x" * 5000, + max_chars=64, + ) + + assert "[tool output persisted]" in persisted + assert not old_bucket.exists() + assert recent_bucket.exists() + assert (root / "current_session" / "call_big.txt").exists() + + +def test_persist_tool_result_leaves_no_temp_files(tmp_path): + from nanobot.utils.helpers import maybe_persist_tool_result + + root = tmp_path / ".nanobot" / "tool-results" + maybe_persist_tool_result( + tmp_path, + "current:session", + "call_big", + "x" * 5000, + max_chars=64, + ) + + assert (root / "current_session" / "call_big.txt").exists() + assert list((root / "current_session").glob("*.tmp")) == [] + + +def test_persist_tool_result_logs_cleanup_failures(monkeypatch, tmp_path): + from nanobot.utils.helpers import maybe_persist_tool_result + + warnings: list[str] = [] + + monkeypatch.setattr( + "nanobot.utils.helpers._cleanup_tool_result_buckets", + lambda *_args, **_kwargs: (_ for _ in ()).throw(OSError("busy")), + ) + monkeypatch.setattr( + "nanobot.utils.helpers.logger.exception", + lambda message, *args: warnings.append(message.format(*args)), + ) + + persisted = maybe_persist_tool_result( + tmp_path, + "current:session", + "call_big", + "x" * 5000, + max_chars=64, + ) + + assert "[tool output persisted]" in persisted + assert warnings and "Failed to clean stale tool result buckets" in warnings[0] +async def test_runner_keeps_going_when_tool_result_persistence_fails(): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + captured_second_call: list[dict] = [] + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + return LLMResponse( + content="working", + tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})], + usage={"prompt_tokens": 5, "completion_tokens": 3}, + ) + captured_second_call[:] = messages + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(return_value="tool result") + + runner = AgentRunner(provider) + with patch("nanobot.agent.runner.maybe_persist_tool_result", side_effect=RuntimeError("disk full")): + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "do task"}], + tools=tools, + model="test-model", + max_iterations=2, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert result.final_content == "done" + tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool") + assert tool_message["content"] == "tool result" diff --git a/tests/agent/test_runner_safety.py b/tests/agent/test_runner_safety.py new file mode 100644 index 000000000..14565e203 --- /dev/null +++ b/tests/agent/test_runner_safety.py @@ -0,0 +1,244 @@ +"""Tests for AgentRunner security: workspace violations, SSRF, shell guard, throttling.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from nanobot.config.schema import AgentDefaults +from nanobot.providers.base import LLMResponse, ToolCallRequest + +_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars + +async def test_runner_does_not_abort_on_workspace_violation_anymore(): + """v2 behavior: workspace-bound rejections are *soft* tool errors. + + Previously (PR #3493) any workspace boundary error became a fatal + RuntimeError that aborted the turn. That silently killed legitimate + workspace commands once the heuristic guard misfired (#3599 #3605), so + we now hand the error back to the LLM as a recoverable tool result and + rely on ``repeated_workspace_violation_error`` to throttle bypass loops. + """ + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + provider.chat_with_retry = AsyncMock(side_effect=[ + LLMResponse( + content="trying outside", + tool_calls=[ToolCallRequest( + id="call_1", name="read_file", arguments={"path": "/tmp/outside.md"}, + )], + ), + LLMResponse(content="ok, telling the user instead", tool_calls=[]), + ]) + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock( + side_effect=PermissionError( + "Path /tmp/outside.md is outside allowed directory /workspace" + ) + ) + + runner = AgentRunner(provider) + + result = await runner.run(AgentRunSpec( + initial_messages=[], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert provider.chat_with_retry.await_count == 2, ( + "workspace violation must NOT short-circuit the loop" + ) + assert result.stop_reason != "tool_error" + assert result.error is None + assert result.final_content == "ok, telling the user instead" + assert result.tool_events and result.tool_events[0]["status"] == "error" + # Detail still carries the workspace_violation breadcrumb for telemetry, + # but the runner did not raise. + assert "workspace_violation" in result.tool_events[0]["detail"] + + +def test_is_ssrf_violation_recognizes_private_url_blocks(): + """SSRF rejections are classified separately from workspace boundaries.""" + from nanobot.agent.runner import AgentRunner + + ssrf_msg = "Error: Command blocked by safety guard (internal/private URL detected)" + assert AgentRunner._is_ssrf_violation(ssrf_msg) is True + assert AgentRunner._is_ssrf_violation( + "URL validation failed: Blocked: host resolves to private/internal address 192.168.1.2" + ) is True + + # Workspace-bound markers are NOT classified as SSRF. + assert AgentRunner._is_ssrf_violation( + "Error: Command blocked by safety guard (path outside working dir)" + ) is False + assert AgentRunner._is_ssrf_violation( + "Path /tmp/x is outside allowed directory /ws" + ) is False + # Deny / allowlist filter messages stay non-fatal too. + assert AgentRunner._is_ssrf_violation( + "Error: Command blocked by deny pattern filter" + ) is False + + +@pytest.mark.asyncio +async def test_runner_returns_non_retryable_hint_on_ssrf_violation(): + """SSRF stays blocked, but the runtime gives the LLM a final chance to recover.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + provider.chat_with_retry = AsyncMock(side_effect=[ + LLMResponse( + content="curl-ing metadata", + tool_calls=[ToolCallRequest( + id="call_ssrf", + name="exec", + arguments={"command": "curl http://169.254.169.254"}, + )], + ), + LLMResponse( + content="I cannot access that private URL. Please share local files.", + tool_calls=[], + ), + ]) + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(return_value=( + "Error: Command blocked by safety guard (internal/private URL detected)" + )) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert provider.chat_with_retry.await_count == 2 + assert result.stop_reason == "completed" + assert result.error is None + assert result.final_content == "I cannot access that private URL. Please share local files." + assert result.tool_events and result.tool_events[0]["detail"].startswith("ssrf_violation:") + tool_messages = [m for m in result.messages if m.get("role") == "tool"] + assert tool_messages + assert "non-bypassable security boundary" in tool_messages[0]["content"] + assert "Do not retry" in tool_messages[0]["content"] + assert "tools.ssrfWhitelist" in tool_messages[0]["content"] + + +@pytest.mark.asyncio +async def test_runner_lets_llm_recover_from_shell_guard_path_outside(): + """Reporter scenario for #3599 / #3605 -- guard hit, agent recovers. + + The shell `_guard_command` heuristic fires on `2>/dev/null`-style + redirects and other shell idioms. Before v2 that abort'd the whole + turn (silent hang on Telegram per #3605); now the LLM gets the soft + error back and can finalize on the next iteration. + """ + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + captured_second_call: list[dict] = [] + + async def chat_with_retry(*, messages, **kwargs): + if provider.chat_with_retry.await_count == 1: + return LLMResponse( + content="trying noisy cleanup", + tool_calls=[ToolCallRequest( + id="call_blocked", + name="exec", + arguments={"command": "rm scratch.txt 2>/dev/null"}, + )], + ) + captured_second_call[:] = list(messages) + return LLMResponse(content="recovered final answer", tool_calls=[]) + + provider.chat_with_retry = AsyncMock(side_effect=chat_with_retry) + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock( + return_value="Error: Command blocked by safety guard (path outside working dir)" + ) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert provider.chat_with_retry.await_count == 2, ( + "guard hit must NOT short-circuit the loop -- LLM should get a second turn" + ) + assert result.stop_reason != "tool_error" + assert result.error is None + assert result.final_content == "recovered final answer" + assert result.tool_events and result.tool_events[0]["status"] == "error" + # v2: detail keeps the breadcrumb but the runner did not raise. + assert "workspace_violation" in result.tool_events[0]["detail"] + + +@pytest.mark.asyncio +async def test_runner_throttles_repeated_workspace_bypass_attempts(): + """#3493 motivation: stop the LLM bypass loop without aborting the turn. + + LLM keeps switching tools (read_file -> exec cat -> python -c open(...)) + against the same outside path. After the soft retry budget is exhausted + the runner replaces the tool result with a hard "stop trying" message + so the model finally gives up and surfaces the boundary to the user. + """ + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + bypass_attempts = [ + ToolCallRequest( + id=f"a{i}", name="exec", + arguments={"command": f"cat /Users/x/Downloads/01.md # try {i}"}, + ) + for i in range(4) + ] + responses: list[LLMResponse] = [ + LLMResponse(content=f"try {i}", tool_calls=[bypass_attempts[i]]) + for i in range(4) + ] + responses.append(LLMResponse(content="ok telling user", tool_calls=[])) + + provider = MagicMock() + provider.chat_with_retry = AsyncMock(side_effect=responses) + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock( + return_value="Error: Command blocked by safety guard (path outside working dir)" + ) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[], + tools=tools, + model="test-model", + max_iterations=10, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + # All 4 bypass attempts surface to the LLM (no fatal abort), and the + # runner finally completes once the LLM stops asking. + assert result.stop_reason != "tool_error" + assert result.error is None + assert result.final_content == "ok telling user" + # The third+ attempts must have been escalated -- look at the events. + escalated = [ + ev for ev in result.tool_events + if ev["status"] == "error" + and ev["detail"].startswith("workspace_violation_escalated:") + ] + assert escalated, ( + "expected at least one escalated workspace_violation event, got: " + f"{result.tool_events}" + ) diff --git a/tests/agent/test_runner_tool_execution.py b/tests/agent/test_runner_tool_execution.py new file mode 100644 index 000000000..a0380e871 --- /dev/null +++ b/tests/agent/test_runner_tool_execution.py @@ -0,0 +1,181 @@ +"""Tests for AgentRunner tool execution: batching, concurrency, exclusive tools.""" + +from __future__ import annotations + +import asyncio +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from nanobot.agent.tools.base import Tool +from nanobot.agent.tools.registry import ToolRegistry +from nanobot.config.schema import AgentDefaults +from nanobot.providers.base import LLMResponse, ToolCallRequest + +_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars + +class _DelayTool(Tool): + def __init__( + self, + name: str, + *, + delay: float, + read_only: bool, + shared_events: list[str], + exclusive: bool = False, + ): + self._name = name + self._delay = delay + self._read_only = read_only + self._shared_events = shared_events + self._exclusive = exclusive + + @property + def name(self) -> str: + return self._name + + @property + def description(self) -> str: + return self._name + + @property + def parameters(self) -> dict: + return {"type": "object", "properties": {}, "required": []} + + @property + def read_only(self) -> bool: + return self._read_only + + @property + def exclusive(self) -> bool: + return self._exclusive + + async def execute(self, **kwargs): + self._shared_events.append(f"start:{self._name}") + await asyncio.sleep(self._delay) + self._shared_events.append(f"end:{self._name}") + return self._name + + +@pytest.mark.asyncio +async def test_runner_batches_read_only_tools_before_exclusive_work(): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + tools = ToolRegistry() + shared_events: list[str] = [] + read_a = _DelayTool("read_a", delay=0.05, read_only=True, shared_events=shared_events) + read_b = _DelayTool("read_b", delay=0.05, read_only=True, shared_events=shared_events) + write_a = _DelayTool("write_a", delay=0.01, read_only=False, shared_events=shared_events) + tools.register(read_a) + tools.register(read_b) + tools.register(write_a) + + runner = AgentRunner(MagicMock()) + await runner._execute_tools( + AgentRunSpec( + initial_messages=[], + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + concurrent_tools=True, + ), + [ + ToolCallRequest(id="ro1", name="read_a", arguments={}), + ToolCallRequest(id="ro2", name="read_b", arguments={}), + ToolCallRequest(id="rw1", name="write_a", arguments={}), + ], + {}, + {}, + ) + + assert shared_events[0:2] == ["start:read_a", "start:read_b"] + assert "end:read_a" in shared_events and "end:read_b" in shared_events + assert shared_events.index("end:read_a") < shared_events.index("start:write_a") + assert shared_events.index("end:read_b") < shared_events.index("start:write_a") + assert shared_events[-2:] == ["start:write_a", "end:write_a"] + + +@pytest.mark.asyncio +async def test_runner_does_not_batch_exclusive_read_only_tools(): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + tools = ToolRegistry() + shared_events: list[str] = [] + read_a = _DelayTool("read_a", delay=0.03, read_only=True, shared_events=shared_events) + read_b = _DelayTool("read_b", delay=0.03, read_only=True, shared_events=shared_events) + ddg_like = _DelayTool( + "ddg_like", + delay=0.01, + read_only=True, + shared_events=shared_events, + exclusive=True, + ) + tools.register(read_a) + tools.register(ddg_like) + tools.register(read_b) + + runner = AgentRunner(MagicMock()) + await runner._execute_tools( + AgentRunSpec( + initial_messages=[], + tools=tools, + model="test-model", + max_iterations=1, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + concurrent_tools=True, + ), + [ + ToolCallRequest(id="ro1", name="read_a", arguments={}), + ToolCallRequest(id="ddg1", name="ddg_like", arguments={}), + ToolCallRequest(id="ro2", name="read_b", arguments={}), + ], + {}, + {}, + ) + + assert shared_events[0] == "start:read_a" + assert shared_events.index("end:read_a") < shared_events.index("start:ddg_like") + assert shared_events.index("end:ddg_like") < shared_events.index("start:read_b") + + +@pytest.mark.asyncio +async def test_runner_blocks_repeated_external_fetches(): + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + captured_final_call: list[dict] = [] + call_count = {"n": 0} + + async def chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + if call_count["n"] <= 3: + return LLMResponse( + content="working", + tool_calls=[ToolCallRequest(id=f"call_{call_count['n']}", name="web_fetch", arguments={"url": "https://example.com"})], + usage={}, + ) + captured_final_call[:] = messages + return LLMResponse(content="done", tool_calls=[], usage={}) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(return_value="page content") + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "research task"}], + tools=tools, + model="test-model", + max_iterations=4, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert result.final_content == "done" + assert tools.execute.await_count == 2 + blocked_tool_message = [ + msg for msg in captured_final_call + if msg.get("role") == "tool" and msg.get("tool_call_id") == "call_3" + ][0] + assert "repeated external lookup blocked" in blocked_tool_message["content"] diff --git a/tests/agent/test_stop_preserves_context.py b/tests/agent/test_stop_preserves_context.py index 2a082850f..c7e766be1 100644 --- a/tests/agent/test_stop_preserves_context.py +++ b/tests/agent/test_stop_preserves_context.py @@ -10,6 +10,7 @@ See: https://github.com/HKUDS/nanobot/issues/2966 from __future__ import annotations import asyncio +from pathlib import Path from types import SimpleNamespace from typing import Any from unittest.mock import MagicMock, patch, AsyncMock @@ -17,42 +18,47 @@ from unittest.mock import MagicMock, patch, AsyncMock import pytest from nanobot.agent.loop import AgentLoop +from nanobot.bus.queue import MessageBus +from nanobot.providers.base import LLMProvider -@pytest.fixture -def mock_loop(): - """Create a minimal AgentLoop with mocked dependencies.""" - with patch.object(AgentLoop, "__init__", lambda self: None): - loop = AgentLoop() - loop.sessions = MagicMock() - loop._pending_queues = {} - loop._session_locks = {} - loop._active_tasks = {} - loop._concurrency_gate = None - loop._RUNTIME_CHECKPOINT_KEY = "runtime_checkpoint" - loop._PENDING_USER_TURN_KEY = "pending_user_turn" - loop.bus = MagicMock() - loop.bus.publish_outbound = AsyncMock() - loop.bus.publish_inbound = AsyncMock() - loop.commands = MagicMock() - loop.commands.dispatch_priority = AsyncMock(return_value=None) - return loop +def _make_provider(): + """Create an LLM provider mock with required attributes.""" + from types import SimpleNamespace + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + provider.generation = SimpleNamespace(max_tokens=4096, temperature=0.1, reasoning_effort=None) + provider.estimate_prompt_tokens.return_value = (10_000, "test") + return provider + + +def _make_loop(tmp_path: Path) -> AgentLoop: + """Create a real AgentLoop with mocked provider — avoids patching __init__.""" + bus = MessageBus() + provider = _make_provider() + with patch("nanobot.agent.loop.ContextBuilder"), \ + patch("nanobot.agent.loop.SessionManager"), \ + patch("nanobot.agent.loop.SubagentManager") as MockSubMgr: + MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0) + return AgentLoop(bus=bus, provider=provider, workspace=tmp_path) class TestStopPreservesContext: """Verify that /stop restores partial context via checkpoint.""" - def test_restore_checkpoint_method_exists(self, mock_loop): + def test_restore_checkpoint_method_exists(self, tmp_path): """AgentLoop should have _restore_runtime_checkpoint.""" - assert hasattr(mock_loop, "_restore_runtime_checkpoint") + loop = _make_loop(tmp_path) + assert hasattr(loop, "_restore_runtime_checkpoint") - def test_checkpoint_key_constant(self, mock_loop): + def test_checkpoint_key_constant(self, tmp_path): """The runtime checkpoint key should be defined.""" - assert mock_loop._RUNTIME_CHECKPOINT_KEY == "runtime_checkpoint" + loop = _make_loop(tmp_path) + assert loop._RUNTIME_CHECKPOINT_KEY == "runtime_checkpoint" - def test_cancel_dispatch_restores_checkpoint(self, mock_loop): + def test_cancel_dispatch_restores_checkpoint(self, tmp_path): """When a task is cancelled, the checkpoint should be restored.""" - # Create a mock session with a checkpoint + loop = _make_loop(tmp_path) session = MagicMock() session.metadata = { "runtime_checkpoint": { @@ -74,14 +80,11 @@ class TestStopPreservesContext: session.messages = [ {"role": "user", "content": "Search for something"}, ] - mock_loop.sessions.get_or_create.return_value = session + loop.sessions.get_or_create.return_value = session - # The restore method should add checkpoint messages to session history - restored = mock_loop._restore_runtime_checkpoint(session) + restored = loop._restore_runtime_checkpoint(session) assert restored is True - # After restore, session should have more messages assert len(session.messages) > 1 - # The checkpoint should be cleared assert "runtime_checkpoint" not in session.metadata diff --git a/tests/agent/test_subagent_lifecycle.py b/tests/agent/test_subagent_lifecycle.py new file mode 100644 index 000000000..bf3564f28 --- /dev/null +++ b/tests/agent/test_subagent_lifecycle.py @@ -0,0 +1,558 @@ +"""Tests for SubagentManager lifecycle — spawn, run, announce, cancel.""" + +import asyncio +import time +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from nanobot.agent.hook import AgentHookContext +from nanobot.agent.runner import AgentRunResult +from nanobot.agent.subagent import ( + SubagentManager, + SubagentStatus, + _SubagentHook, +) +from nanobot.bus.queue import MessageBus +from nanobot.providers.base import LLMProvider + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _manager(tmp_path: Path, **kw) -> SubagentManager: + provider = MagicMock(spec=LLMProvider) + provider.get_default_model.return_value = "test-model" + defaults = dict( + provider=provider, + workspace=tmp_path, + bus=MessageBus(), + model="test-model", + max_tool_result_chars=16_000, + ) + defaults.update(kw) + return SubagentManager(**defaults) + + +def _make_hook_context(**overrides) -> AgentHookContext: + defaults = dict( + iteration=1, + tool_calls=[], + tool_events=[], + messages=[], + usage={}, + error=None, + stop_reason="completed", + final_content="ok", + ) + defaults.update(overrides) + return AgentHookContext(**defaults) + + +# --------------------------------------------------------------------------- +# SubagentStatus defaults +# --------------------------------------------------------------------------- + + +class TestSubagentStatus: + def test_defaults(self): + s = SubagentStatus( + task_id="abc", label="test", task_description="do stuff", + started_at=time.monotonic(), + ) + assert s.phase == "initializing" + assert s.iteration == 0 + assert s.tool_events == [] + assert s.usage == {} + assert s.stop_reason is None + assert s.error is None + + +# --------------------------------------------------------------------------- +# set_provider +# --------------------------------------------------------------------------- + + +class TestSetProvider: + def test_updates_provider_model_runner(self, tmp_path): + sm = _manager(tmp_path) + new_provider = MagicMock(spec=LLMProvider) + sm.set_provider(new_provider, "new-model") + assert sm.provider is new_provider + assert sm.model == "new-model" + assert sm.runner.provider is new_provider + + +# --------------------------------------------------------------------------- +# spawn +# --------------------------------------------------------------------------- + + +class TestSpawn: + @pytest.mark.asyncio + async def test_returns_string_with_task_id(self, tmp_path): + sm = _manager(tmp_path) + sm.runner.run = AsyncMock(return_value=AgentRunResult( + final_content="done", messages=[], stop_reason="completed", + )) + result = await sm.spawn("do something") + assert "started" in result + assert "id:" in result + + @pytest.mark.asyncio + async def test_creates_task_in_running_tasks(self, tmp_path): + sm = _manager(tmp_path) + block = asyncio.Event() + async def _slow_run(spec): + await block.wait() + return AgentRunResult(final_content="done", messages=[], stop_reason="completed") + sm.runner.run = _slow_run + + await sm.spawn("task", session_key="s1") + assert len(sm._running_tasks) == 1 + + block.set() + await asyncio.sleep(0.1) + assert len(sm._running_tasks) == 0 + + @pytest.mark.asyncio + async def test_creates_status(self, tmp_path): + sm = _manager(tmp_path) + sm.runner.run = AsyncMock(return_value=AgentRunResult( + final_content="done", messages=[], stop_reason="completed", + )) + await sm.spawn("my task") + await asyncio.sleep(0.1) + # Status cleaned up after task completes + assert len(sm._task_statuses) == 0 + + @pytest.mark.asyncio + async def test_registers_in_session_tasks(self, tmp_path): + sm = _manager(tmp_path) + block = asyncio.Event() + async def _slow_run(spec): + await block.wait() + return AgentRunResult(final_content="done", messages=[], stop_reason="completed") + sm.runner.run = _slow_run + + await sm.spawn("task", session_key="s1") + assert "s1" in sm._session_tasks + assert len(sm._session_tasks["s1"]) == 1 + + block.set() + await asyncio.sleep(0.1) + assert "s1" not in sm._session_tasks + + @pytest.mark.asyncio + async def test_no_session_key_no_registration(self, tmp_path): + sm = _manager(tmp_path) + block = asyncio.Event() + async def _slow_run(spec): + await block.wait() + return AgentRunResult(final_content="done", messages=[], stop_reason="completed") + sm.runner.run = _slow_run + + await sm.spawn("task") + assert len(sm._session_tasks) == 0 + + block.set() + await asyncio.sleep(0.1) + + @pytest.mark.asyncio + async def test_label_defaults_to_truncated_task(self, tmp_path): + sm = _manager(tmp_path) + block = asyncio.Event() + async def _slow_run(spec): + await block.wait() + return AgentRunResult(final_content="done", messages=[], stop_reason="completed") + sm.runner.run = _slow_run + + long_task = "A" * 50 + await sm.spawn(long_task, session_key="s1") + status = next(iter(sm._task_statuses.values())) + assert status.label == long_task[:30] + "..." + + block.set() + await asyncio.sleep(0.1) + + @pytest.mark.asyncio + async def test_custom_label(self, tmp_path): + sm = _manager(tmp_path) + block = asyncio.Event() + async def _slow_run(spec): + await block.wait() + return AgentRunResult(final_content="done", messages=[], stop_reason="completed") + sm.runner.run = _slow_run + + await sm.spawn("task", label="Custom Label", session_key="s1") + status = next(iter(sm._task_statuses.values())) + assert status.label == "Custom Label" + + block.set() + await asyncio.sleep(0.1) + + @pytest.mark.asyncio + async def test_cleanup_callback_removes_all_entries(self, tmp_path): + sm = _manager(tmp_path) + sm.runner.run = AsyncMock(return_value=AgentRunResult( + final_content="done", messages=[], stop_reason="completed", + )) + await sm.spawn("task", session_key="s1") + await asyncio.sleep(0.1) + assert len(sm._running_tasks) == 0 + assert len(sm._task_statuses) == 0 + assert len(sm._session_tasks) == 0 + + +# --------------------------------------------------------------------------- +# _run_subagent +# --------------------------------------------------------------------------- + + +class TestRunSubagent: + @pytest.mark.asyncio + async def test_successful_run(self, tmp_path): + sm = _manager(tmp_path) + sm.runner.run = AsyncMock(return_value=AgentRunResult( + final_content="Task done!", messages=[], stop_reason="completed", + )) + with patch.object(sm, "_announce_result", new_callable=AsyncMock) as mock_announce: + await sm._run_subagent( + "t1", "do task", "label", + {"channel": "cli", "chat_id": "direct"}, + SubagentStatus(task_id="t1", label="label", task_description="do task", started_at=time.monotonic()), + ) + mock_announce.assert_called_once() + assert mock_announce.call_args.args[-2] == "ok" + + @pytest.mark.asyncio + async def test_tool_error_run(self, tmp_path): + sm = _manager(tmp_path) + sm.runner.run = AsyncMock(return_value=AgentRunResult( + final_content=None, messages=[], stop_reason="tool_error", + tool_events=[{"name": "read_file", "status": "error", "detail": "not found"}], + )) + status = SubagentStatus(task_id="t1", label="label", task_description="do task", started_at=time.monotonic()) + with patch.object(sm, "_announce_result", new_callable=AsyncMock) as mock_announce: + await sm._run_subagent( + "t1", "do task", "label", + {"channel": "cli", "chat_id": "direct"}, status, + ) + assert mock_announce.call_args.args[-2] == "error" + + @pytest.mark.asyncio + async def test_exception_run(self, tmp_path): + sm = _manager(tmp_path) + sm.runner.run = AsyncMock(side_effect=RuntimeError("LLM down")) + status = SubagentStatus(task_id="t1", label="label", task_description="do task", started_at=time.monotonic()) + with patch.object(sm, "_announce_result", new_callable=AsyncMock) as mock_announce: + await sm._run_subagent( + "t1", "do task", "label", + {"channel": "cli", "chat_id": "direct"}, status, + ) + assert status.phase == "error" + assert "LLM down" in status.error + assert mock_announce.call_args.args[-2] == "error" + + @pytest.mark.asyncio + async def test_status_updated_on_success(self, tmp_path): + sm = _manager(tmp_path) + sm.runner.run = AsyncMock(return_value=AgentRunResult( + final_content="ok", messages=[], stop_reason="completed", + )) + status = SubagentStatus(task_id="t1", label="label", task_description="do task", started_at=time.monotonic()) + with patch.object(sm, "_announce_result", new_callable=AsyncMock): + await sm._run_subagent( + "t1", "do task", "label", + {"channel": "cli", "chat_id": "direct"}, status, + ) + assert status.phase == "done" + assert status.stop_reason == "completed" + + +# --------------------------------------------------------------------------- +# _announce_result +# --------------------------------------------------------------------------- + + +class TestAnnounceResult: + @pytest.mark.asyncio + async def test_publishes_inbound_message(self, tmp_path): + sm = _manager(tmp_path) + published = [] + sm.bus.publish_inbound = AsyncMock(side_effect=lambda msg: published.append(msg)) + + await sm._announce_result( + "t1", "label", "task", "result text", + {"channel": "cli", "chat_id": "direct"}, "ok", + ) + + assert len(published) == 1 + msg = published[0] + assert msg.channel == "system" + assert msg.sender_id == "subagent" + assert msg.metadata["injected_event"] == "subagent_result" + assert msg.metadata["subagent_task_id"] == "t1" + + @pytest.mark.asyncio + async def test_session_key_override(self, tmp_path): + sm = _manager(tmp_path) + published = [] + sm.bus.publish_inbound = AsyncMock(side_effect=lambda msg: published.append(msg)) + + await sm._announce_result( + "t1", "label", "task", "result", + {"channel": "telegram", "chat_id": "123", "session_key": "s1"}, "ok", + ) + + assert published[0].session_key_override == "s1" + + @pytest.mark.asyncio + async def test_session_key_override_fallback(self, tmp_path): + sm = _manager(tmp_path) + published = [] + sm.bus.publish_inbound = AsyncMock(side_effect=lambda msg: published.append(msg)) + + await sm._announce_result( + "t1", "label", "task", "result", + {"channel": "telegram", "chat_id": "123"}, "ok", + ) + + assert published[0].session_key_override == "telegram:123" + + @pytest.mark.asyncio + async def test_ok_status_text(self, tmp_path): + sm = _manager(tmp_path) + published = [] + sm.bus.publish_inbound = AsyncMock(side_effect=lambda msg: published.append(msg)) + + await sm._announce_result( + "t1", "label", "task", "result", + {"channel": "cli", "chat_id": "direct"}, "ok", + ) + + assert "completed successfully" in published[0].content + + @pytest.mark.asyncio + async def test_error_status_text(self, tmp_path): + sm = _manager(tmp_path) + published = [] + sm.bus.publish_inbound = AsyncMock(side_effect=lambda msg: published.append(msg)) + + await sm._announce_result( + "t1", "label", "task", "error details", + {"channel": "cli", "chat_id": "direct"}, "error", + ) + + assert "failed" in published[0].content + + @pytest.mark.asyncio + async def test_origin_message_id_in_metadata(self, tmp_path): + sm = _manager(tmp_path) + published = [] + sm.bus.publish_inbound = AsyncMock(side_effect=lambda msg: published.append(msg)) + + await sm._announce_result( + "t1", "label", "task", "result", + {"channel": "cli", "chat_id": "direct"}, "ok", + origin_message_id="msg-123", + ) + + assert published[0].metadata["origin_message_id"] == "msg-123" + + +# --------------------------------------------------------------------------- +# _format_partial_progress +# --------------------------------------------------------------------------- + + +class TestFormatPartialProgress: + def _make_result(self, tool_events=None, error=None): + return MagicMock(tool_events=tool_events or [], error=error) + + def test_completed_only(self): + result = self._make_result(tool_events=[ + {"name": "read_file", "status": "ok", "detail": "file content"}, + {"name": "exec", "status": "ok", "detail": "output"}, + ]) + text = SubagentManager._format_partial_progress(result) + assert "Completed steps:" in text + assert "read_file" in text + assert "exec" in text + + def test_failure_only(self): + result = self._make_result(tool_events=[ + {"name": "read_file", "status": "error", "detail": "not found"}, + ]) + text = SubagentManager._format_partial_progress(result) + assert "Failure:" in text + assert "not found" in text + + def test_completed_and_failure(self): + result = self._make_result(tool_events=[ + {"name": "read_file", "status": "ok", "detail": "content"}, + {"name": "exec", "status": "error", "detail": "timeout"}, + ]) + text = SubagentManager._format_partial_progress(result) + assert "Completed steps:" in text + assert "Failure:" in text + + def test_limited_to_last_three(self): + result = self._make_result(tool_events=[ + {"name": f"tool_{i}", "status": "ok", "detail": f"result_{i}"} + for i in range(5) + ]) + text = SubagentManager._format_partial_progress(result) + assert "tool_2" in text + assert "tool_3" in text + assert "tool_4" in text + assert "tool_0" not in text + assert "tool_1" not in text + + def test_error_without_failure_event(self): + result = self._make_result( + tool_events=[{"name": "read_file", "status": "ok", "detail": "ok"}], + error="Something went wrong", + ) + text = SubagentManager._format_partial_progress(result) + assert "Something went wrong" in text + + def test_empty_events_with_error(self): + result = self._make_result(error="Total failure") + text = SubagentManager._format_partial_progress(result) + assert "Total failure" in text + + def test_empty_no_error_returns_fallback(self): + result = self._make_result() + text = SubagentManager._format_partial_progress(result) + assert "Error" in text + + +# --------------------------------------------------------------------------- +# cancel_by_session +# --------------------------------------------------------------------------- + + +class TestCancelBySession: + @pytest.mark.asyncio + async def test_cancels_running_tasks(self, tmp_path): + sm = _manager(tmp_path) + block = asyncio.Event() + async def _slow_run(spec): + await block.wait() + return AgentRunResult(final_content="done", messages=[], stop_reason="completed") + sm.runner.run = _slow_run + + await sm.spawn("task1", session_key="s1") + await sm.spawn("task2", session_key="s1") + assert len(sm._session_tasks.get("s1", set())) == 2 + + count = await sm.cancel_by_session("s1") + assert count == 2 + block.set() + await asyncio.sleep(0.1) + + @pytest.mark.asyncio + async def test_no_tasks_returns_zero(self, tmp_path): + sm = _manager(tmp_path) + count = await sm.cancel_by_session("nonexistent") + assert count == 0 + + @pytest.mark.asyncio + async def test_already_done_not_counted(self, tmp_path): + sm = _manager(tmp_path) + sm.runner.run = AsyncMock(return_value=AgentRunResult( + final_content="done", messages=[], stop_reason="completed", + )) + await sm.spawn("task1", session_key="s1") + await asyncio.sleep(0.1) # Wait for completion + + count = await sm.cancel_by_session("s1") + assert count == 0 + + +# --------------------------------------------------------------------------- +# get_running_count / get_running_count_by_session +# --------------------------------------------------------------------------- + + +class TestRunningCounts: + @pytest.mark.asyncio + async def test_running_count_zero(self, tmp_path): + sm = _manager(tmp_path) + assert sm.get_running_count() == 0 + + @pytest.mark.asyncio + async def test_running_count_tracks_tasks(self, tmp_path): + sm = _manager(tmp_path) + block = asyncio.Event() + async def _slow_run(spec): + await block.wait() + return AgentRunResult(final_content="done", messages=[], stop_reason="completed") + sm.runner.run = _slow_run + + await sm.spawn("t1", session_key="s1") + await sm.spawn("t2", session_key="s1") + assert sm.get_running_count() == 2 + assert sm.get_running_count_by_session("s1") == 2 + + block.set() + await asyncio.sleep(0.1) + assert sm.get_running_count() == 0 + + @pytest.mark.asyncio + async def test_running_count_by_session_nonexistent(self, tmp_path): + sm = _manager(tmp_path) + assert sm.get_running_count_by_session("nonexistent") == 0 + + +# --------------------------------------------------------------------------- +# _SubagentHook +# --------------------------------------------------------------------------- + + +class TestSubagentHook: + @pytest.mark.asyncio + async def test_before_execute_tools_logs(self, tmp_path): + hook = _SubagentHook("t1") + tool_call = MagicMock() + tool_call.name = "read_file" + tool_call.arguments = {"path": "/tmp/test"} + ctx = _make_hook_context(tool_calls=[tool_call]) + # Should not raise + await hook.before_execute_tools(ctx) + + @pytest.mark.asyncio + async def test_after_iteration_updates_status(self): + status = SubagentStatus( + task_id="t1", label="test", task_description="do", started_at=time.monotonic(), + ) + hook = _SubagentHook("t1", status) + ctx = _make_hook_context( + iteration=3, + tool_events=[{"name": "read_file", "status": "ok", "detail": ""}], + usage={"prompt_tokens": 100}, + ) + await hook.after_iteration(ctx) + assert status.iteration == 3 + assert len(status.tool_events) == 1 + assert status.usage == {"prompt_tokens": 100} + + @pytest.mark.asyncio + async def test_after_iteration_no_status_noop(self): + hook = _SubagentHook("t1", status=None) + ctx = _make_hook_context(iteration=5) + # Should not raise + await hook.after_iteration(ctx) + + @pytest.mark.asyncio + async def test_after_iteration_sets_error(self): + status = SubagentStatus( + task_id="t1", label="test", task_description="do", started_at=time.monotonic(), + ) + hook = _SubagentHook("t1", status) + ctx = _make_hook_context(error="something broke") + await hook.after_iteration(ctx) + assert status.error == "something broke"