diff --git a/tests/agent/conftest.py b/tests/agent/conftest.py
new file mode 100644
index 000000000..57f678aa9
--- /dev/null
+++ b/tests/agent/conftest.py
@@ -0,0 +1,93 @@
+"""Shared fixtures and helpers for agent tests."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from nanobot.agent.loop import AgentLoop
+from nanobot.bus.queue import MessageBus
+from nanobot.providers.base import LLMProvider
+
+
+def make_provider(
+    default_model: str = "test-model",
+    *,
+    max_tokens: int = 4096,
+    spec: bool = True,
+) -> MagicMock:
+    """Build an LLM provider mock, constrained to LLMProvider's API unless spec=False."""
+    generation = SimpleNamespace(
+        max_tokens=max_tokens,
+        temperature=0.1,
+        reasoning_effort=None,
+    )
+    provider = MagicMock(spec=LLMProvider) if spec else MagicMock()
+    provider.generation = generation
+    provider.get_default_model.return_value = default_model
+    provider.estimate_prompt_tokens.return_value = (10_000, "test")
+    return provider
+
+
+def make_loop(
+    tmp_path: Path,
+    *,
+    model: str = "test-model",
+    context_window_tokens: int = 128_000,
+    session_ttl_minutes: int = 0,
+    max_messages: int = 120,
+    unified_session: bool = False,
+    mcp_servers: dict | None = None,
+    tools_config=None,
+    model_presets: dict | None = None,
+    hooks: list | None = None,
+    provider: MagicMock | None = None,
+    patch_deps: bool = False,
+) -> AgentLoop:
+    """Construct a real AgentLoop rooted at ``tmp_path`` for use in tests.
+
+    mcp_servers, tools_config, model_presets and hooks are forwarded only when not None.
+
+    Args:
+        patch_deps: If True, patch ContextBuilder/SessionManager/SubagentManager
+                    during construction (needed when workspace has no real files).
+    """
+    if provider is None:
+        provider = make_provider(default_model=model)
+
+    kwargs = {
+        "bus": MessageBus(),
+        "provider": provider,
+        "workspace": tmp_path,
+        "model": model,
+        "context_window_tokens": context_window_tokens,
+        "session_ttl_minutes": session_ttl_minutes,
+        "max_messages": max_messages,
+        "unified_session": unified_session,
+    }
+    optional = {
+        "mcp_servers": mcp_servers,
+        "tools_config": tools_config,
+        "model_presets": model_presets,
+        "hooks": hooks,
+    }
+    kwargs.update({name: value for name, value in optional.items() if value is not None})
+
+    if not patch_deps:
+        return AgentLoop(**kwargs)
+    with patch("nanobot.agent.loop.ContextBuilder"), \
+         patch("nanobot.agent.loop.SessionManager"), \
+         patch("nanobot.agent.loop.SubagentManager") as MockSubMgr:
+        MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0)
+        return AgentLoop(**kwargs)
+
+
+@pytest.fixture
+def loop_factory(tmp_path):
+    """Return a callable that builds AgentLoop instances in this test's tmp_path.
+
+    Keyword arguments are passed straight through to ``make_loop``."""
+    return lambda **kwargs: make_loop(tmp_path, **kwargs)
diff --git a/tests/agent/test_autocompact_unit.py b/tests/agent/test_autocompact_unit.py
new file mode 100644
index 000000000..d501770dd
--- /dev/null
+++ b/tests/agent/test_autocompact_unit.py
@@ -0,0 +1,554 @@
+"""Direct unit tests for AutoCompact class methods in isolation."""
+
+from datetime import datetime, timedelta
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.agent.autocompact import AutoCompact
+from nanobot.session.manager import Session, SessionManager
+
+
+def _make_session(
+    key: str = "cli:test",
+    messages: list | None = None,
+    last_consolidated: int = 0,
+    updated_at: datetime | None = None,
+    metadata: dict | None = None,
+) -> Session:
+    """Build a Session for tests, overriding ``updated_at`` only when one is given."""
+    sess = Session(
+        key=key,
+        messages=[] if not messages else messages,
+        metadata={} if not metadata else metadata,
+        last_consolidated=last_consolidated,
+    )
+    if updated_at is not None:
+        sess.updated_at = updated_at
+    return sess
+
+
+def _make_autocompact(
+    ttl: int = 15,
+    sessions: SessionManager | None = None,
+    consolidator: MagicMock | None = None,
+) -> AutoCompact:
+    """Build an AutoCompact, substituting mocks for any dependency not supplied."""
+    if consolidator is None:
+        # Default consolidator whose async archive() yields a fixed summary.
+        consolidator = MagicMock()
+        consolidator.archive = AsyncMock(return_value="Summary.")
+    session_manager = MagicMock(spec=SessionManager) if sessions is None else sessions
+    return AutoCompact(
+        sessions=session_manager,
+        consolidator=consolidator,
+        session_ttl_minutes=ttl,
+    )
+
+
+def _add_turns(session: Session, turns: int, *, prefix: str = "msg") -> None:
+    """Add ``turns`` user/assistant message pairs to ``session`` via add_message."""
+    for index in range(turns):
+        for role in ("user", "assistant"):
+            session.add_message(role, f"{prefix} {role} {index}")
+
+
+# ---------------------------------------------------------------------------
+# __init__
+# ---------------------------------------------------------------------------
+
+
+class TestInit:
+    """AutoCompact.__init__ should record its arguments and start with empty state."""
+
+    def test_stores_ttl(self):
+        """session_ttl_minutes is exposed as _ttl."""
+        compactor = _make_autocompact(ttl=30)
+        assert compactor._ttl == 30
+
+    def test_default_ttl_is_zero(self):
+        """An explicit TTL of 0 is stored unchanged (the ctor default is not exercised)."""
+        compactor = _make_autocompact(ttl=0)
+        assert compactor._ttl == 0
+
+    def test_archiving_set_is_empty(self):
+        """No session keys are marked as archiving right after construction."""
+        compactor = _make_autocompact()
+        assert compactor._archiving == set()
+
+    def test_summaries_dict_is_empty(self):
+        """No summaries are cached right after construction."""
+        compactor = _make_autocompact()
+        assert compactor._summaries == {}
+
+    def test_stores_sessions_reference(self):
+        """The exact SessionManager object passed in is kept on .sessions."""
+        session_manager = MagicMock(spec=SessionManager)
+        compactor = _make_autocompact(sessions=session_manager)
+        assert compactor.sessions is session_manager
+
+    def test_stores_consolidator_reference(self):
+        """The exact consolidator object passed in is kept on .consolidator."""
+        consolidator = MagicMock()
+        compactor = _make_autocompact(consolidator=consolidator)
+        assert compactor.consolidator is consolidator
+
+
+# ---------------------------------------------------------------------------
+# _is_expired
+# ---------------------------------------------------------------------------
+
+
+class TestIsExpired:
+    """Boundary and input-type behaviour of AutoCompact._is_expired."""
+
+    def test_ttl_zero_always_false(self):
+        """With TTL=0 even a year-old timestamp is reported as not expired."""
+        compactor = _make_autocompact(ttl=0)
+        year_ago = datetime.now() - timedelta(days=365)
+        assert compactor._is_expired(year_ago) is False
+
+    def test_none_timestamp_returns_false(self):
+        """A missing (None) timestamp is never expired."""
+        compactor = _make_autocompact(ttl=15)
+        assert compactor._is_expired(None) is False
+
+    def test_empty_string_timestamp_returns_false(self):
+        """An empty-string timestamp is never expired."""
+        compactor = _make_autocompact(ttl=15)
+        assert compactor._is_expired("") is False
+
+    def test_exactly_at_boundary_is_expired(self):
+        """An age of exactly TTL minutes counts as expired."""
+        compactor = _make_autocompact(ttl=15)
+        reference = datetime(2026, 1, 1, 12, 0, 0)
+        stamp = reference - timedelta(minutes=15)
+        assert compactor._is_expired(stamp, now=reference) is True
+
+    def test_just_under_boundary_not_expired(self):
+        """An age one second short of TTL is not expired."""
+        compactor = _make_autocompact(ttl=15)
+        reference = datetime(2026, 1, 1, 12, 0, 0)
+        stamp = reference - timedelta(minutes=14, seconds=59)
+        assert compactor._is_expired(stamp, now=reference) is False
+
+    def test_iso_string_parses_correctly(self):
+        """An ISO-8601 string is accepted in place of a datetime."""
+        compactor = _make_autocompact(ttl=15)
+        reference = datetime(2026, 1, 1, 12, 0, 0)
+        stamp = (reference - timedelta(minutes=20)).isoformat()
+        assert compactor._is_expired(stamp, now=reference) is True
+
+    def test_custom_now_parameter(self):
+        """The ``now`` argument, not the wall clock, decides expiry."""
+        compactor = _make_autocompact(ttl=10)
+        stamp = datetime(2026, 1, 1, 10, 0, 0)
+        # 9 minutes later → not expired
+        before_ttl = stamp + timedelta(minutes=9)
+        assert compactor._is_expired(stamp, now=before_ttl) is False
+        # 10 minutes later → expired
+        at_ttl = stamp + timedelta(minutes=10)
+        assert compactor._is_expired(stamp, now=at_ttl) is True
+
+
+# ---------------------------------------------------------------------------
+# _format_summary
+# ---------------------------------------------------------------------------
+
+
+class TestFormatSummary:
+    """Output shape of the static helper AutoCompact._format_summary."""
+
+    def test_contains_isoformat_timestamp(self):
+        """The last-active time appears in ISO-8601 form."""
+        when = datetime(2026, 5, 13, 14, 30, 0)
+        formatted = AutoCompact._format_summary("Some text", when)
+        assert "2026-05-13T14:30:00" in formatted
+
+    def test_contains_summary_text(self):
+        """The summary text is embedded unchanged."""
+        when = datetime(2026, 1, 1)
+        formatted = AutoCompact._format_summary("User discussed Python.", when)
+        assert "User discussed Python." in formatted
+
+    def test_output_starts_with_label(self):
+        """The result begins with the fixed 'Previous conversation summary' label."""
+        when = datetime(2026, 1, 1)
+        formatted = AutoCompact._format_summary("text", when)
+        assert formatted.startswith("Previous conversation summary (last active ")
+
+
+# ---------------------------------------------------------------------------
+# _split_unconsolidated
+# ---------------------------------------------------------------------------
+
+
+class TestSplitUnconsolidated:
+    """How AutoCompact._split_unconsolidated divides a session into (archive, kept)."""
+
+    def test_empty_session_returns_both_empty(self):
+        """Empty session should return ([], [])."""
+        ac = _make_autocompact()
+        session = _make_session(messages=[])
+        archive, kept = ac._split_unconsolidated(session)
+        assert archive == []
+        assert kept == []
+
+    def test_all_messages_archivable_when_more_than_suffix(self):
+        """With 20 messages, some are archived and at most the suffix size is kept."""
+        ac = _make_autocompact()
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        archive, kept = ac._split_unconsolidated(session)
+        assert len(archive) > 0
+        assert len(kept) <= AutoCompact._RECENT_SUFFIX_MESSAGES
+
+    def test_fewer_messages_than_suffix_returns_empty_archive(self):
+        """With only 3 messages, nothing is archived and all 3 are kept."""
+        ac = _make_autocompact()
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(3)]
+        session = _make_session(messages=msgs)
+        archive, kept = ac._split_unconsolidated(session)
+        assert archive == []
+        assert len(kept) == len(msgs)
+
+    def test_respects_last_consolidated_offset(self):
+        """Only messages after last_consolidated should be considered."""
+        ac = _make_autocompact()
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        # First 10 are already consolidated
+        session = _make_session(messages=msgs, last_consolidated=10)
+        archive, kept = ac._split_unconsolidated(session)
+        # Compare as a set so an empty result cannot pass vacuously.
+        tail_contents = {m["content"] for m in msgs[10:]}
+        assert {m["content"] for m in [*archive, *kept]} == tail_contents
+
+    def test_retain_recent_legal_suffix_keeps_last_n(self):
+        """The kept suffix should be at most _RECENT_SUFFIX_MESSAGES long."""
+        ac = _make_autocompact()
+        # 20 user messages = 20 messages total, all after last_consolidated=0
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        archive, kept = ac._split_unconsolidated(session)
+        assert len(kept) <= AutoCompact._RECENT_SUFFIX_MESSAGES
+        assert len(archive) == len(msgs) - len(kept)
+
+
+# ---------------------------------------------------------------------------
+# check_expired
+# ---------------------------------------------------------------------------
+
+
+class TestCheckExpired:
+    """Which sessions AutoCompact.check_expired hands to the background scheduler."""
+
+    def test_empty_sessions_list(self):
+        """No sessions → schedule_background should never be called."""
+        manager = MagicMock(spec=SessionManager)
+        manager.list_sessions.return_value = []
+        compactor = _make_autocompact(ttl=15, sessions=manager)
+        scheduler = MagicMock()
+
+        compactor.check_expired(scheduler)
+        scheduler.assert_not_called()
+
+    def test_expired_session_schedules_background(self):
+        """A session older than the TTL is scheduled once and marked as archiving."""
+        stale = (datetime.now() - timedelta(minutes=20)).isoformat()
+        manager = MagicMock(spec=SessionManager)
+        manager.list_sessions.return_value = [{"key": "cli:old", "updated_at": stale}]
+        compactor = _make_autocompact(ttl=15, sessions=manager)
+        scheduler = MagicMock()
+
+        compactor.check_expired(scheduler)
+        scheduler.assert_called_once()
+        assert "cli:old" in compactor._archiving
+
+    def test_active_session_key_skips(self):
+        """An expired session listed in active_session_keys is not scheduled."""
+        stale = (datetime.now() - timedelta(minutes=20)).isoformat()
+        manager = MagicMock(spec=SessionManager)
+        manager.list_sessions.return_value = [{"key": "cli:busy", "updated_at": stale}]
+        compactor = _make_autocompact(ttl=15, sessions=manager)
+        scheduler = MagicMock()
+
+        compactor.check_expired(scheduler, active_session_keys={"cli:busy"})
+        scheduler.assert_not_called()
+
+    def test_session_already_in_archiving_skips(self):
+        """An expired session whose key is already in _archiving is not scheduled."""
+        stale = (datetime.now() - timedelta(minutes=20)).isoformat()
+        manager = MagicMock(spec=SessionManager)
+        manager.list_sessions.return_value = [{"key": "cli:dup", "updated_at": stale}]
+        compactor = _make_autocompact(ttl=15, sessions=manager)
+        compactor._archiving.add("cli:dup")
+        scheduler = MagicMock()
+
+        compactor.check_expired(scheduler)
+        scheduler.assert_not_called()
+
+    def test_session_with_no_key_skips(self):
+        """A session entry whose key is an empty string is not scheduled."""
+        manager = MagicMock(spec=SessionManager)
+        manager.list_sessions.return_value = [{"key": "", "updated_at": "old"}]
+        compactor = _make_autocompact(ttl=15, sessions=manager)
+        scheduler = MagicMock()
+
+        compactor.check_expired(scheduler)
+        scheduler.assert_not_called()
+
+    def test_session_with_missing_key_field_skips(self):
+        """A session entry with no 'key' field at all is not scheduled."""
+        manager = MagicMock(spec=SessionManager)
+        manager.list_sessions.return_value = [{"updated_at": "old"}]
+        compactor = _make_autocompact(ttl=15, sessions=manager)
+        scheduler = MagicMock()
+
+        compactor.check_expired(scheduler)
+        scheduler.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# _archive
+# ---------------------------------------------------------------------------
+
+
+class TestArchive:
+    """Outcomes of the async AutoCompact._archive for empty, failing and successful runs."""
+
+    @pytest.mark.asyncio
+    async def test_empty_session_updates_timestamp_no_archive_call(self):
+        """Empty session should refresh updated_at and not call consolidator.archive."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        # Start from a stale timestamp so the freshness check below is meaningful.
+        stale = datetime.now() - timedelta(hours=1)
+        empty_session = _make_session(messages=[], updated_at=stale)
+        mock_sm.get_or_create.return_value = empty_session
+        ac.sessions = mock_sm
+
+        await ac._archive("cli:test")
+
+        ac.consolidator.archive.assert_not_called()
+        mock_sm.save.assert_called_once_with(empty_session)
+        assert empty_session.updated_at > datetime.now() - timedelta(seconds=5)
+
+    @pytest.mark.asyncio
+    async def test_archive_returns_empty_string_no_summary_stored(self):
+        """If archive returns empty string, no summary should be stored."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        mock_sm.get_or_create.return_value = session
+        ac.sessions = mock_sm
+        ac.consolidator.archive = AsyncMock(return_value="")
+
+        await ac._archive("cli:test")
+
+        assert "cli:test" not in ac._summaries
+
+    @pytest.mark.asyncio
+    async def test_archive_returns_nothing_no_summary_stored(self):
+        """If archive returns '(nothing)', no summary should be stored."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        mock_sm.get_or_create.return_value = session
+        ac.sessions = mock_sm
+        ac.consolidator.archive = AsyncMock(return_value="(nothing)")
+
+        await ac._archive("cli:test")
+
+        assert "cli:test" not in ac._summaries
+
+    @pytest.mark.asyncio
+    async def test_archive_exception_caught_key_removed_from_archiving(self):
+        """If archive raises, exception is caught and key removed from _archiving."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        mock_sm.get_or_create.return_value = session
+        ac.sessions = mock_sm
+        ac.consolidator.archive = AsyncMock(side_effect=RuntimeError("LLM down"))
+        ac._archiving.add("cli:test")  # without this the final assertion is trivially true
+
+        await ac._archive("cli:test")  # should not raise
+
+        assert "cli:test" not in ac._archiving
+
+    @pytest.mark.asyncio
+    async def test_successful_archive_stores_summary_in_summaries_and_metadata(self):
+        """Successful archive should store summary in _summaries dict and metadata."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        last_active = datetime(2026, 5, 13, 10, 0, 0)
+        session = _make_session(messages=msgs, updated_at=last_active)
+        mock_sm.get_or_create.return_value = session
+        ac.sessions = mock_sm
+        ac.consolidator.archive = AsyncMock(return_value="User discussed AI.")
+
+        await ac._archive("cli:test")
+
+        # _summaries
+        entry = ac._summaries.get("cli:test")
+        assert entry is not None
+        assert entry[0] == "User discussed AI."
+        assert entry[1] == last_active
+        # metadata
+        meta = session.metadata.get("_last_summary")
+        assert meta is not None
+        assert meta["text"] == "User discussed AI."
+        assert "last_active" in meta
+
+    @pytest.mark.asyncio
+    async def test_finally_block_always_removes_from_archiving(self):
+        """Finally block should always remove key from _archiving, even on error."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        mock_sm.get_or_create.return_value = session
+        ac.sessions = mock_sm
+        ac.consolidator.archive = AsyncMock(side_effect=RuntimeError("fail"))
+
+        # Pre-add key to archiving to verify it gets removed
+        ac._archiving.add("cli:test")
+        await ac._archive("cli:test")
+        assert "cli:test" not in ac._archiving
+
+    @pytest.mark.asyncio
+    async def test_finally_removes_from_archiving_on_success(self):
+        """Finally block should remove key from _archiving on success too."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
+        session = _make_session(messages=msgs)
+        mock_sm.get_or_create.return_value = session
+        ac.sessions = mock_sm
+        ac.consolidator.archive = AsyncMock(return_value="Summary.")
+
+        ac._archiving.add("cli:test")
+        await ac._archive("cli:test")
+        assert "cli:test" not in ac._archiving
+
+
+# ---------------------------------------------------------------------------
+# prepare_session
+# ---------------------------------------------------------------------------
+
+
+class TestPrepareSession:
+    """Session reloading and summary lookup performed by AutoCompact.prepare_session."""
+
+    def test_key_in_archiving_reloads_session(self):
+        """If key is in _archiving, session should be reloaded via get_or_create."""
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        reloaded = _make_session(key="cli:test")
+        mock_sm.get_or_create.return_value = reloaded
+        ac.sessions = mock_sm
+        ac._archiving.add("cli:test")
+
+        original_session = _make_session()
+        result_session, _ = ac.prepare_session(original_session, "cli:test")
+
+        mock_sm.get_or_create.assert_called_once_with("cli:test")
+        assert result_session is reloaded
+
+    def test_expired_session_reloads(self):
+        """If session is expired, it should be reloaded via get_or_create."""
+        ac = _make_autocompact(ttl=15)
+        mock_sm = MagicMock(spec=SessionManager)
+        reloaded = _make_session(key="cli:test", updated_at=datetime.now())
+        mock_sm.get_or_create.return_value = reloaded
+        ac.sessions = mock_sm
+
+        old_session = _make_session(updated_at=datetime.now() - timedelta(minutes=20))
+        result_session, _ = ac.prepare_session(old_session, "cli:test")
+
+        mock_sm.get_or_create.assert_called_once_with("cli:test")
+        assert result_session is reloaded
+
+    def test_hot_path_summary_from_summaries(self):
+        """Summary from _summaries dict should be returned (hot path)."""
+        ac = _make_autocompact()
+        session = _make_session()
+        last_active = datetime(2026, 5, 13, 14, 0, 0)
+        ac._summaries["cli:test"] = ("Hot summary.", last_active)
+
+        result_session, summary = ac.prepare_session(session, "cli:test")
+
+        assert result_session is session
+        assert summary is not None
+        assert "Hot summary." in summary
+        assert "Previous conversation summary" in summary
+
+    def test_hot_path_pops_summary_one_shot(self):
+        """Hot path should pop the summary (one-shot; second call returns None)."""
+        ac = _make_autocompact()
+        session = _make_session()
+        last_active = datetime(2026, 1, 1)
+        ac._summaries["cli:test"] = ("One-shot.", last_active)
+
+        _, summary1 = ac.prepare_session(session, "cli:test")
+        assert summary1 is not None
+        # Second call: hot path entry was popped
+        _, summary2 = ac.prepare_session(session, "cli:test")
+        assert summary2 is None
+
+    def test_cold_path_summary_from_metadata(self):
+        """When _summaries is empty, summary should come from metadata (cold path)."""
+        ac = _make_autocompact()
+        last_active = datetime(2026, 5, 13, 14, 0, 0)
+        session = _make_session(metadata={
+            "_last_summary": {
+                "text": "Cold summary.",
+                "last_active": last_active.isoformat(),
+            },
+        })
+
+        result_session, summary = ac.prepare_session(session, "cli:test")
+
+        assert result_session is session
+        assert summary is not None
+        assert "Cold summary." in summary
+
+    def test_no_summary_available_returns_none(self):
+        """When no summary is available, should return (session, None)."""
+        ac = _make_autocompact()
+        session = _make_session()
+
+        result_session, summary = ac.prepare_session(session, "cli:test")
+
+        assert result_session is session
+        assert summary is None
+
+    def test_cold_path_metadata_not_dict_returns_none(self):
+        """If metadata _last_summary is not a dict, should return None summary."""
+        ac = _make_autocompact()
+        session = _make_session(metadata={"_last_summary": "not a dict"})
+
+        result_session, summary = ac.prepare_session(session, "cli:test")
+
+        assert result_session is session
+        assert summary is None
+
+    def test_hot_path_takes_priority_over_metadata(self):
+        """Hot path (_summaries) should take priority over metadata."""
+        ac = _make_autocompact()
+        session = _make_session(metadata={
+            "_last_summary": {
+                "text": "Cold summary.",
+                "last_active": datetime(2026, 1, 1).isoformat(),
+            },
+        })
+        last_active = datetime(2026, 5, 13, 14, 0, 0)
+        ac._summaries["cli:test"] = ("Hot summary.", last_active)
+
+        _, summary = ac.prepare_session(session, "cli:test")
+        assert "Hot summary." in summary
+        assert "Cold summary." not in summary
diff --git a/tests/agent/test_context_builder.py b/tests/agent/test_context_builder.py
new file mode 100644
index 000000000..862f1ff2b
--- /dev/null
+++ b/tests/agent/test_context_builder.py
@@ -0,0 +1,333 @@
+"""Tests for ContextBuilder — system prompt and message assembly."""
+
+import base64
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from nanobot.agent.context import ContextBuilder
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _builder(tmp_path: Path, **kw) -> ContextBuilder:  # shorthand: workspace is always tmp_path
+    return ContextBuilder(**kw, workspace=tmp_path)
+
+
+# ---------------------------------------------------------------------------
+# _build_runtime_context (static)
+# ---------------------------------------------------------------------------
+
+
+class TestBuildRuntimeContext:
+    """Field-inclusion checks for the static ``_build_runtime_context`` renderer."""
+
+    def test_time_only(self):
+        rendered = ContextBuilder._build_runtime_context(None, None)
+        for marker in ("[Runtime Context", "[/Runtime Context]", "Current Time:"):
+            assert marker in rendered
+        assert "Channel:" not in rendered
+
+    def test_with_channel_and_chat_id(self):
+        rendered = ContextBuilder._build_runtime_context("telegram", "chat123")
+        assert "Channel: telegram" in rendered
+        assert "Chat ID: chat123" in rendered
+
+    def test_with_sender_id(self):
+        rendered = ContextBuilder._build_runtime_context("cli", "direct", sender_id="user1")
+        assert "Sender ID: user1" in rendered
+
+    def test_with_timezone(self):
+        rendered = ContextBuilder._build_runtime_context(None, None, timezone="Asia/Shanghai")
+        assert "Current Time:" in rendered
+
+    def test_no_channel_no_chat_id_omits_both(self):
+        rendered = ContextBuilder._build_runtime_context(None, None)
+        assert "Channel:" not in rendered and "Chat ID:" not in rendered
+
+    def test_no_sender_id_omits(self):
+        rendered = ContextBuilder._build_runtime_context("cli", "direct")
+        assert "Sender ID:" not in rendered
+
+
+# ---------------------------------------------------------------------------
+# _merge_message_content (static)
+# ---------------------------------------------------------------------------
+
+
+class TestMergeMessageContent:
+    """Expected outputs of ``_merge_message_content`` for each pairing of input types."""
+
+    def test_str_plus_str(self):
+        merged = ContextBuilder._merge_message_content("hello", "world")
+        assert merged == "hello\n\nworld"
+
+    def test_empty_left_plus_str(self):
+        merged = ContextBuilder._merge_message_content("", "world")
+        assert merged == "world"
+
+    def test_list_plus_list(self):
+        first = [{"type": "text", "text": "a"}]
+        second = [{"type": "text", "text": "b"}]
+        merged = ContextBuilder._merge_message_content(first, second)
+        assert [block["text"] for block in merged] == ["a", "b"]
+
+    def test_str_plus_list(self):
+        """A string on the left is expected to become the first block."""
+        blocks = [{"type": "text", "text": "b"}]
+        merged = ContextBuilder._merge_message_content("hello", blocks)
+        assert [block["text"] for block in merged] == ["hello", "b"]
+
+    def test_list_plus_str(self):
+        """A string on the right is expected to become the last block."""
+        blocks = [{"type": "text", "text": "a"}]
+        merged = ContextBuilder._merge_message_content(blocks, "world")
+        assert [block["text"] for block in merged] == ["a", "world"]
+
+    def test_none_plus_str(self):
+        """None on the left adds nothing; the string comes back as one text block."""
+        merged = ContextBuilder._merge_message_content(None, "hello")
+        assert merged == [{"type": "text", "text": "hello"}]
+
+    def test_str_plus_none(self):
+        """None on the right adds nothing; the string comes back as one text block."""
+        merged = ContextBuilder._merge_message_content("hello", None)
+        assert merged == [{"type": "text", "text": "hello"}]
+
+    def test_none_plus_none(self):
+        merged = ContextBuilder._merge_message_content(None, None)
+        assert merged == []
+
+    def test_list_items_not_dicts_wrapped(self):
+        merged = ContextBuilder._merge_message_content(["raw_item"], None)
+        assert merged == [{"type": "text", "text": "raw_item"}]
+
+
+# ---------------------------------------------------------------------------
+# _load_bootstrap_files
+# ---------------------------------------------------------------------------
+
+
+class TestLoadBootstrapFiles:
+    """Bootstrap files found in the workspace should show up with a ``## <name>`` heading."""
+
+    def test_no_bootstrap_files(self, tmp_path):
+        """An empty workspace is expected to render as an empty string."""
+        assert _builder(tmp_path)._load_bootstrap_files() == ""
+
+    def test_agents_md(self, tmp_path):
+        (tmp_path / "AGENTS.md").write_text("Be helpful.", encoding="utf-8")
+        rendered = _builder(tmp_path)._load_bootstrap_files()
+        assert "## AGENTS.md" in rendered
+        assert "Be helpful." in rendered
+
+    def test_multiple_bootstrap_files(self, tmp_path):
+        (tmp_path / "AGENTS.md").write_text("Rules.", encoding="utf-8")
+        (tmp_path / "SOUL.md").write_text("Soul.", encoding="utf-8")
+        rendered = _builder(tmp_path)._load_bootstrap_files()
+        assert "## AGENTS.md" in rendered
+        assert "## SOUL.md" in rendered
+        assert "Rules." in rendered
+        assert "Soul." in rendered
+
+    def test_all_bootstrap_files(self, tmp_path):
+        """Every name in ``ContextBuilder.BOOTSTRAP_FILES`` should get its own heading."""
+        for filename in ContextBuilder.BOOTSTRAP_FILES:
+            (tmp_path / filename).write_text(f"Content of {filename}", encoding="utf-8")
+        rendered = _builder(tmp_path)._load_bootstrap_files()
+        for filename in ContextBuilder.BOOTSTRAP_FILES:
+            assert f"## {filename}" in rendered
+
+    def test_utf8_content(self, tmp_path):
+        """Non-ASCII file content is expected to come back intact."""
+        (tmp_path / "AGENTS.md").write_text("用中文回复", encoding="utf-8")
+        rendered = _builder(tmp_path)._load_bootstrap_files()
+        assert "用中文回复" in rendered
+
+
+# ---------------------------------------------------------------------------
+# _is_template_content (static)
+# ---------------------------------------------------------------------------
+
+
+class TestIsTemplateContent:  # second argument looks like a path relative to nanobot/templates
+    def test_nonexistent_template_returns_false(self):
+        assert ContextBuilder._is_template_content("anything", "nonexistent/path.md") is False
+
+    def test_content_matching_template(self):
+        from importlib.resources import files
+        template = files("nanobot") / "templates" / "memory" / "MEMORY.md"
+        if not template.is_file():
+            pytest.skip("MEMORY.md template not bundled")
+        bundled_text = template.read_text(encoding="utf-8")
+        assert ContextBuilder._is_template_content(bundled_text, "memory/MEMORY.md") is True
+
+    def test_modified_content_returns_false(self):
+        from importlib.resources import files
+        template = files("nanobot") / "templates" / "memory" / "MEMORY.md"
+        if not template.is_file():  # skip rather than pass trivially without the template
+            pytest.skip("MEMORY.md template not bundled")
+        assert ContextBuilder._is_template_content("totally different", "memory/MEMORY.md") is False
+
+
+# ---------------------------------------------------------------------------
+# _build_user_content
+# ---------------------------------------------------------------------------
+
+
+class TestBuildUserContent:
+    """``_build_user_content`` is expected to return plain text unless an image is usable."""
+
+    def test_no_media_returns_string(self, tmp_path):
+        """Without media the text is expected back unchanged."""
+        content = _builder(tmp_path)._build_user_content("hello", None)
+        assert content == "hello"
+
+    def test_empty_media_returns_string(self, tmp_path):
+        content = _builder(tmp_path)._build_user_content("hello", [])
+        assert content == "hello"
+
+    def test_nonexistent_media_file_returns_string(self, tmp_path):
+        """A media path that does not exist should leave the text unchanged."""
+        content = _builder(tmp_path)._build_user_content("hello", ["/nonexistent/image.png"])
+        assert content == "hello"
+
+    def test_non_image_file_returns_string(self, tmp_path):
+        """A plain-text attachment should leave the text unchanged."""
+        text_file = tmp_path / "doc.txt"
+        text_file.write_text("not an image", encoding="utf-8")
+        content = _builder(tmp_path)._build_user_content("hello", [str(text_file)])
+        assert content == "hello"
+
+    def test_valid_image_returns_list(self, tmp_path):
+        """A PNG attachment should yield an image_url block followed by the text block."""
+        image = tmp_path / "test.png"
+        image.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 16)
+        content = _builder(tmp_path)._build_user_content("hello", [str(image)])
+        assert isinstance(content, list)
+        image_block, text_block = content
+        assert image_block["type"] == "image_url"
+        assert image_block["image_url"]["url"].startswith("data:image/png;base64,")
+        assert (text_block["type"], text_block["text"]) == ("text", "hello")
+
+    def test_image_meta_includes_path(self, tmp_path):
+        """The image block should carry a ``_meta`` entry that contains ``path``."""
+        image = tmp_path / "test.png"
+        image.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 16)
+        image_block = _builder(tmp_path)._build_user_content("hello", [str(image)])[0]
+        assert "_meta" in image_block
+        assert "path" in image_block["_meta"]
+
+
+# ---------------------------------------------------------------------------
+# build_system_prompt
+# ---------------------------------------------------------------------------
+
+
+class TestBuildSystemPrompt:
+    """Checks on which sections ``build_system_prompt`` includes in its output."""
+
+    def test_returns_nonempty_string(self, tmp_path):
+        prompt = _builder(tmp_path).build_system_prompt()
+        assert isinstance(prompt, str)
+        assert len(prompt) > 0
+
+    def test_includes_identity_section(self, tmp_path):
+        """Loose check: the prompt should mention either the workspace or Python."""
+        lowered = _builder(tmp_path).build_system_prompt().lower()
+        assert "workspace" in lowered or "python" in lowered
+
+    def test_includes_bootstrap_files(self, tmp_path):
+        (tmp_path / "AGENTS.md").write_text("Be helpful and concise.", encoding="utf-8")
+        prompt = _builder(tmp_path).build_system_prompt()
+        assert "Be helpful and concise." in prompt
+
+    def test_includes_session_summary(self, tmp_path):
+        context_builder = _builder(tmp_path)
+        prompt = context_builder.build_system_prompt(session_summary="Previous chat about Python.")
+        assert "Previous chat about Python." in prompt
+        assert "[Archived Context Summary]" in prompt
+
+    def test_sections_separated_by_separator(self, tmp_path):
+        """With several sections present, the ``---`` separator should appear."""
+        (tmp_path / "AGENTS.md").write_text("Rules.", encoding="utf-8")
+        prompt = _builder(tmp_path).build_system_prompt(session_summary="Summary.")
+        assert "\n\n---\n\n" in prompt
+
+    def test_no_bootstrap_no_summary(self, tmp_path):
+        """Without files or a summary, neither optional section should appear."""
+        prompt = _builder(tmp_path).build_system_prompt()
+        assert "## AGENTS.md" not in prompt
+        assert "[Archived Context Summary]" not in prompt
+
+
+# ---------------------------------------------------------------------------
+# build_messages
+# ---------------------------------------------------------------------------
+
+
+class TestBuildMessages:
+    """Checks how ``build_messages`` combines the system prompt, history and new input."""
+
+    def test_basic_empty_history(self, tmp_path):
+        """With no history the result is one system message and one user message."""
+        messages = _builder(tmp_path).build_messages([], "hello")
+        assert [message["role"] for message in messages] == ["system", "user"]
+        assert "hello" in str(messages[1]["content"])
+
+    def test_runtime_context_injected(self, tmp_path):
+        """The runtime-context marker should land in the final user message."""
+        messages = _builder(tmp_path).build_messages([], "hello", channel="cli", chat_id="direct")
+        last_user_text = str(messages[-1]["content"])
+        assert "[Runtime Context" in last_user_text
+        assert "hello" in last_user_text
+
+    def test_consecutive_same_role_merged(self, tmp_path):
+        """A trailing user message in history is merged with the new user input."""
+        history = [{"role": "user", "content": "previous user message"}]
+        messages = _builder(tmp_path).build_messages(history, "new message")
+        assert len(messages) == 2  # system + merged user
+        merged_text = str(messages[1]["content"])
+        assert "previous user message" in merged_text
+        assert "new message" in merged_text
+
+    def test_different_role_appended(self, tmp_path):
+        """After an assistant message the new input becomes its own message."""
+        history = [{"role": "assistant", "content": "previous response"}]
+        messages = _builder(tmp_path).build_messages(history, "new message")
+        assert len(messages) == 3  # system + assistant + user
+
+    def test_media_with_history(self, tmp_path):
+        image = tmp_path / "img.png"
+        image.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 16)
+        history = [{"role": "assistant", "content": "see this"}]
+        messages = _builder(tmp_path).build_messages(history, "check image", media=[str(image)])
+        user_content = messages[-1]["content"]
+        assert isinstance(user_content, list)
+        assert any(block.get("type") == "image_url" for block in user_content)
+
+
+# ---------------------------------------------------------------------------
+# add_tool_result
+# ---------------------------------------------------------------------------
+
+
+class TestAddToolResult:
+    """``add_tool_result`` should append a tool-role message to the list it is given."""
+
+    def test_appends_tool_message(self, tmp_path):
+        chat = [{"role": "user", "content": "hello"}]
+        updated = _builder(tmp_path).add_tool_result(chat, "call_123", "read_file", "file content")
+        assert len(updated) == 2
+        assert updated[1]["role"] == "tool"
+        assert updated[1]["tool_call_id"] == "call_123"
+        assert updated[1]["name"] == "read_file"
+        assert updated[1]["content"] == "file content"
+
+    def test_returns_same_list(self, tmp_path):
+        chat = []
+        updated = _builder(tmp_path).add_tool_result(chat, "id", "tool", "ok")
+        assert updated is chat
diff --git a/tests/agent/test_loop_runner_integration.py b/tests/agent/test_loop_runner_integration.py
new file mode 100644
index 000000000..3cfe07f41
--- /dev/null
+++ b/tests/agent/test_loop_runner_integration.py
@@ -0,0 +1,301 @@
+"""Tests for AgentLoop integration with AgentRunner: streaming, think-filter, error handling, subagent."""
+
+from __future__ import annotations
+
+import asyncio
+import time
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+
+def _make_loop(tmp_path):
+    """Build an AgentLoop whose ContextBuilder/SessionManager/SubagentManager are patched."""
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.bus.queue import MessageBus
+
+    message_bus = MessageBus()
+    fake_provider = MagicMock()
+    fake_provider.get_default_model.return_value = "test-model"
+
+    with patch("nanobot.agent.loop.ContextBuilder"), \
+         patch("nanobot.agent.loop.SessionManager"), \
+         patch("nanobot.agent.loop.SubagentManager") as subagent_cls:
+        subagent_cls.return_value.cancel_by_session = AsyncMock(return_value=0)
+        return AgentLoop(bus=message_bus, provider=fake_provider, workspace=tmp_path)
+
+@pytest.mark.asyncio
+async def test_loop_max_iterations_message_stays_stable(tmp_path):
+    """A model that keeps requesting tools should end with the exact max-iterations notice."""
+    agent = _make_loop(tmp_path)
+    agent.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
+        content="working",
+        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})],
+    ))
+    agent.tools.get_definitions = MagicMock(return_value=[])
+    agent.tools.execute = AsyncMock(return_value="ok")
+    agent.max_iterations = 2
+
+    final_content, _, _, _, _ = await agent._run_agent_loop([])
+    assert final_content == (
+        "I reached the maximum number of tool call iterations (2) "
+        "without completing the task. You can try breaking the task into smaller steps."
+    )
+
+
+@pytest.mark.asyncio
+async def test_loop_stream_filter_handles_think_only_prefix_without_crashing(tmp_path):
+    """A stream that opens with a <think> block should surface only the text after it."""
+    agent = _make_loop(tmp_path)
+    seen_deltas: list[str] = []
+    end_flags: list[bool] = []
+
+    async def fake_stream(*, on_content_delta, **kwargs):
+        for piece in ("<think>hidden", "</think>Hello"):
+            await on_content_delta(piece)
+        return LLMResponse(content="<think>hidden</think>Hello", tool_calls=[], usage={})
+
+    async def on_stream(delta: str) -> None:
+        seen_deltas.append(delta)
+
+    async def on_stream_end(*, resuming: bool = False) -> None:
+        end_flags.append(resuming)
+
+    agent.provider.chat_stream_with_retry = fake_stream
+    final_content, _, _, _, _ = await agent._run_agent_loop(
+        [],
+        on_stream=on_stream,
+        on_stream_end=on_stream_end,
+    )
+
+    assert final_content == "Hello"
+    assert seen_deltas == ["Hello"]
+    assert end_flags == [False]
+
+
+@pytest.mark.asyncio
+async def test_loop_stream_filter_hides_partial_trailing_think_prefix(tmp_path):
+    """A <think> tag split across two deltas should still be filtered from the stream."""
+    agent = _make_loop(tmp_path)
+    seen_deltas: list[str] = []
+
+    async def fake_stream(*, on_content_delta, **kwargs):
+        for piece in ("Hello <thin", "k>hidden</think>World"):
+            await on_content_delta(piece)
+        return LLMResponse(content="Hello <think>hidden</think>World", tool_calls=[], usage={})
+
+    async def on_stream(delta: str) -> None:
+        seen_deltas.append(delta)
+
+    agent.provider.chat_stream_with_retry = fake_stream
+    final_content, _, _, _, _ = await agent._run_agent_loop([], on_stream=on_stream)
+
+    assert final_content == "Hello World"
+    assert seen_deltas == ["Hello", " World"]
+
+
+@pytest.mark.asyncio
+async def test_loop_stream_filter_hides_complete_trailing_think_tag(tmp_path):
+    """A delta ending in a complete <think> tag should leak neither the tag nor its body."""
+    agent = _make_loop(tmp_path)
+    seen_deltas: list[str] = []
+
+    async def fake_stream(*, on_content_delta, **kwargs):
+        for piece in ("Hello <think>", "hidden</think>World"):
+            await on_content_delta(piece)
+        return LLMResponse(content="Hello <think>hidden</think>World", tool_calls=[], usage={})
+
+    async def on_stream(delta: str) -> None:
+        seen_deltas.append(delta)
+
+    agent.provider.chat_stream_with_retry = fake_stream
+    final_content, _, _, _, _ = await agent._run_agent_loop([], on_stream=on_stream)
+
+    assert final_content == "Hello World"
+    assert seen_deltas == ["Hello", " World"]
+
+
+@pytest.mark.asyncio
+async def test_loop_retries_think_only_final_response(tmp_path):
+    """A final reply containing only a <think> block should trigger one more model call."""
+    agent = _make_loop(tmp_path)
+    calls: list[int] = []
+
+    async def fake_chat(**kwargs):
+        calls.append(1)
+        text = "<think>hidden</think>" if len(calls) == 1 else "Recovered answer"
+        return LLMResponse(content=text, tool_calls=[], usage={})
+
+    agent.provider.chat_with_retry = fake_chat
+
+    final_content, _, _, _, _ = await agent._run_agent_loop([])
+
+    assert final_content == "Recovered answer"
+    assert len(calls) == 2
+
+
+@pytest.mark.asyncio
+async def test_streamed_flag_not_set_on_llm_error(tmp_path):
+    """An error response on a streaming-capable channel must not be marked ``_streamed``,
+    so that ChannelManager still delivers the error text."""
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.bus.events import InboundMessage
+    from nanobot.bus.queue import MessageBus
+
+    message_bus = MessageBus()
+    fake_provider = MagicMock()
+    fake_provider.get_default_model.return_value = "test-model"
+    agent = AgentLoop(bus=message_bus, provider=fake_provider, workspace=tmp_path, model="test-model")
+    failure = LLMResponse(
+        content="503 service unavailable", finish_reason="error", tool_calls=[], usage={},
+    )
+    agent.provider.chat_with_retry = AsyncMock(return_value=failure)
+    agent.provider.chat_stream_with_retry = AsyncMock(return_value=failure)
+    agent.tools.get_definitions = MagicMock(return_value=[])
+
+    inbound = InboundMessage(
+        channel="feishu", sender_id="u1", chat_id="c1", content="hi",
+    )
+    reply = await agent._process_message(
+        inbound,
+        on_stream=AsyncMock(),
+        on_stream_end=AsyncMock(),
+    )
+
+    assert reply is not None
+    assert "503" in reply.content
+    assert not reply.metadata.get("_streamed"), \
+        "_streamed must not be set when stop_reason is error"
+
+
+@pytest.mark.asyncio
+async def test_ssrf_soft_block_can_finalize_after_streamed_tool_call(tmp_path):
+    """A tool result reporting a blocked private URL should still allow a streamed final reply."""
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.bus.events import InboundMessage
+    from nanobot.bus.queue import MessageBus
+
+    message_bus = MessageBus()
+    fake_provider = MagicMock()
+    fake_provider.get_default_model.return_value = "test-model"
+    blocked_call = LLMResponse(
+        content="checking metadata",
+        tool_calls=[ToolCallRequest(
+            id="call_ssrf",
+            name="exec",
+            arguments={"command": "curl http://169.254.169.254/latest/meta-data/"},
+        )],
+        usage={},
+    )
+    final_reply = LLMResponse(
+        content="I cannot access private URLs. Please share the local file.",
+        tool_calls=[],
+        usage={},
+    )
+    fake_provider.chat_stream_with_retry = AsyncMock(side_effect=[blocked_call, final_reply])
+
+    agent = AgentLoop(bus=message_bus, provider=fake_provider, workspace=tmp_path, model="test-model")
+    agent.tools.get_definitions = MagicMock(return_value=[])
+    agent.tools.prepare_call = MagicMock(return_value=(None, {}, None))
+    agent.tools.execute = AsyncMock(return_value=(
+        "Error: Command blocked by safety guard (internal/private URL detected)"
+    ))
+
+    reply = await agent._process_message(
+        InboundMessage(channel="telegram", sender_id="u1", chat_id="c1", content="hi"),
+        on_stream=AsyncMock(),
+        on_stream_end=AsyncMock(),
+    )
+
+    assert reply is not None
+    assert reply.content == "I cannot access private URLs. Please share the local file."
+    assert reply.metadata.get("_streamed") is True
+
+
+@pytest.mark.asyncio
+async def test_next_turn_after_llm_error_keeps_turn_boundary(tmp_path):
+    """A persisted error placeholder keeps user/assistant alternation in the next request."""
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.agent.runner import _PERSISTED_MODEL_ERROR_PLACEHOLDER
+    from nanobot.bus.events import InboundMessage
+    from nanobot.bus.queue import MessageBus
+
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    provider.chat_with_retry = AsyncMock(side_effect=[
+        LLMResponse(content="429 rate limit exceeded", finish_reason="error", tool_calls=[], usage={}),
+        LLMResponse(content="Recovered answer", tool_calls=[], usage={}),
+    ])
+
+    agent = AgentLoop(bus=MessageBus(), provider=provider, workspace=tmp_path, model="test-model")
+    agent.tools.get_definitions = MagicMock(return_value=[])
+    agent.consolidator.maybe_consolidate_by_tokens = AsyncMock(return_value=False)  # type: ignore[method-assign]
+
+    first_reply = await agent._process_message(
+        InboundMessage(channel="cli", sender_id="user", chat_id="test", content="first question")
+    )
+    assert first_reply is not None
+    assert first_reply.content == "429 rate limit exceeded"
+
+    session = agent.sessions.get_or_create("cli:test")
+    persisted = [
+        {key: value for key, value in entry.items() if key in {"role", "content"}}
+        for entry in session.messages
+    ]
+    assert persisted == [
+        {"role": "user", "content": "first question"},
+        {"role": "assistant", "content": _PERSISTED_MODEL_ERROR_PLACEHOLDER},
+    ]
+
+    second_reply = await agent._process_message(
+        InboundMessage(channel="cli", sender_id="user", chat_id="test", content="second question")
+    )
+    assert second_reply is not None
+    assert second_reply.content == "Recovered answer"
+
+    sent_messages = provider.chat_with_retry.await_args_list[1].kwargs["messages"]
+    turns = [entry for entry in sent_messages if entry.get("role") != "system"]
+    assert [entry["role"] for entry in turns[:3]] == ["user", "assistant", "user"]
+    assert "first question" in turns[0]["content"]
+    assert _PERSISTED_MODEL_ERROR_PLACEHOLDER in turns[1]["content"]
+    assert "second question" in turns[2]["content"]
+
+
+@pytest.mark.asyncio
+async def test_subagent_max_iterations_announces_existing_fallback(tmp_path, monkeypatch):
+    """A subagent whose model never stops calling tools should announce the fallback text."""
+    from nanobot.agent.subagent import SubagentManager, SubagentStatus
+    from nanobot.bus.queue import MessageBus
+
+    fake_provider = MagicMock()
+    fake_provider.get_default_model.return_value = "test-model"
+    fake_provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
+        content="working",
+        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
+    ))
+    manager = SubagentManager(
+        provider=fake_provider,
+        workspace=tmp_path,
+        bus=MessageBus(),
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    )
+    manager._announce_result = AsyncMock()
+
+    async def fake_execute(self, **kwargs):
+        return "tool result"
+
+    monkeypatch.setattr("nanobot.agent.tools.filesystem.ListDirTool.execute", fake_execute)
+
+    status = SubagentStatus(task_id="sub-1", label="label", task_description="do task", started_at=time.monotonic())
+    await manager._run_subagent("sub-1", "do task", "label", {"channel": "test", "chat_id": "c1"}, status)
+
+    manager._announce_result.assert_awaited_once()
+    announced = manager._announce_result.await_args.args
+    assert announced[3] == "Task completed but no final response was generated."
+    assert announced[5] == "ok"
diff --git a/tests/agent/test_runner.py b/tests/agent/test_runner.py
deleted file mode 100644
index b821d9bab..000000000
--- a/tests/agent/test_runner.py
+++ /dev/null
@@ -1,3313 +0,0 @@
-"""Tests for the shared agent runner and its integration contracts."""
-
-from __future__ import annotations
-
-import asyncio
-import base64
-import os
-import time
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-from nanobot.config.schema import AgentDefaults
-from nanobot.agent.tools.base import Tool
-from nanobot.agent.tools.registry import ToolRegistry
-from nanobot.providers.base import LLMResponse, ToolCallRequest
-
-_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
-
-
-def _make_injection_callback(queue: asyncio.Queue):
-    """Return an async callback that drains *queue* into a list of dicts."""
-    async def inject_cb():
-        items = []
-        while not queue.empty():
-            items.append(await queue.get())
-        return items
-    return inject_cb
-
-
-def _make_loop(tmp_path):
-    from nanobot.agent.loop import AgentLoop
-    from nanobot.bus.queue import MessageBus
-
-    bus = MessageBus()
-    provider = MagicMock()
-    provider.get_default_model.return_value = "test-model"
-
-    with patch("nanobot.agent.loop.ContextBuilder"), \
-         patch("nanobot.agent.loop.SessionManager"), \
-         patch("nanobot.agent.loop.SubagentManager") as MockSubMgr:
-        MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0)
-        loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path)
-    return loop
-
-
-@pytest.mark.asyncio
-async def test_runner_preserves_reasoning_fields_and_tool_results():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    captured_second_call: list[dict] = []
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        if call_count["n"] == 1:
-            return LLMResponse(
-                content="thinking",
-                tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
-                reasoning_content="hidden reasoning",
-                thinking_blocks=[{"type": "thinking", "thinking": "step"}],
-                usage={"prompt_tokens": 5, "completion_tokens": 3},
-            )
-        captured_second_call[:] = messages
-        return LLMResponse(content="done", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(return_value="tool result")
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[
-            {"role": "system", "content": "system"},
-            {"role": "user", "content": "do task"},
-        ],
-        tools=tools,
-        model="test-model",
-        max_iterations=3,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert result.final_content == "done"
-    assert result.tools_used == ["list_dir"]
-    assert result.tool_events == [
-        {"name": "list_dir", "status": "ok", "detail": "tool result"}
-    ]
-
-    assistant_messages = [
-        msg for msg in captured_second_call
-        if msg.get("role") == "assistant" and msg.get("tool_calls")
-    ]
-    assert len(assistant_messages) == 1
-    assert assistant_messages[0]["reasoning_content"] == "hidden reasoning"
-    assert assistant_messages[0]["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}]
-    assert any(
-        msg.get("role") == "tool" and msg.get("content") == "tool result"
-        for msg in captured_second_call
-    )
-
-
-@pytest.mark.asyncio
-async def test_runner_calls_hooks_in_order():
-    from nanobot.agent.hook import AgentHook, AgentHookContext
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-    events: list[tuple] = []
-
-    async def chat_with_retry(**kwargs):
-        call_count["n"] += 1
-        if call_count["n"] == 1:
-            return LLMResponse(
-                content="thinking",
-                tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
-            )
-        return LLMResponse(content="done", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(return_value="tool result")
-
-    class RecordingHook(AgentHook):
-        async def before_iteration(self, context: AgentHookContext) -> None:
-            events.append(("before_iteration", context.iteration))
-
-        async def before_execute_tools(self, context: AgentHookContext) -> None:
-            events.append((
-                "before_execute_tools",
-                context.iteration,
-                [tc.name for tc in context.tool_calls],
-            ))
-
-        async def after_iteration(self, context: AgentHookContext) -> None:
-            events.append((
-                "after_iteration",
-                context.iteration,
-                context.final_content,
-                list(context.tool_results),
-                list(context.tool_events),
-                context.stop_reason,
-            ))
-
-        def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None:
-            events.append(("finalize_content", context.iteration, content))
-            return content.upper() if content else content
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[],
-        tools=tools,
-        model="test-model",
-        max_iterations=3,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        hook=RecordingHook(),
-    ))
-
-    assert result.final_content == "DONE"
-    assert events == [
-        ("before_iteration", 0),
-        ("before_execute_tools", 0, ["list_dir"]),
-        (
-            "after_iteration",
-            0,
-            None,
-            ["tool result"],
-            [{"name": "list_dir", "status": "ok", "detail": "tool result"}],
-            None,
-        ),
-        ("before_iteration", 1),
-        ("finalize_content", 1, "done"),
-        ("after_iteration", 1, "DONE", [], [], "completed"),
-    ]
-
-
-@pytest.mark.asyncio
-async def test_runner_streaming_hook_receives_deltas_and_end_signal():
-    from nanobot.agent.hook import AgentHook, AgentHookContext
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    streamed: list[str] = []
-    endings: list[bool] = []
-
-    async def chat_stream_with_retry(*, on_content_delta, **kwargs):
-        await on_content_delta("he")
-        await on_content_delta("llo")
-        return LLMResponse(content="hello", tool_calls=[], usage={})
-
-    provider.chat_stream_with_retry = chat_stream_with_retry
-    provider.chat_with_retry = AsyncMock()
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    class StreamingHook(AgentHook):
-        def wants_streaming(self) -> bool:
-            return True
-
-        async def on_stream(self, context: AgentHookContext, delta: str) -> None:
-            streamed.append(delta)
-
-        async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None:
-            endings.append(resuming)
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[],
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        hook=StreamingHook(),
-    ))
-
-    assert result.final_content == "hello"
-    assert streamed == ["he", "llo"]
-    assert endings == [False]
-    provider.chat_with_retry.assert_not_awaited()
-
-
-@pytest.mark.asyncio
-async def test_runner_returns_max_iterations_fallback():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
-        content="still working",
-        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
-    ))
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(return_value="tool result")
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[],
-        tools=tools,
-        model="test-model",
-        max_iterations=2,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert result.stop_reason == "max_iterations"
-    assert result.final_content == (
-        "I reached the maximum number of tool call iterations (2) "
-        "without completing the task. You can try breaking the task into smaller steps."
-    )
-    assert result.messages[-1]["role"] == "assistant"
-    assert result.messages[-1]["content"] == result.final_content
-
-
-@pytest.mark.asyncio
-async def test_runner_times_out_hung_llm_request():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-
-    async def chat_with_retry(**kwargs):
-        await asyncio.sleep(3600)
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    runner = AgentRunner(provider)
-    started = time.monotonic()
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "hello"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        llm_timeout_s=0.05,
-    ))
-
-    assert (time.monotonic() - started) < 1.0
-    assert result.stop_reason == "error"
-    assert "timed out" in (result.final_content or "").lower()
-
-@pytest.mark.asyncio
-async def test_runner_returns_structured_tool_error():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
-        content="working",
-        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})],
-    ))
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(side_effect=RuntimeError("boom"))
-
-    runner = AgentRunner(provider)
-
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[],
-        tools=tools,
-        model="test-model",
-        max_iterations=2,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        fail_on_tool_error=True,
-    ))
-
-    assert result.stop_reason == "tool_error"
-    assert result.error == "Error: RuntimeError: boom"
-    assert result.tool_events == [
-        {"name": "list_dir", "status": "error", "detail": "boom"}
-    ]
-
-
-@pytest.mark.asyncio
-async def test_runner_does_not_abort_on_workspace_violation_anymore():
-    """v2 behavior: workspace-bound rejections are *soft* tool errors.
-
-    Previously (PR #3493) any workspace boundary error became a fatal
-    RuntimeError that aborted the turn. That silently killed legitimate
-    workspace commands once the heuristic guard misfired (#3599 #3605), so
-    we now hand the error back to the LLM as a recoverable tool result and
-    rely on ``repeated_workspace_violation_error`` to throttle bypass loops.
-    """
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    provider.chat_with_retry = AsyncMock(side_effect=[
-        LLMResponse(
-            content="trying outside",
-            tool_calls=[ToolCallRequest(
-                id="call_1", name="read_file", arguments={"path": "/tmp/outside.md"},
-            )],
-        ),
-        LLMResponse(content="ok, telling the user instead", tool_calls=[]),
-    ])
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(
-        side_effect=PermissionError(
-            "Path /tmp/outside.md is outside allowed directory /workspace"
-        )
-    )
-
-    runner = AgentRunner(provider)
-
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[],
-        tools=tools,
-        model="test-model",
-        max_iterations=3,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert provider.chat_with_retry.await_count == 2, (
-        "workspace violation must NOT short-circuit the loop"
-    )
-    assert result.stop_reason != "tool_error"
-    assert result.error is None
-    assert result.final_content == "ok, telling the user instead"
-    assert result.tool_events and result.tool_events[0]["status"] == "error"
-    # Detail still carries the workspace_violation breadcrumb for telemetry,
-    # but the runner did not raise.
-    assert "workspace_violation" in result.tool_events[0]["detail"]
-
-
-def test_is_ssrf_violation_recognizes_private_url_blocks():
-    """SSRF rejections are classified separately from workspace boundaries."""
-    from nanobot.agent.runner import AgentRunner
-
-    ssrf_msg = "Error: Command blocked by safety guard (internal/private URL detected)"
-    assert AgentRunner._is_ssrf_violation(ssrf_msg) is True
-    assert AgentRunner._is_ssrf_violation(
-        "URL validation failed: Blocked: host resolves to private/internal address 192.168.1.2"
-    ) is True
-
-    # Workspace-bound markers are NOT classified as SSRF.
-    assert AgentRunner._is_ssrf_violation(
-        "Error: Command blocked by safety guard (path outside working dir)"
-    ) is False
-    assert AgentRunner._is_ssrf_violation(
-        "Path /tmp/x is outside allowed directory /ws"
-    ) is False
-    # Deny / allowlist filter messages stay non-fatal too.
-    assert AgentRunner._is_ssrf_violation(
-        "Error: Command blocked by deny pattern filter"
-    ) is False
-
-
-@pytest.mark.asyncio
-async def test_runner_returns_non_retryable_hint_on_ssrf_violation():
-    """SSRF stays blocked, but the runtime gives the LLM a final chance to recover."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    provider.chat_with_retry = AsyncMock(side_effect=[
-        LLMResponse(
-            content="curl-ing metadata",
-            tool_calls=[ToolCallRequest(
-                id="call_ssrf",
-                name="exec",
-                arguments={"command": "curl http://169.254.169.254"},
-            )],
-        ),
-        LLMResponse(
-            content="I cannot access that private URL. Please share local files.",
-            tool_calls=[],
-        ),
-    ])
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(return_value=(
-        "Error: Command blocked by safety guard (internal/private URL detected)"
-    ))
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[],
-        tools=tools,
-        model="test-model",
-        max_iterations=3,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert provider.chat_with_retry.await_count == 2
-    assert result.stop_reason == "completed"
-    assert result.error is None
-    assert result.final_content == "I cannot access that private URL. Please share local files."
-    assert result.tool_events and result.tool_events[0]["detail"].startswith("ssrf_violation:")
-    tool_messages = [m for m in result.messages if m.get("role") == "tool"]
-    assert tool_messages
-    assert "non-bypassable security boundary" in tool_messages[0]["content"]
-    assert "Do not retry" in tool_messages[0]["content"]
-    assert "tools.ssrfWhitelist" in tool_messages[0]["content"]
-
-
-@pytest.mark.asyncio
-async def test_runner_lets_llm_recover_from_shell_guard_path_outside():
-    """Reporter scenario for #3599 / #3605 -- guard hit, agent recovers.
-
-    The shell `_guard_command` heuristic fires on `2>/dev/null`-style
-    redirects and other shell idioms. Before v2 that abort'd the whole
-    turn (silent hang on Telegram per #3605); now the LLM gets the soft
-    error back and can finalize on the next iteration.
-    """
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    captured_second_call: list[dict] = []
-
-    async def chat_with_retry(*, messages, **kwargs):
-        if provider.chat_with_retry.await_count == 1:
-            return LLMResponse(
-                content="trying noisy cleanup",
-                tool_calls=[ToolCallRequest(
-                    id="call_blocked",
-                    name="exec",
-                    arguments={"command": "rm scratch.txt 2>/dev/null"},
-                )],
-            )
-        captured_second_call[:] = list(messages)
-        return LLMResponse(content="recovered final answer", tool_calls=[])
-
-    provider.chat_with_retry = AsyncMock(side_effect=chat_with_retry)
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(
-        return_value="Error: Command blocked by safety guard (path outside working dir)"
-    )
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[],
-        tools=tools,
-        model="test-model",
-        max_iterations=3,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert provider.chat_with_retry.await_count == 2, (
-        "guard hit must NOT short-circuit the loop -- LLM should get a second turn"
-    )
-    assert result.stop_reason != "tool_error"
-    assert result.error is None
-    assert result.final_content == "recovered final answer"
-    assert result.tool_events and result.tool_events[0]["status"] == "error"
-    # v2: detail keeps the breadcrumb but the runner did not raise.
-    assert "workspace_violation" in result.tool_events[0]["detail"]
-
-
-@pytest.mark.asyncio
-async def test_runner_throttles_repeated_workspace_bypass_attempts():
-    """#3493 motivation: stop the LLM bypass loop without aborting the turn.
-
-    LLM keeps switching tools (read_file -> exec cat -> python -c open(...))
-    against the same outside path. After the soft retry budget is exhausted
-    the runner replaces the tool result with a hard "stop trying" message
-    so the model finally gives up and surfaces the boundary to the user.
-    """
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    bypass_attempts = [
-        ToolCallRequest(
-            id=f"a{i}", name="exec",
-            arguments={"command": f"cat /Users/x/Downloads/01.md  # try {i}"},
-        )
-        for i in range(4)
-    ]
-    responses: list[LLMResponse] = [
-        LLMResponse(content=f"try {i}", tool_calls=[bypass_attempts[i]])
-        for i in range(4)
-    ]
-    responses.append(LLMResponse(content="ok telling user", tool_calls=[]))
-
-    provider = MagicMock()
-    provider.chat_with_retry = AsyncMock(side_effect=responses)
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(
-        return_value="Error: Command blocked by safety guard (path outside working dir)"
-    )
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[],
-        tools=tools,
-        model="test-model",
-        max_iterations=10,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    # All 4 bypass attempts surface to the LLM (no fatal abort), and the
-    # runner finally completes once the LLM stops asking.
-    assert result.stop_reason != "tool_error"
-    assert result.error is None
-    assert result.final_content == "ok telling user"
-    # The third+ attempts must have been escalated -- look at the events.
-    escalated = [
-        ev for ev in result.tool_events
-        if ev["status"] == "error"
-        and ev["detail"].startswith("workspace_violation_escalated:")
-    ]
-    assert escalated, (
-        "expected at least one escalated workspace_violation event, got: "
-        f"{result.tool_events}"
-    )
-
-
-@pytest.mark.asyncio
-async def test_runner_persists_large_tool_results_for_follow_up_calls(tmp_path):
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    captured_second_call: list[dict] = []
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        if call_count["n"] == 1:
-            return LLMResponse(
-                content="working",
-                tool_calls=[ToolCallRequest(id="call_big", name="list_dir", arguments={"path": "."})],
-                usage={"prompt_tokens": 5, "completion_tokens": 3},
-            )
-        captured_second_call[:] = messages
-        return LLMResponse(content="done", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(return_value="x" * 20_000)
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "do task"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=2,
-        workspace=tmp_path,
-        session_key="test:runner",
-        max_tool_result_chars=2048,
-    ))
-
-    assert result.final_content == "done"
-    tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool")
-    assert "[tool output persisted]" in tool_message["content"]
-    assert "tool-results" in tool_message["content"]
-    assert (tmp_path / ".nanobot" / "tool-results" / "test_runner" / "call_big.txt").exists()
-
-
-def test_persist_tool_result_prunes_old_session_buckets(tmp_path):
-    from nanobot.utils.helpers import maybe_persist_tool_result
-
-    root = tmp_path / ".nanobot" / "tool-results"
-    old_bucket = root / "old_session"
-    recent_bucket = root / "recent_session"
-    old_bucket.mkdir(parents=True)
-    recent_bucket.mkdir(parents=True)
-    (old_bucket / "old.txt").write_text("old", encoding="utf-8")
-    (recent_bucket / "recent.txt").write_text("recent", encoding="utf-8")
-
-    stale = time.time() - (8 * 24 * 60 * 60)
-    os.utime(old_bucket, (stale, stale))
-    os.utime(old_bucket / "old.txt", (stale, stale))
-
-    persisted = maybe_persist_tool_result(
-        tmp_path,
-        "current:session",
-        "call_big",
-        "x" * 5000,
-        max_chars=64,
-    )
-
-    assert "[tool output persisted]" in persisted
-    assert not old_bucket.exists()
-    assert recent_bucket.exists()
-    assert (root / "current_session" / "call_big.txt").exists()
-
-
-def test_persist_tool_result_leaves_no_temp_files(tmp_path):
-    from nanobot.utils.helpers import maybe_persist_tool_result
-
-    root = tmp_path / ".nanobot" / "tool-results"
-    maybe_persist_tool_result(
-        tmp_path,
-        "current:session",
-        "call_big",
-        "x" * 5000,
-        max_chars=64,
-    )
-
-    assert (root / "current_session" / "call_big.txt").exists()
-    assert list((root / "current_session").glob("*.tmp")) == []
-
-
-def test_persist_tool_result_logs_cleanup_failures(monkeypatch, tmp_path):
-    from nanobot.utils.helpers import maybe_persist_tool_result
-
-    warnings: list[str] = []
-
-    monkeypatch.setattr(
-        "nanobot.utils.helpers._cleanup_tool_result_buckets",
-        lambda *_args, **_kwargs: (_ for _ in ()).throw(OSError("busy")),
-    )
-    monkeypatch.setattr(
-        "nanobot.utils.helpers.logger.exception",
-        lambda message, *args: warnings.append(message.format(*args)),
-    )
-
-    persisted = maybe_persist_tool_result(
-        tmp_path,
-        "current:session",
-        "call_big",
-        "x" * 5000,
-        max_chars=64,
-    )
-
-    assert "[tool output persisted]" in persisted
-    assert warnings and "Failed to clean stale tool result buckets" in warnings[0]
-
-
-@pytest.mark.asyncio
-async def test_runner_replaces_empty_tool_result_with_marker():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    captured_second_call: list[dict] = []
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        if call_count["n"] == 1:
-            return LLMResponse(
-                content="working",
-                tool_calls=[ToolCallRequest(id="call_1", name="noop", arguments={})],
-                usage={},
-            )
-        captured_second_call[:] = messages
-        return LLMResponse(content="done", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(return_value="")
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "do task"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=2,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert result.final_content == "done"
-    tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool")
-    assert tool_message["content"] == "(noop completed with no output)"
-
-
-@pytest.mark.asyncio
-async def test_runner_uses_raw_messages_when_context_governance_fails():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    captured_messages: list[dict] = []
-
-    async def chat_with_retry(*, messages, **kwargs):
-        captured_messages[:] = messages
-        return LLMResponse(content="done", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    initial_messages = [
-        {"role": "system", "content": "system"},
-        {"role": "user", "content": "hello"},
-    ]
-
-    runner = AgentRunner(provider)
-    runner._snip_history = MagicMock(side_effect=RuntimeError("boom"))  # type: ignore[method-assign]
-    result = await runner.run(AgentRunSpec(
-        initial_messages=initial_messages,
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert result.final_content == "done"
-    assert captured_messages == initial_messages
-
-
-@pytest.mark.asyncio
-async def test_runner_retries_empty_final_response_with_summary_prompt():
-    """Empty responses get 2 silent retries before finalization kicks in."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    calls: list[dict] = []
-
-    async def chat_with_retry(*, messages, tools=None, **kwargs):
-        calls.append({"messages": messages, "tools": tools})
-        if len(calls) <= 2:
-            return LLMResponse(
-                content=None,
-                tool_calls=[],
-                usage={"prompt_tokens": 5, "completion_tokens": 1},
-            )
-        return LLMResponse(
-            content="final answer",
-            tool_calls=[],
-            usage={"prompt_tokens": 3, "completion_tokens": 7},
-        )
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "do task"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=3,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert result.final_content == "final answer"
-    # 2 silent retries (iterations 0,1) + finalization on iteration 1
-    assert len(calls) == 3
-    assert calls[0]["tools"] is not None
-    assert calls[1]["tools"] is not None
-    assert calls[2]["tools"] is None
-    assert result.usage["prompt_tokens"] == 13
-    assert result.usage["completion_tokens"] == 9
-
-
-@pytest.mark.asyncio
-async def test_runner_uses_specific_message_after_empty_finalization_retry():
-    """After silent retries + finalization all return empty, stop_reason is empty_final_response."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-    from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE
-
-    provider = MagicMock()
-
-    async def chat_with_retry(*, messages, **kwargs):
-        return LLMResponse(content=None, tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "do task"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=3,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert result.final_content == EMPTY_FINAL_RESPONSE_MESSAGE
-    assert result.stop_reason == "empty_final_response"
-
-
-@pytest.mark.asyncio
-async def test_runner_empty_response_does_not_break_tool_chain():
-    """An empty intermediate response must not kill an ongoing tool chain.
-
-    Sequence: tool_call → empty → tool_call → final text.
-    The runner should recover via silent retry and complete normally.
-    """
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    call_count = 0
-
-    async def chat_with_retry(*, messages, tools=None, **kwargs):
-        nonlocal call_count
-        call_count += 1
-        if call_count == 1:
-            return LLMResponse(
-                content=None,
-                tool_calls=[ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a.txt"})],
-                usage={"prompt_tokens": 10, "completion_tokens": 5},
-            )
-        if call_count == 2:
-            return LLMResponse(content=None, tool_calls=[], usage={"prompt_tokens": 10, "completion_tokens": 1})
-        if call_count == 3:
-            return LLMResponse(
-                content=None,
-                tool_calls=[ToolCallRequest(id="tc2", name="read_file", arguments={"path": "b.txt"})],
-                usage={"prompt_tokens": 10, "completion_tokens": 5},
-            )
-        return LLMResponse(
-            content="Here are the results.",
-            tool_calls=[],
-            usage={"prompt_tokens": 10, "completion_tokens": 10},
-        )
-
-    provider.chat_with_retry = chat_with_retry
-    provider.chat_stream_with_retry = chat_with_retry
-
-    async def fake_tool(name, args, **kw):
-        return "file content"
-
-    tool_registry = MagicMock()
-    tool_registry.get_definitions.return_value = [{"type": "function", "function": {"name": "read_file"}}]
-    tool_registry.execute = AsyncMock(side_effect=fake_tool)
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "read both files"}],
-        tools=tool_registry,
-        model="test-model",
-        max_iterations=10,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert result.final_content == "Here are the results."
-    assert result.stop_reason == "completed"
-    assert call_count == 4
-    assert "read_file" in result.tools_used
-
-
-def test_snip_history_drops_orphaned_tool_results_from_trimmed_slice(monkeypatch):
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    runner = AgentRunner(provider)
-    messages = [
-        {"role": "system", "content": "system"},
-        {"role": "user", "content": "old user"},
-        {
-            "role": "assistant",
-            "content": "tool call",
-            "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "ls", "arguments": "{}"}}],
-        },
-        {"role": "tool", "tool_call_id": "call_1", "content": "tool output"},
-        {"role": "assistant", "content": "after tool"},
-    ]
-    spec = AgentRunSpec(
-        initial_messages=messages,
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        context_window_tokens=2000,
-        context_block_limit=100,
-    )
-
-    monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_args, **_kwargs: (500, None))
-    token_sizes = {
-        "old user": 120,
-        "tool call": 120,
-        "tool output": 40,
-        "after tool": 40,
-        "system": 0,
-    }
-    monkeypatch.setattr(
-        "nanobot.agent.runner.estimate_message_tokens",
-        lambda msg: token_sizes.get(str(msg.get("content")), 40),
-    )
-
-    trimmed = runner._snip_history(spec, messages)
-
-    # After the fix, the user message is recovered so the sequence is valid
-    # for providers that require system → user (e.g. GLM error 1214).
-    assert trimmed[0]["role"] == "system"
-    non_system = [m for m in trimmed if m["role"] != "system"]
-    assert non_system[0]["role"] == "user", f"Expected user after system, got {non_system[0]['role']}"
-
-
-@pytest.mark.asyncio
-async def test_runner_keeps_going_when_tool_result_persistence_fails():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    captured_second_call: list[dict] = []
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        if call_count["n"] == 1:
-            return LLMResponse(
-                content="working",
-                tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
-                usage={"prompt_tokens": 5, "completion_tokens": 3},
-            )
-        captured_second_call[:] = messages
-        return LLMResponse(content="done", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(return_value="tool result")
-
-    runner = AgentRunner(provider)
-    with patch("nanobot.agent.runner.maybe_persist_tool_result", side_effect=RuntimeError("disk full")):
-        result = await runner.run(AgentRunSpec(
-            initial_messages=[{"role": "user", "content": "do task"}],
-            tools=tools,
-            model="test-model",
-            max_iterations=2,
-            max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        ))
-
-    assert result.final_content == "done"
-    tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool")
-    assert tool_message["content"] == "tool result"
-
-
-class _DelayTool(Tool):
-    def __init__(
-        self,
-        name: str,
-        *,
-        delay: float,
-        read_only: bool,
-        shared_events: list[str],
-        exclusive: bool = False,
-    ):
-        self._name = name
-        self._delay = delay
-        self._read_only = read_only
-        self._shared_events = shared_events
-        self._exclusive = exclusive
-
-    @property
-    def name(self) -> str:
-        return self._name
-
-    @property
-    def description(self) -> str:
-        return self._name
-
-    @property
-    def parameters(self) -> dict:
-        return {"type": "object", "properties": {}, "required": []}
-
-    @property
-    def read_only(self) -> bool:
-        return self._read_only
-
-    @property
-    def exclusive(self) -> bool:
-        return self._exclusive
-
-    async def execute(self, **kwargs):
-        self._shared_events.append(f"start:{self._name}")
-        await asyncio.sleep(self._delay)
-        self._shared_events.append(f"end:{self._name}")
-        return self._name
-
-
-@pytest.mark.asyncio
-async def test_runner_batches_read_only_tools_before_exclusive_work():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    tools = ToolRegistry()
-    shared_events: list[str] = []
-    read_a = _DelayTool("read_a", delay=0.05, read_only=True, shared_events=shared_events)
-    read_b = _DelayTool("read_b", delay=0.05, read_only=True, shared_events=shared_events)
-    write_a = _DelayTool("write_a", delay=0.01, read_only=False, shared_events=shared_events)
-    tools.register(read_a)
-    tools.register(read_b)
-    tools.register(write_a)
-
-    runner = AgentRunner(MagicMock())
-    await runner._execute_tools(
-        AgentRunSpec(
-            initial_messages=[],
-            tools=tools,
-            model="test-model",
-            max_iterations=1,
-            max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-            concurrent_tools=True,
-        ),
-        [
-            ToolCallRequest(id="ro1", name="read_a", arguments={}),
-            ToolCallRequest(id="ro2", name="read_b", arguments={}),
-            ToolCallRequest(id="rw1", name="write_a", arguments={}),
-        ],
-        {},
-        {},
-    )
-
-    assert shared_events[0:2] == ["start:read_a", "start:read_b"]
-    assert "end:read_a" in shared_events and "end:read_b" in shared_events
-    assert shared_events.index("end:read_a") < shared_events.index("start:write_a")
-    assert shared_events.index("end:read_b") < shared_events.index("start:write_a")
-    assert shared_events[-2:] == ["start:write_a", "end:write_a"]
-
-
-@pytest.mark.asyncio
-async def test_runner_does_not_batch_exclusive_read_only_tools():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    tools = ToolRegistry()
-    shared_events: list[str] = []
-    read_a = _DelayTool("read_a", delay=0.03, read_only=True, shared_events=shared_events)
-    read_b = _DelayTool("read_b", delay=0.03, read_only=True, shared_events=shared_events)
-    ddg_like = _DelayTool(
-        "ddg_like",
-        delay=0.01,
-        read_only=True,
-        shared_events=shared_events,
-        exclusive=True,
-    )
-    tools.register(read_a)
-    tools.register(ddg_like)
-    tools.register(read_b)
-
-    runner = AgentRunner(MagicMock())
-    await runner._execute_tools(
-        AgentRunSpec(
-            initial_messages=[],
-            tools=tools,
-            model="test-model",
-            max_iterations=1,
-            max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-            concurrent_tools=True,
-        ),
-        [
-            ToolCallRequest(id="ro1", name="read_a", arguments={}),
-            ToolCallRequest(id="ddg1", name="ddg_like", arguments={}),
-            ToolCallRequest(id="ro2", name="read_b", arguments={}),
-        ],
-        {},
-        {},
-    )
-
-    assert shared_events[0] == "start:read_a"
-    assert shared_events.index("end:read_a") < shared_events.index("start:ddg_like")
-    assert shared_events.index("end:ddg_like") < shared_events.index("start:read_b")
-
-
-@pytest.mark.asyncio
-async def test_runner_blocks_repeated_external_fetches():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    captured_final_call: list[dict] = []
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        if call_count["n"] <= 3:
-            return LLMResponse(
-                content="working",
-                tool_calls=[ToolCallRequest(id=f"call_{call_count['n']}", name="web_fetch", arguments={"url": "https://example.com"})],
-                usage={},
-            )
-        captured_final_call[:] = messages
-        return LLMResponse(content="done", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(return_value="page content")
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "research task"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=4,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert result.final_content == "done"
-    assert tools.execute.await_count == 2
-    blocked_tool_message = [
-        msg for msg in captured_final_call
-        if msg.get("role") == "tool" and msg.get("tool_call_id") == "call_3"
-    ][0]
-    assert "repeated external lookup blocked" in blocked_tool_message["content"]
-
-
-@pytest.mark.asyncio
-async def test_loop_max_iterations_message_stays_stable(tmp_path):
-    loop = _make_loop(tmp_path)
-    loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
-        content="working",
-        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})],
-    ))
-    loop.tools.get_definitions = MagicMock(return_value=[])
-    loop.tools.execute = AsyncMock(return_value="ok")
-    loop.max_iterations = 2
-
-    final_content, _, _, _, _ = await loop._run_agent_loop([])
-
-    assert final_content == (
-        "I reached the maximum number of tool call iterations (2) "
-        "without completing the task. You can try breaking the task into smaller steps."
-    )
-
-
-@pytest.mark.asyncio
-async def test_loop_stream_filter_handles_think_only_prefix_without_crashing(tmp_path):
-    loop = _make_loop(tmp_path)
-    deltas: list[str] = []
-    endings: list[bool] = []
-
-    async def chat_stream_with_retry(*, on_content_delta, **kwargs):
-        await on_content_delta("<think>hidden")
-        await on_content_delta("</think>Hello")
-        return LLMResponse(content="<think>hidden</think>Hello", tool_calls=[], usage={})
-
-    loop.provider.chat_stream_with_retry = chat_stream_with_retry
-
-    async def on_stream(delta: str) -> None:
-        deltas.append(delta)
-
-    async def on_stream_end(*, resuming: bool = False) -> None:
-        endings.append(resuming)
-
-    final_content, _, _, _, _ = await loop._run_agent_loop(
-        [],
-        on_stream=on_stream,
-        on_stream_end=on_stream_end,
-    )
-
-    assert final_content == "Hello"
-    assert deltas == ["Hello"]
-    assert endings == [False]
-
-
-@pytest.mark.asyncio
-async def test_loop_stream_filter_hides_partial_trailing_think_prefix(tmp_path):
-    loop = _make_loop(tmp_path)
-    deltas: list[str] = []
-
-    async def chat_stream_with_retry(*, on_content_delta, **kwargs):
-        await on_content_delta("Hello <thin")
-        await on_content_delta("k>hidden</think>World")
-        return LLMResponse(content="Hello <think>hidden</think>World", tool_calls=[], usage={})
-
-    loop.provider.chat_stream_with_retry = chat_stream_with_retry
-
-    async def on_stream(delta: str) -> None:
-        deltas.append(delta)
-
-    final_content, _, _, _, _ = await loop._run_agent_loop([], on_stream=on_stream)
-
-    assert final_content == "Hello World"
-    assert deltas == ["Hello", " World"]
-
-
-@pytest.mark.asyncio
-async def test_loop_stream_filter_hides_complete_trailing_think_tag(tmp_path):
-    loop = _make_loop(tmp_path)
-    deltas: list[str] = []
-
-    async def chat_stream_with_retry(*, on_content_delta, **kwargs):
-        await on_content_delta("Hello <think>")
-        await on_content_delta("hidden</think>World")
-        return LLMResponse(content="Hello <think>hidden</think>World", tool_calls=[], usage={})
-
-    loop.provider.chat_stream_with_retry = chat_stream_with_retry
-
-    async def on_stream(delta: str) -> None:
-        deltas.append(delta)
-
-    final_content, _, _, _, _ = await loop._run_agent_loop([], on_stream=on_stream)
-
-    assert final_content == "Hello World"
-    assert deltas == ["Hello", " World"]
-
-
-@pytest.mark.asyncio
-async def test_loop_retries_think_only_final_response(tmp_path):
-    loop = _make_loop(tmp_path)
-    call_count = {"n": 0}
-
-    async def chat_with_retry(**kwargs):
-        call_count["n"] += 1
-        if call_count["n"] == 1:
-            return LLMResponse(content="<think>hidden</think>", tool_calls=[], usage={})
-        return LLMResponse(content="Recovered answer", tool_calls=[], usage={})
-
-    loop.provider.chat_with_retry = chat_with_retry
-
-    final_content, _, _, _, _ = await loop._run_agent_loop([])
-
-    assert final_content == "Recovered answer"
-    assert call_count["n"] == 2
-
-
-@pytest.mark.asyncio
-async def test_llm_error_not_appended_to_session_messages():
-    """When LLM returns finish_reason='error', the error content must NOT be
-    appended to the messages list (prevents polluting session history)."""
-    from nanobot.agent.runner import (
-        AgentRunSpec,
-        AgentRunner,
-        _PERSISTED_MODEL_ERROR_PLACEHOLDER,
-    )
-
-    provider = MagicMock()
-    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
-        content="429 rate limit exceeded", finish_reason="error", tool_calls=[], usage={},
-    ))
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "hello"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=5,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert result.stop_reason == "error"
-    assert result.final_content == "429 rate limit exceeded"
-    assistant_msgs = [m for m in result.messages if m.get("role") == "assistant"]
-    assert all("429" not in (m.get("content") or "") for m in assistant_msgs), \
-        "Error content should not appear in session messages"
-    assert assistant_msgs[-1]["content"] == _PERSISTED_MODEL_ERROR_PLACEHOLDER
-
-
-@pytest.mark.asyncio
-async def test_streamed_flag_not_set_on_llm_error(tmp_path):
-    """When LLM errors during a streaming-capable channel interaction,
-    _streamed must NOT be set so ChannelManager delivers the error."""
-    from nanobot.agent.loop import AgentLoop
-    from nanobot.bus.events import InboundMessage
-    from nanobot.bus.queue import MessageBus
-
-    bus = MessageBus()
-    provider = MagicMock()
-    provider.get_default_model.return_value = "test-model"
-    loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
-    error_resp = LLMResponse(
-        content="503 service unavailable", finish_reason="error", tool_calls=[], usage={},
-    )
-    loop.provider.chat_with_retry = AsyncMock(return_value=error_resp)
-    loop.provider.chat_stream_with_retry = AsyncMock(return_value=error_resp)
-    loop.tools.get_definitions = MagicMock(return_value=[])
-
-    msg = InboundMessage(
-        channel="feishu", sender_id="u1", chat_id="c1", content="hi",
-    )
-    result = await loop._process_message(
-        msg,
-        on_stream=AsyncMock(),
-        on_stream_end=AsyncMock(),
-    )
-
-    assert result is not None
-    assert "503" in result.content
-    assert not result.metadata.get("_streamed"), \
-        "_streamed must not be set when stop_reason is error"
-
-
-@pytest.mark.asyncio
-async def test_ssrf_soft_block_can_finalize_after_streamed_tool_call(tmp_path):
-    from nanobot.agent.loop import AgentLoop
-    from nanobot.bus.events import InboundMessage
-    from nanobot.bus.queue import MessageBus
-
-    bus = MessageBus()
-    provider = MagicMock()
-    provider.get_default_model.return_value = "test-model"
-    tool_call_resp = LLMResponse(
-        content="checking metadata",
-        tool_calls=[ToolCallRequest(
-            id="call_ssrf",
-            name="exec",
-            arguments={"command": "curl http://169.254.169.254/latest/meta-data/"},
-        )],
-        usage={},
-    )
-    provider.chat_stream_with_retry = AsyncMock(side_effect=[
-        tool_call_resp,
-        LLMResponse(
-            content="I cannot access private URLs. Please share the local file.",
-            tool_calls=[],
-            usage={},
-        ),
-    ])
-
-    loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
-    loop.tools.get_definitions = MagicMock(return_value=[])
-    loop.tools.prepare_call = MagicMock(return_value=(None, {}, None))
-    loop.tools.execute = AsyncMock(return_value=(
-        "Error: Command blocked by safety guard (internal/private URL detected)"
-    ))
-
-    result = await loop._process_message(
-        InboundMessage(channel="telegram", sender_id="u1", chat_id="c1", content="hi"),
-        on_stream=AsyncMock(),
-        on_stream_end=AsyncMock(),
-    )
-
-    assert result is not None
-    assert result.content == "I cannot access private URLs. Please share the local file."
-    assert result.metadata.get("_streamed") is True
-
-
-@pytest.mark.asyncio
-async def test_next_turn_after_llm_error_keeps_turn_boundary(tmp_path):
-    from nanobot.agent.loop import AgentLoop
-    from nanobot.agent.runner import _PERSISTED_MODEL_ERROR_PLACEHOLDER
-    from nanobot.bus.events import InboundMessage
-    from nanobot.bus.queue import MessageBus
-
-    provider = MagicMock()
-    provider.get_default_model.return_value = "test-model"
-    provider.chat_with_retry = AsyncMock(side_effect=[
-        LLMResponse(content="429 rate limit exceeded", finish_reason="error", tool_calls=[], usage={}),
-        LLMResponse(content="Recovered answer", tool_calls=[], usage={}),
-    ])
-
-    loop = AgentLoop(bus=MessageBus(), provider=provider, workspace=tmp_path, model="test-model")
-    loop.tools.get_definitions = MagicMock(return_value=[])
-    loop.consolidator.maybe_consolidate_by_tokens = AsyncMock(return_value=False)  # type: ignore[method-assign]
-
-    first = await loop._process_message(
-        InboundMessage(channel="cli", sender_id="user", chat_id="test", content="first question")
-    )
-    assert first is not None
-    assert first.content == "429 rate limit exceeded"
-
-    session = loop.sessions.get_or_create("cli:test")
-    assert [
-        {key: value for key, value in message.items() if key in {"role", "content"}}
-        for message in session.messages
-    ] == [
-        {"role": "user", "content": "first question"},
-        {"role": "assistant", "content": _PERSISTED_MODEL_ERROR_PLACEHOLDER},
-    ]
-
-    second = await loop._process_message(
-        InboundMessage(channel="cli", sender_id="user", chat_id="test", content="second question")
-    )
-    assert second is not None
-    assert second.content == "Recovered answer"
-
-    request_messages = provider.chat_with_retry.await_args_list[1].kwargs["messages"]
-    non_system = [message for message in request_messages if message.get("role") != "system"]
-    assert non_system[0]["role"] == "user"
-    assert "first question" in non_system[0]["content"]
-    assert non_system[1]["role"] == "assistant"
-    assert _PERSISTED_MODEL_ERROR_PLACEHOLDER in non_system[1]["content"]
-    assert non_system[2]["role"] == "user"
-    assert "second question" in non_system[2]["content"]
-
-
-@pytest.mark.asyncio
-async def test_runner_tool_error_sets_final_content():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-
-    async def chat_with_retry(*, messages, **kwargs):
-        return LLMResponse(
-            content="working",
-            tool_calls=[ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})],
-            usage={},
-        )
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(side_effect=RuntimeError("boom"))
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "do task"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        fail_on_tool_error=True,
-    ))
-
-    assert result.final_content == "Error: RuntimeError: boom"
-    assert result.stop_reason == "tool_error"
-
-
-@pytest.mark.asyncio
-async def test_subagent_max_iterations_announces_existing_fallback(tmp_path, monkeypatch):
-    from nanobot.agent.subagent import SubagentManager, SubagentStatus
-    from nanobot.bus.queue import MessageBus
-
-    bus = MessageBus()
-    provider = MagicMock()
-    provider.get_default_model.return_value = "test-model"
-    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
-        content="working",
-        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
-    ))
-    mgr = SubagentManager(
-        provider=provider,
-        workspace=tmp_path,
-        bus=bus,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    )
-    mgr._announce_result = AsyncMock()
-
-    async def fake_execute(self, **kwargs):
-        return "tool result"
-
-    monkeypatch.setattr("nanobot.agent.tools.filesystem.ListDirTool.execute", fake_execute)
-
-    status = SubagentStatus(task_id="sub-1", label="label", task_description="do task", started_at=time.monotonic())
-    await mgr._run_subagent("sub-1", "do task", "label", {"channel": "test", "chat_id": "c1"}, status)
-
-    mgr._announce_result.assert_awaited_once()
-    args = mgr._announce_result.await_args.args
-    assert args[3] == "Task completed but no final response was generated."
-    assert args[5] == "ok"
-
-
-@pytest.mark.asyncio
-async def test_runner_accumulates_usage_and_preserves_cached_tokens():
-    """Runner should accumulate prompt/completion tokens across iterations
-    and preserve cached_tokens from provider responses."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        if call_count["n"] == 1:
-            return LLMResponse(
-                content="thinking",
-                tool_calls=[ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})],
-                usage={"prompt_tokens": 100, "completion_tokens": 10, "cached_tokens": 80},
-            )
-        return LLMResponse(
-            content="done",
-            tool_calls=[],
-            usage={"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150},
-        )
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(return_value="file content")
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "do task"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=3,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    # Usage should be accumulated across iterations
-    assert result.usage["prompt_tokens"] == 300  # 100 + 200
-    assert result.usage["completion_tokens"] == 30  # 10 + 20
-    assert result.usage["cached_tokens"] == 230  # 80 + 150
-
-
-@pytest.mark.asyncio
-async def test_runner_passes_cached_tokens_to_hook_context():
-    """Hook context.usage should contain cached_tokens."""
-    from nanobot.agent.hook import AgentHook, AgentHookContext
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    captured_usage: list[dict] = []
-
-    class UsageHook(AgentHook):
-        async def after_iteration(self, context: AgentHookContext) -> None:
-            captured_usage.append(dict(context.usage))
-
-    async def chat_with_retry(**kwargs):
-        return LLMResponse(
-            content="done",
-            tool_calls=[],
-            usage={"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150},
-        )
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    runner = AgentRunner(provider)
-    await runner.run(AgentRunSpec(
-        initial_messages=[],
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        hook=UsageHook(),
-    ))
-
-    assert len(captured_usage) == 1
-    assert captured_usage[0]["cached_tokens"] == 150
-
-
-# ---------------------------------------------------------------------------
-# Length recovery (auto-continue on finish_reason == "length")
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.asyncio
-async def test_length_recovery_continues_from_truncated_output():
-    """When finish_reason is 'length', runner should insert a continuation
-    prompt and retry, stitching partial outputs into the final result."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        if call_count["n"] <= 2:
-            return LLMResponse(
-                content=f"part{call_count['n']} ",
-                finish_reason="length",
-                usage={},
-            )
-        return LLMResponse(content="final", finish_reason="stop", usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "write a long essay"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=10,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert result.stop_reason == "completed"
-    assert result.final_content == "final"
-    assert call_count["n"] == 3
-    roles = [m["role"] for m in result.messages if m["role"] == "user"]
-    assert len(roles) >= 3  # original + 2 recovery prompts
-
-
-@pytest.mark.asyncio
-async def test_length_recovery_streaming_calls_on_stream_end_with_resuming():
-    """During length recovery with streaming, on_stream_end should be called
-    with resuming=True so the hook knows the conversation is continuing."""
-    from nanobot.agent.hook import AgentHook, AgentHookContext
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-    stream_end_calls: list[bool] = []
-
-    class StreamHook(AgentHook):
-        def wants_streaming(self) -> bool:
-            return True
-
-        async def on_stream(self, context: AgentHookContext, delta: str) -> None:
-            pass
-
-        async def on_stream_end(self, context: AgentHookContext, resuming: bool = False) -> None:
-            stream_end_calls.append(resuming)
-
-    async def chat_stream_with_retry(*, messages, on_content_delta=None, **kwargs):
-        call_count["n"] += 1
-        if call_count["n"] == 1:
-            return LLMResponse(content="partial ", finish_reason="length", usage={})
-        return LLMResponse(content="done", finish_reason="stop", usage={})
-
-    provider.chat_stream_with_retry = chat_stream_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    runner = AgentRunner(provider)
-    await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "go"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=10,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        hook=StreamHook(),
-    ))
-
-    assert len(stream_end_calls) == 2
-    assert stream_end_calls[0] is True   # length recovery: resuming
-    assert stream_end_calls[1] is False  # final response: done
-
-
-@pytest.mark.asyncio
-async def test_length_recovery_gives_up_after_max_retries():
-    """After _MAX_LENGTH_RECOVERIES attempts the runner should stop retrying."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_LENGTH_RECOVERIES
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        return LLMResponse(
-            content=f"chunk{call_count['n']}",
-            finish_reason="length",
-            usage={},
-        )
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "go"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=20,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert call_count["n"] == _MAX_LENGTH_RECOVERIES + 1
-    assert result.final_content is not None
-
-
-# ---------------------------------------------------------------------------
-# Backfill missing tool_results
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.asyncio
-async def test_backfill_missing_tool_results_inserts_error():
-    """Orphaned tool_use (no matching tool_result) should get a synthetic error."""
-    from nanobot.agent.runner import AgentRunner, _BACKFILL_CONTENT
-
-    messages = [
-        {"role": "user", "content": "hi"},
-        {
-            "role": "assistant",
-            "content": "",
-            "tool_calls": [
-                {"id": "call_a", "type": "function", "function": {"name": "exec", "arguments": "{}"}},
-                {"id": "call_b", "type": "function", "function": {"name": "read_file", "arguments": "{}"}},
-            ],
-        },
-        {"role": "tool", "tool_call_id": "call_a", "name": "exec", "content": "ok"},
-    ]
-    result = AgentRunner._backfill_missing_tool_results(messages)
-    tool_msgs = [m for m in result if m.get("role") == "tool"]
-    assert len(tool_msgs) == 2
-    backfilled = [m for m in tool_msgs if m.get("tool_call_id") == "call_b"]
-    assert len(backfilled) == 1
-    assert backfilled[0]["content"] == _BACKFILL_CONTENT
-    assert backfilled[0]["name"] == "read_file"
-
-
-def test_drop_orphan_tool_results_removes_unmatched_tool_messages():
-    from nanobot.agent.runner import AgentRunner
-
-    messages = [
-        {"role": "system", "content": "system"},
-        {"role": "user", "content": "old user"},
-        {
-            "role": "assistant",
-            "content": "",
-            "tool_calls": [
-                {"id": "call_ok", "type": "function", "function": {"name": "read_file", "arguments": "{}"}},
-            ],
-        },
-        {"role": "tool", "tool_call_id": "call_ok", "name": "read_file", "content": "ok"},
-        {"role": "tool", "tool_call_id": "call_orphan", "name": "exec", "content": "stale"},
-        {"role": "assistant", "content": "after tool"},
-    ]
-
-    cleaned = AgentRunner._drop_orphan_tool_results(messages)
-
-    assert cleaned == [
-        {"role": "system", "content": "system"},
-        {"role": "user", "content": "old user"},
-        {
-            "role": "assistant",
-            "content": "",
-            "tool_calls": [
-                {"id": "call_ok", "type": "function", "function": {"name": "read_file", "arguments": "{}"}},
-            ],
-        },
-        {"role": "tool", "tool_call_id": "call_ok", "name": "read_file", "content": "ok"},
-        {"role": "assistant", "content": "after tool"},
-    ]
-
-
-@pytest.mark.asyncio
-async def test_backfill_noop_when_complete():
-    """Complete message chains should not be modified."""
-    from nanobot.agent.runner import AgentRunner
-
-    messages = [
-        {"role": "user", "content": "hi"},
-        {
-            "role": "assistant",
-            "content": "",
-            "tool_calls": [
-                {"id": "call_x", "type": "function", "function": {"name": "exec", "arguments": "{}"}},
-            ],
-        },
-        {"role": "tool", "tool_call_id": "call_x", "name": "exec", "content": "done"},
-        {"role": "assistant", "content": "all good"},
-    ]
-    result = AgentRunner._backfill_missing_tool_results(messages)
-    assert result is messages  # same object — no copy
-
-
-@pytest.mark.asyncio
-async def test_runner_drops_orphan_tool_results_before_model_request():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    captured_messages: list[dict] = []
-
-    async def chat_with_retry(*, messages, **kwargs):
-        captured_messages[:] = messages
-        return LLMResponse(content="done", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[
-            {"role": "system", "content": "system"},
-            {"role": "user", "content": "old user"},
-            {"role": "tool", "tool_call_id": "call_orphan", "name": "exec", "content": "stale"},
-            {"role": "assistant", "content": "after orphan"},
-            {"role": "user", "content": "new prompt"},
-        ],
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert all(
-        message.get("tool_call_id") != "call_orphan"
-        for message in captured_messages
-        if message.get("role") == "tool"
-    )
-    assert result.messages[2]["tool_call_id"] == "call_orphan"
-    assert result.final_content == "done"
-
-
-@pytest.mark.asyncio
-async def test_backfill_repairs_model_context_without_shifting_save_turn_boundary(tmp_path):
-    """Historical backfill should not duplicate old tail messages on persist."""
-    from nanobot.agent.loop import AgentLoop
-    from nanobot.agent.runner import _BACKFILL_CONTENT
-    from nanobot.bus.events import InboundMessage
-    from nanobot.bus.queue import MessageBus
-
-    provider = MagicMock()
-    provider.get_default_model.return_value = "test-model"
-    response = LLMResponse(content="new answer", tool_calls=[], usage={})
-    provider.chat_with_retry = AsyncMock(return_value=response)
-    provider.chat_stream_with_retry = AsyncMock(return_value=response)
-
-    loop = AgentLoop(
-        bus=MessageBus(),
-        provider=provider,
-        workspace=tmp_path,
-        model="test-model",
-    )
-    loop.tools.get_definitions = MagicMock(return_value=[])
-    loop.consolidator.maybe_consolidate_by_tokens = AsyncMock(return_value=False)  # type: ignore[method-assign]
-
-    session = loop.sessions.get_or_create("cli:test")
-    session.messages = [
-        {"role": "user", "content": "old user", "timestamp": "2026-01-01T00:00:00"},
-        {
-            "role": "assistant",
-            "content": "",
-            "tool_calls": [
-                {
-                    "id": "call_missing",
-                    "type": "function",
-                    "function": {"name": "read_file", "arguments": "{}"},
-                }
-            ],
-            "timestamp": "2026-01-01T00:00:01",
-        },
-        {"role": "assistant", "content": "old tail", "timestamp": "2026-01-01T00:00:02"},
-    ]
-    loop.sessions.save(session)
-
-    result = await loop._process_message(
-        InboundMessage(channel="cli", sender_id="user", chat_id="test", content="new prompt")
-    )
-
-    assert result is not None
-    assert result.content == "new answer"
-
-    request_messages = provider.chat_with_retry.await_args.kwargs["messages"]
-    synthetic = [
-        message
-        for message in request_messages
-        if message.get("role") == "tool" and message.get("tool_call_id") == "call_missing"
-    ]
-    assert len(synthetic) == 1
-    assert synthetic[0]["content"] == _BACKFILL_CONTENT
-
-    session_after = loop.sessions.get_or_create("cli:test")
-    assert [
-        {
-            key: value
-            for key, value in message.items()
-            if key in {"role", "content", "tool_call_id", "name", "tool_calls"}
-        }
-        for message in session_after.messages
-    ] == [
-        {"role": "user", "content": "old user"},
-        {
-            "role": "assistant",
-            "content": "",
-            "tool_calls": [
-                {
-                    "id": "call_missing",
-                    "type": "function",
-                    "function": {"name": "read_file", "arguments": "{}"},
-                }
-            ],
-        },
-        {"role": "assistant", "content": "old tail"},
-        {"role": "user", "content": "new prompt"},
-        {"role": "assistant", "content": "new answer"},
-    ]
-
-
-@pytest.mark.asyncio
-async def test_runner_backfill_only_mutates_model_context_not_returned_messages():
-    """Runner should repair orphaned tool calls for the model without rewriting result.messages."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _BACKFILL_CONTENT
-
-    provider = MagicMock()
-    captured_messages: list[dict] = []
-
-    async def chat_with_retry(*, messages, **kwargs):
-        captured_messages[:] = messages
-        return LLMResponse(content="done", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    initial_messages = [
-        {"role": "system", "content": "system"},
-        {"role": "user", "content": "old user"},
-        {
-            "role": "assistant",
-            "content": "",
-            "tool_calls": [
-                {
-                    "id": "call_missing",
-                    "type": "function",
-                    "function": {"name": "read_file", "arguments": "{}"},
-                }
-            ],
-        },
-        {"role": "assistant", "content": "old tail"},
-        {"role": "user", "content": "new prompt"},
-    ]
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=initial_messages,
-        tools=tools,
-        model="test-model",
-        max_iterations=3,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    synthetic = [
-        message
-        for message in captured_messages
-        if message.get("role") == "tool" and message.get("tool_call_id") == "call_missing"
-    ]
-    assert len(synthetic) == 1
-    assert synthetic[0]["content"] == _BACKFILL_CONTENT
-
-    assert [
-        {
-            key: value
-            for key, value in message.items()
-            if key in {"role", "content", "tool_call_id", "name", "tool_calls"}
-        }
-        for message in result.messages
-    ] == [
-        {"role": "system", "content": "system"},
-        {"role": "user", "content": "old user"},
-        {
-            "role": "assistant",
-            "content": "",
-            "tool_calls": [
-                {
-                    "id": "call_missing",
-                    "type": "function",
-                    "function": {"name": "read_file", "arguments": "{}"},
-                }
-            ],
-        },
-        {"role": "assistant", "content": "old tail"},
-        {"role": "user", "content": "new prompt"},
-        {"role": "assistant", "content": "done"},
-    ]
-
-
-# ---------------------------------------------------------------------------
-# Microcompact (stale tool result compaction)
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.asyncio
-async def test_microcompact_replaces_old_tool_results():
-    """Tool results beyond _MICROCOMPACT_KEEP_RECENT should be summarized."""
-    from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT
-
-    total = _MICROCOMPACT_KEEP_RECENT + 5
-    long_content = "x" * 600
-    messages: list[dict] = [{"role": "system", "content": "sys"}]
-    for i in range(total):
-        messages.append({
-            "role": "assistant",
-            "content": "",
-            "tool_calls": [{"id": f"c{i}", "type": "function", "function": {"name": "read_file", "arguments": "{}"}}],
-        })
-        messages.append({
-            "role": "tool", "tool_call_id": f"c{i}", "name": "read_file",
-            "content": long_content,
-        })
-
-    result = AgentRunner._microcompact(messages)
-    tool_msgs = [m for m in result if m.get("role") == "tool"]
-    stale_count = total - _MICROCOMPACT_KEEP_RECENT
-    compacted = [m for m in tool_msgs if "omitted from context" in str(m.get("content", ""))]
-    preserved = [m for m in tool_msgs if m.get("content") == long_content]
-    assert len(compacted) == stale_count
-    assert len(preserved) == _MICROCOMPACT_KEEP_RECENT
-
-
-@pytest.mark.asyncio
-async def test_microcompact_preserves_short_results():
-    """Short tool results (< _MICROCOMPACT_MIN_CHARS) should not be replaced."""
-    from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT
-
-    total = _MICROCOMPACT_KEEP_RECENT + 5
-    messages: list[dict] = []
-    for i in range(total):
-        messages.append({
-            "role": "assistant",
-            "content": "",
-            "tool_calls": [{"id": f"c{i}", "type": "function", "function": {"name": "exec", "arguments": "{}"}}],
-        })
-        messages.append({
-            "role": "tool", "tool_call_id": f"c{i}", "name": "exec",
-            "content": "short",
-        })
-
-    result = AgentRunner._microcompact(messages)
-    assert result is messages  # no copy needed — all stale results are short
-
-
-@pytest.mark.asyncio
-async def test_microcompact_skips_non_compactable_tools():
-    """Non-compactable tools (e.g. 'message') should never be replaced."""
-    from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT
-
-    total = _MICROCOMPACT_KEEP_RECENT + 5
-    long_content = "y" * 1000
-    messages: list[dict] = []
-    for i in range(total):
-        messages.append({
-            "role": "assistant",
-            "content": "",
-            "tool_calls": [{"id": f"c{i}", "type": "function", "function": {"name": "message", "arguments": "{}"}}],
-        })
-        messages.append({
-            "role": "tool", "tool_call_id": f"c{i}", "name": "message",
-            "content": long_content,
-        })
-
-    result = AgentRunner._microcompact(messages)
-    assert result is messages  # no compactable tools found
-
-
-@pytest.mark.asyncio
-async def test_runner_tool_error_preserves_tool_results_in_messages():
-    """When a tool raises a fatal error, its results must still be appended
-    to messages so the session never contains orphan tool_calls (#2943)."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-
-    async def chat_with_retry(*, messages, **kwargs):
-        return LLMResponse(
-            content=None,
-            tool_calls=[
-                ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a"}),
-                ToolCallRequest(id="tc2", name="exec", arguments={"cmd": "bad"}),
-            ],
-            usage={},
-        )
-
-    provider.chat_with_retry = chat_with_retry
-    provider.chat_stream_with_retry = chat_with_retry
-
-    call_idx = 0
-
-    async def fake_execute(name, args, **kw):
-        nonlocal call_idx
-        call_idx += 1
-        if call_idx == 2:
-            raise RuntimeError("boom")
-        return "file content"
-
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(side_effect=fake_execute)
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "do stuff"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        fail_on_tool_error=True,
-    ))
-
-    assert result.stop_reason == "tool_error"
-    # Both tool results must be in messages even though tc2 had a fatal error.
-    tool_msgs = [m for m in result.messages if m.get("role") == "tool"]
-    assert len(tool_msgs) == 2
-    assert tool_msgs[0]["tool_call_id"] == "tc1"
-    assert tool_msgs[1]["tool_call_id"] == "tc2"
-    # The assistant message with tool_calls must precede the tool results.
-    asst_tc_idx = next(
-        i for i, m in enumerate(result.messages)
-        if m.get("role") == "assistant" and m.get("tool_calls")
-    )
-    tool_indices = [
-        i for i, m in enumerate(result.messages) if m.get("role") == "tool"
-    ]
-    assert all(ti > asst_tc_idx for ti in tool_indices)
-
-
-def test_governance_repairs_orphans_after_snip():
-    """After _snip_history clips an assistant+tool_calls, the second
-    _drop_orphan_tool_results pass must clean up the resulting orphans."""
-    from nanobot.agent.runner import AgentRunner
-
-    messages = [
-        {"role": "system", "content": "system"},
-        {"role": "user", "content": "old msg"},
-        {"role": "assistant", "content": None,
-         "tool_calls": [{"id": "tc_old", "type": "function",
-                         "function": {"name": "search", "arguments": "{}"}}]},
-        {"role": "tool", "tool_call_id": "tc_old", "name": "search",
-         "content": "old result"},
-        {"role": "assistant", "content": "old answer"},
-        {"role": "user", "content": "new msg"},
-    ]
-
-    # Simulate snipping that keeps only the tail: drop the assistant with
-    # tool_calls but keep its tool result (orphan).
-    snipped = [
-        {"role": "system", "content": "system"},
-        {"role": "tool", "tool_call_id": "tc_old", "name": "search",
-         "content": "old result"},
-        {"role": "assistant", "content": "old answer"},
-        {"role": "user", "content": "new msg"},
-    ]
-
-    cleaned = AgentRunner._drop_orphan_tool_results(snipped)
-    # The orphan tool result should be removed.
-    assert not any(
-        m.get("role") == "tool" and m.get("tool_call_id") == "tc_old"
-        for m in cleaned
-    )
-
-
-def test_governance_fallback_still_repairs_orphans():
-    """When full governance fails, the fallback must still run
-    _drop_orphan_tool_results and _backfill_missing_tool_results."""
-    from nanobot.agent.runner import AgentRunner
-
-    # Messages with an orphan tool result (no matching assistant tool_call).
-    messages = [
-        {"role": "user", "content": "hello"},
-        {"role": "tool", "tool_call_id": "orphan_tc", "name": "read",
-         "content": "stale"},
-        {"role": "assistant", "content": "hi"},
-    ]
-
-    repaired = AgentRunner._drop_orphan_tool_results(messages)
-    repaired = AgentRunner._backfill_missing_tool_results(repaired)
-    # Orphan tool result should be gone.
-    assert not any(m.get("tool_call_id") == "orphan_tc" for m in repaired)
-# ── Mid-turn injection tests ──────────────────────────────────────────────
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_returns_empty_when_no_callback():
-    """No injection_callback → empty list."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    runner = AgentRunner(provider)
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    spec = AgentRunSpec(
-        initial_messages=[], tools=tools, model="m",
-        max_iterations=1, max_tool_result_chars=1000,
-        injection_callback=None,
-    )
-    result = await runner._drain_injections(spec)
-    assert result == []
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_extracts_content_from_inbound_messages():
-    """Should extract .content from InboundMessage objects."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-    from nanobot.bus.events import InboundMessage
-
-    provider = MagicMock()
-    runner = AgentRunner(provider)
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    msgs = [
-        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="hello"),
-        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="world"),
-    ]
-
-    async def cb():
-        return msgs
-
-    spec = AgentRunSpec(
-        initial_messages=[], tools=tools, model="m",
-        max_iterations=1, max_tool_result_chars=1000,
-        injection_callback=cb,
-    )
-    result = await runner._drain_injections(spec)
-    assert result == [
-        {"role": "user", "content": "hello"},
-        {"role": "user", "content": "world"},
-    ]
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_passes_limit_to_callback_when_supported():
-    """Limit-aware callbacks can preserve overflow in their own queue."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTIONS_PER_TURN
-    from nanobot.bus.events import InboundMessage
-
-    provider = MagicMock()
-    runner = AgentRunner(provider)
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    seen_limits: list[int] = []
-
-    msgs = [
-        InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg{i}")
-        for i in range(_MAX_INJECTIONS_PER_TURN + 3)
-    ]
-
-    async def cb(*, limit: int):
-        seen_limits.append(limit)
-        return msgs[:limit]
-
-    spec = AgentRunSpec(
-        initial_messages=[], tools=tools, model="m",
-        max_iterations=1, max_tool_result_chars=1000,
-        injection_callback=cb,
-    )
-    result = await runner._drain_injections(spec)
-    assert seen_limits == [_MAX_INJECTIONS_PER_TURN]
-    assert result == [
-        {"role": "user", "content": "msg0"},
-        {"role": "user", "content": "msg1"},
-        {"role": "user", "content": "msg2"},
-    ]
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_skips_empty_content():
-    """Messages with blank content should be filtered out."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-    from nanobot.bus.events import InboundMessage
-
-    provider = MagicMock()
-    runner = AgentRunner(provider)
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    msgs = [
-        InboundMessage(channel="cli", sender_id="u", chat_id="c", content=""),
-        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="   "),
-        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="valid"),
-    ]
-
-    async def cb():
-        return msgs
-
-    spec = AgentRunSpec(
-        initial_messages=[], tools=tools, model="m",
-        max_iterations=1, max_tool_result_chars=1000,
-        injection_callback=cb,
-    )
-    result = await runner._drain_injections(spec)
-    assert result == [{"role": "user", "content": "valid"}]
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_handles_callback_exception():
-    """If the callback raises, return empty list (error is logged)."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    runner = AgentRunner(provider)
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    async def cb():
-        raise RuntimeError("boom")
-
-    spec = AgentRunSpec(
-        initial_messages=[], tools=tools, model="m",
-        max_iterations=1, max_tool_result_chars=1000,
-        injection_callback=cb,
-    )
-    result = await runner._drain_injections(spec)
-    assert result == []
-
-
-@pytest.mark.asyncio
-async def test_checkpoint1_injects_after_tool_execution():
-    """Follow-up messages are injected after tool execution, before next LLM call."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-    from nanobot.bus.events import InboundMessage
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-    captured_messages = []
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        captured_messages.append(list(messages))
-        if call_count["n"] == 1:
-            return LLMResponse(
-                content="using tool",
-                tool_calls=[ToolCallRequest(id="c1", name="read_file", arguments={"path": "x"})],
-                usage={},
-            )
-        return LLMResponse(content="final answer", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(return_value="file content")
-
-    injection_queue = asyncio.Queue()
-    inject_cb = _make_injection_callback(injection_queue)
-
-    # Put a follow-up message in the queue before the run starts
-    await injection_queue.put(
-        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up question")
-    )
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "hello"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=5,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        injection_callback=inject_cb,
-    ))
-
-    assert result.had_injections is True
-    assert result.final_content == "final answer"
-    # The second call should have the injected user message
-    assert call_count["n"] == 2
-    last_messages = captured_messages[-1]
-    injected = [m for m in last_messages if m.get("role") == "user" and m.get("content") == "follow-up question"]
-    assert len(injected) == 1
-
-
-@pytest.mark.asyncio
-async def test_checkpoint2_injects_after_final_response_with_resuming_stream():
-    """After final response, if injections exist, stream_end should get resuming=True."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-    from nanobot.agent.hook import AgentHook, AgentHookContext
-    from nanobot.bus.events import InboundMessage
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-    stream_end_calls = []
-
-    class TrackingHook(AgentHook):
-        def wants_streaming(self) -> bool:
-            return True
-
-        async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None:
-            stream_end_calls.append(resuming)
-
-        def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None:
-            return content
-
-    async def chat_stream_with_retry(*, messages, on_content_delta=None, **kwargs):
-        call_count["n"] += 1
-        if call_count["n"] == 1:
-            return LLMResponse(content="first answer", tool_calls=[], usage={})
-        return LLMResponse(content="second answer", tool_calls=[], usage={})
-
-    provider.chat_stream_with_retry = chat_stream_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    injection_queue = asyncio.Queue()
-    inject_cb = _make_injection_callback(injection_queue)
-
-    # Inject a follow-up that arrives during the first response
-    await injection_queue.put(
-        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="quick follow-up")
-    )
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "hello"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=5,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        hook=TrackingHook(),
-        injection_callback=inject_cb,
-    ))
-
-    assert result.had_injections is True
-    assert result.final_content == "second answer"
-    assert call_count["n"] == 2
-    # First stream_end should have resuming=True (because injections found)
-    assert stream_end_calls[0] is True
-    # Second (final) stream_end should have resuming=False
-    assert stream_end_calls[-1] is False
-
-
-@pytest.mark.asyncio
-async def test_checkpoint2_preserves_final_response_in_history_before_followup():
-    """A follow-up injected after a final answer must still see that answer in history."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-    from nanobot.bus.events import InboundMessage
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-    captured_messages = []
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        captured_messages.append([dict(message) for message in messages])
-        if call_count["n"] == 1:
-            return LLMResponse(content="first answer", tool_calls=[], usage={})
-        return LLMResponse(content="second answer", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    injection_queue = asyncio.Queue()
-    inject_cb = _make_injection_callback(injection_queue)
-
-    await injection_queue.put(
-        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up question")
-    )
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "hello"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=5,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        injection_callback=inject_cb,
-    ))
-
-    assert result.final_content == "second answer"
-    assert call_count["n"] == 2
-    assert captured_messages[-1] == [
-        {"role": "user", "content": "hello"},
-        {"role": "assistant", "content": "first answer"},
-        {"role": "user", "content": "follow-up question"},
-    ]
-    assert [
-        {"role": message["role"], "content": message["content"]}
-        for message in result.messages
-        if message.get("role") == "assistant"
-    ] == [
-        {"role": "assistant", "content": "first answer"},
-        {"role": "assistant", "content": "second answer"},
-    ]
-
-
-@pytest.mark.asyncio
-async def test_loop_injected_followup_preserves_image_media(tmp_path):
-    """Mid-turn follow-ups with images should keep multimodal content."""
-    from nanobot.agent.loop import AgentLoop
-    from nanobot.bus.events import InboundMessage
-    from nanobot.bus.queue import MessageBus
-
-    image_path = tmp_path / "followup.png"
-    image_path.write_bytes(base64.b64decode(
-        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+yF9kAAAAASUVORK5CYII="
-    ))
-
-    bus = MessageBus()
-    provider = MagicMock()
-    provider.get_default_model.return_value = "test-model"
-    captured_messages: list[list[dict]] = []
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        captured_messages.append(list(messages))
-        if call_count["n"] == 1:
-            return LLMResponse(content="first answer", tool_calls=[], usage={})
-        return LLMResponse(content="second answer", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
-    loop.tools.get_definitions = MagicMock(return_value=[])
-
-    pending_queue = asyncio.Queue()
-    await pending_queue.put(InboundMessage(
-        channel="cli",
-        sender_id="u",
-        chat_id="c",
-        content="",
-        media=[str(image_path)],
-    ))
-
-    final_content, _, _, _, had_injections = await loop._run_agent_loop(
-        [{"role": "user", "content": "hello"}],
-        channel="cli",
-        chat_id="c",
-        pending_queue=pending_queue,
-    )
-
-    assert final_content == "second answer"
-    assert had_injections is True
-    assert call_count["n"] == 2
-    injected_user_messages = [
-        message for message in captured_messages[-1]
-        if message.get("role") == "user" and isinstance(message.get("content"), list)
-    ]
-    assert injected_user_messages
-    assert any(
-        block.get("type") == "image_url"
-        for block in injected_user_messages[-1]["content"]
-        if isinstance(block, dict)
-    )
-
-
-@pytest.mark.asyncio
-async def test_runner_merges_multiple_injected_user_messages_without_losing_media():
-    """Multiple injected follow-ups should not create lossy consecutive user messages."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-    captured_messages = []
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        captured_messages.append([dict(message) for message in messages])
-        if call_count["n"] == 1:
-            return LLMResponse(content="first answer", tool_calls=[], usage={})
-        return LLMResponse(content="second answer", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    async def inject_cb():
-        if call_count["n"] == 1:
-            return [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
-                        {"type": "text", "text": "look at this"},
-                    ],
-                },
-                {"role": "user", "content": "and answer briefly"},
-            ]
-        return []
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "hello"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=5,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        injection_callback=inject_cb,
-    ))
-
-    assert result.final_content == "second answer"
-    assert call_count["n"] == 2
-    second_call = captured_messages[-1]
-    user_messages = [message for message in second_call if message.get("role") == "user"]
-    assert len(user_messages) == 2
-    injected = user_messages[-1]
-    assert isinstance(injected["content"], list)
-    assert any(
-        block.get("type") == "image_url"
-        for block in injected["content"]
-        if isinstance(block, dict)
-    )
-    assert any(
-        block.get("type") == "text" and block.get("text") == "and answer briefly"
-        for block in injected["content"]
-        if isinstance(block, dict)
-    )
-
-
-@pytest.mark.asyncio
-async def test_injection_cycles_capped_at_max():
-    """Injection cycles should be capped at _MAX_INJECTION_CYCLES."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES
-    from nanobot.bus.events import InboundMessage
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        return LLMResponse(content=f"answer-{call_count['n']}", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    drain_count = {"n": 0}
-
-    async def inject_cb():
-        drain_count["n"] += 1
-        # Only inject for the first _MAX_INJECTION_CYCLES drains
-        if drain_count["n"] <= _MAX_INJECTION_CYCLES:
-            return [InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg-{drain_count['n']}")]
-        return []
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "start"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=20,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        injection_callback=inject_cb,
-    ))
-
-    assert result.had_injections is True
-    # Should be capped: _MAX_INJECTION_CYCLES injection rounds + 1 final round
-    assert call_count["n"] == _MAX_INJECTION_CYCLES + 1
-
-
-@pytest.mark.asyncio
-async def test_no_injections_flag_is_false_by_default():
-    """had_injections should be False when no injection callback or no messages."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-
-    async def chat_with_retry(**kwargs):
-        return LLMResponse(content="done", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "hi"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-    ))
-
-    assert result.had_injections is False
-
-
-@pytest.mark.asyncio
-async def test_pending_queue_cleanup_on_dispatch(tmp_path):
-    """_pending_queues should be cleaned up after _dispatch completes."""
-    loop = _make_loop(tmp_path)
-
-    async def chat_with_retry(**kwargs):
-        return LLMResponse(content="done", tool_calls=[], usage={})
-
-    loop.provider.chat_with_retry = chat_with_retry
-
-    from nanobot.bus.events import InboundMessage
-
-    msg = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="hello")
-    # The queue should not exist before dispatch
-    assert msg.session_key not in loop._pending_queues
-
-    await loop._dispatch(msg)
-
-    # The queue should be cleaned up after dispatch
-    assert msg.session_key not in loop._pending_queues
-
-
-@pytest.mark.asyncio
-async def test_followup_routed_to_pending_queue(tmp_path):
-    """Unified-session follow-ups should route into the active pending queue."""
-    from nanobot.agent.loop import UNIFIED_SESSION_KEY
-    from nanobot.bus.events import InboundMessage
-
-    loop = _make_loop(tmp_path)
-    loop._unified_session = True
-    loop._dispatch = AsyncMock()  # type: ignore[method-assign]
-
-    pending = asyncio.Queue(maxsize=20)
-    loop._pending_queues[UNIFIED_SESSION_KEY] = pending
-
-    run_task = asyncio.create_task(loop.run())
-    msg = InboundMessage(channel="discord", sender_id="u", chat_id="c", content="follow-up")
-    await loop.bus.publish_inbound(msg)
-
-    deadline = time.time() + 2
-    while pending.empty() and time.time() < deadline:
-        await asyncio.sleep(0.01)
-
-    loop.stop()
-    await asyncio.wait_for(run_task, timeout=2)
-
-    assert loop._dispatch.await_count == 0
-    assert not pending.empty()
-    queued_msg = pending.get_nowait()
-    assert queued_msg.content == "follow-up"
-    assert queued_msg.session_key == UNIFIED_SESSION_KEY
-
-
-@pytest.mark.asyncio
-async def test_pending_queue_preserves_overflow_for_next_injection_cycle(tmp_path):
-    """Pending queue should leave overflow messages queued for later drains."""
-    from nanobot.agent.loop import AgentLoop
-    from nanobot.bus.events import InboundMessage
-    from nanobot.bus.queue import MessageBus
-    from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN
-
-    bus = MessageBus()
-    provider = MagicMock()
-    provider.get_default_model.return_value = "test-model"
-    captured_messages: list[list[dict]] = []
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        captured_messages.append([dict(message) for message in messages])
-        return LLMResponse(content=f"answer-{call_count['n']}", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
-    loop.tools.get_definitions = MagicMock(return_value=[])
-
-    pending_queue = asyncio.Queue()
-    total_followups = _MAX_INJECTIONS_PER_TURN + 2
-    for idx in range(total_followups):
-        await pending_queue.put(InboundMessage(
-            channel="cli",
-            sender_id="u",
-            chat_id="c",
-            content=f"follow-up-{idx}",
-        ))
-
-    final_content, _, _, _, had_injections = await loop._run_agent_loop(
-        [{"role": "user", "content": "hello"}],
-        channel="cli",
-        chat_id="c",
-        pending_queue=pending_queue,
-    )
-
-    assert final_content == "answer-3"
-    assert had_injections is True
-    assert call_count["n"] == 3
-    flattened_user_content = "\n".join(
-        message["content"]
-        for message in captured_messages[-1]
-        if message.get("role") == "user" and isinstance(message.get("content"), str)
-    )
-    for idx in range(total_followups):
-        assert f"follow-up-{idx}" in flattened_user_content
-    assert pending_queue.empty()
-
-
-@pytest.mark.asyncio
-async def test_pending_queue_full_falls_back_to_queued_task(tmp_path):
-    """QueueFull should preserve the message by dispatching a queued task."""
-    from nanobot.bus.events import InboundMessage
-
-    loop = _make_loop(tmp_path)
-    loop._dispatch = AsyncMock()  # type: ignore[method-assign]
-
-    pending = asyncio.Queue(maxsize=1)
-    pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="already queued"))
-    loop._pending_queues["cli:c"] = pending
-
-    run_task = asyncio.create_task(loop.run())
-    msg = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up")
-    await loop.bus.publish_inbound(msg)
-
-    deadline = time.time() + 2
-    while loop._dispatch.await_count == 0 and time.time() < deadline:
-        await asyncio.sleep(0.01)
-
-    loop.stop()
-    await asyncio.wait_for(run_task, timeout=2)
-
-    assert loop._dispatch.await_count == 1
-    dispatched_msg = loop._dispatch.await_args.args[0]
-    assert dispatched_msg.content == "follow-up"
-    assert pending.qsize() == 1
-
-
-@pytest.mark.asyncio
-async def test_dispatch_republishes_leftover_queue_messages(tmp_path):
-    """Messages left in the pending queue after _dispatch are re-published to the bus.
-
-    This tests the finally-block cleanup that prevents message loss when
-    the runner exits early (e.g., max_iterations, tool_error) with messages
-    still in the queue.
-    """
-    from nanobot.bus.events import InboundMessage
-
-    loop = _make_loop(tmp_path)
-    bus = loop.bus
-
-    # Simulate a completed dispatch by manually registering a queue
-    # with leftover messages, then running the cleanup logic directly.
-    pending = asyncio.Queue(maxsize=20)
-    session_key = "cli:c"
-    loop._pending_queues[session_key] = pending
-    pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="leftover-1"))
-    pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="leftover-2"))
-
-    # Execute the cleanup logic from the finally block
-    queue = loop._pending_queues.pop(session_key, None)
-    assert queue is not None
-    leftover = 0
-    while True:
-        try:
-            item = queue.get_nowait()
-        except asyncio.QueueEmpty:
-            break
-        await bus.publish_inbound(item)
-        leftover += 1
-
-    assert leftover == 2
-
-    # Verify the messages are now on the bus
-    msgs = []
-    while not bus.inbound.empty():
-        msgs.append(await asyncio.wait_for(bus.consume_inbound(), timeout=0.5))
-    contents = [m.content for m in msgs]
-    assert "leftover-1" in contents
-    assert "leftover-2" in contents
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_on_fatal_tool_error():
-    """Pending injections should be drained even when a fatal tool error occurs."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-    from nanobot.bus.events import InboundMessage
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        if call_count["n"] == 1:
-            return LLMResponse(
-                content="",
-                tool_calls=[ToolCallRequest(id="c1", name="exec", arguments={"cmd": "bad"})],
-                usage={},
-            )
-        # Second call: respond normally to the injected follow-up
-        return LLMResponse(content="reply to follow-up", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(side_effect=RuntimeError("tool exploded"))
-
-    injection_queue = asyncio.Queue()
-    inject_cb = _make_injection_callback(injection_queue)
-
-    await injection_queue.put(
-        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after error")
-    )
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "hello"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=5,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        fail_on_tool_error=True,
-        injection_callback=inject_cb,
-    ))
-
-    assert result.had_injections is True
-    assert result.final_content == "reply to follow-up"
-    # The injection should be in the messages history
-    injected = [
-        m for m in result.messages
-        if m.get("role") == "user" and m.get("content") == "follow-up after error"
-    ]
-    assert len(injected) == 1
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_on_llm_error():
-    """Pending injections should be drained when the LLM returns an error finish_reason."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-    from nanobot.bus.events import InboundMessage
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        if call_count["n"] == 1:
-            return LLMResponse(
-                content=None,
-                tool_calls=[],
-                finish_reason="error",
-                usage={},
-            )
-        # Second call: respond normally to the injected follow-up
-        return LLMResponse(content="recovered answer", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    injection_queue = asyncio.Queue()
-    inject_cb = _make_injection_callback(injection_queue)
-
-    await injection_queue.put(
-        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after LLM error")
-    )
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[
-            {"role": "user", "content": "hello"},
-            {"role": "assistant", "content": "previous response"},
-            {"role": "user", "content": "trigger error"},
-        ],
-        tools=tools,
-        model="test-model",
-        max_iterations=5,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        injection_callback=inject_cb,
-    ))
-
-    assert result.had_injections is True
-    assert result.final_content == "recovered answer"
-    injected = [
-        m for m in result.messages
-        if m.get("role") == "user" and "follow-up after LLM error" in str(m.get("content", ""))
-    ]
-    assert len(injected) == 1
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_on_empty_final_response():
-    """Pending injections should be drained when the runner exits due to empty response."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_EMPTY_RETRIES
-    from nanobot.bus.events import InboundMessage
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        if call_count["n"] <= _MAX_EMPTY_RETRIES + 1:
-            return LLMResponse(content="", tool_calls=[], usage={})
-        # After retries exhausted + injection drain, respond normally
-        return LLMResponse(content="answer after empty", tool_calls=[], usage={})
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    injection_queue = asyncio.Queue()
-    inject_cb = _make_injection_callback(injection_queue)
-
-    await injection_queue.put(
-        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after empty")
-    )
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[
-            {"role": "user", "content": "hello"},
-            {"role": "assistant", "content": "previous response"},
-            {"role": "user", "content": "trigger empty"},
-        ],
-        tools=tools,
-        model="test-model",
-        max_iterations=10,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        injection_callback=inject_cb,
-    ))
-
-    assert result.had_injections is True
-    assert result.final_content == "answer after empty"
-    injected = [
-        m for m in result.messages
-        if m.get("role") == "user" and "follow-up after empty" in str(m.get("content", ""))
-    ]
-    assert len(injected) == 1
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_on_max_iterations():
-    """Pending injections should be drained when the runner hits max_iterations.
-
-    Unlike other error paths, max_iterations cannot continue the loop, so
-    injections are appended to messages but not processed by the LLM.
-    The key point is they are consumed from the queue to prevent re-publish.
-    """
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-    from nanobot.bus.events import InboundMessage
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        return LLMResponse(
-            content="",
-            tool_calls=[ToolCallRequest(id=f"c{call_count['n']}", name="read_file", arguments={"path": "x"})],
-            usage={},
-        )
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(return_value="file content")
-
-    injection_queue = asyncio.Queue()
-    inject_cb = _make_injection_callback(injection_queue)
-
-    await injection_queue.put(
-        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after max iters")
-    )
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "hello"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=2,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        injection_callback=inject_cb,
-    ))
-
-    assert result.stop_reason == "max_iterations"
-    assert result.had_injections is True
-    # The injection was consumed from the queue (preventing re-publish)
-    assert injection_queue.empty()
-    # The injection message is appended to conversation history
-    injected = [
-        m for m in result.messages
-        if m.get("role") == "user" and m.get("content") == "follow-up after max iters"
-    ]
-    assert len(injected) == 1
-
-
-@pytest.mark.asyncio
-async def test_drain_injections_set_flag_when_followup_arrives_after_last_iteration():
-    """Late follow-ups drained in max_iterations should still flip had_injections."""
-    from nanobot.agent.hook import AgentHook
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-    from nanobot.bus.events import InboundMessage
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        return LLMResponse(
-            content="",
-            tool_calls=[ToolCallRequest(id=f"c{call_count['n']}", name="read_file", arguments={"path": "x"})],
-            usage={},
-        )
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    tools.execute = AsyncMock(return_value="file content")
-
-    injection_queue = asyncio.Queue()
-    inject_cb = _make_injection_callback(injection_queue)
-
-    class InjectOnLastAfterIterationHook(AgentHook):
-        def __init__(self) -> None:
-            self.after_iteration_calls = 0
-
-        async def after_iteration(self, context) -> None:
-            self.after_iteration_calls += 1
-            if self.after_iteration_calls == 2:
-                await injection_queue.put(
-                    InboundMessage(
-                        channel="cli",
-                        sender_id="u",
-                        chat_id="c",
-                        content="late follow-up after max iters",
-                    )
-                )
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "hello"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=2,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        injection_callback=inject_cb,
-        hook=InjectOnLastAfterIterationHook(),
-    ))
-
-    assert result.stop_reason == "max_iterations"
-    assert result.had_injections is True
-    assert injection_queue.empty()
-    injected = [
-        m for m in result.messages
-        if m.get("role") == "user" and m.get("content") == "late follow-up after max iters"
-    ]
-    assert len(injected) == 1
-
-
-@pytest.mark.asyncio
-async def test_injection_cycle_cap_on_error_path():
-    """Injection cycles should be capped even when every iteration hits an LLM error."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES
-    from nanobot.bus.events import InboundMessage
-
-    provider = MagicMock()
-    call_count = {"n": 0}
-
-    async def chat_with_retry(*, messages, **kwargs):
-        call_count["n"] += 1
-        return LLMResponse(
-            content=None,
-            tool_calls=[],
-            finish_reason="error",
-            usage={},
-        )
-
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    drain_count = {"n": 0}
-
-    async def inject_cb():
-        drain_count["n"] += 1
-        if drain_count["n"] <= _MAX_INJECTION_CYCLES:
-            return [InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg-{drain_count['n']}")]
-        return []
-
-    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[
-            {"role": "user", "content": "hello"},
-            {"role": "assistant", "content": "previous"},
-            {"role": "user", "content": "trigger error"},
-        ],
-        tools=tools,
-        model="test-model",
-        max_iterations=20,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        injection_callback=inject_cb,
-    ))
-
-    assert result.had_injections is True
-    # Should cap: _MAX_INJECTION_CYCLES drained rounds + 1 final round that breaks
-    assert call_count["n"] == _MAX_INJECTION_CYCLES + 1
-
-
-# ---------------------------------------------------------------------------
-# Regression tests for GLM-1214: _snip_history must preserve a user message
-# ---------------------------------------------------------------------------
-
-
-def test_snip_history_preserves_user_message_after_truncation(monkeypatch):
-    """When _snip_history truncates messages and the only user message ends up
-    outside the kept window, the method must recover the nearest user message
-    so the resulting sequence is valid for providers like GLM (which reject
-    system→assistant with error 1214).
-
-    This reproduces the exact scenario from the bug report:
-    - Normal interaction: user asks, assistant calls tool, tool returns,
-      assistant replies.
-    - Injection adds a phantom user message, triggering more tool calls.
-    - _snip_history activates, keeping only recent assistant/tool pairs.
-    - The injected user message is in the truncated prefix and gets lost.
-    """
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    runner = AgentRunner(provider)
-
-    messages = [
-        {"role": "system", "content": "system"},
-        {"role": "assistant", "content": "previous reply"},
-        {"role": "user", "content": ".nanobot的同目录"},
-        {
-            "role": "assistant",
-            "content": None,
-            "tool_calls": [{"id": "tc_1", "type": "function", "function": {"name": "exec", "arguments": "{}"}}],
-        },
-        {"role": "tool", "tool_call_id": "tc_1", "content": "tool output 1"},
-        {
-            "role": "assistant",
-            "content": None,
-            "tool_calls": [{"id": "tc_2", "type": "function", "function": {"name": "exec", "arguments": "{}"}}],
-        },
-        {"role": "tool", "tool_call_id": "tc_2", "content": "tool output 2"},
-    ]
-
-    spec = AgentRunSpec(
-        initial_messages=messages,
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        context_window_tokens=2000,
-        context_block_limit=100,
-    )
-
-    # Make estimate_prompt_tokens_chain report above budget so _snip_history activates.
-    monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_a, **_kw: (500, None))
-    # Make kept window small: only the last 2 messages fit the budget.
-    token_sizes = {
-        "system": 0,
-        "previous reply": 200,
-        ".nanobot的同目录": 80,
-        "tool output 1": 80,
-        "tool output 2": 80,
-    }
-    monkeypatch.setattr(
-        "nanobot.agent.runner.estimate_message_tokens",
-        lambda msg: token_sizes.get(str(msg.get("content")), 100),
-    )
-
-    trimmed = runner._snip_history(spec, messages)
-
-    # The first non-system message MUST be user (not assistant).
-    non_system = [m for m in trimmed if m.get("role") != "system"]
-    assert non_system, "trimmed should contain at least one non-system message"
-    assert non_system[0]["role"] == "user", (
-        f"First non-system message must be 'user', got '{non_system[0]['role']}'. "
-        f"Roles: {[m['role'] for m in trimmed]}"
-    )
-
-
-def test_snip_history_no_user_at_all_falls_back_gracefully(monkeypatch):
-    """Edge case: if non_system has zero user messages, _snip_history should
-    still return a valid sequence (not crash or produce system→assistant)."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    provider = MagicMock()
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-    runner = AgentRunner(provider)
-
-    messages = [
-        {"role": "system", "content": "system"},
-        {"role": "assistant", "content": "reply"},
-        {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
-        {"role": "assistant", "content": "reply 2"},
-        {"role": "tool", "tool_call_id": "tc_2", "content": "result 2"},
-    ]
-
-    spec = AgentRunSpec(
-        initial_messages=messages,
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        context_window_tokens=2000,
-        context_block_limit=100,
-    )
-
-    monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_a, **_kw: (500, None))
-    monkeypatch.setattr(
-        "nanobot.agent.runner.estimate_message_tokens",
-        lambda msg: 100,
-    )
-
-    trimmed = runner._snip_history(spec, messages)
-
-    # Should not crash.  The result should still be a valid list.
-    assert isinstance(trimmed, list)
-    # Must have at least system.
-    assert any(m.get("role") == "system" for m in trimmed)
-    # The _enforce_role_alternation safety net must be able to fix whatever
-    # _snip_history returns here — verify it produces a valid sequence.
-    from nanobot.providers.base import LLMProvider
-    fixed = LLMProvider._enforce_role_alternation(trimmed)
-    non_system = [m for m in fixed if m["role"] != "system"]
-    if non_system:
-        assert non_system[0]["role"] in ("user", "tool"), (
-            f"Safety net should ensure first non-system is user/tool, got {non_system[0]['role']}"
-        )
-
-
-@pytest.mark.asyncio
-async def test_runner_binds_on_retry_wait_to_retry_callback_not_progress():
-    """Regression: provider retry heartbeats must route through
-    ``retry_wait_callback``, not ``progress_callback``. Binding them to
-    the progress callback (as an earlier runtime refactor did) caused
-    internal retry diagnostics like "Model request failed, retry in 1s"
-    to leak to end-user channels as normal progress updates.
-    """
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
-
-    captured: dict = {}
-
-    async def chat_with_retry(**kwargs):
-        captured.update(kwargs)
-        return LLMResponse(content="done", tool_calls=[], usage={})
-
-    provider = MagicMock()
-    provider.chat_with_retry = chat_with_retry
-    tools = MagicMock()
-    tools.get_definitions.return_value = []
-
-    progress_cb = AsyncMock()
-    retry_wait_cb = AsyncMock()
-
-    runner = AgentRunner(provider)
-    await runner.run(AgentRunSpec(
-        initial_messages=[
-            {"role": "system", "content": "system"},
-            {"role": "user", "content": "hi"},
-        ],
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        progress_callback=progress_cb,
-        retry_wait_callback=retry_wait_cb,
-    ))
-
-    assert captured["on_retry_wait"] is retry_wait_cb
-    assert captured["on_retry_wait"] is not progress_cb
diff --git a/tests/agent/test_runner_core.py b/tests/agent/test_runner_core.py
new file mode 100644
index 000000000..dd28fa1cc
--- /dev/null
+++ b/tests/agent/test_runner_core.py
@@ -0,0 +1,481 @@
+"""Tests for core AgentRunner behavior: message passing, iteration limits,
+timeouts, empty-response handling, usage accumulation, and config passthrough."""
+
+from __future__ import annotations
+
+import asyncio
+import time
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.agent.tools.registry import ToolRegistry
+from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+
+@pytest.mark.asyncio
+async def test_runner_preserves_reasoning_fields_and_tool_results():
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    captured_second_call: list[dict] = []
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:  # first call: ask for one tool call and attach reasoning fields
+            return LLMResponse(
+                content="thinking",
+                tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
+                reasoning_content="hidden reasoning",
+                thinking_blocks=[{"type": "thinking", "thinking": "step"}],
+                usage={"prompt_tokens": 5, "completion_tokens": 3},
+            )
+        captured_second_call[:] = messages  # copy the messages passed on any call after the first
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="tool result")
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[
+            {"role": "system", "content": "system"},
+            {"role": "user", "content": "do task"},
+        ],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.final_content == "done"
+    assert result.tools_used == ["list_dir"]
+    assert result.tool_events == [
+        {"name": "list_dir", "status": "ok", "detail": "tool result"}
+    ]
+
+    assistant_messages = [
+        msg for msg in captured_second_call
+        if msg.get("role") == "assistant" and msg.get("tool_calls")
+    ]
+    assert len(assistant_messages) == 1  # only the first response carried tool calls
+    assert assistant_messages[0]["reasoning_content"] == "hidden reasoning"
+    assert assistant_messages[0]["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}]
+    assert any(  # the mocked tool output must be present as a "tool" role message
+        msg.get("role") == "tool" and msg.get("content") == "tool result"
+        for msg in captured_second_call
+    )
+
+
+@pytest.mark.asyncio
+async def test_runner_returns_max_iterations_fallback():
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(  # every call requests another tool call
+        content="still working",
+        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
+    ))
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="tool result")
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.stop_reason == "max_iterations"
+    assert result.final_content == (  # the "(2)" in the text mirrors max_iterations=2 above
+        "I reached the maximum number of tool call iterations (2) "
+        "without completing the task. You can try breaking the task into smaller steps."
+    )
+    assert result.messages[-1]["role"] == "assistant"
+    assert result.messages[-1]["content"] == result.final_content  # fallback text also ends the history
+
+
+@pytest.mark.asyncio
+async def test_runner_times_out_hung_llm_request():
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+
+    async def chat_with_retry(**kwargs):
+        await asyncio.sleep(3600)  # stand-in for a provider request that hangs
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    runner = AgentRunner(provider)
+    started = time.monotonic()
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hello"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        llm_timeout_s=0.05,
+    ))
+
+    assert (time.monotonic() - started) < 1.0  # the run must end long before the 3600 s sleep would
+    assert result.stop_reason == "error"
+    assert "timed out" in (result.final_content or "").lower()  # `or ""` tolerates a None final_content
+
+
+@pytest.mark.asyncio
+async def test_runner_replaces_empty_tool_result_with_marker():
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    captured_second_call: list[dict] = []
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:  # first call: request the "noop" tool
+            return LLMResponse(
+                content="working",
+                tool_calls=[ToolCallRequest(id="call_1", name="noop", arguments={})],
+                usage={},
+            )
+        captured_second_call[:] = messages  # copy the messages passed on any call after the first
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="")  # the tool produces an empty string
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.final_content == "done"
+    tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool")
+    assert tool_message["content"] == "(noop completed with no output)"  # marker names the tool
+
+
+@pytest.mark.asyncio
+async def test_runner_retries_empty_final_response_with_summary_prompt():
+    """Empty responses get 2 silent retries before finalization kicks in."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    calls: list[dict] = []
+
+    async def chat_with_retry(*, messages, tools=None, **kwargs):
+        calls.append({"messages": messages, "tools": tools})
+        if len(calls) <= 2:  # the first two calls come back with no content and no tool calls
+            return LLMResponse(
+                content=None,
+                tool_calls=[],
+                usage={"prompt_tokens": 5, "completion_tokens": 1},
+            )
+        return LLMResponse(
+            content="final answer",
+            tool_calls=[],
+            usage={"prompt_tokens": 3, "completion_tokens": 7},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.final_content == "final answer"
+    # Calls 1-2 (empty replies) are made with tools; call 3 is made with tools=None (finalization).
+    assert len(calls) == 3
+    assert calls[0]["tools"] is not None
+    assert calls[1]["tools"] is not None
+    assert calls[2]["tools"] is None
+    assert result.usage["prompt_tokens"] == 13  # 5 + 5 + 3
+    assert result.usage["completion_tokens"] == 9  # 1 + 1 + 7
+
+
+@pytest.mark.asyncio
+async def test_runner_uses_specific_message_after_empty_finalization_retry():
+    """After silent retries + finalization all return empty, stop_reason is empty_final_response."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE
+
+    provider = MagicMock(spec=LLMProvider)
+
+    async def chat_with_retry(*, messages, **kwargs):
+        return LLMResponse(content=None, tool_calls=[], usage={})  # every call is empty
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.final_content == EMPTY_FINAL_RESPONSE_MESSAGE  # dedicated message, not a blank reply
+    assert result.stop_reason == "empty_final_response"
+
+
+@pytest.mark.asyncio
+async def test_runner_empty_response_does_not_break_tool_chain():
+    """An empty intermediate response must not kill an ongoing tool chain.
+
+    Sequence: tool_call -> empty -> tool_call -> final text.
+    The runner should recover via silent retry and complete normally.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    call_count = 0
+
+    async def chat_with_retry(*, messages, tools=None, **kwargs):
+        nonlocal call_count
+        call_count += 1
+        if call_count == 1:  # step 1: first tool call
+            return LLMResponse(
+                content=None,
+                tool_calls=[ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a.txt"})],
+                usage={"prompt_tokens": 10, "completion_tokens": 5},
+            )
+        if call_count == 2:  # step 2: empty reply in the middle of the chain
+            return LLMResponse(content=None, tool_calls=[], usage={"prompt_tokens": 10, "completion_tokens": 1})
+        if call_count == 3:  # step 3: second tool call
+            return LLMResponse(
+                content=None,
+                tool_calls=[ToolCallRequest(id="tc2", name="read_file", arguments={"path": "b.txt"})],
+                usage={"prompt_tokens": 10, "completion_tokens": 5},
+            )
+        return LLMResponse(  # step 4 onwards: final text
+            content="Here are the results.",
+            tool_calls=[],
+            usage={"prompt_tokens": 10, "completion_tokens": 10},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    provider.chat_stream_with_retry = chat_with_retry  # the same stub backs the streaming entry point
+
+    async def fake_tool(name, args, **kw):
+        return "file content"
+
+    tool_registry = MagicMock()
+    tool_registry.get_definitions.return_value = [{"type": "function", "function": {"name": "read_file"}}]
+    tool_registry.execute = AsyncMock(side_effect=fake_tool)
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "read both files"}],
+        tools=tool_registry,
+        model="test-model",
+        max_iterations=10,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.final_content == "Here are the results."
+    assert result.stop_reason == "completed"
+    assert call_count == 4  # tool_call, empty, tool_call, final text
+    assert "read_file" in result.tools_used
+
+
+@pytest.mark.asyncio
+async def test_runner_accumulates_usage_and_preserves_cached_tokens():
+    """Runner should accumulate prompt/completion tokens across iterations
+    and preserve cached_tokens from provider responses."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:  # first call requests a tool; any later call returns the final answer
+            return LLMResponse(
+                content="thinking",
+                tool_calls=[ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})],
+                usage={"prompt_tokens": 100, "completion_tokens": 10, "cached_tokens": 80},
+            )
+        return LLMResponse(
+            content="done",
+            tool_calls=[],
+            usage={"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="file content")
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    # Each counter is the sum of the two stubbed responses above.
+    assert result.usage["prompt_tokens"] == 300  # 100 + 200
+    assert result.usage["completion_tokens"] == 30  # 10 + 20
+    assert result.usage["cached_tokens"] == 230  # 80 + 150
+
+
+@pytest.mark.asyncio
+async def test_runner_binds_on_retry_wait_to_retry_callback_not_progress():
+    """Regression guard: retry heartbeats go to ``retry_wait_callback`` only.
+
+    The ``on_retry_wait`` keyword handed to the provider must be the spec's
+    ``retry_wait_callback``, never its ``progress_callback``. An earlier
+    runtime refactor bound it to the progress callback, which leaked internal
+    retry diagnostics such as "Model request failed, retry in 1s" to end-user
+    channels as ordinary progress updates.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    progress_cb = AsyncMock()
+    retry_wait_cb = AsyncMock()
+    seen_kwargs: dict = {}
+
+    async def chat_with_retry(**kwargs):
+        # Keep every keyword of the provider request for inspection below.
+        seen_kwargs.update(kwargs)
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    conversation = [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "hi"},
+    ]
+    spec = AgentRunSpec(
+        initial_messages=conversation,
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        progress_callback=progress_cb,
+        retry_wait_callback=retry_wait_cb,
+    )
+    await AgentRunner(provider).run(spec)
+
+    bound_callback = seen_kwargs["on_retry_wait"]
+    assert bound_callback is retry_wait_cb
+    assert bound_callback is not progress_cb
+
+
+# ---------------------------------------------------------------------------
+# Config passthrough tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_runner_passes_temperature_to_provider():
+    """A ``temperature`` set on AgentRunSpec is forwarded to ``provider.chat_with_retry``."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    forwarded: dict = {}
+
+    async def chat_with_retry(**kwargs):
+        # Record the keyword arguments of the provider request.
+        forwarded.update(kwargs)
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = chat_with_retry
+    tool_registry = MagicMock()
+    tool_registry.get_definitions.return_value = []
+
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hi"}],
+        tools=tool_registry,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        temperature=0.7,
+    )
+    await AgentRunner(provider).run(spec)
+
+    assert forwarded["temperature"] == 0.7
+
+
+@pytest.mark.asyncio
+async def test_runner_passes_max_tokens_to_provider():
+    """A ``max_tokens`` set on AgentRunSpec is forwarded to ``provider.chat_with_retry``."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    forwarded: dict = {}
+
+    async def chat_with_retry(**kwargs):
+        # Record the keyword arguments of the provider request.
+        forwarded.update(kwargs)
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = chat_with_retry
+    tool_registry = MagicMock()
+    tool_registry.get_definitions.return_value = []
+
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hi"}],
+        tools=tool_registry,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        max_tokens=8192,
+    )
+    await AgentRunner(provider).run(spec)
+
+    assert forwarded["max_tokens"] == 8192
+
+
+@pytest.mark.asyncio
+async def test_runner_passes_reasoning_effort_to_provider():
+    """A ``reasoning_effort`` set on AgentRunSpec is forwarded to ``provider.chat_with_retry``."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    forwarded: dict = {}
+
+    async def chat_with_retry(**kwargs):
+        # Record the keyword arguments of the provider request.
+        forwarded.update(kwargs)
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = chat_with_retry
+    tool_registry = MagicMock()
+    tool_registry.get_definitions.return_value = []
+
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hi"}],
+        tools=tool_registry,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        reasoning_effort="high",
+    )
+    await AgentRunner(provider).run(spec)
+
+    assert forwarded["reasoning_effort"] == "high"
diff --git a/tests/agent/test_runner_errors.py b/tests/agent/test_runner_errors.py
new file mode 100644
index 000000000..8df7ad8f3
--- /dev/null
+++ b/tests/agent/test_runner_errors.py
@@ -0,0 +1,171 @@
+"""Tests for AgentRunner error handling: tool errors, LLM errors,
+session message isolation, and tool result preservation."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+
+@pytest.mark.asyncio
+async def test_runner_returns_structured_tool_error():
+    """A raising tool with ``fail_on_tool_error=True`` yields a ``tool_error`` result.
+
+    The result is expected to carry the formatted error string and a single
+    ``tool_events`` entry describing the failed call.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    tool_call_response = LLMResponse(
+        content="working",
+        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})],
+    )
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=tool_call_response)
+
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    # Every tool execution raises.
+    tools.execute = AsyncMock(side_effect=RuntimeError("boom"))
+
+    spec = AgentRunSpec(
+        initial_messages=[],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        fail_on_tool_error=True,
+    )
+    result = await AgentRunner(provider).run(spec)
+
+    expected_event = {"name": "list_dir", "status": "error", "detail": "boom"}
+    assert result.stop_reason == "tool_error"
+    assert result.error == "Error: RuntimeError: boom"
+    assert result.tool_events == [expected_event]
+
+
+@pytest.mark.asyncio
+async def test_llm_error_not_appended_to_session_messages():
+    """An LLM response with ``finish_reason='error'`` must not pollute the history.
+
+    The raw error text is still expected in ``final_content``, but no assistant
+    message in ``result.messages`` may contain it; the last assistant message
+    must be the persisted placeholder instead.
+    """
+    from nanobot.agent.runner import _PERSISTED_MODEL_ERROR_PLACEHOLDER, AgentRunSpec, AgentRunner
+
+    error_response = LLMResponse(
+        content="429 rate limit exceeded", finish_reason="error", tool_calls=[], usage={},
+    )
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=error_response)
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hello"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=5,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    )
+    result = await AgentRunner(provider).run(spec)
+
+    assert result.stop_reason == "error"
+    assert result.final_content == "429 rate limit exceeded"
+
+    assistant_msgs = [msg for msg in result.messages if msg.get("role") == "assistant"]
+    for msg in assistant_msgs:
+        # ``content`` may be None, so normalise it before the substring check.
+        assert "429" not in (msg.get("content") or ""), \
+            "Error content should not appear in session messages"
+    assert assistant_msgs[-1]["content"] == _PERSISTED_MODEL_ERROR_PLACEHOLDER
+
+
+@pytest.mark.asyncio
+async def test_runner_tool_error_sets_final_content():
+    """With ``fail_on_tool_error=True`` the formatted tool error becomes ``final_content``."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    async def chat_with_retry(*, messages, **kwargs):
+        # Always request the same single tool call.
+        read_call = ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})
+        return LLMResponse(content="working", tool_calls=[read_call], usage={})
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = chat_with_retry
+
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(side_effect=RuntimeError("boom"))
+
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        fail_on_tool_error=True,
+    )
+    result = await AgentRunner(provider).run(spec)
+
+    assert result.final_content == "Error: RuntimeError: boom"
+    assert result.stop_reason == "tool_error"
+
+
+@pytest.mark.asyncio
+async def test_runner_tool_error_preserves_tool_results_in_messages():
+    """A fatal tool error must not leave orphan ``tool_calls`` behind (#2943).
+
+    The provider requests two tool calls and the second execution raises. Both
+    calls are still expected to have a ``tool`` message in ``result.messages``,
+    positioned after the assistant message that issued them.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    async def chat_with_retry(*, messages, **kwargs):
+        requested = [
+            ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a"}),
+            ToolCallRequest(id="tc2", name="exec", arguments={"cmd": "bad"}),
+        ]
+        return LLMResponse(content=None, tool_calls=requested, usage={})
+
+    provider = MagicMock(spec=LLMProvider)
+    # The same fake serves both the plain and the streaming entry point.
+    provider.chat_with_retry = chat_with_retry
+    provider.chat_stream_with_retry = chat_with_retry
+
+    executions = 0
+
+    async def fake_execute(name, args, **kw):
+        nonlocal executions
+        executions += 1
+        if executions != 2:
+            return "file content"
+        # Only the second execution fails.
+        raise RuntimeError("boom")
+
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(side_effect=fake_execute)
+
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do stuff"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        fail_on_tool_error=True,
+    )
+    result = await AgentRunner(provider).run(spec)
+
+    assert result.stop_reason == "tool_error"
+
+    # Both tool results must be in messages even though tc2 had a fatal error.
+    tool_positions = [
+        pos for pos, msg in enumerate(result.messages) if msg.get("role") == "tool"
+    ]
+    assert len(tool_positions) == 2
+    assert result.messages[tool_positions[0]]["tool_call_id"] == "tc1"
+    assert result.messages[tool_positions[1]]["tool_call_id"] == "tc2"
+
+    # The assistant message with tool_calls must precede the tool results.
+    issuing_pos = next(
+        pos for pos, msg in enumerate(result.messages)
+        if msg.get("role") == "assistant" and msg.get("tool_calls")
+    )
+    assert all(pos > issuing_pos for pos in tool_positions)
diff --git a/tests/agent/test_runner_governance.py b/tests/agent/test_runner_governance.py
new file mode 100644
index 000000000..50e882ca6
--- /dev/null
+++ b/tests/agent/test_runner_governance.py
@@ -0,0 +1,643 @@
+"""Tests for AgentRunner context governance: backfill, orphan cleanup, microcompact, snip_history."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+
+def _make_loop(tmp_path):
+    """Build an AgentLoop on ``tmp_path`` with its collaborators patched out.
+
+    ContextBuilder, SessionManager and SubagentManager are patched in
+    ``nanobot.agent.loop`` only while the constructor runs. The provider is a
+    bare MagicMock that reports "test-model" as its default model.
+
+    NOTE(review): no test in this module calls this helper, and the shared
+    ``make_loop(..., patch_deps=True)`` in tests/agent/conftest.py looks like
+    it covers the same setup -- confirm and consider removing.
+    """
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.bus.queue import MessageBus
+
+    bus = MessageBus()
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+
+    # The patches are active only for the duration of AgentLoop.__init__.
+    with patch("nanobot.agent.loop.ContextBuilder"), \
+         patch("nanobot.agent.loop.SessionManager"), \
+         patch("nanobot.agent.loop.SubagentManager") as MockSubMgr:
+        # cancel_by_session is awaited by callers, so it must be an AsyncMock.
+        MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0)
+        loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path)
+    return loop
+
+
+@pytest.mark.asyncio
+async def test_runner_uses_raw_messages_when_context_governance_fails():
+    """If history snipping raises, the provider still receives the original messages.
+
+    ``_snip_history`` is replaced with a mock that raises ``RuntimeError``. The
+    run is expected to finish normally and the messages passed to the provider
+    must equal ``initial_messages``.
+
+    The explicit ``asyncio`` marker matches the other async tests in this
+    module, so the coroutine is awaited even when pytest-asyncio is not running
+    in auto mode.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    captured_messages: list[dict] = []
+
+    async def chat_with_retry(*, messages, **kwargs):
+        # Copy the request into the outer list so it can be inspected later.
+        captured_messages[:] = messages
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    initial_messages = [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "hello"},
+    ]
+
+    runner = AgentRunner(provider)
+    # Force the governance step to fail on this runner instance only.
+    runner._snip_history = MagicMock(side_effect=RuntimeError("boom"))  # type: ignore[method-assign]
+    result = await runner.run(AgentRunSpec(
+        initial_messages=initial_messages,
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.final_content == "done"
+    assert captured_messages == initial_messages
+
+
+def test_snip_history_drops_orphaned_tool_results_from_trimmed_slice(monkeypatch):
+    """``_snip_history`` output must start with system followed by a user message.
+
+    Both token estimators in ``nanobot.agent.runner`` are monkeypatched:
+    ``estimate_prompt_tokens_chain`` always reports ``(500, None)`` and
+    ``estimate_message_tokens`` looks each message up by its content
+    (defaulting to 40).
+
+    NOTE(review): despite the name, the assertions only check role ordering;
+    they do not assert that the ``call_1`` tool result is absent.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    runner = AgentRunner(provider)
+    messages = [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "old user"},
+        {
+            "role": "assistant",
+            "content": "tool call",
+            "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "ls", "arguments": "{}"}}],
+        },
+        {"role": "tool", "tool_call_id": "call_1", "content": "tool output"},
+        {"role": "assistant", "content": "after tool"},
+    ]
+    spec = AgentRunSpec(
+        initial_messages=messages,
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        context_window_tokens=2000,
+        context_block_limit=100,
+    )
+
+    # Fixed prompt estimate; presumably above the budget so snipping activates.
+    monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_args, **_kwargs: (500, None))
+    # Per-message sizes keyed by message content.
+    token_sizes = {
+        "old user": 120,
+        "tool call": 120,
+        "tool output": 40,
+        "after tool": 40,
+        "system": 0,
+    }
+    monkeypatch.setattr(
+        "nanobot.agent.runner.estimate_message_tokens",
+        lambda msg: token_sizes.get(str(msg.get("content")), 40),
+    )
+
+    trimmed = runner._snip_history(spec, messages)
+
+    # After the fix, the user message is recovered so the sequence is valid
+    # for providers that require system → user (e.g. GLM error 1214).
+    assert trimmed[0]["role"] == "system"
+    non_system = [m for m in trimmed if m["role"] != "system"]
+    assert non_system[0]["role"] == "user", f"Expected user after system, got {non_system[0]['role']}"
+
+
+@pytest.mark.asyncio
+async def test_backfill_missing_tool_results_inserts_error():
+    """Orphaned tool_use (no matching tool_result) should get a synthetic error.
+
+    ``call_a`` already has a tool result while ``call_b`` does not. After
+    backfilling, ``call_b`` must have exactly one tool message whose content is
+    ``_BACKFILL_CONTENT`` and whose name matches the requested function.
+
+    The explicit ``asyncio`` marker matches the other async tests in this
+    module, so the coroutine is awaited even when pytest-asyncio is not running
+    in auto mode.
+    """
+    from nanobot.agent.runner import AgentRunner, _BACKFILL_CONTENT
+
+    messages = [
+        {"role": "user", "content": "hi"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {"id": "call_a", "type": "function", "function": {"name": "exec", "arguments": "{}"}},
+                {"id": "call_b", "type": "function", "function": {"name": "read_file", "arguments": "{}"}},
+            ],
+        },
+        # Only call_a is answered; call_b is the orphan to be backfilled.
+        {"role": "tool", "tool_call_id": "call_a", "name": "exec", "content": "ok"},
+    ]
+    result = AgentRunner._backfill_missing_tool_results(messages)
+    tool_msgs = [m for m in result if m.get("role") == "tool"]
+    assert len(tool_msgs) == 2
+    backfilled = [m for m in tool_msgs if m.get("tool_call_id") == "call_b"]
+    assert len(backfilled) == 1
+    assert backfilled[0]["content"] == _BACKFILL_CONTENT
+    assert backfilled[0]["name"] == "read_file"
+
+
+def test_drop_orphan_tool_results_removes_unmatched_tool_messages():
+    """A tool message whose id matches no assistant tool call is removed; the rest is kept."""
+    from nanobot.agent.runner import AgentRunner
+
+    def matched_history():
+        """Return a fresh copy of the messages expected to survive cleanup."""
+        return [
+            {"role": "system", "content": "system"},
+            {"role": "user", "content": "old user"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {"id": "call_ok", "type": "function", "function": {"name": "read_file", "arguments": "{}"}},
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_ok", "name": "read_file", "content": "ok"},
+            {"role": "assistant", "content": "after tool"},
+        ]
+
+    messages = matched_history()
+    # Slot the unmatched tool result in right after the matched one.
+    messages.insert(
+        4,
+        {"role": "tool", "tool_call_id": "call_orphan", "name": "exec", "content": "stale"},
+    )
+
+    cleaned = AgentRunner._drop_orphan_tool_results(messages)
+
+    assert cleaned == matched_history()
+
+
+@pytest.mark.asyncio
+async def test_backfill_noop_when_complete():
+    """When every tool call already has a result, the input list is returned as-is."""
+    from nanobot.agent.runner import AgentRunner
+
+    exec_call = {"id": "call_x", "type": "function", "function": {"name": "exec", "arguments": "{}"}}
+    messages = [
+        {"role": "user", "content": "hi"},
+        {"role": "assistant", "content": "", "tool_calls": [exec_call]},
+        {"role": "tool", "tool_call_id": "call_x", "name": "exec", "content": "done"},
+        {"role": "assistant", "content": "all good"},
+    ]
+
+    backfilled = AgentRunner._backfill_missing_tool_results(messages)
+
+    # Identity, not equality: no copy is expected when nothing is missing.
+    assert backfilled is messages
+
+
+@pytest.mark.asyncio
+async def test_runner_drops_orphan_tool_results_before_model_request():
+    """Orphan tool results are hidden from the model but stay in ``result.messages``."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    sent_to_model: list[dict] = []
+
+    async def chat_with_retry(*, messages, **kwargs):
+        # Remember exactly what the provider was asked to process.
+        sent_to_model[:] = messages
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider = MagicMock()
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    history = [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "old user"},
+        {"role": "tool", "tool_call_id": "call_orphan", "name": "exec", "content": "stale"},
+        {"role": "assistant", "content": "after orphan"},
+        {"role": "user", "content": "new prompt"},
+    ]
+    spec = AgentRunSpec(
+        initial_messages=history,
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    )
+    result = await AgentRunner(provider).run(spec)
+
+    sent_tool_ids = [
+        msg.get("tool_call_id") for msg in sent_to_model if msg.get("role") == "tool"
+    ]
+    assert "call_orphan" not in sent_tool_ids
+    # The returned history still holds the orphan at its original index.
+    assert result.messages[2]["tool_call_id"] == "call_orphan"
+    assert result.final_content == "done"
+
+
+@pytest.mark.asyncio
+async def test_backfill_repairs_model_context_without_shifting_save_turn_boundary(tmp_path):
+    """Historical backfill should not duplicate old tail messages on persist.
+
+    A session is seeded with an assistant tool call that has no tool result.
+    After processing a new inbound message, the provider request must contain
+    one synthetic result for that call, while the stored session must consist
+    of the three seeded messages followed only by the new user/assistant turn.
+    """
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.agent.runner import _BACKFILL_CONTENT
+    from nanobot.bus.events import InboundMessage
+    from nanobot.bus.queue import MessageBus
+
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    response = LLMResponse(content="new answer", tool_calls=[], usage={})
+    # Both provider entry points return the same canned reply.
+    provider.chat_with_retry = AsyncMock(return_value=response)
+    provider.chat_stream_with_retry = AsyncMock(return_value=response)
+
+    loop = AgentLoop(
+        bus=MessageBus(),
+        provider=provider,
+        workspace=tmp_path,
+        model="test-model",
+    )
+    loop.tools.get_definitions = MagicMock(return_value=[])
+    # Stub consolidation so it reports that nothing was consolidated.
+    loop.consolidator.maybe_consolidate_by_tokens = AsyncMock(return_value=False)  # type: ignore[method-assign]
+
+    # Seed history: the assistant tool call "call_missing" has no tool result.
+    session = loop.sessions.get_or_create("cli:test")
+    session.messages = [
+        {"role": "user", "content": "old user", "timestamp": "2026-01-01T00:00:00"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_missing",
+                    "type": "function",
+                    "function": {"name": "read_file", "arguments": "{}"},
+                }
+            ],
+            "timestamp": "2026-01-01T00:00:01",
+        },
+        {"role": "assistant", "content": "old tail", "timestamp": "2026-01-01T00:00:02"},
+    ]
+    loop.sessions.save(session)
+
+    result = await loop._process_message(
+        InboundMessage(channel="cli", sender_id="user", chat_id="test", content="new prompt")
+    )
+
+    assert result is not None
+    assert result.content == "new answer"
+
+    # The model request must contain exactly one synthetic result for the orphan call.
+    request_messages = provider.chat_with_retry.await_args.kwargs["messages"]
+    synthetic = [
+        message
+        for message in request_messages
+        if message.get("role") == "tool" and message.get("tool_call_id") == "call_missing"
+    ]
+    assert len(synthetic) == 1
+    assert synthetic[0]["content"] == _BACKFILL_CONTENT
+
+    # Compare the stored session on a fixed set of keys only (timestamps etc. are ignored).
+    session_after = loop.sessions.get_or_create("cli:test")
+    assert [
+        {
+            key: value
+            for key, value in message.items()
+            if key in {"role", "content", "tool_call_id", "name", "tool_calls"}
+        }
+        for message in session_after.messages
+    ] == [
+        {"role": "user", "content": "old user"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_missing",
+                    "type": "function",
+                    "function": {"name": "read_file", "arguments": "{}"},
+                }
+            ],
+        },
+        {"role": "assistant", "content": "old tail"},
+        {"role": "user", "content": "new prompt"},
+        {"role": "assistant", "content": "new answer"},
+    ]
+
+
+@pytest.mark.asyncio
+async def test_runner_backfill_only_mutates_model_context_not_returned_messages():
+    """Backfilled tool results reach the model but are never added to ``result.messages``."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _BACKFILL_CONTENT
+
+    compared_keys = {"role", "content", "tool_call_id", "name", "tool_calls"}
+
+    def comparable(message):
+        """Keep only the fields this test compares."""
+        return {key: value for key, value in message.items() if key in compared_keys}
+
+    sent_to_model: list[dict] = []
+
+    async def chat_with_retry(*, messages, **kwargs):
+        # Remember exactly what the provider was asked to process.
+        sent_to_model[:] = messages
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider = MagicMock()
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    # The assistant tool call "call_missing" has no matching tool message.
+    initial_messages = [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "old user"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_missing",
+                    "type": "function",
+                    "function": {"name": "read_file", "arguments": "{}"},
+                }
+            ],
+        },
+        {"role": "assistant", "content": "old tail"},
+        {"role": "user", "content": "new prompt"},
+    ]
+
+    spec = AgentRunSpec(
+        initial_messages=initial_messages,
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    )
+    result = await AgentRunner(provider).run(spec)
+
+    backfilled = [
+        msg
+        for msg in sent_to_model
+        if msg.get("role") == "tool" and msg.get("tool_call_id") == "call_missing"
+    ]
+    assert len(backfilled) == 1
+    assert backfilled[0]["content"] == _BACKFILL_CONTENT
+
+    # The returned history is the input plus the new assistant reply only.
+    assert [comparable(msg) for msg in result.messages] == [
+        {"role": "system", "content": "system"},
+        {"role": "user", "content": "old user"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_missing",
+                    "type": "function",
+                    "function": {"name": "read_file", "arguments": "{}"},
+                }
+            ],
+        },
+        {"role": "assistant", "content": "old tail"},
+        {"role": "user", "content": "new prompt"},
+        {"role": "assistant", "content": "done"},
+    ]
+
+
+# ---------------------------------------------------------------------------
+# Microcompact (stale tool result compaction)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_microcompact_replaces_old_tool_results():
+    """Long tool results older than the ``_MICROCOMPACT_KEEP_RECENT`` newest are summarized."""
+    from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT
+
+    extra = 5
+    long_content = "x" * 600
+
+    def tool_round(index):
+        """Return one assistant tool call and its long ``read_file`` result."""
+        call_id = f"c{index}"
+        return [
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [{"id": call_id, "type": "function", "function": {"name": "read_file", "arguments": "{}"}}],
+            },
+            {"role": "tool", "tool_call_id": call_id, "name": "read_file", "content": long_content},
+        ]
+
+    messages: list[dict] = [{"role": "system", "content": "sys"}]
+    for index in range(_MICROCOMPACT_KEEP_RECENT + extra):
+        messages.extend(tool_round(index))
+
+    result = AgentRunner._microcompact(messages)
+
+    tool_msgs = [msg for msg in result if msg.get("role") == "tool"]
+    compacted_count = sum(
+        1 for msg in tool_msgs if "omitted from context" in str(msg.get("content", ""))
+    )
+    preserved_count = sum(1 for msg in tool_msgs if msg.get("content") == long_content)
+    # Only the ``extra`` oldest results are beyond the keep-recent window.
+    assert compacted_count == extra
+    assert preserved_count == _MICROCOMPACT_KEEP_RECENT
+
+
+@pytest.mark.asyncio
+async def test_microcompact_preserves_short_results():
+    """Stale but short tool results are left alone, so the input list comes back unchanged."""
+    from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT
+
+    def tool_round(index):
+        """Return one assistant tool call and its short ``exec`` result."""
+        call_id = f"c{index}"
+        return [
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [{"id": call_id, "type": "function", "function": {"name": "exec", "arguments": "{}"}}],
+            },
+            {"role": "tool", "tool_call_id": call_id, "name": "exec", "content": "short"},
+        ]
+
+    messages: list[dict] = []
+    for index in range(_MICROCOMPACT_KEEP_RECENT + 5):
+        messages.extend(tool_round(index))
+
+    compacted = AgentRunner._microcompact(messages)
+
+    # Identity check: no copy is expected when every stale result is short.
+    assert compacted is messages
+
+
+@pytest.mark.asyncio
+async def test_microcompact_skips_non_compactable_tools():
+    """Long results of the ``message`` tool are never replaced, however stale."""
+    from nanobot.agent.runner import AgentRunner, _MICROCOMPACT_KEEP_RECENT
+
+    long_content = "y" * 1000
+
+    def tool_round(index):
+        """Return one assistant tool call and its long ``message`` result."""
+        call_id = f"c{index}"
+        return [
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [{"id": call_id, "type": "function", "function": {"name": "message", "arguments": "{}"}}],
+            },
+            {"role": "tool", "tool_call_id": call_id, "name": "message", "content": long_content},
+        ]
+
+    messages: list[dict] = []
+    for index in range(_MICROCOMPACT_KEEP_RECENT + 5):
+        messages.extend(tool_round(index))
+
+    compacted = AgentRunner._microcompact(messages)
+
+    # Identity check: no copy is expected when no compactable tool is present.
+    assert compacted is messages
+
+
+def test_governance_repairs_orphans_after_snip():
+    """After _snip_history clips an assistant+tool_calls, the second
+    _drop_orphan_tool_results pass must clean up the resulting orphans.
+
+    The snipped history is built by hand here; ``_snip_history`` itself is not
+    invoked, only ``_drop_orphan_tool_results`` is exercised.
+    """
+    from nanobot.agent.runner import AgentRunner
+
+    # Conversation before snipping (kept as a comment for reference only):
+    #   system
+    #   user       "old msg"
+    #   assistant  tool_calls=[tc_old -> search]
+    #   tool       tc_old "old result"
+    #   assistant  "old answer"
+    #   user       "new msg"
+    #
+    # Simulate snipping that keeps only the tail: drop the assistant with
+    # tool_calls but keep its tool result (orphan).
+    snipped = [
+        {"role": "system", "content": "system"},
+        {"role": "tool", "tool_call_id": "tc_old", "name": "search",
+         "content": "old result"},
+        {"role": "assistant", "content": "old answer"},
+        {"role": "user", "content": "new msg"},
+    ]
+
+    cleaned = AgentRunner._drop_orphan_tool_results(snipped)
+    # The orphan tool result should be removed.
+    assert not any(
+        m.get("role") == "tool" and m.get("tool_call_id") == "tc_old"
+        for m in cleaned
+    )
+
+
+def test_governance_fallback_still_repairs_orphans():
+    """The two fallback repair steps, run back to back, remove an orphan tool result.
+
+    This mirrors what the fallback path is expected to do when full governance
+    fails: ``_drop_orphan_tool_results`` followed by
+    ``_backfill_missing_tool_results``.
+    """
+    from nanobot.agent.runner import AgentRunner
+
+    # "orphan_tc" has no assistant tool call that requested it.
+    orphan = {"role": "tool", "tool_call_id": "orphan_tc", "name": "read",
+              "content": "stale"}
+    history = [
+        {"role": "user", "content": "hello"},
+        orphan,
+        {"role": "assistant", "content": "hi"},
+    ]
+
+    without_orphans = AgentRunner._drop_orphan_tool_results(history)
+    repaired = AgentRunner._backfill_missing_tool_results(without_orphans)
+
+    remaining_ids = [msg.get("tool_call_id") for msg in repaired]
+    assert "orphan_tc" not in remaining_ids
+
+
+def test_snip_history_preserves_user_message_after_truncation(monkeypatch):
+    """When _snip_history truncates messages and the only user message ends up
+    outside the kept window, the method must recover the nearest user message
+    so the resulting sequence is valid for providers like GLM (which reject
+    system→assistant with error 1214).
+
+    This reproduces the exact scenario from the bug report:
+    - Normal interaction: user asks, assistant calls tool, tool returns,
+      assistant replies.
+    - Injection adds a phantom user message, triggering more tool calls.
+    - _snip_history activates, keeping only recent assistant/tool pairs.
+    - The injected user message is in the truncated prefix and gets lost.
+
+    Both token estimators in ``nanobot.agent.runner`` are monkeypatched so the
+    outcome does not depend on real token counting.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    runner = AgentRunner(provider)
+
+    # The only user message sits early, before two assistant/tool rounds.
+    messages = [
+        {"role": "system", "content": "system"},
+        {"role": "assistant", "content": "previous reply"},
+        {"role": "user", "content": ".nanobot的同目录"},
+        {
+            "role": "assistant",
+            "content": None,
+            "tool_calls": [{"id": "tc_1", "type": "function", "function": {"name": "exec", "arguments": "{}"}}],
+        },
+        {"role": "tool", "tool_call_id": "tc_1", "content": "tool output 1"},
+        {
+            "role": "assistant",
+            "content": None,
+            "tool_calls": [{"id": "tc_2", "type": "function", "function": {"name": "exec", "arguments": "{}"}}],
+        },
+        {"role": "tool", "tool_call_id": "tc_2", "content": "tool output 2"},
+    ]
+
+    spec = AgentRunSpec(
+        initial_messages=messages,
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        context_window_tokens=2000,
+        context_block_limit=100,
+    )
+
+    # Make estimate_prompt_tokens_chain report above budget so _snip_history activates.
+    monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_a, **_kw: (500, None))
+    # Make kept window small: only the last 2 messages fit the budget.
+    # Sizes are keyed by message content; anything not listed (including the
+    # assistant messages whose content is None) costs 100.
+    token_sizes = {
+        "system": 0,
+        "previous reply": 200,
+        ".nanobot的同目录": 80,
+        "tool output 1": 80,
+        "tool output 2": 80,
+    }
+    monkeypatch.setattr(
+        "nanobot.agent.runner.estimate_message_tokens",
+        lambda msg: token_sizes.get(str(msg.get("content")), 100),
+    )
+
+    trimmed = runner._snip_history(spec, messages)
+
+    # The first non-system message MUST be user (not assistant).
+    non_system = [m for m in trimmed if m.get("role") != "system"]
+    assert non_system, "trimmed should contain at least one non-system message"
+    assert non_system[0]["role"] == "user", (
+        f"First non-system message must be 'user', got '{non_system[0]['role']}'. "
+        f"Roles: {[m['role'] for m in trimmed]}"
+    )
+
+
+def test_snip_history_no_user_at_all_falls_back_gracefully(monkeypatch):
+    """Edge case: a history with no user message at all.
+
+    ``_snip_history`` must not crash and must keep the system message. Its
+    output, once passed through ``LLMProvider._enforce_role_alternation``,
+    must not start its non-system part with an assistant message.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.providers.base import LLMProvider
+
+    def flat_cost(msg):
+        """Charge every message the same number of tokens."""
+        return 100
+
+    monkeypatch.setattr("nanobot.agent.runner.estimate_prompt_tokens_chain", lambda *_a, **_kw: (500, None))
+    monkeypatch.setattr("nanobot.agent.runner.estimate_message_tokens", flat_cost)
+
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    runner = AgentRunner(MagicMock())
+
+    # Only system, assistant and tool roles: there is no user message to recover.
+    history = [
+        {"role": "system", "content": "system"},
+        {"role": "assistant", "content": "reply"},
+        {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
+        {"role": "assistant", "content": "reply 2"},
+        {"role": "tool", "tool_call_id": "tc_2", "content": "result 2"},
+    ]
+    spec = AgentRunSpec(
+        initial_messages=history,
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        context_window_tokens=2000,
+        context_block_limit=100,
+    )
+
+    trimmed = runner._snip_history(spec, history)
+
+    # Reaching this point means no crash; the result must be a list that
+    # still contains the system message.
+    assert isinstance(trimmed, list)
+    assert any(msg.get("role") == "system" for msg in trimmed)
+
+    # The role-alternation safety net must be able to repair whatever
+    # _snip_history returned.
+    fixed = LLMProvider._enforce_role_alternation(trimmed)
+    remaining = [msg for msg in fixed if msg["role"] != "system"]
+    if remaining:
+        first_role = remaining[0]["role"]
+        assert first_role in ("user", "tool"), (
+            f"Safety net should ensure first non-system is user/tool, got {first_role}"
+        )
diff --git a/tests/agent/test_runner_hooks.py b/tests/agent/test_runner_hooks.py
new file mode 100644
index 000000000..7718eee20
--- /dev/null
+++ b/tests/agent/test_runner_hooks.py
@@ -0,0 +1,172 @@
+"""Tests for AgentRunner hook lifecycle: ordering, streaming deltas,
+cached-token propagation, and hook context."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+
+@pytest.mark.asyncio
+async def test_runner_calls_hooks_in_order():  # one tool-call turn, then a final-answer turn
+    from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    call_count = {"n": 0}  # dict so the nested coroutine can mutate the counter
+    events: list[tuple] = []  # one tuple per hook call, in firing order
+
+    async def chat_with_retry(**kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:  # first call: request a tool
+            return LLMResponse(
+                content="thinking",
+                tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
+            )
+        return LLMResponse(content="done", tool_calls=[], usage={})  # afterwards: plain answer
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="tool result")
+
+    class RecordingHook(AgentHook):  # records each callback it receives into `events`
+        async def before_iteration(self, context: AgentHookContext) -> None:
+            events.append(("before_iteration", context.iteration))
+
+        async def before_execute_tools(self, context: AgentHookContext) -> None:
+            events.append((
+                "before_execute_tools",
+                context.iteration,
+                [tc.name for tc in context.tool_calls],
+            ))
+
+        async def after_iteration(self, context: AgentHookContext) -> None:
+            events.append((
+                "after_iteration",
+                context.iteration,
+                context.final_content,
+                list(context.tool_results),  # copied into a fresh list at record time
+                list(context.tool_events),
+                context.stop_reason,
+            ))
+
+        def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None:
+            events.append(("finalize_content", context.iteration, content))
+            return content.upper() if content else content  # upper-cased; asserted as "DONE" below
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        hook=RecordingHook(),
+    ))
+
+    assert result.final_content == "DONE"  # the hook's finalize_content return value is used
+    assert events == [
+        ("before_iteration", 0),
+        ("before_execute_tools", 0, ["list_dir"]),
+        (
+            "after_iteration",
+            0,
+            None,  # final_content
+            ["tool result"],
+            [{"name": "list_dir", "status": "ok", "detail": "tool result"}],
+            None,  # stop_reason
+        ),
+        ("before_iteration", 1),
+        ("finalize_content", 1, "done"),
+        ("after_iteration", 1, "DONE", [], [], "completed"),
+    ]
+
+
+@pytest.mark.asyncio
+async def test_runner_streaming_hook_receives_deltas_and_end_signal():  # streaming path only
+    from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock(spec=LLMProvider)
+    streamed: list[str] = []  # deltas seen by on_stream
+    endings: list[bool] = []  # `resuming` flags seen by on_stream_end
+
+    async def chat_stream_with_retry(*, on_content_delta, **kwargs):
+        await on_content_delta("he")  # emit the answer in two chunks
+        await on_content_delta("llo")
+        return LLMResponse(content="hello", tool_calls=[], usage={})
+
+    provider.chat_stream_with_retry = chat_stream_with_retry
+    provider.chat_with_retry = AsyncMock()  # asserted below to never be awaited
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    class StreamingHook(AgentHook):
+        def wants_streaming(self) -> bool:
+            return True
+
+        async def on_stream(self, context: AgentHookContext, delta: str) -> None:
+            streamed.append(delta)
+
+        async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None:
+            endings.append(resuming)
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[],
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        hook=StreamingHook(),
+    ))
+
+    assert result.final_content == "hello"
+    assert streamed == ["he", "llo"]  # deltas forwarded in order
+    assert endings == [False]  # exactly one stream end, with resuming=False
+    provider.chat_with_retry.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_runner_passes_cached_tokens_to_hook_context():
+    """cached_tokens reported by the provider must show up in the hook's context.usage."""
+    from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    usage_snapshots: list[dict] = []
+
+    async def chat_with_retry(**kwargs):
+        reported = {"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150}
+        return LLMResponse(content="done", tool_calls=[], usage=reported)
+
+    class UsageHook(AgentHook):
+        async def after_iteration(self, context: AgentHookContext) -> None:
+            # Record a copy of the usage mapping as it is when the hook fires.
+            usage_snapshots.append(dict(context.usage))
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = chat_with_retry
+    registry = MagicMock()
+    registry.get_definitions.return_value = []
+
+    runner = AgentRunner(provider)
+    spec = AgentRunSpec(
+        initial_messages=[],
+        tools=registry,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        hook=UsageHook(),
+    )
+    await runner.run(spec)
+
+    # The hook fired once and saw the cached-token count from the response.
+    assert len(usage_snapshots) == 1
+    assert usage_snapshots[0]["cached_tokens"] == 150
diff --git a/tests/agent/test_runner_injections.py b/tests/agent/test_runner_injections.py
new file mode 100644
index 000000000..1aa504e32
--- /dev/null
+++ b/tests/agent/test_runner_injections.py
@@ -0,0 +1,1038 @@
+"""Tests for the mid-turn injection system: drain, checkpoints, pending queues, error paths."""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import time
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+
+def _make_injection_callback(queue: asyncio.Queue):
+    """Return an async callback that empties *queue* and returns the queued items as a list."""
+    async def inject_cb():
+        items = []
+        while not queue.empty():
+            items.append(queue.get_nowait())  # non-blocking: emptiness was just checked
+        return items
+    return inject_cb
+
+
+def _make_loop(tmp_path):
+    """Build an AgentLoop with ContextBuilder, SessionManager and SubagentManager patched."""
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.bus.queue import MessageBus
+
+    llm = MagicMock()
+    llm.get_default_model.return_value = "test-model"
+
+    with patch("nanobot.agent.loop.ContextBuilder"), \
+         patch("nanobot.agent.loop.SessionManager"), \
+         patch("nanobot.agent.loop.SubagentManager") as sub_mgr_cls:
+        sub_mgr_cls.return_value.cancel_by_session = AsyncMock(return_value=0)
+        agent_loop = AgentLoop(bus=MessageBus(), provider=llm, workspace=tmp_path)
+    return agent_loop
+
+@pytest.mark.asyncio
+async def test_drain_injections_returns_empty_when_no_callback():
+    """With injection_callback=None, _drain_injections returns an empty list."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    registry = MagicMock()
+    registry.get_definitions.return_value = []
+    run_spec = AgentRunSpec(
+        initial_messages=[],
+        tools=registry,
+        model="m",
+        max_iterations=1,
+        max_tool_result_chars=1000,
+        injection_callback=None,
+    )
+    assert await AgentRunner(MagicMock())._drain_injections(run_spec) == []
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_extracts_content_from_inbound_messages():
+    """InboundMessage objects are turned into user-role dicts holding their content."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.bus.events import InboundMessage
+
+    inbound = [
+        InboundMessage(channel="cli", sender_id="u", chat_id="c", content=text)
+        for text in ("hello", "world")
+    ]
+
+    async def cb():
+        return inbound
+
+    registry = MagicMock()
+    registry.get_definitions.return_value = []
+    run_spec = AgentRunSpec(
+        initial_messages=[],
+        tools=registry,
+        model="m",
+        max_iterations=1,
+        max_tool_result_chars=1000,
+        injection_callback=cb,
+    )
+
+    # Order is preserved and only role/content appear in each drained entry.
+    drained = await AgentRunner(MagicMock())._drain_injections(run_spec)
+    expected = [{"role": "user", "content": text} for text in ("hello", "world")]
+    assert drained == expected
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_passes_limit_to_callback_when_supported():
+    """A callback accepting ``limit`` is called with _MAX_INJECTIONS_PER_TURN and may keep overflow."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTIONS_PER_TURN
+    from nanobot.bus.events import InboundMessage
+
+    provider = MagicMock()
+    runner = AgentRunner(provider)
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    seen_limits: list[int] = []  # every `limit` value the runner passed in
+
+    msgs = [
+        InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg{i}")
+        for i in range(_MAX_INJECTIONS_PER_TURN + 3)  # three more than the cap
+    ]
+
+    async def cb(*, limit: int):
+        seen_limits.append(limit)
+        return msgs[:limit]  # honour the limit; the remainder stays with the caller
+
+    spec = AgentRunSpec(
+        initial_messages=[], tools=tools, model="m",
+        max_iterations=1, max_tool_result_chars=1000,
+        injection_callback=cb,
+    )
+    result = await runner._drain_injections(spec)
+    assert seen_limits == [_MAX_INJECTIONS_PER_TURN]
+    # Derive the expectation from the cap instead of hard-coding three entries.
+    assert result == [
+        {"role": "user", "content": f"msg{i}"}
+        for i in range(_MAX_INJECTIONS_PER_TURN)
+    ]
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_skips_empty_content():
+    """Empty and whitespace-only messages are dropped; only real content is returned."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.bus.events import InboundMessage
+
+    inbound = [
+        InboundMessage(channel="cli", sender_id="u", chat_id="c", content=text)
+        for text in ("", "   ", "valid")
+    ]
+
+    async def cb():
+        return inbound
+
+    registry = MagicMock()
+    registry.get_definitions.return_value = []
+    run_spec = AgentRunSpec(
+        initial_messages=[],
+        tools=registry,
+        model="m",
+        max_iterations=1,
+        max_tool_result_chars=1000,
+        injection_callback=cb,
+    )
+
+    drained = await AgentRunner(MagicMock())._drain_injections(run_spec)
+    assert drained == [{"role": "user", "content": "valid"}]
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_handles_callback_exception():
+    """A raising callback must not propagate; _drain_injections returns an empty list."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    async def cb():
+        raise RuntimeError("boom")
+
+    registry = MagicMock()
+    registry.get_definitions.return_value = []
+    run_spec = AgentRunSpec(
+        initial_messages=[],
+        tools=registry,
+        model="m",
+        max_iterations=1,
+        max_tool_result_chars=1000,
+        injection_callback=cb,
+    )
+    drained = await AgentRunner(MagicMock())._drain_injections(run_spec)
+    assert drained == []
+
+
+@pytest.mark.asyncio
+async def test_checkpoint1_injects_after_tool_execution():
+    """A follow-up queued before the run is injected after the tool call and reaches the next LLM call."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.bus.events import InboundMessage
+
+    provider = MagicMock()
+    call_count = {"n": 0}
+    captured_messages = []  # shallow copy of `messages` for every LLM call
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        captured_messages.append(list(messages))
+        if call_count["n"] == 1:  # first call: request a tool
+            return LLMResponse(
+                content="using tool",
+                tool_calls=[ToolCallRequest(id="c1", name="read_file", arguments={"path": "x"})],
+                usage={},
+            )
+        return LLMResponse(content="final answer", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="file content")
+
+    injection_queue = asyncio.Queue()
+    inject_cb = _make_injection_callback(injection_queue)
+
+    # Put a follow-up message in the queue before the run starts
+    await injection_queue.put(
+        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up question")
+    )
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hello"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=5,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        injection_callback=inject_cb,
+    ))
+
+    assert result.had_injections is True
+    assert result.final_content == "final answer"
+    # The second call should have the injected user message
+    assert call_count["n"] == 2
+    last_messages = captured_messages[-1]
+    injected = [m for m in last_messages if m.get("role") == "user" and m.get("content") == "follow-up question"]
+    assert len(injected) == 1  # present exactly once, i.e. not duplicated
+
+
+@pytest.mark.asyncio
+async def test_checkpoint2_injects_after_final_response_with_resuming_stream():
+    """With a follow-up pending after a final response, on_stream_end should get resuming=True."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.bus.events import InboundMessage
+
+    provider = MagicMock()
+    call_count = {"n": 0}
+    stream_end_calls = []  # `resuming` flag of every on_stream_end call, in order
+
+    class TrackingHook(AgentHook):
+        def wants_streaming(self) -> bool:
+            return True
+
+        async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None:
+            stream_end_calls.append(resuming)
+
+        def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None:
+            return content
+
+    async def chat_stream_with_retry(*, messages, on_content_delta=None, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:
+            return LLMResponse(content="first answer", tool_calls=[], usage={})
+        return LLMResponse(content="second answer", tool_calls=[], usage={})
+
+    provider.chat_stream_with_retry = chat_stream_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    injection_queue = asyncio.Queue()
+    inject_cb = _make_injection_callback(injection_queue)
+
+    # Queue a follow-up before the run starts (stands in for one arriving mid-response)
+    await injection_queue.put(
+        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="quick follow-up")
+    )
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hello"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=5,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        hook=TrackingHook(),
+        injection_callback=inject_cb,
+    ))
+
+    assert result.had_injections is True
+    assert result.final_content == "second answer"
+    assert call_count["n"] == 2
+    # First stream_end should have resuming=True (because injections found)
+    assert stream_end_calls[0] is True
+    # Second (final) stream_end should have resuming=False
+    assert stream_end_calls[-1] is False
+
+
+@pytest.mark.asyncio
+async def test_checkpoint2_preserves_final_response_in_history_before_followup():
+    """A follow-up injected after a final answer must still see that answer in history."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.bus.events import InboundMessage
+
+    provider = MagicMock()
+    call_count = {"n": 0}
+    captured_messages = []  # per-call snapshot; each message dict is copied
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        captured_messages.append([dict(message) for message in messages])
+        if call_count["n"] == 1:
+            return LLMResponse(content="first answer", tool_calls=[], usage={})
+        return LLMResponse(content="second answer", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    injection_queue = asyncio.Queue()
+    inject_cb = _make_injection_callback(injection_queue)
+
+    await injection_queue.put(
+        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up question")
+    )
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hello"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=5,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        injection_callback=inject_cb,
+    ))
+
+    assert result.final_content == "second answer"
+    assert call_count["n"] == 2
+    assert captured_messages[-1] == [  # exact history sent with the second LLM call
+        {"role": "user", "content": "hello"},
+        {"role": "assistant", "content": "first answer"},
+        {"role": "user", "content": "follow-up question"},
+    ]
+    assert [  # compare only role/content of the assistant entries in the result
+        {"role": message["role"], "content": message["content"]}
+        for message in result.messages
+        if message.get("role") == "assistant"
+    ] == [
+        {"role": "assistant", "content": "first answer"},
+        {"role": "assistant", "content": "second answer"},
+    ]
+
+
+@pytest.mark.asyncio
+async def test_loop_injected_followup_preserves_image_media(tmp_path):
+    """A media-only mid-turn follow-up should reach the LLM as multimodal (image_url) content."""
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.bus.events import InboundMessage
+    from nanobot.bus.queue import MessageBus
+
+    image_path = tmp_path / "followup.png"
+    image_path.write_bytes(base64.b64decode(  # embedded image bytes written to the temp dir
+        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+yF9kAAAAASUVORK5CYII="
+    ))
+
+    bus = MessageBus()
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    captured_messages: list[list[dict]] = []  # `messages` list passed to each LLM call
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        captured_messages.append(list(messages))
+        if call_count["n"] == 1:
+            return LLMResponse(content="first answer", tool_calls=[], usage={})
+        return LLMResponse(content="second answer", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
+    loop.tools.get_definitions = MagicMock(return_value=[])
+
+    pending_queue = asyncio.Queue()
+    await pending_queue.put(InboundMessage(
+        channel="cli",
+        sender_id="u",
+        chat_id="c",
+        content="",  # no text: the follow-up carries only media
+        media=[str(image_path)],
+    ))
+
+    final_content, _, _, _, had_injections = await loop._run_agent_loop(
+        [{"role": "user", "content": "hello"}],
+        channel="cli",
+        chat_id="c",
+        pending_queue=pending_queue,
+    )
+
+    assert final_content == "second answer"
+    assert had_injections is True
+    assert call_count["n"] == 2
+    injected_user_messages = [  # user messages whose content is a list of blocks
+        message for message in captured_messages[-1]
+        if message.get("role") == "user" and isinstance(message.get("content"), list)
+    ]
+    assert injected_user_messages
+    assert any(  # the last such message must contain an image_url block
+        block.get("type") == "image_url"
+        for block in injected_user_messages[-1]["content"]
+        if isinstance(block, dict)
+    )
+
+
+@pytest.mark.asyncio
+async def test_runner_merges_multiple_injected_user_messages_without_losing_media():
+    """Two injected follow-ups should reach the LLM as one user message keeping image and text."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    call_count = {"n": 0}
+    captured_messages = []  # per-call snapshot; each message dict is copied
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        captured_messages.append([dict(message) for message in messages])
+        if call_count["n"] == 1:
+            return LLMResponse(content="first answer", tool_calls=[], usage={})
+        return LLMResponse(content="second answer", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    async def inject_cb():
+        if call_count["n"] == 1:  # inject only once the first LLM call has been made
+            return [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
+                        {"type": "text", "text": "look at this"},
+                    ],
+                },
+                {"role": "user", "content": "and answer briefly"},
+            ]
+        return []
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hello"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=5,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        injection_callback=inject_cb,
+    ))
+
+    assert result.final_content == "second answer"
+    assert call_count["n"] == 2
+    second_call = captured_messages[-1]
+    user_messages = [message for message in second_call if message.get("role") == "user"]
+    assert len(user_messages) == 2  # the initial "hello" plus a single injected message
+    injected = user_messages[-1]
+    assert isinstance(injected["content"], list)
+    assert any(  # the image block from the first injected message is still there
+        block.get("type") == "image_url"
+        for block in injected["content"]
+        if isinstance(block, dict)
+    )
+    assert any(  # the plain-string second message is present as a text block
+        block.get("type") == "text" and block.get("text") == "and answer briefly"
+        for block in injected["content"]
+        if isinstance(block, dict)
+    )
+
+
+@pytest.mark.asyncio
+async def test_injection_cycles_capped_at_max():
+    """Injection cycles should be capped at _MAX_INJECTION_CYCLES."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES
+    from nanobot.bus.events import InboundMessage
+
+    provider = MagicMock()
+    call_count = {"n": 0}  # number of LLM calls made
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        return LLMResponse(content=f"answer-{call_count['n']}", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    drain_count = {"n": 0}  # number of times the runner invoked the callback
+
+    async def inject_cb():
+        drain_count["n"] += 1
+        # Only inject for the first _MAX_INJECTION_CYCLES drains
+        if drain_count["n"] <= _MAX_INJECTION_CYCLES:
+            return [InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg-{drain_count['n']}")]
+        return []  # NOTE(review): callback self-limits, so the runner cap may go unexercised — confirm
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "start"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=20,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        injection_callback=inject_cb,
+    ))
+
+    assert result.had_injections is True
+    # Should be capped: _MAX_INJECTION_CYCLES injection rounds + 1 final round
+    assert call_count["n"] == _MAX_INJECTION_CYCLES + 1
+
+
+@pytest.mark.asyncio
+async def test_no_injections_flag_is_false_by_default():
+    """Without an injection callback, the run result reports had_injections as False."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    async def chat_with_retry(**kwargs):
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    llm = MagicMock()
+    llm.chat_with_retry = chat_with_retry
+    registry = MagicMock()
+    registry.get_definitions.return_value = []
+
+    run_spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hi"}],
+        tools=registry,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    )
+    outcome = await AgentRunner(llm).run(run_spec)
+
+    # The spec above sets no injection_callback at all.
+    assert outcome.had_injections is False
+
+
+@pytest.mark.asyncio
+async def test_pending_queue_cleanup_on_dispatch(tmp_path):
+    """No entry for the session may remain in _pending_queues once _dispatch returns."""
+    from nanobot.bus.events import InboundMessage
+
+    async def chat_with_retry(**kwargs):
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    agent_loop = _make_loop(tmp_path)
+    agent_loop.provider.chat_with_retry = chat_with_retry
+
+    inbound = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="hello")
+
+    # Precondition: nothing is registered for this session yet.
+    assert inbound.session_key not in agent_loop._pending_queues
+
+    await agent_loop._dispatch(inbound)
+
+    # Postcondition: dispatch left no queue behind for the session.
+    assert inbound.session_key not in agent_loop._pending_queues
+
+
+@pytest.mark.asyncio
+async def test_followup_routed_to_pending_queue(tmp_path):
+    """Unified-session follow-ups should route into the active pending queue."""
+    from nanobot.agent.loop import UNIFIED_SESSION_KEY
+    from nanobot.bus.events import InboundMessage
+
+    loop = _make_loop(tmp_path)
+    loop._unified_session = True
+    loop._dispatch = AsyncMock()  # type: ignore[method-assign]
+
+    pending = asyncio.Queue(maxsize=20)
+    loop._pending_queues[UNIFIED_SESSION_KEY] = pending  # pre-registered pending queue
+
+    run_task = asyncio.create_task(loop.run())
+    msg = InboundMessage(channel="discord", sender_id="u", chat_id="c", content="follow-up")
+    await loop.bus.publish_inbound(msg)
+
+    deadline = time.monotonic() + 2  # monotonic clock: unaffected by wall-clock adjustments
+    while pending.empty() and time.monotonic() < deadline:
+        await asyncio.sleep(0.01)  # poll until the message lands or the deadline passes
+
+    loop.stop()
+    await asyncio.wait_for(run_task, timeout=2)
+
+    assert loop._dispatch.await_count == 0  # routed to the queue, not dispatched
+    assert not pending.empty()
+    queued_msg = pending.get_nowait()
+    assert queued_msg.content == "follow-up"
+    assert queued_msg.session_key == UNIFIED_SESSION_KEY
+
+
+@pytest.mark.asyncio
+async def test_pending_queue_preserves_overflow_for_next_injection_cycle(tmp_path):
+    """Follow-ups beyond _MAX_INJECTIONS_PER_TURN should stay queued and be delivered later."""
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.bus.events import InboundMessage
+    from nanobot.bus.queue import MessageBus
+    from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN
+
+    bus = MessageBus()
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    captured_messages: list[list[dict]] = []  # per-call snapshot; each message dict is copied
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        captured_messages.append([dict(message) for message in messages])
+        return LLMResponse(content=f"answer-{call_count['n']}", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
+    loop.tools.get_definitions = MagicMock(return_value=[])
+
+    pending_queue = asyncio.Queue()
+    total_followups = _MAX_INJECTIONS_PER_TURN + 2  # two more than the per-turn limit
+    for idx in range(total_followups):
+        await pending_queue.put(InboundMessage(
+            channel="cli",
+            sender_id="u",
+            chat_id="c",
+            content=f"follow-up-{idx}",
+        ))
+
+    final_content, _, _, _, had_injections = await loop._run_agent_loop(
+        [{"role": "user", "content": "hello"}],
+        channel="cli",
+        chat_id="c",
+        pending_queue=pending_queue,
+    )
+
+    assert final_content == "answer-3"  # i.e. the reply of the third LLM call
+    assert had_injections is True
+    assert call_count["n"] == 3
+    flattened_user_content = "\n".join(  # all string-valued user content of the last call
+        message["content"]
+        for message in captured_messages[-1]
+        if message.get("role") == "user" and isinstance(message.get("content"), str)
+    )
+    for idx in range(total_followups):
+        assert f"follow-up-{idx}" in flattened_user_content
+    assert pending_queue.empty()  # nothing was left behind
+
+
+@pytest.mark.asyncio
+async def test_pending_queue_full_falls_back_to_queued_task(tmp_path):
+    """QueueFull should preserve the message by dispatching a queued task."""
+    from nanobot.bus.events import InboundMessage
+
+    loop = _make_loop(tmp_path)
+    loop._dispatch = AsyncMock()  # type: ignore[method-assign]
+
+    pending = asyncio.Queue(maxsize=1)  # capacity one, filled immediately below
+    pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="already queued"))
+    loop._pending_queues["cli:c"] = pending
+
+    run_task = asyncio.create_task(loop.run())
+    msg = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up")
+    await loop.bus.publish_inbound(msg)
+
+    deadline = time.monotonic() + 2  # monotonic clock: unaffected by wall-clock adjustments
+    while loop._dispatch.await_count == 0 and time.monotonic() < deadline:
+        await asyncio.sleep(0.01)  # poll until _dispatch is awaited or the deadline passes
+
+    loop.stop()
+    await asyncio.wait_for(run_task, timeout=2)
+
+    assert loop._dispatch.await_count == 1
+    dispatched_msg = loop._dispatch.await_args.args[0]
+    assert dispatched_msg.content == "follow-up"
+    assert pending.qsize() == 1  # the earlier queued message is still the only entry
+
+
+@pytest.mark.asyncio
+async def test_dispatch_republishes_leftover_queue_messages(tmp_path):
+    """Messages left in the pending queue after _dispatch are re-published to the bus.
+
+    This tests the finally-block cleanup that prevents message loss when
+    the runner exits early (e.g., max_iterations, tool_error) with messages
+    still in the queue.
+    """
+    from nanobot.bus.events import InboundMessage
+
+    loop = _make_loop(tmp_path)
+    bus = loop.bus
+
+    # Simulate a completed dispatch by manually registering a queue
+    # with leftover messages, then running the cleanup logic directly.
+    pending = asyncio.Queue(maxsize=20)
+    session_key = "cli:c"
+    loop._pending_queues[session_key] = pending
+    pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="leftover-1"))
+    pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="leftover-2"))
+
+    # NOTE(review): cleanup is re-implemented inline, _dispatch is never called — confirm it mirrors the real finally block
+    queue = loop._pending_queues.pop(session_key, None)
+    assert queue is not None
+    leftover = 0  # number of messages moved from the queue onto the bus
+    while True:
+        try:
+            item = queue.get_nowait()
+        except asyncio.QueueEmpty:
+            break
+        await bus.publish_inbound(item)
+        leftover += 1
+
+    assert leftover == 2
+
+    # Verify the messages are now on the bus
+    msgs = []
+    while not bus.inbound.empty():
+        msgs.append(await asyncio.wait_for(bus.consume_inbound(), timeout=0.5))
+    contents = [m.content for m in msgs]
+    assert "leftover-1" in contents
+    assert "leftover-2" in contents
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_on_fatal_tool_error():
+    """Pending injections should be drained even when a fatal tool error occurs."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.bus.events import InboundMessage
+
+    provider = MagicMock()
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:  # first call: request the tool that will raise
+            return LLMResponse(
+                content="",
+                tool_calls=[ToolCallRequest(id="c1", name="exec", arguments={"cmd": "bad"})],
+                usage={},
+            )
+        # Second call: respond normally to the injected follow-up
+        return LLMResponse(content="reply to follow-up", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(side_effect=RuntimeError("tool exploded"))  # every tool call raises
+
+    injection_queue = asyncio.Queue()
+    inject_cb = _make_injection_callback(injection_queue)
+
+    await injection_queue.put(  # queued before the run starts
+        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after error")
+    )
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hello"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=5,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        fail_on_tool_error=True,
+        injection_callback=inject_cb,
+    ))
+
+    assert result.had_injections is True
+    assert result.final_content == "reply to follow-up"
+    # The injection should be in the messages history
+    injected = [
+        m for m in result.messages
+        if m.get("role") == "user" and m.get("content") == "follow-up after error"
+    ]
+    assert len(injected) == 1  # present exactly once, i.e. not duplicated
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_on_llm_error():
+    """Pending injections should be drained when the LLM returns an error finish_reason."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.bus.events import InboundMessage
+
+    provider = MagicMock()
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:
+            return LLMResponse(
+                content=None,
+                tool_calls=[],
+                finish_reason="error",
+                usage={},
+            )
+        # Second call: respond normally to the injected follow-up
+        return LLMResponse(content="recovered answer", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    injection_queue = asyncio.Queue()
+    inject_cb = _make_injection_callback(injection_queue)
+
+    await injection_queue.put(
+        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after LLM error")
+    )
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "previous response"},
+            {"role": "user", "content": "trigger error"},
+        ],
+        tools=tools,
+        model="test-model",
+        max_iterations=5,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        injection_callback=inject_cb,
+    ))
+
+    assert result.had_injections is True
+    assert result.final_content == "recovered answer"
+    injected = [
+        m for m in result.messages
+        if m.get("role") == "user" and "follow-up after LLM error" in str(m.get("content", ""))
+    ]
+    assert len(injected) == 1
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_on_empty_final_response():
+    """Pending injections should be drained when the runner exits due to empty response."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_EMPTY_RETRIES
+    from nanobot.bus.events import InboundMessage
+
+    provider = MagicMock()
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] <= _MAX_EMPTY_RETRIES + 1:
+            return LLMResponse(content="", tool_calls=[], usage={})
+        # After retries exhausted + injection drain, respond normally
+        return LLMResponse(content="answer after empty", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    injection_queue = asyncio.Queue()
+    inject_cb = _make_injection_callback(injection_queue)
+
+    await injection_queue.put(
+        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after empty")
+    )
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "previous response"},
+            {"role": "user", "content": "trigger empty"},
+        ],
+        tools=tools,
+        model="test-model",
+        max_iterations=10,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        injection_callback=inject_cb,
+    ))
+
+    assert result.had_injections is True
+    assert result.final_content == "answer after empty"
+    injected = [
+        m for m in result.messages
+        if m.get("role") == "user" and "follow-up after empty" in str(m.get("content", ""))
+    ]
+    assert len(injected) == 1
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_on_max_iterations():
+    """Pending injections should be drained when the runner hits max_iterations.
+
+    Unlike other error paths, max_iterations cannot continue the loop, so
+    injections are appended to messages but not processed by the LLM.
+    The key point is they are consumed from the queue to prevent re-publish.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.bus.events import InboundMessage
+
+    provider = MagicMock()
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        return LLMResponse(
+            content="",
+            tool_calls=[ToolCallRequest(id=f"c{call_count['n']}", name="read_file", arguments={"path": "x"})],
+            usage={},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="file content")
+
+    injection_queue = asyncio.Queue()
+    inject_cb = _make_injection_callback(injection_queue)
+
+    await injection_queue.put(
+        InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up after max iters")
+    )
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hello"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        injection_callback=inject_cb,
+    ))
+
+    assert result.stop_reason == "max_iterations"
+    assert result.had_injections is True
+    # The injection was consumed from the queue (preventing re-publish)
+    assert injection_queue.empty()
+    # The injection message is appended to conversation history
+    injected = [
+        m for m in result.messages
+        if m.get("role") == "user" and m.get("content") == "follow-up after max iters"
+    ]
+    assert len(injected) == 1
+
+
+@pytest.mark.asyncio
+async def test_drain_injections_set_flag_when_followup_arrives_after_last_iteration():
+    """Late follow-ups drained in max_iterations should still flip had_injections."""
+    from nanobot.agent.hook import AgentHook
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.bus.events import InboundMessage
+
+    provider = MagicMock()
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        return LLMResponse(
+            content="",
+            tool_calls=[ToolCallRequest(id=f"c{call_count['n']}", name="read_file", arguments={"path": "x"})],
+            usage={},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="file content")
+
+    injection_queue = asyncio.Queue()
+    inject_cb = _make_injection_callback(injection_queue)
+
+    class InjectOnLastAfterIterationHook(AgentHook):
+        def __init__(self) -> None:
+            self.after_iteration_calls = 0
+
+        async def after_iteration(self, context) -> None:
+            self.after_iteration_calls += 1
+            if self.after_iteration_calls == 2:
+                await injection_queue.put(
+                    InboundMessage(
+                        channel="cli",
+                        sender_id="u",
+                        chat_id="c",
+                        content="late follow-up after max iters",
+                    )
+                )
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hello"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        injection_callback=inject_cb,
+        hook=InjectOnLastAfterIterationHook(),
+    ))
+
+    assert result.stop_reason == "max_iterations"
+    assert result.had_injections is True
+    assert injection_queue.empty()
+    injected = [
+        m for m in result.messages
+        if m.get("role") == "user" and m.get("content") == "late follow-up after max iters"
+    ]
+    assert len(injected) == 1
+
+
+@pytest.mark.asyncio
+async def test_injection_cycle_cap_on_error_path():
+    """Injection cycles should be capped even when every iteration hits an LLM error."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES
+    from nanobot.bus.events import InboundMessage
+
+    provider = MagicMock()
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        return LLMResponse(
+            content=None,
+            tool_calls=[],
+            finish_reason="error",
+            usage={},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    drain_count = {"n": 0}
+
+    async def inject_cb():
+        drain_count["n"] += 1
+        if drain_count["n"] <= _MAX_INJECTION_CYCLES:
+            return [InboundMessage(channel="cli", sender_id="u", chat_id="c", content=f"msg-{drain_count['n']}")]
+        return []
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "previous"},
+            {"role": "user", "content": "trigger error"},
+        ],
+        tools=tools,
+        model="test-model",
+        max_iterations=20,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        injection_callback=inject_cb,
+    ))
+
+    assert result.had_injections is True
+    # Should cap: _MAX_INJECTION_CYCLES drained rounds + 1 final round that breaks
+    assert call_count["n"] == _MAX_INJECTION_CYCLES + 1
+
diff --git a/tests/agent/test_runner_persistence.py b/tests/agent/test_runner_persistence.py
new file mode 100644
index 000000000..d2bcfa9d4
--- /dev/null
+++ b/tests/agent/test_runner_persistence.py
@@ -0,0 +1,161 @@
+"""Tests for tool result persistence: large results, pruning, temp files, cleanup."""
+
+from __future__ import annotations
+
+import os
+import time
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+async def test_runner_persists_large_tool_results_for_follow_up_calls(tmp_path):
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    captured_second_call: list[dict] = []
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:
+            return LLMResponse(
+                content="working",
+                tool_calls=[ToolCallRequest(id="call_big", name="list_dir", arguments={"path": "."})],
+                usage={"prompt_tokens": 5, "completion_tokens": 3},
+            )
+        captured_second_call[:] = messages
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="x" * 20_000)
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        workspace=tmp_path,
+        session_key="test:runner",
+        max_tool_result_chars=2048,
+    ))
+
+    assert result.final_content == "done"
+    tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool")
+    assert "[tool output persisted]" in tool_message["content"]
+    assert "tool-results" in tool_message["content"]
+    assert (tmp_path / ".nanobot" / "tool-results" / "test_runner" / "call_big.txt").exists()
+
+
+def test_persist_tool_result_prunes_old_session_buckets(tmp_path):
+    from nanobot.utils.helpers import maybe_persist_tool_result
+
+    root = tmp_path / ".nanobot" / "tool-results"
+    old_bucket = root / "old_session"
+    recent_bucket = root / "recent_session"
+    old_bucket.mkdir(parents=True)
+    recent_bucket.mkdir(parents=True)
+    (old_bucket / "old.txt").write_text("old", encoding="utf-8")
+    (recent_bucket / "recent.txt").write_text("recent", encoding="utf-8")
+
+    stale = time.time() - (8 * 24 * 60 * 60)
+    os.utime(old_bucket, (stale, stale))
+    os.utime(old_bucket / "old.txt", (stale, stale))
+
+    persisted = maybe_persist_tool_result(
+        tmp_path,
+        "current:session",
+        "call_big",
+        "x" * 5000,
+        max_chars=64,
+    )
+
+    assert "[tool output persisted]" in persisted
+    assert not old_bucket.exists()
+    assert recent_bucket.exists()
+    assert (root / "current_session" / "call_big.txt").exists()
+
+
+def test_persist_tool_result_leaves_no_temp_files(tmp_path):
+    from nanobot.utils.helpers import maybe_persist_tool_result
+
+    root = tmp_path / ".nanobot" / "tool-results"
+    maybe_persist_tool_result(
+        tmp_path,
+        "current:session",
+        "call_big",
+        "x" * 5000,
+        max_chars=64,
+    )
+
+    assert (root / "current_session" / "call_big.txt").exists()
+    assert list((root / "current_session").glob("*.tmp")) == []
+
+
+def test_persist_tool_result_logs_cleanup_failures(monkeypatch, tmp_path):
+    from nanobot.utils.helpers import maybe_persist_tool_result
+
+    warnings: list[str] = []
+
+    monkeypatch.setattr(
+        "nanobot.utils.helpers._cleanup_tool_result_buckets",
+        lambda *_args, **_kwargs: (_ for _ in ()).throw(OSError("busy")),
+    )
+    monkeypatch.setattr(
+        "nanobot.utils.helpers.logger.exception",
+        lambda message, *args: warnings.append(message.format(*args)),
+    )
+
+    persisted = maybe_persist_tool_result(
+        tmp_path,
+        "current:session",
+        "call_big",
+        "x" * 5000,
+        max_chars=64,
+    )
+
+    assert "[tool output persisted]" in persisted
+    assert warnings and "Failed to clean stale tool result buckets" in warnings[0]
+async def test_runner_keeps_going_when_tool_result_persistence_fails():
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    captured_second_call: list[dict] = []
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:
+            return LLMResponse(
+                content="working",
+                tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
+                usage={"prompt_tokens": 5, "completion_tokens": 3},
+            )
+        captured_second_call[:] = messages
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="tool result")
+
+    runner = AgentRunner(provider)
+    with patch("nanobot.agent.runner.maybe_persist_tool_result", side_effect=RuntimeError("disk full")):
+        result = await runner.run(AgentRunSpec(
+            initial_messages=[{"role": "user", "content": "do task"}],
+            tools=tools,
+            model="test-model",
+            max_iterations=2,
+            max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        ))
+
+    assert result.final_content == "done"
+    tool_message = next(msg for msg in captured_second_call if msg.get("role") == "tool")
+    assert tool_message["content"] == "tool result"
diff --git a/tests/agent/test_runner_safety.py b/tests/agent/test_runner_safety.py
new file mode 100644
index 000000000..14565e203
--- /dev/null
+++ b/tests/agent/test_runner_safety.py
@@ -0,0 +1,244 @@
+"""Tests for AgentRunner security: workspace violations, SSRF, shell guard, throttling."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+async def test_runner_does_not_abort_on_workspace_violation_anymore():
+    """v2 behavior: workspace-bound rejections are *soft* tool errors.
+
+    Previously (PR #3493) any workspace boundary error became a fatal
+    RuntimeError that aborted the turn. That silently killed legitimate
+    workspace commands once the heuristic guard misfired (#3599 #3605), so
+    we now hand the error back to the LLM as a recoverable tool result and
+    rely on ``repeated_workspace_violation_error`` to throttle bypass loops.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    provider.chat_with_retry = AsyncMock(side_effect=[
+        LLMResponse(
+            content="trying outside",
+            tool_calls=[ToolCallRequest(
+                id="call_1", name="read_file", arguments={"path": "/tmp/outside.md"},
+            )],
+        ),
+        LLMResponse(content="ok, telling the user instead", tool_calls=[]),
+    ])
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(
+        side_effect=PermissionError(
+            "Path /tmp/outside.md is outside allowed directory /workspace"
+        )
+    )
+
+    runner = AgentRunner(provider)
+
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert provider.chat_with_retry.await_count == 2, (
+        "workspace violation must NOT short-circuit the loop"
+    )
+    assert result.stop_reason != "tool_error"
+    assert result.error is None
+    assert result.final_content == "ok, telling the user instead"
+    assert result.tool_events and result.tool_events[0]["status"] == "error"
+    # Detail still carries the workspace_violation breadcrumb for telemetry,
+    # but the runner did not raise.
+    assert "workspace_violation" in result.tool_events[0]["detail"]
+
+
+def test_is_ssrf_violation_recognizes_private_url_blocks():
+    """SSRF rejections are classified separately from workspace boundaries."""
+    from nanobot.agent.runner import AgentRunner
+
+    ssrf_msg = "Error: Command blocked by safety guard (internal/private URL detected)"
+    assert AgentRunner._is_ssrf_violation(ssrf_msg) is True
+    assert AgentRunner._is_ssrf_violation(
+        "URL validation failed: Blocked: host resolves to private/internal address 192.168.1.2"
+    ) is True
+
+    # Workspace-bound markers are NOT classified as SSRF.
+    assert AgentRunner._is_ssrf_violation(
+        "Error: Command blocked by safety guard (path outside working dir)"
+    ) is False
+    assert AgentRunner._is_ssrf_violation(
+        "Path /tmp/x is outside allowed directory /ws"
+    ) is False
+    # Deny / allowlist filter messages stay non-fatal too.
+    assert AgentRunner._is_ssrf_violation(
+        "Error: Command blocked by deny pattern filter"
+    ) is False
+
+
+@pytest.mark.asyncio
+async def test_runner_returns_non_retryable_hint_on_ssrf_violation():
+    """SSRF stays blocked, but the runtime gives the LLM a final chance to recover."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    provider.chat_with_retry = AsyncMock(side_effect=[
+        LLMResponse(
+            content="curl-ing metadata",
+            tool_calls=[ToolCallRequest(
+                id="call_ssrf",
+                name="exec",
+                arguments={"command": "curl http://169.254.169.254"},
+            )],
+        ),
+        LLMResponse(
+            content="I cannot access that private URL. Please share local files.",
+            tool_calls=[],
+        ),
+    ])
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value=(
+        "Error: Command blocked by safety guard (internal/private URL detected)"
+    ))
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert provider.chat_with_retry.await_count == 2
+    assert result.stop_reason == "completed"
+    assert result.error is None
+    assert result.final_content == "I cannot access that private URL. Please share local files."
+    assert result.tool_events and result.tool_events[0]["detail"].startswith("ssrf_violation:")
+    tool_messages = [m for m in result.messages if m.get("role") == "tool"]
+    assert tool_messages
+    assert "non-bypassable security boundary" in tool_messages[0]["content"]
+    assert "Do not retry" in tool_messages[0]["content"]
+    assert "tools.ssrfWhitelist" in tool_messages[0]["content"]
+
+
+@pytest.mark.asyncio
+async def test_runner_lets_llm_recover_from_shell_guard_path_outside():
+    """Reporter scenario for #3599 / #3605 -- guard hit, agent recovers.
+
+    The shell `_guard_command` heuristic fires on `2>/dev/null`-style
+    redirects and other shell idioms. Before v2 that aborted the whole
+    turn (silent hang on Telegram per #3605); now the LLM gets the soft
+    error back and can finalize on the next iteration.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    captured_second_call: list[dict] = []
+
+    async def chat_with_retry(*, messages, **kwargs):
+        # By the time this runs, ``provider.chat_with_retry`` is the AsyncMock
+        # assigned below, so its ``await_count`` distinguishes the turns.
+        # NOTE(review): relies on AsyncMock bumping ``await_count`` before it
+        # invokes ``side_effect`` -- confirm against the unittest.mock docs.
+        if provider.chat_with_retry.await_count == 1:
+            return LLMResponse(
+                content="trying noisy cleanup",
+                tool_calls=[ToolCallRequest(
+                    id="call_blocked",
+                    name="exec",
+                    arguments={"command": "rm scratch.txt 2>/dev/null"},
+                )],
+            )
+        # Snapshot what the runner sends on the follow-up call.
+        captured_second_call[:] = list(messages)
+        return LLMResponse(content="recovered final answer", tool_calls=[])
+
+    provider.chat_with_retry = AsyncMock(side_effect=chat_with_retry)
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    # The guard rejection is returned as a string result, not raised.
+    tools.execute = AsyncMock(
+        return_value="Error: Command blocked by safety guard (path outside working dir)"
+    )
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert provider.chat_with_retry.await_count == 2, (
+        "guard hit must NOT short-circuit the loop -- LLM should get a second turn"
+    )
+    assert result.stop_reason != "tool_error"
+    assert result.error is None
+    assert result.final_content == "recovered final answer"
+    assert result.tool_events and result.tool_events[0]["status"] == "error"
+    # v2: detail keeps the breadcrumb but the runner did not raise.
+    assert "workspace_violation" in result.tool_events[0]["detail"]
+
+
+@pytest.mark.asyncio
+async def test_runner_throttles_repeated_workspace_bypass_attempts():
+    """#3493 motivation: stop the LLM bypass loop without aborting the turn.
+
+    LLM keeps switching tools (read_file -> exec cat -> python -c open(...))
+    against the same outside path. After the soft retry budget is exhausted
+    the runner replaces the tool result with a hard "stop trying" message
+    so the model finally gives up and surfaces the boundary to the user.
+    """
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    bypass_attempts = [
+        ToolCallRequest(
+            id=f"a{i}", name="exec",
+            arguments={"command": f"cat /Users/x/Downloads/01.md  # try {i}"},
+        )
+        for i in range(4)
+    ]
+    responses: list[LLMResponse] = [
+        LLMResponse(content=f"try {i}", tool_calls=[bypass_attempts[i]])
+        for i in range(4)
+    ]
+    responses.append(LLMResponse(content="ok telling user", tool_calls=[]))
+
+    provider = MagicMock()
+    provider.chat_with_retry = AsyncMock(side_effect=responses)
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(
+        return_value="Error: Command blocked by safety guard (path outside working dir)"
+    )
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[],
+        tools=tools,
+        model="test-model",
+        max_iterations=10,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    # All 4 bypass attempts surface to the LLM (no fatal abort), and the
+    # runner finally completes once the LLM stops asking.
+    assert result.stop_reason != "tool_error"
+    assert result.error is None
+    assert result.final_content == "ok telling user"
+    # The third+ attempts must have been escalated -- look at the events.
+    escalated = [
+        ev for ev in result.tool_events
+        if ev["status"] == "error"
+        and ev["detail"].startswith("workspace_violation_escalated:")
+    ]
+    assert escalated, (
+        "expected at least one escalated workspace_violation event, got: "
+        f"{result.tool_events}"
+    )
diff --git a/tests/agent/test_runner_tool_execution.py b/tests/agent/test_runner_tool_execution.py
new file mode 100644
index 000000000..a0380e871
--- /dev/null
+++ b/tests/agent/test_runner_tool_execution.py
@@ -0,0 +1,181 @@
+"""Tests for AgentRunner tool execution: batching, concurrency, exclusive tools."""
+
+from __future__ import annotations
+
+import asyncio
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.agent.tools.base import Tool
+from nanobot.agent.tools.registry import ToolRegistry
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+class _DelayTool(Tool):
+    """Test tool that sleeps for a fixed delay and records start/end markers.
+
+    Each ``execute`` call appends ``start:<name>`` and ``end:<name>`` to the
+    shared event list, so tests can assert on how tool runs interleave.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        *,
+        delay: float,
+        read_only: bool,
+        shared_events: list[str],
+        exclusive: bool = False,
+    ):
+        """Store the tool's configuration.
+
+        Args:
+            name: Value reported by both ``name`` and ``description``.
+            delay: Seconds ``execute`` sleeps between its two markers.
+            read_only: Value reported by the ``read_only`` property.
+            shared_events: List that ``execute`` appends its markers to.
+            exclusive: Value reported by the ``exclusive`` property.
+        """
+        self._name = name
+        self._delay = delay
+        self._read_only = read_only
+        self._shared_events = shared_events
+        self._exclusive = exclusive
+
+    @property
+    def name(self) -> str:
+        """Return the configured tool name."""
+        return self._name
+
+    @property
+    def description(self) -> str:
+        """Return the tool name; no separate description is kept."""
+        return self._name
+
+    @property
+    def parameters(self) -> dict:
+        """Return an object schema with no properties and no required fields."""
+        return {"type": "object", "properties": {}, "required": []}
+
+    @property
+    def read_only(self) -> bool:
+        """Return the configured read-only flag."""
+        return self._read_only
+
+    @property
+    def exclusive(self) -> bool:
+        """Return the configured exclusive flag."""
+        return self._exclusive
+
+    async def execute(self, **kwargs):
+        """Record a start marker, sleep for the delay, record an end marker.
+
+        Returns:
+            The tool name.
+        """
+        self._shared_events.append(f"start:{self._name}")
+        await asyncio.sleep(self._delay)
+        self._shared_events.append(f"end:{self._name}")
+        return self._name
+
+
+@pytest.mark.asyncio
+async def test_runner_batches_read_only_tools_before_exclusive_work():
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    tools = ToolRegistry()
+    shared_events: list[str] = []
+    read_a = _DelayTool("read_a", delay=0.05, read_only=True, shared_events=shared_events)
+    read_b = _DelayTool("read_b", delay=0.05, read_only=True, shared_events=shared_events)
+    write_a = _DelayTool("write_a", delay=0.01, read_only=False, shared_events=shared_events)
+    tools.register(read_a)
+    tools.register(read_b)
+    tools.register(write_a)
+
+    runner = AgentRunner(MagicMock())
+    await runner._execute_tools(
+        AgentRunSpec(
+            initial_messages=[],
+            tools=tools,
+            model="test-model",
+            max_iterations=1,
+            max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+            concurrent_tools=True,
+        ),
+        [
+            ToolCallRequest(id="ro1", name="read_a", arguments={}),
+            ToolCallRequest(id="ro2", name="read_b", arguments={}),
+            ToolCallRequest(id="rw1", name="write_a", arguments={}),
+        ],
+        {},
+        {},
+    )
+
+    assert shared_events[0:2] == ["start:read_a", "start:read_b"]
+    assert "end:read_a" in shared_events and "end:read_b" in shared_events
+    assert shared_events.index("end:read_a") < shared_events.index("start:write_a")
+    assert shared_events.index("end:read_b") < shared_events.index("start:write_a")
+    assert shared_events[-2:] == ["start:write_a", "end:write_a"]
+
+
+@pytest.mark.asyncio
+async def test_runner_does_not_batch_exclusive_read_only_tools():
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    tools = ToolRegistry()
+    shared_events: list[str] = []
+    read_a = _DelayTool("read_a", delay=0.03, read_only=True, shared_events=shared_events)
+    read_b = _DelayTool("read_b", delay=0.03, read_only=True, shared_events=shared_events)
+    ddg_like = _DelayTool(
+        "ddg_like",
+        delay=0.01,
+        read_only=True,
+        shared_events=shared_events,
+        exclusive=True,
+    )
+    tools.register(read_a)
+    tools.register(ddg_like)
+    tools.register(read_b)
+
+    runner = AgentRunner(MagicMock())
+    await runner._execute_tools(
+        AgentRunSpec(
+            initial_messages=[],
+            tools=tools,
+            model="test-model",
+            max_iterations=1,
+            max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+            concurrent_tools=True,
+        ),
+        [
+            ToolCallRequest(id="ro1", name="read_a", arguments={}),
+            ToolCallRequest(id="ddg1", name="ddg_like", arguments={}),
+            ToolCallRequest(id="ro2", name="read_b", arguments={}),
+        ],
+        {},
+        {},
+    )
+
+    assert shared_events[0] == "start:read_a"
+    assert shared_events.index("end:read_a") < shared_events.index("start:ddg_like")
+    assert shared_events.index("end:ddg_like") < shared_events.index("start:read_b")
+
+
+@pytest.mark.asyncio
+async def test_runner_blocks_repeated_external_fetches():
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    captured_final_call: list[dict] = []
+    call_count = {"n": 0}
+
+    async def chat_with_retry(*, messages, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] <= 3:
+            return LLMResponse(
+                content="working",
+                tool_calls=[ToolCallRequest(id=f"call_{call_count['n']}", name="web_fetch", arguments={"url": "https://example.com"})],
+                usage={},
+            )
+        captured_final_call[:] = messages
+        return LLMResponse(content="done", tool_calls=[], usage={})
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="page content")
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "research task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=4,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.final_content == "done"
+    assert tools.execute.await_count == 2
+    blocked_tool_message = [
+        msg for msg in captured_final_call
+        if msg.get("role") == "tool" and msg.get("tool_call_id") == "call_3"
+    ][0]
+    assert "repeated external lookup blocked" in blocked_tool_message["content"]
diff --git a/tests/agent/test_stop_preserves_context.py b/tests/agent/test_stop_preserves_context.py
index 2a082850f..c7e766be1 100644
--- a/tests/agent/test_stop_preserves_context.py
+++ b/tests/agent/test_stop_preserves_context.py
@@ -10,6 +10,7 @@ See: https://github.com/HKUDS/nanobot/issues/2966
 from __future__ import annotations
 
 import asyncio
+from pathlib import Path
 from types import SimpleNamespace
 from typing import Any
 from unittest.mock import MagicMock, patch, AsyncMock
@@ -17,42 +18,47 @@ from unittest.mock import MagicMock, patch, AsyncMock
 import pytest
 
 from nanobot.agent.loop import AgentLoop
+from nanobot.bus.queue import MessageBus
+from nanobot.providers.base import LLMProvider
 
 
-@pytest.fixture
-def mock_loop():
-    """Create a minimal AgentLoop with mocked dependencies."""
-    with patch.object(AgentLoop, "__init__", lambda self: None):
-        loop = AgentLoop()
-        loop.sessions = MagicMock()
-        loop._pending_queues = {}
-        loop._session_locks = {}
-        loop._active_tasks = {}
-        loop._concurrency_gate = None
-        loop._RUNTIME_CHECKPOINT_KEY = "runtime_checkpoint"
-        loop._PENDING_USER_TURN_KEY = "pending_user_turn"
-        loop.bus = MagicMock()
-        loop.bus.publish_outbound = AsyncMock()
-        loop.bus.publish_inbound = AsyncMock()
-        loop.commands = MagicMock()
-        loop.commands.dispatch_priority = AsyncMock(return_value=None)
-        return loop
+def _make_provider():
+    """Return a ``MagicMock`` provider stubbed with a model name, generation settings and a token estimate."""
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    # Plain namespace so these settings read back as real values, not auto-created child mocks.
+    provider.generation = SimpleNamespace(max_tokens=4096, temperature=0.1, reasoning_effort=None)
+    provider.estimate_prompt_tokens.return_value = (10_000, "test")
+    return provider
+
+
+def _make_loop(tmp_path: Path) -> AgentLoop:
+    """Build an actual AgentLoop around a mock provider, with three loop collaborators patched."""
+    message_bus, llm = MessageBus(), _make_provider()
+    with patch("nanobot.agent.loop.ContextBuilder"):
+        with patch("nanobot.agent.loop.SessionManager"):
+            with patch("nanobot.agent.loop.SubagentManager") as subagent_cls:
+                # Stub cancel_by_session as an async call that resolves to 0.
+                subagent_cls.return_value.cancel_by_session = AsyncMock(return_value=0)
+                return AgentLoop(bus=message_bus, provider=llm, workspace=tmp_path)
 
 
 class TestStopPreservesContext:
     """Verify that /stop restores partial context via checkpoint."""
 
-    def test_restore_checkpoint_method_exists(self, mock_loop):
+    def test_restore_checkpoint_method_exists(self, tmp_path):
         """AgentLoop should have _restore_runtime_checkpoint."""
-        assert hasattr(mock_loop, "_restore_runtime_checkpoint")
+        loop = _make_loop(tmp_path)
+        assert hasattr(loop, "_restore_runtime_checkpoint")
 
-    def test_checkpoint_key_constant(self, mock_loop):
+    def test_checkpoint_key_constant(self, tmp_path):
         """The runtime checkpoint key should be defined."""
-        assert mock_loop._RUNTIME_CHECKPOINT_KEY == "runtime_checkpoint"
+        loop = _make_loop(tmp_path)
+        assert loop._RUNTIME_CHECKPOINT_KEY == "runtime_checkpoint"
 
-    def test_cancel_dispatch_restores_checkpoint(self, mock_loop):
+    def test_cancel_dispatch_restores_checkpoint(self, tmp_path):
         """When a task is cancelled, the checkpoint should be restored."""
-        # Create a mock session with a checkpoint
+        loop = _make_loop(tmp_path)
         session = MagicMock()
         session.metadata = {
             "runtime_checkpoint": {
@@ -74,14 +80,11 @@ class TestStopPreservesContext:
         session.messages = [
             {"role": "user", "content": "Search for something"},
         ]
-        mock_loop.sessions.get_or_create.return_value = session
+        loop.sessions.get_or_create.return_value = session
 
-        # The restore method should add checkpoint messages to session history
-        restored = mock_loop._restore_runtime_checkpoint(session)
+        restored = loop._restore_runtime_checkpoint(session)
         assert restored is True
-        # After restore, session should have more messages
         assert len(session.messages) > 1
-        # The checkpoint should be cleared
         assert "runtime_checkpoint" not in session.metadata
 
 
diff --git a/tests/agent/test_subagent_lifecycle.py b/tests/agent/test_subagent_lifecycle.py
new file mode 100644
index 000000000..bf3564f28
--- /dev/null
+++ b/tests/agent/test_subagent_lifecycle.py
@@ -0,0 +1,558 @@
+"""Tests for SubagentManager lifecycle — spawn, run, announce, cancel."""
+
+import asyncio
+import time
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from nanobot.agent.hook import AgentHookContext
+from nanobot.agent.runner import AgentRunResult
+from nanobot.agent.subagent import (
+    SubagentManager,
+    SubagentStatus,
+    _SubagentHook,
+)
+from nanobot.bus.queue import MessageBus
+from nanobot.providers.base import LLMProvider
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _manager(tmp_path: Path, **kw) -> SubagentManager:
+    """Construct a SubagentManager on a spec'd mock provider; ``kw`` overrides any default."""
+    llm = MagicMock(spec=LLMProvider)
+    llm.get_default_model.return_value = "test-model"
+    settings = {
+        "provider": llm,
+        "workspace": tmp_path,
+        "bus": MessageBus(),
+        "model": "test-model",
+        "max_tool_result_chars": 16_000,
+    }
+    return SubagentManager(**{**settings, **kw})
+
+
+def _make_hook_context(**overrides) -> AgentHookContext:
+    """Return an AgentHookContext for a completed first iteration, patched by ``overrides``."""
+    fields = {
+        "iteration": 1,
+        "tool_calls": [],
+        "tool_events": [],
+        "messages": [],
+        "usage": {},
+        "error": None,
+        "stop_reason": "completed",
+        "final_content": "ok",
+    }
+    return AgentHookContext(**{**fields, **overrides})
+
+
+# ---------------------------------------------------------------------------
+# SubagentStatus defaults
+# ---------------------------------------------------------------------------
+
+
+class TestSubagentStatus:
+    """Default field values of a SubagentStatus built from the four required arguments."""
+
+    def test_defaults(self):
+        fresh = SubagentStatus(
+            task_id="abc", label="test", task_description="do stuff", started_at=time.monotonic()
+        )
+        assert (fresh.phase, fresh.iteration) == ("initializing", 0)
+        assert fresh.tool_events == []
+        assert fresh.usage == {}
+        assert fresh.stop_reason is None
+        assert fresh.error is None
+
+
+# ---------------------------------------------------------------------------
+# set_provider
+# ---------------------------------------------------------------------------
+
+
+class TestSetProvider:
+    def test_updates_provider_model_runner(self, tmp_path):
+        """set_provider swaps the provider on the manager and on its runner, and the model name."""
+        manager = _manager(tmp_path)
+        replacement = MagicMock(spec=LLMProvider)
+        manager.set_provider(replacement, "new-model")
+        assert manager.provider is replacement and manager.runner.provider is replacement
+        assert manager.model == "new-model"
+
+
+# ---------------------------------------------------------------------------
+# spawn
+# ---------------------------------------------------------------------------
+
+
+class TestSpawn:
+    """spawn() bookkeeping: running task, status entry and per-session registration."""
+
+    @staticmethod
+    def _hold_runner(sm: SubagentManager) -> asyncio.Event:
+        """Make ``sm.runner.run`` wait on the returned event before reporting "done"."""
+        gate = asyncio.Event()
+
+        async def _slow_run(spec):
+            await gate.wait()
+            return AgentRunResult(final_content="done", messages=[], stop_reason="completed")
+
+        sm.runner.run = _slow_run
+        return gate
+
+    @staticmethod
+    def _instant_runner(sm: SubagentManager) -> None:
+        """Make ``sm.runner.run`` return a "done" result without waiting."""
+        sm.runner.run = AsyncMock(return_value=AgentRunResult(
+            final_content="done", messages=[], stop_reason="completed",
+        ))
+
+    @pytest.mark.asyncio
+    async def test_returns_string_with_task_id(self, tmp_path):
+        sm = _manager(tmp_path)
+        self._instant_runner(sm)
+        result = await sm.spawn("do something")
+        assert "started" in result
+        assert "id:" in result
+
+    @pytest.mark.asyncio
+    async def test_creates_task_in_running_tasks(self, tmp_path):
+        sm = _manager(tmp_path)
+        gate = self._hold_runner(sm)
+
+        await sm.spawn("task", session_key="s1")
+        assert len(sm._running_tasks) == 1
+
+        gate.set()
+        await asyncio.sleep(0.1)
+        assert len(sm._running_tasks) == 0
+
+    @pytest.mark.asyncio
+    async def test_creates_status(self, tmp_path):
+        sm = _manager(tmp_path)
+        gate = self._hold_runner(sm)
+
+        await sm.spawn("my task")
+        # A status entry must exist while the subagent is still running.
+        assert len(sm._task_statuses) == 1
+
+        gate.set()
+        await asyncio.sleep(0.1)
+        # Status cleaned up after task completes
+        assert len(sm._task_statuses) == 0
+
+    @pytest.mark.asyncio
+    async def test_registers_in_session_tasks(self, tmp_path):
+        sm = _manager(tmp_path)
+        gate = self._hold_runner(sm)
+
+        await sm.spawn("task", session_key="s1")
+        assert "s1" in sm._session_tasks
+        assert len(sm._session_tasks["s1"]) == 1
+
+        gate.set()
+        await asyncio.sleep(0.1)
+        assert "s1" not in sm._session_tasks
+
+    @pytest.mark.asyncio
+    async def test_no_session_key_no_registration(self, tmp_path):
+        sm = _manager(tmp_path)
+        gate = self._hold_runner(sm)
+
+        await sm.spawn("task")
+        assert len(sm._session_tasks) == 0
+
+        gate.set()
+        await asyncio.sleep(0.1)
+
+    @pytest.mark.asyncio
+    async def test_label_defaults_to_truncated_task(self, tmp_path):
+        sm = _manager(tmp_path)
+        gate = self._hold_runner(sm)
+
+        long_task = "A" * 50
+        await sm.spawn(long_task, session_key="s1")
+        status = next(iter(sm._task_statuses.values()))
+        assert status.label == long_task[:30] + "..."
+
+        gate.set()
+        await asyncio.sleep(0.1)
+
+    @pytest.mark.asyncio
+    async def test_custom_label(self, tmp_path):
+        sm = _manager(tmp_path)
+        gate = self._hold_runner(sm)
+
+        await sm.spawn("task", label="Custom Label", session_key="s1")
+        status = next(iter(sm._task_statuses.values()))
+        assert status.label == "Custom Label"
+
+        gate.set()
+        await asyncio.sleep(0.1)
+
+    @pytest.mark.asyncio
+    async def test_cleanup_callback_removes_all_entries(self, tmp_path):
+        sm = _manager(tmp_path)
+        self._instant_runner(sm)
+        await sm.spawn("task", session_key="s1")
+        await asyncio.sleep(0.1)
+        assert len(sm._running_tasks) == 0
+        assert len(sm._task_statuses) == 0
+        assert len(sm._session_tasks) == 0
+
+
+# ---------------------------------------------------------------------------
+# _run_subagent
+# ---------------------------------------------------------------------------
+
+
+class TestRunSubagent:
+    """Outcomes of ``_run_subagent``: what gets announced and how the status record ends up."""
+
+    @staticmethod
+    def _fresh_status():
+        """Status record for task ``t1`` as constructed before the run."""
+        return SubagentStatus(
+            task_id="t1", label="label", task_description="do task", started_at=time.monotonic(),
+        )
+
+    @staticmethod
+    async def _drive(sm, status):
+        """Await ``_run_subagent`` for ``t1`` with ``_announce_result`` mocked; return the mock."""
+        with patch.object(sm, "_announce_result", new_callable=AsyncMock) as announce:
+            await sm._run_subagent(
+                "t1", "do task", "label", {"channel": "cli", "chat_id": "direct"}, status,
+            )
+        return announce
+
+    @pytest.mark.asyncio
+    async def test_successful_run(self, tmp_path):
+        sm = _manager(tmp_path)
+        sm.runner.run = AsyncMock(return_value=AgentRunResult(
+            final_content="Task done!", messages=[], stop_reason="completed",
+        ))
+        announce = await self._drive(sm, self._fresh_status())
+        announce.assert_called_once()
+        assert announce.call_args.args[-2] == "ok"
+
+    @pytest.mark.asyncio
+    async def test_tool_error_run(self, tmp_path):
+        sm = _manager(tmp_path)
+        sm.runner.run = AsyncMock(return_value=AgentRunResult(
+            final_content=None, messages=[], stop_reason="tool_error",
+            tool_events=[{"name": "read_file", "status": "error", "detail": "not found"}],
+        ))
+        announce = await self._drive(sm, self._fresh_status())
+        assert announce.call_args.args[-2] == "error"
+
+    @pytest.mark.asyncio
+    async def test_exception_run(self, tmp_path):
+        sm = _manager(tmp_path)
+        sm.runner.run = AsyncMock(side_effect=RuntimeError("LLM down"))
+        status = self._fresh_status()
+        announce = await self._drive(sm, status)
+        assert status.phase == "error"
+        assert "LLM down" in status.error
+        assert announce.call_args.args[-2] == "error"
+
+    @pytest.mark.asyncio
+    async def test_status_updated_on_success(self, tmp_path):
+        sm = _manager(tmp_path)
+        sm.runner.run = AsyncMock(return_value=AgentRunResult(
+            final_content="ok", messages=[], stop_reason="completed",
+        ))
+        status = self._fresh_status()
+        await self._drive(sm, status)
+        assert status.phase == "done"
+        assert status.stop_reason == "completed"
+
+
+# ---------------------------------------------------------------------------
+# _announce_result
+# ---------------------------------------------------------------------------
+
+
+class TestAnnounceResult:
+    """Shape of the inbound bus message that ``_announce_result`` publishes."""
+
+    @staticmethod
+    def _capture_inbound(sm):
+        """Replace ``sm.bus.publish_inbound`` with a mock; return the list it fills."""
+        captured = []
+        # Keep the ``msg`` parameter name so a keyword call is accepted as well.
+        sm.bus.publish_inbound = AsyncMock(side_effect=lambda msg: captured.append(msg))
+        return captured
+
+    @pytest.mark.asyncio
+    async def test_publishes_inbound_message(self, tmp_path):
+        """Exactly one message goes out, sent by "subagent" on the "system" channel."""
+        sm = _manager(tmp_path)
+        captured = self._capture_inbound(sm)
+
+        origin = {"channel": "cli", "chat_id": "direct"}
+        await sm._announce_result("t1", "label", "task", "result text", origin, "ok")
+
+        assert len(captured) == 1
+        msg = captured[0]
+        assert msg.channel == "system"
+        assert msg.sender_id == "subagent"
+        # Metadata marks the message as the result of subagent task t1.
+        assert msg.metadata["injected_event"] == "subagent_result"
+        assert msg.metadata["subagent_task_id"] == "t1"
+
+    @pytest.mark.asyncio
+    async def test_session_key_override(self, tmp_path):
+        """An explicit ``session_key`` in the origin becomes the override."""
+        sm = _manager(tmp_path)
+        captured = self._capture_inbound(sm)
+
+        origin = {"channel": "telegram", "chat_id": "123", "session_key": "s1"}
+        await sm._announce_result("t1", "label", "task", "result", origin, "ok")
+
+        assert captured[0].session_key_override == "s1"
+
+    @pytest.mark.asyncio
+    async def test_session_key_override_fallback(self, tmp_path):
+        """Without ``session_key`` the override is ``"<channel>:<chat_id>"``."""
+        sm = _manager(tmp_path)
+        captured = self._capture_inbound(sm)
+
+        origin = {"channel": "telegram", "chat_id": "123"}
+        await sm._announce_result("t1", "label", "task", "result", origin, "ok")
+
+        assert captured[0].session_key_override == "telegram:123"
+
+    @pytest.mark.asyncio
+    async def test_ok_status_text(self, tmp_path):
+        """Status "ok" yields content mentioning "completed successfully"."""
+        sm = _manager(tmp_path)
+        captured = self._capture_inbound(sm)
+
+        origin = {"channel": "cli", "chat_id": "direct"}
+        await sm._announce_result("t1", "label", "task", "result", origin, "ok")
+
+        assert "completed successfully" in captured[0].content
+
+    @pytest.mark.asyncio
+    async def test_error_status_text(self, tmp_path):
+        """Status "error" yields content mentioning "failed"."""
+        sm = _manager(tmp_path)
+        captured = self._capture_inbound(sm)
+
+        origin = {"channel": "cli", "chat_id": "direct"}
+        await sm._announce_result("t1", "label", "task", "error details", origin, "error")
+
+        assert "failed" in captured[0].content
+
+    @pytest.mark.asyncio
+    async def test_origin_message_id_in_metadata(self, tmp_path):
+        """``origin_message_id`` is copied into the message metadata."""
+        sm = _manager(tmp_path)
+        captured = self._capture_inbound(sm)
+
+        origin = {"channel": "cli", "chat_id": "direct"}
+        await sm._announce_result(
+            "t1", "label", "task", "result", origin, "ok", origin_message_id="msg-123",
+        )
+
+        assert captured[0].metadata["origin_message_id"] == "msg-123"
+
+
+# ---------------------------------------------------------------------------
+# _format_partial_progress
+# ---------------------------------------------------------------------------
+
+
+class TestFormatPartialProgress:
+    """Text produced by ``SubagentManager._format_partial_progress``."""
+
+    @staticmethod
+    def _render(events=(), error=None):
+        """Format a stand-in run result carrying ``events`` and ``error``."""
+        stub = MagicMock(tool_events=list(events), error=error)
+        return SubagentManager._format_partial_progress(stub)
+
+    @staticmethod
+    def _event(name, status, detail):
+        """Build one tool-event dict."""
+        return {"name": name, "status": status, "detail": detail}
+
+    def test_completed_only(self):
+        """Output mentions "Completed steps:" and both tool names."""
+        text = self._render([
+            self._event("read_file", "ok", "file content"),
+            self._event("exec", "ok", "output"),
+        ])
+        assert "Completed steps:" in text
+        assert "read_file" in text
+        assert "exec" in text
+
+    def test_failure_only(self):
+        """Output mentions "Failure:" and the failing event's detail."""
+        text = self._render([self._event("read_file", "error", "not found")])
+        assert "Failure:" in text
+        assert "not found" in text
+
+    def test_completed_and_failure(self):
+        """Mixed events produce both the "Completed steps:" and "Failure:" markers."""
+        text = self._render([
+            self._event("read_file", "ok", "content"),
+            self._event("exec", "error", "timeout"),
+        ])
+        assert "Completed steps:" in text
+        assert "Failure:" in text
+
+    def test_limited_to_last_three(self):
+        """Of five successful events only the last three tool names appear."""
+        text = self._render([self._event(f"tool_{i}", "ok", f"result_{i}") for i in range(5)])
+        for shown in ("tool_2", "tool_3", "tool_4"):
+            assert shown in text
+        for hidden in ("tool_0", "tool_1"):
+            assert hidden not in text
+
+    def test_error_without_failure_event(self):
+        """A result-level error string appears even when no event failed."""
+        text = self._render([self._event("read_file", "ok", "ok")], error="Something went wrong")
+        assert "Something went wrong" in text
+
+    def test_empty_events_with_error(self):
+        """With no events, the error string still appears."""
+        text = self._render(error="Total failure")
+        assert "Total failure" in text
+
+    def test_empty_no_error_returns_fallback(self):
+        """With neither events nor error, the output still contains "Error"."""
+        assert "Error" in self._render()
+
+
+# ---------------------------------------------------------------------------
+# cancel_by_session
+# ---------------------------------------------------------------------------
+
+
+class TestCancelBySession:
+    """Return value of ``cancel_by_session`` for running, absent and finished tasks."""
+
+    @pytest.mark.asyncio
+    async def test_cancels_running_tasks(self, tmp_path):
+        sm = _manager(tmp_path)
+        release = asyncio.Event()
+
+        async def _wait_then_finish(spec):
+            await release.wait()
+            return AgentRunResult(final_content="done", messages=[], stop_reason="completed")
+
+        sm.runner.run = _wait_then_finish
+        for task in ("task1", "task2"):
+            await sm.spawn(task, session_key="s1")
+        assert len(sm._session_tasks.get("s1", set())) == 2
+
+        assert await sm.cancel_by_session("s1") == 2
+        release.set()
+        await asyncio.sleep(0.1)
+
+    @pytest.mark.asyncio
+    async def test_no_tasks_returns_zero(self, tmp_path):
+        """A session key with no tasks yields 0."""
+        assert await _manager(tmp_path).cancel_by_session("nonexistent") == 0
+
+    @pytest.mark.asyncio
+    async def test_already_done_not_counted(self, tmp_path):
+        """Tasks that already finished are not included in the count."""
+        finished = AgentRunResult(final_content="done", messages=[], stop_reason="completed")
+        sm = _manager(tmp_path)
+        sm.runner.run = AsyncMock(return_value=finished)
+        await sm.spawn("task1", session_key="s1")
+        await asyncio.sleep(0.1)  # Wait for completion
+
+        assert await sm.cancel_by_session("s1") == 0
+
+
+# ---------------------------------------------------------------------------
+# get_running_count / get_running_count_by_session
+# ---------------------------------------------------------------------------
+
+
+class TestRunningCounts:
+    """``get_running_count`` and ``get_running_count_by_session`` bookkeeping."""
+
+    @pytest.mark.asyncio
+    async def test_running_count_zero(self, tmp_path):
+        assert _manager(tmp_path).get_running_count() == 0
+
+    @pytest.mark.asyncio
+    async def test_running_count_tracks_tasks(self, tmp_path):
+        sm = _manager(tmp_path)
+        release = asyncio.Event()
+
+        async def _wait_then_finish(spec):
+            await release.wait()
+            return AgentRunResult(final_content="done", messages=[], stop_reason="completed")
+
+        sm.runner.run = _wait_then_finish
+        await sm.spawn("t1", session_key="s1")
+        await sm.spawn("t2", session_key="s1")
+        assert (sm.get_running_count(), sm.get_running_count_by_session("s1")) == (2, 2)
+
+        release.set()
+        await asyncio.sleep(0.1)
+        assert sm.get_running_count() == 0
+
+    @pytest.mark.asyncio
+    async def test_running_count_by_session_nonexistent(self, tmp_path):
+        assert _manager(tmp_path).get_running_count_by_session("nonexistent") == 0
+
+
+# ---------------------------------------------------------------------------
+# _SubagentHook
+# ---------------------------------------------------------------------------
+
+
+class TestSubagentHook:
+    """``_SubagentHook`` callbacks, with and without an attached status record."""
+
+    @pytest.mark.asyncio
+    async def test_before_execute_tools_logs(self):
+        """``before_execute_tools`` completes for a context holding one tool call."""
+        tool_call = MagicMock()
+        tool_call.name = "read_file"
+        tool_call.arguments = {"path": "/tmp/test"}
+        hook = _SubagentHook("t1")
+        # Should not raise
+        await hook.before_execute_tools(_make_hook_context(tool_calls=[tool_call]))
+
+    @pytest.mark.asyncio
+    async def test_after_iteration_updates_status(self):
+        """Iteration, tool events and usage from the context show up on the status."""
+        status = SubagentStatus(
+            task_id="t1", label="test", task_description="do", started_at=time.monotonic(),
+        )
+        ctx = _make_hook_context(
+            iteration=3,
+            tool_events=[{"name": "read_file", "status": "ok", "detail": ""}],
+            usage={"prompt_tokens": 100},
+        )
+        await _SubagentHook("t1", status).after_iteration(ctx)
+        assert status.iteration == 3
+        assert len(status.tool_events) == 1
+        assert status.usage == {"prompt_tokens": 100}
+
+    @pytest.mark.asyncio
+    async def test_after_iteration_no_status_noop(self):
+        """With ``status=None`` the callback completes without raising."""
+        hook = _SubagentHook("t1", status=None)
+        await hook.after_iteration(_make_hook_context(iteration=5))
+
+    @pytest.mark.asyncio
+    async def test_after_iteration_sets_error(self):
+        """An error carried by the context is stored on the status."""
+        status = SubagentStatus(
+            task_id="t1", label="test", task_description="do", started_at=time.monotonic(),
+        )
+        await _SubagentHook("t1", status).after_iteration(_make_hook_context(error="something broke"))
+        assert status.error == "something broke"