test: improve deterministic unit test coverage

2026-06-13 14:23:58 +00:00 · 2026-06-04 16:25:30 +08:00 · 2026-06-04 16:25:30 +08:00 · 24e56fcf07
commit 24e56fcf07
parent 87bd56468c
27 changed files with 279 additions and 213 deletions
--- a/tests/agent/test_auto_compact.py
+++ b/tests/agent/test_auto_compact.py
@ -111,6 +111,13 @@ def _make_fake_compact(
    return _fake_compact


+async def _drain_background_tasks(loop: AgentLoop) -> None:
+    tasks = list(loop._background_tasks)
+    if tasks:
+        await asyncio.gather(*tasks, return_exceptions=True)
+    await asyncio.sleep(0)
+
+
 class TestSessionTTLConfig:
    """Test session TTL configuration."""

@ -269,7 +276,7 @@ class TestAutoCompact:

        loop.consolidator.compact_idle_session = _make_fake_compact(loop)
        loop.auto_compact.check_expired(loop._schedule_background)
-        await asyncio.sleep(0.1)
+        await _drain_background_tasks(loop)

        active_after = loop.sessions.get_or_create("cli:active")
        assert len(active_after.messages) == 1
@ -710,7 +717,7 @@ class TestProactiveAutoCompact:
            loop._schedule_background,
            active_session_keys=active_session_keys,
        )
-        await asyncio.sleep(0.1)
+        await _drain_background_tasks(loop)

    @pytest.mark.asyncio
    async def test_no_check_when_ttl_disabled(self, tmp_path):
@ -815,12 +822,11 @@ class TestProactiveAutoCompact:

        # Second call should skip (key is in _archiving)
        loop.auto_compact.check_expired(loop._schedule_background)
-        await asyncio.sleep(0.05)
        assert archive_count == 1

        # Clean up
        block_forever.set()
-        await asyncio.sleep(0.1)
+        await _drain_background_tasks(loop)
        await loop.close_mcp()

    @pytest.mark.asyncio
--- a/tests/agent/test_consolidate_offset.py
+++ b/tests/agent/test_consolidate_offset.py
@ -1,10 +1,11 @@
 """Test session management with cache-friendly message handling."""

 import asyncio
+from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock

 import pytest
-from pathlib import Path
+
 from nanobot.session.manager import Session, SessionManager

 # Test constants
@ -603,9 +604,10 @@ class TestNewCommandArchival:
        loop.sessions.save(session)

        archived = asyncio.Event()
+        release_archive = asyncio.Event()

        async def _slow_summarize(_messages) -> bool:
-            await asyncio.sleep(0.1)
+            await release_archive.wait()
            archived.set()
            return True

@ -615,5 +617,6 @@ class TestNewCommandArchival:
        await loop._process_message(new_msg)

        assert not archived.is_set()
+        release_archive.set()
        await loop.close_mcp()
        assert archived.is_set()
--- a/tests/agent/test_consolidator.py
+++ b/tests/agent/test_consolidator.py
@ -9,6 +9,7 @@ from nanobot.agent.memory import (
    Consolidator,
    MemoryStore,
 )
+from nanobot.providers.base import LLMResponse
 from nanobot.session.manager import Session
 from nanobot.utils.prompt_templates import render_template

@ -497,11 +498,12 @@ class TestCompactIdleSession:

        # Use a slow LLM response to ensure the lock is held while we check
        started = asyncio.Event()
+        release_chat = asyncio.Event()

        async def slow_chat(**kwargs):
            started.set()
-            await asyncio.sleep(0.1)
-            return MagicMock(content="Summary.", finish_reason="stop")
+            await release_chat.wait()
+            return LLMResponse(content="Summary.", finish_reason="stop")

        mock_provider.chat_with_retry = slow_chat

@ -520,6 +522,7 @@ class TestCompactIdleSession:
        )
        await started.wait()
        assert lock.locked()
+        release_chat.set()
        await task
        assert not lock.locked()

--- a/tests/agent/test_dream.py
+++ b/tests/agent/test_dream.py
@ -134,9 +134,7 @@ class TestEphemeralDirect:
        provider.supports_tools = True
        provider.generation = MagicMock(max_tokens=4096)
        provider.chat_with_retry = AsyncMock(
-            return_value=MagicMock(
-                content="done", finish_reason="stop", tool_calls=[], usage={},
-            )
+            return_value=LLMResponse(content="done", tool_calls=[], finish_reason="stop", usage={})
        )

        with (
@ -168,9 +166,13 @@ class TestEphemeralDirect:
            mock_archive.assert_not_called()

    async def test_non_ephemeral_runs_normally(self, tmp_path, _make_loop):
-        """Without ephemeral, the normal path is untouched — no crash."""
+        """Without ephemeral, the normal path returns the model response."""
        loop, store = _make_loop
-        await loop.process_direct("test", session_key="cli:normal")
+        response = await loop.process_direct("test", session_key="cli:normal")
+
+        assert response is not None
+        assert response.content == "done"
+        loop.provider.chat_with_retry.assert_awaited()

    async def test_ephemeral_sets_ctx_flag(self, tmp_path, _make_loop):
        """Verify that ephemeral=True is forwarded to TurnContext."""
--- a/tests/agent/test_dream_session.py
+++ b/tests/agent/test_dream_session.py
@ -1,6 +1,6 @@
 """Tests for Dream session key generation and rotation."""
-import time
-from datetime import datetime
+from datetime import datetime, timedelta
+from unittest.mock import patch

 from nanobot.agent.memory import MemoryStore

@ -13,9 +13,12 @@ class TestDreamSessionKey:
        datetime.strptime(ts_part, "%Y%m%d-%H%M%S")

    def test_unique_across_calls(self):
-        k1 = MemoryStore.dream_session_key()
-        time.sleep(1.1)
-        k2 = MemoryStore.dream_session_key()
+        now = datetime(2026, 5, 28, 10, 0, 0)
+        with patch("nanobot.agent.memory.datetime") as mock_dt:
+            mock_dt.now.side_effect = [now, now + timedelta(seconds=1)]
+            k1 = MemoryStore.dream_session_key()
+            k2 = MemoryStore.dream_session_key()
+
        assert k1 != k2


@ -62,3 +65,4 @@ class TestPruneDreamSessions:
        sessions_dir = tmp_path / "sessions"
        sessions_dir.mkdir()
        MemoryStore.prune_dream_sessions(sessions_dir, keep=10)
+        assert list(sessions_dir.iterdir()) == []
--- a/tests/agent/test_hook_composite.py
+++ b/tests/agent/test_hook_composite.py
@ -21,7 +21,8 @@ def _ctx() -> AgentHookContext:
@pytest.mark.asyncio
 async def test_base_hook_emit_reasoning_is_noop():
    hook = AgentHook()
-    await hook.emit_reasoning("should not raise")
+    result = await hook.emit_reasoning("should not raise")
+    assert result is None


 # ---------------------------------------------------------------------------
--- a/tests/agent/test_runner_core.py
+++ b/tests/agent/test_runner_core.py
@ -5,12 +5,11 @@ from __future__ import annotations

 import asyncio
 import time
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import AsyncMock, MagicMock, patch

 import pytest

 from nanobot.config.schema import AgentDefaults
-from nanobot.agent.tools.registry import ToolRegistry
 from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest

 _MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
@ -18,7 +17,7 @@ _MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars

@pytest.mark.asyncio
 async def test_runner_preserves_reasoning_fields_and_tool_results():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    provider = MagicMock(spec=LLMProvider)
    captured_second_call: list[dict] = []
@ -75,7 +74,7 @@ async def test_runner_preserves_reasoning_fields_and_tool_results():

@pytest.mark.asyncio
 async def test_runner_returns_max_iterations_fallback():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
@ -106,7 +105,7 @@ async def test_runner_returns_max_iterations_fallback():

@pytest.mark.asyncio
 async def test_runner_times_out_hung_llm_request():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    provider = MagicMock(spec=LLMProvider)

@ -136,15 +135,15 @@ async def test_runner_times_out_hung_llm_request():
@pytest.mark.asyncio
 async def test_runner_does_not_apply_outer_wall_timeout_to_streaming_requests():
    from nanobot.agent.hook import AgentHook, AgentHookContext
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    provider = MagicMock(spec=LLMProvider)
    streamed: list[str] = []

    async def chat_stream_with_retry(*, on_content_delta, **kwargs):
-        await asyncio.sleep(0.08)
+        await asyncio.sleep(0)
        await on_content_delta("still ")
-        await asyncio.sleep(0.08)
+        await asyncio.sleep(0)
        await on_content_delta("alive")
        return LLMResponse(content="still alive", tool_calls=[])

@ -161,25 +160,28 @@ async def test_runner_does_not_apply_outer_wall_timeout_to_streaming_requests():
            streamed.append(delta)

    runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "think for a while"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        hook=StreamingHook(),
-        llm_timeout_s=0.01,
-    ))
+    wait_for = AsyncMock(side_effect=AssertionError("streaming path must not use wait_for"))
+    with patch("nanobot.agent.runner.asyncio.wait_for", wait_for):
+        result = await runner.run(AgentRunSpec(
+            initial_messages=[{"role": "user", "content": "think for a while"}],
+            tools=tools,
+            model="test-model",
+            max_iterations=1,
+            max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+            hook=StreamingHook(),
+            llm_timeout_s=0.01,
+        ))

    assert result.stop_reason == "completed"
    assert result.final_content == "still alive"
    assert streamed == ["still ", "alive"]
    provider.chat_with_retry.assert_not_awaited()
+    wait_for.assert_not_awaited()


@pytest.mark.asyncio
 async def test_runner_replaces_empty_tool_result_with_marker():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    provider = MagicMock(spec=LLMProvider)
    captured_second_call: list[dict] = []
@ -218,7 +220,7 @@ async def test_runner_replaces_empty_tool_result_with_marker():
@pytest.mark.asyncio
 async def test_runner_retries_empty_final_response_with_summary_prompt():
    """Empty responses get 2 silent retries before finalization kicks in."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    provider = MagicMock(spec=LLMProvider)
    calls: list[dict] = []
@ -263,7 +265,7 @@ async def test_runner_retries_empty_final_response_with_summary_prompt():
@pytest.mark.asyncio
 async def test_runner_uses_specific_message_after_empty_finalization_retry():
    """After silent retries + finalization all return empty, stop_reason is empty_final_response."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
    from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE

    provider = MagicMock(spec=LLMProvider)
@ -295,7 +297,7 @@ async def test_runner_empty_response_does_not_break_tool_chain():
    Sequence: tool_call -> empty -> tool_call -> final text.
    The runner should recover via silent retry and complete normally.
    """
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    provider = MagicMock(spec=LLMProvider)
    call_count = 0
@ -352,7 +354,7 @@ async def test_runner_empty_response_does_not_break_tool_chain():
 async def test_runner_accumulates_usage_and_preserves_cached_tokens():
    """Runner should accumulate prompt/completion tokens across iterations
    and preserve cached_tokens from provider responses."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    provider = MagicMock(spec=LLMProvider)
    call_count = {"n": 0}
@ -399,7 +401,7 @@ async def test_runner_binds_on_retry_wait_to_retry_callback_not_progress():
    internal retry diagnostics like "Model request failed, retry in 1s"
    to leak to end-user channels as normal progress updates.
    """
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    captured: dict = {}

@ -441,7 +443,7 @@ async def test_runner_binds_on_retry_wait_to_retry_callback_not_progress():
@pytest.mark.asyncio
 async def test_runner_passes_temperature_to_provider():
    """temperature from AgentRunSpec should reach provider.chat_with_retry."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    captured: dict = {}

@ -470,7 +472,7 @@ async def test_runner_passes_temperature_to_provider():
@pytest.mark.asyncio
 async def test_runner_passes_max_tokens_to_provider():
    """max_tokens from AgentRunSpec should reach provider.chat_with_retry."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    captured: dict = {}

@ -499,7 +501,7 @@ async def test_runner_passes_max_tokens_to_provider():
@pytest.mark.asyncio
 async def test_runner_passes_reasoning_effort_to_provider():
    """reasoning_effort from AgentRunSpec should reach provider.chat_with_retry."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    captured: dict = {}

--- a/tests/agent/test_runner_injections.py
+++ b/tests/agent/test_runner_injections.py
@ -4,7 +4,6 @@ from __future__ import annotations

 import asyncio
 import base64
-import time
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
@ -35,15 +34,15 @@ def _make_loop(tmp_path):

    with patch("nanobot.agent.loop.ContextBuilder"), \
         patch("nanobot.agent.loop.SessionManager"), \
-         patch("nanobot.agent.loop.SubagentManager") as MockSubMgr:
-        MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0)
+         patch("nanobot.agent.loop.SubagentManager") as mock_sub_mgr:
+        mock_sub_mgr.return_value.cancel_by_session = AsyncMock(return_value=0)
        loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path)
    return loop

@pytest.mark.asyncio
 async def test_drain_injections_returns_empty_when_no_callback():
    """No injection_callback → empty list."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    provider = MagicMock()
    runner = AgentRunner(provider)
@ -61,7 +60,7 @@ async def test_drain_injections_returns_empty_when_no_callback():
@pytest.mark.asyncio
 async def test_drain_injections_extracts_content_from_inbound_messages():
    """Should extract .content from InboundMessage objects."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
    from nanobot.bus.events import InboundMessage

    provider = MagicMock()
@ -92,7 +91,7 @@ async def test_drain_injections_extracts_content_from_inbound_messages():
@pytest.mark.asyncio
 async def test_drain_injections_passes_limit_to_callback_when_supported():
    """Limit-aware callbacks can preserve overflow in their own queue."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTIONS_PER_TURN
+    from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN, AgentRunner, AgentRunSpec
    from nanobot.bus.events import InboundMessage

    provider = MagicMock()
@ -127,7 +126,7 @@ async def test_drain_injections_passes_limit_to_callback_when_supported():
@pytest.mark.asyncio
 async def test_drain_injections_skips_empty_content():
    """Messages with blank content should be filtered out."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
    from nanobot.bus.events import InboundMessage

    provider = MagicMock()
@ -156,7 +155,7 @@ async def test_drain_injections_skips_empty_content():
@pytest.mark.asyncio
 async def test_drain_injections_handles_callback_exception():
    """If the callback raises, return empty list (error is logged)."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    provider = MagicMock()
    runner = AgentRunner(provider)
@ -178,7 +177,7 @@ async def test_drain_injections_handles_callback_exception():
@pytest.mark.asyncio
 async def test_checkpoint1_injects_after_tool_execution():
    """Follow-up messages are injected after tool execution, before next LLM call."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
    from nanobot.bus.events import InboundMessage

    provider = MagicMock()
@ -231,8 +230,8 @@ async def test_checkpoint1_injects_after_tool_execution():
@pytest.mark.asyncio
 async def test_checkpoint2_injects_after_final_response_with_resuming_stream():
    """After final response, if injections exist, stream_end should get resuming=True."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
    from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
    from nanobot.bus.events import InboundMessage

    provider = MagicMock()
@ -290,7 +289,7 @@ async def test_checkpoint2_injects_after_final_response_with_resuming_stream():
@pytest.mark.asyncio
 async def test_checkpoint2_preserves_final_response_in_history_before_followup():
    """A follow-up injected after a final answer must still see that answer in history."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
    from nanobot.bus.events import InboundMessage

    provider = MagicMock()
@ -405,7 +404,7 @@ async def test_loop_injected_followup_preserves_image_media(tmp_path):
@pytest.mark.asyncio
 async def test_runner_merges_multiple_injected_user_messages_without_losing_media():
    """Multiple injected follow-ups should not create lossy consecutive user messages."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    provider = MagicMock()
    call_count = {"n": 0}
@ -468,7 +467,7 @@ async def test_runner_merges_multiple_injected_user_messages_without_losing_medi
@pytest.mark.asyncio
 async def test_injection_cycles_capped_at_max():
    """Injection cycles should be capped at _MAX_INJECTION_CYCLES."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES
+    from nanobot.agent.runner import _MAX_INJECTION_CYCLES, AgentRunner, AgentRunSpec
    from nanobot.bus.events import InboundMessage

    provider = MagicMock()
@ -509,7 +508,7 @@ async def test_injection_cycles_capped_at_max():
@pytest.mark.asyncio
 async def test_no_injections_flag_is_false_by_default():
    """had_injections should be False when no injection callback or no messages."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    provider = MagicMock()

@ -607,16 +606,12 @@ async def test_followup_routed_to_pending_queue(tmp_path):
    msg = InboundMessage(channel="discord", sender_id="u", chat_id="c", content="follow-up")
    await loop.bus.publish_inbound(msg)

-    deadline = time.time() + 2
-    while pending.empty() and time.time() < deadline:
-        await asyncio.sleep(0.01)
+    queued_msg = await asyncio.wait_for(pending.get(), timeout=2)

    loop.stop()
    await asyncio.wait_for(run_task, timeout=2)

    assert loop._dispatch.await_count == 0
-    assert not pending.empty()
-    queued_msg = pending.get_nowait()
    assert queued_msg.content == "follow-up"
    assert queued_msg.session_key == UNIFIED_SESSION_KEY

@ -625,9 +620,9 @@ async def test_followup_routed_to_pending_queue(tmp_path):
 async def test_pending_queue_preserves_overflow_for_next_injection_cycle(tmp_path):
    """Pending queue should leave overflow messages queued for later drains."""
    from nanobot.agent.loop import AgentLoop
+    from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN
    from nanobot.bus.events import InboundMessage
    from nanobot.bus.queue import MessageBus
-    from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN

    bus = MessageBus()
    provider = MagicMock()
@ -680,7 +675,12 @@ async def test_pending_queue_full_falls_back_to_queued_task(tmp_path):
    from nanobot.bus.events import InboundMessage

    loop = _make_loop(tmp_path)
-    loop._dispatch = AsyncMock()  # type: ignore[method-assign]
+    dispatched = asyncio.Event()
+
+    async def _dispatch(_msg):
+        dispatched.set()
+
+    loop._dispatch = AsyncMock(side_effect=_dispatch)  # type: ignore[method-assign]

    pending = asyncio.Queue(maxsize=1)
    pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="already queued"))
@ -690,9 +690,7 @@ async def test_pending_queue_full_falls_back_to_queued_task(tmp_path):
    msg = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up")
    await loop.bus.publish_inbound(msg)

-    deadline = time.time() + 2
-    while loop._dispatch.await_count == 0 and time.time() < deadline:
-        await asyncio.sleep(0.01)
+    await asyncio.wait_for(dispatched.wait(), timeout=2)

    loop.stop()
    await asyncio.wait_for(run_task, timeout=2)
@ -750,7 +748,7 @@ async def test_dispatch_republishes_leftover_queue_messages(tmp_path):
@pytest.mark.asyncio
 async def test_drain_injections_on_fatal_tool_error():
    """Pending injections should be drained even when a fatal tool error occurs."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
    from nanobot.bus.events import InboundMessage

    provider = MagicMock()
@ -803,7 +801,7 @@ async def test_drain_injections_on_fatal_tool_error():
@pytest.mark.asyncio
 async def test_drain_injections_on_llm_error():
    """Pending injections should be drained when the LLM returns an error finish_reason."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
    from nanobot.bus.events import InboundMessage

    provider = MagicMock()
@ -858,7 +856,7 @@ async def test_drain_injections_on_llm_error():
@pytest.mark.asyncio
 async def test_drain_injections_on_empty_final_response():
    """Pending injections should be drained when the runner exits due to empty response."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_EMPTY_RETRIES
+    from nanobot.agent.runner import _MAX_EMPTY_RETRIES, AgentRunner, AgentRunSpec
    from nanobot.bus.events import InboundMessage

    provider = MagicMock()
@ -913,7 +911,7 @@ async def test_drain_injections_on_max_iterations():
    injections are appended to messages but not processed by the LLM.
    The key point is they are consumed from the queue to prevent re-publish.
    """
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
    from nanobot.bus.events import InboundMessage

    provider = MagicMock()
@ -965,7 +963,7 @@ async def test_drain_injections_on_max_iterations():
 async def test_drain_injections_set_flag_when_followup_arrives_after_last_iteration():
    """Late follow-ups drained in max_iterations should still flip had_injections."""
    from nanobot.agent.hook import AgentHook
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
    from nanobot.bus.events import InboundMessage

    provider = MagicMock()
@ -1027,7 +1025,7 @@ async def test_drain_injections_set_flag_when_followup_arrives_after_last_iterat
@pytest.mark.asyncio
 async def test_injection_cycle_cap_on_error_path():
    """Injection cycles should be capped even when every iteration hits an LLM error."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES
+    from nanobot.agent.runner import _MAX_INJECTION_CYCLES, AgentRunner, AgentRunSpec
    from nanobot.bus.events import InboundMessage

    provider = MagicMock()
--- a/tests/agent/test_session_manager_history.py
+++ b/tests/agent/test_session_manager_history.py
@ -110,6 +110,7 @@ def test_get_history_drops_orphan_tool_results_when_window_cuts_tool_calls():

    history = session.get_history(max_messages=100)
    _assert_no_orphans(history)
+    assert history[-1]["content"] == "new telegram question"


 # --- Positive test: legitimate pairs survive trimming ---
@ -363,6 +364,7 @@ def test_window_cuts_mid_tool_group():
    # leaving orphan tool results for split_a at the front.
    history = session.get_history(max_messages=6)
    _assert_no_orphans(history)
+    assert history[0]["role"] == "user"


 # --- Image breadcrumbs: media kwarg is synthesized into content for replay ---
@ -599,8 +601,6 @@ def test_enforce_file_cap_no_duplicate_archive_in_else_branch():
    archive_fn = MagicMock()
    session.enforce_file_cap(on_archive=archive_fn, limit=6)

-    # Verify retained messages
-    retained_contents = [m["content"] for m in session.messages]
    assert len(session.messages) <= 6

    # Verify archived messages have NO overlap with retained
--- a/tests/agent/test_subagent_lifecycle.py
+++ b/tests/agent/test_subagent_lifecycle.py
@ -17,7 +17,6 @@ from nanobot.agent.subagent import (
 from nanobot.bus.queue import MessageBus
 from nanobot.providers.base import LLMProvider

-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@ -51,6 +50,13 @@ def _make_hook_context(**overrides) -> AgentHookContext:
    return AgentHookContext(**defaults)


+async def _drain_subagent_tasks(sm: SubagentManager) -> None:
+    tasks = list(sm._running_tasks.values())
+    if tasks:
+        await asyncio.gather(*tasks, return_exceptions=True)
+    await asyncio.sleep(0)
+
+
 # ---------------------------------------------------------------------------
 # SubagentStatus defaults
 # ---------------------------------------------------------------------------
@ -114,7 +120,7 @@ class TestSpawn:
        assert len(sm._running_tasks) == 1

        block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
        assert len(sm._running_tasks) == 0

    @pytest.mark.asyncio
@ -124,7 +130,7 @@ class TestSpawn:
            final_content="done", messages=[], stop_reason="completed",
        ))
        await sm.spawn("my task")
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
        # Status cleaned up after task completes
        assert len(sm._task_statuses) == 0

@ -142,7 +148,7 @@ class TestSpawn:
        assert len(sm._session_tasks["s1"]) == 1

        block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
        assert "s1" not in sm._session_tasks

    @pytest.mark.asyncio
@ -158,7 +164,7 @@ class TestSpawn:
        assert len(sm._session_tasks) == 0

        block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)

    @pytest.mark.asyncio
    async def test_label_defaults_to_truncated_task(self, tmp_path):
@ -175,7 +181,7 @@ class TestSpawn:
        assert status.label == long_task[:30] + "..."

        block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)

    @pytest.mark.asyncio
    async def test_custom_label(self, tmp_path):
@ -191,7 +197,7 @@ class TestSpawn:
        assert status.label == "Custom Label"

        block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)

    @pytest.mark.asyncio
    async def test_cleanup_callback_removes_all_entries(self, tmp_path):
@ -200,7 +206,7 @@ class TestSpawn:
            final_content="done", messages=[], stop_reason="completed",
        ))
        await sm.spawn("task", session_key="s1")
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
        assert len(sm._running_tasks) == 0
        assert len(sm._task_statuses) == 0
        assert len(sm._session_tasks) == 0
@ -452,7 +458,7 @@ class TestCancelBySession:
        count = await sm.cancel_by_session("s1")
        assert count == 2
        block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)

    @pytest.mark.asyncio
    async def test_no_tasks_returns_zero(self, tmp_path):
@ -467,7 +473,7 @@ class TestCancelBySession:
            final_content="done", messages=[], stop_reason="completed",
        ))
        await sm.spawn("task1", session_key="s1")
-        await asyncio.sleep(0.1)  # Wait for completion
+        await _drain_subagent_tasks(sm)

        count = await sm.cancel_by_session("s1")
        assert count == 0
@ -499,7 +505,7 @@ class TestRunningCounts:
        assert sm.get_running_count_by_session("s1") == 2

        block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
        assert sm.get_running_count() == 0

    @pytest.mark.asyncio
@ -521,8 +527,9 @@ class TestSubagentHook:
        tool_call.name = "read_file"
        tool_call.arguments = {"path": "/tmp/test"}
        ctx = _make_hook_context(tool_calls=[tool_call])
-        # Should not raise
-        await hook.before_execute_tools(ctx)
+        result = await hook.before_execute_tools(ctx)
+        assert result is None
+        assert ctx.tool_calls == [tool_call]

    @pytest.mark.asyncio
    async def test_after_iteration_updates_status(self):
@ -544,8 +551,9 @@ class TestSubagentHook:
    async def test_after_iteration_no_status_noop(self):
        hook = _SubagentHook("t1", status=None)
        ctx = _make_hook_context(iteration=5)
-        # Should not raise
-        await hook.after_iteration(ctx)
+        result = await hook.after_iteration(ctx)
+        assert result is None
+        assert ctx.iteration == 5

    @pytest.mark.asyncio
    async def test_after_iteration_sets_error(self):
--- a/tests/agent/test_task_cancel.py
+++ b/tests/agent/test_task_cancel.py
@ -27,8 +27,8 @@ def _make_loop(*, tools_config=None):

    with patch("nanobot.agent.loop.ContextBuilder"), \
         patch("nanobot.agent.loop.SessionManager"), \
-         patch("nanobot.agent.loop.SubagentManager") as MockSubMgr:
-        MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0)
+         patch("nanobot.agent.loop.SubagentManager") as mock_sub_mgr:
+        mock_sub_mgr.return_value.cancel_by_session = AsyncMock(return_value=0)
        loop = AgentLoop(bus=bus, provider=provider, workspace=workspace, tools_config=tools_config)
    return loop, bus

@ -103,8 +103,8 @@ class TestHandleStop:

 class TestDispatch:
    def test_exec_tool_not_registered_when_disabled(self):
-        from nanobot.config.schema import ToolsConfig
        from nanobot.agent.tools.shell import ExecToolConfig
+        from nanobot.config.schema import ToolsConfig

        loop, _bus = _make_loop(tools_config=ToolsConfig(exec=ExecToolConfig(enable=False)))

@ -166,10 +166,14 @@ class TestDispatch:

        loop, bus = _make_loop()
        order = []
+        first_started = asyncio.Event()
+        release_first = asyncio.Event()

        async def mock_process(m, **kwargs):
            order.append(f"start-{m.content}")
-            await asyncio.sleep(0.05)
+            if m.content == "a":
+                first_started.set()
+                await release_first.wait()
            order.append(f"end-{m.content}")
            return OutboundMessage(channel="test", chat_id="c1", content=m.content)

@ -178,7 +182,12 @@ class TestDispatch:
        msg2 = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="b")

        t1 = asyncio.create_task(loop._dispatch(msg1))
+        await asyncio.wait_for(first_started.wait(), timeout=1.0)
        t2 = asyncio.create_task(loop._dispatch(msg2))
+        await asyncio.sleep(0)
+        assert order == ["start-a"]
+
+        release_first.set()
        await asyncio.gather(t1, t2)
        assert order == ["start-a", "end-a", "start-b", "end-b"]

@ -286,8 +295,8 @@ class TestSubagentCancellation:
    @pytest.mark.asyncio
    async def test_subagent_exec_tool_not_registered_when_disabled(self, tmp_path):
        from nanobot.agent.subagent import SubagentManager
-        from nanobot.bus.queue import MessageBus
        from nanobot.agent.tools.shell import ExecToolConfig
+        from nanobot.bus.queue import MessageBus
        from nanobot.config.schema import ToolsConfig

        bus = MessageBus()
--- a/tests/agent/tools/test_self_tool.py
+++ b/tests/agent/tools/test_self_tool.py
@ -231,12 +231,14 @@ class TestModifyRestricted:
    async def test_modify_string_int_coerced(self):
        tool = _make_tool()
        result = await tool.execute(action="set", key="max_iterations", value="80")
+        assert "Set max_iterations" in result
        assert tool._runtime_state.max_iterations == 80

    @pytest.mark.asyncio
    async def test_modify_context_window_valid(self):
        tool = _make_tool()
        result = await tool.execute(action="set", key="context_window_tokens", value=131072)
+        assert "Set context_window_tokens" in result
        assert tool._runtime_state.context_window_tokens == 131072

    @pytest.mark.asyncio
@ -337,12 +339,14 @@ class TestModifyFree:
    async def test_modify_allows_list(self):
        tool = _make_tool()
        result = await tool.execute(action="set", key="items", value=[1, 2, 3])
+        assert result == "Set scratchpad.items = [1, 2, 3]"
        assert tool._runtime_state._runtime_vars["items"] == [1, 2, 3]

    @pytest.mark.asyncio
    async def test_modify_allows_dict(self):
        tool = _make_tool()
        result = await tool.execute(action="set", key="data", value={"a": 1})
+        assert result == "Set scratchpad.data = {'a': 1}"
        assert tool._runtime_state._runtime_vars["data"] == {"a": 1}

    @pytest.mark.asyncio
@ -743,7 +747,10 @@ class TestSubagentHookStatus:

        hook = _SubagentHook("test")
        context = AgentHookContext(iteration=1, messages=[])
-        await hook.after_iteration(context)  # should not raise
+        result = await hook.after_iteration(context)
+
+        assert result is None
+        assert context.iteration == 1


 # ---------------------------------------------------------------------------
--- a/tests/agent/tools/test_subagent_tools.py
+++ b/tests/agent/tools/test_subagent_tools.py
@ -16,8 +16,8 @@ _MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
 async def test_subagent_exec_tool_receives_allowed_env_keys(tmp_path):
    """allowed_env_keys from ExecToolConfig must be forwarded to the subagent's ExecTool."""
    from nanobot.agent.subagent import SubagentManager, SubagentStatus
-    from nanobot.bus.queue import MessageBus
    from nanobot.agent.tools.shell import ExecToolConfig
+    from nanobot.bus.queue import MessageBus
    from nanobot.config.schema import ToolsConfig

    bus = MessageBus()
@ -340,8 +340,8 @@ async def test_drain_pending_blocks_while_subagents_running(tmp_path):
    # With sub-agents running and an empty queue, it should block
    drain_task = asyncio.create_task(injection_callback())

-    # Give it a moment to enter the blocking wait
-    await asyncio.sleep(0.05)
+    # Let the task enter the blocking queue wait.
+    await asyncio.sleep(0)

    # Should still be running (blocked on pending_queue.get())
    assert not drain_task.done(), "drain should block while sub-agents are running"
@ -467,9 +467,12 @@ async def test_drain_pending_timeout(tmp_path):

    assert injection_callback is not None

-    # Patch the timeout to be very short for testing
-    with patch("nanobot.agent.loop.asyncio.wait_for") as mock_wait:
-        mock_wait.side_effect = asyncio.TimeoutError
+    # Patch the timeout path without leaking the queue.get() coroutine.
+    async def _timeout(awaitable, timeout):
+        awaitable.close()
+        raise asyncio.TimeoutError
+
+    with patch("nanobot.agent.loop.asyncio.wait_for", side_effect=_timeout):
        results = await injection_callback()
        assert results == []

--- a/tests/channels/test_channel_plugins.py
+++ b/tests/channels/test_channel_plugins.py
@ -1016,6 +1016,8 @@ async def test_validate_allow_from_allows_empty_list():

    # Should not raise — empty list defers to pairing store
    mgr._validate_allow_from()
+    assert list(mgr.channels) == ["test"]
+    assert mgr.channels["test"].config.allow_from == []


@pytest.mark.asyncio
@ -1033,6 +1035,8 @@ async def test_validate_allow_from_passes_with_asterisk():

    # Should not raise
    mgr._validate_allow_from()
+    assert list(mgr.channels) == ["test"]
+    assert mgr.channels["test"].config.allow_from == ["*"]


@pytest.mark.asyncio
@ -1049,6 +1053,8 @@ async def test_validate_allow_from_allows_empty_dict_allow_from():
    mgr._dispatch_task = None

    mgr._validate_allow_from()
+    assert list(mgr.channels) == ["test"]
+    assert mgr.channels["test"].config["allow_from"] == []


@pytest.mark.asyncio
@ -1079,6 +1085,8 @@ async def test_validate_allow_from_allows_missing_allow_from():

    # Should not raise — pairing-only mode
    mgr._validate_allow_from()
+    assert list(mgr.channels) == ["test"]
+    assert "allow_from" not in mgr.channels["test"].config


@pytest.mark.asyncio
@ -1206,6 +1214,8 @@ async def test_start_channel_logs_error_on_failure():

    # Should not raise, just log error
    await mgr._start_channel("failing", ch)
+    assert mgr.channels == {}
+    assert mgr._dispatch_task is None


@pytest.mark.asyncio
@ -1237,6 +1247,8 @@ async def test_stop_all_handles_channel_exception():

    # Should not raise even if channel.stop() raises
    await mgr.stop_all()
+    assert list(mgr.channels) == ["stopfailing"]
+    assert mgr._dispatch_task is None


@pytest.mark.asyncio
--- a/tests/channels/test_feishu_reaction.py
+++ b/tests/channels/test_feishu_reaction.py
@ -105,6 +105,7 @@ class TestRemoveReactionSync:

        # Should not raise
        ch._remove_reaction_sync("om_001", "rx_42")
+        ch._client.im.v1.message_reaction.delete.assert_called_once()

    def test_handles_exception_gracefully(self):
        ch = _make_channel()
@ -112,6 +113,7 @@ class TestRemoveReactionSync:

        # Should not raise
        ch._remove_reaction_sync("om_001", "rx_42")
+        ch._client.im.v1.message_reaction.delete.assert_called_once()


 # ── _remove_reaction (async) ────────────────────────────────────────────────
--- a/tests/channels/test_qq_media.py
+++ b/tests/channels/test_qq_media.py
@ -118,11 +118,13 @@ async def test_send_exception_caught_not_raised() -> None:
    channel = QQChannel(QQConfig(app_id="app", secret="secret", allow_from=["*"]), MessageBus())
    channel._client = _FakeClient()

-    with patch.object(channel, "_send_text_only", new_callable=AsyncMock, side_effect=RuntimeError("boom")):
+    with patch.object(
+        channel, "_send_text_only", new_callable=AsyncMock, side_effect=RuntimeError("boom")
+    ) as send_text:
        await channel.send(
            OutboundMessage(channel="qq", chat_id="user1", content="hello")
        )
-    # No exception raised — test passes if we get here.
+    send_text.assert_awaited_once()


@pytest.mark.asyncio
@ -260,6 +262,8 @@ async def test_on_message_exception_caught_not_raised() -> None:
    bad_data = SimpleNamespace(id="x1", content="hi")
    # Should not raise
    await channel._on_message(bad_data, is_group=False)
+    assert channel._client.api.c2c_calls == []
+    assert channel._client.api.group_calls == []


@pytest.mark.asyncio
--- a/tests/channels/test_websocket_channel.py
+++ b/tests/channels/test_websocket_channel.py
@ -686,6 +686,7 @@ async def test_send_missing_connection_is_noop_without_error() -> None:
    channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus, gateway=_basic_handler(bus))
    msg = OutboundMessage(channel="websocket", chat_id="missing", content="x")
    await channel.send(msg)
+    assert channel._subs == {}


@pytest.mark.asyncio
@ -1006,7 +1007,7 @@ async def test_send_reasoning_without_subscribers_is_noop() -> None:

    await channel.send_reasoning_delta("unattached", "thinking", None)
    await channel.send_reasoning_end("unattached", None)
-    # No subscribers, no exception, no send.
+    assert channel._subs == {}


@pytest.mark.asyncio
@ -1299,6 +1300,7 @@ async def test_send_delta_missing_connection_is_noop() -> None:
    channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"], "streaming": True}, bus, gateway=_basic_handler(bus))
    # No exception, no error — just a no-op
    await channel.send_delta("nonexistent", "chunk", {"_stream_delta": True, "_stream_id": "s1"})
+    assert channel._subs == {}


@pytest.mark.asyncio
@ -1308,6 +1310,7 @@ async def test_stop_is_idempotent() -> None:
    # stop() before start() should not raise
    await channel.stop()
    await channel.stop()
+    assert channel._subs == {}


@pytest.mark.asyncio
--- a/tests/channels/test_wecom_channel.py
+++ b/tests/channels/test_wecom_channel.py
@ -420,7 +420,7 @@ async def test_send_exception_caught_not_raised() -> None:
    await channel.send(
        OutboundMessage(channel="wecom", chat_id="chat1", content="fail test")
    )
-    # No exception — test passes if we reach here.
+    client.reply_stream.assert_called_once()


 # ── _process_message() ──────────────────────────────────────────────
--- a/tests/cli/test_restart_command.py
+++ b/tests/cli/test_restart_command.py
@ -32,15 +32,6 @@ def _make_loop():
    return loop, bus


-async def _wait_until(predicate, *, timeout: float = 0.2, interval: float = 0.01) -> None:
-    deadline = time.monotonic() + timeout
-    while time.monotonic() < deadline:
-        if predicate():
-            return
-        await asyncio.sleep(interval)
-    assert predicate()
-
-
 class TestRestartCommand:

    @pytest.mark.asyncio
@ -91,13 +82,29 @@ class TestRestartCommand:
        loop, bus = _make_loop()
        msg = InboundMessage(channel="telegram", sender_id="u1", chat_id="c1", content="/restart")

+        async def _fast_sleep(_delay: float) -> None:
+            return None
+
+        scheduled: list[asyncio.Task] = []
+
+        def _capture_task(coro):
+            task = asyncio.create_task(coro)
+            scheduled.append(task)
+            return task
+
+        fake_asyncio = SimpleNamespace(
+            sleep=_fast_sleep,
+            create_task=_capture_task,
+        )
+
        with patch.object(loop, "_dispatch", new_callable=AsyncMock) as mock_dispatch, \
+             patch("nanobot.command.builtin.asyncio", new=fake_asyncio), \
             patch("nanobot.command.builtin.os.execv"):
            await bus.publish_inbound(msg)

            loop._running = True
            run_task = asyncio.create_task(loop.run())
-            await asyncio.sleep(0.1)
+            out = await asyncio.wait_for(bus.consume_outbound(), timeout=1.0)
            loop._running = False
            run_task.cancel()
            try:
@ -106,8 +113,9 @@ class TestRestartCommand:
                pass

            mock_dispatch.assert_not_called()
-            out = await asyncio.wait_for(bus.consume_outbound(), timeout=1.0)
            assert "Restarting" in out.content
+            assert scheduled
+            await scheduled[0]

    @pytest.mark.asyncio
    async def test_status_intercepted_in_run_loop(self):
@ -120,7 +128,7 @@ class TestRestartCommand:

            loop._running = True
            run_task = asyncio.create_task(loop.run())
-            await asyncio.sleep(0.1)
+            out = await asyncio.wait_for(bus.consume_outbound(), timeout=1.0)
            loop._running = False
            run_task.cancel()
            try:
@ -129,7 +137,6 @@ class TestRestartCommand:
                pass

            mock_dispatch.assert_not_called()
-            out = await asyncio.wait_for(bus.consume_outbound(), timeout=1.0)
            assert "nanobot" in out.content.lower() or "Model" in out.content

    @pytest.mark.asyncio
@ -138,7 +145,7 @@ class TestRestartCommand:
        loop, _bus = _make_loop()

        run_task = asyncio.create_task(loop.run())
-        await asyncio.sleep(0.1)
+        await asyncio.sleep(0)
        run_task.cancel()

        with pytest.raises(asyncio.CancelledError):
--- a/tests/command/test_model_command.py
+++ b/tests/command/test_model_command.py
@ -134,7 +134,15 @@ async def test_model_command_registered_as_exact_and_prefix(tmp_path) -> None:
    out = await router.dispatch(_ctx(loop, "/model fast"))

    assert out is not None
-    assert "Switched model preset" in out.content
+    assert out.channel == "cli"
+    assert out.chat_id == "direct"
+    assert out.metadata == {"render_as": "text"}
+    assert out.content == "\n".join([
+        "Switched model preset to `fast`.",
+        "- Model: `openai/gpt-4.1`",
+        "- Context window: 32768",
+        "- Max output tokens: 4096",
+    ])
    assert loop.model_preset == "fast"


@ -150,7 +158,10 @@ async def test_goal_command_shows_usage_without_args(tmp_path) -> None:
    loop = _make_loop(tmp_path)
    out = await cmd_goal(_ctx(loop, "/goal"))
    assert out is not None
-    assert "Usage: /goal" in out.content
+    assert out.channel == "cli"
+    assert out.chat_id == "direct"
+    assert out.metadata == {"render_as": "text"}
+    assert out.content == "Usage: /goal <long-running task description>"


@pytest.mark.asyncio
@ -158,7 +169,13 @@ async def test_goal_command_rejects_mid_turn_without_session(tmp_path) -> None:
    loop = _make_loop(tmp_path)
    out = await cmd_goal(_ctx(loop, "/goal do work", args="do work"))
    assert out is not None
-    assert "/stop" in out.content
+    assert out.channel == "cli"
+    assert out.chat_id == "direct"
+    assert out.metadata == {"render_as": "text"}
+    assert out.content == (
+        "A task is already running for this chat. "
+        "Use `/stop` first, then send `/goal <long-running task description>` again."
+    )


@pytest.mark.asyncio
--- a/tests/command/test_router_dispatchable.py
+++ b/tests/command/test_router_dispatchable.py
@ -118,6 +118,11 @@ class TestMidTurnCommandDispatchedDirectly:
        )
        result = await router.dispatch(ctx)
        assert result is not None
+        assert result.channel == "test"
+        assert result.chat_id == "chat1"
+        assert result.metadata["render_as"] == "text"
+        assert "/new" in result.content
+        assert "/pairing [list|approve <code>|deny <code>|revoke <user_id>]" in result.content

    @pytest.mark.asyncio
    async def test_prefix_command_args_populated(self, router: CommandRouter) -> None:
@ -211,6 +216,10 @@ class TestPairingCommandDispatch:
        result = await router.dispatch(ctx)
        assert result is not None
        assert "Approved" in result.content
+        assert result.content == (
+            "Approved pairing code `ABCD-EFGH` — 123 can now access telegram"
+        )
+        assert result.metadata.get("_pairing_command") is True

    @pytest.mark.asyncio
    async def test_pairing_revoke_dispatched(
@ -229,3 +238,5 @@ class TestPairingCommandDispatch:
        result = await router.dispatch(ctx)
        assert result is not None
        assert "Revoked" in result.content
+        assert result.content == "Revoked 123 from telegram"
+        assert result.metadata.get("_pairing_command") is True
--- a/tests/pairing/test_store.py
+++ b/tests/pairing/test_store.py
@ -1,5 +1,3 @@
-import time
-
 import pytest

 from nanobot.pairing import __all__ as pairing_all
@ -32,12 +30,15 @@ class TestGenerateCode:
        codes = {store.generate_code("telegram", str(i)) for i in range(20)}
        assert len(codes) == 20

-    def test_ttl_expiration(self) -> None:
+    def test_ttl_expiration(self, monkeypatch) -> None:
+        clock = {"now": 1_000.0}
+        monkeypatch.setattr(store.time, "time", lambda: clock["now"])
+
        code = store.generate_code("telegram", "123", ttl=1)
-        assert store.approve_code(code) is not None
+        assert store.approve_code(code) == ("telegram", "123")

        code2 = store.generate_code("telegram", "456", ttl=0)
-        time.sleep(0.1)
+        clock["now"] += 0.1
        assert store.approve_code(code2) is None


@ -59,9 +60,12 @@ class TestApproveDeny:
    def test_deny_unknown_returns_false(self) -> None:
        assert store.deny_code("UNKNOWN") is False

-    def test_approve_expired_returns_none(self) -> None:
+    def test_approve_expired_returns_none(self, monkeypatch) -> None:
+        clock = {"now": 1_000.0}
+        monkeypatch.setattr(store.time, "time", lambda: clock["now"])
+
        code = store.generate_code("telegram", "123", ttl=0)
-        time.sleep(0.1)
+        clock["now"] += 0.1
        assert store.approve_code(code) is None


@ -91,9 +95,12 @@ class TestListPending:
        channels = {p["channel"] for p in pending}
        assert channels == {"telegram", "discord"}

-    def test_expired_not_listed(self) -> None:
+    def test_expired_not_listed(self, monkeypatch) -> None:
+        clock = {"now": 1_000.0}
+        monkeypatch.setattr(store.time, "time", lambda: clock["now"])
+
        store.generate_code("telegram", "123", ttl=0)
-        time.sleep(0.1)
+        clock["now"] += 0.1
        assert store.list_pending() == []


--- a/tests/test_api_stream.py
+++ b/tests/test_api_stream.py
@ -233,7 +233,7 @@ async def test_stream_segment_end_does_not_close_sse(aiohttp_client) -> None:
        assert on_stream_end is not None
        await on_stream("planning")
        await on_stream_end(resuming=True)
-        await asyncio.sleep(0.05)
+        await asyncio.sleep(0)
        await on_stream(" final")
        await on_stream_end(resuming=False)
        return "planning final"
--- a/tests/test_msteams.py
+++ b/tests/test_msteams.py
@ -851,10 +851,11 @@ async def test_validate_inbound_auth_accepts_observed_botframework_shape(make_ch
        headers={"kid": jwk["kid"]},
    )

-    await ch._validate_inbound_auth(
+    result = await ch._validate_inbound_auth(
        f"Bearer {token}",
        {"serviceUrl": service_url},
    )
+    assert result is None


@pytest.mark.asyncio
--- a/tests/test_openai_api.py
+++ b/tests/test_openai_api.py
@ -235,10 +235,14 @@ async def test_followup_requests_share_same_session_key(aiohttp_client) -> None:
@pytest.mark.asyncio
 async def test_fixed_session_requests_are_serialized(aiohttp_client) -> None:
    order: list[str] = []
+    first_started = asyncio.Event()
+    release_first = asyncio.Event()

    async def slow_process(content, session_key="", channel="", chat_id="", **kwargs):
        order.append(f"start:{content}")
-        await asyncio.sleep(0.1)
+        if content == "first":
+            first_started.set()
+            await release_first.wait()
        order.append(f"end:{content}")
        return content

@ -256,14 +260,17 @@ async def test_fixed_session_requests_are_serialized(aiohttp_client) -> None:
            json={"messages": [{"role": "user", "content": msg}]},
        )

-    r1, r2 = await asyncio.gather(send("first"), send("second"))
+    first = asyncio.create_task(send("first"))
+    await asyncio.wait_for(first_started.wait(), timeout=1.0)
+    second = asyncio.create_task(send("second"))
+    await asyncio.sleep(0)
+    assert order == ["start:first"]
+
+    release_first.set()
+    r1, r2 = await asyncio.gather(first, second)
    assert r1.status == 200
    assert r2.status == 200
-    # Verify serialization: one process must fully finish before the other starts
-    if order[0] == "start:first":
-        assert order.index("end:first") < order.index("start:second")
-    else:
-        assert order.index("end:second") < order.index("start:first")
+    assert order == ["start:first", "end:first", "start:second", "end:second"]


@pytest.mark.skipif(not HAS_AIOHTTP, reason="aiohttp not installed")
--- a/tests/tools/test_edit_advanced.py
+++ b/tests/tools/test_edit_advanced.py
@ -9,12 +9,11 @@
 """

 import os
-import time

 import pytest

-from nanobot.agent.tools.filesystem import EditFileTool, ReadFileTool, _find_match
 from nanobot.agent.tools import file_state
+from nanobot.agent.tools.filesystem import EditFileTool, ReadFileTool, _find_match


@pytest.fixture(autouse=True)
@ -214,59 +213,6 @@ class TestEditDiagnostics:
 # ---------------------------------------------------------------------------


-class TestAdvancedReplaceAll:
-    """replace_all should work correctly for fallback-based matches too."""
-
-    @pytest.fixture()
-    def tool(self, tmp_path):
-        return EditFileTool(workspace=tmp_path)
-
-    @pytest.mark.asyncio
-    async def test_replace_all_preserves_each_match_indentation(self, tool, tmp_path):
-        f = tmp_path / "indent_multi.py"
-        f.write_text(
-            "if a:\n"
-            "    def foo():\n"
-            "        pass\n"
-            "if b:\n"
-            "        def foo():\n"
-            "            pass\n",
-            encoding="utf-8",
-        )
-        result = await tool.execute(
-            path=str(f),
-            old_text="def foo():\n    pass",
-            new_text="def bar():\n    return 1",
-            replace_all=True,
-        )
-        assert "Successfully" in result
-        assert f.read_text(encoding="utf-8") == (
-            "if a:\n"
-            "    def bar():\n"
-            "        return 1\n"
-            "if b:\n"
-            "        def bar():\n"
-            "            return 1\n"
-        )
-
-    @pytest.mark.asyncio
-    async def test_trim_and_quote_fallback_match_succeeds(self, tool, tmp_path):
-        f = tmp_path / "quote_indent.py"
-        f.write_text("    message = “hello”\n", encoding="utf-8")
-        result = await tool.execute(
-            path=str(f),
-            old_text='message = "hello"',
-            new_text='message = "goodbye"',
-        )
-        assert "Successfully" in result
-        assert f.read_text(encoding="utf-8") == "    message = “goodbye”\n"
-
-
-# ---------------------------------------------------------------------------
-# Advanced fallback replacement behavior
-# ---------------------------------------------------------------------------
-
-
 class TestAdvancedReplaceAll:
    """replace_all should work correctly for fallback-based matches too."""

@ -369,8 +315,6 @@ class TestFileSizeProtection:
    async def test_rejects_file_over_size_limit(self, tool, tmp_path):
        f = tmp_path / "huge.txt"
        f.write_text("x", encoding="utf-8")
-        # Monkey-patch the file size check by creating a stat mock
-        original_stat = f.stat

        class FakeStat:
            def __init__(self, real_stat):
@ -412,10 +356,9 @@ class TestStaleDetectionContentFallback:
        f.write_text("hello world", encoding="utf-8")
        await read_tool.execute(path=str(f))

-        # Touch the file to bump mtime without changing content
-        time.sleep(0.05)
-        original_content = f.read_text()
-        f.write_text(original_content, encoding="utf-8")
+        # Bump mtime without changing content.
+        stat = f.stat()
+        os.utime(f, (stat.st_atime, stat.st_mtime + 10))

        result = await edit_tool.execute(path=str(f), old_text="world", new_text="earth")
        assert "Successfully" in result
--- a/tests/utils/test_gitstore.py
+++ b/tests/utils/test_gitstore.py
@ -1,7 +1,6 @@
 """Tests for GitStore — line_ages() and core git operations."""

 import subprocess
-import time
 from datetime import datetime, timedelta, timezone
 from unittest.mock import patch

@ -71,25 +70,32 @@ class TestLineAges:

    def test_partial_edit_only_updates_changed_lines(self, git, tmp_path):
        """Only modified lines should reflect the new commit's timestamp."""
+        now = datetime(2026, 5, 1, tzinfo=timezone.utc)
+        old = now - timedelta(days=30)
+
        (tmp_path / "MEMORY.md").write_text(
            "# Memory\n\n## A\n- old\n\n## B\n- keep\n", encoding="utf-8"
        )
-        git.auto_commit("commit1")
-        time.sleep(1.1)
+        with patch("dulwich.worktree.time.time", return_value=old.timestamp()):
+            git.auto_commit("commit1")

        # Only modify section A
        (tmp_path / "MEMORY.md").write_text(
            "# Memory\n\n## A\n- new\n\n## B\n- keep\n", encoding="utf-8"
        )
-        git.auto_commit("commit2")
+        with patch("dulwich.worktree.time.time", return_value=now.timestamp()):
+            git.auto_commit("commit2")
+
+        with patch("nanobot.utils.gitstore.datetime") as mock_dt:
+            mock_dt.now.return_value = now
+            mock_dt.fromtimestamp = datetime.fromtimestamp
+            ages = git.line_ages("MEMORY.md")

-        ages = git.line_ages("MEMORY.md")
        lines = (tmp_path / "MEMORY.md").read_text(encoding="utf-8").splitlines()
-        # All lines are from today, but verify line-level tracking works
        assert len(ages) == len(lines)
-        # "- new" line and "- keep" line both age=0 (same day), but
-        # the key point is we get per-line results
-        assert len(ages) == 7
+        age_by_line = {line: age.age_days for line, age in zip(lines, ages, strict=True)}
+        assert age_by_line["- new"] == 0
+        assert age_by_line["- keep"] == 30


 class TestNestedRepoProtection: