From 24e56fcf070b0b1fcc0fed2061dbac1ba101cba1 Mon Sep 17 00:00:00 2001
From: chengyongru <chengyongru.ai@gmail.com>
Date: Thu, 4 Jun 2026 16:25:30 +0800
Subject: [PATCH] test: improve deterministic unit test coverage

---
 tests/agent/test_auto_compact.py            | 14 +++--
 tests/agent/test_consolidate_offset.py      |  7 ++-
 tests/agent/test_consolidator.py            |  7 ++-
 tests/agent/test_dream.py                   | 12 ++--
 tests/agent/test_dream_session.py           | 14 +++--
 tests/agent/test_hook_composite.py          |  3 +-
 tests/agent/test_runner_core.py             | 54 ++++++++---------
 tests/agent/test_runner_injections.py       | 58 +++++++++---------
 tests/agent/test_session_manager_history.py |  4 +-
 tests/agent/test_subagent_lifecycle.py      | 38 +++++++-----
 tests/agent/test_task_cancel.py             | 19 ++++--
 tests/agent/tools/test_self_tool.py         |  9 ++-
 tests/agent/tools/test_subagent_tools.py    | 15 +++--
 tests/channels/test_channel_plugins.py      | 12 ++++
 tests/channels/test_feishu_reaction.py      |  2 +
 tests/channels/test_qq_media.py             |  8 ++-
 tests/channels/test_websocket_channel.py    |  5 +-
 tests/channels/test_wecom_channel.py        |  2 +-
 tests/cli/test_restart_command.py           | 35 ++++++-----
 tests/command/test_model_command.py         | 23 +++++++-
 tests/command/test_router_dispatchable.py   | 11 ++++
 tests/pairing/test_store.py                 | 25 +++++---
 tests/test_api_stream.py                    |  2 +-
 tests/test_msteams.py                       |  3 +-
 tests/test_openai_api.py                    | 21 ++++---
 tests/tools/test_edit_advanced.py           | 65 ++-------------------
 tests/utils/test_gitstore.py                | 24 +++++---
 27 files changed, 279 insertions(+), 213 deletions(-)

diff --git a/tests/agent/test_auto_compact.py b/tests/agent/test_auto_compact.py
index 1e711bfd8..ceada74c7 100644
--- a/tests/agent/test_auto_compact.py
+++ b/tests/agent/test_auto_compact.py
@@ -111,6 +111,13 @@ def _make_fake_compact(
     return _fake_compact
 
 
+async def _drain_background_tasks(loop: AgentLoop) -> None:
+    tasks = list(loop._background_tasks)
+    if tasks:
+        await asyncio.gather(*tasks, return_exceptions=True)
+    await asyncio.sleep(0)
+
+
 class TestSessionTTLConfig:
     """Test session TTL configuration."""
 
@@ -269,7 +276,7 @@ class TestAutoCompact:
 
         loop.consolidator.compact_idle_session = _make_fake_compact(loop)
         loop.auto_compact.check_expired(loop._schedule_background)
-        await asyncio.sleep(0.1)
+        await _drain_background_tasks(loop)
 
         active_after = loop.sessions.get_or_create("cli:active")
         assert len(active_after.messages) == 1
@@ -710,7 +717,7 @@ class TestProactiveAutoCompact:
             loop._schedule_background,
             active_session_keys=active_session_keys,
         )
-        await asyncio.sleep(0.1)
+        await _drain_background_tasks(loop)
 
     @pytest.mark.asyncio
     async def test_no_check_when_ttl_disabled(self, tmp_path):
@@ -815,12 +822,11 @@ class TestProactiveAutoCompact:
 
         # Second call should skip (key is in _archiving)
         loop.auto_compact.check_expired(loop._schedule_background)
-        await asyncio.sleep(0.05)
         assert archive_count == 1
 
         # Clean up
         block_forever.set()
-        await asyncio.sleep(0.1)
+        await _drain_background_tasks(loop)
         await loop.close_mcp()
 
     @pytest.mark.asyncio
diff --git a/tests/agent/test_consolidate_offset.py b/tests/agent/test_consolidate_offset.py
index f6232c348..c4b0e9ea8 100644
--- a/tests/agent/test_consolidate_offset.py
+++ b/tests/agent/test_consolidate_offset.py
@@ -1,10 +1,11 @@
 """Test session management with cache-friendly message handling."""
 
 import asyncio
+from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
-from pathlib import Path
+
 from nanobot.session.manager import Session, SessionManager
 
 # Test constants
@@ -603,9 +604,10 @@ class TestNewCommandArchival:
         loop.sessions.save(session)
 
         archived = asyncio.Event()
+        release_archive = asyncio.Event()
 
         async def _slow_summarize(_messages) -> bool:
-            await asyncio.sleep(0.1)
+            await release_archive.wait()
             archived.set()
             return True
 
@@ -615,5 +617,6 @@ class TestNewCommandArchival:
         await loop._process_message(new_msg)
 
         assert not archived.is_set()
+        release_archive.set()
         await loop.close_mcp()
         assert archived.is_set()
diff --git a/tests/agent/test_consolidator.py b/tests/agent/test_consolidator.py
index 7e3c31960..028bcbedc 100644
--- a/tests/agent/test_consolidator.py
+++ b/tests/agent/test_consolidator.py
@@ -9,6 +9,7 @@ from nanobot.agent.memory import (
     Consolidator,
     MemoryStore,
 )
+from nanobot.providers.base import LLMResponse
 from nanobot.session.manager import Session
 from nanobot.utils.prompt_templates import render_template
 
@@ -497,11 +498,12 @@ class TestCompactIdleSession:
 
         # Use a slow LLM response to ensure the lock is held while we check
         started = asyncio.Event()
+        release_chat = asyncio.Event()
 
         async def slow_chat(**kwargs):
             started.set()
-            await asyncio.sleep(0.1)
-            return MagicMock(content="Summary.", finish_reason="stop")
+            await release_chat.wait()
+            return LLMResponse(content="Summary.", finish_reason="stop")
 
         mock_provider.chat_with_retry = slow_chat
 
@@ -520,6 +522,7 @@ class TestCompactIdleSession:
         )
         await started.wait()
         assert lock.locked()
+        release_chat.set()
         await task
         assert not lock.locked()
 
diff --git a/tests/agent/test_dream.py b/tests/agent/test_dream.py
index 412bb1439..5b9bd32a0 100644
--- a/tests/agent/test_dream.py
+++ b/tests/agent/test_dream.py
@@ -134,9 +134,7 @@ class TestEphemeralDirect:
         provider.supports_tools = True
         provider.generation = MagicMock(max_tokens=4096)
         provider.chat_with_retry = AsyncMock(
-            return_value=MagicMock(
-                content="done", finish_reason="stop", tool_calls=[], usage={},
-            )
+            return_value=LLMResponse(content="done", tool_calls=[], finish_reason="stop", usage={})
         )
 
         with (
@@ -168,9 +166,13 @@ class TestEphemeralDirect:
             mock_archive.assert_not_called()
 
     async def test_non_ephemeral_runs_normally(self, tmp_path, _make_loop):
-        """Without ephemeral, the normal path is untouched — no crash."""
+        """Without ephemeral, the normal path returns the model response."""
         loop, store = _make_loop
-        await loop.process_direct("test", session_key="cli:normal")
+        response = await loop.process_direct("test", session_key="cli:normal")
+
+        assert response is not None
+        assert response.content == "done"
+        loop.provider.chat_with_retry.assert_awaited()
 
     async def test_ephemeral_sets_ctx_flag(self, tmp_path, _make_loop):
         """Verify that ephemeral=True is forwarded to TurnContext."""
diff --git a/tests/agent/test_dream_session.py b/tests/agent/test_dream_session.py
index f1c42263e..aa0303c52 100644
--- a/tests/agent/test_dream_session.py
+++ b/tests/agent/test_dream_session.py
@@ -1,6 +1,6 @@
 """Tests for Dream session key generation and rotation."""
-import time
-from datetime import datetime
+from datetime import datetime, timedelta
+from unittest.mock import patch
 
 from nanobot.agent.memory import MemoryStore
 
@@ -13,9 +13,12 @@ class TestDreamSessionKey:
         datetime.strptime(ts_part, "%Y%m%d-%H%M%S")
 
     def test_unique_across_calls(self):
-        k1 = MemoryStore.dream_session_key()
-        time.sleep(1.1)
-        k2 = MemoryStore.dream_session_key()
+        now = datetime(2026, 5, 28, 10, 0, 0)
+        with patch("nanobot.agent.memory.datetime") as mock_dt:
+            mock_dt.now.side_effect = [now, now + timedelta(seconds=1)]
+            k1 = MemoryStore.dream_session_key()
+            k2 = MemoryStore.dream_session_key()
+
         assert k1 != k2
 
 
@@ -62,3 +65,4 @@ class TestPruneDreamSessions:
         sessions_dir = tmp_path / "sessions"
         sessions_dir.mkdir()
         MemoryStore.prune_dream_sessions(sessions_dir, keep=10)
+        assert list(sessions_dir.iterdir()) == []
diff --git a/tests/agent/test_hook_composite.py b/tests/agent/test_hook_composite.py
index 315c3eeaa..7ee5dd6b5 100644
--- a/tests/agent/test_hook_composite.py
+++ b/tests/agent/test_hook_composite.py
@@ -21,7 +21,8 @@ def _ctx() -> AgentHookContext:
 @pytest.mark.asyncio
 async def test_base_hook_emit_reasoning_is_noop():
     hook = AgentHook()
-    await hook.emit_reasoning("should not raise")
+    result = await hook.emit_reasoning("should not raise")
+    assert result is None
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/agent/test_runner_core.py b/tests/agent/test_runner_core.py
index 7e2d541ed..1fc82b7a3 100644
--- a/tests/agent/test_runner_core.py
+++ b/tests/agent/test_runner_core.py
@@ -5,12 +5,11 @@ from __future__ import annotations
 
 import asyncio
 import time
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
 from nanobot.config.schema import AgentDefaults
-from nanobot.agent.tools.registry import ToolRegistry
 from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
 
 _MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
@@ -18,7 +17,7 @@ _MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
 
 @pytest.mark.asyncio
 async def test_runner_preserves_reasoning_fields_and_tool_results():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     provider = MagicMock(spec=LLMProvider)
     captured_second_call: list[dict] = []
@@ -75,7 +74,7 @@ async def test_runner_preserves_reasoning_fields_and_tool_results():
 
 @pytest.mark.asyncio
 async def test_runner_returns_max_iterations_fallback():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     provider = MagicMock(spec=LLMProvider)
     provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
@@ -106,7 +105,7 @@ async def test_runner_returns_max_iterations_fallback():
 
 @pytest.mark.asyncio
 async def test_runner_times_out_hung_llm_request():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     provider = MagicMock(spec=LLMProvider)
 
@@ -136,15 +135,15 @@ async def test_runner_times_out_hung_llm_request():
 @pytest.mark.asyncio
 async def test_runner_does_not_apply_outer_wall_timeout_to_streaming_requests():
     from nanobot.agent.hook import AgentHook, AgentHookContext
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     provider = MagicMock(spec=LLMProvider)
     streamed: list[str] = []
 
     async def chat_stream_with_retry(*, on_content_delta, **kwargs):
-        await asyncio.sleep(0.08)
+        await asyncio.sleep(0)
         await on_content_delta("still ")
-        await asyncio.sleep(0.08)
+        await asyncio.sleep(0)
         await on_content_delta("alive")
         return LLMResponse(content="still alive", tool_calls=[])
 
@@ -161,25 +160,28 @@ async def test_runner_does_not_apply_outer_wall_timeout_to_streaming_requests():
             streamed.append(delta)
 
     runner = AgentRunner(provider)
-    result = await runner.run(AgentRunSpec(
-        initial_messages=[{"role": "user", "content": "think for a while"}],
-        tools=tools,
-        model="test-model",
-        max_iterations=1,
-        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
-        hook=StreamingHook(),
-        llm_timeout_s=0.01,
-    ))
+    wait_for = AsyncMock(side_effect=AssertionError("streaming path must not use wait_for"))
+    with patch("nanobot.agent.runner.asyncio.wait_for", wait_for):
+        result = await runner.run(AgentRunSpec(
+            initial_messages=[{"role": "user", "content": "think for a while"}],
+            tools=tools,
+            model="test-model",
+            max_iterations=1,
+            max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+            hook=StreamingHook(),
+            llm_timeout_s=0.01,
+        ))
 
     assert result.stop_reason == "completed"
     assert result.final_content == "still alive"
     assert streamed == ["still ", "alive"]
     provider.chat_with_retry.assert_not_awaited()
+    wait_for.assert_not_awaited()
 
 
 @pytest.mark.asyncio
 async def test_runner_replaces_empty_tool_result_with_marker():
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     provider = MagicMock(spec=LLMProvider)
     captured_second_call: list[dict] = []
@@ -218,7 +220,7 @@ async def test_runner_replaces_empty_tool_result_with_marker():
 @pytest.mark.asyncio
 async def test_runner_retries_empty_final_response_with_summary_prompt():
     """Empty responses get 2 silent retries before finalization kicks in."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     provider = MagicMock(spec=LLMProvider)
     calls: list[dict] = []
@@ -263,7 +265,7 @@ async def test_runner_retries_empty_final_response_with_summary_prompt():
 @pytest.mark.asyncio
 async def test_runner_uses_specific_message_after_empty_finalization_retry():
     """After silent retries + finalization all return empty, stop_reason is empty_final_response."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
     from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE
 
     provider = MagicMock(spec=LLMProvider)
@@ -295,7 +297,7 @@ async def test_runner_empty_response_does_not_break_tool_chain():
     Sequence: tool_call -> empty -> tool_call -> final text.
     The runner should recover via silent retry and complete normally.
     """
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     provider = MagicMock(spec=LLMProvider)
     call_count = 0
@@ -352,7 +354,7 @@ async def test_runner_empty_response_does_not_break_tool_chain():
 async def test_runner_accumulates_usage_and_preserves_cached_tokens():
     """Runner should accumulate prompt/completion tokens across iterations
     and preserve cached_tokens from provider responses."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     provider = MagicMock(spec=LLMProvider)
     call_count = {"n": 0}
@@ -399,7 +401,7 @@ async def test_runner_binds_on_retry_wait_to_retry_callback_not_progress():
     internal retry diagnostics like "Model request failed, retry in 1s"
     to leak to end-user channels as normal progress updates.
     """
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     captured: dict = {}
 
@@ -441,7 +443,7 @@ async def test_runner_binds_on_retry_wait_to_retry_callback_not_progress():
 @pytest.mark.asyncio
 async def test_runner_passes_temperature_to_provider():
     """temperature from AgentRunSpec should reach provider.chat_with_retry."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     captured: dict = {}
 
@@ -470,7 +472,7 @@ async def test_runner_passes_temperature_to_provider():
 @pytest.mark.asyncio
 async def test_runner_passes_max_tokens_to_provider():
     """max_tokens from AgentRunSpec should reach provider.chat_with_retry."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     captured: dict = {}
 
@@ -499,7 +501,7 @@ async def test_runner_passes_max_tokens_to_provider():
 @pytest.mark.asyncio
 async def test_runner_passes_reasoning_effort_to_provider():
     """reasoning_effort from AgentRunSpec should reach provider.chat_with_retry."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     captured: dict = {}
 
diff --git a/tests/agent/test_runner_injections.py b/tests/agent/test_runner_injections.py
index 95cfc4f8d..3686574c8 100644
--- a/tests/agent/test_runner_injections.py
+++ b/tests/agent/test_runner_injections.py
@@ -4,7 +4,6 @@ from __future__ import annotations
 
 import asyncio
 import base64
-import time
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
@@ -35,15 +34,15 @@ def _make_loop(tmp_path):
 
     with patch("nanobot.agent.loop.ContextBuilder"), \
          patch("nanobot.agent.loop.SessionManager"), \
-         patch("nanobot.agent.loop.SubagentManager") as MockSubMgr:
-        MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0)
+         patch("nanobot.agent.loop.SubagentManager") as mock_sub_mgr:
+        mock_sub_mgr.return_value.cancel_by_session = AsyncMock(return_value=0)
         loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path)
     return loop
 
 @pytest.mark.asyncio
 async def test_drain_injections_returns_empty_when_no_callback():
     """No injection_callback → empty list."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     provider = MagicMock()
     runner = AgentRunner(provider)
@@ -61,7 +60,7 @@ async def test_drain_injections_returns_empty_when_no_callback():
 @pytest.mark.asyncio
 async def test_drain_injections_extracts_content_from_inbound_messages():
     """Should extract .content from InboundMessage objects."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
     from nanobot.bus.events import InboundMessage
 
     provider = MagicMock()
@@ -92,7 +91,7 @@ async def test_drain_injections_extracts_content_from_inbound_messages():
 @pytest.mark.asyncio
 async def test_drain_injections_passes_limit_to_callback_when_supported():
     """Limit-aware callbacks can preserve overflow in their own queue."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTIONS_PER_TURN
+    from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN, AgentRunner, AgentRunSpec
     from nanobot.bus.events import InboundMessage
 
     provider = MagicMock()
@@ -127,7 +126,7 @@ async def test_drain_injections_passes_limit_to_callback_when_supported():
 @pytest.mark.asyncio
 async def test_drain_injections_skips_empty_content():
     """Messages with blank content should be filtered out."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
     from nanobot.bus.events import InboundMessage
 
     provider = MagicMock()
@@ -156,7 +155,7 @@ async def test_drain_injections_skips_empty_content():
 @pytest.mark.asyncio
 async def test_drain_injections_handles_callback_exception():
     """If the callback raises, return empty list (error is logged)."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     provider = MagicMock()
     runner = AgentRunner(provider)
@@ -178,7 +177,7 @@ async def test_drain_injections_handles_callback_exception():
 @pytest.mark.asyncio
 async def test_checkpoint1_injects_after_tool_execution():
     """Follow-up messages are injected after tool execution, before next LLM call."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
     from nanobot.bus.events import InboundMessage
 
     provider = MagicMock()
@@ -231,8 +230,8 @@ async def test_checkpoint1_injects_after_tool_execution():
 @pytest.mark.asyncio
 async def test_checkpoint2_injects_after_final_response_with_resuming_stream():
     """After final response, if injections exist, stream_end should get resuming=True."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
     from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
     from nanobot.bus.events import InboundMessage
 
     provider = MagicMock()
@@ -290,7 +289,7 @@ async def test_checkpoint2_injects_after_final_response_with_resuming_stream():
 @pytest.mark.asyncio
 async def test_checkpoint2_preserves_final_response_in_history_before_followup():
     """A follow-up injected after a final answer must still see that answer in history."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
     from nanobot.bus.events import InboundMessage
 
     provider = MagicMock()
@@ -405,7 +404,7 @@ async def test_loop_injected_followup_preserves_image_media(tmp_path):
 @pytest.mark.asyncio
 async def test_runner_merges_multiple_injected_user_messages_without_losing_media():
     """Multiple injected follow-ups should not create lossy consecutive user messages."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     provider = MagicMock()
     call_count = {"n": 0}
@@ -468,7 +467,7 @@ async def test_runner_merges_multiple_injected_user_messages_without_losing_medi
 @pytest.mark.asyncio
 async def test_injection_cycles_capped_at_max():
     """Injection cycles should be capped at _MAX_INJECTION_CYCLES."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES
+    from nanobot.agent.runner import _MAX_INJECTION_CYCLES, AgentRunner, AgentRunSpec
     from nanobot.bus.events import InboundMessage
 
     provider = MagicMock()
@@ -509,7 +508,7 @@ async def test_injection_cycles_capped_at_max():
 @pytest.mark.asyncio
 async def test_no_injections_flag_is_false_by_default():
     """had_injections should be False when no injection callback or no messages."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
 
     provider = MagicMock()
 
@@ -607,16 +606,12 @@ async def test_followup_routed_to_pending_queue(tmp_path):
     msg = InboundMessage(channel="discord", sender_id="u", chat_id="c", content="follow-up")
     await loop.bus.publish_inbound(msg)
 
-    deadline = time.time() + 2
-    while pending.empty() and time.time() < deadline:
-        await asyncio.sleep(0.01)
+    queued_msg = await asyncio.wait_for(pending.get(), timeout=2)
 
     loop.stop()
     await asyncio.wait_for(run_task, timeout=2)
 
     assert loop._dispatch.await_count == 0
-    assert not pending.empty()
-    queued_msg = pending.get_nowait()
     assert queued_msg.content == "follow-up"
     assert queued_msg.session_key == UNIFIED_SESSION_KEY
 
@@ -625,9 +620,9 @@ async def test_followup_routed_to_pending_queue(tmp_path):
 async def test_pending_queue_preserves_overflow_for_next_injection_cycle(tmp_path):
     """Pending queue should leave overflow messages queued for later drains."""
     from nanobot.agent.loop import AgentLoop
+    from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN
     from nanobot.bus.events import InboundMessage
     from nanobot.bus.queue import MessageBus
-    from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN
 
     bus = MessageBus()
     provider = MagicMock()
@@ -680,7 +675,12 @@ async def test_pending_queue_full_falls_back_to_queued_task(tmp_path):
     from nanobot.bus.events import InboundMessage
 
     loop = _make_loop(tmp_path)
-    loop._dispatch = AsyncMock()  # type: ignore[method-assign]
+    dispatched = asyncio.Event()
+
+    async def _dispatch(_msg):
+        dispatched.set()
+
+    loop._dispatch = AsyncMock(side_effect=_dispatch)  # type: ignore[method-assign]
 
     pending = asyncio.Queue(maxsize=1)
     pending.put_nowait(InboundMessage(channel="cli", sender_id="u", chat_id="c", content="already queued"))
@@ -690,9 +690,7 @@ async def test_pending_queue_full_falls_back_to_queued_task(tmp_path):
     msg = InboundMessage(channel="cli", sender_id="u", chat_id="c", content="follow-up")
     await loop.bus.publish_inbound(msg)
 
-    deadline = time.time() + 2
-    while loop._dispatch.await_count == 0 and time.time() < deadline:
-        await asyncio.sleep(0.01)
+    await asyncio.wait_for(dispatched.wait(), timeout=2)
 
     loop.stop()
     await asyncio.wait_for(run_task, timeout=2)
@@ -750,7 +748,7 @@ async def test_dispatch_republishes_leftover_queue_messages(tmp_path):
 @pytest.mark.asyncio
 async def test_drain_injections_on_fatal_tool_error():
     """Pending injections should be drained even when a fatal tool error occurs."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
     from nanobot.bus.events import InboundMessage
 
     provider = MagicMock()
@@ -803,7 +801,7 @@ async def test_drain_injections_on_fatal_tool_error():
 @pytest.mark.asyncio
 async def test_drain_injections_on_llm_error():
     """Pending injections should be drained when the LLM returns an error finish_reason."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
     from nanobot.bus.events import InboundMessage
 
     provider = MagicMock()
@@ -858,7 +856,7 @@ async def test_drain_injections_on_llm_error():
 @pytest.mark.asyncio
 async def test_drain_injections_on_empty_final_response():
     """Pending injections should be drained when the runner exits due to empty response."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_EMPTY_RETRIES
+    from nanobot.agent.runner import _MAX_EMPTY_RETRIES, AgentRunner, AgentRunSpec
     from nanobot.bus.events import InboundMessage
 
     provider = MagicMock()
@@ -913,7 +911,7 @@ async def test_drain_injections_on_max_iterations():
     injections are appended to messages but not processed by the LLM.
     The key point is they are consumed from the queue to prevent re-publish.
     """
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
     from nanobot.bus.events import InboundMessage
 
     provider = MagicMock()
@@ -965,7 +963,7 @@ async def test_drain_injections_on_max_iterations():
 async def test_drain_injections_set_flag_when_followup_arrives_after_last_iteration():
     """Late follow-ups drained in max_iterations should still flip had_injections."""
     from nanobot.agent.hook import AgentHook
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
     from nanobot.bus.events import InboundMessage
 
     provider = MagicMock()
@@ -1027,7 +1025,7 @@ async def test_drain_injections_set_flag_when_followup_arrives_after_last_iterat
 @pytest.mark.asyncio
 async def test_injection_cycle_cap_on_error_path():
     """Injection cycles should be capped even when every iteration hits an LLM error."""
-    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _MAX_INJECTION_CYCLES
+    from nanobot.agent.runner import _MAX_INJECTION_CYCLES, AgentRunner, AgentRunSpec
     from nanobot.bus.events import InboundMessage
 
     provider = MagicMock()
diff --git a/tests/agent/test_session_manager_history.py b/tests/agent/test_session_manager_history.py
index c4be32172..e3bf4d701 100644
--- a/tests/agent/test_session_manager_history.py
+++ b/tests/agent/test_session_manager_history.py
@@ -110,6 +110,7 @@ def test_get_history_drops_orphan_tool_results_when_window_cuts_tool_calls():
 
     history = session.get_history(max_messages=100)
     _assert_no_orphans(history)
+    assert history[-1]["content"] == "new telegram question"
 
 
 # --- Positive test: legitimate pairs survive trimming ---
@@ -363,6 +364,7 @@ def test_window_cuts_mid_tool_group():
     # leaving orphan tool results for split_a at the front.
     history = session.get_history(max_messages=6)
     _assert_no_orphans(history)
+    assert history[0]["role"] == "user"
 
 
 # --- Image breadcrumbs: media kwarg is synthesized into content for replay ---
@@ -599,8 +601,6 @@ def test_enforce_file_cap_no_duplicate_archive_in_else_branch():
     archive_fn = MagicMock()
     session.enforce_file_cap(on_archive=archive_fn, limit=6)
 
-    # Verify retained messages
-    retained_contents = [m["content"] for m in session.messages]
     assert len(session.messages) <= 6
 
     # Verify archived messages have NO overlap with retained
diff --git a/tests/agent/test_subagent_lifecycle.py b/tests/agent/test_subagent_lifecycle.py
index bf3564f28..d68b3f7c1 100644
--- a/tests/agent/test_subagent_lifecycle.py
+++ b/tests/agent/test_subagent_lifecycle.py
@@ -17,7 +17,6 @@ from nanobot.agent.subagent import (
 from nanobot.bus.queue import MessageBus
 from nanobot.providers.base import LLMProvider
 
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -51,6 +50,13 @@ def _make_hook_context(**overrides) -> AgentHookContext:
     return AgentHookContext(**defaults)
 
 
+async def _drain_subagent_tasks(sm: SubagentManager) -> None:
+    tasks = list(sm._running_tasks.values())
+    if tasks:
+        await asyncio.gather(*tasks, return_exceptions=True)
+    await asyncio.sleep(0)
+
+
 # ---------------------------------------------------------------------------
 # SubagentStatus defaults
 # ---------------------------------------------------------------------------
@@ -114,7 +120,7 @@ class TestSpawn:
         assert len(sm._running_tasks) == 1
 
         block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
         assert len(sm._running_tasks) == 0
 
     @pytest.mark.asyncio
@@ -124,7 +130,7 @@ class TestSpawn:
             final_content="done", messages=[], stop_reason="completed",
         ))
         await sm.spawn("my task")
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
         # Status cleaned up after task completes
         assert len(sm._task_statuses) == 0
 
@@ -142,7 +148,7 @@ class TestSpawn:
         assert len(sm._session_tasks["s1"]) == 1
 
         block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
         assert "s1" not in sm._session_tasks
 
     @pytest.mark.asyncio
@@ -158,7 +164,7 @@ class TestSpawn:
         assert len(sm._session_tasks) == 0
 
         block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
 
     @pytest.mark.asyncio
     async def test_label_defaults_to_truncated_task(self, tmp_path):
@@ -175,7 +181,7 @@ class TestSpawn:
         assert status.label == long_task[:30] + "..."
 
         block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
 
     @pytest.mark.asyncio
     async def test_custom_label(self, tmp_path):
@@ -191,7 +197,7 @@ class TestSpawn:
         assert status.label == "Custom Label"
 
         block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
 
     @pytest.mark.asyncio
     async def test_cleanup_callback_removes_all_entries(self, tmp_path):
@@ -200,7 +206,7 @@ class TestSpawn:
             final_content="done", messages=[], stop_reason="completed",
         ))
         await sm.spawn("task", session_key="s1")
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
         assert len(sm._running_tasks) == 0
         assert len(sm._task_statuses) == 0
         assert len(sm._session_tasks) == 0
@@ -452,7 +458,7 @@ class TestCancelBySession:
         count = await sm.cancel_by_session("s1")
         assert count == 2
         block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
 
     @pytest.mark.asyncio
     async def test_no_tasks_returns_zero(self, tmp_path):
@@ -467,7 +473,7 @@ class TestCancelBySession:
             final_content="done", messages=[], stop_reason="completed",
         ))
         await sm.spawn("task1", session_key="s1")
-        await asyncio.sleep(0.1)  # Wait for completion
+        await _drain_subagent_tasks(sm)
 
         count = await sm.cancel_by_session("s1")
         assert count == 0
@@ -499,7 +505,7 @@ class TestRunningCounts:
         assert sm.get_running_count_by_session("s1") == 2
 
         block.set()
-        await asyncio.sleep(0.1)
+        await _drain_subagent_tasks(sm)
         assert sm.get_running_count() == 0
 
     @pytest.mark.asyncio
@@ -521,8 +527,9 @@ class TestSubagentHook:
         tool_call.name = "read_file"
         tool_call.arguments = {"path": "/tmp/test"}
         ctx = _make_hook_context(tool_calls=[tool_call])
-        # Should not raise
-        await hook.before_execute_tools(ctx)
+        result = await hook.before_execute_tools(ctx)
+        assert result is None
+        assert ctx.tool_calls == [tool_call]
 
     @pytest.mark.asyncio
     async def test_after_iteration_updates_status(self):
@@ -544,8 +551,9 @@ class TestSubagentHook:
     async def test_after_iteration_no_status_noop(self):
         hook = _SubagentHook("t1", status=None)
         ctx = _make_hook_context(iteration=5)
-        # Should not raise
-        await hook.after_iteration(ctx)
+        result = await hook.after_iteration(ctx)
+        assert result is None
+        assert ctx.iteration == 5
 
     @pytest.mark.asyncio
     async def test_after_iteration_sets_error(self):
diff --git a/tests/agent/test_task_cancel.py b/tests/agent/test_task_cancel.py
index a3a42887c..0111c2f59 100644
--- a/tests/agent/test_task_cancel.py
+++ b/tests/agent/test_task_cancel.py
@@ -27,8 +27,8 @@ def _make_loop(*, tools_config=None):
 
     with patch("nanobot.agent.loop.ContextBuilder"), \
          patch("nanobot.agent.loop.SessionManager"), \
-         patch("nanobot.agent.loop.SubagentManager") as MockSubMgr:
-        MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0)
+         patch("nanobot.agent.loop.SubagentManager") as mock_sub_mgr:
+        mock_sub_mgr.return_value.cancel_by_session = AsyncMock(return_value=0)
         loop = AgentLoop(bus=bus, provider=provider, workspace=workspace, tools_config=tools_config)
     return loop, bus
 
@@ -103,8 +103,8 @@ class TestHandleStop:
 
 class TestDispatch:
     def test_exec_tool_not_registered_when_disabled(self):
-        from nanobot.config.schema import ToolsConfig
         from nanobot.agent.tools.shell import ExecToolConfig
+        from nanobot.config.schema import ToolsConfig
 
         loop, _bus = _make_loop(tools_config=ToolsConfig(exec=ExecToolConfig(enable=False)))
 
@@ -166,10 +166,14 @@ class TestDispatch:
 
         loop, bus = _make_loop()
         order = []
+        first_started = asyncio.Event()
+        release_first = asyncio.Event()
 
         async def mock_process(m, **kwargs):
             order.append(f"start-{m.content}")
-            await asyncio.sleep(0.05)
+            if m.content == "a":
+                first_started.set()
+                await release_first.wait()
             order.append(f"end-{m.content}")
             return OutboundMessage(channel="test", chat_id="c1", content=m.content)
 
@@ -178,7 +182,12 @@ class TestDispatch:
         msg2 = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="b")
 
         t1 = asyncio.create_task(loop._dispatch(msg1))
+        await asyncio.wait_for(first_started.wait(), timeout=1.0)
         t2 = asyncio.create_task(loop._dispatch(msg2))
+        await asyncio.sleep(0)
+        assert order == ["start-a"]
+
+        release_first.set()
         await asyncio.gather(t1, t2)
         assert order == ["start-a", "end-a", "start-b", "end-b"]
 
@@ -286,8 +295,8 @@ class TestSubagentCancellation:
     @pytest.mark.asyncio
     async def test_subagent_exec_tool_not_registered_when_disabled(self, tmp_path):
         from nanobot.agent.subagent import SubagentManager
-        from nanobot.bus.queue import MessageBus
         from nanobot.agent.tools.shell import ExecToolConfig
+        from nanobot.bus.queue import MessageBus
         from nanobot.config.schema import ToolsConfig
 
         bus = MessageBus()
diff --git a/tests/agent/tools/test_self_tool.py b/tests/agent/tools/test_self_tool.py
index b10bdab59..927f394db 100644
--- a/tests/agent/tools/test_self_tool.py
+++ b/tests/agent/tools/test_self_tool.py
@@ -231,12 +231,14 @@ class TestModifyRestricted:
     async def test_modify_string_int_coerced(self):
         tool = _make_tool()
         result = await tool.execute(action="set", key="max_iterations", value="80")
+        assert "Set max_iterations" in result
         assert tool._runtime_state.max_iterations == 80
 
     @pytest.mark.asyncio
     async def test_modify_context_window_valid(self):
         tool = _make_tool()
         result = await tool.execute(action="set", key="context_window_tokens", value=131072)
+        assert "Set context_window_tokens" in result
         assert tool._runtime_state.context_window_tokens == 131072
 
     @pytest.mark.asyncio
@@ -337,12 +339,14 @@ class TestModifyFree:
     async def test_modify_allows_list(self):
         tool = _make_tool()
         result = await tool.execute(action="set", key="items", value=[1, 2, 3])
+        assert result == "Set scratchpad.items = [1, 2, 3]"
         assert tool._runtime_state._runtime_vars["items"] == [1, 2, 3]
 
     @pytest.mark.asyncio
     async def test_modify_allows_dict(self):
         tool = _make_tool()
         result = await tool.execute(action="set", key="data", value={"a": 1})
+        assert result == "Set scratchpad.data = {'a': 1}"
         assert tool._runtime_state._runtime_vars["data"] == {"a": 1}
 
     @pytest.mark.asyncio
@@ -743,7 +747,10 @@ class TestSubagentHookStatus:
 
         hook = _SubagentHook("test")
         context = AgentHookContext(iteration=1, messages=[])
-        await hook.after_iteration(context)  # should not raise
+        result = await hook.after_iteration(context)
+
+        assert result is None
+        assert context.iteration == 1
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/agent/tools/test_subagent_tools.py b/tests/agent/tools/test_subagent_tools.py
index 7c6ae66e6..6a037981d 100644
--- a/tests/agent/tools/test_subagent_tools.py
+++ b/tests/agent/tools/test_subagent_tools.py
@@ -16,8 +16,8 @@ _MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
 async def test_subagent_exec_tool_receives_allowed_env_keys(tmp_path):
     """allowed_env_keys from ExecToolConfig must be forwarded to the subagent's ExecTool."""
     from nanobot.agent.subagent import SubagentManager, SubagentStatus
-    from nanobot.bus.queue import MessageBus
     from nanobot.agent.tools.shell import ExecToolConfig
+    from nanobot.bus.queue import MessageBus
     from nanobot.config.schema import ToolsConfig
 
     bus = MessageBus()
@@ -340,8 +340,8 @@ async def test_drain_pending_blocks_while_subagents_running(tmp_path):
     # With sub-agents running and an empty queue, it should block
     drain_task = asyncio.create_task(injection_callback())
 
-    # Give it a moment to enter the blocking wait
-    await asyncio.sleep(0.05)
+    # Let the task enter the blocking queue wait.
+    await asyncio.sleep(0)
 
     # Should still be running (blocked on pending_queue.get())
     assert not drain_task.done(), "drain should block while sub-agents are running"
@@ -467,9 +467,12 @@ async def test_drain_pending_timeout(tmp_path):
 
     assert injection_callback is not None
 
-    # Patch the timeout to be very short for testing
-    with patch("nanobot.agent.loop.asyncio.wait_for") as mock_wait:
-        mock_wait.side_effect = asyncio.TimeoutError
+    # Patch the timeout path without leaking the queue.get() coroutine.
+    async def _timeout(awaitable, timeout):
+        awaitable.close()
+        raise asyncio.TimeoutError
+
+    with patch("nanobot.agent.loop.asyncio.wait_for", side_effect=_timeout):
         results = await injection_callback()
         assert results == []
 
diff --git a/tests/channels/test_channel_plugins.py b/tests/channels/test_channel_plugins.py
index 15543bac5..d29dfe4ff 100644
--- a/tests/channels/test_channel_plugins.py
+++ b/tests/channels/test_channel_plugins.py
@@ -1016,6 +1016,8 @@ async def test_validate_allow_from_allows_empty_list():
 
     # Should not raise — empty list defers to pairing store
     mgr._validate_allow_from()
+    assert list(mgr.channels) == ["test"]
+    assert mgr.channels["test"].config.allow_from == []
 
 
 @pytest.mark.asyncio
@@ -1033,6 +1035,8 @@ async def test_validate_allow_from_passes_with_asterisk():
 
     # Should not raise
     mgr._validate_allow_from()
+    assert list(mgr.channels) == ["test"]
+    assert mgr.channels["test"].config.allow_from == ["*"]
 
 
 @pytest.mark.asyncio
@@ -1049,6 +1053,8 @@ async def test_validate_allow_from_allows_empty_dict_allow_from():
     mgr._dispatch_task = None
 
     mgr._validate_allow_from()
+    assert list(mgr.channels) == ["test"]
+    assert mgr.channels["test"].config["allow_from"] == []
 
 
 @pytest.mark.asyncio
@@ -1079,6 +1085,8 @@ async def test_validate_allow_from_allows_missing_allow_from():
 
     # Should not raise — pairing-only mode
     mgr._validate_allow_from()
+    assert list(mgr.channels) == ["test"]
+    assert "allow_from" not in mgr.channels["test"].config
 
 
 @pytest.mark.asyncio
@@ -1206,6 +1214,8 @@ async def test_start_channel_logs_error_on_failure():
 
     # Should not raise, just log error
     await mgr._start_channel("failing", ch)
+    assert mgr.channels == {}
+    assert mgr._dispatch_task is None
 
 
 @pytest.mark.asyncio
@@ -1237,6 +1247,8 @@ async def test_stop_all_handles_channel_exception():
 
     # Should not raise even if channel.stop() raises
     await mgr.stop_all()
+    assert list(mgr.channels) == ["stopfailing"]
+    assert mgr._dispatch_task is None
 
 
 @pytest.mark.asyncio
diff --git a/tests/channels/test_feishu_reaction.py b/tests/channels/test_feishu_reaction.py
index 9d070ad59..6cc1925e5 100644
--- a/tests/channels/test_feishu_reaction.py
+++ b/tests/channels/test_feishu_reaction.py
@@ -105,6 +105,7 @@ class TestRemoveReactionSync:
 
         # Should not raise
         ch._remove_reaction_sync("om_001", "rx_42")
+        ch._client.im.v1.message_reaction.delete.assert_called_once()
 
     def test_handles_exception_gracefully(self):
         ch = _make_channel()
@@ -112,6 +113,7 @@ class TestRemoveReactionSync:
 
         # Should not raise
         ch._remove_reaction_sync("om_001", "rx_42")
+        ch._client.im.v1.message_reaction.delete.assert_called_once()
 
 
 # ── _remove_reaction (async) ────────────────────────────────────────────────
diff --git a/tests/channels/test_qq_media.py b/tests/channels/test_qq_media.py
index 59b85069c..85297a4cb 100644
--- a/tests/channels/test_qq_media.py
+++ b/tests/channels/test_qq_media.py
@@ -118,11 +118,13 @@ async def test_send_exception_caught_not_raised() -> None:
     channel = QQChannel(QQConfig(app_id="app", secret="secret", allow_from=["*"]), MessageBus())
     channel._client = _FakeClient()
 
-    with patch.object(channel, "_send_text_only", new_callable=AsyncMock, side_effect=RuntimeError("boom")):
+    with patch.object(
+        channel, "_send_text_only", new_callable=AsyncMock, side_effect=RuntimeError("boom")
+    ) as send_text:
         await channel.send(
             OutboundMessage(channel="qq", chat_id="user1", content="hello")
         )
-    # No exception raised — test passes if we get here.
+    send_text.assert_awaited_once()
 
 
 @pytest.mark.asyncio
@@ -260,6 +262,8 @@ async def test_on_message_exception_caught_not_raised() -> None:
     bad_data = SimpleNamespace(id="x1", content="hi")
     # Should not raise
     await channel._on_message(bad_data, is_group=False)
+    assert channel._client.api.c2c_calls == []
+    assert channel._client.api.group_calls == []
 
 
 @pytest.mark.asyncio
diff --git a/tests/channels/test_websocket_channel.py b/tests/channels/test_websocket_channel.py
index d6f047de3..cf6b65250 100644
--- a/tests/channels/test_websocket_channel.py
+++ b/tests/channels/test_websocket_channel.py
@@ -686,6 +686,7 @@ async def test_send_missing_connection_is_noop_without_error() -> None:
     channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus, gateway=_basic_handler(bus))
     msg = OutboundMessage(channel="websocket", chat_id="missing", content="x")
     await channel.send(msg)
+    assert channel._subs == {}
 
 
 @pytest.mark.asyncio
@@ -1006,7 +1007,7 @@ async def test_send_reasoning_without_subscribers_is_noop() -> None:
 
     await channel.send_reasoning_delta("unattached", "thinking", None)
     await channel.send_reasoning_end("unattached", None)
-    # No subscribers, no exception, no send.
+    assert channel._subs == {}
 
 
 @pytest.mark.asyncio
@@ -1299,6 +1300,7 @@ async def test_send_delta_missing_connection_is_noop() -> None:
     channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"], "streaming": True}, bus, gateway=_basic_handler(bus))
     # No exception, no error — just a no-op
     await channel.send_delta("nonexistent", "chunk", {"_stream_delta": True, "_stream_id": "s1"})
+    assert channel._subs == {}
 
 
 @pytest.mark.asyncio
@@ -1308,6 +1310,7 @@ async def test_stop_is_idempotent() -> None:
     # stop() before start() should not raise
     await channel.stop()
     await channel.stop()
+    assert channel._subs == {}
 
 
 @pytest.mark.asyncio
diff --git a/tests/channels/test_wecom_channel.py b/tests/channels/test_wecom_channel.py
index cc0bbf29f..5300ec5f7 100644
--- a/tests/channels/test_wecom_channel.py
+++ b/tests/channels/test_wecom_channel.py
@@ -420,7 +420,7 @@ async def test_send_exception_caught_not_raised() -> None:
     await channel.send(
         OutboundMessage(channel="wecom", chat_id="chat1", content="fail test")
     )
-    # No exception — test passes if we reach here.
+    client.reply_stream.assert_called_once()
 
 
 # ── _process_message() ──────────────────────────────────────────────
diff --git a/tests/cli/test_restart_command.py b/tests/cli/test_restart_command.py
index f61e18923..e35d52d89 100644
--- a/tests/cli/test_restart_command.py
+++ b/tests/cli/test_restart_command.py
@@ -32,15 +32,6 @@ def _make_loop():
     return loop, bus
 
 
-async def _wait_until(predicate, *, timeout: float = 0.2, interval: float = 0.01) -> None:
-    deadline = time.monotonic() + timeout
-    while time.monotonic() < deadline:
-        if predicate():
-            return
-        await asyncio.sleep(interval)
-    assert predicate()
-
-
 class TestRestartCommand:
 
     @pytest.mark.asyncio
@@ -91,13 +82,29 @@ class TestRestartCommand:
         loop, bus = _make_loop()
         msg = InboundMessage(channel="telegram", sender_id="u1", chat_id="c1", content="/restart")
 
+        async def _fast_sleep(_delay: float) -> None:
+            return None
+
+        scheduled: list[asyncio.Task] = []
+
+        def _capture_task(coro):
+            task = asyncio.create_task(coro)
+            scheduled.append(task)
+            return task
+
+        fake_asyncio = SimpleNamespace(
+            sleep=_fast_sleep,
+            create_task=_capture_task,
+        )
+
         with patch.object(loop, "_dispatch", new_callable=AsyncMock) as mock_dispatch, \
+             patch("nanobot.command.builtin.asyncio", new=fake_asyncio), \
              patch("nanobot.command.builtin.os.execv"):
             await bus.publish_inbound(msg)
 
             loop._running = True
             run_task = asyncio.create_task(loop.run())
-            await asyncio.sleep(0.1)
+            out = await asyncio.wait_for(bus.consume_outbound(), timeout=1.0)
             loop._running = False
             run_task.cancel()
             try:
@@ -106,8 +113,9 @@ class TestRestartCommand:
                 pass
 
             mock_dispatch.assert_not_called()
-            out = await asyncio.wait_for(bus.consume_outbound(), timeout=1.0)
             assert "Restarting" in out.content
+            assert scheduled
+            await scheduled[0]
 
     @pytest.mark.asyncio
     async def test_status_intercepted_in_run_loop(self):
@@ -120,7 +128,7 @@ class TestRestartCommand:
 
             loop._running = True
             run_task = asyncio.create_task(loop.run())
-            await asyncio.sleep(0.1)
+            out = await asyncio.wait_for(bus.consume_outbound(), timeout=1.0)
             loop._running = False
             run_task.cancel()
             try:
@@ -129,7 +137,6 @@ class TestRestartCommand:
                 pass
 
             mock_dispatch.assert_not_called()
-            out = await asyncio.wait_for(bus.consume_outbound(), timeout=1.0)
             assert "nanobot" in out.content.lower() or "Model" in out.content
 
     @pytest.mark.asyncio
@@ -138,7 +145,7 @@ class TestRestartCommand:
         loop, _bus = _make_loop()
 
         run_task = asyncio.create_task(loop.run())
-        await asyncio.sleep(0.1)
+        await asyncio.sleep(0)
         run_task.cancel()
 
         with pytest.raises(asyncio.CancelledError):
diff --git a/tests/command/test_model_command.py b/tests/command/test_model_command.py
index f95abee30..34a8b0ab8 100644
--- a/tests/command/test_model_command.py
+++ b/tests/command/test_model_command.py
@@ -134,7 +134,15 @@ async def test_model_command_registered_as_exact_and_prefix(tmp_path) -> None:
     out = await router.dispatch(_ctx(loop, "/model fast"))
 
     assert out is not None
-    assert "Switched model preset" in out.content
+    assert out.channel == "cli"
+    assert out.chat_id == "direct"
+    assert out.metadata == {"render_as": "text"}
+    assert out.content == "\n".join([
+        "Switched model preset to `fast`.",
+        "- Model: `openai/gpt-4.1`",
+        "- Context window: 32768",
+        "- Max output tokens: 4096",
+    ])
     assert loop.model_preset == "fast"
 
 
@@ -150,7 +158,10 @@ async def test_goal_command_shows_usage_without_args(tmp_path) -> None:
     loop = _make_loop(tmp_path)
     out = await cmd_goal(_ctx(loop, "/goal"))
     assert out is not None
-    assert "Usage: /goal" in out.content
+    assert out.channel == "cli"
+    assert out.chat_id == "direct"
+    assert out.metadata == {"render_as": "text"}
+    assert out.content == "Usage: /goal <long-running task description>"
 
 
 @pytest.mark.asyncio
@@ -158,7 +169,13 @@ async def test_goal_command_rejects_mid_turn_without_session(tmp_path) -> None:
     loop = _make_loop(tmp_path)
     out = await cmd_goal(_ctx(loop, "/goal do work", args="do work"))
     assert out is not None
-    assert "/stop" in out.content
+    assert out.channel == "cli"
+    assert out.chat_id == "direct"
+    assert out.metadata == {"render_as": "text"}
+    assert out.content == (
+        "A task is already running for this chat. "
+        "Use `/stop` first, then send `/goal <long-running task description>` again."
+    )
 
 
 @pytest.mark.asyncio
diff --git a/tests/command/test_router_dispatchable.py b/tests/command/test_router_dispatchable.py
index 2f67b50ae..ec6b3c6ed 100644
--- a/tests/command/test_router_dispatchable.py
+++ b/tests/command/test_router_dispatchable.py
@@ -118,6 +118,11 @@ class TestMidTurnCommandDispatchedDirectly:
         )
         result = await router.dispatch(ctx)
         assert result is not None
+        assert result.channel == "test"
+        assert result.chat_id == "chat1"
+        assert result.metadata["render_as"] == "text"
+        assert "/new" in result.content
+        assert "/pairing [list|approve <code>|deny <code>|revoke <user_id>]" in result.content
 
     @pytest.mark.asyncio
     async def test_prefix_command_args_populated(self, router: CommandRouter) -> None:
@@ -211,6 +216,10 @@ class TestPairingCommandDispatch:
         result = await router.dispatch(ctx)
         assert result is not None
         assert "Approved" in result.content
+        assert result.content == (
+            "Approved pairing code `ABCD-EFGH` — 123 can now access telegram"
+        )
+        assert result.metadata.get("_pairing_command") is True
 
     @pytest.mark.asyncio
     async def test_pairing_revoke_dispatched(
@@ -229,3 +238,5 @@ class TestPairingCommandDispatch:
         result = await router.dispatch(ctx)
         assert result is not None
         assert "Revoked" in result.content
+        assert result.content == "Revoked 123 from telegram"
+        assert result.metadata.get("_pairing_command") is True
diff --git a/tests/pairing/test_store.py b/tests/pairing/test_store.py
index 25c8ec7c7..6e0f8ba94 100644
--- a/tests/pairing/test_store.py
+++ b/tests/pairing/test_store.py
@@ -1,5 +1,3 @@
-import time
-
 import pytest
 
 from nanobot.pairing import __all__ as pairing_all
@@ -32,12 +30,15 @@ class TestGenerateCode:
         codes = {store.generate_code("telegram", str(i)) for i in range(20)}
         assert len(codes) == 20
 
-    def test_ttl_expiration(self) -> None:
+    def test_ttl_expiration(self, monkeypatch) -> None:
+        clock = {"now": 1_000.0}
+        monkeypatch.setattr(store.time, "time", lambda: clock["now"])
+
         code = store.generate_code("telegram", "123", ttl=1)
-        assert store.approve_code(code) is not None
+        assert store.approve_code(code) == ("telegram", "123")
 
         code2 = store.generate_code("telegram", "456", ttl=0)
-        time.sleep(0.1)
+        clock["now"] += 0.1
         assert store.approve_code(code2) is None
 
 
@@ -59,9 +60,12 @@ class TestApproveDeny:
     def test_deny_unknown_returns_false(self) -> None:
         assert store.deny_code("UNKNOWN") is False
 
-    def test_approve_expired_returns_none(self) -> None:
+    def test_approve_expired_returns_none(self, monkeypatch) -> None:
+        clock = {"now": 1_000.0}
+        monkeypatch.setattr(store.time, "time", lambda: clock["now"])
+
         code = store.generate_code("telegram", "123", ttl=0)
-        time.sleep(0.1)
+        clock["now"] += 0.1
         assert store.approve_code(code) is None
 
 
@@ -91,9 +95,12 @@ class TestListPending:
         channels = {p["channel"] for p in pending}
         assert channels == {"telegram", "discord"}
 
-    def test_expired_not_listed(self) -> None:
+    def test_expired_not_listed(self, monkeypatch) -> None:
+        clock = {"now": 1_000.0}
+        monkeypatch.setattr(store.time, "time", lambda: clock["now"])
+
         store.generate_code("telegram", "123", ttl=0)
-        time.sleep(0.1)
+        clock["now"] += 0.1
         assert store.list_pending() == []
 
 
diff --git a/tests/test_api_stream.py b/tests/test_api_stream.py
index cc7935a82..b98e5b8d7 100644
--- a/tests/test_api_stream.py
+++ b/tests/test_api_stream.py
@@ -233,7 +233,7 @@ async def test_stream_segment_end_does_not_close_sse(aiohttp_client) -> None:
         assert on_stream_end is not None
         await on_stream("planning")
         await on_stream_end(resuming=True)
-        await asyncio.sleep(0.05)
+        await asyncio.sleep(0)
         await on_stream(" final")
         await on_stream_end(resuming=False)
         return "planning final"
diff --git a/tests/test_msteams.py b/tests/test_msteams.py
index b76d6e5fd..6be6424f9 100644
--- a/tests/test_msteams.py
+++ b/tests/test_msteams.py
@@ -851,10 +851,11 @@ async def test_validate_inbound_auth_accepts_observed_botframework_shape(make_ch
         headers={"kid": jwk["kid"]},
     )
 
-    await ch._validate_inbound_auth(
+    result = await ch._validate_inbound_auth(
         f"Bearer {token}",
         {"serviceUrl": service_url},
     )
+    assert result is None
 
 
 @pytest.mark.asyncio
diff --git a/tests/test_openai_api.py b/tests/test_openai_api.py
index 59df6889c..839c1705d 100644
--- a/tests/test_openai_api.py
+++ b/tests/test_openai_api.py
@@ -235,10 +235,14 @@ async def test_followup_requests_share_same_session_key(aiohttp_client) -> None:
 @pytest.mark.asyncio
 async def test_fixed_session_requests_are_serialized(aiohttp_client) -> None:
     order: list[str] = []
+    first_started = asyncio.Event()
+    release_first = asyncio.Event()
 
     async def slow_process(content, session_key="", channel="", chat_id="", **kwargs):
         order.append(f"start:{content}")
-        await asyncio.sleep(0.1)
+        if content == "first":
+            first_started.set()
+            await release_first.wait()
         order.append(f"end:{content}")
         return content
 
@@ -256,14 +260,17 @@ async def test_fixed_session_requests_are_serialized(aiohttp_client) -> None:
             json={"messages": [{"role": "user", "content": msg}]},
         )
 
-    r1, r2 = await asyncio.gather(send("first"), send("second"))
+    first = asyncio.create_task(send("first"))
+    await asyncio.wait_for(first_started.wait(), timeout=1.0)
+    second = asyncio.create_task(send("second"))
+    await asyncio.sleep(0)
+    assert order == ["start:first"]
+
+    release_first.set()
+    r1, r2 = await asyncio.gather(first, second)
     assert r1.status == 200
     assert r2.status == 200
-    # Verify serialization: one process must fully finish before the other starts
-    if order[0] == "start:first":
-        assert order.index("end:first") < order.index("start:second")
-    else:
-        assert order.index("end:second") < order.index("start:first")
+    assert order == ["start:first", "end:first", "start:second", "end:second"]
 
 
 @pytest.mark.skipif(not HAS_AIOHTTP, reason="aiohttp not installed")
diff --git a/tests/tools/test_edit_advanced.py b/tests/tools/test_edit_advanced.py
index baf0eb02f..df1f001da 100644
--- a/tests/tools/test_edit_advanced.py
+++ b/tests/tools/test_edit_advanced.py
@@ -9,12 +9,11 @@
 """
 
 import os
-import time
 
 import pytest
 
-from nanobot.agent.tools.filesystem import EditFileTool, ReadFileTool, _find_match
 from nanobot.agent.tools import file_state
+from nanobot.agent.tools.filesystem import EditFileTool, ReadFileTool, _find_match
 
 
 @pytest.fixture(autouse=True)
@@ -214,59 +213,6 @@ class TestEditDiagnostics:
 # ---------------------------------------------------------------------------
 
 
-class TestAdvancedReplaceAll:
-    """replace_all should work correctly for fallback-based matches too."""
-
-    @pytest.fixture()
-    def tool(self, tmp_path):
-        return EditFileTool(workspace=tmp_path)
-
-    @pytest.mark.asyncio
-    async def test_replace_all_preserves_each_match_indentation(self, tool, tmp_path):
-        f = tmp_path / "indent_multi.py"
-        f.write_text(
-            "if a:\n"
-            "    def foo():\n"
-            "        pass\n"
-            "if b:\n"
-            "        def foo():\n"
-            "            pass\n",
-            encoding="utf-8",
-        )
-        result = await tool.execute(
-            path=str(f),
-            old_text="def foo():\n    pass",
-            new_text="def bar():\n    return 1",
-            replace_all=True,
-        )
-        assert "Successfully" in result
-        assert f.read_text(encoding="utf-8") == (
-            "if a:\n"
-            "    def bar():\n"
-            "        return 1\n"
-            "if b:\n"
-            "        def bar():\n"
-            "            return 1\n"
-        )
-
-    @pytest.mark.asyncio
-    async def test_trim_and_quote_fallback_match_succeeds(self, tool, tmp_path):
-        f = tmp_path / "quote_indent.py"
-        f.write_text("    message = “hello”\n", encoding="utf-8")
-        result = await tool.execute(
-            path=str(f),
-            old_text='message = "hello"',
-            new_text='message = "goodbye"',
-        )
-        assert "Successfully" in result
-        assert f.read_text(encoding="utf-8") == "    message = “goodbye”\n"
-
-
-# ---------------------------------------------------------------------------
-# Advanced fallback replacement behavior
-# ---------------------------------------------------------------------------
-
-
 class TestAdvancedReplaceAll:
     """replace_all should work correctly for fallback-based matches too."""
 
@@ -369,8 +315,6 @@ class TestFileSizeProtection:
     async def test_rejects_file_over_size_limit(self, tool, tmp_path):
         f = tmp_path / "huge.txt"
         f.write_text("x", encoding="utf-8")
-        # Monkey-patch the file size check by creating a stat mock
-        original_stat = f.stat
 
         class FakeStat:
             def __init__(self, real_stat):
@@ -412,10 +356,9 @@ class TestStaleDetectionContentFallback:
         f.write_text("hello world", encoding="utf-8")
         await read_tool.execute(path=str(f))
 
-        # Touch the file to bump mtime without changing content
-        time.sleep(0.05)
-        original_content = f.read_text()
-        f.write_text(original_content, encoding="utf-8")
+        # Bump mtime without changing content.
+        stat = f.stat()
+        os.utime(f, (stat.st_atime, stat.st_mtime + 10))
 
         result = await edit_tool.execute(path=str(f), old_text="world", new_text="earth")
         assert "Successfully" in result
diff --git a/tests/utils/test_gitstore.py b/tests/utils/test_gitstore.py
index b431bf719..8d5315bf8 100644
--- a/tests/utils/test_gitstore.py
+++ b/tests/utils/test_gitstore.py
@@ -1,7 +1,6 @@
 """Tests for GitStore — line_ages() and core git operations."""
 
 import subprocess
-import time
 from datetime import datetime, timedelta, timezone
 from unittest.mock import patch
 
@@ -71,25 +70,32 @@ class TestLineAges:
 
     def test_partial_edit_only_updates_changed_lines(self, git, tmp_path):
         """Only modified lines should reflect the new commit's timestamp."""
+        now = datetime(2026, 5, 1, tzinfo=timezone.utc)
+        old = now - timedelta(days=30)
+
         (tmp_path / "MEMORY.md").write_text(
             "# Memory\n\n## A\n- old\n\n## B\n- keep\n", encoding="utf-8"
         )
-        git.auto_commit("commit1")
-        time.sleep(1.1)
+        with patch("dulwich.worktree.time.time", return_value=old.timestamp()):
+            git.auto_commit("commit1")
 
         # Only modify section A
         (tmp_path / "MEMORY.md").write_text(
             "# Memory\n\n## A\n- new\n\n## B\n- keep\n", encoding="utf-8"
         )
-        git.auto_commit("commit2")
+        with patch("dulwich.worktree.time.time", return_value=now.timestamp()):
+            git.auto_commit("commit2")
+
+        with patch("nanobot.utils.gitstore.datetime") as mock_dt:
+            mock_dt.now.return_value = now
+            mock_dt.fromtimestamp = datetime.fromtimestamp
+            ages = git.line_ages("MEMORY.md")
 
-        ages = git.line_ages("MEMORY.md")
         lines = (tmp_path / "MEMORY.md").read_text(encoding="utf-8").splitlines()
-        # All lines are from today, but verify line-level tracking works
         assert len(ages) == len(lines)
-        # "- new" line and "- keep" line both age=0 (same day), but
-        # the key point is we get per-line results
-        assert len(ages) == 7
+        age_by_line = {line: age.age_days for line, age in zip(lines, ages, strict=True)}
+        assert age_by_line["- new"] == 0
+        assert age_by_line["- keep"] == 30
 
 
 class TestNestedRepoProtection: