nanobot/tests/agent/test_runner_core.py
chengyongru 99cc6ee808 test(agent): expand coverage and refactor test structure
- Add 42 tests for ContextBuilder (context.py: 0→42 tests)
- Add 37 tests for SubagentManager lifecycle (subagent.py: 2→37 tests)
- Add 42 unit tests for AutoCompact in isolation
- Split monolithic test_runner.py (3313 lines) into 9 focused files:
  test_runner_core, test_runner_hooks, test_runner_errors,
  test_runner_safety, test_runner_persistence, test_runner_governance,
  test_runner_tool_execution, test_runner_injections,
  test_loop_runner_integration
- Add 3 config passthrough tests (temperature/max_tokens/reasoning_effort)
- Fix fragile patch.object(__init__) in test_stop_preserves_context
- Create shared conftest.py with make_provider/make_loop factories

Total: 934 tests passing, 0 regressions
2026-05-13 12:49:17 +08:00

482 lines
16 KiB
Python

"""Tests for core AgentRunner behavior: message passing, iteration limits,
timeouts, empty-response handling, usage accumulation, and config passthrough."""
from __future__ import annotations
import asyncio
import time
from unittest.mock import AsyncMock, MagicMock
import pytest
from nanobot.config.schema import AgentDefaults
from nanobot.agent.tools.registry import ToolRegistry
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
# ``max_tool_result_chars`` as configured by a default-constructed
# AgentDefaults; the tests below pass it to each AgentRunSpec they build.
_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
@pytest.mark.asyncio
async def test_runner_preserves_reasoning_fields_and_tool_results():
    """A tool-calling turn keeps its reasoning metadata and its tool output.

    The first provider reply requests ``list_dir`` and carries
    ``reasoning_content`` / ``thinking_blocks``. The messages handed to the
    follow-up provider call must contain that assistant turn with both fields
    intact, plus a ``tool`` message holding the tool's return value.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    followup_messages: list[dict] = []
    calls_seen = 0

    async def scripted_chat(*, messages, **kwargs):
        nonlocal calls_seen
        calls_seen += 1
        if calls_seen != 1:
            # Snapshot what the runner sends once the tool has been executed.
            followup_messages[:] = messages
            return LLMResponse(content="done", tool_calls=[], usage={})
        return LLMResponse(
            content="thinking",
            tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
            reasoning_content="hidden reasoning",
            thinking_blocks=[{"type": "thinking", "thinking": "step"}],
            usage={"prompt_tokens": 5, "completion_tokens": 3},
        )

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = scripted_chat

    registry = MagicMock()
    registry.get_definitions.return_value = []
    registry.execute = AsyncMock(return_value="tool result")

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[
            {"role": "system", "content": "system"},
            {"role": "user", "content": "do task"},
        ],
        tools=registry,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    assert result.final_content == "done"
    assert result.tools_used == ["list_dir"]
    assert result.tool_events == [
        {"name": "list_dir", "status": "ok", "detail": "tool result"}
    ]

    # Exactly one assistant turn with tool calls should have been replayed.
    tool_call_turns = [
        entry
        for entry in followup_messages
        if entry.get("role") == "assistant" and entry.get("tool_calls")
    ]
    assert len(tool_call_turns) == 1
    replayed_turn = tool_call_turns[0]
    assert replayed_turn["reasoning_content"] == "hidden reasoning"
    assert replayed_turn["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}]

    tool_outputs = [
        entry.get("content") for entry in followup_messages if entry.get("role") == "tool"
    ]
    assert "tool result" in tool_outputs
@pytest.mark.asyncio
async def test_runner_returns_max_iterations_fallback():
    """A provider that keeps requesting tools ends with the iteration-limit notice."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    # Every provider reply asks for another tool call, so the stub itself
    # never produces a final answer.
    endless_tool_call = LLMResponse(
        content="still working",
        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
    )
    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = AsyncMock(return_value=endless_tool_call)

    registry = MagicMock()
    registry.get_definitions.return_value = []
    registry.execute = AsyncMock(return_value="tool result")

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[],
        tools=registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    expected_notice = (
        "I reached the maximum number of tool call iterations (2) "
        "without completing the task. You can try breaking the task into smaller steps."
    )
    assert result.stop_reason == "max_iterations"
    assert result.final_content == expected_notice

    # The fallback text must also be the last message of the transcript.
    closing_message = result.messages[-1]
    assert closing_message["role"] == "assistant"
    assert closing_message["content"] == result.final_content
@pytest.mark.asyncio
async def test_runner_times_out_hung_llm_request():
    """A provider call that never returns is reported as a timeout error."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    async def hang_forever(**kwargs):
        # Sleeps far longer than the 0.05 s ``llm_timeout_s`` set below.
        await asyncio.sleep(3600)

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = hang_forever

    registry = MagicMock()
    registry.get_definitions.return_value = []

    runner = AgentRunner(provider)

    started_at = time.monotonic()
    result = await runner.run(
        AgentRunSpec(
            initial_messages=[{"role": "user", "content": "hello"}],
            tools=registry,
            model="test-model",
            max_iterations=1,
            max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
            llm_timeout_s=0.05,
        )
    )
    elapsed = time.monotonic() - started_at

    # The run must return promptly rather than wait out the hour-long sleep.
    assert elapsed < 1.0
    assert result.stop_reason == "error"
    reported = (result.final_content or "").lower()
    assert "timed out" in reported
@pytest.mark.asyncio
async def test_runner_replaces_empty_tool_result_with_marker():
    """An empty-string tool result reaches the provider as a placeholder message."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    followup_messages: list[dict] = []
    calls_seen = 0

    async def scripted_chat(*, messages, **kwargs):
        nonlocal calls_seen
        calls_seen += 1
        if calls_seen != 1:
            # Record the conversation as sent after the tool has run.
            followup_messages[:] = messages
            return LLMResponse(content="done", tool_calls=[], usage={})
        return LLMResponse(
            content="working",
            tool_calls=[ToolCallRequest(id="call_1", name="noop", arguments={})],
            usage={},
        )

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = scripted_chat

    registry = MagicMock()
    registry.get_definitions.return_value = []
    # The tool "succeeds" but produces no output at all.
    registry.execute = AsyncMock(return_value="")

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    assert result.final_content == "done"
    first_tool_message = next(
        filter(lambda entry: entry.get("role") == "tool", followup_messages)
    )
    assert first_tool_message["content"] == "(noop completed with no output)"
@pytest.mark.asyncio
async def test_runner_retries_empty_final_response_with_summary_prompt():
    """Two empty replies are retried; the third call runs without tools and succeeds."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    recorded: list[dict] = []

    async def scripted_chat(*, messages, tools=None, **kwargs):
        recorded.append({"messages": messages, "tools": tools})
        if len(recorded) > 2:
            return LLMResponse(
                content="final answer",
                tool_calls=[],
                usage={"prompt_tokens": 3, "completion_tokens": 7},
            )
        # The first two calls yield neither content nor tool calls.
        return LLMResponse(
            content=None,
            tool_calls=[],
            usage={"prompt_tokens": 5, "completion_tokens": 1},
        )

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = scripted_chat

    registry = MagicMock()
    registry.get_definitions.return_value = []

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=registry,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    assert result.final_content == "final answer"
    # Two empty replies plus the one that finally answers: three calls total.
    assert len(recorded) == 3
    first_try, second_try, final_try = recorded
    assert first_try["tools"] is not None
    assert second_try["tools"] is not None
    # The last call is expected to be made with ``tools`` set to None.
    assert final_try["tools"] is None
    # Usage is summed over all three calls: 5 + 5 + 3 and 1 + 1 + 7.
    assert result.usage["prompt_tokens"] == 13
    assert result.usage["completion_tokens"] == 9
@pytest.mark.asyncio
async def test_runner_uses_specific_message_after_empty_finalization_retry():
    """A provider that only ever returns empty replies yields the dedicated fallback.

    The run must finish with ``EMPTY_FINAL_RESPONSE_MESSAGE`` as its content
    and ``"empty_final_response"`` as its stop reason.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner
    from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE

    async def always_empty(*, messages, **kwargs):
        return LLMResponse(content=None, tool_calls=[], usage={})

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = always_empty

    registry = MagicMock()
    registry.get_definitions.return_value = []

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=registry,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    assert result.final_content == EMPTY_FINAL_RESPONSE_MESSAGE
    assert result.stop_reason == "empty_final_response"
@pytest.mark.asyncio
async def test_runner_empty_response_does_not_break_tool_chain():
    """An empty reply in the middle of a tool chain must not end the run.

    Provider script: tool_call -> empty -> tool_call -> final text.
    The run is expected to finish as ``completed`` after exactly four
    provider calls.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    def read_request(call_id, path):
        # Content-less reply that asks for one ``read_file`` call.
        return LLMResponse(
            content=None,
            tool_calls=[ToolCallRequest(id=call_id, name="read_file", arguments={"path": path})],
            usage={"prompt_tokens": 10, "completion_tokens": 5},
        )

    provider_calls = 0

    async def scripted_chat(*, messages, tools=None, **kwargs):
        nonlocal provider_calls
        provider_calls += 1
        turn = provider_calls
        if turn == 1:
            return read_request("tc1", "a.txt")
        if turn == 2:
            # The empty intermediate reply under test.
            return LLMResponse(
                content=None,
                tool_calls=[],
                usage={"prompt_tokens": 10, "completion_tokens": 1},
            )
        if turn == 3:
            return read_request("tc2", "b.txt")
        return LLMResponse(
            content="Here are the results.",
            tool_calls=[],
            usage={"prompt_tokens": 10, "completion_tokens": 10},
        )

    provider = MagicMock(spec=LLMProvider)
    # Both entry points share the same script and the same call counter.
    provider.chat_with_retry = scripted_chat
    provider.chat_stream_with_retry = scripted_chat

    async def fake_tool(name, args, **_ignored):
        return "file content"

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = [
        {"type": "function", "function": {"name": "read_file"}}
    ]
    tool_registry.execute = AsyncMock(side_effect=fake_tool)

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "read both files"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=10,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    assert result.final_content == "Here are the results."
    assert result.stop_reason == "completed"
    assert provider_calls == 4
    assert "read_file" in result.tools_used
@pytest.mark.asyncio
async def test_runner_accumulates_usage_and_preserves_cached_tokens():
    """Token usage, including ``cached_tokens``, is summed across provider calls."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    calls_seen = 0

    async def scripted_chat(*, messages, **kwargs):
        nonlocal calls_seen
        calls_seen += 1
        if calls_seen != 1:
            return LLMResponse(
                content="done",
                tool_calls=[],
                usage={"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150},
            )
        # First reply requests a tool so that a second provider call follows.
        return LLMResponse(
            content="thinking",
            tool_calls=[ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})],
            usage={"prompt_tokens": 100, "completion_tokens": 10, "cached_tokens": 80},
        )

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = scripted_chat

    registry = MagicMock()
    registry.get_definitions.return_value = []
    registry.execute = AsyncMock(return_value="file content")

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=registry,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    # Each counter is the sum of the two scripted replies.
    totals = result.usage
    assert totals["prompt_tokens"] == 300  # 100 + 200
    assert totals["completion_tokens"] == 30  # 10 + 20
    assert totals["cached_tokens"] == 230  # 80 + 150
@pytest.mark.asyncio
async def test_runner_binds_on_retry_wait_to_retry_callback_not_progress():
    """Regression: ``on_retry_wait`` must be the spec's ``retry_wait_callback``.

    Provider retry heartbeats belong on ``retry_wait_callback``. An earlier
    runtime refactor bound them to ``progress_callback`` instead, which let
    internal retry diagnostics such as "Model request failed, retry in 1s"
    leak to end-user channels as ordinary progress updates.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    provider_kwargs: dict = {}

    async def recording_chat(**kwargs):
        # Keep the keyword arguments the runner passed to the provider.
        provider_kwargs.update(kwargs)
        return LLMResponse(content="done", tool_calls=[], usage={})

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = recording_chat

    registry = MagicMock()
    registry.get_definitions.return_value = []

    # Two distinct callbacks so identity checks can tell them apart.
    progress_cb = AsyncMock()
    retry_wait_cb = AsyncMock()

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[
            {"role": "system", "content": "system"},
            {"role": "user", "content": "hi"},
        ],
        tools=registry,
        model="test-model",
        max_iterations=1,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        progress_callback=progress_cb,
        retry_wait_callback=retry_wait_cb,
    )
    await runner.run(spec)

    bound_callback = provider_kwargs["on_retry_wait"]
    assert bound_callback is retry_wait_cb
    assert bound_callback is not progress_cb
# ---------------------------------------------------------------------------
# Config passthrough tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_runner_passes_temperature_to_provider():
    """``AgentRunSpec.temperature`` is passed to ``provider.chat_with_retry``."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    provider_kwargs: dict = {}

    async def recording_chat(**kwargs):
        # Keep the keyword arguments the runner passed to the provider.
        provider_kwargs.update(kwargs)
        return LLMResponse(content="done", tool_calls=[], usage={})

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = recording_chat

    registry = MagicMock()
    registry.get_definitions.return_value = []

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "hi"}],
        tools=registry,
        model="test-model",
        max_iterations=1,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        temperature=0.7,
    )
    await runner.run(spec)

    assert provider_kwargs["temperature"] == 0.7
@pytest.mark.asyncio
async def test_runner_passes_max_tokens_to_provider():
    """``AgentRunSpec.max_tokens`` is passed to ``provider.chat_with_retry``."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    provider_kwargs: dict = {}

    async def recording_chat(**kwargs):
        # Keep the keyword arguments the runner passed to the provider.
        provider_kwargs.update(kwargs)
        return LLMResponse(content="done", tool_calls=[], usage={})

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = recording_chat

    registry = MagicMock()
    registry.get_definitions.return_value = []

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "hi"}],
        tools=registry,
        model="test-model",
        max_iterations=1,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        max_tokens=8192,
    )
    await runner.run(spec)

    assert provider_kwargs["max_tokens"] == 8192
@pytest.mark.asyncio
async def test_runner_passes_reasoning_effort_to_provider():
    """``AgentRunSpec.reasoning_effort`` is passed to ``provider.chat_with_retry``."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    provider_kwargs: dict = {}

    async def recording_chat(**kwargs):
        # Keep the keyword arguments the runner passed to the provider.
        provider_kwargs.update(kwargs)
        return LLMResponse(content="done", tool_calls=[], usage={})

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = recording_chat

    registry = MagicMock()
    registry.get_definitions.return_value = []

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "hi"}],
        tools=registry,
        model="test-model",
        max_iterations=1,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        reasoning_effort="high",
    )
    await runner.run(spec)

    assert provider_kwargs["reasoning_effort"] == "high"