mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-19 16:12:30 +00:00
- Add 42 tests for ContextBuilder (context.py: 0→42 tests) - Add 37 tests for SubagentManager lifecycle (subagent.py: 2→37 tests) - Add 42 unit tests for AutoCompact in isolation - Split monolithic test_runner.py (3313 lines) into 9 focused files: test_runner_core, test_runner_hooks, test_runner_errors, test_runner_safety, test_runner_persistence, test_runner_governance, test_runner_tool_execution, test_runner_injections, test_loop_runner_integration - Add 3 config passthrough tests (temperature/max_tokens/reasoning_effort) - Fix fragile patch.object(__init__) in test_stop_preserves_context - Create shared conftest.py with make_provider/make_loop factories Total: 934 tests passing, 0 regressions
482 lines
16 KiB
Python
482 lines
16 KiB
Python
"""Tests for core AgentRunner behavior: message passing, iteration limits,
|
|
timeouts, empty-response handling, usage accumulation, and config passthrough."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import time
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
from nanobot.config.schema import AgentDefaults
|
|
from nanobot.agent.tools.registry import ToolRegistry
|
|
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
|
|
|
|
# Project-default ``max_tool_result_chars`` value, read once from
# ``AgentDefaults``; every ``AgentRunSpec`` built in this module passes it
# through unchanged.
_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_preserves_reasoning_fields_and_tool_results():
    """Reasoning metadata and tool output are replayed on the follow-up request.

    The first provider reply carries a tool call together with
    ``reasoning_content`` and ``thinking_blocks``. The messages handed to the
    second provider call are captured and inspected for both the assistant
    turn and the tool result.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    second_request: list[dict] = []
    invocations = 0

    async def chat_with_retry(*, messages, **kwargs):
        nonlocal invocations
        invocations += 1
        if invocations != 1:
            # Snapshot what the runner sends once the tool has been executed.
            second_request[:] = messages
            return LLMResponse(content="done", tool_calls=[], usage={})
        list_dir_call = ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})
        return LLMResponse(
            content="thinking",
            tool_calls=[list_dir_call],
            reasoning_content="hidden reasoning",
            thinking_blocks=[{"type": "thinking", "thinking": "step"}],
            usage={"prompt_tokens": 5, "completion_tokens": 3},
        )

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = chat_with_retry

    registry = MagicMock()
    registry.get_definitions.return_value = []
    registry.execute = AsyncMock(return_value="tool result")

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[
            {"role": "system", "content": "system"},
            {"role": "user", "content": "do task"},
        ],
        tools=registry,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    assert result.final_content == "done"
    assert result.tools_used == ["list_dir"]
    assert result.tool_events == [{"name": "list_dir", "status": "ok", "detail": "tool result"}]

    # Exactly one assistant turn with tool calls should have been replayed.
    tool_calling_turns = [
        m for m in second_request if m.get("role") == "assistant" and m.get("tool_calls")
    ]
    assert len(tool_calling_turns) == 1
    replayed = tool_calling_turns[0]
    assert replayed["reasoning_content"] == "hidden reasoning"
    assert replayed["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}]

    tool_outputs = [m.get("content") for m in second_request if m.get("role") == "tool"]
    assert "tool result" in tool_outputs
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_returns_max_iterations_fallback():
    """A provider that keeps requesting tools ends in the iteration-limit fallback.

    With ``max_iterations=2`` the run must stop with ``"max_iterations"`` and
    the fallback text must also be the last assistant message.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    # Every provider call returns this same tool-requesting reply.
    never_finishing_reply = LLMResponse(
        content="still working",
        tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
    )
    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = AsyncMock(return_value=never_finishing_reply)

    registry = MagicMock()
    registry.get_definitions.return_value = []
    registry.execute = AsyncMock(return_value="tool result")

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[],
        tools=registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    expected_fallback = (
        "I reached the maximum number of tool call iterations (2) "
        "without completing the task. You can try breaking the task into smaller steps."
    )
    assert result.stop_reason == "max_iterations"
    assert result.final_content == expected_fallback

    closing_message = result.messages[-1]
    assert closing_message["role"] == "assistant"
    assert closing_message["content"] == result.final_content
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_times_out_hung_llm_request():
    """A provider call that never returns is cut short by ``llm_timeout_s``.

    The fake provider sleeps for an hour; with a 0.05 s timeout the run must
    come back in under a second, flagged as an error mentioning the timeout.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    async def chat_with_retry(**kwargs):
        # Simulates a hung request; never produces a response in test time.
        await asyncio.sleep(3600)

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = chat_with_retry

    registry = MagicMock()
    registry.get_definitions.return_value = []

    runner = AgentRunner(provider)
    started = time.monotonic()
    result = await runner.run(
        AgentRunSpec(
            initial_messages=[{"role": "user", "content": "hello"}],
            tools=registry,
            model="test-model",
            max_iterations=1,
            max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
            llm_timeout_s=0.05,
        )
    )
    elapsed = time.monotonic() - started

    assert elapsed < 1.0
    assert result.stop_reason == "error"
    final_text = result.final_content or ""
    assert "timed out" in final_text.lower()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_replaces_empty_tool_result_with_marker():
    """An empty tool result is swapped for a placeholder before the next request.

    The ``noop`` tool returns ``""``; the tool message included in the second
    provider request must read ``"(noop completed with no output)"``.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    provider = MagicMock(spec=LLMProvider)
    captured_second_call: list[dict] = []
    call_count = {"n": 0}

    async def chat_with_retry(*, messages, **kwargs):
        call_count["n"] += 1
        if call_count["n"] == 1:
            return LLMResponse(
                content="working",
                tool_calls=[ToolCallRequest(id="call_1", name="noop", arguments={})],
                usage={},
            )
        # Record the conversation the runner sends after running the tool.
        captured_second_call[:] = messages
        return LLMResponse(content="done", tool_calls=[], usage={})

    provider.chat_with_retry = chat_with_retry
    tools = MagicMock()
    tools.get_definitions.return_value = []
    tools.execute = AsyncMock(return_value="")

    runner = AgentRunner(provider)
    result = await runner.run(AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tools,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    ))

    assert result.final_content == "done"
    # ``next()`` without a default raises StopIteration when nothing matches,
    # and a coroutine surfaces that as an opaque RuntimeError. Use a default
    # so a missing tool message fails with a readable assertion instead.
    tool_message = next(
        (msg for msg in captured_second_call if msg.get("role") == "tool"),
        None,
    )
    assert tool_message is not None, "no tool message was sent on the second provider call"
    assert tool_message["content"] == "(noop completed with no output)"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_retries_empty_final_response_with_summary_prompt():
    """Two empty replies are retried before a tool-less finalization call succeeds."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    recorded: list[dict] = []

    async def chat_with_retry(*, messages, tools=None, **kwargs):
        recorded.append({"messages": messages, "tools": tools})
        if len(recorded) > 2:
            return LLMResponse(
                content="final answer",
                tool_calls=[],
                usage={"prompt_tokens": 3, "completion_tokens": 7},
            )
        # The first two calls yield nothing: no text and no tool calls.
        return LLMResponse(
            content=None,
            tool_calls=[],
            usage={"prompt_tokens": 5, "completion_tokens": 1},
        )

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = chat_with_retry

    registry = MagicMock()
    registry.get_definitions.return_value = []

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=registry,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    assert result.final_content == "final answer"

    # Three provider calls in total: the two empty attempts still receive a
    # ``tools`` argument, the third (finalization) call is made without tools.
    assert len(recorded) == 3
    first, second, third = recorded
    assert first["tools"] is not None
    assert second["tools"] is not None
    assert third["tools"] is None

    # Usage is summed over all three calls.
    assert result.usage["prompt_tokens"] == 13  # 5 + 5 + 3
    assert result.usage["completion_tokens"] == 9  # 1 + 1 + 7
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_uses_specific_message_after_empty_finalization_retry():
    """If every provider reply is empty, the run ends as ``empty_final_response``.

    The final content must then be ``EMPTY_FINAL_RESPONSE_MESSAGE``.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner
    from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE

    async def chat_with_retry(*, messages, **kwargs):
        # Always empty: neither text nor tool calls, on every attempt.
        return LLMResponse(content=None, tool_calls=[], usage={})

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = chat_with_retry

    registry = MagicMock()
    registry.get_definitions.return_value = []

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=registry,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    assert result.final_content == EMPTY_FINAL_RESPONSE_MESSAGE
    assert result.stop_reason == "empty_final_response"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_empty_response_does_not_break_tool_chain():
    """An empty reply in the middle of a tool chain must not end the run.

    Scripted provider replies: tool call, empty, tool call, final text.
    The run is expected to finish as ``"completed"`` after four provider calls.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    provider_calls = 0

    def usage(completion_tokens):
        return {"prompt_tokens": 10, "completion_tokens": completion_tokens}

    def read_file_reply(call_id, path):
        # A content-less reply that only asks for ``read_file`` on ``path``.
        return LLMResponse(
            content=None,
            tool_calls=[ToolCallRequest(id=call_id, name="read_file", arguments={"path": path})],
            usage=usage(5),
        )

    async def chat_with_retry(*, messages, tools=None, **kwargs):
        nonlocal provider_calls
        provider_calls += 1
        if provider_calls == 1:
            return read_file_reply("tc1", "a.txt")
        if provider_calls == 2:
            # The empty intermediate reply the runner has to recover from.
            return LLMResponse(content=None, tool_calls=[], usage=usage(1))
        if provider_calls == 3:
            return read_file_reply("tc2", "b.txt")
        return LLMResponse(
            content="Here are the results.",
            tool_calls=[],
            usage=usage(10),
        )

    provider = MagicMock(spec=LLMProvider)
    # Both the plain and the streaming entry points share the same script.
    provider.chat_with_retry = chat_with_retry
    provider.chat_stream_with_retry = chat_with_retry

    async def fake_tool(name, args, **extra):
        return "file content"

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = [
        {"type": "function", "function": {"name": "read_file"}}
    ]
    tool_registry.execute = AsyncMock(side_effect=fake_tool)

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "read both files"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=10,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    assert result.final_content == "Here are the results."
    assert result.stop_reason == "completed"
    assert provider_calls == 4
    assert "read_file" in result.tools_used
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_accumulates_usage_and_preserves_cached_tokens():
    """Token usage, including ``cached_tokens``, is summed across provider calls."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    invocations = 0

    async def chat_with_retry(*, messages, **kwargs):
        nonlocal invocations
        invocations += 1
        if invocations != 1:
            return LLMResponse(
                content="done",
                tool_calls=[],
                usage={"prompt_tokens": 200, "completion_tokens": 20, "cached_tokens": 150},
            )
        # The first call requests a tool so that a second call follows.
        return LLMResponse(
            content="thinking",
            tool_calls=[ToolCallRequest(id="call_1", name="read_file", arguments={"path": "x"})],
            usage={"prompt_tokens": 100, "completion_tokens": 10, "cached_tokens": 80},
        )

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = chat_with_retry

    registry = MagicMock()
    registry.get_definitions.return_value = []
    registry.execute = AsyncMock(return_value="file content")

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=registry,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    # Each counter is the sum of the two scripted replies.
    totals = result.usage
    assert totals["prompt_tokens"] == 300  # 100 + 200
    assert totals["completion_tokens"] == 30  # 10 + 20
    assert totals["cached_tokens"] == 230  # 80 + 150
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_binds_on_retry_wait_to_retry_callback_not_progress():
    """Regression: ``on_retry_wait`` must be the spec's ``retry_wait_callback``.

    An earlier runtime refactor bound provider retry heartbeats to
    ``progress_callback``, which leaked internal diagnostics such as
    "Model request failed, retry in 1s" to end-user channels as ordinary
    progress updates.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    seen_kwargs: dict = {}

    async def chat_with_retry(**kwargs):
        # Keep every keyword the runner hands to the provider.
        seen_kwargs.update(kwargs)
        return LLMResponse(content="done", tool_calls=[], usage={})

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = chat_with_retry

    registry = MagicMock()
    registry.get_definitions.return_value = []

    progress_cb = AsyncMock()
    retry_wait_cb = AsyncMock()

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[
            {"role": "system", "content": "system"},
            {"role": "user", "content": "hi"},
        ],
        tools=registry,
        model="test-model",
        max_iterations=1,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        progress_callback=progress_cb,
        retry_wait_callback=retry_wait_cb,
    )
    await runner.run(spec)

    bound_callback = seen_kwargs["on_retry_wait"]
    assert bound_callback is retry_wait_cb
    assert bound_callback is not progress_cb
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Config passthrough tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_passes_temperature_to_provider():
    """``AgentRunSpec.temperature`` is forwarded to ``provider.chat_with_retry``."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    seen_kwargs: dict = {}

    async def chat_with_retry(**kwargs):
        # Record the keyword arguments the runner passes to the provider.
        seen_kwargs.update(kwargs)
        return LLMResponse(content="done", tool_calls=[], usage={})

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = chat_with_retry

    registry = MagicMock()
    registry.get_definitions.return_value = []

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "hi"}],
        tools=registry,
        model="test-model",
        max_iterations=1,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        temperature=0.7,
    )
    await runner.run(spec)

    assert seen_kwargs["temperature"] == 0.7
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_passes_max_tokens_to_provider():
    """``AgentRunSpec.max_tokens`` is forwarded to ``provider.chat_with_retry``."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    seen_kwargs: dict = {}

    async def chat_with_retry(**kwargs):
        # Record the keyword arguments the runner passes to the provider.
        seen_kwargs.update(kwargs)
        return LLMResponse(content="done", tool_calls=[], usage={})

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = chat_with_retry

    registry = MagicMock()
    registry.get_definitions.return_value = []

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "hi"}],
        tools=registry,
        model="test-model",
        max_iterations=1,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        max_tokens=8192,
    )
    await runner.run(spec)

    assert seen_kwargs["max_tokens"] == 8192
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_passes_reasoning_effort_to_provider():
    """``AgentRunSpec.reasoning_effort`` is forwarded to ``provider.chat_with_retry``."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    seen_kwargs: dict = {}

    async def chat_with_retry(**kwargs):
        # Record the keyword arguments the runner passes to the provider.
        seen_kwargs.update(kwargs)
        return LLMResponse(content="done", tool_calls=[], usage={})

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = chat_with_retry

    registry = MagicMock()
    registry.get_definitions.return_value = []

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "hi"}],
        tools=registry,
        model="test-model",
        max_iterations=1,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        reasoning_effort="high",
    )
    await runner.run(spec)

    assert seen_kwargs["reasoning_effort"] == "high"
|