# nanobot/tests/agent/tools/test_long_task.py
#
# 295 lines
# 10 KiB
# Python

"""Tests for Long Task Tool: HandoffTool, CompleteTool, LongTaskTool."""
import pytest
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
@pytest.mark.asyncio
async def test_handoff_tool_stores_signal():
    """HandoffTool.execute records a ``handoff`` signal in the shared store.

    The tool is built around a plain dict.  After ``execute`` the dict must
    hold the signal type and the caller's message verbatim, and the call must
    return the fixed acknowledgement string.
    """
    from nanobot.agent.tools.long_task import HandoffTool

    message = "Processed items 1-8. Results in out.md. Continue with item 9."
    store: dict[str, str] = {}
    tool = HandoffTool(store)

    result = await tool.execute(message=message)

    assert result == "Progress recorded. The next step will continue from here."
    assert store["type"] == "handoff"
    assert store["payload"] == message
@pytest.mark.asyncio
async def test_complete_tool_stores_signal():
    """CompleteTool.execute records a ``complete`` signal in the shared store.

    After ``execute`` the dict passed to the constructor must contain the
    signal type and the summary verbatim, and the call must return the fixed
    confirmation string.
    """
    from nanobot.agent.tools.long_task import CompleteTool

    summary = "All 100 items processed. Summary in report.md"
    store: dict[str, str] = {}
    tool = CompleteTool(store)

    result = await tool.execute(summary=summary)

    assert result == "Task marked as complete."
    assert store["type"] == "complete"
    assert store["payload"] == summary
@pytest.mark.asyncio
async def test_signal_tools_overwrite_on_multiple_calls():
    """Last call wins -- the orchestrator only reads the final signal.

    Both tools share one store; a later ``complete`` call must replace the
    type and payload written by an earlier ``handoff`` call.
    """
    from nanobot.agent.tools.long_task import CompleteTool, HandoffTool

    store: dict[str, str] = {}
    handoff = HandoffTool(store)
    complete = CompleteTool(store)

    await handoff.execute(message="first progress")
    assert store["type"] == "handoff"

    # The second signal is written into the same dict and must overwrite it.
    await complete.execute(summary="done early")
    assert store["type"] == "complete"
    assert store["payload"] == "done early"
# ---------------------------------------------------------------------------
# Helper: minimal SubagentManager stub
# ---------------------------------------------------------------------------
def _make_manager_stub():
"""Create a minimal SubagentManager stub with a mockable run_step."""
mgr = MagicMock()
mgr.run_step = AsyncMock()
return mgr
def _step_result(**overrides):
"""Create a minimal AgentRunResult-like namespace."""
defaults = dict(
final_content="step done",
messages=[],
tool_events=[],
stop_reason="completed",
tools_used=[],
)
defaults.update(overrides)
return SimpleNamespace(**defaults)
# ---------------------------------------------------------------------------
# LongTaskTool orchestrator tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_long_task_completes_in_one_step():
    """Subagent calls complete() immediately.

    The fake step invokes the ``complete`` tool it was handed, and the
    orchestrator is expected to return that summary as its result.
    """
    from nanobot.agent.tools.long_task import LongTaskTool

    mgr = _make_manager_stub()

    async def fake_run_step(*, system_prompt, user_message, extra_tools):
        # Simulate the subagent signalling completion through the extra tool.
        for t in extra_tools:
            if t.name == "complete":
                await t.execute(summary="All done. Report in summary.md")
        return _step_result(
            final_content="All done.",
            tools_used=["complete"],
        )

    mgr.run_step.side_effect = fake_run_step
    tool = LongTaskTool(manager=mgr)

    result = await tool.execute(goal="Audit all issues.")

    # The complete() summary wins over the step's final_content.
    assert result == "All done. Report in summary.md"
@pytest.mark.asyncio
async def test_long_task_completes_after_multiple_handoffs():
    """Subagent calls handoff() twice then complete().

    Also checks that the second step's user message carries the first
    handoff text and the "8 tool calls" budget notice.
    """
    from nanobot.agent.tools.long_task import LongTaskTool

    mgr = _make_manager_stub()
    call_count = 0

    async def fake_run_step(*, system_prompt, user_message, extra_tools):
        nonlocal call_count
        call_count += 1
        if call_count == 1:
            for t in extra_tools:
                if t.name == "handoff":
                    await t.execute(message="Processed 1-8.")
        elif call_count == 2:
            # The previous handoff must be forwarded into this step's prompt.
            assert "Processed 1-8." in user_message
            assert "8 tool calls" in user_message
            for t in extra_tools:
                if t.name == "handoff":
                    await t.execute(message="Processed 9-16.")
        else:
            for t in extra_tools:
                if t.name == "complete":
                    await t.execute(summary="All 16 items audited.")
        # Every step returns a result, whichever signal was emitted.
        return _step_result(tools_used=["handoff"])

    mgr.run_step.side_effect = fake_run_step
    tool = LongTaskTool(manager=mgr)

    result = await tool.execute(goal="Audit 16 issues.")

    assert result == "All 16 items audited."
    assert call_count == 3
@pytest.mark.asyncio
async def test_long_task_fallback_when_no_signal_called():
    """Subagent doesn't call handoff/complete — extract progress from messages.

    Every step stops with ``max_iterations`` and never signals, so the run is
    expected to hit ``max_steps`` and report the progress text recovered from
    the assistant messages.
    """
    from nanobot.agent.tools.long_task import LongTaskTool

    mgr = _make_manager_stub()

    async def fake_run_step(*, system_prompt, user_message, extra_tools):
        return _step_result(
            final_content="Tool budget exhausted.",
            messages=[
                {"role": "system", "content": "..."},
                {"role": "user", "content": "..."},
                {"role": "assistant", "content": "I processed items 1-5. Results in out.md."},
                {"role": "tool", "content": "ok"},
                {"role": "assistant", "content": "Tool budget exhausted. Call handoff() earlier next time."},
            ],
            stop_reason="max_iterations",
        )

    mgr.run_step.side_effect = fake_run_step
    tool = LongTaskTool(manager=mgr)

    result = await tool.execute(goal="Do something.", max_steps=2)

    # Should reach max_steps and return the fallback extracted from messages
    assert "max steps (2)" in result
    assert "I processed items 1-5" in result
@pytest.mark.asyncio
async def test_long_task_auto_extracts_on_natural_end():
    """Subagent finishes naturally (stop_reason=completed) without calling signal.

    The first step ends without handoff/complete, and the run is expected to
    continue into a second step, where the subagent calls complete().
    """
    from nanobot.agent.tools.long_task import LongTaskTool

    mgr = _make_manager_stub()
    steps = 0

    async def fake_run_step(*, system_prompt, user_message, extra_tools):
        nonlocal steps
        steps += 1
        if steps == 1:
            # First step: plain final answer, no signal tool invoked.
            return _step_result(
                final_content="I processed items 1-5. Results in out.md.",
                stop_reason="completed",
            )
        # Second step: subagent calls complete
        for t in extra_tools:
            if t.name == "complete":
                await t.execute(summary="All done.")
        return _step_result(
            final_content="All done.",
            tools_used=["complete"],
        )

    mgr.run_step.side_effect = fake_run_step
    tool = LongTaskTool(manager=mgr)

    result = await tool.execute(goal="Process items.", max_steps=5)

    assert result == "All done."
    assert steps == 2
@pytest.mark.asyncio
async def test_long_task_goal_appears_in_system_prompt():
    """Verify every step's system_prompt contains the long task system prompt.

    The fake step records each ``system_prompt`` it receives and completes
    immediately, so exactly one prompt is captured.

    NOTE(review): despite the test name, the goal text itself is not asserted
    on -- only the "handoff()", "complete()" and "filesystem" markers are.
    """
    from nanobot.agent.tools.long_task import LongTaskTool

    mgr = _make_manager_stub()
    captured_prompts = []

    async def fake_run_step(*, system_prompt, user_message, extra_tools):
        captured_prompts.append(system_prompt)
        for t in extra_tools:
            if t.name == "complete":
                await t.execute(summary="done")
        return _step_result(final_content="done")

    mgr.run_step.side_effect = fake_run_step
    tool = LongTaskTool(manager=mgr)

    await tool.execute(goal="Audit everything.")

    assert len(captured_prompts) == 1
    assert "handoff()" in captured_prompts[0]
    assert "complete()" in captured_prompts[0]
    assert "filesystem" in captured_prompts[0]
# ---------------------------------------------------------------------------
# Helper function tests
# ---------------------------------------------------------------------------
def test_build_user_message_step_0():
    """First step: message opens with the goal and has no progress section.

    With ``step=0`` and an empty handoff, the message must start with the
    goal, be labelled "Step 1", mention the "8 tool calls" budget, and omit
    the "Previous Progress" section.
    """
    from nanobot.agent.tools.long_task import _build_user_message

    msg = _build_user_message("Audit all issues.", step=0, handoff="")

    assert msg.startswith("Audit all issues.")
    assert "Step 1" in msg
    assert "8 tool calls" in msg
    assert "Previous Progress" not in msg
def test_build_user_message_later_step():
    """Later step: message includes the goal, prior handoff and step label.

    With ``step=3`` the label must read "Step 4", and the handoff text must
    appear under a "Previous Progress" section alongside the budget notice.
    """
    from nanobot.agent.tools.long_task import _build_user_message

    msg = _build_user_message("Audit all issues.", step=3, handoff="Did 1-10.")

    assert "Audit all issues." in msg
    assert "Previous Progress" in msg
    assert "Did 1-10." in msg
    assert "Step 4" in msg
    assert "8 tool calls" in msg
def test_extract_handoff_from_messages():
    """The extractor returns the assistant message that carries progress text.

    The transcript also contains an assistant message with empty content; the
    expected result is the later, non-empty assistant message.
    """
    from nanobot.agent.tools.long_task import _extract_handoff_from_messages

    messages = [
        {"role": "system", "content": "sys"},
        {"role": "user", "content": "do it"},
        {"role": "assistant", "content": ""},
        {"role": "tool", "content": "result"},
        {"role": "assistant", "content": "I processed items 1-3."},
    ]

    assert _extract_handoff_from_messages(messages) == "I processed items 1-3."
def test_extract_handoff_skips_budget_message():
    """A trailing "Tool budget exhausted" message is not used as the handoff.

    The last assistant message is the budget notice, so the expected result
    is the earlier assistant message that describes real progress.
    """
    from nanobot.agent.tools.long_task import _extract_handoff_from_messages

    messages = [
        {"role": "system", "content": "sys"},
        {"role": "user", "content": "do it"},
        {"role": "assistant", "content": "I processed items 1-3."},
        {"role": "tool", "content": "result"},
        {"role": "assistant", "content": "Tool budget exhausted. Call handoff() earlier."},
    ]

    # Should skip the budget message and find the actual progress
    assert _extract_handoff_from_messages(messages) == "I processed items 1-3."
def test_extract_handoff_from_empty_messages():
    """With no assistant messages, the extractor returns an empty string.

    Covers both an empty transcript and one that holds only a system message.
    """
    from nanobot.agent.tools.long_task import _extract_handoff_from_messages

    assert _extract_handoff_from_messages([]) == ""
    assert _extract_handoff_from_messages([{"role": "system", "content": "sys"}]) == ""
# ---------------------------------------------------------------------------
# Integration: verify LongTaskTool is wired into the main agent loop
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_long_task_registered_in_tool_registry(tmp_path):
    """Verify LongTaskTool appears in the main agent's tool registry.

    Builds an ``AgentLoop`` with a mocked provider and a temporary workspace,
    then looks the tool up by its ``long_task`` name.
    """
    from nanobot.agent.loop import AgentLoop
    from nanobot.bus.queue import MessageBus

    bus = MessageBus()
    provider = MagicMock()
    provider.get_default_model.return_value = "test-model"

    loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")

    tool = loop.tools.get("long_task")
    assert tool is not None
    assert tool.name == "long_task"