"""Tests for Long Task Tool: HandoffTool, CompleteTool, LongTaskTool.""" import pytest from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock @pytest.mark.asyncio async def test_handoff_tool_stores_signal(): from nanobot.agent.tools.long_task import HandoffTool store: dict[str, str] = {} tool = HandoffTool(store) result = await tool.execute(message="Processed items 1-8. Results in out.md. Continue with item 9.") assert result == "Progress recorded. The next step will continue from here." assert store["type"] == "handoff" assert store["payload"] == "Processed items 1-8. Results in out.md. Continue with item 9." @pytest.mark.asyncio async def test_complete_tool_stores_signal(): from nanobot.agent.tools.long_task import CompleteTool store: dict[str, str] = {} tool = CompleteTool(store) result = await tool.execute(summary="All 100 items processed. Summary in report.md") assert result == "Task marked as complete." assert store["type"] == "complete" assert store["payload"] == "All 100 items processed. Summary in report.md" @pytest.mark.asyncio async def test_signal_tools_overwrite_on_multiple_calls(): """Last call wins -- the orchestrator only reads the final signal.""" from nanobot.agent.tools.long_task import HandoffTool, CompleteTool store: dict[str, str] = {} handoff = HandoffTool(store) complete = CompleteTool(store) await handoff.execute(message="first progress") assert store["type"] == "handoff" await complete.execute(summary="done early") assert store["type"] == "complete" assert store["payload"] == "done early" # --------------------------------------------------------------------------- # Helper: minimal SubagentManager stub # --------------------------------------------------------------------------- def _make_manager_stub(): """Create a minimal SubagentManager stub with a mockable run_step.""" mgr = MagicMock() mgr.run_step = AsyncMock() return mgr def _step_result(**overrides): """Create a minimal AgentRunResult-like namespace.""" defaults = dict( final_content="step done", messages=[], tool_events=[], stop_reason="completed", tools_used=[], ) defaults.update(overrides) return SimpleNamespace(**defaults) # --------------------------------------------------------------------------- # LongTaskTool orchestrator tests # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_long_task_completes_in_one_step(): """Subagent calls complete() immediately.""" from nanobot.agent.tools.long_task import LongTaskTool mgr = _make_manager_stub() async def fake_run_step(*, system_prompt, user_message, extra_tools): for t in extra_tools: if t.name == "complete": await t.execute(summary="All done. Report in summary.md") return _step_result( final_content="All done.", tools_used=["complete"], ) mgr.run_step.side_effect = fake_run_step tool = LongTaskTool(manager=mgr) result = await tool.execute(goal="Audit all issues.") assert result == "All done. Report in summary.md" @pytest.mark.asyncio async def test_long_task_completes_after_multiple_handoffs(): """Subagent calls handoff() twice then complete().""" from nanobot.agent.tools.long_task import LongTaskTool mgr = _make_manager_stub() call_count = 0 async def fake_run_step(*, system_prompt, user_message, extra_tools): nonlocal call_count call_count += 1 if call_count == 1: for t in extra_tools: if t.name == "handoff": await t.execute(message="Processed 1-8.") elif call_count == 2: assert "Processed 1-8." in user_message assert "8 tool calls" in user_message for t in extra_tools: if t.name == "handoff": await t.execute(message="Processed 9-16.") else: for t in extra_tools: if t.name == "complete": await t.execute(summary="All 16 items audited.") return _step_result(tools_used=["handoff"]) mgr.run_step.side_effect = fake_run_step tool = LongTaskTool(manager=mgr) result = await tool.execute(goal="Audit 16 issues.") assert result == "All 16 items audited." assert call_count == 3 @pytest.mark.asyncio async def test_long_task_fallback_when_no_signal_called(): """Subagent doesn't call handoff/complete — extract progress from messages.""" from nanobot.agent.tools.long_task import LongTaskTool mgr = _make_manager_stub() async def fake_run_step(*, system_prompt, user_message, extra_tools): return _step_result( final_content="Tool budget exhausted.", messages=[ {"role": "system", "content": "..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "I processed items 1-5. Results in out.md."}, {"role": "tool", "content": "ok"}, {"role": "assistant", "content": "Tool budget exhausted. Call handoff() earlier next time."}, ], stop_reason="max_iterations", ) mgr.run_step.side_effect = fake_run_step tool = LongTaskTool(manager=mgr) result = await tool.execute(goal="Do something.", max_steps=2) # Should reach max_steps and return the fallback extracted from messages assert "max steps (2)" in result assert "I processed items 1-5" in result @pytest.mark.asyncio async def test_long_task_goal_appears_in_system_prompt(): """Verify every step's system_prompt contains the long task system prompt.""" from nanobot.agent.tools.long_task import LongTaskTool mgr = _make_manager_stub() captured_prompts = [] async def fake_run_step(*, system_prompt, user_message, extra_tools): captured_prompts.append(system_prompt) for t in extra_tools: if t.name == "complete": await t.execute(summary="done") return _step_result(final_content="done") mgr.run_step.side_effect = fake_run_step tool = LongTaskTool(manager=mgr) await tool.execute(goal="Audit everything.") assert len(captured_prompts) == 1 assert "handoff()" in captured_prompts[0] assert "complete()" in captured_prompts[0] assert "filesystem" in captured_prompts[0] # --------------------------------------------------------------------------- # Helper function tests # --------------------------------------------------------------------------- def test_build_user_message_step_0(): from nanobot.agent.tools.long_task import _build_user_message msg = _build_user_message("Audit all issues.", step=0, handoff="") assert msg.startswith("Audit all issues.") assert "Step 1" in msg assert "8 tool calls" in msg assert "Previous Progress" not in msg def test_build_user_message_later_step(): from nanobot.agent.tools.long_task import _build_user_message msg = _build_user_message("Audit all issues.", step=3, handoff="Did 1-10.") assert "Audit all issues." in msg assert "Previous Progress" in msg assert "Did 1-10." in msg assert "Step 4" in msg assert "8 tool calls" in msg def test_extract_handoff_from_messages(): from nanobot.agent.tools.long_task import _extract_handoff_from_messages messages = [ {"role": "system", "content": "sys"}, {"role": "user", "content": "do it"}, {"role": "assistant", "content": ""}, {"role": "tool", "content": "result"}, {"role": "assistant", "content": "I processed items 1-3."}, ] assert _extract_handoff_from_messages(messages) == "I processed items 1-3." def test_extract_handoff_skips_budget_message(): from nanobot.agent.tools.long_task import _extract_handoff_from_messages messages = [ {"role": "system", "content": "sys"}, {"role": "user", "content": "do it"}, {"role": "assistant", "content": "I processed items 1-3."}, {"role": "tool", "content": "result"}, {"role": "assistant", "content": "Tool budget exhausted. Call handoff() earlier."}, ] # Should skip the budget message and find the actual progress assert _extract_handoff_from_messages(messages) == "I processed items 1-3." def test_extract_handoff_from_empty_messages(): from nanobot.agent.tools.long_task import _extract_handoff_from_messages assert _extract_handoff_from_messages([]) == "" assert _extract_handoff_from_messages([{"role": "system", "content": "sys"}]) == "" # --------------------------------------------------------------------------- # Integration: verify LongTaskTool is wired into the main agent loop # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_long_task_registered_in_tool_registry(tmp_path): """Verify LongTaskTool appears in the main agent's tool registry.""" from nanobot.agent.loop import AgentLoop from nanobot.bus.queue import MessageBus bus = MessageBus() provider = MagicMock() provider.get_default_model.return_value = "test-model" loop = AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model") tool = loop.tools.get("long_task") assert tool is not None assert tool.name == "long_task"