diff --git a/nanobot/agent/tools/long_task.py b/nanobot/agent/tools/long_task.py index 291311737..d72b66645 100644 --- a/nanobot/agent/tools/long_task.py +++ b/nanobot/agent/tools/long_task.py @@ -4,7 +4,7 @@ from __future__ import annotations import time from dataclasses import dataclass, field -from typing import Any, TYPE_CHECKING +from typing import TYPE_CHECKING, Any from loguru import logger @@ -30,6 +30,7 @@ if TYPE_CHECKING: class HandoffState: """Structured progress state passed between long-task steps.""" + signal_type: str = "" message: str = "" files_created: list[str] = field(default_factory=list) files_modified: list[str] = field(default_factory=list) @@ -39,6 +40,7 @@ class HandoffState: def is_empty(self) -> bool: return not any( [ + self.signal_type, self.message, self.files_created, self.files_modified, @@ -105,6 +107,7 @@ class HandoffTool(Tool): verification: str = "", **kwargs: Any, ) -> str: + self._store.signal_type = "handoff" self._store.message = message self._store.files_created = list(files_created or []) self._store.files_modified = list(files_modified or []) @@ -139,6 +142,7 @@ class CompleteTool(Tool): ) async def execute(self, summary: str, **kwargs: Any) -> str: + self._store.signal_type = "complete" self._store.message = summary return "Task marked as complete. Awaiting validation." @@ -242,9 +246,19 @@ def _extract_handoff_from_messages(messages: list[dict[str, Any]]) -> str: return "" -_FILE_EVENT_PREFIXES = ("Wrote ", "Edited ") -# NOTE: path extraction depends on write_file/edit_file detail format. -# If those tools change their output format, this mapping must be updated. +def _extract_write_path(detail: str) -> str | None: + prefix = "Successfully wrote " + marker = " to " + if not detail.startswith(prefix) or marker not in detail: + return None + return detail.rsplit(marker, 1)[1].strip() + + +def _extract_edit_path(detail: str) -> str | None: + for prefix in ("Successfully created ", "Successfully edited "): + if detail.startswith(prefix): + return detail.removeprefix(prefix).strip() + return None def _extract_file_changes( @@ -259,13 +273,19 @@ def _extract_file_changes( detail = event.get("detail", "") if status != "ok": continue - if name in ("write_file", "edit_file"): - if detail.startswith(_FILE_EVENT_PREFIXES): - path = detail.split(" ", 1)[1].split(":")[0].strip() - if name == "write_file": - created.append(path) - else: - modified.append(path) + if name == "write_file": + path = _extract_write_path(detail) + if path: + created.append(path) + else: + logger.debug( + "long_task: skipping file event with unexpected detail: {}", + detail[:80], + ) + elif name == "edit_file": + path = _extract_edit_path(detail) + if path: + modified.append(path) else: logger.debug( "long_task: skipping file event with unexpected detail: {}", @@ -502,7 +522,7 @@ class LongTaskTool(Tool): # Determine signal from tool events sig_type = "none" - for event in tool_events: + for event in reversed(tool_events): ev_name = event.get("name", "") if ev_name == "complete": sig_type = "complete" @@ -512,11 +532,9 @@ class LongTaskTool(Tool): break # Fallback: if no explicit signal but CompleteTool/HandoffTool was - # called without arguments (message empty), use final_content - if sig_type == "none" and signal_store.message: - # Tool was called but we couldn't detect from events; - # use the store content as handoff - sig_type = "handoff" + # called but the runner did not expose tool events, trust the store. + if sig_type == "none" and signal_store.signal_type: + sig_type = signal_store.signal_type elif sig_type == "none": signal_store.message = _extract_handoff_from_messages( getattr(result, "messages", []) or [] diff --git a/tests/agent/tools/test_long_task.py b/tests/agent/tools/test_long_task.py index 8207c4576..2953a7d78 100644 --- a/tests/agent/tools/test_long_task.py +++ b/tests/agent/tools/test_long_task.py @@ -1,22 +1,21 @@ """Tests for Long Task Tool: HandoffTool, CompleteTool, LongTaskTool.""" -import pytest from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock +import pytest + from nanobot.agent.tools.long_task import ( + CompleteTool, HandoffState, HandoffTool, - CompleteTool, LongTaskTool, - LongTaskEvent, _build_system_prompt, _build_user_message, _extract_file_changes, _extract_handoff_from_messages, ) - # --------------------------------------------------------------------------- # Signal tool tests # --------------------------------------------------------------------------- @@ -34,6 +33,7 @@ async def test_handoff_tool_stores_structured_signal(): verification="Tests passed", ) assert result == "Progress recorded. The next step will continue from here." + assert store.signal_type == "handoff" assert store.message == "Processed items 1-8. Results in out.md." assert store.files_created == ["out.md", "report.md"] assert store.files_modified == ["main.py"] @@ -58,6 +58,7 @@ async def test_complete_tool_stores_signal(): tool = CompleteTool(store) result = await tool.execute(summary="All 100 items processed. Summary in report.md") assert result == "Task marked as complete. Awaiting validation." + assert store.signal_type == "complete" assert store.message == "All 100 items processed. Summary in report.md" @@ -70,6 +71,7 @@ async def test_signal_tools_overwrite_on_multiple_calls(): await handoff.execute(message="first progress") assert store.message == "first progress" await complete.execute(summary="done early") + assert store.signal_type == "complete" assert store.message == "done early" @@ -191,6 +193,44 @@ async def test_long_task_completes_after_multiple_handoffs(): assert call_count == 4 # 3 main steps + validation +@pytest.mark.asyncio +async def test_long_task_uses_last_signal_when_multiple_signals_called(): + """If a step calls handoff() then complete(), complete() should win.""" + mgr = _make_manager_stub() + call_count = 0 + + async def fake_run_step(*, system_prompt, user_message, extra_tools, max_iterations=None): + nonlocal call_count + call_count += 1 + if call_count == 1: + for t in extra_tools: + if t.name == "handoff": + await t.execute(message="Partial progress.") + elif t.name == "complete": + await t.execute(summary="Actually complete.") + return _step_result( + tools_used=["handoff", "complete"], + tool_events=[ + {"name": "handoff", "status": "ok", "detail": ""}, + {"name": "complete", "status": "ok", "detail": ""}, + ], + ) + + for t in extra_tools: + if t.name == "complete": + await t.execute(summary="Validated") + return _step_result( + tools_used=["complete"], + tool_events=[{"name": "complete", "status": "ok", "detail": ""}], + ) + + mgr.run_step.side_effect = fake_run_step + tool = LongTaskTool(manager=mgr) + result = await tool.execute(goal="Do something.", max_steps=1) + assert result == "Actually complete." + assert call_count == 2 # main step + validation + + @pytest.mark.asyncio async def test_long_task_validation_falls_back_to_handoff(): """Subagent claims complete but validation fails — task continues.""" @@ -566,14 +606,27 @@ def test_extract_handoff_from_empty_messages(): def test_extract_file_changes_from_tool_events(): events = [ - {"name": "write_file", "status": "ok", "detail": "Wrote /workspace/a.py: done"}, - {"name": "edit_file", "status": "ok", "detail": "Edited /workspace/b.py: patched"}, + { + "name": "write_file", + "status": "ok", + "detail": "Successfully wrote 12 characters to /workspace/a.py", + }, + { + "name": "edit_file", + "status": "ok", + "detail": "Successfully edited /workspace/b.py", + }, + { + "name": "edit_file", + "status": "ok", + "detail": "Successfully created /workspace/c.py", + }, {"name": "read_file", "status": "ok", "detail": "Read /workspace/c.py"}, {"name": "write_file", "status": "error", "detail": "Failed"}, ] created, modified = _extract_file_changes(events) assert created == ["/workspace/a.py"] - assert modified == ["/workspace/b.py"] + assert modified == ["/workspace/b.py", "/workspace/c.py"] def test_extract_file_changes_empty(): @@ -736,7 +789,11 @@ async def test_explicit_file_changes_override_auto_detected(): tool_events=[ {"name": "handoff", "status": "ok", "detail": ""}, # Auto-detection would pick this up as "auto.py" - {"name": "write_file", "status": "ok", "detail": "Wrote auto.py: content"}, + { + "name": "write_file", + "status": "ok", + "detail": "Successfully wrote 7 characters to auto.py", + }, ], )