From e7214d96ed0d27fd59054c1e0fbb6bc4b26fabc8 Mon Sep 17 00:00:00 2001
From: chengyongru <2755839590@qq.com>
Date: Mon, 27 Apr 2026 00:39:45 +0800
Subject: [PATCH] fix(long-task): add debug logging for step-level
 observability

---
 nanobot/agent/tools/long_task.py    | 54 ++++++++++++++++++++---------
 tests/agent/tools/test_long_task.py | 32 +++++++++++++++++
 2 files changed, 70 insertions(+), 16 deletions(-)

diff --git a/nanobot/agent/tools/long_task.py b/nanobot/agent/tools/long_task.py
index cc5d8c786..5f8d45a06 100644
--- a/nanobot/agent/tools/long_task.py
+++ b/nanobot/agent/tools/long_task.py
@@ -40,8 +40,9 @@ class HandoffTool(Tool):
     @property
     def description(self) -> str:
         return (
-            "You are done with this step. Pass control to the next step. "
-            "You MUST call this (or complete()) before your tool budget runs out."
+            "REQUIRED after finishing your work in this step. "
+            "Pass your progress summary to the next step. "
+            "Use complete() instead if the entire goal is achieved."
         )
 
     async def execute(self, message: str, **kwargs: Any) -> str:
@@ -88,15 +89,16 @@ _STEP_BUDGET = 8
 _BUDGET_EXHAUSTED_PREFIX = "Tool budget exhausted"
 
 _LONG_TASK_SYSTEM_PROMPT = """\
-You are one step in a chain. Do a small chunk of work, then call handoff().
+You are one step in a chain working toward a goal.
 
-1. Check the filesystem to see what's already done (ignore handoff notes).
-2. Do the next small piece of work.
-3. Call handoff() with what you did and where results are saved. \
-If everything is truly done, call complete() instead.
+1. Check the filesystem to see what's already done.
+2. Do the next piece of work. Write results to files as you go — \
+do NOT just collect information without producing output.
+3. When done with your chunk, call handoff() with a brief summary. \
+If the entire goal is finished, call complete() instead.
 
-You have very few tool calls. Do NOT try to finish everything. \
-Do one chunk, call handoff(), done.
+IMPORTANT: Write output to files early and often. If you run out of \
+tool calls, only what's on the filesystem survives.
 """
 
 
@@ -105,8 +107,7 @@ def _build_user_message(goal: str, step: int, handoff: str) -> str:
     budget_note = (
         f"\n\n---\n"
         f"Step {step + 1}. You have {_STEP_BUDGET} tool calls total. "
-        f"Reserve the last 1-2 calls for handoff() or complete(). "
-        f"If you run out of calls without calling one, your progress is LOST."
+        f"Call handoff() or complete() before you run out."
     )
     if step == 0:
         return goal + budget_note
@@ -177,6 +178,7 @@ class LongTaskTool(Tool):
 
     async def execute(self, goal: str, max_steps: int = 20, **kwargs: Any) -> str:
         handoff = ""
+        logger.debug("long_task start: max_steps={}, goal={:.120}", max_steps, goal)
         for step in range(max_steps):
             signal_store: dict[str, str] = {}
             user_msg = _build_user_message(goal, step, handoff)
@@ -195,19 +197,39 @@ class LongTaskTool(Tool):
                     )
                 return f"Long task failed at step {step + 1}/{max_steps}."
             sig_type = signal_store.get("type")
+            sig_payload = signal_store.get("payload", "")
             logger.info(
                 "long_task step {}/{}: signal={}, stop_reason={}, tools={}",
-                step + 1, max_steps, sig_type or "none",
+                step + 1, max_steps, sig_type or "auto",
                 result.stop_reason,
                 result.tools_used,
             )
             if sig_type == "complete":
-                return signal_store["payload"]
-            elif sig_type == "handoff":
-                handoff = signal_store["payload"]
+                logger.debug(
+                    "long_task done at step {}: complete payload={:.200}",
+                    step + 1, sig_payload,
+                )
+                return sig_payload
+
+            # Auto-extract progress — don't require handoff()
+            if sig_type == "handoff":
+                handoff = sig_payload
+                logger.debug("long_task step {} handoff: {:.200}", step + 1, handoff)
+            elif result.stop_reason == "completed":
+                # Subagent returned text naturally (no more tool calls)
+                handoff = result.final_content or ""
+                logger.debug(
+                    "long_task step {} natural end: {:.200}",
+                    step + 1, handoff[:200] if handoff else "(empty)",
+                )
             else:
-                # No signal tool called — extract useful content as fallback
+                # max_iterations hit — extract whatever text the subagent produced
                 handoff = _extract_handoff_from_messages(result.messages)
+                logger.debug(
+                    "long_task step {} auto-extract: {:.200}",
+                    step + 1, handoff[:200] if handoff else "(empty)",
+                )
+        logger.warning("long_task exhausted max_steps={}", max_steps)
         return (
             f"Long task reached max steps ({max_steps}). "
             f"Last progress:\n{handoff}"
diff --git a/tests/agent/tools/test_long_task.py b/tests/agent/tools/test_long_task.py
index e08249251..1bece10c2 100644
--- a/tests/agent/tools/test_long_task.py
+++ b/tests/agent/tools/test_long_task.py
@@ -158,6 +158,38 @@ async def test_long_task_fallback_when_no_signal_called():
     assert "I processed items 1-5" in result
 
 
+@pytest.mark.asyncio
+async def test_long_task_auto_extracts_on_natural_end():
+    """Subagent finishes naturally (stop_reason=completed) without calling signal."""
+    from nanobot.agent.tools.long_task import LongTaskTool
+
+    mgr = _make_manager_stub()
+    steps = 0
+
+    async def fake_run_step(*, system_prompt, user_message, extra_tools):
+        nonlocal steps
+        steps += 1
+        if steps == 1:
+            return _step_result(
+                final_content="I processed items 1-5. Results in out.md.",
+                stop_reason="completed",
+            )
+        # Second step: subagent calls complete
+        for t in extra_tools:
+            if t.name == "complete":
+                await t.execute(summary="All done.")
+        return _step_result(
+            final_content="All done.",
+            tools_used=["complete"],
+        )
+
+    mgr.run_step.side_effect = fake_run_step
+    tool = LongTaskTool(manager=mgr)
+    result = await tool.execute(goal="Process items.", max_steps=5)
+    assert "All done." == result
+    assert steps == 2
+
+
 @pytest.mark.asyncio
 async def test_long_task_goal_appears_in_system_prompt():
     """Verify every step's system_prompt contains the long task system prompt."""