From e7214d96ed0d27fd59054c1e0fbb6bc4b26fabc8 Mon Sep 17 00:00:00 2001 From: chengyongru <2755839590@qq.com> Date: Mon, 27 Apr 2026 00:39:45 +0800 Subject: [PATCH] fix(long-task): add debug logging for step-level observability --- nanobot/agent/tools/long_task.py | 54 ++++++++++++++++++++--------- tests/agent/tools/test_long_task.py | 32 +++++++++++++++++ 2 files changed, 70 insertions(+), 16 deletions(-) diff --git a/nanobot/agent/tools/long_task.py b/nanobot/agent/tools/long_task.py index cc5d8c786..5f8d45a06 100644 --- a/nanobot/agent/tools/long_task.py +++ b/nanobot/agent/tools/long_task.py @@ -40,8 +40,9 @@ class HandoffTool(Tool): @property def description(self) -> str: return ( - "You are done with this step. Pass control to the next step. " - "You MUST call this (or complete()) before your tool budget runs out." + "REQUIRED after finishing your work in this step. " + "Pass your progress summary to the next step. " + "Use complete() instead if the entire goal is achieved." ) async def execute(self, message: str, **kwargs: Any) -> str: @@ -88,15 +89,16 @@ _STEP_BUDGET = 8 _BUDGET_EXHAUSTED_PREFIX = "Tool budget exhausted" _LONG_TASK_SYSTEM_PROMPT = """\ -You are one step in a chain. Do a small chunk of work, then call handoff(). +You are one step in a chain working toward a goal. -1. Check the filesystem to see what's already done (ignore handoff notes). -2. Do the next small piece of work. -3. Call handoff() with what you did and where results are saved. \ -If everything is truly done, call complete() instead. +1. Check the filesystem to see what's already done. +2. Do the next piece of work. Write results to files as you go — \ +do NOT just collect information without producing output. +3. When done with your chunk, call handoff() with a brief summary. \ +If the entire goal is finished, call complete() instead. -You have very few tool calls. Do NOT try to finish everything. \ -Do one chunk, call handoff(), done. +IMPORTANT: Write output to files early and often. If you run out of \ +tool calls, only what's on the filesystem survives. """ @@ -105,8 +107,7 @@ def _build_user_message(goal: str, step: int, handoff: str) -> str: budget_note = ( f"\n\n---\n" f"Step {step + 1}. You have {_STEP_BUDGET} tool calls total. " - f"Reserve the last 1-2 calls for handoff() or complete(). " - f"If you run out of calls without calling one, your progress is LOST." + f"Call handoff() or complete() before you run out." ) if step == 0: return goal + budget_note @@ -177,6 +178,7 @@ class LongTaskTool(Tool): async def execute(self, goal: str, max_steps: int = 20, **kwargs: Any) -> str: handoff = "" + logger.debug("long_task start: max_steps={}, goal={:.120}", max_steps, goal) for step in range(max_steps): signal_store: dict[str, str] = {} user_msg = _build_user_message(goal, step, handoff) @@ -195,19 +197,39 @@ class LongTaskTool(Tool): ) return f"Long task failed at step {step + 1}/{max_steps}." sig_type = signal_store.get("type") + sig_payload = signal_store.get("payload", "") logger.info( "long_task step {}/{}: signal={}, stop_reason={}, tools={}", - step + 1, max_steps, sig_type or "none", + step + 1, max_steps, sig_type or "auto", result.stop_reason, result.tools_used, ) if sig_type == "complete": - return signal_store["payload"] - elif sig_type == "handoff": - handoff = signal_store["payload"] + logger.debug( + "long_task done at step {}: complete payload={:.200}", + step + 1, sig_payload, + ) + return sig_payload + + # Auto-extract progress — don't require handoff() + if sig_type == "handoff": + handoff = sig_payload + logger.debug("long_task step {} handoff: {:.200}", step + 1, handoff) + elif result.stop_reason == "completed": + # Subagent returned text naturally (no more tool calls) + handoff = result.final_content or "" + logger.debug( + "long_task step {} natural end: {:.200}", + step + 1, handoff[:200] if handoff else "(empty)", + ) else: - # No signal tool called — extract useful content as fallback + # max_iterations hit — extract whatever text the subagent produced handoff = _extract_handoff_from_messages(result.messages) + logger.debug( + "long_task step {} auto-extract: {:.200}", + step + 1, handoff[:200] if handoff else "(empty)", + ) + logger.warning("long_task exhausted max_steps={}", max_steps) return ( f"Long task reached max steps ({max_steps}). " f"Last progress:\n{handoff}" diff --git a/tests/agent/tools/test_long_task.py b/tests/agent/tools/test_long_task.py index e08249251..1bece10c2 100644 --- a/tests/agent/tools/test_long_task.py +++ b/tests/agent/tools/test_long_task.py @@ -158,6 +158,38 @@ async def test_long_task_fallback_when_no_signal_called(): assert "I processed items 1-5" in result +@pytest.mark.asyncio +async def test_long_task_auto_extracts_on_natural_end(): + """Subagent finishes naturally (stop_reason=completed) without calling signal.""" + from nanobot.agent.tools.long_task import LongTaskTool + + mgr = _make_manager_stub() + steps = 0 + + async def fake_run_step(*, system_prompt, user_message, extra_tools): + nonlocal steps + steps += 1 + if steps == 1: + return _step_result( + final_content="I processed items 1-5. Results in out.md.", + stop_reason="completed", + ) + # Second step: subagent calls complete + for t in extra_tools: + if t.name == "complete": + await t.execute(summary="All done.") + return _step_result( + final_content="All done.", + tools_used=["complete"], + ) + + mgr.run_step.side_effect = fake_run_step + tool = LongTaskTool(manager=mgr) + result = await tool.execute(goal="Process items.", max_steps=5) + assert "All done." == result + assert steps == 2 + + @pytest.mark.asyncio async def test_long_task_goal_appears_in_system_prompt(): """Verify every step's system_prompt contains the long task system prompt."""