mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-20 00:22:31 +00:00
fix(long-task): add debug logging for step-level observability
This commit is contained in:
parent
bf5762a3d4
commit
e7214d96ed
@ -40,8 +40,9 @@ class HandoffTool(Tool):
|
|||||||
@property
|
@property
|
||||||
def description(self) -> str:
|
def description(self) -> str:
|
||||||
return (
|
return (
|
||||||
"You are done with this step. Pass control to the next step. "
|
"REQUIRED after finishing your work in this step. "
|
||||||
"You MUST call this (or complete()) before your tool budget runs out."
|
"Pass your progress summary to the next step. "
|
||||||
|
"Use complete() instead if the entire goal is achieved."
|
||||||
)
|
)
|
||||||
|
|
||||||
async def execute(self, message: str, **kwargs: Any) -> str:
|
async def execute(self, message: str, **kwargs: Any) -> str:
|
||||||
@ -88,15 +89,16 @@ _STEP_BUDGET = 8
|
|||||||
_BUDGET_EXHAUSTED_PREFIX = "Tool budget exhausted"
|
_BUDGET_EXHAUSTED_PREFIX = "Tool budget exhausted"
|
||||||
|
|
||||||
_LONG_TASK_SYSTEM_PROMPT = """\
|
_LONG_TASK_SYSTEM_PROMPT = """\
|
||||||
You are one step in a chain. Do a small chunk of work, then call handoff().
|
You are one step in a chain working toward a goal.
|
||||||
|
|
||||||
1. Check the filesystem to see what's already done (ignore handoff notes).
|
1. Check the filesystem to see what's already done.
|
||||||
2. Do the next small piece of work.
|
2. Do the next piece of work. Write results to files as you go — \
|
||||||
3. Call handoff() with what you did and where results are saved. \
|
do NOT just collect information without producing output.
|
||||||
If everything is truly done, call complete() instead.
|
3. When done with your chunk, call handoff() with a brief summary. \
|
||||||
|
If the entire goal is finished, call complete() instead.
|
||||||
|
|
||||||
You have very few tool calls. Do NOT try to finish everything. \
|
IMPORTANT: Write output to files early and often. If you run out of \
|
||||||
Do one chunk, call handoff(), done.
|
tool calls, only what's on the filesystem survives.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@ -105,8 +107,7 @@ def _build_user_message(goal: str, step: int, handoff: str) -> str:
|
|||||||
budget_note = (
|
budget_note = (
|
||||||
f"\n\n---\n"
|
f"\n\n---\n"
|
||||||
f"Step {step + 1}. You have {_STEP_BUDGET} tool calls total. "
|
f"Step {step + 1}. You have {_STEP_BUDGET} tool calls total. "
|
||||||
f"Reserve the last 1-2 calls for handoff() or complete(). "
|
f"Call handoff() or complete() before you run out."
|
||||||
f"If you run out of calls without calling one, your progress is LOST."
|
|
||||||
)
|
)
|
||||||
if step == 0:
|
if step == 0:
|
||||||
return goal + budget_note
|
return goal + budget_note
|
||||||
@ -177,6 +178,7 @@ class LongTaskTool(Tool):
|
|||||||
|
|
||||||
async def execute(self, goal: str, max_steps: int = 20, **kwargs: Any) -> str:
|
async def execute(self, goal: str, max_steps: int = 20, **kwargs: Any) -> str:
|
||||||
handoff = ""
|
handoff = ""
|
||||||
|
logger.debug("long_task start: max_steps={}, goal={:.120}", max_steps, goal)
|
||||||
for step in range(max_steps):
|
for step in range(max_steps):
|
||||||
signal_store: dict[str, str] = {}
|
signal_store: dict[str, str] = {}
|
||||||
user_msg = _build_user_message(goal, step, handoff)
|
user_msg = _build_user_message(goal, step, handoff)
|
||||||
@ -195,19 +197,39 @@ class LongTaskTool(Tool):
|
|||||||
)
|
)
|
||||||
return f"Long task failed at step {step + 1}/{max_steps}."
|
return f"Long task failed at step {step + 1}/{max_steps}."
|
||||||
sig_type = signal_store.get("type")
|
sig_type = signal_store.get("type")
|
||||||
|
sig_payload = signal_store.get("payload", "")
|
||||||
logger.info(
|
logger.info(
|
||||||
"long_task step {}/{}: signal={}, stop_reason={}, tools={}",
|
"long_task step {}/{}: signal={}, stop_reason={}, tools={}",
|
||||||
step + 1, max_steps, sig_type or "none",
|
step + 1, max_steps, sig_type or "auto",
|
||||||
result.stop_reason,
|
result.stop_reason,
|
||||||
result.tools_used,
|
result.tools_used,
|
||||||
)
|
)
|
||||||
if sig_type == "complete":
|
if sig_type == "complete":
|
||||||
return signal_store["payload"]
|
logger.debug(
|
||||||
elif sig_type == "handoff":
|
"long_task done at step {}: complete payload={:.200}",
|
||||||
handoff = signal_store["payload"]
|
step + 1, sig_payload,
|
||||||
|
)
|
||||||
|
return sig_payload
|
||||||
|
|
||||||
|
# Auto-extract progress — don't require handoff()
|
||||||
|
if sig_type == "handoff":
|
||||||
|
handoff = sig_payload
|
||||||
|
logger.debug("long_task step {} handoff: {:.200}", step + 1, handoff)
|
||||||
|
elif result.stop_reason == "completed":
|
||||||
|
# Subagent returned text naturally (no more tool calls)
|
||||||
|
handoff = result.final_content or ""
|
||||||
|
logger.debug(
|
||||||
|
"long_task step {} natural end: {:.200}",
|
||||||
|
step + 1, handoff[:200] if handoff else "(empty)",
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
# No signal tool called — extract useful content as fallback
|
# max_iterations hit — extract whatever text the subagent produced
|
||||||
handoff = _extract_handoff_from_messages(result.messages)
|
handoff = _extract_handoff_from_messages(result.messages)
|
||||||
|
logger.debug(
|
||||||
|
"long_task step {} auto-extract: {:.200}",
|
||||||
|
step + 1, handoff[:200] if handoff else "(empty)",
|
||||||
|
)
|
||||||
|
logger.warning("long_task exhausted max_steps={}", max_steps)
|
||||||
return (
|
return (
|
||||||
f"Long task reached max steps ({max_steps}). "
|
f"Long task reached max steps ({max_steps}). "
|
||||||
f"Last progress:\n{handoff}"
|
f"Last progress:\n{handoff}"
|
||||||
|
|||||||
@ -158,6 +158,38 @@ async def test_long_task_fallback_when_no_signal_called():
|
|||||||
assert "I processed items 1-5" in result
|
assert "I processed items 1-5" in result
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_long_task_auto_extracts_on_natural_end():
|
||||||
|
"""Subagent finishes naturally (stop_reason=completed) without calling signal."""
|
||||||
|
from nanobot.agent.tools.long_task import LongTaskTool
|
||||||
|
|
||||||
|
mgr = _make_manager_stub()
|
||||||
|
steps = 0
|
||||||
|
|
||||||
|
async def fake_run_step(*, system_prompt, user_message, extra_tools):
|
||||||
|
nonlocal steps
|
||||||
|
steps += 1
|
||||||
|
if steps == 1:
|
||||||
|
return _step_result(
|
||||||
|
final_content="I processed items 1-5. Results in out.md.",
|
||||||
|
stop_reason="completed",
|
||||||
|
)
|
||||||
|
# Second step: subagent calls complete
|
||||||
|
for t in extra_tools:
|
||||||
|
if t.name == "complete":
|
||||||
|
await t.execute(summary="All done.")
|
||||||
|
return _step_result(
|
||||||
|
final_content="All done.",
|
||||||
|
tools_used=["complete"],
|
||||||
|
)
|
||||||
|
|
||||||
|
mgr.run_step.side_effect = fake_run_step
|
||||||
|
tool = LongTaskTool(manager=mgr)
|
||||||
|
result = await tool.execute(goal="Process items.", max_steps=5)
|
||||||
|
assert "All done." == result
|
||||||
|
assert steps == 2
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_long_task_goal_appears_in_system_prompt():
|
async def test_long_task_goal_appears_in_system_prompt():
|
||||||
"""Verify every step's system_prompt contains the long task system prompt."""
|
"""Verify every step's system_prompt contains the long task system prompt."""
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user