From cbc8877aa2448845509542b94e125ba71fcba1e2 Mon Sep 17 00:00:00 2001 From: chengyongru <2755839590@qq.com> Date: Thu, 14 May 2026 01:06:05 +0800 Subject: [PATCH] feat(long-task): improve prompt structure and agent contract - Expand LongTaskTool.description to instruct parent agent on goal construction, return value semantics, and how to handle results. - Expand CompleteTool.description to emphasize that the summary IS the final answer returned to the parent agent. - Prefix validated return value with an explicit "final answer" directive to stop parent agent from re-running work. - Redesign step_start.md: Step 1 is now explicitly for exploration, planning, and skeleton-building. complete() is discouraged. - Remove bulky payload debug logging from _emit(); add targeted info/warning/error logs at key state transitions instead. - Add signal_type to HandoffState for cleaner signal detection. --- nanobot/agent/tools/long_task.py | 34 ++++++++++++++++--- .../templates/agent/long_task/step_start.md | 17 ++++++---- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/nanobot/agent/tools/long_task.py b/nanobot/agent/tools/long_task.py index d72b66645..39d2b3a42 100644 --- a/nanobot/agent/tools/long_task.py +++ b/nanobot/agent/tools/long_task.py @@ -138,7 +138,11 @@ class CompleteTool(Tool): def description(self) -> str: return ( "The ENTIRE goal is achieved. Call this only when nothing remains. " - "Your claim will be validated — if unproven, the task continues." + "Your claim will be validated — if unproven, the task continues.\n\n" + "IMPORTANT: The summary you provide here will be returned to the parent " + "agent as the FINAL ANSWER to the user. Include all key findings, data, " + "and conclusions directly in the summary. Do NOT rely on the parent agent " + "reading files afterwards — it sees ONLY this summary." ) async def execute(self, summary: str, **kwargs: Any) -> str: @@ -368,7 +372,12 @@ class LongTaskTool(Tool): "original goal and progress from the previous step. Use this for batch " "processing (auditing many files, processing many items), large-scale " "refactoring, or any multi-step task where you might lose track of the " - "goal. For simple independent tasks, use spawn instead." + "goal. For simple independent tasks, use spawn instead.\n\n" + "When constructing the goal, be explicit: list concrete deliverables, " + "required output format (e.g. Markdown table), and any file paths. " + "The tool returns a text summary when finished. " + "If the summary contains the full answer, present it to the user directly. " + "Only read files afterwards if the user explicitly asks for verification." ) @classmethod @@ -440,7 +449,6 @@ class LongTaskTool(Tool): def _emit(self, event_type: str, **payload: Any) -> None: """Emit an event to registered hooks.""" event = LongTaskEvent(type=event_type, payload=payload) - logger.debug("LongTask event: {} | {}", event_type, payload) # Call catch-all hook catch_all = self._hooks.get("on_event") @@ -494,6 +502,10 @@ class LongTaskTool(Tool): if result is None: # Fatal error after retry self._state["status"] = "error" + logger.error( + "long_task step {}/{} failed after retry: {}", + step + 1, max_steps, self._state["error"], + ) self._emit("task_error", step=step, error=self._state["error"]) if handoff.message: return ( @@ -566,9 +578,22 @@ class LongTaskTool(Tool): ) if validated: self._state["status"] = "completed" + logger.info( + "long_task complete at step {}/{} after validation", + step + 1, max_steps, + ) self._emit("task_complete", step=step, summary=sig_payload) - return sig_payload + return ( + "The task is complete. This is the final answer — " + "present it to the user directly without calling additional " + "tools or reading files.\n\n" + f"{sig_payload}" + ) else: + logger.warning( + "long_task validation failed at step {}/{}", + step + 1, max_steps, + ) self._emit( "validation_failed", step=step, @@ -600,6 +625,7 @@ class LongTaskTool(Tool): self._state["last_handoff"] = handoff self._state["status"] = "error" + logger.error("long_task reached max steps ({})", max_steps) self._emit("task_error", step=max_steps, error="Max steps reached") return ( f"Long task reached max steps ({max_steps}). " diff --git a/nanobot/templates/agent/long_task/step_start.md b/nanobot/templates/agent/long_task/step_start.md index 4440eb600..ee234dd93 100644 --- a/nanobot/templates/agent/long_task/step_start.md +++ b/nanobot/templates/agent/long_task/step_start.md @@ -1,14 +1,19 @@ # Long Task — Step {{ step + 1 }}/{{ max_steps }} -You are the FIRST step in a chain working toward a goal. +You are the FIRST step in a meta-ReAct loop. Your job is NOT to finish the entire goal — it is to **explore, plan, and build the skeleton**. ## Goal {{ goal }} ## Instructions -1. **Explore first**: Check the filesystem to understand the current state. Do NOT assume anything. -2. **Plan your work**: Decide what chunk you will do in this step. -3. **Do the work**: Make concrete progress. Write results to files — do NOT just collect information without producing output. -4. **Handoff**: When done, call `handoff()` with a detailed summary. If the ENTIRE goal is already achieved, call `complete()` instead. +1. **Explore**: Check the filesystem and any relevant state. Do NOT assume anything. +2. **Plan the split**: Explicitly decide how the goal will be divided across the remaining {{ max_steps - 1 }} steps. Write this plan down. +3. **Do ONE chunk**: Make concrete progress on ONLY the first chunk. Do NOT attempt to finish the entire goal now, even if you have enough tool calls. The meta-loop exists so later steps can review, correct, and refine your work. +4. **Handoff**: Call `handoff()` with: + - A detailed summary of what you did + - Files changed + - The explicit plan for the remaining steps + - A clear hint for the next step -You have {{ budget }} tool calls total. Reserve the last 1-2 calls for `handoff()` or `complete()`. +You have {{ budget }} tool calls. Reserve the last 1-2 for `handoff()`. +Do NOT call `complete()` in Step 1 unless the goal is literally a single trivial action.