From cbc8877aa2448845509542b94e125ba71fcba1e2 Mon Sep 17 00:00:00 2001
From: chengyongru <2755839590@qq.com>
Date: Thu, 14 May 2026 01:06:05 +0800
Subject: [PATCH] feat(long-task): improve prompt structure and agent contract

- Expand LongTaskTool.description to instruct parent agent on goal
  construction, return value semantics, and how to handle results.
- Expand CompleteTool.description to emphasize that the summary IS the
  final answer returned to the parent agent.
- Prefix validated return value with an explicit "final answer" directive
  to stop parent agent from re-running work.
- Redesign step_start.md: Step 1 is now explicitly for exploration,
  planning, and skeleton-building. complete() is discouraged.
- Remove bulky payload debug logging from _emit(); add targeted
  info/warning/error logs at key state transitions instead.
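- Add signal_type to HandoffState for cleaner signal detection.
  (NOTE(review): no hunk in this patch touches HandoffState — confirm
  whether that change landed in a separate commit.)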
---
 nanobot/agent/tools/long_task.py              | 34 ++++++++++++++++---
 .../templates/agent/long_task/step_start.md   | 17 ++++++----
 2 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/nanobot/agent/tools/long_task.py b/nanobot/agent/tools/long_task.py
index d72b66645..39d2b3a42 100644
--- a/nanobot/agent/tools/long_task.py
+++ b/nanobot/agent/tools/long_task.py
@@ -138,7 +138,11 @@ class CompleteTool(Tool):
     def description(self) -> str:
         return (
             "The ENTIRE goal is achieved. Call this only when nothing remains. "
-            "Your claim will be validated — if unproven, the task continues."
+            "Your claim will be validated — if unproven, the task continues.\n\n"
+            "IMPORTANT: The summary you provide here will be returned to the parent "
+            "agent as the FINAL ANSWER to the user. Include all key findings, data, "
+            "and conclusions directly in the summary. Do NOT rely on the parent agent "
+            "reading files afterwards — it sees ONLY this summary."
         )
 
     async def execute(self, summary: str, **kwargs: Any) -> str:
@@ -368,7 +372,12 @@ class LongTaskTool(Tool):
             "original goal and progress from the previous step. Use this for batch "
             "processing (auditing many files, processing many items), large-scale "
             "refactoring, or any multi-step task where you might lose track of the "
-            "goal. For simple independent tasks, use spawn instead."
+            "goal. For simple independent tasks, use spawn instead.\n\n"
+            "When constructing the goal, be explicit: list concrete deliverables, "
+            "required output format (e.g. Markdown table), and any file paths. "
+            "The tool returns a text summary when finished. "
+            "If the summary contains the full answer, present it to the user directly. "
+            "Only read files afterwards if the user explicitly asks for verification."
         )
 
     @classmethod
@@ -440,7 +449,6 @@ class LongTaskTool(Tool):
     def _emit(self, event_type: str, **payload: Any) -> None:
         """Emit an event to registered hooks."""
         event = LongTaskEvent(type=event_type, payload=payload)
-        logger.debug("LongTask event: {} | {}", event_type, payload)
 
         # Call catch-all hook
         catch_all = self._hooks.get("on_event")
@@ -494,6 +502,10 @@ class LongTaskTool(Tool):
             if result is None:
                 # Fatal error after retry
                 self._state["status"] = "error"
+                logger.error(
+                    "long_task step {}/{} failed after retry: {}",
+                    step + 1, max_steps, self._state["error"],
+                )
                 self._emit("task_error", step=step, error=self._state["error"])
                 if handoff.message:
                     return (
@@ -566,9 +578,22 @@ class LongTaskTool(Tool):
                 )
                 if validated:
                     self._state["status"] = "completed"
+                    logger.info(
+                        "long_task complete at step {}/{} after validation",
+                        step + 1, max_steps,
+                    )
                     self._emit("task_complete", step=step, summary=sig_payload)
-                    return sig_payload
+                    return (
+                        "The task is complete. This is the final answer — "
+                        "present it to the user directly without calling additional "
+                        "tools or reading files.\n\n"
+                        f"{sig_payload}"
+                    )
                 else:
+                    logger.warning(
+                        "long_task validation failed at step {}/{}",
+                        step + 1, max_steps,
+                    )
                     self._emit(
                         "validation_failed",
                         step=step,
@@ -600,6 +625,7 @@ class LongTaskTool(Tool):
                 self._state["last_handoff"] = handoff
 
         self._state["status"] = "error"
+        logger.error("long_task reached max steps ({})", max_steps)
         self._emit("task_error", step=max_steps, error="Max steps reached")
         return (
             f"Long task reached max steps ({max_steps}). "
diff --git a/nanobot/templates/agent/long_task/step_start.md b/nanobot/templates/agent/long_task/step_start.md
index 4440eb600..ee234dd93 100644
--- a/nanobot/templates/agent/long_task/step_start.md
+++ b/nanobot/templates/agent/long_task/step_start.md
@@ -1,14 +1,19 @@
 # Long Task — Step {{ step + 1 }}/{{ max_steps }}
 
-You are the FIRST step in a chain working toward a goal.
+You are the FIRST step in a meta-ReAct loop. Your job is NOT to finish the entire goal — it is to **explore, plan, and build the skeleton**.
 
 ## Goal
 {{ goal }}
 
 ## Instructions
-1. **Explore first**: Check the filesystem to understand the current state. Do NOT assume anything.
-2. **Plan your work**: Decide what chunk you will do in this step.
-3. **Do the work**: Make concrete progress. Write results to files — do NOT just collect information without producing output.
-4. **Handoff**: When done, call `handoff()` with a detailed summary. If the ENTIRE goal is already achieved, call `complete()` instead.
+1. **Explore**: Check the filesystem and any relevant state. Do NOT assume anything.
+2. **Plan the split**: Explicitly decide how the goal will be divided across the remaining {{ max_steps - 1 }} steps. Write this plan down.
+3. **Do ONE chunk**: Make concrete progress on ONLY the first chunk. Do NOT attempt to finish the entire goal now, even if you have enough tool calls. The meta-loop exists so later steps can review, correct, and refine your work.
+4. **Handoff**: Call `handoff()` with:
+   - A detailed summary of what you did
+   - Files changed
+   - The explicit plan for the remaining steps
+   - A clear hint for the next step
 
-You have {{ budget }} tool calls total. Reserve the last 1-2 calls for `handoff()` or `complete()`.
+You have {{ budget }} tool calls. Reserve the last 1-2 for `handoff()`.
+Do NOT call `complete()` in Step 1 unless the goal is literally a single trivial action.