feat(long-task): improve prompt structure and agent contract

- Expand LongTaskTool.description to instruct the parent agent on goal
  construction, return value semantics, and how to handle results.
- Expand CompleteTool.description to emphasize that the summary IS the
  final answer returned to the parent agent.
- Prefix the validated return value with an explicit "final answer"
  directive to stop the parent agent from re-running work.
- Redesign step_start.md: Step 1 is now explicitly for exploration,
  planning, and skeleton-building. Calling complete() in Step 1 is
  discouraged unless the goal is a single trivial action.
- Remove bulky payload debug logging from _emit(); add targeted
  info/warning/error logs at key state transitions instead.
- Add signal_type to HandoffState for cleaner signal detection.
This commit is contained in:
chengyongru 2026-05-14 01:06:05 +08:00
parent 78e8cc3e55
commit cbc8877aa2
2 changed files with 41 additions and 10 deletions

View File

@ -138,7 +138,11 @@ class CompleteTool(Tool):
def description(self) -> str: def description(self) -> str:
return ( return (
"The ENTIRE goal is achieved. Call this only when nothing remains. " "The ENTIRE goal is achieved. Call this only when nothing remains. "
"Your claim will be validated — if unproven, the task continues." "Your claim will be validated — if unproven, the task continues.\n\n"
"IMPORTANT: The summary you provide here will be returned to the parent "
"agent as the FINAL ANSWER to the user. Include all key findings, data, "
"and conclusions directly in the summary. Do NOT rely on the parent agent "
"reading files afterwards — it sees ONLY this summary."
) )
async def execute(self, summary: str, **kwargs: Any) -> str: async def execute(self, summary: str, **kwargs: Any) -> str:
@ -368,7 +372,12 @@ class LongTaskTool(Tool):
"original goal and progress from the previous step. Use this for batch " "original goal and progress from the previous step. Use this for batch "
"processing (auditing many files, processing many items), large-scale " "processing (auditing many files, processing many items), large-scale "
"refactoring, or any multi-step task where you might lose track of the " "refactoring, or any multi-step task where you might lose track of the "
"goal. For simple independent tasks, use spawn instead." "goal. For simple independent tasks, use spawn instead.\n\n"
"When constructing the goal, be explicit: list concrete deliverables, "
"required output format (e.g. Markdown table), and any file paths. "
"The tool returns a text summary when finished. "
"If the summary contains the full answer, present it to the user directly. "
"Only read files afterwards if the user explicitly asks for verification."
) )
@classmethod @classmethod
@ -440,7 +449,6 @@ class LongTaskTool(Tool):
def _emit(self, event_type: str, **payload: Any) -> None: def _emit(self, event_type: str, **payload: Any) -> None:
"""Emit an event to registered hooks.""" """Emit an event to registered hooks."""
event = LongTaskEvent(type=event_type, payload=payload) event = LongTaskEvent(type=event_type, payload=payload)
logger.debug("LongTask event: {} | {}", event_type, payload)
# Call catch-all hook # Call catch-all hook
catch_all = self._hooks.get("on_event") catch_all = self._hooks.get("on_event")
@ -494,6 +502,10 @@ class LongTaskTool(Tool):
if result is None: if result is None:
# Fatal error after retry # Fatal error after retry
self._state["status"] = "error" self._state["status"] = "error"
logger.error(
"long_task step {}/{} failed after retry: {}",
step + 1, max_steps, self._state["error"],
)
self._emit("task_error", step=step, error=self._state["error"]) self._emit("task_error", step=step, error=self._state["error"])
if handoff.message: if handoff.message:
return ( return (
@ -566,9 +578,22 @@ class LongTaskTool(Tool):
) )
if validated: if validated:
self._state["status"] = "completed" self._state["status"] = "completed"
logger.info(
"long_task complete at step {}/{} after validation",
step + 1, max_steps,
)
self._emit("task_complete", step=step, summary=sig_payload) self._emit("task_complete", step=step, summary=sig_payload)
return sig_payload return (
"The task is complete. This is the final answer — "
"present it to the user directly without calling additional "
"tools or reading files.\n\n"
f"{sig_payload}"
)
else: else:
logger.warning(
"long_task validation failed at step {}/{}",
step + 1, max_steps,
)
self._emit( self._emit(
"validation_failed", "validation_failed",
step=step, step=step,
@ -600,6 +625,7 @@ class LongTaskTool(Tool):
self._state["last_handoff"] = handoff self._state["last_handoff"] = handoff
self._state["status"] = "error" self._state["status"] = "error"
logger.error("long_task reached max steps ({})", max_steps)
self._emit("task_error", step=max_steps, error="Max steps reached") self._emit("task_error", step=max_steps, error="Max steps reached")
return ( return (
f"Long task reached max steps ({max_steps}). " f"Long task reached max steps ({max_steps}). "

View File

@ -1,14 +1,19 @@
# Long Task — Step {{ step + 1 }}/{{ max_steps }} # Long Task — Step {{ step + 1 }}/{{ max_steps }}
You are the FIRST step in a chain working toward a goal. You are the FIRST step in a meta-ReAct loop. Your job is NOT to finish the entire goal — it is to **explore, plan, and build the skeleton**.
## Goal ## Goal
{{ goal }} {{ goal }}
## Instructions ## Instructions
1. **Explore first**: Check the filesystem to understand the current state. Do NOT assume anything. 1. **Explore**: Check the filesystem and any relevant state. Do NOT assume anything.
2. **Plan your work**: Decide what chunk you will do in this step. 2. **Plan the split**: Explicitly decide how the goal will be divided across the remaining {{ max_steps - 1 }} steps. Write this plan down.
3. **Do the work**: Make concrete progress. Write results to files — do NOT just collect information without producing output. 3. **Do ONE chunk**: Make concrete progress on ONLY the first chunk. Do NOT attempt to finish the entire goal now, even if you have enough tool calls. The meta-loop exists so later steps can review, correct, and refine your work.
4. **Handoff**: When done, call `handoff()` with a detailed summary. If the ENTIRE goal is already achieved, call `complete()` instead. 4. **Handoff**: Call `handoff()` with:
- A detailed summary of what you did
- Files changed
- The explicit plan for the remaining steps
- A clear hint for the next step
You have {{ budget }} tool calls total. Reserve the last 1-2 calls for `handoff()` or `complete()`. You have {{ budget }} tool calls. Reserve the last 1-2 for `handoff()`.
Do NOT call `complete()` in Step 1 unless the goal is literally a single trivial action.