fix(agent): finalize max-iteration turns without tools

This commit is contained in:
chengyongru 2026-06-10 11:24:03 +08:00 committed by Xubin Ren
parent 99f7f371fa
commit 5d91d59cf7
9 changed files with 199 additions and 18 deletions

View File

@ -816,6 +816,11 @@ class AgentLoop:
), ),
goal_active_predicate=lambda: sustained_goal_active(session.metadata) if session is not None else False, goal_active_predicate=lambda: sustained_goal_active(session.metadata) if session is not None else False,
goal_continue_message=_goal_continue, goal_continue_message=_goal_continue,
finalize_on_max_iterations=turn_continuation.should_finalize_on_max_iterations(
pending_queue_available=pending_queue is not None and session is not None,
session_metadata=session_metadata,
message_metadata=metadata,
),
)) ))
finally: finally:
reset_workspace_scope(workspace_token) reset_workspace_scope(workspace_token)

View File

@ -44,6 +44,7 @@ from nanobot.utils.progress_events import (
from nanobot.utils.prompt_templates import render_template from nanobot.utils.prompt_templates import render_template
from nanobot.utils.runtime import ( from nanobot.utils.runtime import (
EMPTY_FINAL_RESPONSE_MESSAGE, EMPTY_FINAL_RESPONSE_MESSAGE,
build_budget_exhausted_finalization_message,
build_finalization_retry_message, build_finalization_retry_message,
build_goal_continue_message, build_goal_continue_message,
build_length_recovery_message, build_length_recovery_message,
@ -109,6 +110,7 @@ class AgentRunSpec:
llm_timeout_s: float | None = None llm_timeout_s: float | None = None
goal_active_predicate: Callable[[], bool] | None = None goal_active_predicate: Callable[[], bool] | None = None
goal_continue_message: str | None = None goal_continue_message: str | None = None
finalize_on_max_iterations: bool = True
@dataclass(slots=True) @dataclass(slots=True)
@ -631,28 +633,28 @@ class AgentRunner:
break break
else: else:
stop_reason = "max_iterations" stop_reason = "max_iterations"
if spec.max_iterations_message:
final_content = spec.max_iterations_message.format(
max_iterations=spec.max_iterations,
)
else:
final_content = render_template(
"agent/max_iterations_message.md",
strip=True,
max_iterations=spec.max_iterations,
)
self._append_final_message(messages, final_content)
# Drain any remaining injections so they are appended to the # Drain any remaining injections so they are appended to the
# conversation history instead of being re-published as # conversation history instead of being re-published as
# independent inbound messages by _dispatch's finally block. # independent inbound messages by _dispatch's finally block.
# We ignore should_continue here because the for-loop has already # We include them before the no-tools finalization pass so the
# exhausted all iterations. # final response can account for every known follow-up.
drained_after_max_iterations, injection_cycles = await self._try_drain_injections( drained_after_max_iterations, injection_cycles = await self._try_drain_injections(
spec, messages, None, injection_cycles, spec, messages, None, injection_cycles,
phase="after max_iterations", phase="after max_iterations",
) )
if drained_after_max_iterations: if drained_after_max_iterations:
had_injections = True had_injections = True
final_content = None
if spec.finalize_on_max_iterations:
final_content = await self._try_finalize_after_max_iterations(
spec,
hook,
messages,
usage,
)
if final_content is None:
final_content = self._max_iterations_fallback(spec)
self._append_final_message(messages, final_content)
return AgentRunResult( return AgentRunResult(
final_content=final_content, final_content=final_content,
@ -831,8 +833,7 @@ class AgentRunner:
messages: list[dict[str, Any]], messages: list[dict[str, Any]],
): ):
retry_messages = self._finalization_retry_messages(messages) retry_messages = self._finalization_retry_messages(messages)
kwargs = self._build_request_kwargs(spec, retry_messages, tools=None) return await self._request_no_tools(spec, retry_messages)
return await self.provider.chat_with_retry(**kwargs)
@staticmethod @staticmethod
def _finalization_retry_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]: def _finalization_retry_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
@ -840,6 +841,75 @@ class AgentRunner:
retry_messages.append(build_finalization_retry_message()) retry_messages.append(build_finalization_retry_message())
return retry_messages return retry_messages
async def _try_finalize_after_max_iterations(
    self,
    spec: AgentRunSpec,
    hook: AgentHook,
    messages: list[dict[str, Any]],
    usage: dict[str, int],
) -> str | None:
    """Request one tool-free closing answer after the iteration budget is spent.

    A copy of *messages* with the budget-exhausted prompt appended is sent
    to the provider with tools disabled. *messages* itself is not modified
    here; the caller decides what to append to the conversation.

    Args:
        spec: Run specification; supplies the session key used in log lines
            and the iteration count reported to the hook context.
        hook: Hook whose ``finalize_content`` post-processes the reply text.
        messages: Conversation so far.
        usage: Running usage totals; updated in place whenever the provider
            call returns, even if the reply is then rejected.

    Returns:
        The finalized reply text, or ``None`` when the request raised, the
        reply finished with ``"error"``, the reply still contained tool
        calls, or the finalized text was blank. ``None`` tells the caller
        to use its fallback message.
    """
    session_label = spec.session_key or "default"
    prompt_messages = self._budget_exhausted_finalization_messages(messages)

    try:
        response = await self._request_no_tools(spec, prompt_messages)
    except Exception:
        # Best effort: a failed finalization must not fail the whole turn.
        logger.exception(
            "Budget-exhausted finalization failed for {}; using fallback",
            session_label,
        )
        return None

    # Count the extra call before deciding whether its reply is usable.
    call_usage = self._usage_or_estimate(spec, prompt_messages, response)
    self._accumulate_usage(usage, call_usage)

    if response.finish_reason == "error" or response.has_tool_calls:
        logger.warning(
            "Budget-exhausted finalization returned finish_reason='{}' with {} tool call(s) for {}; using fallback",
            response.finish_reason,
            len(response.tool_calls),
            session_label,
        )
        return None

    hook_context = AgentHookContext(
        iteration=spec.max_iterations,
        messages=messages,
        response=response,
        usage=dict(call_usage),
        session_key=spec.session_key,
    )
    final_text = hook.finalize_content(hook_context, response.content)
    return None if is_blank_text(final_text) else final_text
async def _request_no_tools(
    self,
    spec: AgentRunSpec,
    messages: list[dict[str, Any]],
) -> LLMResponse:
    """Send *messages* to the provider with ``tools=None`` and return the reply.

    The request keyword arguments are built by ``_build_request_kwargs`` and
    forwarded unchanged to ``provider.chat_with_retry``.
    """
    request_kwargs = self._build_request_kwargs(spec, messages, tools=None)
    reply = await self.provider.chat_with_retry(**request_kwargs)
    return reply
@staticmethod
def _budget_exhausted_finalization_messages(
    messages: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Return a shallow copy of *messages* ending with the budget-exhausted prompt.

    The input list is left untouched; only the returned list carries the
    extra prompt message.
    """
    return [*messages, build_budget_exhausted_finalization_message()]
@staticmethod
def _max_iterations_fallback(spec: AgentRunSpec) -> str:
    """Build the canned message used when no finalized answer is available.

    A non-empty ``spec.max_iterations_message`` is used as a ``str.format``
    template; otherwise the ``agent/max_iterations_message.md`` template is
    rendered. Both receive ``max_iterations`` as their only variable.
    """
    custom_template = spec.max_iterations_message
    if not custom_template:
        return render_template(
            "agent/max_iterations_message.md",
            strip=True,
            max_iterations=spec.max_iterations,
        )
    return custom_template.format(max_iterations=spec.max_iterations)
def _usage_or_estimate( def _usage_or_estimate(
self, self,
spec: AgentRunSpec, spec: AgentRunSpec,

View File

@ -248,6 +248,7 @@ class SubagentManager:
max_tool_result_chars=self.max_tool_result_chars, max_tool_result_chars=self.max_tool_result_chars,
hook=_SubagentHook(task_id, status), hook=_SubagentHook(task_id, status),
max_iterations_message="Task completed but no final response was generated.", max_iterations_message="Task completed but no final response was generated.",
finalize_on_max_iterations=False,
error_message=None, error_message=None,
fail_on_tool_error=True, fail_on_tool_error=True,
checkpoint_callback=_on_checkpoint, checkpoint_callback=_on_checkpoint,

View File

@ -70,14 +70,36 @@ def should_stream_budget_response(
message_metadata: Mapping[str, Any] | None = None, message_metadata: Mapping[str, Any] | None = None,
) -> bool: ) -> bool:
"""Return whether the budget-boundary response should be sent to the user.""" """Return whether the budget-boundary response should be sent to the user."""
return not _continuation_available( if stop_reason != "max_iterations":
stop_reason=stop_reason, return True
return should_finalize_on_max_iterations(
pending_queue_available=pending_queue_available, pending_queue_available=pending_queue_available,
session_metadata=session_metadata, session_metadata=session_metadata,
message_metadata=message_metadata, message_metadata=message_metadata,
) )
def should_finalize_on_max_iterations(
    *,
    pending_queue_available: bool,
    session_metadata: Mapping[str, Any] | None,
    message_metadata: Mapping[str, Any] | None = None,
) -> bool:
    """Decide whether hitting the iteration limit should yield a final response.

    Finalization is skipped only when a pending queue is available AND a
    goal continuation is available for the given metadata. In that case the
    current runner slice stops without an extra no-tools finalization call,
    and the next queued continuation slice owns the user-visible response.
    """
    # Without a queue nothing can pick the turn up again, so finalize now.
    if not pending_queue_available:
        return True
    return not _goal_continuation_available(
        session_metadata,
        message_metadata=message_metadata,
    )
async def maybe_continue_turn(ctx: Any) -> bool: async def maybe_continue_turn(ctx: Any) -> bool:
"""Queue an internal continuation for *ctx* when policy allows it.""" """Queue an internal continuation for *ctx* when policy allows it."""
if ctx.session is None or ctx.pending_queue is None: if ctx.session is None or ctx.pending_queue is None:

View File

@ -24,6 +24,14 @@ FINALIZATION_RETRY_PROMPT = (
"Please provide your response to the user based on the conversation above." "Please provide your response to the user based on the conversation above."
) )
# User-role prompt asking for a tool-free wrap-up once the tool-call budget is
# spent. One sentence per literal; adjacent literals concatenate into one string.
BUDGET_EXHAUSTED_FINALIZATION_PROMPT = (
    "The tool-call budget for this turn is exhausted. "
    "Based only on the conversation and tool results above, "
    "provide a concise final response to the user. "
    "Do not call or request tools. "
    "Do not claim the task is complete unless the evidence above "
    "clearly shows it is complete. "
    "State what was done, what remains, and the best next step "
    "if anything is incomplete."
)
LENGTH_RECOVERY_PROMPT = ( LENGTH_RECOVERY_PROMPT = (
"Output limit reached. Continue exactly where you left off " "Output limit reached. Continue exactly where you left off "
"— no recap, no apology. Break remaining work into smaller steps if needed." "— no recap, no apology. Break remaining work into smaller steps if needed."
@ -65,6 +73,11 @@ def build_finalization_retry_message() -> dict[str, str]:
return {"role": "user", "content": FINALIZATION_RETRY_PROMPT} return {"role": "user", "content": FINALIZATION_RETRY_PROMPT}
def build_budget_exhausted_finalization_message() -> dict[str, str]:
    """Build the user-role message that asks for a no-tools final answer.

    Returns a fresh dict on every call, so callers may append it to a
    message list without sharing state.
    """
    return dict(role="user", content=BUDGET_EXHAUSTED_FINALIZATION_PROMPT)
def build_length_recovery_message() -> dict[str, str]: def build_length_recovery_message() -> dict[str, str]:
"""Prompt the model to continue after hitting output token limit.""" """Prompt the model to continue after hitting output token limit."""
return {"role": "user", "content": LENGTH_RECOVERY_PROMPT} return {"role": "user", "content": LENGTH_RECOVERY_PROMPT}

View File

@ -64,7 +64,8 @@ async def test_loop_goal_turn_uses_standard_iteration_budget(tmp_path):
) )
assert stop_reason == "max_iterations" assert stop_reason == "max_iterations"
assert loop.provider.chat_with_retry.await_count == 2 assert loop.provider.chat_with_retry.await_count == 3
assert loop.provider.chat_with_retry.await_args_list[-1].kwargs["tools"] is None
assert final_content == ( assert final_content == (
"I reached the maximum number of tool call iterations (2) " "I reached the maximum number of tool call iterations (2) "
"without completing the task. You can try breaking the task into smaller steps." "without completing the task. You can try breaking the task into smaller steps."

View File

@ -101,6 +101,61 @@ async def test_runner_returns_max_iterations_fallback():
) )
assert result.messages[-1]["role"] == "assistant" assert result.messages[-1]["role"] == "assistant"
assert result.messages[-1]["content"] == result.final_content assert result.messages[-1]["content"] == result.final_content
assert provider.chat_with_retry.await_count == 3
assert provider.chat_with_retry.await_args_list[-1].kwargs["tools"] is None
assert tools.execute.await_count == 2
@pytest.mark.asyncio
async def test_runner_uses_no_tools_finalization_after_max_iterations():
    """After two tool-calling iterations the runner makes one tool-free call.

    The fake provider returns a tool call on its first two invocations and a
    plain answer on the third. The runner should return that answer as the
    final content while still reporting ``max_iterations`` as the stop reason.
    """
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    recorded_calls: list[dict] = []

    async def chat_with_retry(*, messages, tools=None, **kwargs):
        recorded_calls.append({"messages": messages, "tools": tools})
        call_number = len(recorded_calls)
        if call_number > 2:
            # Third call: the no-tools finalization pass.
            return LLMResponse(
                content="Read the directory twice. More investigation remains.",
                tool_calls=[],
                usage={"prompt_tokens": 10, "completion_tokens": 7},
            )
        # First two calls keep requesting a tool, which burns the budget.
        return LLMResponse(
            content="still working",
            tool_calls=[
                ToolCallRequest(
                    id=f"call_{call_number}",
                    name="list_dir",
                    arguments={"path": "."},
                )
            ],
        )

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = chat_with_retry

    tools = MagicMock()
    tools.get_definitions.return_value = []
    tools.execute = AsyncMock(return_value="tool result")

    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "inspect the repo"}],
        tools=tools,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await AgentRunner(provider).run(spec)

    assert result.stop_reason == "max_iterations"
    assert result.final_content == "Read the directory twice. More investigation remains."
    assert result.messages[-1] == {
        "role": "assistant",
        "content": "Read the directory twice. More investigation remains.",
    }

    # Two tool iterations plus exactly one finalization request without tools.
    assert len(recorded_calls) == 3
    last_call = recorded_calls[-1]
    assert last_call["tools"] is None
    assert "tool-call budget" in last_call["messages"][-1]["content"]
    assert tools.execute.await_count == 2
@pytest.mark.asyncio @pytest.mark.asyncio

View File

@ -150,6 +150,7 @@ async def test_runner_goal_continue_not_limited_by_injection_cycle_cap():
max_iterations=max_iterations, max_iterations=max_iterations,
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
goal_active_predicate=lambda: True, goal_active_predicate=lambda: True,
finalize_on_max_iterations=False,
)) ))
assert result.stop_reason == "max_iterations" assert result.stop_reason == "max_iterations"

View File

@ -17,6 +17,7 @@ from nanobot.session.turn_continuation import (
internal_continuation_pending, internal_continuation_pending,
internal_continuation_run_started_at, internal_continuation_run_started_at,
maybe_continue_turn, maybe_continue_turn,
should_finalize_on_max_iterations,
should_stream_budget_response, should_stream_budget_response,
) )
@ -125,3 +126,15 @@ def test_internal_continuation_requires_budget_boundary_and_queue():
pending_queue_available=False, pending_queue_available=False,
session_metadata=meta, session_metadata=meta,
) )
assert not should_finalize_on_max_iterations(
pending_queue_available=True,
session_metadata=meta,
)
assert should_finalize_on_max_iterations(
pending_queue_available=False,
session_metadata=meta,
)
assert should_finalize_on_max_iterations(
pending_queue_available=True,
session_metadata={},
)