mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-06-15 07:14:08 +00:00
fix(agent): finalize max-iteration turns without tools
This commit is contained in:
parent
99f7f371fa
commit
5d91d59cf7
@ -816,6 +816,11 @@ class AgentLoop:
|
||||
),
|
||||
goal_active_predicate=lambda: sustained_goal_active(session.metadata) if session is not None else False,
|
||||
goal_continue_message=_goal_continue,
|
||||
finalize_on_max_iterations=turn_continuation.should_finalize_on_max_iterations(
|
||||
pending_queue_available=pending_queue is not None and session is not None,
|
||||
session_metadata=session_metadata,
|
||||
message_metadata=metadata,
|
||||
),
|
||||
))
|
||||
finally:
|
||||
reset_workspace_scope(workspace_token)
|
||||
|
||||
@ -44,6 +44,7 @@ from nanobot.utils.progress_events import (
|
||||
from nanobot.utils.prompt_templates import render_template
|
||||
from nanobot.utils.runtime import (
|
||||
EMPTY_FINAL_RESPONSE_MESSAGE,
|
||||
build_budget_exhausted_finalization_message,
|
||||
build_finalization_retry_message,
|
||||
build_goal_continue_message,
|
||||
build_length_recovery_message,
|
||||
@ -109,6 +110,7 @@ class AgentRunSpec:
|
||||
llm_timeout_s: float | None = None
|
||||
goal_active_predicate: Callable[[], bool] | None = None
|
||||
goal_continue_message: str | None = None
|
||||
finalize_on_max_iterations: bool = True
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -631,28 +633,28 @@ class AgentRunner:
|
||||
break
|
||||
else:
|
||||
stop_reason = "max_iterations"
|
||||
if spec.max_iterations_message:
|
||||
final_content = spec.max_iterations_message.format(
|
||||
max_iterations=spec.max_iterations,
|
||||
)
|
||||
else:
|
||||
final_content = render_template(
|
||||
"agent/max_iterations_message.md",
|
||||
strip=True,
|
||||
max_iterations=spec.max_iterations,
|
||||
)
|
||||
self._append_final_message(messages, final_content)
|
||||
# Drain any remaining injections so they are appended to the
|
||||
# conversation history instead of being re-published as
|
||||
# independent inbound messages by _dispatch's finally block.
|
||||
# We ignore should_continue here because the for-loop has already
|
||||
# exhausted all iterations.
|
||||
# We include them before the no-tools finalization pass so the
|
||||
# final response can account for every known follow-up.
|
||||
drained_after_max_iterations, injection_cycles = await self._try_drain_injections(
|
||||
spec, messages, None, injection_cycles,
|
||||
phase="after max_iterations",
|
||||
)
|
||||
if drained_after_max_iterations:
|
||||
had_injections = True
|
||||
final_content = None
|
||||
if spec.finalize_on_max_iterations:
|
||||
final_content = await self._try_finalize_after_max_iterations(
|
||||
spec,
|
||||
hook,
|
||||
messages,
|
||||
usage,
|
||||
)
|
||||
if final_content is None:
|
||||
final_content = self._max_iterations_fallback(spec)
|
||||
self._append_final_message(messages, final_content)
|
||||
|
||||
return AgentRunResult(
|
||||
final_content=final_content,
|
||||
@ -831,8 +833,7 @@ class AgentRunner:
|
||||
messages: list[dict[str, Any]],
|
||||
):
|
||||
retry_messages = self._finalization_retry_messages(messages)
|
||||
kwargs = self._build_request_kwargs(spec, retry_messages, tools=None)
|
||||
return await self.provider.chat_with_retry(**kwargs)
|
||||
return await self._request_no_tools(spec, retry_messages)
|
||||
|
||||
@staticmethod
|
||||
def _finalization_retry_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
@ -840,6 +841,75 @@ class AgentRunner:
|
||||
retry_messages.append(build_finalization_retry_message())
|
||||
return retry_messages
|
||||
|
||||
async def _try_finalize_after_max_iterations(
    self,
    spec: AgentRunSpec,
    hook: AgentHook,
    messages: list[dict[str, Any]],
    usage: dict[str, int],
) -> str | None:
    """Request one tool-free closing answer after the iteration budget is spent.

    ``messages`` plus the budget-exhausted prompt are sent through
    ``_request_no_tools``. The usage of a request that returned is handed
    to ``_accumulate_usage`` together with ``usage`` before the response is
    inspected.

    Returns:
        The text produced by ``hook.finalize_content``, or ``None`` when the
        request raised, the response has ``finish_reason == "error"``, the
        response carries tool calls, or the cleaned text is blank.
    """
    session_label = spec.session_key or "default"
    prompt_messages = self._budget_exhausted_finalization_messages(messages)

    try:
        response = await self._request_no_tools(spec, prompt_messages)
    except Exception:
        # Best effort: report the failure and let the caller decide what to
        # show instead of propagating the provider error.
        logger.exception(
            "Budget-exhausted finalization failed for {}; using fallback",
            session_label,
        )
        return None

    call_usage = self._usage_or_estimate(spec, prompt_messages, response)
    self._accumulate_usage(usage, call_usage)

    unusable = response.finish_reason == "error" or response.has_tool_calls
    if unusable:
        logger.warning(
            "Budget-exhausted finalization returned finish_reason='{}' "
            "with {} tool call(s) for {}; using fallback",
            response.finish_reason,
            len(response.tool_calls),
            session_label,
        )
        return None

    # The hook sees the original conversation, not the prompt-augmented copy.
    hook_context = AgentHookContext(
        iteration=spec.max_iterations,
        messages=messages,
        response=response,
        usage=dict(call_usage),
        session_key=spec.session_key,
    )
    cleaned = hook.finalize_content(hook_context, response.content)
    return None if is_blank_text(cleaned) else cleaned
|
||||
|
||||
async def _request_no_tools(
    self,
    spec: AgentRunSpec,
    messages: list[dict[str, Any]],
) -> LLMResponse:
    """Send ``messages`` to the provider with ``tools=None`` and return the response.

    Request keyword arguments come from ``_build_request_kwargs``; the call
    itself goes through ``provider.chat_with_retry``.
    """
    request = self._build_request_kwargs(spec, messages, tools=None)
    response = await self.provider.chat_with_retry(**request)
    return response
|
||||
|
||||
@staticmethod
def _budget_exhausted_finalization_messages(
    messages: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Return a new list: ``messages`` followed by the budget-exhausted prompt.

    The input list itself is not modified; its items are shared, not copied.
    """
    return [*messages, build_budget_exhausted_finalization_message()]
|
||||
|
||||
@staticmethod
|
||||
def _max_iterations_fallback(spec: AgentRunSpec) -> str:
|
||||
if spec.max_iterations_message:
|
||||
return spec.max_iterations_message.format(
|
||||
max_iterations=spec.max_iterations,
|
||||
)
|
||||
return render_template(
|
||||
"agent/max_iterations_message.md",
|
||||
strip=True,
|
||||
max_iterations=spec.max_iterations,
|
||||
)
|
||||
|
||||
def _usage_or_estimate(
|
||||
self,
|
||||
spec: AgentRunSpec,
|
||||
|
||||
@ -248,6 +248,7 @@ class SubagentManager:
|
||||
max_tool_result_chars=self.max_tool_result_chars,
|
||||
hook=_SubagentHook(task_id, status),
|
||||
max_iterations_message="Task completed but no final response was generated.",
|
||||
finalize_on_max_iterations=False,
|
||||
error_message=None,
|
||||
fail_on_tool_error=True,
|
||||
checkpoint_callback=_on_checkpoint,
|
||||
|
||||
@ -70,14 +70,36 @@ def should_stream_budget_response(
|
||||
message_metadata: Mapping[str, Any] | None = None,
|
||||
) -> bool:
|
||||
"""Return whether the budget-boundary response should be sent to the user."""
|
||||
return not _continuation_available(
|
||||
stop_reason=stop_reason,
|
||||
if stop_reason != "max_iterations":
|
||||
return True
|
||||
return should_finalize_on_max_iterations(
|
||||
pending_queue_available=pending_queue_available,
|
||||
session_metadata=session_metadata,
|
||||
message_metadata=message_metadata,
|
||||
)
|
||||
|
||||
|
||||
def should_finalize_on_max_iterations(
    *,
    pending_queue_available: bool,
    session_metadata: Mapping[str, Any] | None,
    message_metadata: Mapping[str, Any] | None = None,
) -> bool:
    """Decide whether hitting the iteration limit should yield a final response.

    If a sustained goal is able to continue internally, this runner slice
    stops without paying for an extra no-tools finalization call; the queued
    continuation slice is then responsible for the eventual user-visible
    response. In every other situation a final response is produced.
    """
    # No pending queue means nothing can pick the goal up again, so the
    # current slice has to answer the user itself.
    if not pending_queue_available:
        return True
    can_continue = _goal_continuation_available(
        session_metadata,
        message_metadata=message_metadata,
    )
    return not can_continue
|
||||
|
||||
|
||||
async def maybe_continue_turn(ctx: Any) -> bool:
|
||||
"""Queue an internal continuation for *ctx* when policy allows it."""
|
||||
if ctx.session is None or ctx.pending_queue is None:
|
||||
|
||||
@ -24,6 +24,14 @@ FINALIZATION_RETRY_PROMPT = (
|
||||
"Please provide your response to the user based on the conversation above."
|
||||
)
|
||||
|
||||
# User-role prompt text for the no-tools finalization request issued after the
# tool-call budget of a turn is used up. One sentence per source line.
BUDGET_EXHAUSTED_FINALIZATION_PROMPT = (
    "The tool-call budget for this turn is exhausted. "
    "Based only on the conversation and tool results above, "
    "provide a concise final response to the user. "
    "Do not call or request tools. "
    "Do not claim the task is complete unless the evidence above "
    "clearly shows it is complete. "
    "State what was done, what remains, and the best next step "
    "if anything is incomplete."
)
|
||||
|
||||
LENGTH_RECOVERY_PROMPT = (
|
||||
"Output limit reached. Continue exactly where you left off "
|
||||
"— no recap, no apology. Break remaining work into smaller steps if needed."
|
||||
@ -65,6 +73,11 @@ def build_finalization_retry_message() -> dict[str, str]:
|
||||
return {"role": "user", "content": FINALIZATION_RETRY_PROMPT}
|
||||
|
||||
|
||||
def build_budget_exhausted_finalization_message() -> dict[str, str]:
    """Build the user message requesting a no-tools final answer once the budget is exhausted."""
    message: dict[str, str] = {"role": "user"}
    message["content"] = BUDGET_EXHAUSTED_FINALIZATION_PROMPT
    return message
|
||||
|
||||
|
||||
def build_length_recovery_message() -> dict[str, str]:
    """Build the user message telling the model to resume after an output-token cutoff."""
    return dict(role="user", content=LENGTH_RECOVERY_PROMPT)
|
||||
|
||||
@ -64,7 +64,8 @@ async def test_loop_goal_turn_uses_standard_iteration_budget(tmp_path):
|
||||
)
|
||||
|
||||
assert stop_reason == "max_iterations"
|
||||
assert loop.provider.chat_with_retry.await_count == 2
|
||||
assert loop.provider.chat_with_retry.await_count == 3
|
||||
assert loop.provider.chat_with_retry.await_args_list[-1].kwargs["tools"] is None
|
||||
assert final_content == (
|
||||
"I reached the maximum number of tool call iterations (2) "
|
||||
"without completing the task. You can try breaking the task into smaller steps."
|
||||
|
||||
@ -101,6 +101,61 @@ async def test_runner_returns_max_iterations_fallback():
|
||||
)
|
||||
assert result.messages[-1]["role"] == "assistant"
|
||||
assert result.messages[-1]["content"] == result.final_content
|
||||
assert provider.chat_with_retry.await_count == 3
|
||||
assert provider.chat_with_retry.await_args_list[-1].kwargs["tools"] is None
|
||||
assert tools.execute.await_count == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_runner_uses_no_tools_finalization_after_max_iterations():
    """Two tool-calling turns exhaust the budget; a third no-tools call supplies the final text."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    final_text = "Read the directory twice. More investigation remains."
    calls: list[dict] = []

    async def chat_with_retry(*, messages, tools=None, **kwargs):
        # Record what the runner sent so the assertions can inspect it.
        calls.append({"messages": messages, "tools": tools})
        turn = len(calls)
        if turn > 2:
            # Third request: the finalization pass gets a plain answer.
            return LLMResponse(
                content=final_text,
                tool_calls=[],
                usage={"prompt_tokens": 10, "completion_tokens": 7},
            )
        # First two requests keep asking for a tool, burning the budget.
        tool_call = ToolCallRequest(
            id=f"call_{turn}",
            name="list_dir",
            arguments={"path": "."},
        )
        return LLMResponse(content="still working", tool_calls=[tool_call])

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = chat_with_retry

    tools = MagicMock()
    tools.get_definitions.return_value = []
    tools.execute = AsyncMock(return_value="tool result")

    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "inspect the repo"}],
        tools=tools,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    runner = AgentRunner(provider)
    result = await runner.run(spec)

    assert result.stop_reason == "max_iterations"
    assert result.final_content == final_text
    assert result.messages[-1] == {
        "role": "assistant",
        "content": final_text,
    }
    assert len(calls) == 3
    last_call = calls[-1]
    assert last_call["tools"] is None
    assert "tool-call budget" in last_call["messages"][-1]["content"]
    assert tools.execute.await_count == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@ -150,6 +150,7 @@ async def test_runner_goal_continue_not_limited_by_injection_cycle_cap():
|
||||
max_iterations=max_iterations,
|
||||
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
|
||||
goal_active_predicate=lambda: True,
|
||||
finalize_on_max_iterations=False,
|
||||
))
|
||||
|
||||
assert result.stop_reason == "max_iterations"
|
||||
|
||||
@ -17,6 +17,7 @@ from nanobot.session.turn_continuation import (
|
||||
internal_continuation_pending,
|
||||
internal_continuation_run_started_at,
|
||||
maybe_continue_turn,
|
||||
should_finalize_on_max_iterations,
|
||||
should_stream_budget_response,
|
||||
)
|
||||
|
||||
@ -125,3 +126,15 @@ def test_internal_continuation_requires_budget_boundary_and_queue():
|
||||
pending_queue_available=False,
|
||||
session_metadata=meta,
|
||||
)
|
||||
assert not should_finalize_on_max_iterations(
|
||||
pending_queue_available=True,
|
||||
session_metadata=meta,
|
||||
)
|
||||
assert should_finalize_on_max_iterations(
|
||||
pending_queue_available=False,
|
||||
session_metadata=meta,
|
||||
)
|
||||
assert should_finalize_on_max_iterations(
|
||||
pending_queue_available=True,
|
||||
session_metadata={},
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user