fix(agent): finalize max-iteration turns without tools

2026-06-15 07:14:08 +00:00 · 2026-06-10 11:24:03 +08:00 · 2026-06-10 11:24:03 +08:00 · 5d91d59cf7
commit 5d91d59cf7
parent 99f7f371fa
9 changed files with 199 additions and 18 deletions
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@ -816,6 +816,11 @@ class AgentLoop:
                ),
                goal_active_predicate=lambda: sustained_goal_active(session.metadata) if session is not None else False,
                goal_continue_message=_goal_continue,
                finalize_on_max_iterations=turn_continuation.should_finalize_on_max_iterations(
                    pending_queue_available=pending_queue is not None and session is not None,
                    session_metadata=session_metadata,
                    message_metadata=metadata,
                ),
            ))
        finally:
            reset_workspace_scope(workspace_token)
--- a/nanobot/agent/runner.py
+++ b/nanobot/agent/runner.py
@ -44,6 +44,7 @@ from nanobot.utils.progress_events import (
 from nanobot.utils.prompt_templates import render_template
 from nanobot.utils.runtime import (
    EMPTY_FINAL_RESPONSE_MESSAGE,
    build_budget_exhausted_finalization_message,
    build_finalization_retry_message,
    build_goal_continue_message,
    build_length_recovery_message,
@ -109,6 +110,7 @@ class AgentRunSpec:
    llm_timeout_s: float | None = None
    goal_active_predicate: Callable[[], bool] | None = None
    goal_continue_message: str | None = None
    finalize_on_max_iterations: bool = True
@dataclass(slots=True)
@ -631,28 +633,28 @@ class AgentRunner:
            break
        else:
            stop_reason = "max_iterations"
            if spec.max_iterations_message:
                final_content = spec.max_iterations_message.format(
                    max_iterations=spec.max_iterations,
                )
            else:
                final_content = render_template(
                    "agent/max_iterations_message.md",
                    strip=True,
                    max_iterations=spec.max_iterations,
                )
            self._append_final_message(messages, final_content)
            # Drain any remaining injections so they are appended to the
            # conversation history instead of being re-published as
            # independent inbound messages by _dispatch's finally block.
-            # We ignore should_continue here because the for-loop has already
+            # We include them before the no-tools finalization pass so the
-            # exhausted all iterations.
+            # final response can account for every known follow-up.
            drained_after_max_iterations, injection_cycles = await self._try_drain_injections(
                spec, messages, None, injection_cycles,
                phase="after max_iterations",
            )
            if drained_after_max_iterations:
                had_injections = True
            final_content = None
            if spec.finalize_on_max_iterations:
                final_content = await self._try_finalize_after_max_iterations(
                    spec,
                    hook,
                    messages,
                    usage,
                )
            if final_content is None:
                final_content = self._max_iterations_fallback(spec)
            self._append_final_message(messages, final_content)
        return AgentRunResult(
            final_content=final_content,
@ -831,8 +833,7 @@ class AgentRunner:
        messages: list[dict[str, Any]],
    ):
        retry_messages = self._finalization_retry_messages(messages)
-        kwargs = self._build_request_kwargs(spec, retry_messages, tools=None)
+        return await self._request_no_tools(spec, retry_messages)
        return await self.provider.chat_with_retry(**kwargs)
    @staticmethod
    def _finalization_retry_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
@ -840,6 +841,75 @@ class AgentRunner:
        retry_messages.append(build_finalization_retry_message())
        return retry_messages
    async def _try_finalize_after_max_iterations(
        self,
        spec: AgentRunSpec,
        hook: AgentHook,
        messages: list[dict[str, Any]],
        usage: dict[str, int],
    ) -> str | None:
        """Ask the model for one tool-less closing reply once iterations run out.

        A copy of ``messages`` with the budget-exhausted prompt appended is
        sent through ``_request_no_tools``. The usage of that call is folded
        into ``usage`` as soon as a response arrives, including when the
        response is subsequently rejected.

        Returns:
            The text produced by ``hook.finalize_content``, or ``None`` when
            the request raised, the response reported
            ``finish_reason == "error"``, the response carried tool calls, or
            the finalized text is blank.
        """
        finalization_messages = self._budget_exhausted_finalization_messages(messages)
        try:
            reply = await self._request_no_tools(spec, finalization_messages)
        except Exception:
            # Best effort: a failed finalization request must not fail the
            # turn, so it is logged and reported as "no text produced".
            logger.exception(
                "Budget-exhausted finalization failed for {}; using fallback",
                spec.session_key or "default",
            )
            return None

        # Account for the extra request before judging whether it is usable.
        call_usage = self._usage_or_estimate(spec, finalization_messages, reply)
        self._accumulate_usage(usage, call_usage)

        unusable = reply.finish_reason == "error" or reply.has_tool_calls
        if unusable:
            logger.warning(
                "Budget-exhausted finalization returned finish_reason='{}' "
                "with {} tool call(s) for {}; using fallback",
                reply.finish_reason,
                len(reply.tool_calls),
                spec.session_key or "default",
            )
            return None

        hook_context = AgentHookContext(
            iteration=spec.max_iterations,
            messages=messages,
            response=reply,
            usage=dict(call_usage),
            session_key=spec.session_key,
        )
        cleaned = hook.finalize_content(hook_context, reply.content)
        return None if is_blank_text(cleaned) else cleaned
    async def _request_no_tools(
        self,
        spec: AgentRunSpec,
        messages: list[dict[str, Any]],
    ) -> LLMResponse:
        """Send ``messages`` to the provider with ``tools=None``.

        The request keyword arguments are produced by
        ``_build_request_kwargs`` and the call goes through the provider's
        ``chat_with_retry``; its response is returned unchanged.
        """
        request_kwargs = self._build_request_kwargs(spec, messages, tools=None)
        response = await self.provider.chat_with_retry(**request_kwargs)
        return response
    @staticmethod
    def _budget_exhausted_finalization_messages(
        messages: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """Return a new list: ``messages`` followed by the budget-exhausted prompt.

        The input list is not modified; the copy is shallow, so the message
        dicts themselves are shared with the caller's list.
        """
        return [*messages, build_budget_exhausted_finalization_message()]
    @staticmethod
    def _max_iterations_fallback(spec: AgentRunSpec) -> str:
        """Build the canned max-iterations text for ``spec``.

        A non-empty ``spec.max_iterations_message`` is used as a ``str.format``
        template; otherwise the ``agent/max_iterations_message.md`` template is
        rendered. Both receive ``max_iterations`` as the only substitution.
        """
        custom_message = spec.max_iterations_message
        if not custom_message:
            return render_template(
                "agent/max_iterations_message.md",
                strip=True,
                max_iterations=spec.max_iterations,
            )
        return custom_message.format(max_iterations=spec.max_iterations)
    def _usage_or_estimate(
        self,
        spec: AgentRunSpec,
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@ -248,6 +248,7 @@ class SubagentManager:
                    max_tool_result_chars=self.max_tool_result_chars,
                    hook=_SubagentHook(task_id, status),
                    max_iterations_message="Task completed but no final response was generated.",
                    finalize_on_max_iterations=False,
                    error_message=None,
                    fail_on_tool_error=True,
                    checkpoint_callback=_on_checkpoint,
--- a/nanobot/session/turn_continuation.py
+++ b/nanobot/session/turn_continuation.py
@ -70,14 +70,36 @@ def should_stream_budget_response(
    message_metadata: Mapping[str, Any] | None = None,
 ) -> bool:
    """Return whether the budget-boundary response should be sent to the user."""
-    return not _continuation_available(
+    if stop_reason != "max_iterations":
-        stop_reason=stop_reason,
+        return True
    return should_finalize_on_max_iterations(
        pending_queue_available=pending_queue_available,
        session_metadata=session_metadata,
        message_metadata=message_metadata,
    )
 def should_finalize_on_max_iterations(
    *,
    pending_queue_available: bool,
    session_metadata: Mapping[str, Any] | None,
    message_metadata: Mapping[str, Any] | None = None,
 ) -> bool:
    """Return whether a max-iteration boundary should produce a final response.
    When a sustained goal can continue internally, the current runner slice
    should stop without spending an extra no-tools finalization call. The next
    queued continuation slice owns the eventual user-visible response.
    """
    return not (
        pending_queue_available
        and _goal_continuation_available(
            session_metadata,
            message_metadata=message_metadata,
        )
    )
 async def maybe_continue_turn(ctx: Any) -> bool:
    """Queue an internal continuation for *ctx* when policy allows it."""
    if ctx.session is None or ctx.pending_queue is None:
--- a/nanobot/utils/runtime.py
+++ b/nanobot/utils/runtime.py
@ -24,6 +24,14 @@ FINALIZATION_RETRY_PROMPT = (
    "Please provide your response to the user based on the conversation above."
 )
 # Instruction text for the no-tools final response requested after a turn's
 # tool-call budget is exhausted. It is wrapped into a user-role message by
 # build_budget_exhausted_finalization_message().
 BUDGET_EXHAUSTED_FINALIZATION_PROMPT = (
    "The tool-call budget for this turn is exhausted. Based only on the "
    "conversation and tool results above, provide a concise final response to "
    "the user. Do not call or request tools. Do not claim the task is complete "
    "unless the evidence above clearly shows it is complete. State what was "
    "done, what remains, and the best next step if anything is incomplete."
 )
 LENGTH_RECOVERY_PROMPT = (
    "Output limit reached. Continue exactly where you left off "
    "— no recap, no apology. Break remaining work into smaller steps if needed."
@ -65,6 +73,11 @@ def build_finalization_retry_message() -> dict[str, str]:
    return {"role": "user", "content": FINALIZATION_RETRY_PROMPT}
 def build_budget_exhausted_finalization_message() -> dict[str, str]:
    """Build the user-role message that asks for a no-tools final response.

    The message content is ``BUDGET_EXHAUSTED_FINALIZATION_PROMPT``; a fresh
    dict is created on every call.
    """
    message: dict[str, str] = {
        "role": "user",
        "content": BUDGET_EXHAUSTED_FINALIZATION_PROMPT,
    }
    return message
 def build_length_recovery_message() -> dict[str, str]:
    """Build the user-role message asking the model to resume after an output-limit cutoff.

    The message content is ``LENGTH_RECOVERY_PROMPT``; a fresh dict is created
    on every call.
    """
    message: dict[str, str] = {
        "role": "user",
        "content": LENGTH_RECOVERY_PROMPT,
    }
    return message
--- a/tests/agent/test_loop_runner_integration.py
+++ b/tests/agent/test_loop_runner_integration.py
@ -64,7 +64,8 @@ async def test_loop_goal_turn_uses_standard_iteration_budget(tmp_path):
    )
    assert stop_reason == "max_iterations"
-    assert loop.provider.chat_with_retry.await_count == 2
+    assert loop.provider.chat_with_retry.await_count == 3
    assert loop.provider.chat_with_retry.await_args_list[-1].kwargs["tools"] is None
    assert final_content == (
        "I reached the maximum number of tool call iterations (2) "
        "without completing the task. You can try breaking the task into smaller steps."
--- a/tests/agent/test_runner_core.py
+++ b/tests/agent/test_runner_core.py
@ -101,6 +101,61 @@ async def test_runner_returns_max_iterations_fallback():
    )
    assert result.messages[-1]["role"] == "assistant"
    assert result.messages[-1]["content"] == result.final_content
    assert provider.chat_with_retry.await_count == 3
    assert provider.chat_with_retry.await_args_list[-1].kwargs["tools"] is None
    assert tools.execute.await_count == 2
@pytest.mark.asyncio
async def test_runner_uses_no_tools_finalization_after_max_iterations():
    """Budget exhaustion triggers one extra tool-less call whose text is the result.

    The fake provider requests a tool on its first two calls and answers with
    plain text afterwards. With ``max_iterations=2`` the runner is expected to
    issue a third request with ``tools=None`` and return that text.
    """
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    closing_text = "Read the directory twice. More investigation remains."
    seen_requests: list[dict] = []

    async def chat_with_retry(*, messages, tools=None, **kwargs):
        seen_requests.append({"messages": messages, "tools": tools})
        request_no = len(seen_requests)
        if request_no > 2:
            # Third request onwards: plain text, no tool calls.
            return LLMResponse(
                content=closing_text,
                tool_calls=[],
                usage={"prompt_tokens": 10, "completion_tokens": 7},
            )
        pending_call = ToolCallRequest(
            id=f"call_{request_no}",
            name="list_dir",
            arguments={"path": "."},
        )
        return LLMResponse(content="still working", tool_calls=[pending_call])

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = chat_with_retry

    tools = MagicMock()
    tools.get_definitions.return_value = []
    tools.execute = AsyncMock(return_value="tool result")

    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "inspect the repo"}],
        tools=tools,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await AgentRunner(provider).run(spec)

    assert result.stop_reason == "max_iterations"
    assert result.final_content == closing_text
    assert result.messages[-1] == {
        "role": "assistant",
        "content": closing_text,
    }
    assert len(seen_requests) == 3
    final_request = seen_requests[-1]
    assert final_request["tools"] is None
    assert "tool-call budget" in final_request["messages"][-1]["content"]
    assert tools.execute.await_count == 2
@pytest.mark.asyncio
--- a/tests/agent/test_runner_goal_continue.py
+++ b/tests/agent/test_runner_goal_continue.py
@ -150,6 +150,7 @@ async def test_runner_goal_continue_not_limited_by_injection_cycle_cap():
        max_iterations=max_iterations,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
        finalize_on_max_iterations=False,
    ))
    assert result.stop_reason == "max_iterations"
--- a/tests/session/test_turn_continuation.py
+++ b/tests/session/test_turn_continuation.py
@ -17,6 +17,7 @@ from nanobot.session.turn_continuation import (
    internal_continuation_pending,
    internal_continuation_run_started_at,
    maybe_continue_turn,
    should_finalize_on_max_iterations,
    should_stream_budget_response,
 )
@ -125,3 +126,15 @@ def test_internal_continuation_requires_budget_boundary_and_queue():
        pending_queue_available=False,
        session_metadata=meta,
    )
    assert not should_finalize_on_max_iterations(
        pending_queue_available=True,
        session_metadata=meta,
    )
    assert should_finalize_on_max_iterations(
        pending_queue_available=False,
        session_metadata=meta,
    )
    assert should_finalize_on_max_iterations(
        pending_queue_available=True,
        session_metadata={},
    )