fix(agent): finalize max-iteration turns without tools

2026-06-15 07:14:08 +00:00 · 2026-06-10 11:24:03 +08:00 · 2026-06-10 11:24:03 +08:00 · 5d91d59cf7
commit 5d91d59cf7
parent 99f7f371fa
9 changed files with 199 additions and 18 deletions
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@ -816,6 +816,11 @@ class AgentLoop:
                ),
                goal_active_predicate=lambda: sustained_goal_active(session.metadata) if session is not None else False,
                goal_continue_message=_goal_continue,
+                finalize_on_max_iterations=turn_continuation.should_finalize_on_max_iterations(
+                    pending_queue_available=pending_queue is not None and session is not None,
+                    session_metadata=session_metadata,
+                    message_metadata=metadata,
+                ),
            ))
        finally:
            reset_workspace_scope(workspace_token)
--- a/nanobot/agent/runner.py
+++ b/nanobot/agent/runner.py
@ -44,6 +44,7 @@ from nanobot.utils.progress_events import (
 from nanobot.utils.prompt_templates import render_template
 from nanobot.utils.runtime import (
    EMPTY_FINAL_RESPONSE_MESSAGE,
+    build_budget_exhausted_finalization_message,
    build_finalization_retry_message,
    build_goal_continue_message,
    build_length_recovery_message,
@ -109,6 +110,7 @@ class AgentRunSpec:
    llm_timeout_s: float | None = None
    goal_active_predicate: Callable[[], bool] | None = None
    goal_continue_message: str | None = None
+    finalize_on_max_iterations: bool = True


@dataclass(slots=True)
@ -631,28 +633,28 @@ class AgentRunner:
            break
        else:
            stop_reason = "max_iterations"
-            if spec.max_iterations_message:
-                final_content = spec.max_iterations_message.format(
-                    max_iterations=spec.max_iterations,
-                )
-            else:
-                final_content = render_template(
-                    "agent/max_iterations_message.md",
-                    strip=True,
-                    max_iterations=spec.max_iterations,
-                )
-            self._append_final_message(messages, final_content)
            # Drain any remaining injections so they are appended to the
            # conversation history instead of being re-published as
            # independent inbound messages by _dispatch's finally block.
-            # We ignore should_continue here because the for-loop has already
-            # exhausted all iterations.
+            # We include them before the no-tools finalization pass so the
+            # final response can account for every known follow-up.
            drained_after_max_iterations, injection_cycles = await self._try_drain_injections(
                spec, messages, None, injection_cycles,
                phase="after max_iterations",
            )
            if drained_after_max_iterations:
                had_injections = True
+            final_content = None
+            if spec.finalize_on_max_iterations:
+                final_content = await self._try_finalize_after_max_iterations(
+                    spec,
+                    hook,
+                    messages,
+                    usage,
+                )
+            if final_content is None:
+                final_content = self._max_iterations_fallback(spec)
+            self._append_final_message(messages, final_content)

        return AgentRunResult(
            final_content=final_content,
@ -831,8 +833,7 @@ class AgentRunner:
        messages: list[dict[str, Any]],
    ):
        retry_messages = self._finalization_retry_messages(messages)
-        kwargs = self._build_request_kwargs(spec, retry_messages, tools=None)
-        return await self.provider.chat_with_retry(**kwargs)
+        return await self._request_no_tools(spec, retry_messages)

    @staticmethod
    def _finalization_retry_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
@ -840,6 +841,75 @@ class AgentRunner:
        retry_messages.append(build_finalization_retry_message())
        return retry_messages

+    async def _try_finalize_after_max_iterations(
+        self,
+        spec: AgentRunSpec,
+        hook: AgentHook,
+        messages: list[dict[str, Any]],
+        usage: dict[str, int],
+    ) -> str | None:
+        """Request one tool-free final answer after the iteration budget is spent.
+
+        Sends a copy of *messages* with the budget-exhausted prompt appended
+        through ``_request_no_tools``, adds that call's usage to *usage*, and
+        passes the reply through ``hook.finalize_content``.
+
+        Returns:
+            The finalized text, or ``None`` when the request raises, the
+            response has ``finish_reason == "error"``, the response carries
+            tool calls, or the finalized text is blank.
+        """
+        retry_messages = self._budget_exhausted_finalization_messages(messages)
+        try:
+            response = await self._request_no_tools(spec, retry_messages)
+        except Exception:
+            # Best effort: a failed request is logged with its traceback and
+            # reported as None instead of propagating.
+            logger.exception(
+                "Budget-exhausted finalization failed for {}; using fallback",
+                spec.session_key or "default",
+            )
+            return None
+
+        # Usage is accumulated before the response is validated, so a reply
+        # that is rejected below is still counted.
+        raw_usage = self._usage_or_estimate(spec, retry_messages, response)
+        self._accumulate_usage(usage, raw_usage)
+        if response.finish_reason == "error" or response.has_tool_calls:
+            logger.warning(
+                "Budget-exhausted finalization returned finish_reason='{}' "
+                "with {} tool call(s) for {}; using fallback",
+                response.finish_reason,
+                len(response.tool_calls),
+                spec.session_key or "default",
+            )
+            return None
+
+        # The hook context receives the original messages list, not the copy
+        # that has the finalization prompt appended.
+        context = AgentHookContext(
+            iteration=spec.max_iterations,
+            messages=messages,
+            response=response,
+            usage=dict(raw_usage),
+            session_key=spec.session_key,
+        )
+        clean = hook.finalize_content(context, response.content)
+        if is_blank_text(clean):
+            return None
+        return clean
+
+    async def _request_no_tools(
+        self,
+        spec: AgentRunSpec,
+        messages: list[dict[str, Any]],
+    ) -> LLMResponse:
+        """Send *messages* to the provider with ``tools=None`` and return the response."""
+        request_kwargs = self._build_request_kwargs(spec, messages, tools=None)
+        response = await self.provider.chat_with_retry(**request_kwargs)
+        return response
+
+    @staticmethod
+    def _budget_exhausted_finalization_messages(
+        messages: list[dict[str, Any]],
+    ) -> list[dict[str, Any]]:
+        """Return a shallow copy of *messages* with the budget-exhausted prompt appended.
+
+        The input list itself is not modified.
+        """
+        return [*messages, build_budget_exhausted_finalization_message()]
+
+    @staticmethod
+    def _max_iterations_fallback(spec: AgentRunSpec) -> str:
+        """Return the canned max-iterations notice for *spec*.
+
+        A non-empty ``spec.max_iterations_message`` takes precedence and is
+        formatted with ``max_iterations``; otherwise the
+        ``agent/max_iterations_message.md`` template is rendered.
+        """
+        custom_message = spec.max_iterations_message
+        if not custom_message:
+            return render_template(
+                "agent/max_iterations_message.md",
+                strip=True,
+                max_iterations=spec.max_iterations,
+            )
+        return custom_message.format(max_iterations=spec.max_iterations)
+
    def _usage_or_estimate(
        self,
        spec: AgentRunSpec,
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@ -248,6 +248,7 @@ class SubagentManager:
                    max_tool_result_chars=self.max_tool_result_chars,
                    hook=_SubagentHook(task_id, status),
                    max_iterations_message="Task completed but no final response was generated.",
+                    finalize_on_max_iterations=False,
                    error_message=None,
                    fail_on_tool_error=True,
                    checkpoint_callback=_on_checkpoint,
--- a/nanobot/session/turn_continuation.py
+++ b/nanobot/session/turn_continuation.py
@ -70,14 +70,36 @@ def should_stream_budget_response(
    message_metadata: Mapping[str, Any] | None = None,
 ) -> bool:
    """Return whether the budget-boundary response should be sent to the user."""
-    return not _continuation_available(
-        stop_reason=stop_reason,
+    if stop_reason != "max_iterations":
+        return True
+    return should_finalize_on_max_iterations(
        pending_queue_available=pending_queue_available,
        session_metadata=session_metadata,
        message_metadata=message_metadata,
    )


+def should_finalize_on_max_iterations(
+    *,
+    pending_queue_available: bool,
+    session_metadata: Mapping[str, Any] | None,
+    message_metadata: Mapping[str, Any] | None = None,
+) -> bool:
+    """Decide whether hitting the iteration cap should yield a final response.
+
+    Finalization is skipped only when a pending queue is available and a goal
+    continuation is available for the given metadata. In that case the
+    current runner slice stops without spending an extra no-tools
+    finalization call, and the next queued continuation slice owns the
+    eventual user-visible response.
+    """
+    # Without a queue there is nowhere to schedule a continuation, so the
+    # metadata is not consulted at all.
+    if not pending_queue_available:
+        return True
+    can_continue = _goal_continuation_available(
+        session_metadata,
+        message_metadata=message_metadata,
+    )
+    return not can_continue
+
+
 async def maybe_continue_turn(ctx: Any) -> bool:
    """Queue an internal continuation for *ctx* when policy allows it."""
    if ctx.session is None or ctx.pending_queue is None:
--- a/nanobot/utils/runtime.py
+++ b/nanobot/utils/runtime.py
@ -24,6 +24,14 @@ FINALIZATION_RETRY_PROMPT = (
    "Please provide your response to the user based on the conversation above."
 )

+# Prompt text for the no-tools finalization request that follows an
+# exhausted iteration budget; it is wrapped in a user-role message by
+# build_budget_exhausted_finalization_message().
+BUDGET_EXHAUSTED_FINALIZATION_PROMPT = (
+    "The tool-call budget for this turn is exhausted. Based only on the "
+    "conversation and tool results above, provide a concise final response to "
+    "the user. Do not call or request tools. Do not claim the task is complete "
+    "unless the evidence above clearly shows it is complete. State what was "
+    "done, what remains, and the best next step if anything is incomplete."
+)
+
 LENGTH_RECOVERY_PROMPT = (
    "Output limit reached. Continue exactly where you left off "
    "— no recap, no apology. Break remaining work into smaller steps if needed."
@ -65,6 +73,11 @@ def build_finalization_retry_message() -> dict[str, str]:
    return {"role": "user", "content": FINALIZATION_RETRY_PROMPT}


+def build_budget_exhausted_finalization_message() -> dict[str, str]:
+    """Build the user-role message asking for a no-tools final response after budget exhaustion."""
+    finalization_message = {
+        "role": "user",
+        "content": BUDGET_EXHAUSTED_FINALIZATION_PROMPT,
+    }
+    return finalization_message
+
+
 def build_length_recovery_message() -> dict[str, str]:
    """Prompt the model to continue after hitting output token limit."""
    return {"role": "user", "content": LENGTH_RECOVERY_PROMPT}
--- a/tests/agent/test_loop_runner_integration.py
+++ b/tests/agent/test_loop_runner_integration.py
@ -64,7 +64,8 @@ async def test_loop_goal_turn_uses_standard_iteration_budget(tmp_path):
    )

    assert stop_reason == "max_iterations"
-    assert loop.provider.chat_with_retry.await_count == 2
+    assert loop.provider.chat_with_retry.await_count == 3
+    assert loop.provider.chat_with_retry.await_args_list[-1].kwargs["tools"] is None
    assert final_content == (
        "I reached the maximum number of tool call iterations (2) "
        "without completing the task. You can try breaking the task into smaller steps."
--- a/tests/agent/test_runner_core.py
+++ b/tests/agent/test_runner_core.py
@ -101,6 +101,61 @@ async def test_runner_returns_max_iterations_fallback():
    )
    assert result.messages[-1]["role"] == "assistant"
    assert result.messages[-1]["content"] == result.final_content
+    assert provider.chat_with_retry.await_count == 3
+    assert provider.chat_with_retry.await_args_list[-1].kwargs["tools"] is None
+    assert tools.execute.await_count == 2
+
+
+@pytest.mark.asyncio
+async def test_runner_uses_no_tools_finalization_after_max_iterations():
+    """The runner returns the model's tool-free reply once max_iterations is hit.
+
+    The fake provider answers the first two requests with a tool call and the
+    third with plain text; the test checks that the third request is sent with
+    ``tools=None``, ends with the budget-exhausted prompt, and that its text
+    becomes the final content.
+    """
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
+
+    provider = MagicMock(spec=LLMProvider)
+    # Records every provider request so the last one can be inspected.
+    calls: list[dict] = []
+
+    async def chat_with_retry(*, messages, tools=None, **kwargs):
+        calls.append({"messages": messages, "tools": tools})
+        # Requests 1 and 2 keep asking for a tool, using up max_iterations=2.
+        if len(calls) <= 2:
+            return LLMResponse(
+                content="still working",
+                tool_calls=[
+                    ToolCallRequest(
+                        id=f"call_{len(calls)}",
+                        name="list_dir",
+                        arguments={"path": "."},
+                    )
+                ],
+            )
+        # Request 3 is the finalization pass: plain text, no tool calls.
+        return LLMResponse(
+            content="Read the directory twice. More investigation remains.",
+            tool_calls=[],
+            usage={"prompt_tokens": 10, "completion_tokens": 7},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="tool result")
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "inspect the repo"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.stop_reason == "max_iterations"
+    assert result.final_content == "Read the directory twice. More investigation remains."
+    assert result.messages[-1] == {
+        "role": "assistant",
+        "content": "Read the directory twice. More investigation remains.",
+    }
+    # Two tool iterations plus exactly one finalization request.
+    assert len(calls) == 3
+    assert calls[-1]["tools"] is None
+    assert "tool-call budget" in calls[-1]["messages"][-1]["content"]
+    # The finalization pass must not trigger any further tool execution.
+    assert tools.execute.await_count == 2


@pytest.mark.asyncio
--- a/tests/agent/test_runner_goal_continue.py
+++ b/tests/agent/test_runner_goal_continue.py
@ -150,6 +150,7 @@ async def test_runner_goal_continue_not_limited_by_injection_cycle_cap():
        max_iterations=max_iterations,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
+        finalize_on_max_iterations=False,
    ))

    assert result.stop_reason == "max_iterations"
--- a/tests/session/test_turn_continuation.py
+++ b/tests/session/test_turn_continuation.py
@ -17,6 +17,7 @@ from nanobot.session.turn_continuation import (
    internal_continuation_pending,
    internal_continuation_run_started_at,
    maybe_continue_turn,
+    should_finalize_on_max_iterations,
    should_stream_budget_response,
 )

@ -125,3 +126,15 @@ def test_internal_continuation_requires_budget_boundary_and_queue():
        pending_queue_available=False,
        session_metadata=meta,
    )
+    assert not should_finalize_on_max_iterations(
+        pending_queue_available=True,
+        session_metadata=meta,
+    )
+    assert should_finalize_on_max_iterations(
+        pending_queue_available=False,
+        session_metadata=meta,
+    )
+    assert should_finalize_on_max_iterations(
+        pending_queue_available=True,
+        session_metadata={},
+    )