diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index f31589cb9..b1bde811c 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -816,6 +816,11 @@ class AgentLoop: ), goal_active_predicate=lambda: sustained_goal_active(session.metadata) if session is not None else False, goal_continue_message=_goal_continue, + finalize_on_max_iterations=turn_continuation.should_finalize_on_max_iterations( + pending_queue_available=pending_queue is not None and session is not None, + session_metadata=session_metadata, + message_metadata=metadata, + ), )) finally: reset_workspace_scope(workspace_token) diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py index 8cffb3fdc..5c9ff6e2d 100644 --- a/nanobot/agent/runner.py +++ b/nanobot/agent/runner.py @@ -44,6 +44,7 @@ from nanobot.utils.progress_events import ( from nanobot.utils.prompt_templates import render_template from nanobot.utils.runtime import ( EMPTY_FINAL_RESPONSE_MESSAGE, + build_budget_exhausted_finalization_message, build_finalization_retry_message, build_goal_continue_message, build_length_recovery_message, @@ -109,6 +110,7 @@ class AgentRunSpec: llm_timeout_s: float | None = None goal_active_predicate: Callable[[], bool] | None = None goal_continue_message: str | None = None + finalize_on_max_iterations: bool = True @dataclass(slots=True) @@ -631,28 +633,28 @@ class AgentRunner: break else: stop_reason = "max_iterations" - if spec.max_iterations_message: - final_content = spec.max_iterations_message.format( - max_iterations=spec.max_iterations, - ) - else: - final_content = render_template( - "agent/max_iterations_message.md", - strip=True, - max_iterations=spec.max_iterations, - ) - self._append_final_message(messages, final_content) # Drain any remaining injections so they are appended to the # conversation history instead of being re-published as # independent inbound messages by _dispatch's finally block. - # We ignore should_continue here because the for-loop has already - # exhausted all iterations. + # We include them before the no-tools finalization pass so the + # final response can account for every known follow-up. drained_after_max_iterations, injection_cycles = await self._try_drain_injections( spec, messages, None, injection_cycles, phase="after max_iterations", ) if drained_after_max_iterations: had_injections = True + final_content = None + if spec.finalize_on_max_iterations: + final_content = await self._try_finalize_after_max_iterations( + spec, + hook, + messages, + usage, + ) + if final_content is None: + final_content = self._max_iterations_fallback(spec) + self._append_final_message(messages, final_content) return AgentRunResult( final_content=final_content, @@ -831,8 +833,7 @@ class AgentRunner: messages: list[dict[str, Any]], ): retry_messages = self._finalization_retry_messages(messages) - kwargs = self._build_request_kwargs(spec, retry_messages, tools=None) - return await self.provider.chat_with_retry(**kwargs) + return await self._request_no_tools(spec, retry_messages) @staticmethod def _finalization_retry_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]: @@ -840,6 +841,75 @@ class AgentRunner: retry_messages.append(build_finalization_retry_message()) return retry_messages + async def _try_finalize_after_max_iterations( + self, + spec: AgentRunSpec, + hook: AgentHook, + messages: list[dict[str, Any]], + usage: dict[str, int], + ) -> str | None: + retry_messages = self._budget_exhausted_finalization_messages(messages) + try: + response = await self._request_no_tools(spec, retry_messages) + except Exception: + logger.exception( + "Budget-exhausted finalization failed for {}; using fallback", + spec.session_key or "default", + ) + return None + + raw_usage = self._usage_or_estimate(spec, retry_messages, response) + self._accumulate_usage(usage, raw_usage) + if response.finish_reason == "error" or response.has_tool_calls: + logger.warning( + "Budget-exhausted finalization returned finish_reason='{}' " + "with {} tool call(s) for {}; using fallback", + response.finish_reason, + len(response.tool_calls), + spec.session_key or "default", + ) + return None + + context = AgentHookContext( + iteration=spec.max_iterations, + messages=messages, + response=response, + usage=dict(raw_usage), + session_key=spec.session_key, + ) + clean = hook.finalize_content(context, response.content) + if is_blank_text(clean): + return None + return clean + + async def _request_no_tools( + self, + spec: AgentRunSpec, + messages: list[dict[str, Any]], + ) -> LLMResponse: + kwargs = self._build_request_kwargs(spec, messages, tools=None) + return await self.provider.chat_with_retry(**kwargs) + + @staticmethod + def _budget_exhausted_finalization_messages( + messages: list[dict[str, Any]], + ) -> list[dict[str, Any]]: + retry_messages = list(messages) + retry_messages.append(build_budget_exhausted_finalization_message()) + return retry_messages + + @staticmethod + def _max_iterations_fallback(spec: AgentRunSpec) -> str: + if spec.max_iterations_message: + return spec.max_iterations_message.format( + max_iterations=spec.max_iterations, + ) + return render_template( + "agent/max_iterations_message.md", + strip=True, + max_iterations=spec.max_iterations, + ) + def _usage_or_estimate( self, spec: AgentRunSpec, diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index 8a752c6f7..88c22e610 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -248,6 +248,7 @@ class SubagentManager: max_tool_result_chars=self.max_tool_result_chars, hook=_SubagentHook(task_id, status), max_iterations_message="Task completed but no final response was generated.", + finalize_on_max_iterations=False, error_message=None, fail_on_tool_error=True, checkpoint_callback=_on_checkpoint, diff --git a/nanobot/session/turn_continuation.py b/nanobot/session/turn_continuation.py index 28c77bf64..17c8e237b 100644 --- a/nanobot/session/turn_continuation.py +++ b/nanobot/session/turn_continuation.py @@ -70,14 +70,36 @@ def should_stream_budget_response( message_metadata: Mapping[str, Any] | None = None, ) -> bool: """Return whether the budget-boundary response should be sent to the user.""" - return not _continuation_available( - stop_reason=stop_reason, + if stop_reason != "max_iterations": + return True + return should_finalize_on_max_iterations( pending_queue_available=pending_queue_available, session_metadata=session_metadata, message_metadata=message_metadata, ) +def should_finalize_on_max_iterations( + *, + pending_queue_available: bool, + session_metadata: Mapping[str, Any] | None, + message_metadata: Mapping[str, Any] | None = None, +) -> bool: + """Return whether a max-iteration boundary should produce a final response. + + When a sustained goal can continue internally, the current runner slice + should stop without spending an extra no-tools finalization call. The next + queued continuation slice owns the eventual user-visible response. + """ + return not ( + pending_queue_available + and _goal_continuation_available( + session_metadata, + message_metadata=message_metadata, + ) + ) + + async def maybe_continue_turn(ctx: Any) -> bool: """Queue an internal continuation for *ctx* when policy allows it.""" if ctx.session is None or ctx.pending_queue is None: diff --git a/nanobot/utils/runtime.py b/nanobot/utils/runtime.py index 70d14c442..9141583ea 100644 --- a/nanobot/utils/runtime.py +++ b/nanobot/utils/runtime.py @@ -24,6 +24,14 @@ FINALIZATION_RETRY_PROMPT = ( "Please provide your response to the user based on the conversation above." ) +BUDGET_EXHAUSTED_FINALIZATION_PROMPT = ( + "The tool-call budget for this turn is exhausted. Based only on the " + "conversation and tool results above, provide a concise final response to " + "the user. Do not call or request tools. Do not claim the task is complete " + "unless the evidence above clearly shows it is complete. State what was " + "done, what remains, and the best next step if anything is incomplete." +) + LENGTH_RECOVERY_PROMPT = ( "Output limit reached. Continue exactly where you left off " "— no recap, no apology. Break remaining work into smaller steps if needed." @@ -65,6 +73,11 @@ def build_finalization_retry_message() -> dict[str, str]: return {"role": "user", "content": FINALIZATION_RETRY_PROMPT} +def build_budget_exhausted_finalization_message() -> dict[str, str]: + """Prompt the model for a no-tools final response after budget exhaustion.""" + return {"role": "user", "content": BUDGET_EXHAUSTED_FINALIZATION_PROMPT} + + def build_length_recovery_message() -> dict[str, str]: """Prompt the model to continue after hitting output token limit.""" return {"role": "user", "content": LENGTH_RECOVERY_PROMPT} diff --git a/tests/agent/test_loop_runner_integration.py b/tests/agent/test_loop_runner_integration.py index 5f9c356ce..dbd213185 100644 --- a/tests/agent/test_loop_runner_integration.py +++ b/tests/agent/test_loop_runner_integration.py @@ -64,7 +64,8 @@ async def test_loop_goal_turn_uses_standard_iteration_budget(tmp_path): ) assert stop_reason == "max_iterations" - assert loop.provider.chat_with_retry.await_count == 2 + assert loop.provider.chat_with_retry.await_count == 3 + assert loop.provider.chat_with_retry.await_args_list[-1].kwargs["tools"] is None assert final_content == ( "I reached the maximum number of tool call iterations (2) " "without completing the task. You can try breaking the task into smaller steps." diff --git a/tests/agent/test_runner_core.py b/tests/agent/test_runner_core.py index 1fc82b7a3..1119930ce 100644 --- a/tests/agent/test_runner_core.py +++ b/tests/agent/test_runner_core.py @@ -101,6 +101,61 @@ async def test_runner_returns_max_iterations_fallback(): ) assert result.messages[-1]["role"] == "assistant" assert result.messages[-1]["content"] == result.final_content + assert provider.chat_with_retry.await_count == 3 + assert provider.chat_with_retry.await_args_list[-1].kwargs["tools"] is None + assert tools.execute.await_count == 2 + + +@pytest.mark.asyncio +async def test_runner_uses_no_tools_finalization_after_max_iterations(): + from nanobot.agent.runner import AgentRunner, AgentRunSpec + + provider = MagicMock(spec=LLMProvider) + calls: list[dict] = [] + + async def chat_with_retry(*, messages, tools=None, **kwargs): + calls.append({"messages": messages, "tools": tools}) + if len(calls) <= 2: + return LLMResponse( + content="still working", + tool_calls=[ + ToolCallRequest( + id=f"call_{len(calls)}", + name="list_dir", + arguments={"path": "."}, + ) + ], + ) + return LLMResponse( + content="Read the directory twice. More investigation remains.", + tool_calls=[], + usage={"prompt_tokens": 10, "completion_tokens": 7}, + ) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock(return_value="tool result") + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "inspect the repo"}], + tools=tools, + model="test-model", + max_iterations=2, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert result.stop_reason == "max_iterations" + assert result.final_content == "Read the directory twice. More investigation remains." + assert result.messages[-1] == { + "role": "assistant", + "content": "Read the directory twice. More investigation remains.", + } + assert len(calls) == 3 + assert calls[-1]["tools"] is None + assert "tool-call budget" in calls[-1]["messages"][-1]["content"] + assert tools.execute.await_count == 2 @pytest.mark.asyncio diff --git a/tests/agent/test_runner_goal_continue.py b/tests/agent/test_runner_goal_continue.py index 88be011ec..e5aec92fd 100644 --- a/tests/agent/test_runner_goal_continue.py +++ b/tests/agent/test_runner_goal_continue.py @@ -150,6 +150,7 @@ async def test_runner_goal_continue_not_limited_by_injection_cycle_cap(): max_iterations=max_iterations, max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, goal_active_predicate=lambda: True, + finalize_on_max_iterations=False, )) assert result.stop_reason == "max_iterations" diff --git a/tests/session/test_turn_continuation.py b/tests/session/test_turn_continuation.py index c6d58e5dc..a42ad4781 100644 --- a/tests/session/test_turn_continuation.py +++ b/tests/session/test_turn_continuation.py @@ -17,6 +17,7 @@ from nanobot.session.turn_continuation import ( internal_continuation_pending, internal_continuation_run_started_at, maybe_continue_turn, + should_finalize_on_max_iterations, should_stream_budget_response, ) @@ -125,3 +126,15 @@ def test_internal_continuation_requires_budget_boundary_and_queue(): pending_queue_available=False, session_metadata=meta, ) + assert not should_finalize_on_max_iterations( + pending_queue_available=True, + session_metadata=meta, + ) + assert should_finalize_on_max_iterations( + pending_queue_available=False, + session_metadata=meta, + ) + assert should_finalize_on_max_iterations( + pending_queue_available=True, + session_metadata={}, + )