diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index f31589cb9..b1bde811c 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -816,6 +816,11 @@ class AgentLoop:
                 ),
                 goal_active_predicate=lambda: sustained_goal_active(session.metadata) if session is not None else False,
                 goal_continue_message=_goal_continue,
+                finalize_on_max_iterations=turn_continuation.should_finalize_on_max_iterations(
+                    pending_queue_available=pending_queue is not None and session is not None,
+                    session_metadata=session_metadata,
+                    message_metadata=metadata,
+                ),
             ))
         finally:
             reset_workspace_scope(workspace_token)
diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py
index 8cffb3fdc..5c9ff6e2d 100644
--- a/nanobot/agent/runner.py
+++ b/nanobot/agent/runner.py
@@ -44,6 +44,7 @@ from nanobot.utils.progress_events import (
 from nanobot.utils.prompt_templates import render_template
 from nanobot.utils.runtime import (
     EMPTY_FINAL_RESPONSE_MESSAGE,
+    build_budget_exhausted_finalization_message,
     build_finalization_retry_message,
     build_goal_continue_message,
     build_length_recovery_message,
@@ -109,6 +110,7 @@ class AgentRunSpec:
     llm_timeout_s: float | None = None
     goal_active_predicate: Callable[[], bool] | None = None
     goal_continue_message: str | None = None
+    finalize_on_max_iterations: bool = True
 
 
 @dataclass(slots=True)
@@ -631,28 +633,28 @@ class AgentRunner:
             break
         else:
             stop_reason = "max_iterations"
-            if spec.max_iterations_message:
-                final_content = spec.max_iterations_message.format(
-                    max_iterations=spec.max_iterations,
-                )
-            else:
-                final_content = render_template(
-                    "agent/max_iterations_message.md",
-                    strip=True,
-                    max_iterations=spec.max_iterations,
-                )
-            self._append_final_message(messages, final_content)
             # Drain any remaining injections so they are appended to the
             # conversation history instead of being re-published as
             # independent inbound messages by _dispatch's finally block.
-            # We ignore should_continue here because the for-loop has already
-            # exhausted all iterations.
+            # We include them before the no-tools finalization pass so the
+            # final response can account for every known follow-up.
             drained_after_max_iterations, injection_cycles = await self._try_drain_injections(
                 spec, messages, None, injection_cycles,
                 phase="after max_iterations",
             )
             if drained_after_max_iterations:
                 had_injections = True
+            final_content = None
+            if spec.finalize_on_max_iterations:
+                final_content = await self._try_finalize_after_max_iterations(
+                    spec,
+                    hook,
+                    messages,
+                    usage,
+                )
+            if final_content is None:
+                final_content = self._max_iterations_fallback(spec)
+            self._append_final_message(messages, final_content)
 
         return AgentRunResult(
             final_content=final_content,
@@ -831,8 +833,7 @@ class AgentRunner:
         messages: list[dict[str, Any]],
     ):
         retry_messages = self._finalization_retry_messages(messages)
-        kwargs = self._build_request_kwargs(spec, retry_messages, tools=None)
-        return await self.provider.chat_with_retry(**kwargs)
+        return await self._request_no_tools(spec, retry_messages)
 
     @staticmethod
     def _finalization_retry_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
@@ -840,6 +841,75 @@ class AgentRunner:
         retry_messages.append(build_finalization_retry_message())
         return retry_messages
 
+    async def _try_finalize_after_max_iterations(
+        self,
+        spec: AgentRunSpec,
+        hook: AgentHook,
+        messages: list[dict[str, Any]],
+        usage: dict[str, int],
+    ) -> str | None:
+        """Request one no-tools closing reply once the iteration budget is spent.
+
+        Returns the hook-finalized text, or ``None`` (the caller falls back) when
+        the request raises, reports an error, carries tool calls, or is blank.
+        """
+        session_label = spec.session_key or "default"
+        prompt_messages = self._budget_exhausted_finalization_messages(messages)
+        try:
+            response = await self._request_no_tools(spec, prompt_messages)
+        except Exception:
+            # Any exception from the request is logged and treated as "no reply".
+            logger.exception(
+                "Budget-exhausted finalization failed for {}; using fallback",
+                session_label,
+            )
+            return None
+
+        call_usage = self._usage_or_estimate(spec, prompt_messages, response)
+        self._accumulate_usage(usage, call_usage)
+        if response.finish_reason == "error" or response.has_tool_calls:
+            logger.warning(
+                "Budget-exhausted finalization returned finish_reason='{}' "
+                "with {} tool call(s) for {}; using fallback",
+                response.finish_reason, len(response.tool_calls), session_label,
+            )
+            return None
+
+        context = AgentHookContext(
+            iteration=spec.max_iterations, messages=messages, response=response,
+            usage=dict(call_usage), session_key=spec.session_key,
+        )
+        text = hook.finalize_content(context, response.content)
+        return None if is_blank_text(text) else text
+
+    async def _request_no_tools(
+        self, spec: AgentRunSpec, messages: list[dict[str, Any]],
+    ) -> LLMResponse:
+        """Issue one provider request whose kwargs are built with ``tools=None``."""
+        # Shared by the finalization paths so they build requests the same way.
+        request = self._build_request_kwargs(spec, messages, tools=None)
+        return await self.provider.chat_with_retry(**request)
+
+    @staticmethod
+    def _budget_exhausted_finalization_messages(
+        messages: list[dict[str, Any]],
+    ) -> list[dict[str, Any]]:
+        """Copy *messages* and append the budget-exhausted prompt."""
+        # Unpacking builds a new list, so the caller's history is not mutated.
+        return [*messages, build_budget_exhausted_finalization_message()]
+
+    @staticmethod
+    def _max_iterations_fallback(spec: AgentRunSpec) -> str:
+        """Return the canned max-iterations text for *spec*."""
+        limit = spec.max_iterations
+        custom = spec.max_iterations_message
+        if not custom:
+            # No per-spec message was supplied: render the default template.
+            return render_template(
+                "agent/max_iterations_message.md", strip=True, max_iterations=limit,
+            )
+        return custom.format(max_iterations=limit)
+
     def _usage_or_estimate(
         self,
         spec: AgentRunSpec,
diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index 8a752c6f7..88c22e610 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -248,6 +248,7 @@ class SubagentManager:
                     max_tool_result_chars=self.max_tool_result_chars,
                     hook=_SubagentHook(task_id, status),
                     max_iterations_message="Task completed but no final response was generated.",
+                    finalize_on_max_iterations=False,
                     error_message=None,
                     fail_on_tool_error=True,
                     checkpoint_callback=_on_checkpoint,
diff --git a/nanobot/session/turn_continuation.py b/nanobot/session/turn_continuation.py
index 28c77bf64..17c8e237b 100644
--- a/nanobot/session/turn_continuation.py
+++ b/nanobot/session/turn_continuation.py
@@ -70,14 +70,36 @@ def should_stream_budget_response(
     message_metadata: Mapping[str, Any] | None = None,
 ) -> bool:
     """Return whether the budget-boundary response should be sent to the user."""
-    return not _continuation_available(
-        stop_reason=stop_reason,
+    if stop_reason != "max_iterations":
+        return True
+    return should_finalize_on_max_iterations(
         pending_queue_available=pending_queue_available,
         session_metadata=session_metadata,
         message_metadata=message_metadata,
     )
 
 
+def should_finalize_on_max_iterations(
+    *,
+    pending_queue_available: bool,
+    session_metadata: Mapping[str, Any] | None,
+    message_metadata: Mapping[str, Any] | None = None,
+) -> bool:
+    """Decide whether hitting the iteration cap should yield a final reply.
+
+    Finalization is skipped only when a pending queue is available *and* a
+    sustained goal can continue internally: the extra no-tools finalization
+    call is then not spent, and the next queued continuation slice owns the
+    eventual user-visible response.
+    """
+    if not pending_queue_available:
+        # Without a queue the goal state is not consulted at all.
+        return True
+    return not _goal_continuation_available(
+        session_metadata, message_metadata=message_metadata,
+    )
+
+
 async def maybe_continue_turn(ctx: Any) -> bool:
     """Queue an internal continuation for *ctx* when policy allows it."""
     if ctx.session is None or ctx.pending_queue is None:
diff --git a/nanobot/utils/runtime.py b/nanobot/utils/runtime.py
index 70d14c442..9141583ea 100644
--- a/nanobot/utils/runtime.py
+++ b/nanobot/utils/runtime.py
@@ -24,6 +24,14 @@ FINALIZATION_RETRY_PROMPT = (
     "Please provide your response to the user based on the conversation above."
 )
 
+BUDGET_EXHAUSTED_FINALIZATION_PROMPT = (
+    "The tool-call budget for this turn is exhausted. Based only on the "
+    "conversation and tool results above, provide a concise final response to "
+    "the user. Do not call or request tools. Do not claim the task is complete "
+    "unless the evidence above clearly shows it is complete. State what was "
+    "done, what remains, and the best next step if anything is incomplete."
+)  # Used as the user-role content in build_budget_exhausted_finalization_message().
+
 LENGTH_RECOVERY_PROMPT = (
     "Output limit reached. Continue exactly where you left off "
     "— no recap, no apology. Break remaining work into smaller steps if needed."
@@ -65,6 +73,11 @@ def build_finalization_retry_message() -> dict[str, str]:
     return {"role": "user", "content": FINALIZATION_RETRY_PROMPT}
 
 
+def build_budget_exhausted_finalization_message() -> dict[str, str]:
+    """Return a fresh user-role message carrying BUDGET_EXHAUSTED_FINALIZATION_PROMPT."""
+    return {"role": "user", "content": BUDGET_EXHAUSTED_FINALIZATION_PROMPT}
+
+
 def build_length_recovery_message() -> dict[str, str]:
     """Prompt the model to continue after hitting output token limit."""
     return {"role": "user", "content": LENGTH_RECOVERY_PROMPT}
diff --git a/tests/agent/test_loop_runner_integration.py b/tests/agent/test_loop_runner_integration.py
index 5f9c356ce..dbd213185 100644
--- a/tests/agent/test_loop_runner_integration.py
+++ b/tests/agent/test_loop_runner_integration.py
@@ -64,7 +64,8 @@ async def test_loop_goal_turn_uses_standard_iteration_budget(tmp_path):
     )
 
     assert stop_reason == "max_iterations"
-    assert loop.provider.chat_with_retry.await_count == 2
+    assert loop.provider.chat_with_retry.await_count == 3
+    assert loop.provider.chat_with_retry.await_args_list[-1].kwargs["tools"] is None
     assert final_content == (
         "I reached the maximum number of tool call iterations (2) "
         "without completing the task. You can try breaking the task into smaller steps."
diff --git a/tests/agent/test_runner_core.py b/tests/agent/test_runner_core.py
index 1fc82b7a3..1119930ce 100644
--- a/tests/agent/test_runner_core.py
+++ b/tests/agent/test_runner_core.py
@@ -101,6 +101,61 @@ async def test_runner_returns_max_iterations_fallback():
     )
     assert result.messages[-1]["role"] == "assistant"
     assert result.messages[-1]["content"] == result.final_content
+    assert provider.chat_with_retry.await_count == 3
+    assert provider.chat_with_retry.await_args_list[-1].kwargs["tools"] is None
+    assert tools.execute.await_count == 2
+
+
+@pytest.mark.asyncio
+async def test_runner_uses_no_tools_finalization_after_max_iterations():
+    """After both tool iterations are spent, a third tool-free call supplies the reply."""
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
+
+    summary = "Read the directory twice. More investigation remains."
+    calls: list[dict] = []
+
+    async def chat_with_retry(*, messages, tools=None, **kwargs):
+        calls.append({"messages": messages, "tools": tools})
+        if len(calls) > 2:
+            # Third request: the finalization pass answers in plain text.
+            return LLMResponse(
+                content=summary,
+                tool_calls=[],
+                usage={"prompt_tokens": 10, "completion_tokens": 7},
+            )
+        # First two requests keep asking for a tool and exhaust the budget.
+        tool_call = ToolCallRequest(
+            id=f"call_{len(calls)}",
+            name="list_dir",
+            arguments={"path": "."},
+        )
+        return LLMResponse(content="still working", tool_calls=[tool_call])
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+    tools.execute = AsyncMock(return_value="tool result")
+
+    runner = AgentRunner(provider)
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "inspect the repo"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    )
+    result = await runner.run(spec)
+
+    assert result.stop_reason == "max_iterations"
+    assert result.final_content == summary
+    assert result.messages[-1] == {"role": "assistant", "content": summary}
+    # The extra request carried no tools and ended with the budget prompt.
+    assert len(calls) == 3
+    assert calls[-1]["tools"] is None
+    assert "tool-call budget" in calls[-1]["messages"][-1]["content"]
+    # Only the two in-budget iterations actually ran a tool.
+    assert tools.execute.await_count == 2
 
 
 @pytest.mark.asyncio
diff --git a/tests/agent/test_runner_goal_continue.py b/tests/agent/test_runner_goal_continue.py
index 88be011ec..e5aec92fd 100644
--- a/tests/agent/test_runner_goal_continue.py
+++ b/tests/agent/test_runner_goal_continue.py
@@ -150,6 +150,7 @@ async def test_runner_goal_continue_not_limited_by_injection_cycle_cap():
         max_iterations=max_iterations,
         max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
         goal_active_predicate=lambda: True,
+        finalize_on_max_iterations=False,
     ))
 
     assert result.stop_reason == "max_iterations"
diff --git a/tests/session/test_turn_continuation.py b/tests/session/test_turn_continuation.py
index c6d58e5dc..a42ad4781 100644
--- a/tests/session/test_turn_continuation.py
+++ b/tests/session/test_turn_continuation.py
@@ -17,6 +17,7 @@ from nanobot.session.turn_continuation import (
     internal_continuation_pending,
     internal_continuation_run_started_at,
     maybe_continue_turn,
+    should_finalize_on_max_iterations,
     should_stream_budget_response,
 )
 
@@ -125,3 +126,15 @@ def test_internal_continuation_requires_budget_boundary_and_queue():
         pending_queue_available=False,
         session_metadata=meta,
     )
+    assert not should_finalize_on_max_iterations(
+        pending_queue_available=True,
+        session_metadata=meta,
+    )
+    assert should_finalize_on_max_iterations(
+        pending_queue_available=False,
+        session_metadata=meta,
+    )
+    assert should_finalize_on_max_iterations(
+        pending_queue_available=True,
+        session_metadata={},
+    )