fix(agent): prevent runner from exiting while sustained goal is active

`long_task` registers a sustained objective, but `AgentRunner` would still exit with `stop_reason="completed"` whenever the LLM produced a final text response without calling `complete_goal`, which defeated the purpose of sustained goals. Add `goal_active_predicate` and `goal_continue_message` to `AgentRunSpec`. When the predicate returns `True` at the natural completion checkpoint and no other injections are pending, the runner injects a continuation message via the existing `_try_drain_injections` machinery and keeps looping instead of exiting. The forced continuation is still bounded by `max_iterations` and the existing injection-cycle limit, and it is only enabled at the "after final response" checkpoint, so error exits are unaffected. Also define the default continuation prompt and its message builder in `nanobot/utils/runtime.py`, alongside the existing recovery-message builders.
2026-06-13 14:23:58 +00:00 · 2026-05-25 18:11:08 +08:00 · 2026-05-25 18:11:08 +08:00 · 7bbd9c7103
commit 7bbd9c7103
parent 418cb23da2
4 changed files with 221 additions and 2 deletions
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -34,7 +34,9 @@ from nanobot.config.schema import AgentDefaults, ModelPresetConfig
 from nanobot.providers.base import LLMProvider
 from nanobot.providers.factory import ProviderSnapshot
 from nanobot.session.goal_state import (
+    goal_state_runtime_lines,
    runner_wall_llm_timeout_s,
+    sustained_goal_active,
 )
 from nanobot.session.manager import Session, SessionManager
 from nanobot.session.webui_turns import (
@@ -47,7 +49,10 @@ from nanobot.utils.helpers import image_placeholder_text
 from nanobot.utils.helpers import truncate_text as truncate_text_fn
 from nanobot.utils.image_generation_intent import image_generation_prompt
 from nanobot.utils.llm_runtime import LLMRuntime
-from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE
+from nanobot.utils.runtime import (
+    EMPTY_FINAL_RESPONSE_MESSAGE,
+    SUSTAINED_GOAL_CONTINUE_PROMPT,
+)

 if TYPE_CHECKING:
    from nanobot.config.schema import (
@@ -729,6 +734,15 @@ class AgentLoop:

        active_session_key = session.key if session else session_key
        file_state_token = bind_file_states(self._file_state_store.for_session(active_session_key))
+        # Build continuation message that embeds the active goal objective so
+        # the LLM can see it even if earlier Runtime Context was truncated.
+        _goal_lines = goal_state_runtime_lines(session.metadata if session is not None else None)
+        _goal_continue = (
+            "You have an active sustained goal:\n\n"
+            + "\n".join(_goal_lines)
+            + "\n\nPlease continue working toward the objective using your tools, "
+            "or call complete_goal if the work is truly finished."
+        ) if _goal_lines else SUSTAINED_GOAL_CONTINUE_PROMPT
        try:
            result = await self.runner.run(AgentRunSpec(
                initial_messages=initial_messages,
@@ -756,6 +770,8 @@ class AgentLoop:
                    session.key if session is not None else session_key,
                    metadata=(session.metadata if session is not None else None),
                ),
+                goal_active_predicate=lambda: sustained_goal_active(session.metadata) if session is not None else False,
+                goal_continue_message=_goal_continue,
            ))
        finally:
            reset_file_states(file_state_token)
--- a/nanobot/agent/runner.py
+++ b/nanobot/agent/runner.py
@@ -8,7 +8,7 @@ import os
 from contextlib import suppress
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any
+from typing import Any, Callable

 from loguru import logger

@@ -42,6 +42,7 @@ from nanobot.utils.prompt_templates import render_template
 from nanobot.utils.runtime import (
    EMPTY_FINAL_RESPONSE_MESSAGE,
    build_finalization_retry_message,
+    build_goal_continue_message,
    build_length_recovery_message,
    ensure_nonempty_tool_result,
    is_blank_text,
@@ -97,6 +98,8 @@ class AgentRunSpec:
    checkpoint_callback: Any | None = None
    injection_callback: Any | None = None
    llm_timeout_s: float | None = None
+    goal_active_predicate: Callable[[], bool] | None = None
+    goal_continue_message: str | None = None


@dataclass(slots=True)
@@ -167,6 +170,7 @@ class AgentRunner:
        *,
        phase: str = "after error",
        iteration: int | None = None,
+        allow_goal_continue: bool = False,
    ) -> tuple[bool, int]:
        """Drain pending injections. Returns (should_continue, updated_cycles).

@@ -178,6 +182,10 @@ class AgentRunner:
        if injection_cycles >= _MAX_INJECTION_CYCLES:
            return False, injection_cycles
        injections = await self._drain_injections(spec)
+        if not injections and allow_goal_continue and assistant_message is not None:
+            predicate = spec.goal_active_predicate
+            if predicate is not None and predicate():
+                injections = [build_goal_continue_message(spec.goal_continue_message)]
        if not injections:
            return False, injection_cycles
        injection_cycles += 1
@@ -475,6 +483,7 @@ class AgentRunner:
                spec, messages, assistant_message, injection_cycles,
                phase="after final response",
                iteration=iteration,
+                allow_goal_continue=True,
            )
            if should_continue:
                had_injections = True
--- a/nanobot/utils/runtime.py
+++ b/nanobot/utils/runtime.py
@@ -29,6 +29,11 @@ LENGTH_RECOVERY_PROMPT = (
    "— no recap, no apology. Break remaining work into smaller steps if needed."
 )

+SUSTAINED_GOAL_CONTINUE_PROMPT = (
+    "You have an active sustained goal. Please continue working toward the "
+    "objective using your tools, or call complete_goal if the work is truly finished."
+)
+

 def empty_tool_result_message(tool_name: str) -> str:
    """Short prompt-safe marker for tools that completed without visible output."""
@@ -65,6 +70,11 @@ def build_length_recovery_message() -> dict[str, str]:
    return {"role": "user", "content": LENGTH_RECOVERY_PROMPT}


+def build_goal_continue_message(custom: str | None = None) -> dict[str, str]:
+    """Prompt the model to continue when a sustained goal is still active."""
+    return {"role": "user", "content": custom or SUSTAINED_GOAL_CONTINUE_PROMPT}
+
+
 def external_lookup_signature(tool_name: str, arguments: dict[str, Any]) -> str | None:
    """Stable signature for repeated external lookups we want to throttle."""
    if tool_name == "web_fetch":
--- a/tests/agent/test_runner_goal_continue.py
+++ b/tests/agent/test_runner_goal_continue.py
@@ -0,0 +1,184 @@
+"""Tests for sustained-goal continuation in AgentRunner.
+
+When a goal_active_predicate returns True, the runner must not exit with
+stop_reason="completed" after a plain-text final response. Instead it should
+inject a continuation message and keep looping (similar to mid-turn injection).
+"""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.config.schema import AgentDefaults
+from nanobot.providers.base import LLMProvider, LLMResponse
+
+_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
+
+
+@pytest.mark.asyncio
+async def test_runner_exits_normally_without_predicate():
+    """Baseline: no predicate, runner exits with completed on final text."""
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
+        content="all done", tool_calls=[], usage={},
+    ))
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.stop_reason == "completed"
+    assert result.final_content == "all done"
+
+
+@pytest.mark.asyncio
+async def test_runner_exits_normally_with_inactive_goal():
+    """Predicate returns False, runner should exit normally."""
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
+        content="all done", tool_calls=[], usage={},
+    ))
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        goal_active_predicate=lambda: False,
+    ))
+
+    assert result.stop_reason == "completed"
+    assert result.final_content == "all done"
+
+
+@pytest.mark.asyncio
+async def test_runner_forces_continue_when_goal_active():
+    """Predicate returns True on final text → runner injects continuation and loops.
+
+    We set max_iterations=3 and let the provider return final text every time.
+    Without the fix this would exit on the first iteration with stop_reason
+    "completed". With the fix the runner is forced to continue until
+    max_iterations is hit.
+    """
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
+        content="still working", tool_calls=[], usage={},
+    ))
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        goal_active_predicate=lambda: True,
+    ))
+
+    # Because the predicate keeps returning True, the runner should never
+    # naturally complete. It loops until max_iterations is exhausted.
+    assert result.stop_reason == "max_iterations"
+    # The injected continuation message should be present in the message list.
+    user_msgs = [m for m in result.messages if m.get("role") == "user"]
+    assert any("active sustained goal" in str(m.get("content", "")) for m in user_msgs)
+
+
+@pytest.mark.asyncio
+async def test_runner_respects_max_iterations_even_with_active_goal():
+    """A single iteration with active goal still hits max_iterations."""
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
+        content="still working", tool_calls=[], usage={},
+    ))
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=1,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        goal_active_predicate=lambda: True,
+    ))
+
+    assert result.stop_reason == "max_iterations"
+
+
+@pytest.mark.asyncio
+async def test_runner_does_not_force_continue_on_error():
+    """Even with active goal, an LLM error should exit with stop_reason="error"."""
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
+        content=None, tool_calls=[], usage={},
+        finish_reason="error",
+    ))
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        goal_active_predicate=lambda: True,
+    ))
+
+    assert result.stop_reason == "error"
+
+
+@pytest.mark.asyncio
+async def test_runner_uses_custom_goal_continue_message():
+    """Custom goal_continue_message should be injected instead of the default."""
+    from nanobot.agent.runner import AgentRunner, AgentRunSpec
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
+        content="still working", tool_calls=[], usage={},
+    ))
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    custom_msg = "CUSTOM_CONTINUE_PLEASE"
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "do task"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=2,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        goal_active_predicate=lambda: True,
+        goal_continue_message=custom_msg,
+    ))
+
+    user_msgs = [m for m in result.messages if m.get("role") == "user"]
+    assert any(custom_msg in str(m.get("content", "")) for m in user_msgs)