fix(agent): prevent runner from exiting while sustained goal is active

`long_task` registers a sustained objective, but `AgentRunner` would still exit with `stop_reason="completed"` when the LLM produced a final text response without calling `complete_goal`. This defeated the purpose of sustained goals. Add `goal_active_predicate` and `goal_continue_message` to `AgentRunSpec`. When the predicate returns `True` at the natural completion checkpoint, inject a continuation message via the existing `_try_drain_injections` machinery, forcing the runner to continue looping. The existing `_MAX_INJECTION_CYCLES` and `max_iterations` limits still apply, so an always-active goal cannot loop forever. Also extract the default continuation prompt to `nanobot/utils/runtime.py` alongside the existing recovery-message builders.
2026-06-14 14:54:06 +00:00 · 2026-05-25 18:11:08 +08:00 · 2026-05-25 18:11:08 +08:00 · 7bbd9c7103
commit 7bbd9c7103
parent 418cb23da2
4 changed files with 221 additions and 2 deletions
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@ -34,7 +34,9 @@ from nanobot.config.schema import AgentDefaults, ModelPresetConfig
 from nanobot.providers.base import LLMProvider
 from nanobot.providers.factory import ProviderSnapshot
 from nanobot.session.goal_state import (
    goal_state_runtime_lines,
    runner_wall_llm_timeout_s,
    sustained_goal_active,
 )
 from nanobot.session.manager import Session, SessionManager
 from nanobot.session.webui_turns import (
@ -47,7 +49,10 @@ from nanobot.utils.helpers import image_placeholder_text
 from nanobot.utils.helpers import truncate_text as truncate_text_fn
 from nanobot.utils.image_generation_intent import image_generation_prompt
 from nanobot.utils.llm_runtime import LLMRuntime
-from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE
+from nanobot.utils.runtime import (
    EMPTY_FINAL_RESPONSE_MESSAGE,
    SUSTAINED_GOAL_CONTINUE_PROMPT,
 )
 if TYPE_CHECKING:
    from nanobot.config.schema import (
@ -729,6 +734,15 @@ class AgentLoop:
        active_session_key = session.key if session else session_key
        file_state_token = bind_file_states(self._file_state_store.for_session(active_session_key))
        # Build continuation message that embeds the active goal objective so
        # the LLM can see it even if earlier Runtime Context was truncated.
        _goal_lines = goal_state_runtime_lines(session.metadata if session is not None else None)
        _goal_continue = (
            "You have an active sustained goal:\n\n"
            + "\n".join(_goal_lines)
            + "\n\nPlease continue working toward the objective using your tools, "
            "or call complete_goal if the work is truly finished."
        ) if _goal_lines else SUSTAINED_GOAL_CONTINUE_PROMPT
        try:
            result = await self.runner.run(AgentRunSpec(
                initial_messages=initial_messages,
@ -756,6 +770,8 @@ class AgentLoop:
                    session.key if session is not None else session_key,
                    metadata=(session.metadata if session is not None else None),
                ),
                goal_active_predicate=lambda: sustained_goal_active(session.metadata) if session is not None else False,
                goal_continue_message=_goal_continue,
            ))
        finally:
            reset_file_states(file_state_token)
--- a/nanobot/agent/runner.py
+++ b/nanobot/agent/runner.py
@ -8,7 +8,7 @@ import os
 from contextlib import suppress
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any
+from typing import Any, Callable
 from loguru import logger
@ -42,6 +42,7 @@ from nanobot.utils.prompt_templates import render_template
 from nanobot.utils.runtime import (
    EMPTY_FINAL_RESPONSE_MESSAGE,
    build_finalization_retry_message,
    build_goal_continue_message,
    build_length_recovery_message,
    ensure_nonempty_tool_result,
    is_blank_text,
@ -97,6 +98,8 @@ class AgentRunSpec:
    checkpoint_callback: Any | None = None
    injection_callback: Any | None = None
    llm_timeout_s: float | None = None
    goal_active_predicate: Callable[[], bool] | None = None
    goal_continue_message: str | None = None
@dataclass(slots=True)
@ -167,6 +170,7 @@ class AgentRunner:
        *,
        phase: str = "after error",
        iteration: int | None = None,
        allow_goal_continue: bool = False,
    ) -> tuple[bool, int]:
        """Drain pending injections. Returns (should_continue, updated_cycles).
@ -178,6 +182,10 @@ class AgentRunner:
        if injection_cycles >= _MAX_INJECTION_CYCLES:
            return False, injection_cycles
        injections = await self._drain_injections(spec)
        if not injections and allow_goal_continue and assistant_message is not None:
            predicate = spec.goal_active_predicate
            if predicate is not None and predicate():
                injections = [build_goal_continue_message(spec.goal_continue_message)]
        if not injections:
            return False, injection_cycles
        injection_cycles += 1
@ -475,6 +483,7 @@ class AgentRunner:
                spec, messages, assistant_message, injection_cycles,
                phase="after final response",
                iteration=iteration,
                allow_goal_continue=True,
            )
            if should_continue:
                had_injections = True
--- a/nanobot/utils/runtime.py
+++ b/nanobot/utils/runtime.py
@ -29,6 +29,11 @@ LENGTH_RECOVERY_PROMPT = (
    "— no recap, no apology. Break remaining work into smaller steps if needed."
 )
 # Default text of the user-role continuation message. It is used by
 # build_goal_continue_message() when no custom message is supplied, and by the
 # agent loop when the session has no goal lines to embed in a tailored prompt.
 SUSTAINED_GOAL_CONTINUE_PROMPT = (
    "You have an active sustained goal. Please continue working toward the "
    "objective using your tools, or call complete_goal if the work is truly finished."
 )
 def empty_tool_result_message(tool_name: str) -> str:
    """Short prompt-safe marker for tools that completed without visible output."""
@ -65,6 +70,11 @@ def build_length_recovery_message() -> dict[str, str]:
    return {"role": "user", "content": LENGTH_RECOVERY_PROMPT}
 def build_goal_continue_message(custom: str | None = None) -> dict[str, str]:
    """Prompt the model to continue when a sustained goal is still active."""
    return {"role": "user", "content": custom or SUSTAINED_GOAL_CONTINUE_PROMPT}
 def external_lookup_signature(tool_name: str, arguments: dict[str, Any]) -> str | None:
    """Stable signature for repeated external lookups we want to throttle."""
    if tool_name == "web_fetch":
--- a/tests/agent/test_runner_goal_continue.py
+++ b/tests/agent/test_runner_goal_continue.py
@ -0,0 +1,184 @@
 """Tests for sustained-goal continuation in AgentRunner.
 When a goal_active_predicate returns True, the runner must not exit with
 stop_reason="completed" after a plain-text final response. Instead it should
 inject a continuation message and keep looping (similar to mid-turn injection).
 """
 from __future__ import annotations
 from unittest.mock import AsyncMock, MagicMock
 import pytest
 from nanobot.config.schema import AgentDefaults
 from nanobot.providers.base import LLMProvider, LLMResponse
 # Tool-result size limit read from the default agent configuration; every
 # AgentRunSpec built in this module passes it as max_tool_result_chars.
 _MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
@pytest.mark.asyncio
async def test_runner_exits_normally_without_predicate():
    """With no goal predicate configured, a plain-text reply ends the run as completed."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    # The provider always answers with final text and no tool calls.
    final_reply = LLMResponse(content="all done", tool_calls=[], usage={})
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=final_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    runner = AgentRunner(llm)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    outcome = await runner.run(spec)

    assert outcome.stop_reason == "completed"
    assert outcome.final_content == "all done"
@pytest.mark.asyncio
async def test_runner_exits_normally_with_inactive_goal():
    """A predicate that reports no active goal must not prevent normal completion."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    final_reply = LLMResponse(content="all done", tool_calls=[], usage={})
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=final_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    runner = AgentRunner(llm)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        # Goal tracking is wired up, but the goal is reported as inactive.
        goal_active_predicate=lambda: False,
    )
    outcome = await runner.run(spec)

    assert outcome.stop_reason == "completed"
    assert outcome.final_content == "all done"
@pytest.mark.asyncio
async def test_runner_forces_continue_when_goal_active():
    """An always-active goal keeps the runner looping past plain-text replies.

    The provider answers with final text on every call and the predicate never
    turns False. The run is therefore expected to end on the
    ``max_iterations=3`` cap instead of ``"completed"``, and the continuation
    prompt must appear among the user messages.
    """
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(
        return_value=LLMResponse(content="still working", tool_calls=[], usage={}),
    )

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    runner = AgentRunner(llm)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
    )
    outcome = await runner.run(spec)

    # The goal never goes inactive, so only the iteration cap can end the run.
    assert outcome.stop_reason == "max_iterations"

    # The default continuation prompt must have been injected as a user message.
    user_texts = [
        str(msg.get("content", ""))
        for msg in outcome.messages
        if msg.get("role") == "user"
    ]
    assert any("active sustained goal" in text for text in user_texts)
@pytest.mark.asyncio
async def test_runner_respects_max_iterations_even_with_active_goal():
    """With ``max_iterations=1`` and an active goal, the run stops on the iteration cap."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    reply = LLMResponse(content="still working", tool_calls=[], usage={})
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    runner = AgentRunner(llm)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=1,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
    )
    outcome = await runner.run(spec)

    assert outcome.stop_reason == "max_iterations"
@pytest.mark.asyncio
async def test_runner_does_not_force_continue_on_error():
    """An LLM error response ends the run with ``"error"`` even while a goal is active."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    # The provider reports a failure: no content and finish_reason="error".
    failed_reply = LLMResponse(
        content=None,
        tool_calls=[],
        usage={},
        finish_reason="error",
    )
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=failed_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    runner = AgentRunner(llm)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
    )
    outcome = await runner.run(spec)

    assert outcome.stop_reason == "error"
@pytest.mark.asyncio
async def test_runner_uses_custom_goal_continue_message():
    """A caller-supplied ``goal_continue_message`` is what gets injected as the nudge."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    custom_msg = "CUSTOM_CONTINUE_PLEASE"

    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(
        return_value=LLMResponse(content="still working", tool_calls=[], usage={}),
    )

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    runner = AgentRunner(llm)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
        goal_continue_message=custom_msg,
    )
    outcome = await runner.run(spec)

    # The custom text must appear in at least one user-role message.
    user_texts = [
        str(msg.get("content", ""))
        for msg in outcome.messages
        if msg.get("role") == "user"
    ]
    assert any(custom_msg in text for text in user_texts)