fix(agent): prevent runner from exiting while sustained goal is active

`long_task` registers a sustained objective, but `AgentRunner` would
still exit with `stop_reason="completed"` when the LLM produced a final
text response without calling `complete_goal`. This defeated the purpose
of sustained goals.

Add `goal_active_predicate` and `goal_continue_message` to `AgentRunSpec`.
When the predicate returns `True` at the natural completion checkpoint,
inject a continuation message via the existing `_try_drain_injections`
machinery, forcing the runner to continue looping.

Also extract the default continuation prompt to
`nanobot/utils/runtime.py` alongside the existing recovery-message
builders.
This commit is contained in:
chengyongru 2026-05-25 18:11:08 +08:00 committed by Xubin Ren
parent 418cb23da2
commit 7bbd9c7103
4 changed files with 221 additions and 2 deletions

View File

@ -34,7 +34,9 @@ from nanobot.config.schema import AgentDefaults, ModelPresetConfig
from nanobot.providers.base import LLMProvider from nanobot.providers.base import LLMProvider
from nanobot.providers.factory import ProviderSnapshot from nanobot.providers.factory import ProviderSnapshot
from nanobot.session.goal_state import ( from nanobot.session.goal_state import (
goal_state_runtime_lines,
runner_wall_llm_timeout_s, runner_wall_llm_timeout_s,
sustained_goal_active,
) )
from nanobot.session.manager import Session, SessionManager from nanobot.session.manager import Session, SessionManager
from nanobot.session.webui_turns import ( from nanobot.session.webui_turns import (
@ -47,7 +49,10 @@ from nanobot.utils.helpers import image_placeholder_text
from nanobot.utils.helpers import truncate_text as truncate_text_fn from nanobot.utils.helpers import truncate_text as truncate_text_fn
from nanobot.utils.image_generation_intent import image_generation_prompt from nanobot.utils.image_generation_intent import image_generation_prompt
from nanobot.utils.llm_runtime import LLMRuntime from nanobot.utils.llm_runtime import LLMRuntime
from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE from nanobot.utils.runtime import (
EMPTY_FINAL_RESPONSE_MESSAGE,
SUSTAINED_GOAL_CONTINUE_PROMPT,
)
if TYPE_CHECKING: if TYPE_CHECKING:
from nanobot.config.schema import ( from nanobot.config.schema import (
@ -729,6 +734,15 @@ class AgentLoop:
active_session_key = session.key if session else session_key active_session_key = session.key if session else session_key
file_state_token = bind_file_states(self._file_state_store.for_session(active_session_key)) file_state_token = bind_file_states(self._file_state_store.for_session(active_session_key))
# Build continuation message that embeds the active goal objective so
# the LLM can see it even if earlier Runtime Context was truncated.
_goal_lines = goal_state_runtime_lines(session.metadata if session is not None else None)
_goal_continue = (
"You have an active sustained goal:\n\n"
+ "\n".join(_goal_lines)
+ "\n\nPlease continue working toward the objective using your tools, "
"or call complete_goal if the work is truly finished."
) if _goal_lines else SUSTAINED_GOAL_CONTINUE_PROMPT
try: try:
result = await self.runner.run(AgentRunSpec( result = await self.runner.run(AgentRunSpec(
initial_messages=initial_messages, initial_messages=initial_messages,
@ -756,6 +770,8 @@ class AgentLoop:
session.key if session is not None else session_key, session.key if session is not None else session_key,
metadata=(session.metadata if session is not None else None), metadata=(session.metadata if session is not None else None),
), ),
goal_active_predicate=lambda: sustained_goal_active(session.metadata) if session is not None else False,
goal_continue_message=_goal_continue,
)) ))
finally: finally:
reset_file_states(file_state_token) reset_file_states(file_state_token)

View File

@ -8,7 +8,7 @@ import os
from contextlib import suppress from contextlib import suppress
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any, Callable
from loguru import logger from loguru import logger
@ -42,6 +42,7 @@ from nanobot.utils.prompt_templates import render_template
from nanobot.utils.runtime import ( from nanobot.utils.runtime import (
EMPTY_FINAL_RESPONSE_MESSAGE, EMPTY_FINAL_RESPONSE_MESSAGE,
build_finalization_retry_message, build_finalization_retry_message,
build_goal_continue_message,
build_length_recovery_message, build_length_recovery_message,
ensure_nonempty_tool_result, ensure_nonempty_tool_result,
is_blank_text, is_blank_text,
@ -97,6 +98,8 @@ class AgentRunSpec:
checkpoint_callback: Any | None = None checkpoint_callback: Any | None = None
injection_callback: Any | None = None injection_callback: Any | None = None
llm_timeout_s: float | None = None llm_timeout_s: float | None = None
goal_active_predicate: Callable[[], bool] | None = None
goal_continue_message: str | None = None
@dataclass(slots=True) @dataclass(slots=True)
@ -167,6 +170,7 @@ class AgentRunner:
*, *,
phase: str = "after error", phase: str = "after error",
iteration: int | None = None, iteration: int | None = None,
allow_goal_continue: bool = False,
) -> tuple[bool, int]: ) -> tuple[bool, int]:
"""Drain pending injections. Returns (should_continue, updated_cycles). """Drain pending injections. Returns (should_continue, updated_cycles).
@ -178,6 +182,10 @@ class AgentRunner:
if injection_cycles >= _MAX_INJECTION_CYCLES: if injection_cycles >= _MAX_INJECTION_CYCLES:
return False, injection_cycles return False, injection_cycles
injections = await self._drain_injections(spec) injections = await self._drain_injections(spec)
if not injections and allow_goal_continue and assistant_message is not None:
predicate = spec.goal_active_predicate
if predicate is not None and predicate():
injections = [build_goal_continue_message(spec.goal_continue_message)]
if not injections: if not injections:
return False, injection_cycles return False, injection_cycles
injection_cycles += 1 injection_cycles += 1
@ -475,6 +483,7 @@ class AgentRunner:
spec, messages, assistant_message, injection_cycles, spec, messages, assistant_message, injection_cycles,
phase="after final response", phase="after final response",
iteration=iteration, iteration=iteration,
allow_goal_continue=True,
) )
if should_continue: if should_continue:
had_injections = True had_injections = True

View File

@ -29,6 +29,11 @@ LENGTH_RECOVERY_PROMPT = (
"— no recap, no apology. Break remaining work into smaller steps if needed." "— no recap, no apology. Break remaining work into smaller steps if needed."
) )
# Default user-facing nudge sent to the model when a sustained goal is still
# active; build_goal_continue_message() falls back to it when no custom text
# is supplied.
SUSTAINED_GOAL_CONTINUE_PROMPT = (
    "You have an active sustained goal. "
    "Please continue working toward the objective using your tools, "
    "or call complete_goal if the work is truly finished."
)
def empty_tool_result_message(tool_name: str) -> str: def empty_tool_result_message(tool_name: str) -> str:
"""Short prompt-safe marker for tools that completed without visible output.""" """Short prompt-safe marker for tools that completed without visible output."""
@ -65,6 +70,11 @@ def build_length_recovery_message() -> dict[str, str]:
return {"role": "user", "content": LENGTH_RECOVERY_PROMPT} return {"role": "user", "content": LENGTH_RECOVERY_PROMPT}
def build_goal_continue_message(custom: str | None = None) -> dict[str, str]:
"""Prompt the model to continue when a sustained goal is still active."""
return {"role": "user", "content": custom or SUSTAINED_GOAL_CONTINUE_PROMPT}
def external_lookup_signature(tool_name: str, arguments: dict[str, Any]) -> str | None: def external_lookup_signature(tool_name: str, arguments: dict[str, Any]) -> str | None:
"""Stable signature for repeated external lookups we want to throttle.""" """Stable signature for repeated external lookups we want to throttle."""
if tool_name == "web_fetch": if tool_name == "web_fetch":

View File

@ -0,0 +1,184 @@
"""Tests for sustained-goal continuation in AgentRunner.
When a goal_active_predicate returns True, the runner must not exit with
stop_reason="completed" after a plain-text final response. Instead it should
inject a continuation message and keep looping (similar to mid-turn injection).
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock
import pytest
from nanobot.config.schema import AgentDefaults
from nanobot.providers.base import LLMProvider, LLMResponse
_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
@pytest.mark.asyncio
async def test_runner_exits_normally_without_predicate():
    """Baseline: with no goal predicate, a plain-text reply ends the run as completed."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    # The stubbed provider answers every call with the same final text.
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(
        return_value=LLMResponse(content="all done", tool_calls=[], usage={}),
    )

    # Tool registry stub exposing no tool definitions.
    registry = MagicMock()
    registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    outcome = await agent_runner.run(spec)

    assert outcome.stop_reason == "completed"
    assert outcome.final_content == "all done"
@pytest.mark.asyncio
async def test_runner_exits_normally_with_inactive_goal():
    """A predicate that reports no active goal must not keep the run alive."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    # The stubbed provider answers every call with the same final text.
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(
        return_value=LLMResponse(content="all done", tool_calls=[], usage={}),
    )

    # Tool registry stub exposing no tool definitions.
    registry = MagicMock()
    registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: False,  # goal is never active
    )
    outcome = await agent_runner.run(spec)

    assert outcome.stop_reason == "completed"
    assert outcome.final_content == "all done"
@pytest.mark.asyncio
async def test_runner_forces_continue_when_goal_active():
    """An always-active goal turns a final text reply into another loop iteration.

    The provider returns plain final text on every call and max_iterations
    is 3. The run is expected to end with stop_reason "max_iterations"
    rather than "completed", and the default continuation prompt is expected
    to appear among the user messages.
    """
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    # The stubbed provider never calls a tool; it always returns final text.
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(
        return_value=LLMResponse(content="still working", tool_calls=[], usage={}),
    )

    # Tool registry stub exposing no tool definitions.
    registry = MagicMock()
    registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=registry,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,  # goal stays active for the whole run
    )
    outcome = await agent_runner.run(spec)

    # The run is cut off by the iteration budget instead of completing.
    assert outcome.stop_reason == "max_iterations"

    # The continuation prompt must have been injected as a user message.
    user_texts = [
        str(msg.get("content", ""))
        for msg in outcome.messages
        if msg.get("role") == "user"
    ]
    assert any("active sustained goal" in text for text in user_texts)
@pytest.mark.asyncio
async def test_runner_respects_max_iterations_even_with_active_goal():
    """With a budget of one iteration, an active goal still ends in max_iterations."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    # The stubbed provider always returns plain final text.
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(
        return_value=LLMResponse(content="still working", tool_calls=[], usage={}),
    )

    # Tool registry stub exposing no tool definitions.
    registry = MagicMock()
    registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=registry,
        model="test-model",
        max_iterations=1,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,  # goal stays active for the whole run
    )
    outcome = await agent_runner.run(spec)

    assert outcome.stop_reason == "max_iterations"
@pytest.mark.asyncio
async def test_runner_does_not_force_continue_on_error():
    """An LLM error ends the run with stop_reason "error" even while a goal is active."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    # The stubbed provider reports an error: no content, finish_reason "error".
    failed_reply = LLMResponse(
        content=None,
        tool_calls=[],
        usage={},
        finish_reason="error",
    )
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=failed_reply)

    # Tool registry stub exposing no tool definitions.
    registry = MagicMock()
    registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,  # goal stays active for the whole run
    )
    outcome = await agent_runner.run(spec)

    assert outcome.stop_reason == "error"
@pytest.mark.asyncio
async def test_runner_uses_custom_goal_continue_message():
    """Custom goal_continue_message should be injected instead of the default.

    Checks both halves of that claim: the custom text shows up in a user
    message, and the default prompt's wording does not.
    """
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    # The stubbed provider always returns plain final text, so every
    # iteration reaches the point where a continuation can be injected.
    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
        content="still working", tool_calls=[], usage={},
    ))

    # Tool registry stub exposing no tool definitions.
    tools = MagicMock()
    tools.get_definitions.return_value = []

    custom_msg = "CUSTOM_CONTINUE_PLEASE"

    runner = AgentRunner(provider)
    result = await runner.run(AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tools,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
        goal_continue_message=custom_msg,
    ))

    user_texts = [
        str(m.get("content", ""))
        for m in result.messages
        if m.get("role") == "user"
    ]
    assert any(custom_msg in text for text in user_texts)
    # "Instead of the default": the default prompt must not be injected as
    # well. "active sustained goal" is the phrase the sibling default-prompt
    # test matches on.
    assert not any("active sustained goal" in text for text in user_texts)