mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-06-13 14:23:58 +00:00
fix(agent): prevent runner from exiting while sustained goal is active
`long_task` registers a sustained objective, but `AgentRunner` would still exit with `stop_reason="completed"` when the LLM produced a final text response without calling `complete_goal`. This defeated the purpose of sustained goals. Add `goal_active_predicate` and `goal_continue_message` to `AgentRunSpec`. When the predicate returns `True` at the natural completion checkpoint, inject a continuation message via the existing `_try_drain_injections` machinery, forcing the runner to continue looping. Also extract the default continuation prompt to `nanobot/utils/runtime.py` alongside the existing recovery-message builders.
This commit is contained in:
parent
418cb23da2
commit
7bbd9c7103
@ -34,7 +34,9 @@ from nanobot.config.schema import AgentDefaults, ModelPresetConfig
|
||||
from nanobot.providers.base import LLMProvider
|
||||
from nanobot.providers.factory import ProviderSnapshot
|
||||
from nanobot.session.goal_state import (
|
||||
goal_state_runtime_lines,
|
||||
runner_wall_llm_timeout_s,
|
||||
sustained_goal_active,
|
||||
)
|
||||
from nanobot.session.manager import Session, SessionManager
|
||||
from nanobot.session.webui_turns import (
|
||||
@ -47,7 +49,10 @@ from nanobot.utils.helpers import image_placeholder_text
|
||||
from nanobot.utils.helpers import truncate_text as truncate_text_fn
|
||||
from nanobot.utils.image_generation_intent import image_generation_prompt
|
||||
from nanobot.utils.llm_runtime import LLMRuntime
|
||||
from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE
|
||||
from nanobot.utils.runtime import (
|
||||
EMPTY_FINAL_RESPONSE_MESSAGE,
|
||||
SUSTAINED_GOAL_CONTINUE_PROMPT,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nanobot.config.schema import (
|
||||
@ -729,6 +734,15 @@ class AgentLoop:
|
||||
|
||||
active_session_key = session.key if session else session_key
|
||||
file_state_token = bind_file_states(self._file_state_store.for_session(active_session_key))
|
||||
# Build continuation message that embeds the active goal objective so
|
||||
# the LLM can see it even if earlier Runtime Context was truncated.
|
||||
_goal_lines = goal_state_runtime_lines(session.metadata if session is not None else None)
|
||||
_goal_continue = (
|
||||
"You have an active sustained goal:\n\n"
|
||||
+ "\n".join(_goal_lines)
|
||||
+ "\n\nPlease continue working toward the objective using your tools, "
|
||||
"or call complete_goal if the work is truly finished."
|
||||
) if _goal_lines else SUSTAINED_GOAL_CONTINUE_PROMPT
|
||||
try:
|
||||
result = await self.runner.run(AgentRunSpec(
|
||||
initial_messages=initial_messages,
|
||||
@ -756,6 +770,8 @@ class AgentLoop:
|
||||
session.key if session is not None else session_key,
|
||||
metadata=(session.metadata if session is not None else None),
|
||||
),
|
||||
goal_active_predicate=lambda: sustained_goal_active(session.metadata) if session is not None else False,
|
||||
goal_continue_message=_goal_continue,
|
||||
))
|
||||
finally:
|
||||
reset_file_states(file_state_token)
|
||||
|
||||
@ -8,7 +8,7 @@ import os
|
||||
from contextlib import suppress
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import Any, Callable
|
||||
|
||||
from loguru import logger
|
||||
|
||||
@ -42,6 +42,7 @@ from nanobot.utils.prompt_templates import render_template
|
||||
from nanobot.utils.runtime import (
|
||||
EMPTY_FINAL_RESPONSE_MESSAGE,
|
||||
build_finalization_retry_message,
|
||||
build_goal_continue_message,
|
||||
build_length_recovery_message,
|
||||
ensure_nonempty_tool_result,
|
||||
is_blank_text,
|
||||
@ -97,6 +98,8 @@ class AgentRunSpec:
|
||||
checkpoint_callback: Any | None = None
|
||||
injection_callback: Any | None = None
|
||||
llm_timeout_s: float | None = None
|
||||
goal_active_predicate: Callable[[], bool] | None = None
|
||||
goal_continue_message: str | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -167,6 +170,7 @@ class AgentRunner:
|
||||
*,
|
||||
phase: str = "after error",
|
||||
iteration: int | None = None,
|
||||
allow_goal_continue: bool = False,
|
||||
) -> tuple[bool, int]:
|
||||
"""Drain pending injections. Returns (should_continue, updated_cycles).
|
||||
|
||||
@ -178,6 +182,10 @@ class AgentRunner:
|
||||
if injection_cycles >= _MAX_INJECTION_CYCLES:
|
||||
return False, injection_cycles
|
||||
injections = await self._drain_injections(spec)
|
||||
if not injections and allow_goal_continue and assistant_message is not None:
|
||||
predicate = spec.goal_active_predicate
|
||||
if predicate is not None and predicate():
|
||||
injections = [build_goal_continue_message(spec.goal_continue_message)]
|
||||
if not injections:
|
||||
return False, injection_cycles
|
||||
injection_cycles += 1
|
||||
@ -475,6 +483,7 @@ class AgentRunner:
|
||||
spec, messages, assistant_message, injection_cycles,
|
||||
phase="after final response",
|
||||
iteration=iteration,
|
||||
allow_goal_continue=True,
|
||||
)
|
||||
if should_continue:
|
||||
had_injections = True
|
||||
|
||||
@ -29,6 +29,11 @@ LENGTH_RECOVERY_PROMPT = (
|
||||
"— no recap, no apology. Break remaining work into smaller steps if needed."
|
||||
)
|
||||
|
||||
# Default user-role nudge used when a sustained goal is still active and no
# custom continuation text is provided.
SUSTAINED_GOAL_CONTINUE_PROMPT = (
    "You have an active sustained goal. "
    "Please continue working toward the objective using your tools, "
    "or call complete_goal if the work is truly finished."
)
|
||||
|
||||
|
||||
def empty_tool_result_message(tool_name: str) -> str:
|
||||
"""Short prompt-safe marker for tools that completed without visible output."""
|
||||
@ -65,6 +70,11 @@ def build_length_recovery_message() -> dict[str, str]:
|
||||
return {"role": "user", "content": LENGTH_RECOVERY_PROMPT}
|
||||
|
||||
|
||||
def build_goal_continue_message(custom: str | None = None) -> dict[str, str]:
|
||||
"""Prompt the model to continue when a sustained goal is still active."""
|
||||
return {"role": "user", "content": custom or SUSTAINED_GOAL_CONTINUE_PROMPT}
|
||||
|
||||
|
||||
def external_lookup_signature(tool_name: str, arguments: dict[str, Any]) -> str | None:
|
||||
"""Stable signature for repeated external lookups we want to throttle."""
|
||||
if tool_name == "web_fetch":
|
||||
|
||||
184
tests/agent/test_runner_goal_continue.py
Normal file
184
tests/agent/test_runner_goal_continue.py
Normal file
@ -0,0 +1,184 @@
|
||||
"""Tests for sustained-goal continuation in AgentRunner.
|
||||
|
||||
When a goal_active_predicate returns True, the runner must not exit with
|
||||
stop_reason="completed" after a plain-text final response. Instead it should
|
||||
inject a continuation message and keep looping (similar to mid-turn injection).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from nanobot.config.schema import AgentDefaults
|
||||
from nanobot.providers.base import LLMProvider, LLMResponse
|
||||
|
||||
_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_runner_exits_normally_without_predicate():
    """Baseline: with no goal predicate set, a final text reply ends the run as completed."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    # The provider always answers with plain text and no tool calls.
    final_reply = LLMResponse(content="all done", tool_calls=[], usage={})
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=final_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    run_spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    outcome = await agent_runner.run(run_spec)

    assert outcome.stop_reason == "completed"
    assert outcome.final_content == "all done"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_runner_exits_normally_with_inactive_goal():
    """A predicate that reports False must not prevent the normal completed exit."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    final_reply = LLMResponse(content="all done", tool_calls=[], usage={})
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=final_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    def goal_is_active() -> bool:
        # The goal is reported as inactive on every check.
        return False

    agent_runner = AgentRunner(llm)
    run_spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=goal_is_active,
    )
    outcome = await agent_runner.run(run_spec)

    assert outcome.stop_reason == "completed"
    assert outcome.final_content == "all done"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_runner_forces_continue_when_goal_active():
    """An always-True predicate makes the runner inject a continuation and keep looping.

    The provider returns final text on every call and ``max_iterations`` is 3.
    Without goal continuation the run would end on the first iteration with
    stop_reason "completed"; with it, the run is expected to continue until
    the iteration budget is used up.
    """
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    text_reply = LLMResponse(content="still working", tool_calls=[], usage={})
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=text_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    run_spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
    )
    outcome = await agent_runner.run(run_spec)

    # The predicate never turns False, so the only exit is the iteration cap.
    assert outcome.stop_reason == "max_iterations"

    # The default continuation prompt must appear among the user messages.
    user_contents = [
        str(message.get("content", ""))
        for message in outcome.messages
        if message.get("role") == "user"
    ]
    assert any("active sustained goal" in text for text in user_contents)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_runner_respects_max_iterations_even_with_active_goal():
    """With a budget of one iteration, an active goal still ends in max_iterations."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    text_reply = LLMResponse(content="still working", tool_calls=[], usage={})
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=text_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    run_spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=1,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
    )
    outcome = await agent_runner.run(run_spec)

    assert outcome.stop_reason == "max_iterations"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_runner_does_not_force_continue_on_error():
    """An LLM error response must end the run with stop_reason "error" despite an active goal."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    # The provider reports an error finish with no content and no tool calls.
    error_reply = LLMResponse(
        content=None,
        tool_calls=[],
        usage={},
        finish_reason="error",
    )
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=error_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    run_spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
    )
    outcome = await agent_runner.run(run_spec)

    assert outcome.stop_reason == "error"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_runner_uses_custom_goal_continue_message():
    """A caller-supplied goal_continue_message is the text that gets injected."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    text_reply = LLMResponse(content="still working", tool_calls=[], usage={})
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=text_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    custom_msg = "CUSTOM_CONTINUE_PLEASE"

    agent_runner = AgentRunner(llm)
    run_spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
        goal_continue_message=custom_msg,
    )
    outcome = await agent_runner.run(run_spec)

    # The custom text must show up in at least one user-role message.
    user_contents = [
        str(message.get("content", ""))
        for message in outcome.messages
        if message.get("role") == "user"
    ]
    assert any(custom_msg in text for text in user_contents)
|
||||
Loading…
x
Reference in New Issue
Block a user