mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-06-14 14:54:06 +00:00
fix(agent): prevent runner from exiting while sustained goal is active
`long_task` registers a sustained objective, but `AgentRunner` would still exit with `stop_reason="completed"` when the LLM produced a final text response without calling `complete_goal`. This defeated the purpose of sustained goals. Add `goal_active_predicate` and `goal_continue_message` to `AgentRunSpec`. When the predicate returns `True` at the natural completion checkpoint, inject a continuation message via the existing `_try_drain_injections` machinery, forcing the runner to continue looping. Also extract the default continuation prompt to `nanobot/utils/runtime.py` alongside the existing recovery-message builders.
This commit is contained in:
parent
418cb23da2
commit
7bbd9c7103
@ -34,7 +34,9 @@ from nanobot.config.schema import AgentDefaults, ModelPresetConfig
|
|||||||
from nanobot.providers.base import LLMProvider
|
from nanobot.providers.base import LLMProvider
|
||||||
from nanobot.providers.factory import ProviderSnapshot
|
from nanobot.providers.factory import ProviderSnapshot
|
||||||
from nanobot.session.goal_state import (
|
from nanobot.session.goal_state import (
|
||||||
|
goal_state_runtime_lines,
|
||||||
runner_wall_llm_timeout_s,
|
runner_wall_llm_timeout_s,
|
||||||
|
sustained_goal_active,
|
||||||
)
|
)
|
||||||
from nanobot.session.manager import Session, SessionManager
|
from nanobot.session.manager import Session, SessionManager
|
||||||
from nanobot.session.webui_turns import (
|
from nanobot.session.webui_turns import (
|
||||||
@ -47,7 +49,10 @@ from nanobot.utils.helpers import image_placeholder_text
|
|||||||
from nanobot.utils.helpers import truncate_text as truncate_text_fn
|
from nanobot.utils.helpers import truncate_text as truncate_text_fn
|
||||||
from nanobot.utils.image_generation_intent import image_generation_prompt
|
from nanobot.utils.image_generation_intent import image_generation_prompt
|
||||||
from nanobot.utils.llm_runtime import LLMRuntime
|
from nanobot.utils.llm_runtime import LLMRuntime
|
||||||
from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE
|
from nanobot.utils.runtime import (
|
||||||
|
EMPTY_FINAL_RESPONSE_MESSAGE,
|
||||||
|
SUSTAINED_GOAL_CONTINUE_PROMPT,
|
||||||
|
)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from nanobot.config.schema import (
|
from nanobot.config.schema import (
|
||||||
@ -729,6 +734,15 @@ class AgentLoop:
|
|||||||
|
|
||||||
active_session_key = session.key if session else session_key
|
active_session_key = session.key if session else session_key
|
||||||
file_state_token = bind_file_states(self._file_state_store.for_session(active_session_key))
|
file_state_token = bind_file_states(self._file_state_store.for_session(active_session_key))
|
||||||
|
# Build continuation message that embeds the active goal objective so
|
||||||
|
# the LLM can see it even if earlier Runtime Context was truncated.
|
||||||
|
_goal_lines = goal_state_runtime_lines(session.metadata if session is not None else None)
|
||||||
|
_goal_continue = (
|
||||||
|
"You have an active sustained goal:\n\n"
|
||||||
|
+ "\n".join(_goal_lines)
|
||||||
|
+ "\n\nPlease continue working toward the objective using your tools, "
|
||||||
|
"or call complete_goal if the work is truly finished."
|
||||||
|
) if _goal_lines else SUSTAINED_GOAL_CONTINUE_PROMPT
|
||||||
try:
|
try:
|
||||||
result = await self.runner.run(AgentRunSpec(
|
result = await self.runner.run(AgentRunSpec(
|
||||||
initial_messages=initial_messages,
|
initial_messages=initial_messages,
|
||||||
@ -756,6 +770,8 @@ class AgentLoop:
|
|||||||
session.key if session is not None else session_key,
|
session.key if session is not None else session_key,
|
||||||
metadata=(session.metadata if session is not None else None),
|
metadata=(session.metadata if session is not None else None),
|
||||||
),
|
),
|
||||||
|
goal_active_predicate=lambda: sustained_goal_active(session.metadata) if session is not None else False,
|
||||||
|
goal_continue_message=_goal_continue,
|
||||||
))
|
))
|
||||||
finally:
|
finally:
|
||||||
reset_file_states(file_state_token)
|
reset_file_states(file_state_token)
|
||||||
|
|||||||
@ -8,7 +8,7 @@ import os
|
|||||||
from contextlib import suppress
|
from contextlib import suppress
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any, Callable
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
@ -42,6 +42,7 @@ from nanobot.utils.prompt_templates import render_template
|
|||||||
from nanobot.utils.runtime import (
|
from nanobot.utils.runtime import (
|
||||||
EMPTY_FINAL_RESPONSE_MESSAGE,
|
EMPTY_FINAL_RESPONSE_MESSAGE,
|
||||||
build_finalization_retry_message,
|
build_finalization_retry_message,
|
||||||
|
build_goal_continue_message,
|
||||||
build_length_recovery_message,
|
build_length_recovery_message,
|
||||||
ensure_nonempty_tool_result,
|
ensure_nonempty_tool_result,
|
||||||
is_blank_text,
|
is_blank_text,
|
||||||
@ -97,6 +98,8 @@ class AgentRunSpec:
|
|||||||
checkpoint_callback: Any | None = None
|
checkpoint_callback: Any | None = None
|
||||||
injection_callback: Any | None = None
|
injection_callback: Any | None = None
|
||||||
llm_timeout_s: float | None = None
|
llm_timeout_s: float | None = None
|
||||||
|
goal_active_predicate: Callable[[], bool] | None = None
|
||||||
|
goal_continue_message: str | None = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass(slots=True)
|
@dataclass(slots=True)
|
||||||
@ -167,6 +170,7 @@ class AgentRunner:
|
|||||||
*,
|
*,
|
||||||
phase: str = "after error",
|
phase: str = "after error",
|
||||||
iteration: int | None = None,
|
iteration: int | None = None,
|
||||||
|
allow_goal_continue: bool = False,
|
||||||
) -> tuple[bool, int]:
|
) -> tuple[bool, int]:
|
||||||
"""Drain pending injections. Returns (should_continue, updated_cycles).
|
"""Drain pending injections. Returns (should_continue, updated_cycles).
|
||||||
|
|
||||||
@ -178,6 +182,10 @@ class AgentRunner:
|
|||||||
if injection_cycles >= _MAX_INJECTION_CYCLES:
|
if injection_cycles >= _MAX_INJECTION_CYCLES:
|
||||||
return False, injection_cycles
|
return False, injection_cycles
|
||||||
injections = await self._drain_injections(spec)
|
injections = await self._drain_injections(spec)
|
||||||
|
if not injections and allow_goal_continue and assistant_message is not None:
|
||||||
|
predicate = spec.goal_active_predicate
|
||||||
|
if predicate is not None and predicate():
|
||||||
|
injections = [build_goal_continue_message(spec.goal_continue_message)]
|
||||||
if not injections:
|
if not injections:
|
||||||
return False, injection_cycles
|
return False, injection_cycles
|
||||||
injection_cycles += 1
|
injection_cycles += 1
|
||||||
@ -475,6 +483,7 @@ class AgentRunner:
|
|||||||
spec, messages, assistant_message, injection_cycles,
|
spec, messages, assistant_message, injection_cycles,
|
||||||
phase="after final response",
|
phase="after final response",
|
||||||
iteration=iteration,
|
iteration=iteration,
|
||||||
|
allow_goal_continue=True,
|
||||||
)
|
)
|
||||||
if should_continue:
|
if should_continue:
|
||||||
had_injections = True
|
had_injections = True
|
||||||
|
|||||||
@ -29,6 +29,11 @@ LENGTH_RECOVERY_PROMPT = (
|
|||||||
"— no recap, no apology. Break remaining work into smaller steps if needed."
|
"— no recap, no apology. Break remaining work into smaller steps if needed."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Default nudge sent as a user message when a sustained goal is still active
# and no custom continuation text was supplied.
SUSTAINED_GOAL_CONTINUE_PROMPT = (
    "You have an active sustained goal. "
    "Please continue working toward the objective using your tools, "
    "or call complete_goal if the work is truly finished."
)
|
||||||
|
|
||||||
|
|
||||||
def empty_tool_result_message(tool_name: str) -> str:
|
def empty_tool_result_message(tool_name: str) -> str:
|
||||||
"""Short prompt-safe marker for tools that completed without visible output."""
|
"""Short prompt-safe marker for tools that completed without visible output."""
|
||||||
@ -65,6 +70,11 @@ def build_length_recovery_message() -> dict[str, str]:
|
|||||||
return {"role": "user", "content": LENGTH_RECOVERY_PROMPT}
|
return {"role": "user", "content": LENGTH_RECOVERY_PROMPT}
|
||||||
|
|
||||||
|
|
||||||
|
def build_goal_continue_message(custom: str | None = None) -> dict[str, str]:
|
||||||
|
"""Prompt the model to continue when a sustained goal is still active."""
|
||||||
|
return {"role": "user", "content": custom or SUSTAINED_GOAL_CONTINUE_PROMPT}
|
||||||
|
|
||||||
|
|
||||||
def external_lookup_signature(tool_name: str, arguments: dict[str, Any]) -> str | None:
|
def external_lookup_signature(tool_name: str, arguments: dict[str, Any]) -> str | None:
|
||||||
"""Stable signature for repeated external lookups we want to throttle."""
|
"""Stable signature for repeated external lookups we want to throttle."""
|
||||||
if tool_name == "web_fetch":
|
if tool_name == "web_fetch":
|
||||||
|
|||||||
184
tests/agent/test_runner_goal_continue.py
Normal file
184
tests/agent/test_runner_goal_continue.py
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
"""Tests for sustained-goal continuation in AgentRunner.
|
||||||
|
|
||||||
|
When a goal_active_predicate returns True, the runner must not exit with
|
||||||
|
stop_reason="completed" after a plain-text final response. Instead it should
|
||||||
|
inject a continuation message and keep looping (similar to mid-turn injection).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import AsyncMock, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from nanobot.config.schema import AgentDefaults
|
||||||
|
from nanobot.providers.base import LLMProvider, LLMResponse
|
||||||
|
|
||||||
|
_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_runner_exits_normally_without_predicate():
    """Control case: with no goal predicate, a plain-text reply ends the run as completed."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    # The provider always answers with final text and no tool calls.
    final_reply = LLMResponse(content="all done", tool_calls=[], usage={})
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=final_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    run_spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    outcome = await agent_runner.run(run_spec)

    assert outcome.stop_reason == "completed"
    assert outcome.final_content == "all done"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_runner_exits_normally_with_inactive_goal():
    """A predicate that reports no active goal must not stop the run from completing."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    final_reply = LLMResponse(content="all done", tool_calls=[], usage={})
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=final_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    run_spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        # Goal is reported as inactive on every check.
        goal_active_predicate=lambda: False,
    )
    outcome = await agent_runner.run(run_spec)

    assert outcome.stop_reason == "completed"
    assert outcome.final_content == "all done"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_runner_forces_continue_when_goal_active():
    """An always-active goal turns every final-text reply into another loop iteration.

    The provider answers with plain text on every call and max_iterations is 3.
    Without goal continuation the run would stop after the first reply with
    stop_reason "completed"; with it, the runner keeps injecting the
    continuation prompt until the iteration budget runs out.
    """
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    text_reply = LLMResponse(content="still working", tool_calls=[], usage={})
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=text_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    run_spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
    )
    outcome = await agent_runner.run(run_spec)

    # The predicate never flips to False, so natural completion is impossible;
    # the only way out is exhausting max_iterations.
    assert outcome.stop_reason == "max_iterations"

    # At least one user-role message must carry the continuation prompt.
    user_contents = [
        str(msg.get("content", ""))
        for msg in outcome.messages
        if msg.get("role") == "user"
    ]
    assert any("active sustained goal" in text for text in user_contents)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_runner_respects_max_iterations_even_with_active_goal():
    """With a budget of one iteration, an active goal still ends the run at max_iterations."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    text_reply = LLMResponse(content="still working", tool_calls=[], usage={})
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=text_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    run_spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=1,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
    )
    outcome = await agent_runner.run(run_spec)

    assert outcome.stop_reason == "max_iterations"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_runner_does_not_force_continue_on_error():
    """An LLM error ends the run with stop_reason "error" even while a goal is active."""
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    # The provider reports an error finish with no content.
    error_reply = LLMResponse(
        content=None,
        tool_calls=[],
        usage={},
        finish_reason="error",
    )
    llm = MagicMock(spec=LLMProvider)
    llm.chat_with_retry = AsyncMock(return_value=error_reply)

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = []

    agent_runner = AgentRunner(llm)
    run_spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
    )
    outcome = await agent_runner.run(run_spec)

    assert outcome.stop_reason == "error"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_runner_uses_custom_goal_continue_message():
    """A custom goal_continue_message is injected in place of the default prompt.

    Checks both halves of "instead of": the custom text appears in a user
    message, and the default continuation prompt does not.
    """
    from nanobot.agent.runner import AgentRunner, AgentRunSpec

    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(
        content="still working", tool_calls=[], usage={},
    ))
    tools = MagicMock()
    tools.get_definitions.return_value = []

    custom_msg = "CUSTOM_CONTINUE_PLEASE"

    runner = AgentRunner(provider)
    result = await runner.run(AgentRunSpec(
        initial_messages=[{"role": "user", "content": "do task"}],
        tools=tools,
        model="test-model",
        max_iterations=2,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        goal_active_predicate=lambda: True,
        goal_continue_message=custom_msg,
    ))

    user_texts = [
        str(m.get("content", ""))
        for m in result.messages
        if m.get("role") == "user"
    ]
    assert any(custom_msg in text for text in user_texts)
    # Without this check, a regression that injected the default prompt
    # alongside (or before) the custom one would go unnoticed.
    assert not any("You have an active sustained goal" in text for text in user_texts)
|
||||||
Loading…
x
Reference in New Issue
Block a user