mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-04-26 12:55:58 +00:00
Merge PR #3045: fix(agent): preserve tool results on fatal error to prevent orphan tool_calls
fix(agent): preserve tool results on fatal error to prevent orphan tool_calls (#2943)
This commit is contained in:
commit
a70928cc5c
@ -205,14 +205,21 @@ class AgentRunner:
|
|||||||
messages_for_model = self._microcompact(messages_for_model)
|
messages_for_model = self._microcompact(messages_for_model)
|
||||||
messages_for_model = self._apply_tool_result_budget(spec, messages_for_model)
|
messages_for_model = self._apply_tool_result_budget(spec, messages_for_model)
|
||||||
messages_for_model = self._snip_history(spec, messages_for_model)
|
messages_for_model = self._snip_history(spec, messages_for_model)
|
||||||
|
# Snipping may have created new orphans; clean them up.
|
||||||
|
messages_for_model = self._drop_orphan_tool_results(messages_for_model)
|
||||||
|
messages_for_model = self._backfill_missing_tool_results(messages_for_model)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Context governance failed on turn {} for {}: {}; using raw messages",
|
"Context governance failed on turn {} for {}: {}; applying minimal repair",
|
||||||
iteration,
|
iteration,
|
||||||
spec.session_key or "default",
|
spec.session_key or "default",
|
||||||
exc,
|
exc,
|
||||||
)
|
)
|
||||||
messages_for_model = messages
|
try:
|
||||||
|
messages_for_model = self._drop_orphan_tool_results(messages)
|
||||||
|
messages_for_model = self._backfill_missing_tool_results(messages_for_model)
|
||||||
|
except Exception:
|
||||||
|
messages_for_model = messages
|
||||||
context = AgentHookContext(iteration=iteration, messages=messages)
|
context = AgentHookContext(iteration=iteration, messages=messages)
|
||||||
await hook.before_iteration(context)
|
await hook.before_iteration(context)
|
||||||
response = await self._request_model(spec, messages_for_model, hook, context)
|
response = await self._request_model(spec, messages_for_model, hook, context)
|
||||||
@ -256,16 +263,6 @@ class AgentRunner:
|
|||||||
tool_events.extend(new_events)
|
tool_events.extend(new_events)
|
||||||
context.tool_results = list(results)
|
context.tool_results = list(results)
|
||||||
context.tool_events = list(new_events)
|
context.tool_events = list(new_events)
|
||||||
if fatal_error is not None:
|
|
||||||
error = f"Error: {type(fatal_error).__name__}: {fatal_error}"
|
|
||||||
final_content = error
|
|
||||||
stop_reason = "tool_error"
|
|
||||||
self._append_final_message(messages, final_content)
|
|
||||||
context.final_content = final_content
|
|
||||||
context.error = error
|
|
||||||
context.stop_reason = stop_reason
|
|
||||||
await hook.after_iteration(context)
|
|
||||||
break
|
|
||||||
completed_tool_results: list[dict[str, Any]] = []
|
completed_tool_results: list[dict[str, Any]] = []
|
||||||
for tool_call, result in zip(response.tool_calls, results):
|
for tool_call, result in zip(response.tool_calls, results):
|
||||||
tool_message = {
|
tool_message = {
|
||||||
@ -281,6 +278,16 @@ class AgentRunner:
|
|||||||
}
|
}
|
||||||
messages.append(tool_message)
|
messages.append(tool_message)
|
||||||
completed_tool_results.append(tool_message)
|
completed_tool_results.append(tool_message)
|
||||||
|
if fatal_error is not None:
|
||||||
|
error = f"Error: {type(fatal_error).__name__}: {fatal_error}"
|
||||||
|
final_content = error
|
||||||
|
stop_reason = "tool_error"
|
||||||
|
self._append_final_message(messages, final_content)
|
||||||
|
context.final_content = final_content
|
||||||
|
context.error = error
|
||||||
|
context.stop_reason = stop_reason
|
||||||
|
await hook.after_iteration(context)
|
||||||
|
break
|
||||||
await self._emit_checkpoint(
|
await self._emit_checkpoint(
|
||||||
spec,
|
spec,
|
||||||
{
|
{
|
||||||
|
|||||||
@ -1610,6 +1610,119 @@ async def test_microcompact_skips_non_compactable_tools():
|
|||||||
assert result is messages # no compactable tools found
|
assert result is messages # no compactable tools found
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_runner_tool_error_preserves_tool_results_in_messages():
|
||||||
|
"""When a tool raises a fatal error, its results must still be appended
|
||||||
|
to messages so the session never contains orphan tool_calls (#2943)."""
|
||||||
|
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||||
|
|
||||||
|
provider = MagicMock()
|
||||||
|
|
||||||
|
async def chat_with_retry(*, messages, **kwargs):
|
||||||
|
return LLMResponse(
|
||||||
|
content=None,
|
||||||
|
tool_calls=[
|
||||||
|
ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a"}),
|
||||||
|
ToolCallRequest(id="tc2", name="exec", arguments={"cmd": "bad"}),
|
||||||
|
],
|
||||||
|
usage={},
|
||||||
|
)
|
||||||
|
|
||||||
|
provider.chat_with_retry = chat_with_retry
|
||||||
|
provider.chat_stream_with_retry = chat_with_retry
|
||||||
|
|
||||||
|
call_idx = 0
|
||||||
|
|
||||||
|
async def fake_execute(name, args, **kw):
|
||||||
|
nonlocal call_idx
|
||||||
|
call_idx += 1
|
||||||
|
if call_idx == 2:
|
||||||
|
raise RuntimeError("boom")
|
||||||
|
return "file content"
|
||||||
|
|
||||||
|
tools = MagicMock()
|
||||||
|
tools.get_definitions.return_value = []
|
||||||
|
tools.execute = AsyncMock(side_effect=fake_execute)
|
||||||
|
|
||||||
|
runner = AgentRunner(provider)
|
||||||
|
result = await runner.run(AgentRunSpec(
|
||||||
|
initial_messages=[{"role": "user", "content": "do stuff"}],
|
||||||
|
tools=tools,
|
||||||
|
model="test-model",
|
||||||
|
max_iterations=1,
|
||||||
|
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
|
||||||
|
fail_on_tool_error=True,
|
||||||
|
))
|
||||||
|
|
||||||
|
assert result.stop_reason == "tool_error"
|
||||||
|
# Both tool results must be in messages even though tc2 had a fatal error.
|
||||||
|
tool_msgs = [m for m in result.messages if m.get("role") == "tool"]
|
||||||
|
assert len(tool_msgs) == 2
|
||||||
|
assert tool_msgs[0]["tool_call_id"] == "tc1"
|
||||||
|
assert tool_msgs[1]["tool_call_id"] == "tc2"
|
||||||
|
# The assistant message with tool_calls must precede the tool results.
|
||||||
|
asst_tc_idx = next(
|
||||||
|
i for i, m in enumerate(result.messages)
|
||||||
|
if m.get("role") == "assistant" and m.get("tool_calls")
|
||||||
|
)
|
||||||
|
tool_indices = [
|
||||||
|
i for i, m in enumerate(result.messages) if m.get("role") == "tool"
|
||||||
|
]
|
||||||
|
assert all(ti > asst_tc_idx for ti in tool_indices)
|
||||||
|
|
||||||
|
|
||||||
|
def test_governance_repairs_orphans_after_snip():
|
||||||
|
"""After _snip_history clips an assistant+tool_calls, the second
|
||||||
|
_drop_orphan_tool_results pass must clean up the resulting orphans."""
|
||||||
|
from nanobot.agent.runner import AgentRunner
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": "system"},
|
||||||
|
{"role": "user", "content": "old msg"},
|
||||||
|
{"role": "assistant", "content": None,
|
||||||
|
"tool_calls": [{"id": "tc_old", "type": "function",
|
||||||
|
"function": {"name": "search", "arguments": "{}"}}]},
|
||||||
|
{"role": "tool", "tool_call_id": "tc_old", "name": "search",
|
||||||
|
"content": "old result"},
|
||||||
|
{"role": "assistant", "content": "old answer"},
|
||||||
|
{"role": "user", "content": "new msg"},
|
||||||
|
]
|
||||||
|
|
||||||
|
# Simulate snipping that keeps only the tail: drop the assistant with
|
||||||
|
# tool_calls but keep its tool result (orphan).
|
||||||
|
snipped = [
|
||||||
|
{"role": "system", "content": "system"},
|
||||||
|
{"role": "tool", "tool_call_id": "tc_old", "name": "search",
|
||||||
|
"content": "old result"},
|
||||||
|
{"role": "assistant", "content": "old answer"},
|
||||||
|
{"role": "user", "content": "new msg"},
|
||||||
|
]
|
||||||
|
|
||||||
|
cleaned = AgentRunner._drop_orphan_tool_results(snipped)
|
||||||
|
# The orphan tool result should be removed.
|
||||||
|
assert not any(
|
||||||
|
m.get("role") == "tool" and m.get("tool_call_id") == "tc_old"
|
||||||
|
for m in cleaned
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_governance_fallback_still_repairs_orphans():
|
||||||
|
"""When full governance fails, the fallback must still run
|
||||||
|
_drop_orphan_tool_results and _backfill_missing_tool_results."""
|
||||||
|
from nanobot.agent.runner import AgentRunner
|
||||||
|
|
||||||
|
# Messages with an orphan tool result (no matching assistant tool_call).
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "hello"},
|
||||||
|
{"role": "tool", "tool_call_id": "orphan_tc", "name": "read",
|
||||||
|
"content": "stale"},
|
||||||
|
{"role": "assistant", "content": "hi"},
|
||||||
|
]
|
||||||
|
|
||||||
|
repaired = AgentRunner._drop_orphan_tool_results(messages)
|
||||||
|
repaired = AgentRunner._backfill_missing_tool_results(repaired)
|
||||||
|
# Orphan tool result should be gone.
|
||||||
|
assert not any(m.get("tool_call_id") == "orphan_tc" for m in repaired)
|
||||||
# ── Mid-turn injection tests ──────────────────────────────────────────────
|
# ── Mid-turn injection tests ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user