fix(agent): preserve tool results on fatal error to prevent orphan tool_calls (#2943)

This commit is contained in:
04cb 2026-04-11 21:48:31 +08:00
parent f5640d69fe
commit 4cd4ed8ada
2 changed files with 134 additions and 12 deletions

View File

@ -111,14 +111,21 @@ class AgentRunner:
messages_for_model = self._microcompact(messages_for_model)
messages_for_model = self._apply_tool_result_budget(spec, messages_for_model)
messages_for_model = self._snip_history(spec, messages_for_model)
# Snipping may have created new orphans; clean them up.
messages_for_model = self._drop_orphan_tool_results(messages_for_model)
messages_for_model = self._backfill_missing_tool_results(messages_for_model)
except Exception as exc:
logger.warning(
"Context governance failed on turn {} for {}: {}; using raw messages",
"Context governance failed on turn {} for {}: {}; applying minimal repair",
iteration,
spec.session_key or "default",
exc,
)
messages_for_model = messages
try:
messages_for_model = self._drop_orphan_tool_results(messages)
messages_for_model = self._backfill_missing_tool_results(messages_for_model)
except Exception:
messages_for_model = messages
context = AgentHookContext(iteration=iteration, messages=messages)
await hook.before_iteration(context)
response = await self._request_model(spec, messages_for_model, hook, context)
@ -162,16 +169,6 @@ class AgentRunner:
tool_events.extend(new_events)
context.tool_results = list(results)
context.tool_events = list(new_events)
if fatal_error is not None:
error = f"Error: {type(fatal_error).__name__}: {fatal_error}"
final_content = error
stop_reason = "tool_error"
self._append_final_message(messages, final_content)
context.final_content = final_content
context.error = error
context.stop_reason = stop_reason
await hook.after_iteration(context)
break
completed_tool_results: list[dict[str, Any]] = []
for tool_call, result in zip(response.tool_calls, results):
tool_message = {
@ -187,6 +184,16 @@ class AgentRunner:
}
messages.append(tool_message)
completed_tool_results.append(tool_message)
if fatal_error is not None:
error = f"Error: {type(fatal_error).__name__}: {fatal_error}"
final_content = error
stop_reason = "tool_error"
self._append_final_message(messages, final_content)
context.final_content = final_content
context.error = error
context.stop_reason = stop_reason
await hook.after_iteration(context)
break
await self._emit_checkpoint(
spec,
{

View File

@ -1607,3 +1607,118 @@ async def test_microcompact_skips_non_compactable_tools():
result = AgentRunner._microcompact(messages)
assert result is messages # no compactable tools found
@pytest.mark.asyncio
async def test_runner_tool_error_preserves_tool_results_in_messages():
"""When a tool raises a fatal error, its results must still be appended
to messages so the session never contains orphan tool_calls (#2943)."""
from nanobot.agent.runner import AgentRunSpec, AgentRunner
provider = MagicMock()
async def chat_with_retry(*, messages, **kwargs):
return LLMResponse(
content=None,
tool_calls=[
ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a"}),
ToolCallRequest(id="tc2", name="exec", arguments={"cmd": "bad"}),
],
usage={},
)
provider.chat_with_retry = chat_with_retry
provider.chat_stream_with_retry = chat_with_retry
call_idx = 0
async def fake_execute(name, args, **kw):
nonlocal call_idx
call_idx += 1
if call_idx == 2:
raise RuntimeError("boom")
return "file content"
tools = MagicMock()
tools.get_definitions.return_value = []
tools.execute = AsyncMock(side_effect=fake_execute)
runner = AgentRunner(provider)
result = await runner.run(AgentRunSpec(
initial_messages=[{"role": "user", "content": "do stuff"}],
tools=tools,
model="test-model",
max_iterations=1,
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
fail_on_tool_error=True,
))
assert result.stop_reason == "tool_error"
# Both tool results must be in messages even though tc2 had a fatal error.
tool_msgs = [m for m in result.messages if m.get("role") == "tool"]
assert len(tool_msgs) == 2
assert tool_msgs[0]["tool_call_id"] == "tc1"
assert tool_msgs[1]["tool_call_id"] == "tc2"
# The assistant message with tool_calls must precede the tool results.
asst_tc_idx = next(
i for i, m in enumerate(result.messages)
if m.get("role") == "assistant" and m.get("tool_calls")
)
tool_indices = [
i for i, m in enumerate(result.messages) if m.get("role") == "tool"
]
assert all(ti > asst_tc_idx for ti in tool_indices)
def test_governance_repairs_orphans_after_snip():
"""After _snip_history clips an assistant+tool_calls, the second
_drop_orphan_tool_results pass must clean up the resulting orphans."""
from nanobot.agent.runner import AgentRunner
messages = [
{"role": "system", "content": "system"},
{"role": "user", "content": "old msg"},
{"role": "assistant", "content": None,
"tool_calls": [{"id": "tc_old", "type": "function",
"function": {"name": "search", "arguments": "{}"}}]},
{"role": "tool", "tool_call_id": "tc_old", "name": "search",
"content": "old result"},
{"role": "assistant", "content": "old answer"},
{"role": "user", "content": "new msg"},
]
# Simulate snipping that keeps only the tail: drop the assistant with
# tool_calls but keep its tool result (orphan).
snipped = [
{"role": "system", "content": "system"},
{"role": "tool", "tool_call_id": "tc_old", "name": "search",
"content": "old result"},
{"role": "assistant", "content": "old answer"},
{"role": "user", "content": "new msg"},
]
cleaned = AgentRunner._drop_orphan_tool_results(snipped)
# The orphan tool result should be removed.
assert not any(
m.get("role") == "tool" and m.get("tool_call_id") == "tc_old"
for m in cleaned
)
def test_governance_fallback_still_repairs_orphans():
"""When full governance fails, the fallback must still run
_drop_orphan_tool_results and _backfill_missing_tool_results."""
from nanobot.agent.runner import AgentRunner
# Messages with an orphan tool result (no matching assistant tool_call).
messages = [
{"role": "user", "content": "hello"},
{"role": "tool", "tool_call_id": "orphan_tc", "name": "read",
"content": "stale"},
{"role": "assistant", "content": "hi"},
]
repaired = AgentRunner._drop_orphan_tool_results(messages)
repaired = AgentRunner._backfill_missing_tool_results(repaired)
# Orphan tool result should be gone.
assert not any(m.get("tool_call_id") == "orphan_tc" for m in repaired)