Two small follow-ups to the guard:
1. Fix the should_execute_tools docstring so it matches the actual code.
The previous version said "Only execute when finish_reason explicitly
signals tool intent", but the code also accepts finish_reason == "stop".
Explain why: some compliant providers emit "stop" alongside legitimate
tool calls (openai_compat_provider.py already mirrors this at lines
~633 / ~678, where "tool_calls" and "stop" are both treated as terminal
tool-call states). Without this, a strict "tool_calls"-only guard would
regress 15 existing runner tests that construct LLMResponse with
tool_calls but no explicit finish_reason (default = "stop"). A minimal
sketch of the accepted-reason logic is included below.
2. Add tests/providers/test_llm_response.py. It locks in the three cases:
- no tool calls -> never executes
- tool calls + "tool_calls" / "stop" -> executes
- tool calls + refusal / content_filter / error / length / ... -> blocked
These are exactly the boundary cases the #3220 fix is about; without a
test here, a future refactor could silently revert the guard.
Docstring + tests only; no behavior change beyond the existing PR's intent.
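
For reference, a minimal sketch of the guard these tests pin down. Field
names, the "stop" default, and the accepted reasons are taken from the
tests and the notes above; the actual LLMResponse in nanobot.providers.base
may differ in detail, so treat this as an illustration rather than the
real implementation:

    from __future__ import annotations

    from dataclasses import dataclass, field

    # Finish reasons under which tool calls are treated as legitimate. "stop"
    # is included because some compliant providers emit it alongside real tool
    # calls (see the openai_compat_provider.py handling noted above). The set
    # itself is an assumption of this sketch, not a constant copied from nanobot.
    _EXECUTABLE_FINISH_REASONS = {"tool_calls", "stop"}

    @dataclass
    class LLMResponse:
        content: str | None = None
        tool_calls: list = field(default_factory=list)
        finish_reason: str = "stop"  # default the existing runner tests rely on

        @property
        def has_tool_calls(self) -> bool:
            return bool(self.tool_calls)

        @property
        def should_execute_tools(self) -> bool:
            # Execute only when tool calls exist AND finish_reason is an accepted
            # terminal tool-call state; refusal / content_filter / error / length /
            # "" are all blocked, which is what breaks the empty tool-call loop.
            return self.has_tool_calls and self.finish_reason in _EXECUTABLE_FINISH_REASONS

With that shape, LLMResponse(tool_calls=[...]) with no explicit
finish_reason still executes, which is why the 15 runner tests keep
passing.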
Made-with: Cursor
"""Regression tests for ``LLMResponse.should_execute_tools`` (#3220).
|
|
|
|
The agent used to execute tool calls whenever ``has_tool_calls`` was true, regardless
|
|
of ``finish_reason``. Non-compliant API gateways that inject empty / bogus tool calls
|
|
under ``refusal`` / ``content_filter`` / ``error`` pushed the agent into a tight loop
|
|
until ``max_iterations`` fired. ``should_execute_tools`` is the single guard that
|
|
every tool-execution site now funnels through.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from nanobot.providers.base import LLMResponse, ToolCallRequest
|
|
|
|
|
|
def _response(finish_reason: str, *, with_tool_call: bool = True) -> LLMResponse:
|
|
tool_calls = (
|
|
[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})]
|
|
if with_tool_call
|
|
else []
|
|
)
|
|
return LLMResponse(content=None, tool_calls=tool_calls, finish_reason=finish_reason)
|
|
|
|
|
|
class TestShouldExecuteTools:
|
|
def test_no_tool_calls_never_executes(self) -> None:
|
|
# No tool calls present -> guard must reject regardless of finish_reason.
|
|
for reason in ("tool_calls", "stop", "length", "error", "refusal", "content_filter"):
|
|
resp = _response(reason, with_tool_call=False)
|
|
assert resp.should_execute_tools is False, f"rejected for finish_reason={reason!r}"
|
|
|
|
def test_tool_calls_with_tool_calls_reason_executes(self) -> None:
|
|
# The canonical case: provider explicitly signals tool intent.
|
|
resp = _response("tool_calls")
|
|
assert resp.has_tool_calls is True
|
|
assert resp.should_execute_tools is True
|
|
|
|
def test_tool_calls_with_stop_reason_executes(self) -> None:
|
|
# Some compliant providers emit "stop" together with tool_calls; the
|
|
# guard must accept this to avoid breaking real tool-calling flows.
|
|
# See openai_compat_provider.py:~633,678 where ("tool_calls", "stop")
|
|
# are both treated as terminal tool-call states.
|
|
resp = _response("stop")
|
|
assert resp.should_execute_tools is True
|
|
|
|
@pytest.mark.parametrize(
|
|
"anomalous_reason",
|
|
["refusal", "content_filter", "error", "length", "function_call", ""],
|
|
)
|
|
def test_tool_calls_under_anomalous_reason_blocked(self, anomalous_reason: str) -> None:
|
|
# This is the #3220 bug: gateways injecting tool_calls under any of these
|
|
# finish_reasons must not cause execution. Blocking here is what prevents
|
|
# the infinite empty tool-call loop.
|
|
resp = _response(anomalous_reason)
|
|
assert resp.has_tool_calls is True
|
|
assert resp.should_execute_tools is False
|