nanobot/tests/providers/test_llm_response.py
2026-05-18 22:01:33 +08:00

64 lines
2.8 KiB
Python

"""Regression tests for ``LLMResponse.should_execute_tools`` (#3220).
The agent used to execute tool calls whenever ``has_tool_calls`` was true, regardless
of ``finish_reason``. Non-compliant API gateways that inject empty / bogus tool calls
under ``refusal`` / ``content_filter`` / ``error`` pushed the agent into a tight loop
until ``max_iterations`` fired. ``should_execute_tools`` is the single guard that
every tool-execution site now funnels through.
"""
from __future__ import annotations
import pytest
from nanobot.providers.base import LLMResponse, ToolCallRequest
def _response(finish_reason: str, *, with_tool_call: bool = True) -> LLMResponse:
    """Build a content-less ``LLMResponse`` with the given ``finish_reason``.

    When ``with_tool_call`` is true the response carries a single ``list_dir``
    tool call; otherwise its tool-call list is empty.
    """
    requests: list[ToolCallRequest] = []
    if with_tool_call:
        requests.append(
            ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})
        )
    return LLMResponse(
        content=None,
        tool_calls=requests,
        finish_reason=finish_reason,
    )
class TestShouldExecuteTools:
    """Pin down when ``LLMResponse.should_execute_tools`` permits tool execution."""

    @pytest.mark.parametrize(
        "reason",
        ["tool_calls", "stop", "length", "error", "refusal", "content_filter"],
    )
    def test_no_tool_calls_never_executes(self, reason: str) -> None:
        # No tool calls present -> guard must reject regardless of finish_reason.
        # Parametrized (rather than looped) so every finish_reason is reported
        # on its own instead of the first failure masking the rest.
        resp = _response(reason, with_tool_call=False)
        assert resp.should_execute_tools is False, (
            f"expected no execution without tool calls for finish_reason={reason!r}"
        )

    def test_tool_calls_with_tool_calls_reason_executes(self) -> None:
        # The canonical case: provider explicitly signals tool intent.
        resp = _response("tool_calls")
        assert resp.has_tool_calls is True
        assert resp.should_execute_tools is True

    def test_tool_calls_with_stop_reason_executes(self) -> None:
        # Some compliant providers emit "stop" together with tool_calls; the
        # guard must accept this to avoid breaking real tool-calling flows.
        # See openai_compat_provider.py:~633,678 where ("tool_calls", "stop")
        # are both treated as terminal tool-call states.
        resp = _response("stop")
        assert resp.should_execute_tools is True

    def test_legacy_function_call_reason_executes(self) -> None:
        # Older OpenAI-compatible streaming APIs can still use the singular
        # function_call finish reason while carrying a tool-call-shaped payload.
        resp = _response("function_call")
        assert resp.should_execute_tools is True

    @pytest.mark.parametrize(
        "anomalous_reason",
        ["refusal", "content_filter", "error", "length", ""],
    )
    def test_tool_calls_under_anomalous_reason_blocked(self, anomalous_reason: str) -> None:
        # This is the #3220 bug: gateways injecting tool_calls under any of these
        # finish_reasons must not cause execution. Blocking here is what prevents
        # the infinite empty tool-call loop.
        resp = _response(anomalous_reason)
        assert resp.has_tool_calls is True
        assert resp.should_execute_tools is False