mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-06-13 14:23:58 +00:00
fix(providers): surface clear arrearage warning on quota/billing errors (#3006)
This commit is contained in:
parent
672fabe5be
commit
9d3fe7c34b
@ -53,6 +53,10 @@ from nanobot.utils.runtime import (
|
||||
)
|
||||
|
||||
_DEFAULT_ERROR_MESSAGE = "Sorry, I encountered an error calling the AI model."
|
||||
_ARREARAGE_ERROR_MESSAGE = (
|
||||
"The AI provider rejected the request because the API key is out of quota or the "
|
||||
"account is in arrears. Please top up / check the billing status of your API key and try again."
|
||||
)
|
||||
_PERSISTED_MODEL_ERROR_PLACEHOLDER = "[Assistant reply unavailable due to model error.]"
|
||||
_MAX_EMPTY_RETRIES = 2
|
||||
_MAX_LENGTH_RECOVERIES = 3
|
||||
@ -504,7 +508,10 @@ class AgentRunner:
|
||||
continue
|
||||
|
||||
if response.finish_reason == "error":
|
||||
final_content = clean or spec.error_message or _DEFAULT_ERROR_MESSAGE
|
||||
if LLMProvider.is_arrearage_response(response):
|
||||
final_content = _ARREARAGE_ERROR_MESSAGE
|
||||
else:
|
||||
final_content = clean or spec.error_message or _DEFAULT_ERROR_MESSAGE
|
||||
stop_reason = "error"
|
||||
error = final_content
|
||||
self._append_model_error_placeholder(messages)
|
||||
|
||||
@ -315,6 +315,29 @@ class LLMProvider(ABC):
|
||||
|
||||
return cls._is_transient_error(response.content)
|
||||
|
||||
@classmethod
def is_arrearage_response(cls, response: LLMResponse) -> bool:
    """Detect API-key arrearage / quota / billing errors that won't clear on retry.

    These surface as HTTP 402 or as billing semantic tokens (e.g.
    ``insufficient_quota``, ``payment_required``); reuses the same token and
    text markers the 429 retry policy treats as non-retryable.

    Args:
        response: The provider response to classify.

    Returns:
        True when the status code is 402, when the normalized error type or
        error code is one of ``_NON_RETRYABLE_429_ERROR_TOKENS``, or when the
        lower-cased content contains one of ``_NON_RETRYABLE_429_TEXT_MARKERS``;
        False otherwise.
    """
    status_code = response.error_status_code
    try:
        payment_required = status_code is not None and int(status_code) == 402
    except (TypeError, ValueError):
        # A malformed status code must not crash error reporting (which would
        # hide the provider's real error); rely on the token and text checks.
        payment_required = False
    if payment_required:
        return True

    tokens = (
        cls._normalize_error_token(response.error_type),
        cls._normalize_error_token(response.error_code),
    )
    if any(
        token is not None and token in cls._NON_RETRYABLE_429_ERROR_TOKENS
        for token in tokens
    ):
        return True

    # Last resort: look for billing wording in the raw error text.
    content = (response.content or "").lower()
    return any(marker in content for marker in cls._NON_RETRYABLE_429_TEXT_MARKERS)
|
||||
|
||||
@staticmethod
|
||||
def _normalize_error_token(value: Any) -> str | None:
|
||||
if value is None:
|
||||
|
||||
@ -78,6 +78,31 @@ async def test_llm_error_not_appended_to_session_messages():
|
||||
assert assistant_msgs[-1]["content"] == _PERSISTED_MODEL_ERROR_PLACEHOLDER
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_llm_arrearage_error_surfaces_clear_message():
    """An arrearage error produces the dedicated billing message, not a raw dump (#3006)."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _ARREARAGE_ERROR_MESSAGE

    # Provider that always answers with an HTTP 402 error response.
    arrearage_response = LLMResponse(
        content="HTTP 402 insufficient_quota",
        finish_reason="error",
        error_status_code=402,
    )
    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = AsyncMock(return_value=arrearage_response)

    # Tool registry that exposes no tools.
    tools = MagicMock()
    tools.get_definitions.return_value = []

    run_spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "hello"}],
        tools=tools,
        model="test-model",
        max_iterations=5,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await AgentRunner(provider).run(run_spec)

    assert result.stop_reason == "error"
    assert result.final_content == _ARREARAGE_ERROR_MESSAGE
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runner_tool_error_sets_final_content():
|
||||
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from nanobot.providers.anthropic_provider import AnthropicProvider
|
||||
from nanobot.providers.base import LLMProvider, LLMResponse
|
||||
from nanobot.providers.openai_compat_provider import OpenAICompatProvider
|
||||
|
||||
|
||||
@ -79,3 +82,14 @@ def test_anthropic_handle_error_marks_connection_kind() -> None:
|
||||
|
||||
assert response.finish_reason == "error"
|
||||
assert response.error_kind == "connection"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "expected, kwargs",
    [
        # HTTP 402 status code
        (True, {"error_status_code": 402}),
        # billing token in the error type
        (True, {"error_type": "insufficient_quota"}),
        # billing text marker in the content
        (True, {"content": "429 You exceeded your current quota"}),
        # plain rate limit: must not be classified as arrearage
        (False, {"error_status_code": 429, "error_type": "rate_limit_exceeded"}),
    ],
)
def test_is_arrearage_response(expected: bool, kwargs: dict) -> None:
    """Check ``LLMProvider.is_arrearage_response`` against each parametrized error shape."""
    # Default content is "boom"; a case may override it through ``kwargs``.
    fields = {"content": "boom", **kwargs}
    response = LLMResponse(finish_reason="error", **fields)

    assert LLMProvider.is_arrearage_response(response) is expected
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user