fix(providers): surface clear arrearage warning on quota/billing errors (#3006)

This commit is contained in:
04cb 2026-05-29 09:55:06 +08:00 committed by Xubin Ren
parent 672fabe5be
commit 9d3fe7c34b
4 changed files with 70 additions and 1 deletion

View File

@ -53,6 +53,10 @@ from nanobot.utils.runtime import (
)
# Last-resort text used when a model call ends in an error and neither the
# cleaned response text nor the run spec supplies a message.
_DEFAULT_ERROR_MESSAGE = "Sorry, I encountered an error calling the AI model."
# Fixed user-facing notice that replaces the raw provider output when the
# failed response is classified as a quota / billing (arrearage) error.
_ARREARAGE_ERROR_MESSAGE = (
"The AI provider rejected the request because the API key is out of quota or the "
"account is in arrears. Please top up / check the billing status of your API key and try again."
)
# Placeholder content for an assistant message whose reply failed with a
# model error (presumably stored in place of the raw error text -- confirm
# against the code that appends it).
_PERSISTED_MODEL_ERROR_PLACEHOLDER = "[Assistant reply unavailable due to model error.]"
# NOTE(review): presumably upper bounds on retries after empty replies and on
# recoveries from length-truncated replies; their use is outside this view.
_MAX_EMPTY_RETRIES = 2
_MAX_LENGTH_RECOVERIES = 3
@ -504,7 +508,10 @@ class AgentRunner:
continue
if response.finish_reason == "error":
final_content = clean or spec.error_message or _DEFAULT_ERROR_MESSAGE
if LLMProvider.is_arrearage_response(response):
final_content = _ARREARAGE_ERROR_MESSAGE
else:
final_content = clean or spec.error_message or _DEFAULT_ERROR_MESSAGE
stop_reason = "error"
error = final_content
self._append_model_error_placeholder(messages)

View File

@ -315,6 +315,29 @@ class LLMProvider(ABC):
return cls._is_transient_error(response.content)
@classmethod
def is_arrearage_response(cls, response: LLMResponse) -> bool:
    """Detect API-key arrearage / quota / billing errors that won't clear on retry.

    These surface as HTTP 402 or as billing semantic tokens (e.g.
    ``insufficient_quota``, ``payment_required``); reuses the same token and
    text markers the 429 retry policy treats as non-retryable.

    Returns ``True`` when the status code is 402, when the normalized error
    type or error code is one of ``_NON_RETRYABLE_429_ERROR_TOKENS``, or when
    the lower-cased response content contains one of
    ``_NON_RETRYABLE_429_TEXT_MARKERS``; ``False`` otherwise.
    """
    # This classifier runs while an error is already being reported, so a
    # missing or malformed status must not raise: treat it as "no status"
    # and let the token and text checks decide.
    try:
        status_code = int(response.error_status_code)
    except (TypeError, ValueError):
        status_code = None
    if status_code == 402:
        return True

    # Structured signal: provider-supplied error type / code tokens.
    tokens = (
        cls._normalize_error_token(response.error_type),
        cls._normalize_error_token(response.error_code),
    )
    if any(
        token is not None and token in cls._NON_RETRYABLE_429_ERROR_TOKENS
        for token in tokens
    ):
        return True

    # Last resort: billing phrases embedded in the free-form error text.
    content = (response.content or "").lower()
    return any(marker in content for marker in cls._NON_RETRYABLE_429_TEXT_MARKERS)
@staticmethod
def _normalize_error_token(value: Any) -> str | None:
if value is None:

View File

@ -78,6 +78,31 @@ async def test_llm_error_not_appended_to_session_messages():
assert assistant_msgs[-1]["content"] == _PERSISTED_MODEL_ERROR_PLACEHOLDER
@pytest.mark.asyncio
async def test_llm_arrearage_error_surfaces_clear_message():
    """A billing failure is reported with the fixed arrearage notice (#3006)."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _ARREARAGE_ERROR_MESSAGE

    # Provider stub whose only reply is an HTTP 402 error response.
    billing_failure = LLMResponse(
        content="HTTP 402 insufficient_quota",
        finish_reason="error",
        error_status_code=402,
    )
    provider = MagicMock(spec=LLMProvider)
    provider.chat_with_retry = AsyncMock(return_value=billing_failure)

    # No tools are exposed to the model in this scenario.
    tools = MagicMock()
    tools.get_definitions.return_value = []

    runner = AgentRunner(provider)
    run_spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "hello"}],
        tools=tools,
        model="test-model",
        max_iterations=5,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    outcome = await runner.run(run_spec)

    # The run stops as an error, but the user sees the curated message
    # instead of the raw provider text.
    assert outcome.stop_reason == "error"
    assert outcome.final_content == _ARREARAGE_ERROR_MESSAGE
@pytest.mark.asyncio
async def test_runner_tool_error_sets_final_content():
from nanobot.agent.runner import AgentRunSpec, AgentRunner

View File

@ -1,6 +1,9 @@
from types import SimpleNamespace
import pytest
from nanobot.providers.anthropic_provider import AnthropicProvider
from nanobot.providers.base import LLMProvider, LLMResponse
from nanobot.providers.openai_compat_provider import OpenAICompatProvider
@ -79,3 +82,14 @@ def test_anthropic_handle_error_marks_connection_kind() -> None:
assert response.finish_reason == "error"
assert response.error_kind == "connection"
@pytest.mark.parametrize(
    "expected, kwargs",
    [
        # HTTP 402
        (True, {"error_status_code": 402}),
        # billing token
        (True, {"error_type": "insufficient_quota"}),
        # text marker
        (True, {"content": "429 You exceeded your current quota"}),
        # plain rate limit
        (False, {"error_status_code": 429, "error_type": "rate_limit_exceeded"}),
    ],
)
def test_is_arrearage_response(expected: bool, kwargs: dict) -> None:
    """Check the arrearage classifier against each supported signal."""
    # Start from a neutral error body; a case may override ``content``.
    fields = {"content": "boom"}
    fields.update(kwargs)
    response = LLMResponse(finish_reason="error", **fields)

    verdict = LLMProvider.is_arrearage_response(response)
    assert verdict is expected