From 9d3fe7c34b1aa283a26f801b3a915cb4c50f48f4 Mon Sep 17 00:00:00 2001 From: 04cb <0x04cb@gmail.com> Date: Fri, 29 May 2026 09:55:06 +0800 Subject: [PATCH] fix(providers): surface clear arrearage warning on quota/billing errors (#3006) --- nanobot/agent/runner.py | 9 ++++++- nanobot/providers/base.py | 23 +++++++++++++++++ tests/agent/test_runner_errors.py | 25 +++++++++++++++++++ .../providers/test_provider_error_metadata.py | 14 +++++++++++ 4 files changed, 70 insertions(+), 1 deletion(-) diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py index cb70116b6..6b854a9a8 100644 --- a/nanobot/agent/runner.py +++ b/nanobot/agent/runner.py @@ -53,6 +53,10 @@ from nanobot.utils.runtime import ( ) _DEFAULT_ERROR_MESSAGE = "Sorry, I encountered an error calling the AI model." +_ARREARAGE_ERROR_MESSAGE = ( + "The AI provider rejected the request because the API key is out of quota or the " + "account is in arrears. Please top up / check the billing status of your API key and try again." +) _PERSISTED_MODEL_ERROR_PLACEHOLDER = "[Assistant reply unavailable due to model error.]" _MAX_EMPTY_RETRIES = 2 _MAX_LENGTH_RECOVERIES = 3 @@ -504,7 +508,10 @@ class AgentRunner: continue if response.finish_reason == "error": - final_content = clean or spec.error_message or _DEFAULT_ERROR_MESSAGE + if LLMProvider.is_arrearage_response(response): + final_content = _ARREARAGE_ERROR_MESSAGE + else: + final_content = clean or spec.error_message or _DEFAULT_ERROR_MESSAGE stop_reason = "error" error = final_content self._append_model_error_placeholder(messages) diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py index 8bac5d4ba..c36593cb2 100644 --- a/nanobot/providers/base.py +++ b/nanobot/providers/base.py @@ -315,6 +315,29 @@ class LLMProvider(ABC): return cls._is_transient_error(response.content) + @classmethod + def is_arrearage_response(cls, response: LLMResponse) -> bool: + """Detect API-key arrearage / quota / billing errors that won't clear on retry. + + These surface as HTTP 402 or as billing semantic tokens (e.g. + ``insufficient_quota``, ``payment_required``); reuses the same token and + text markers the 429 retry policy treats as non-retryable. + """ + if response.error_status_code is not None and int(response.error_status_code) == 402: + return True + + type_token = cls._normalize_error_token(response.error_type) + code_token = cls._normalize_error_token(response.error_code) + if any( + token in cls._NON_RETRYABLE_429_ERROR_TOKENS + for token in (type_token, code_token) + if token is not None + ): + return True + + content = (response.content or "").lower() + return any(marker in content for marker in cls._NON_RETRYABLE_429_TEXT_MARKERS) + @staticmethod def _normalize_error_token(value: Any) -> str | None: if value is None: diff --git a/tests/agent/test_runner_errors.py b/tests/agent/test_runner_errors.py index 8df7ad8f3..65550377a 100644 --- a/tests/agent/test_runner_errors.py +++ b/tests/agent/test_runner_errors.py @@ -78,6 +78,31 @@ async def test_llm_error_not_appended_to_session_messages(): assert assistant_msgs[-1]["content"] == _PERSISTED_MODEL_ERROR_PLACEHOLDER +@pytest.mark.asyncio +async def test_llm_arrearage_error_surfaces_clear_message(): + """Arrearage errors yield a clear user-facing message, not a raw dump (#3006).""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner, _ARREARAGE_ERROR_MESSAGE + + provider = MagicMock(spec=LLMProvider) + provider.chat_with_retry = AsyncMock(return_value=LLMResponse( + content="HTTP 402 insufficient_quota", finish_reason="error", error_status_code=402, + )) + tools = MagicMock() + tools.get_definitions.return_value = [] + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "hello"}], + tools=tools, + model="test-model", + max_iterations=5, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert result.stop_reason == "error" + assert result.final_content == _ARREARAGE_ERROR_MESSAGE + + @pytest.mark.asyncio async def test_runner_tool_error_sets_final_content(): from nanobot.agent.runner import AgentRunSpec, AgentRunner diff --git a/tests/providers/test_provider_error_metadata.py b/tests/providers/test_provider_error_metadata.py index ea2532acf..2105c0ed4 100644 --- a/tests/providers/test_provider_error_metadata.py +++ b/tests/providers/test_provider_error_metadata.py @@ -1,6 +1,9 @@ from types import SimpleNamespace +import pytest + from nanobot.providers.anthropic_provider import AnthropicProvider +from nanobot.providers.base import LLMProvider, LLMResponse from nanobot.providers.openai_compat_provider import OpenAICompatProvider @@ -79,3 +82,14 @@ def test_anthropic_handle_error_marks_connection_kind() -> None: assert response.finish_reason == "error" assert response.error_kind == "connection" + + +@pytest.mark.parametrize("expected, kwargs", [ + (True, {"error_status_code": 402}), # HTTP 402 + (True, {"error_type": "insufficient_quota"}), # billing token + (True, {"content": "429 You exceeded your current quota"}), # text marker + (False, {"error_status_code": 429, "error_type": "rate_limit_exceeded"}), # plain rate limit +]) +def test_is_arrearage_response(expected: bool, kwargs: dict) -> None: + response = LLMResponse(finish_reason="error", **{"content": "boom", **kwargs}) + assert LLMProvider.is_arrearage_response(response) is expected