fix(providers): surface clear arrearage warning on quota/billing errors (#3006)

2026-06-13 14:23:58 +00:00 · 2026-05-29 09:55:06 +08:00 · 2026-05-29 09:55:06 +08:00 · 9d3fe7c34b
commit 9d3fe7c34b
parent 672fabe5be
4 changed files with 70 additions and 1 deletion
--- a/nanobot/agent/runner.py
+++ b/nanobot/agent/runner.py
@@ -53,6 +53,10 @@ from nanobot.utils.runtime import (
 )

 _DEFAULT_ERROR_MESSAGE = "Sorry, I encountered an error calling the AI model."
+_ARREARAGE_ERROR_MESSAGE = (  # final_content for quota/billing (arrearage) provider errors
+    "The AI provider rejected the request because the API key is out of quota or the "
+    "account is in arrears. Please top up / check the billing status of your API key and try again."
+)
 _PERSISTED_MODEL_ERROR_PLACEHOLDER = "[Assistant reply unavailable due to model error.]"
 _MAX_EMPTY_RETRIES = 2
 _MAX_LENGTH_RECOVERIES = 3
@@ -504,7 +508,10 @@ class AgentRunner:
                continue

            if response.finish_reason == "error":
-                final_content = clean or spec.error_message or _DEFAULT_ERROR_MESSAGE
+                if LLMProvider.is_arrearage_response(response):
+                    final_content = _ARREARAGE_ERROR_MESSAGE
+                else:
+                    final_content = clean or spec.error_message or _DEFAULT_ERROR_MESSAGE
                stop_reason = "error"
                error = final_content
                self._append_model_error_placeholder(messages)
--- a/nanobot/providers/base.py
+++ b/nanobot/providers/base.py
@@ -315,6 +315,29 @@ class LLMProvider(ABC):

        return cls._is_transient_error(response.content)

+    @classmethod
+    def is_arrearage_response(cls, response: LLMResponse) -> bool:
+        """Detect API-key arrearage / quota / billing errors that won't clear on retry.
+
+        Returns True for HTTP 402, for a normalized ``error_type`` / ``error_code``
+        listed in ``_NON_RETRYABLE_429_ERROR_TOKENS``, or for lower-cased content
+        containing any ``_NON_RETRYABLE_429_TEXT_MARKERS`` substring; else False.
+        """
+        if response.error_status_code is not None and int(response.error_status_code) == 402:
+            return True  # HTTP 402 Payment Required
+
+        type_token = cls._normalize_error_token(response.error_type)
+        code_token = cls._normalize_error_token(response.error_code)
+        if any(
+            token in cls._NON_RETRYABLE_429_ERROR_TOKENS
+            for token in (type_token, code_token)
+            if token is not None  # skip fields that normalized to None
+        ):
+            return True
+
+        content = (response.content or "").lower()  # None content becomes ""; NOTE(review): markers assumed lower-case
+        return any(marker in content for marker in cls._NON_RETRYABLE_429_TEXT_MARKERS)
+
    @staticmethod
    def _normalize_error_token(value: Any) -> str | None:
        if value is None:
--- a/tests/agent/test_runner_errors.py
+++ b/tests/agent/test_runner_errors.py
@@ -78,6 +78,31 @@ async def test_llm_error_not_appended_to_session_messages():
    assert assistant_msgs[-1]["content"] == _PERSISTED_MODEL_ERROR_PLACEHOLDER


+@pytest.mark.asyncio
+async def test_llm_arrearage_error_surfaces_clear_message():
+    """Arrearage errors yield a clear user-facing message, not a raw dump (#3006)."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _ARREARAGE_ERROR_MESSAGE
+
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(  # every call returns this error response
+        content="HTTP 402 insufficient_quota", finish_reason="error", error_status_code=402,
+    ))
+    tools = MagicMock()
+    tools.get_definitions.return_value = []  # no tools are offered to the model
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hello"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=5,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    ))
+
+    assert result.stop_reason == "error"
+    assert result.final_content == _ARREARAGE_ERROR_MESSAGE  # not the raw "HTTP 402 ..." content
+
+
 @pytest.mark.asyncio
 async def test_runner_tool_error_sets_final_content():
    from nanobot.agent.runner import AgentRunSpec, AgentRunner
--- a/tests/providers/test_provider_error_metadata.py
+++ b/tests/providers/test_provider_error_metadata.py
@@ -1,6 +1,9 @@
 from types import SimpleNamespace

+import pytest
+
 from nanobot.providers.anthropic_provider import AnthropicProvider
+from nanobot.providers.base import LLMProvider, LLMResponse
 from nanobot.providers.openai_compat_provider import OpenAICompatProvider


@@ -79,3 +82,14 @@ def test_anthropic_handle_error_marks_connection_kind() -> None:

    assert response.finish_reason == "error"
    assert response.error_kind == "connection"
+
+
+@pytest.mark.parametrize("expected, kwargs", [
+    (True, {"error_status_code": 402}),  # HTTP 402 status alone
+    (True, {"error_type": "insufficient_quota"}),  # billing token in error_type
+    (True, {"content": "429 You exceeded your current quota"}),  # text marker in content only
+    (False, {"error_status_code": 429, "error_type": "rate_limit_exceeded"}),  # plain rate limit
+])
+def test_is_arrearage_response(expected: bool, kwargs: dict) -> None:
+    response = LLMResponse(finish_reason="error", **{"content": "boom", **kwargs})  # "boom" unless the case sets content
+    assert LLMProvider.is_arrearage_response(response) is expected