From 9d3fe7c34b1aa283a26f801b3a915cb4c50f48f4 Mon Sep 17 00:00:00 2001
From: 04cb <0x04cb@gmail.com>
Date: Fri, 29 May 2026 09:55:06 +0800
Subject: [PATCH] fix(providers): surface clear arrearage warning on
 quota/billing errors (#3006)

---
 nanobot/agent/runner.py                       |  9 ++++++-
 nanobot/providers/base.py                     | 23 +++++++++++++++++
 tests/agent/test_runner_errors.py             | 25 +++++++++++++++++++
 .../providers/test_provider_error_metadata.py | 14 +++++++++++
 4 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py
index cb70116b6..6b854a9a8 100644
--- a/nanobot/agent/runner.py
+++ b/nanobot/agent/runner.py
@@ -53,6 +53,10 @@ from nanobot.utils.runtime import (
 )
 
 _DEFAULT_ERROR_MESSAGE = "Sorry, I encountered an error calling the AI model."
+_ARREARAGE_ERROR_MESSAGE = (  # final_content when the error is classified as arrearage (#3006)
+    "The AI provider rejected the request because the API key is out of quota or the "
+    "account is in arrears. Please top up / check the billing status of your API key and try again."
+)
 _PERSISTED_MODEL_ERROR_PLACEHOLDER = "[Assistant reply unavailable due to model error.]"
 _MAX_EMPTY_RETRIES = 2
 _MAX_LENGTH_RECOVERIES = 3
@@ -504,7 +508,10 @@ class AgentRunner:
                 continue
 
             if response.finish_reason == "error":
-                final_content = clean or spec.error_message or _DEFAULT_ERROR_MESSAGE
+                if LLMProvider.is_arrearage_response(response):
+                    final_content = _ARREARAGE_ERROR_MESSAGE
+                else:
+                    final_content = clean or spec.error_message or _DEFAULT_ERROR_MESSAGE
                 stop_reason = "error"
                 error = final_content
                 self._append_model_error_placeholder(messages)
diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py
index 8bac5d4ba..c36593cb2 100644
--- a/nanobot/providers/base.py
+++ b/nanobot/providers/base.py
@@ -315,6 +315,29 @@ class LLMProvider(ABC):
 
         return cls._is_transient_error(response.content)
 
+    @classmethod
+    def is_arrearage_response(cls, response: LLMResponse) -> bool:
+        """Report whether *response* looks like a billing / quota / arrearage failure.
+
+        True on HTTP 402, on an ``error_type`` / ``error_code`` whose normalized
+        token is in ``_NON_RETRYABLE_429_ERROR_TOKENS``, or when the lower-cased
+        ``content`` contains any of ``_NON_RETRYABLE_429_TEXT_MARKERS``.
+        """
+        status = response.error_status_code
+        if status is not None and int(status) == 402:
+            return True
+
+        # Either structured field may carry the billing token; one match is enough.
+        tokens = (
+            cls._normalize_error_token(response.error_type),
+            cls._normalize_error_token(response.error_code),
+        )
+        if any(t is not None and t in cls._NON_RETRYABLE_429_ERROR_TOKENS for t in tokens):
+            return True
+
+        lowered = (response.content or "").lower()
+        return any(marker in lowered for marker in cls._NON_RETRYABLE_429_TEXT_MARKERS)
+
     @staticmethod
     def _normalize_error_token(value: Any) -> str | None:
         if value is None:
diff --git a/tests/agent/test_runner_errors.py b/tests/agent/test_runner_errors.py
index 8df7ad8f3..65550377a 100644
--- a/tests/agent/test_runner_errors.py
+++ b/tests/agent/test_runner_errors.py
@@ -78,6 +78,31 @@ async def test_llm_error_not_appended_to_session_messages():
     assert assistant_msgs[-1]["content"] == _PERSISTED_MODEL_ERROR_PLACEHOLDER
 
 
+@pytest.mark.asyncio
+async def test_llm_arrearage_error_surfaces_clear_message():
+    """A 402 / quota error from the provider ends the run with the fixed billing hint (#3006)."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner, _ARREARAGE_ERROR_MESSAGE
+
+    billing_error = LLMResponse(
+        content="HTTP 402 insufficient_quota", finish_reason="error", error_status_code=402,
+    )
+    provider = MagicMock(spec=LLMProvider)
+    provider.chat_with_retry = AsyncMock(return_value=billing_error)
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    runner = AgentRunner(provider)
+    spec = AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "hello"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=5,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    )
+    result = await runner.run(spec)
+    assert (result.stop_reason, result.final_content) == ("error", _ARREARAGE_ERROR_MESSAGE)
+
+
 @pytest.mark.asyncio
 async def test_runner_tool_error_sets_final_content():
     from nanobot.agent.runner import AgentRunSpec, AgentRunner
diff --git a/tests/providers/test_provider_error_metadata.py b/tests/providers/test_provider_error_metadata.py
index ea2532acf..2105c0ed4 100644
--- a/tests/providers/test_provider_error_metadata.py
+++ b/tests/providers/test_provider_error_metadata.py
@@ -1,6 +1,9 @@
 from types import SimpleNamespace
 
+import pytest
+
 from nanobot.providers.anthropic_provider import AnthropicProvider
+from nanobot.providers.base import LLMProvider, LLMResponse
 from nanobot.providers.openai_compat_provider import OpenAICompatProvider
 
 
@@ -79,3 +82,14 @@ def test_anthropic_handle_error_marks_connection_kind() -> None:
 
     assert response.finish_reason == "error"
     assert response.error_kind == "connection"
+
+
+@pytest.mark.parametrize(("expected", "kwargs"), [
+    (True, {"error_status_code": 402}),  # status code alone
+    (True, {"error_type": "insufficient_quota"}),  # structured billing token
+    (True, {"content": "429 You exceeded your current quota"}),  # marker in the body text
+    (False, {"error_status_code": 429, "error_type": "rate_limit_exceeded"}),  # ordinary throttling
+])
+def test_is_arrearage_response(expected: bool, kwargs: dict) -> None:
+    fields = {"finish_reason": "error", "content": "boom", **kwargs}  # case overrides defaults
+    assert LLMProvider.is_arrearage_response(LLMResponse(**fields)) is expected