From 31d3061a0aa32e20c21c6db677a0dbf6ae11d64b Mon Sep 17 00:00:00 2001
From: pikaxinge <2392811793@qq.com>
Date: Sat, 4 Apr 2026 05:23:21 +0000
Subject: [PATCH] fix(retry): classify 429 as WAIT vs STOP using semantic signals

---
 nanobot/providers/anthropic_provider.py       |  18 ++-
 nanobot/providers/base.py                     | 120 ++++++++++++++++++
 nanobot/providers/openai_compat_provider.py   |  15 +++
 .../providers/test_provider_error_metadata.py |   8 +-
 tests/providers/test_provider_retry.py        |  81 ++++++++++++-
 5 files changed, 238 insertions(+), 4 deletions(-)

diff --git a/nanobot/providers/anthropic_provider.py b/nanobot/providers/anthropic_provider.py
index 3a5e435f0..230250566 100644
--- a/nanobot/providers/anthropic_provider.py
+++ b/nanobot/providers/anthropic_provider.py
@@ -102,7 +102,20 @@ class AnthropicProvider(LLMProvider):
     def _error_response(cls, e: Exception) -> LLMResponse:
         response = getattr(e, "response", None)
         headers = getattr(response, "headers", None)
-        msg = f"Error calling LLM: {e}"
+        payload = (
+            getattr(e, "body", None)
+            or getattr(e, "doc", None)
+            or getattr(response, "text", None)
+        )
+        if payload is None and response is not None:
+            response_json = getattr(response, "json", None)
+            if callable(response_json):
+                try:
+                    payload = response_json()
+                except Exception:
+                    payload = None
+        payload_text = payload if isinstance(payload, str) else str(payload) if payload is not None else ""
+        msg = f"Error: {payload_text.strip()[:500]}" if payload_text.strip() else f"Error calling LLM: {e}"
         retry_after = cls._parse_retry_after_headers(headers)
         if retry_after is None:
             retry_after = LLMProvider._extract_retry_after(msg)
@@ -127,6 +140,7 @@ class AnthropicProvider(LLMProvider):
             error_kind = "timeout"
         elif "connection" in error_name:
             error_kind = "connection"
+        error_type, error_code = LLMProvider._extract_error_type_code(payload)
 
         return LLMResponse(
             content=msg,
@@ -134,6 +148,8 @@ class AnthropicProvider(LLMProvider):
             retry_after=retry_after,
             error_status_code=int(status_code) if status_code is not None else None,
             error_kind=error_kind,
+            error_type=error_type,
+            error_code=error_code,
             error_retry_after_s=retry_after,
             error_should_retry=should_retry,
         )
diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py
index 6e6468a9c..0eb93cc5e 100644
--- a/nanobot/providers/base.py
+++ b/nanobot/providers/base.py
@@ -57,6 +57,8 @@ class LLMResponse:
     # Structured error metadata used by retry policy when finish_reason == "error".
     error_status_code: int | None = None
     error_kind: str | None = None  # e.g. "timeout", "connection"
+    error_type: str | None = None  # Provider/type semantic, e.g. insufficient_quota.
+    error_code: str | None = None  # Provider/code semantic, e.g. rate_limit_exceeded.
     error_retry_after_s: float | None = None
     error_should_retry: bool | None = None
 
@@ -98,6 +100,50 @@ class LLMProvider(ABC):
     )
     _RETRYABLE_STATUS_CODES = frozenset({408, 409, 429})
     _TRANSIENT_ERROR_KINDS = frozenset({"timeout", "connection"})
+    _NON_RETRYABLE_429_ERROR_TOKENS = frozenset({
+        "insufficient_quota",
+        "quota_exceeded",
+        "quota_exhausted",
+        "billing_hard_limit_reached",
+        "insufficient_balance",
+        "credit_balance_too_low",
+        "billing_not_active",
+        "payment_required",
+    })
+    _RETRYABLE_429_ERROR_TOKENS = frozenset({
+        "rate_limit_exceeded",
+        "rate_limit_error",
+        "too_many_requests",
+        "request_limit_exceeded",
+        "requests_limit_exceeded",
+        "overloaded_error",
+    })
+    _NON_RETRYABLE_429_TEXT_MARKERS = (
+        "insufficient_quota",
+        "insufficient quota",
+        "quota exceeded",
+        "quota exhausted",
+        "billing hard limit",
+        "billing_hard_limit_reached",
+        "billing not active",
+        "insufficient balance",
+        "insufficient_balance",
+        "credit balance too low",
+        "payment required",
+        "out of credits",
+        "out of quota",
+        "exceeded your current quota",
+    )
+    _RETRYABLE_429_TEXT_MARKERS = (
+        "rate limit",
+        "rate_limit",
+        "too many requests",
+        "retry after",
+        "try again in",
+        "temporarily unavailable",
+        "overloaded",
+        "concurrency limit",
+    )
 
     _SENTINEL = object()
 
@@ -209,6 +255,8 @@ class LLMProvider(ABC):
 
         if response.error_status_code is not None:
             status = int(response.error_status_code)
+            if status == 429:
+                return cls._is_retryable_429_response(response)
             if status in cls._RETRYABLE_STATUS_CODES or status >= 500:
                 return True
 
@@ -218,6 +266,78 @@ class LLMProvider(ABC):
 
         return cls._is_transient_error(response.content)
 
+    @staticmethod
+    def _normalize_error_token(value: Any) -> str | None:
+        """Return ``value`` stripped and lower-cased, or None if missing or blank."""
+        if value is None:
+            return None
+        token = str(value).strip().lower()
+        return token or None
+
+    @classmethod
+    def _extract_error_type_code(cls, payload: Any) -> tuple[str | None, str | None]:
+        """Extract normalized ``(type, code)`` tokens from a provider error payload.
+
+        ``payload`` may be a dict or a JSON-object string. Values nested under
+        ``error`` take precedence over top-level ``type``/``code``. Any other
+        payload shape yields ``(None, None)``.
+        """
+        data: dict[str, Any] | None = None
+        if isinstance(payload, dict):
+            data = payload
+        elif isinstance(payload, str):
+            text = payload.strip()
+            if text:
+                try:
+                    parsed = json.loads(text)
+                except (ValueError, RecursionError):
+                    # Not JSON (or pathologically nested): no structured tokens.
+                    parsed = None
+                if isinstance(parsed, dict):
+                    data = parsed
+        if not isinstance(data, dict):
+            return None, None
+
+        error_obj = data.get("error")
+        type_value = data.get("type")
+        code_value = data.get("code")
+        if isinstance(error_obj, dict):
+            type_value = error_obj.get("type") or type_value
+            code_value = error_obj.get("code") or code_value
+
+        return cls._normalize_error_token(type_value), cls._normalize_error_token(code_value)
+
+    @classmethod
+    def _is_retryable_429_response(cls, response: LLMResponse) -> bool:
+        """Return True to WAIT and retry a 429 response, False to STOP.
+
+        Structured ``error_type``/``error_code`` tokens are authoritative; the
+        free-text markers in ``content`` are consulted only when no structured
+        token classifies the error. An unclassified 429 is retried.
+        """
+        semantic_tokens = {
+            token
+            for token in (
+                cls._normalize_error_token(response.error_type),
+                cls._normalize_error_token(response.error_code),
+            )
+            if token is not None
+        }
+        if semantic_tokens & cls._NON_RETRYABLE_429_ERROR_TOKENS:
+            return False
+        # An explicit rate-limit token must win over quota-like wording in the
+        # message body, so check it before falling back to text heuristics.
+        if semantic_tokens & cls._RETRYABLE_429_ERROR_TOKENS:
+            return True
+
+        content = (response.content or "").lower()
+        if any(marker in content for marker in cls._NON_RETRYABLE_429_TEXT_MARKERS):
+            return False
+
+        # Anything else -- including _RETRYABLE_429_TEXT_MARKERS matches and
+        # unknown 429s -- defaults to WAIT+retry.
+        return True
+
     @staticmethod
     def _strip_image_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]] | None:
         """Replace image_url blocks with text placeholder. Returns None if no images found."""
diff --git a/nanobot/providers/openai_compat_provider.py b/nanobot/providers/openai_compat_provider.py
index 3120261d1..cb25e6f8c 100644
--- a/nanobot/providers/openai_compat_provider.py
+++ b/nanobot/providers/openai_compat_provider.py
@@ -621,6 +621,19 @@ class OpenAICompatProvider(LLMProvider):
     def _extract_error_metadata(cls, e: Exception) -> dict[str, Any]:
         response = getattr(e, "response", None)
         headers = getattr(response, "headers", None)
+        payload = (
+            getattr(e, "body", None)
+            or getattr(e, "doc", None)
+            or getattr(response, "text", None)
+        )
+        if payload is None and response is not None:
+            response_json = getattr(response, "json", None)
+            if callable(response_json):
+                try:
+                    payload = response_json()
+                except Exception:
+                    payload = None
+        error_type, error_code = LLMProvider._extract_error_type_code(payload)
 
         status_code = getattr(e, "status_code", None)
         if status_code is None and response is not None:
@@ -646,6 +659,8 @@ class OpenAICompatProvider(LLMProvider):
         return {
             "error_status_code": int(status_code) if status_code is not None else None,
             "error_kind": error_kind,
+            "error_type": error_type,
+            "error_code": error_code,
             "error_retry_after_s": cls._parse_retry_after_headers(headers),
             "error_should_retry": should_retry,
         }
diff --git a/tests/providers/test_provider_error_metadata.py b/tests/providers/test_provider_error_metadata.py
index b13c667de..27f0eb0f1 100644
--- a/tests/providers/test_provider_error_metadata.py
+++ b/tests/providers/test_provider_error_metadata.py
@@ -26,14 +26,16 @@ def test_openai_handle_error_extracts_structured_metadata() -> None:
     err.response = _fake_response(
         status_code=409,
         headers={"retry-after-ms": "250", "x-should-retry": "false"},
-        text='{"error":"conflict"}',
+        text='{"error":{"type":"rate_limit_exceeded","code":"rate_limit_exceeded"}}',
     )
-    err.body = {"error": "conflict"}
+    err.body = {"error": {"type": "rate_limit_exceeded", "code": "rate_limit_exceeded"}}
 
     response = OpenAICompatProvider._handle_error(err)
 
     assert response.finish_reason == "error"
     assert response.error_status_code == 409
+    assert response.error_type == "rate_limit_exceeded"
+    assert response.error_code == "rate_limit_exceeded"
     assert response.error_retry_after_s == 0.25
     assert response.error_should_retry is False
 
@@ -58,11 +60,13 @@ def test_anthropic_error_response_extracts_structured_metadata() -> None:
         status_code=408,
         headers={"retry-after": "1.5", "x-should-retry": "true"},
     )
+    err.body = {"type": "error", "error": {"type": "rate_limit_error"}}
 
     response = AnthropicProvider._error_response(err)
 
     assert response.finish_reason == "error"
     assert response.error_status_code == 408
+    assert response.error_type == "rate_limit_error"
     assert response.error_retry_after_s == 1.5
     assert response.error_should_retry is True
 
diff --git a/tests/providers/test_provider_retry.py b/tests/providers/test_provider_retry.py
index 038473c69..ad8048162 100644
--- a/tests/providers/test_provider_retry.py
+++ b/tests/providers/test_provider_retry.py
@@ -297,6 +297,86 @@ async def test_chat_with_retry_retries_structured_status_code_without_keyword(mo
     assert delays == [1]
 
 
+@pytest.mark.asyncio
+async def test_chat_with_retry_stops_on_429_quota_exhausted(monkeypatch) -> None:
+    provider = ScriptedProvider([
+        LLMResponse(
+            content='{"error":{"type":"insufficient_quota","code":"insufficient_quota"}}',
+            finish_reason="error",
+            error_status_code=429,
+            error_type="insufficient_quota",
+            error_code="insufficient_quota",
+        ),
+        LLMResponse(content="ok"),
+    ])
+    delays: list[float] = []
+
+    async def _fake_sleep(delay: float) -> None:
+        delays.append(delay)
+
+    monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
+
+    response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
+
+    assert response.finish_reason == "error"
+    assert provider.calls == 1
+    assert delays == []
+
+
+@pytest.mark.asyncio
+async def test_chat_with_retry_retries_429_transient_rate_limit(monkeypatch) -> None:
+    provider = ScriptedProvider([
+        LLMResponse(
+            content='{"error":{"type":"rate_limit_exceeded","code":"rate_limit_exceeded"}}',
+            finish_reason="error",
+            error_status_code=429,
+            error_type="rate_limit_exceeded",
+            error_code="rate_limit_exceeded",
+            error_retry_after_s=0.2,
+        ),
+        LLMResponse(content="ok"),
+    ])
+    delays: list[float] = []
+
+    async def _fake_sleep(delay: float) -> None:
+        delays.append(delay)
+
+    monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
+
+    response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
+
+    assert response.content == "ok"
+    assert provider.calls == 2
+    assert delays == [0.2]
+
+
+@pytest.mark.asyncio
+async def test_chat_with_retry_retries_429_rate_limit_token_despite_quota_wording(monkeypatch) -> None:
+    provider = ScriptedProvider([
+        LLMResponse(
+            content='{"error":{"type":"rate_limit_exceeded","message":"Quota exceeded for requests per minute"}}',
+            finish_reason="error",
+            error_status_code=429,
+            error_type="rate_limit_exceeded",
+            error_code="rate_limit_exceeded",
+            error_retry_after_s=0.2,
+        ),
+        LLMResponse(content="ok"),
+    ])
+    delays: list[float] = []
+
+    async def _fake_sleep(delay: float) -> None:
+        delays.append(delay)
+
+    monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
+
+    response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
+
+    assert response.content == "ok"
+    assert provider.calls == 2
+    assert delays == [0.2]
+
+
 @pytest.mark.asyncio
 async def test_chat_with_retry_retries_structured_timeout_kind(monkeypatch) -> None:
     provider = ScriptedProvider([
@@ -389,4 +469,3 @@ async def test_persistent_retry_aborts_after_ten_identical_transient_errors(monk
     assert response.content == "429 rate limit"
     assert provider.calls == 10
     assert delays == [1, 2, 4, 4, 4, 4, 4, 4, 4]
-