From b951b37c979b66ac0a48565c5c3b103b0a8b554f Mon Sep 17 00:00:00 2001 From: pikaxinge <2392811793@qq.com> Date: Thu, 2 Apr 2026 18:14:09 +0000 Subject: [PATCH 1/4] fix: use structured error metadata for app-layer retry --- nanobot/providers/anthropic_provider.py | 74 ++++++++++++++- nanobot/providers/base.py | 34 ++++++- nanobot/providers/openai_compat_provider.py | 83 +++++++++++++++- .../providers/test_provider_error_metadata.py | 77 +++++++++++++++ tests/providers/test_provider_retry.py | 95 ++++++++++++++++++- 5 files changed, 355 insertions(+), 8 deletions(-) create mode 100644 tests/providers/test_provider_error_metadata.py diff --git a/nanobot/providers/anthropic_provider.py b/nanobot/providers/anthropic_provider.py index eaec77789..9eb04922b 100644 --- a/nanobot/providers/anthropic_provider.py +++ b/nanobot/providers/anthropic_provider.py @@ -3,10 +3,12 @@ from __future__ import annotations import asyncio +import email.utils import os import re import secrets import string +import time from collections.abc import Awaitable, Callable from typing import Any @@ -51,6 +53,73 @@ class AnthropicProvider(LLMProvider): client_kw["default_headers"] = extra_headers self._client = AsyncAnthropic(**client_kw) + @staticmethod + def _parse_retry_after_headers(headers: Any) -> float | None: + if headers is None: + return None + + try: + retry_ms = headers.get("retry-after-ms") + if retry_ms is not None: + value = float(retry_ms) / 1000.0 + if value > 0: + return value + except (TypeError, ValueError): + pass + + retry_after = headers.get("retry-after") + try: + if retry_after is not None: + value = float(retry_after) + if value > 0: + return value + except (TypeError, ValueError): + pass + + if retry_after is None: + return None + retry_date_tuple = email.utils.parsedate_tz(retry_after) + if retry_date_tuple is None: + return None + retry_date = email.utils.mktime_tz(retry_date_tuple) + value = float(retry_date - time.time()) + return value if value > 0 else None + + @classmethod + def _error_response(cls, e: Exception) -> LLMResponse: + response = getattr(e, "response", None) + headers = getattr(response, "headers", None) + + status_code = getattr(e, "status_code", None) + if status_code is None and response is not None: + status_code = getattr(response, "status_code", None) + + should_retry: bool | None = None + if headers is not None: + raw = headers.get("x-should-retry") + if isinstance(raw, str): + lowered = raw.strip().lower() + if lowered == "true": + should_retry = True + elif lowered == "false": + should_retry = False + + error_kind: str | None = None + error_name = e.__class__.__name__.lower() + if "timeout" in error_name: + error_kind = "timeout" + elif "connection" in error_name: + error_kind = "connection" + + return LLMResponse( + content=f"Error calling LLM: {e}", + finish_reason="error", + error_status_code=int(status_code) if status_code is not None else None, + error_kind=error_kind, + error_retry_after_s=cls._parse_retry_after_headers(headers), + error_should_retry=should_retry, + ) + @staticmethod def _strip_prefix(model: str) -> str: if model.startswith("anthropic/"): @@ -419,7 +488,7 @@ class AnthropicProvider(LLMProvider): response = await self._client.messages.create(**kwargs) return self._parse_response(response) except Exception as e: - return LLMResponse(content=f"Error calling LLM: {e}", finish_reason="error") + return self._error_response(e) async def chat_stream( self, @@ -462,9 +531,10 @@ class AnthropicProvider(LLMProvider): f"{idle_timeout_s} seconds" ), finish_reason="error", + error_kind="timeout", ) except Exception as e: - return LLMResponse(content=f"Error calling LLM: {e}", finish_reason="error") + return self._error_response(e) def get_default_model(self) -> str: return self.default_model diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py index 852e9c973..4ad24b785 100644 --- a/nanobot/providers/base.py +++ b/nanobot/providers/base.py @@ -51,6 +51,11 @@ class LLMResponse: usage: dict[str, int] = field(default_factory=dict) reasoning_content: str | None = None # Kimi, DeepSeek-R1 etc. thinking_blocks: list[dict] | None = None # Anthropic extended thinking + # Structured error metadata used by retry policy when finish_reason == "error". + error_status_code: int | None = None + error_kind: str | None = None # e.g. "timeout", "connection" + error_retry_after_s: float | None = None + error_should_retry: bool | None = None @property def has_tool_calls(self) -> bool: @@ -88,6 +93,8 @@ class LLMProvider(ABC): "server error", "temporarily unavailable", ) + _RETRYABLE_STATUS_CODES = frozenset({408, 409, 429}) + _TRANSIENT_ERROR_KINDS = frozenset({"timeout", "connection"}) _SENTINEL = object() @@ -191,6 +198,23 @@ class LLMProvider(ABC): err = (content or "").lower() return any(marker in err for marker in cls._TRANSIENT_ERROR_MARKERS) + @classmethod + def _is_transient_response(cls, response: LLMResponse) -> bool: + """Prefer structured error metadata, fallback to text markers for legacy providers.""" + if response.error_should_retry is not None: + return bool(response.error_should_retry) + + if response.error_status_code is not None: + status = int(response.error_status_code) + if status in cls._RETRYABLE_STATUS_CODES or status >= 500: + return True + + kind = (response.error_kind or "").strip().lower() + if kind in cls._TRANSIENT_ERROR_KINDS: + return True + + return cls._is_transient_error(response.content) + @staticmethod def _strip_image_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]] | None: """Replace image_url blocks with text placeholder. Returns None if no images found.""" @@ -345,6 +369,12 @@ class LLMProvider(ABC): return value * 60.0 return value + @classmethod + def _extract_retry_after_from_response(cls, response: LLMResponse) -> float | None: + if response.error_retry_after_s is not None and response.error_retry_after_s > 0: + return response.error_retry_after_s + return cls._extract_retry_after(response.content) + async def _sleep_with_heartbeat( self, delay: float, @@ -393,7 +423,7 @@ class LLMProvider(ABC): last_error_key = error_key identical_error_count = 1 if error_key else 0 - if not self._is_transient_error(response.content): + if not self._is_transient_response(response): stripped = self._strip_image_content(original_messages) if stripped is not None and stripped != kw["messages"]: logger.warning( @@ -416,7 +446,7 @@ class LLMProvider(ABC): break base_delay = delays[min(attempt - 1, len(delays) - 1)] - delay = self._extract_retry_after(response.content) or base_delay + delay = self._extract_retry_after_from_response(response) or base_delay if persistent: delay = min(delay, self._PERSISTENT_MAX_DELAY) diff --git a/nanobot/providers/openai_compat_provider.py b/nanobot/providers/openai_compat_provider.py index 3e0a34fbf..268905b58 100644 --- a/nanobot/providers/openai_compat_provider.py +++ b/nanobot/providers/openai_compat_provider.py @@ -3,10 +3,12 @@ from __future__ import annotations import asyncio +import email.utils import hashlib import os import secrets import string +import time import uuid from collections.abc import Awaitable, Callable from typing import TYPE_CHECKING, Any @@ -569,11 +571,85 @@ class OpenAICompatProvider(LLMProvider): usage=usage, ) + @staticmethod + def _parse_retry_after_headers(headers: Any) -> float | None: + if headers is None: + return None + + try: + retry_ms = headers.get("retry-after-ms") + if retry_ms is not None: + value = float(retry_ms) / 1000.0 + if value > 0: + return value + except (TypeError, ValueError): + pass + + retry_after = headers.get("retry-after") + try: + if retry_after is not None: + value = float(retry_after) + if value > 0: + return value + except (TypeError, ValueError): + pass + + if retry_after is None: + return None + retry_date_tuple = email.utils.parsedate_tz(retry_after) + if retry_date_tuple is None: + return None + retry_date = email.utils.mktime_tz(retry_date_tuple) + value = float(retry_date - time.time()) + return value if value > 0 else None + + @classmethod + def _extract_error_metadata(cls, e: Exception) -> dict[str, Any]: + response = getattr(e, "response", None) + headers = getattr(response, "headers", None) + + status_code = getattr(e, "status_code", None) + if status_code is None and response is not None: + status_code = getattr(response, "status_code", None) + + should_retry: bool | None = None + if headers is not None: + raw = headers.get("x-should-retry") + if isinstance(raw, str): + lowered = raw.strip().lower() + if lowered == "true": + should_retry = True + elif lowered == "false": + should_retry = False + + error_kind: str | None = None + error_name = e.__class__.__name__.lower() + if "timeout" in error_name: + error_kind = "timeout" + elif "connection" in error_name: + error_kind = "connection" + + return { + "error_status_code": int(status_code) if status_code is not None else None, + "error_kind": error_kind, + "error_retry_after_s": cls._parse_retry_after_headers(headers), + "error_should_retry": should_retry, + } + @staticmethod def _handle_error(e: Exception) -> LLMResponse: - body = getattr(e, "doc", None) or getattr(getattr(e, "response", None), "text", None) - msg = f"Error: {body.strip()[:500]}" if body and body.strip() else f"Error calling LLM: {e}" - return LLMResponse(content=msg, finish_reason="error") + body = ( + getattr(e, "doc", None) + or getattr(e, "body", None) + or getattr(getattr(e, "response", None), "text", None) + ) + body_text = body if isinstance(body, str) else str(body) if body is not None else "" + msg = f"Error: {body_text.strip()[:500]}" if body_text.strip() else f"Error calling LLM: {e}" + return LLMResponse( + content=msg, + finish_reason="error", + **OpenAICompatProvider._extract_error_metadata(e), + ) # ------------------------------------------------------------------ # Public API @@ -641,6 +717,7 @@ class OpenAICompatProvider(LLMProvider): f"{idle_timeout_s} seconds" ), finish_reason="error", + error_kind="timeout", ) except Exception as e: return self._handle_error(e) diff --git a/tests/providers/test_provider_error_metadata.py b/tests/providers/test_provider_error_metadata.py new file mode 100644 index 000000000..b13c667de --- /dev/null +++ b/tests/providers/test_provider_error_metadata.py @@ -0,0 +1,77 @@ +from types import SimpleNamespace + +from nanobot.providers.anthropic_provider import AnthropicProvider +from nanobot.providers.openai_compat_provider import OpenAICompatProvider + + +def _fake_response( + *, + status_code: int, + headers: dict[str, str] | None = None, + text: str = "", +) -> SimpleNamespace: + return SimpleNamespace( + status_code=status_code, + headers=headers or {}, + text=text, + ) + + +def test_openai_handle_error_extracts_structured_metadata() -> None: + class FakeStatusError(Exception): + pass + + err = FakeStatusError("boom") + err.status_code = 409 + err.response = _fake_response( + status_code=409, + headers={"retry-after-ms": "250", "x-should-retry": "false"}, + text='{"error":"conflict"}', + ) + err.body = {"error": "conflict"} + + response = OpenAICompatProvider._handle_error(err) + + assert response.finish_reason == "error" + assert response.error_status_code == 409 + assert response.error_retry_after_s == 0.25 + assert response.error_should_retry is False + + +def test_openai_handle_error_marks_timeout_kind() -> None: + class FakeTimeoutError(Exception): + pass + + response = OpenAICompatProvider._handle_error(FakeTimeoutError("timeout")) + + assert response.finish_reason == "error" + assert response.error_kind == "timeout" + + +def test_anthropic_error_response_extracts_structured_metadata() -> None: + class FakeStatusError(Exception): + pass + + err = FakeStatusError("boom") + err.status_code = 408 + err.response = _fake_response( + status_code=408, + headers={"retry-after": "1.5", "x-should-retry": "true"}, + ) + + response = AnthropicProvider._error_response(err) + + assert response.finish_reason == "error" + assert response.error_status_code == 408 + assert response.error_retry_after_s == 1.5 + assert response.error_should_retry is True + + +def test_anthropic_error_response_marks_connection_kind() -> None: + class FakeConnectionError(Exception): + pass + + response = AnthropicProvider._error_response(FakeConnectionError("connection")) + + assert response.finish_reason == "error" + assert response.error_kind == "connection" diff --git a/tests/providers/test_provider_retry.py b/tests/providers/test_provider_retry.py index 1d8facf52..eb9e7c54c 100644 --- a/tests/providers/test_provider_retry.py +++ b/tests/providers/test_provider_retry.py @@ -240,6 +240,100 @@ async def test_chat_with_retry_uses_retry_after_and_emits_wait_progress(monkeypa assert progress and "7s" in progress[0] +@pytest.mark.asyncio +async def test_chat_with_retry_retries_structured_status_code_without_keyword(monkeypatch) -> None: + provider = ScriptedProvider([ + LLMResponse( + content="request failed", + finish_reason="error", + error_status_code=409, + ), + LLMResponse(content="ok"), + ]) + delays: list[float] = [] + + async def _fake_sleep(delay: float) -> None: + delays.append(delay) + + monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep) + + response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}]) + + assert response.content == "ok" + assert provider.calls == 2 + assert delays == [1] + + +@pytest.mark.asyncio +async def test_chat_with_retry_retries_structured_timeout_kind(monkeypatch) -> None: + provider = ScriptedProvider([ + LLMResponse( + content="request failed", + finish_reason="error", + error_kind="timeout", + ), + LLMResponse(content="ok"), + ]) + delays: list[float] = [] + + async def _fake_sleep(delay: float) -> None: + delays.append(delay) + + monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep) + + response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}]) + + assert response.content == "ok" + assert provider.calls == 2 + assert delays == [1] + + +@pytest.mark.asyncio +async def test_chat_with_retry_structured_should_retry_false_disables_retry(monkeypatch) -> None: + provider = ScriptedProvider([ + LLMResponse( + content="429 rate limit", + finish_reason="error", + error_should_retry=False, + ), + ]) + delays: list[float] = [] + + async def _fake_sleep(delay: float) -> None: + delays.append(delay) + + monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep) + + response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}]) + + assert response.finish_reason == "error" + assert provider.calls == 1 + assert delays == [] + + +@pytest.mark.asyncio +async def test_chat_with_retry_prefers_structured_retry_after(monkeypatch) -> None: + provider = ScriptedProvider([ + LLMResponse( + content="429 rate limit, retry after 99s", + finish_reason="error", + error_retry_after_s=0.2, + ), + LLMResponse(content="ok"), + ]) + delays: list[float] = [] + + async def _fake_sleep(delay: float) -> None: + delays.append(delay) + + monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep) + + response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}]) + + assert response.content == "ok" + assert delays == [0.2] + + @pytest.mark.asyncio async def test_persistent_retry_aborts_after_ten_identical_transient_errors(monkeypatch) -> None: provider = ScriptedProvider([ @@ -263,4 +357,3 @@ async def test_persistent_retry_aborts_after_ten_identical_transient_errors(monk assert provider.calls == 10 assert delays == [1, 2, 4, 4, 4, 4, 4, 4, 4] - From 31d3061a0aa32e20c21c6db677a0dbf6ae11d64b Mon Sep 17 00:00:00 2001 From: pikaxinge <2392811793@qq.com> Date: Sat, 4 Apr 2026 05:23:21 +0000 Subject: [PATCH 2/4] fix(retry): classify 429 as WAIT vs STOP using semantic signals --- nanobot/providers/anthropic_provider.py | 18 ++- nanobot/providers/base.py | 103 ++++++++++++++++++ nanobot/providers/openai_compat_provider.py | 15 +++ .../providers/test_provider_error_metadata.py | 8 +- tests/providers/test_provider_retry.py | 54 ++++++++- 5 files changed, 194 insertions(+), 4 deletions(-) diff --git a/nanobot/providers/anthropic_provider.py b/nanobot/providers/anthropic_provider.py index 3a5e435f0..230250566 100644 --- a/nanobot/providers/anthropic_provider.py +++ b/nanobot/providers/anthropic_provider.py @@ -102,7 +102,20 @@ class AnthropicProvider(LLMProvider): def _error_response(cls, e: Exception) -> LLMResponse: response = getattr(e, "response", None) headers = getattr(response, "headers", None) - msg = f"Error calling LLM: {e}" + payload = ( + getattr(e, "body", None) + or getattr(e, "doc", None) + or getattr(response, "text", None) + ) + if payload is None and response is not None: + response_json = getattr(response, "json", None) + if callable(response_json): + try: + payload = response_json() + except Exception: + payload = None + payload_text = payload if isinstance(payload, str) else str(payload) if payload is not None else "" + msg = f"Error: {payload_text.strip()[:500]}" if payload_text.strip() else f"Error calling LLM: {e}" retry_after = cls._parse_retry_after_headers(headers) if retry_after is None: retry_after = LLMProvider._extract_retry_after(msg) @@ -127,6 +140,7 @@ class AnthropicProvider(LLMProvider): error_kind = "timeout" elif "connection" in error_name: error_kind = "connection" + error_type, error_code = LLMProvider._extract_error_type_code(payload) return LLMResponse( content=msg, @@ -134,6 +148,8 @@ class AnthropicProvider(LLMProvider): retry_after=retry_after, error_status_code=int(status_code) if status_code is not None else None, error_kind=error_kind, + error_type=error_type, + error_code=error_code, error_retry_after_s=retry_after, error_should_retry=should_retry, ) diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py index 6e6468a9c..0eb93cc5e 100644 --- a/nanobot/providers/base.py +++ b/nanobot/providers/base.py @@ -57,6 +57,8 @@ class LLMResponse: # Structured error metadata used by retry policy when finish_reason == "error". error_status_code: int | None = None error_kind: str | None = None # e.g. "timeout", "connection" + error_type: str | None = None # Provider/type semantic, e.g. insufficient_quota. + error_code: str | None = None # Provider/code semantic, e.g. rate_limit_exceeded. error_retry_after_s: float | None = None error_should_retry: bool | None = None @@ -98,6 +100,50 @@ class LLMProvider(ABC): ) _RETRYABLE_STATUS_CODES = frozenset({408, 409, 429}) _TRANSIENT_ERROR_KINDS = frozenset({"timeout", "connection"}) + _NON_RETRYABLE_429_ERROR_TOKENS = frozenset({ + "insufficient_quota", + "quota_exceeded", + "quota_exhausted", + "billing_hard_limit_reached", + "insufficient_balance", + "credit_balance_too_low", + "billing_not_active", + "payment_required", + }) + _RETRYABLE_429_ERROR_TOKENS = frozenset({ + "rate_limit_exceeded", + "rate_limit_error", + "too_many_requests", + "request_limit_exceeded", + "requests_limit_exceeded", + "overloaded_error", + }) + _NON_RETRYABLE_429_TEXT_MARKERS = ( + "insufficient_quota", + "insufficient quota", + "quota exceeded", + "quota exhausted", + "billing hard limit", + "billing_hard_limit_reached", + "billing not active", + "insufficient balance", + "insufficient_balance", + "credit balance too low", + "payment required", + "out of credits", + "out of quota", + "exceeded your current quota", + ) + _RETRYABLE_429_TEXT_MARKERS = ( + "rate limit", + "rate_limit", + "too many requests", + "retry after", + "try again in", + "temporarily unavailable", + "overloaded", + "concurrency limit", + ) _SENTINEL = object() @@ -209,6 +255,8 @@ class LLMProvider(ABC): if response.error_status_code is not None: status = int(response.error_status_code) + if status == 429: + return cls._is_retryable_429_response(response) if status in cls._RETRYABLE_STATUS_CODES or status >= 500: return True @@ -218,6 +266,61 @@ class LLMProvider(ABC): return cls._is_transient_error(response.content) + @staticmethod + def _normalize_error_token(value: Any) -> str | None: + if value is None: + return None + token = str(value).strip().lower() + return token or None + + @classmethod + def _extract_error_type_code(cls, payload: Any) -> tuple[str | None, str | None]: + data: dict[str, Any] | None = None + if isinstance(payload, dict): + data = payload + elif isinstance(payload, str): + text = payload.strip() + if text: + try: + parsed = json.loads(text) + except Exception: + parsed = None + if isinstance(parsed, dict): + data = parsed + if not isinstance(data, dict): + return None, None + + error_obj = data.get("error") + type_value = data.get("type") + code_value = data.get("code") + if isinstance(error_obj, dict): + type_value = error_obj.get("type") or type_value + code_value = error_obj.get("code") or code_value + + return cls._normalize_error_token(type_value), cls._normalize_error_token(code_value) + + @classmethod + def _is_retryable_429_response(cls, response: LLMResponse) -> bool: + type_token = cls._normalize_error_token(response.error_type) + code_token = cls._normalize_error_token(response.error_code) + semantic_tokens = { + token for token in (type_token, code_token) + if token is not None + } + if any(token in cls._NON_RETRYABLE_429_ERROR_TOKENS for token in semantic_tokens): + return False + + content = (response.content or "").lower() + if any(marker in content for marker in cls._NON_RETRYABLE_429_TEXT_MARKERS): + return False + + if any(token in cls._RETRYABLE_429_ERROR_TOKENS for token in semantic_tokens): + return True + if any(marker in content for marker in cls._RETRYABLE_429_TEXT_MARKERS): + return True + # Unknown 429 defaults to WAIT+retry. + return True + @staticmethod def _strip_image_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]] | None: """Replace image_url blocks with text placeholder. Returns None if no images found.""" diff --git a/nanobot/providers/openai_compat_provider.py b/nanobot/providers/openai_compat_provider.py index 3120261d1..cb25e6f8c 100644 --- a/nanobot/providers/openai_compat_provider.py +++ b/nanobot/providers/openai_compat_provider.py @@ -621,6 +621,19 @@ class OpenAICompatProvider(LLMProvider): def _extract_error_metadata(cls, e: Exception) -> dict[str, Any]: response = getattr(e, "response", None) headers = getattr(response, "headers", None) + payload = ( + getattr(e, "body", None) + or getattr(e, "doc", None) + or getattr(response, "text", None) + ) + if payload is None and response is not None: + response_json = getattr(response, "json", None) + if callable(response_json): + try: + payload = response_json() + except Exception: + payload = None + error_type, error_code = LLMProvider._extract_error_type_code(payload) status_code = getattr(e, "status_code", None) if status_code is None and response is not None: @@ -646,6 +659,8 @@ class OpenAICompatProvider(LLMProvider): return { "error_status_code": int(status_code) if status_code is not None else None, "error_kind": error_kind, + "error_type": error_type, + "error_code": error_code, "error_retry_after_s": cls._parse_retry_after_headers(headers), "error_should_retry": should_retry, } diff --git a/tests/providers/test_provider_error_metadata.py b/tests/providers/test_provider_error_metadata.py index b13c667de..27f0eb0f1 100644 --- a/tests/providers/test_provider_error_metadata.py +++ b/tests/providers/test_provider_error_metadata.py @@ -26,14 +26,16 @@ def test_openai_handle_error_extracts_structured_metadata() -> None: err.response = _fake_response( status_code=409, headers={"retry-after-ms": "250", "x-should-retry": "false"}, - text='{"error":"conflict"}', + text='{"error":{"type":"rate_limit_exceeded","code":"rate_limit_exceeded"}}', ) - err.body = {"error": "conflict"} + err.body = {"error": {"type": "rate_limit_exceeded", "code": "rate_limit_exceeded"}} response = OpenAICompatProvider._handle_error(err) assert response.finish_reason == "error" assert response.error_status_code == 409 + assert response.error_type == "rate_limit_exceeded" + assert response.error_code == "rate_limit_exceeded" assert response.error_retry_after_s == 0.25 assert response.error_should_retry is False @@ -58,11 +60,13 @@ def test_anthropic_error_response_extracts_structured_metadata() -> None: status_code=408, headers={"retry-after": "1.5", "x-should-retry": "true"}, ) + err.body = {"type": "error", "error": {"type": "rate_limit_error"}} response = AnthropicProvider._error_response(err) assert response.finish_reason == "error" assert response.error_status_code == 408 + assert response.error_type == "rate_limit_error" assert response.error_retry_after_s == 1.5 assert response.error_should_retry is True diff --git a/tests/providers/test_provider_retry.py b/tests/providers/test_provider_retry.py index 038473c69..ad8048162 100644 --- a/tests/providers/test_provider_retry.py +++ b/tests/providers/test_provider_retry.py @@ -297,6 +297,59 @@ async def test_chat_with_retry_retries_structured_status_code_without_keyword(mo assert delays == [1] +@pytest.mark.asyncio +async def test_chat_with_retry_stops_on_429_quota_exhausted(monkeypatch) -> None: + provider = ScriptedProvider([ + LLMResponse( + content='{"error":{"type":"insufficient_quota","code":"insufficient_quota"}}', + finish_reason="error", + error_status_code=429, + error_type="insufficient_quota", + error_code="insufficient_quota", + ), + LLMResponse(content="ok"), + ]) + delays: list[float] = [] + + async def _fake_sleep(delay: float) -> None: + delays.append(delay) + + monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep) + + response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}]) + + assert response.finish_reason == "error" + assert provider.calls == 1 + assert delays == [] + + +@pytest.mark.asyncio +async def test_chat_with_retry_retries_429_transient_rate_limit(monkeypatch) -> None: + provider = ScriptedProvider([ + LLMResponse( + content='{"error":{"type":"rate_limit_exceeded","code":"rate_limit_exceeded"}}', + finish_reason="error", + error_status_code=429, + error_type="rate_limit_exceeded", + error_code="rate_limit_exceeded", + error_retry_after_s=0.2, + ), + LLMResponse(content="ok"), + ]) + delays: list[float] = [] + + async def _fake_sleep(delay: float) -> None: + delays.append(delay) + + monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep) + + response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}]) + + assert response.content == "ok" + assert provider.calls == 2 + assert delays == [0.2] + + @pytest.mark.asyncio async def test_chat_with_retry_retries_structured_timeout_kind(monkeypatch) -> None: provider = ScriptedProvider([ @@ -389,4 +442,3 @@ async def test_persistent_retry_aborts_after_ten_identical_transient_errors(monk assert response.content == "429 rate limit" assert provider.calls == 10 assert delays == [1, 2, 4, 4, 4, 4, 4, 4, 4] - From aeba9a23e63939b276b125c304e69533e86ce0be Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Mon, 6 Apr 2026 08:35:02 +0000 Subject: [PATCH 3/4] refactor: remove dead _error_response wrapper in Anthropic provider Fold _error_response back into _handle_error to match OpenAI/Azure convention. Update all call sites and tests accordingly. Made-with: Cursor --- nanobot/providers/anthropic_provider.py | 10 +++------- tests/providers/test_provider_error_metadata.py | 8 ++++---- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/nanobot/providers/anthropic_provider.py b/nanobot/providers/anthropic_provider.py index 4b90080a0..7ed94a9ba 100644 --- a/nanobot/providers/anthropic_provider.py +++ b/nanobot/providers/anthropic_provider.py @@ -98,7 +98,7 @@ class AnthropicProvider(LLMProvider): return value if value > 0 else None @classmethod - def _error_response(cls, e: Exception) -> LLMResponse: + def _handle_error(cls, e: Exception) -> LLMResponse: response = getattr(e, "response", None) headers = getattr(response, "headers", None) payload = ( @@ -505,10 +505,6 @@ class AnthropicProvider(LLMProvider): # Public API # ------------------------------------------------------------------ - @classmethod - def _handle_error(cls, e: Exception) -> LLMResponse: - return cls._error_response(e) - async def chat( self, messages: list[dict[str, Any]], @@ -527,7 +523,7 @@ class AnthropicProvider(LLMProvider): response = await self._client.messages.create(**kwargs) return self._parse_response(response) except Exception as e: - return self._error_response(e) + return self._handle_error(e) async def chat_stream( self, @@ -573,7 +569,7 @@ class AnthropicProvider(LLMProvider): error_kind="timeout", ) except Exception as e: - return self._error_response(e) + return self._handle_error(e) def get_default_model(self) -> str: return self.default_model diff --git a/tests/providers/test_provider_error_metadata.py b/tests/providers/test_provider_error_metadata.py index 27f0eb0f1..ea2532acf 100644 --- a/tests/providers/test_provider_error_metadata.py +++ b/tests/providers/test_provider_error_metadata.py @@ -50,7 +50,7 @@ def test_openai_handle_error_marks_timeout_kind() -> None: assert response.error_kind == "timeout" -def test_anthropic_error_response_extracts_structured_metadata() -> None: +def test_anthropic_handle_error_extracts_structured_metadata() -> None: class FakeStatusError(Exception): pass @@ -62,7 +62,7 @@ def test_anthropic_error_response_extracts_structured_metadata() -> None: ) err.body = {"type": "error", "error": {"type": "rate_limit_error"}} - response = AnthropicProvider._error_response(err) + response = AnthropicProvider._handle_error(err) assert response.finish_reason == "error" assert response.error_status_code == 408 @@ -71,11 +71,11 @@ def test_anthropic_error_response_extracts_structured_metadata() -> None: assert response.error_should_retry is True -def test_anthropic_error_response_marks_connection_kind() -> None: +def test_anthropic_handle_error_marks_connection_kind() -> None: class FakeConnectionError(Exception): pass - response = AnthropicProvider._error_response(FakeConnectionError("connection")) + response = AnthropicProvider._handle_error(FakeConnectionError("connection")) assert response.finish_reason == "error" assert response.error_kind == "connection" From 35f53a721d372a36ffe503be4918cb59c3a930a1 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Mon, 6 Apr 2026 08:44:52 +0000 Subject: [PATCH 4/4] refactor: consolidate _parse_retry_after_headers into base class Merge the three retry-after header parsers (base, OpenAI, Anthropic) into a single _extract_retry_after_from_headers on LLMProvider that handles retry-after-ms, case-insensitive lookup, and HTTP date. Remove the per-provider _parse_retry_after_headers duplicates and their now-unused email.utils / time imports. Add test for retry-after-ms. Made-with: Cursor --- nanobot/providers/anthropic_provider.py | 47 +-------------------- nanobot/providers/base.py | 30 +++++++++---- nanobot/providers/openai_compat_provider.py | 36 +--------------- tests/providers/test_provider_retry.py | 8 ++++ 4 files changed, 32 insertions(+), 89 deletions(-) diff --git a/nanobot/providers/anthropic_provider.py b/nanobot/providers/anthropic_provider.py index 7ed94a9ba..e389b51ed 100644 --- a/nanobot/providers/anthropic_provider.py +++ b/nanobot/providers/anthropic_provider.py @@ -3,12 +3,10 @@ from __future__ import annotations import asyncio -import email.utils import os import re import secrets import string -import time from collections.abc import Awaitable, Callable from typing import Any @@ -54,49 +52,6 @@ class AnthropicProvider(LLMProvider): client_kw["max_retries"] = 0 self._client = AsyncAnthropic(**client_kw) - @staticmethod - def _parse_retry_after_headers(headers: Any) -> float | None: - if headers is None: - return None - - def _header_value(name: str) -> Any: - if hasattr(headers, "get"): - value = headers.get(name) or headers.get(name.title()) - if value is not None: - return value - if isinstance(headers, dict): - for key, value in headers.items(): - if isinstance(key, str) and key.lower() == name.lower(): - return value - return None - - try: - retry_ms = _header_value("retry-after-ms") - if retry_ms is not None: - value = float(retry_ms) / 1000.0 - if value > 0: - return value - except (TypeError, ValueError): - pass - - retry_after = _header_value("retry-after") - try: - if retry_after is not None: - value = float(retry_after) - if value > 0: - return value - except (TypeError, ValueError): - pass - - if retry_after is None: - return None - retry_date_tuple = email.utils.parsedate_tz(retry_after) - if retry_date_tuple is None: - return None - retry_date = email.utils.mktime_tz(retry_date_tuple) - value = float(retry_date - time.time()) - return value if value > 0 else None - @classmethod def _handle_error(cls, e: Exception) -> LLMResponse: response = getattr(e, "response", None) @@ -115,7 +70,7 @@ class AnthropicProvider(LLMProvider): payload = None payload_text = payload if isinstance(payload, str) else str(payload) if payload is not None else "" msg = f"Error: {payload_text.strip()[:500]}" if payload_text.strip() else f"Error calling LLM: {e}" - retry_after = cls._parse_retry_after_headers(headers) + retry_after = cls._extract_retry_after_from_headers(headers) if retry_after is None: retry_after = LLMProvider._extract_retry_after(msg) diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py index da229dcc3..d5833c9ae 100644 --- a/nanobot/providers/base.py +++ b/nanobot/providers/base.py @@ -524,14 +524,28 @@ class LLMProvider(ABC): def _extract_retry_after_from_headers(cls, headers: Any) -> float | None: if not headers: return None - retry_after: Any = None - if hasattr(headers, "get"): - retry_after = headers.get("retry-after") or headers.get("Retry-After") - if retry_after is None and isinstance(headers, dict): - for key, value in headers.items(): - if isinstance(key, str) and key.lower() == "retry-after": - retry_after = value - break + + def _header_value(name: str) -> Any: + if hasattr(headers, "get"): + value = headers.get(name) or headers.get(name.title()) + if value is not None: + return value + if isinstance(headers, dict): + for key, value in headers.items(): + if isinstance(key, str) and key.lower() == name.lower(): + return value + return None + + try: + retry_ms = _header_value("retry-after-ms") + if retry_ms is not None: + value = float(retry_ms) / 1000.0 + if value > 0: + return value + except (TypeError, ValueError): + pass + + retry_after = _header_value("retry-after") if retry_after is None: return None retry_after_text = str(retry_after).strip() diff --git a/nanobot/providers/openai_compat_provider.py b/nanobot/providers/openai_compat_provider.py index cb662556a..706268585 100644 --- a/nanobot/providers/openai_compat_provider.py +++ b/nanobot/providers/openai_compat_provider.py @@ -3,13 +3,11 @@ from __future__ import annotations import asyncio -import email.utils import hashlib import importlib.util import os import secrets import string -import time import uuid from collections.abc import Awaitable, Callable from typing import TYPE_CHECKING, Any @@ -636,38 +634,6 @@ class OpenAICompatProvider(LLMProvider): reasoning_content="".join(reasoning_parts) or None, ) - @staticmethod - def _parse_retry_after_headers(headers: Any) -> float | None: - if headers is None: - return None - - try: - retry_ms = headers.get("retry-after-ms") - if retry_ms is not None: - value = float(retry_ms) / 1000.0 - if value > 0: - return value - except (TypeError, ValueError): - pass - - retry_after = headers.get("retry-after") - try: - if retry_after is not None: - value = float(retry_after) - if value > 0: - return value - except (TypeError, ValueError): - pass - - if retry_after is None: - return None - retry_date_tuple = email.utils.parsedate_tz(retry_after) - if retry_date_tuple is None: - return None - retry_date = email.utils.mktime_tz(retry_date_tuple) - value = float(retry_date - time.time()) - return value if value > 0 else None - @classmethod def _extract_error_metadata(cls, e: Exception) -> dict[str, Any]: response = getattr(e, "response", None) @@ -712,7 +678,7 @@ class OpenAICompatProvider(LLMProvider): "error_kind": error_kind, "error_type": error_type, "error_code": error_code, - "error_retry_after_s": cls._parse_retry_after_headers(headers), + "error_retry_after_s": cls._extract_retry_after_from_headers(headers), "error_should_retry": should_retry, } diff --git a/tests/providers/test_provider_retry.py b/tests/providers/test_provider_retry.py index ad8048162..78c2a791e 100644 --- a/tests/providers/test_provider_retry.py +++ b/tests/providers/test_provider_retry.py @@ -254,6 +254,14 @@ def test_extract_retry_after_from_headers_supports_numeric_and_http_date() -> No ) == 0.1 +def test_extract_retry_after_from_headers_supports_retry_after_ms() -> None: + assert LLMProvider._extract_retry_after_from_headers({"retry-after-ms": "250"}) == 0.25 + assert LLMProvider._extract_retry_after_from_headers({"Retry-After-Ms": "1000"}) == 1.0 + assert LLMProvider._extract_retry_after_from_headers( + {"retry-after-ms": "500", "retry-after": "10"}, + ) == 0.5 + + @pytest.mark.asyncio async def test_chat_with_retry_prefers_structured_retry_after_when_present(monkeypatch) -> None: provider = ScriptedProvider([