mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-04-08 20:23:41 +00:00
fix(retry): classify 429 as WAIT vs STOP using semantic signals
This commit is contained in:
parent
cabf093915
commit
31d3061a0a
@ -102,7 +102,20 @@ class AnthropicProvider(LLMProvider):
|
||||
def _error_response(cls, e: Exception) -> LLMResponse:
|
||||
response = getattr(e, "response", None)
|
||||
headers = getattr(response, "headers", None)
|
||||
msg = f"Error calling LLM: {e}"
|
||||
payload = (
|
||||
getattr(e, "body", None)
|
||||
or getattr(e, "doc", None)
|
||||
or getattr(response, "text", None)
|
||||
)
|
||||
if payload is None and response is not None:
|
||||
response_json = getattr(response, "json", None)
|
||||
if callable(response_json):
|
||||
try:
|
||||
payload = response_json()
|
||||
except Exception:
|
||||
payload = None
|
||||
payload_text = payload if isinstance(payload, str) else str(payload) if payload is not None else ""
|
||||
msg = f"Error: {payload_text.strip()[:500]}" if payload_text.strip() else f"Error calling LLM: {e}"
|
||||
retry_after = cls._parse_retry_after_headers(headers)
|
||||
if retry_after is None:
|
||||
retry_after = LLMProvider._extract_retry_after(msg)
|
||||
@ -127,6 +140,7 @@ class AnthropicProvider(LLMProvider):
|
||||
error_kind = "timeout"
|
||||
elif "connection" in error_name:
|
||||
error_kind = "connection"
|
||||
error_type, error_code = LLMProvider._extract_error_type_code(payload)
|
||||
|
||||
return LLMResponse(
|
||||
content=msg,
|
||||
@ -134,6 +148,8 @@ class AnthropicProvider(LLMProvider):
|
||||
retry_after=retry_after,
|
||||
error_status_code=int(status_code) if status_code is not None else None,
|
||||
error_kind=error_kind,
|
||||
error_type=error_type,
|
||||
error_code=error_code,
|
||||
error_retry_after_s=retry_after,
|
||||
error_should_retry=should_retry,
|
||||
)
|
||||
|
||||
@ -57,6 +57,8 @@ class LLMResponse:
|
||||
# Structured error metadata used by retry policy when finish_reason == "error".
|
||||
error_status_code: int | None = None
|
||||
error_kind: str | None = None # e.g. "timeout", "connection"
|
||||
error_type: str | None = None # Provider/type semantic, e.g. insufficient_quota.
|
||||
error_code: str | None = None # Provider/code semantic, e.g. rate_limit_exceeded.
|
||||
error_retry_after_s: float | None = None
|
||||
error_should_retry: bool | None = None
|
||||
|
||||
@ -98,6 +100,50 @@ class LLMProvider(ABC):
|
||||
)
|
||||
_RETRYABLE_STATUS_CODES = frozenset({408, 409, 429})
|
||||
_TRANSIENT_ERROR_KINDS = frozenset({"timeout", "connection"})
|
||||
_NON_RETRYABLE_429_ERROR_TOKENS = frozenset({
|
||||
"insufficient_quota",
|
||||
"quota_exceeded",
|
||||
"quota_exhausted",
|
||||
"billing_hard_limit_reached",
|
||||
"insufficient_balance",
|
||||
"credit_balance_too_low",
|
||||
"billing_not_active",
|
||||
"payment_required",
|
||||
})
|
||||
_RETRYABLE_429_ERROR_TOKENS = frozenset({
|
||||
"rate_limit_exceeded",
|
||||
"rate_limit_error",
|
||||
"too_many_requests",
|
||||
"request_limit_exceeded",
|
||||
"requests_limit_exceeded",
|
||||
"overloaded_error",
|
||||
})
|
||||
_NON_RETRYABLE_429_TEXT_MARKERS = (
|
||||
"insufficient_quota",
|
||||
"insufficient quota",
|
||||
"quota exceeded",
|
||||
"quota exhausted",
|
||||
"billing hard limit",
|
||||
"billing_hard_limit_reached",
|
||||
"billing not active",
|
||||
"insufficient balance",
|
||||
"insufficient_balance",
|
||||
"credit balance too low",
|
||||
"payment required",
|
||||
"out of credits",
|
||||
"out of quota",
|
||||
"exceeded your current quota",
|
||||
)
|
||||
_RETRYABLE_429_TEXT_MARKERS = (
|
||||
"rate limit",
|
||||
"rate_limit",
|
||||
"too many requests",
|
||||
"retry after",
|
||||
"try again in",
|
||||
"temporarily unavailable",
|
||||
"overloaded",
|
||||
"concurrency limit",
|
||||
)
|
||||
|
||||
_SENTINEL = object()
|
||||
|
||||
@ -209,6 +255,8 @@ class LLMProvider(ABC):
|
||||
|
||||
if response.error_status_code is not None:
|
||||
status = int(response.error_status_code)
|
||||
if status == 429:
|
||||
return cls._is_retryable_429_response(response)
|
||||
if status in cls._RETRYABLE_STATUS_CODES or status >= 500:
|
||||
return True
|
||||
|
||||
@ -218,6 +266,61 @@ class LLMProvider(ABC):
|
||||
|
||||
return cls._is_transient_error(response.content)
|
||||
|
||||
@staticmethod
|
||||
def _normalize_error_token(value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
token = str(value).strip().lower()
|
||||
return token or None
|
||||
|
||||
@classmethod
def _extract_error_type_code(cls, payload: Any) -> tuple[str | None, str | None]:
    """Pull the error ``type`` and ``code`` out of a provider error payload.

    Args:
        payload: Either a dict, or a string that is tried as JSON. Any other
            type, an empty string, unparsable text, or JSON that is not an
            object yields ``(None, None)``.

    Returns:
        ``(type, code)``, each passed through ``_normalize_error_token``.
        Values inside a nested ``"error"`` object win over the top-level
        ``"type"`` / ``"code"`` keys when they are truthy.
    """
    body: dict[str, Any] | None = None
    if isinstance(payload, str):
        stripped = payload.strip()
        if stripped:
            try:
                decoded = json.loads(stripped)
            except Exception:
                # Best effort: text that is not JSON simply has no structured fields.
                decoded = None
            if isinstance(decoded, dict):
                body = decoded
    elif isinstance(payload, dict):
        body = payload

    if body is None:
        return None, None

    nested = body.get("error")
    raw_type = body.get("type")
    raw_code = body.get("code")
    if isinstance(nested, dict):
        # Prefer the nested error object; fall back to top-level values
        # when the nested field is missing or empty.
        raw_type = nested.get("type") or raw_type
        raw_code = nested.get("code") or raw_code

    return cls._normalize_error_token(raw_type), cls._normalize_error_token(raw_code)
|
||||
|
||||
@classmethod
def _is_retryable_429_response(cls, response: LLMResponse) -> bool:
    """Decide whether a 429 error response should be waited out and retried.

    Checks run in this order:
      1. structured ``error_type`` / ``error_code`` in the non-retryable set -> ``False``
      2. non-retryable text marker found in the lower-cased content -> ``False``
      3. structured token in the retryable set -> ``True``
      4. retryable text marker found in the lower-cased content -> ``True``
      5. anything else -> ``True``

    Args:
        response: The error response to classify.

    Returns:
        ``False`` only when a quota/billing signal is present; otherwise ``True``.
    """
    tokens = set()
    for raw in (response.error_type, response.error_code):
        normalized = cls._normalize_error_token(raw)
        if normalized is not None:
            tokens.add(normalized)

    # Structured "stop" signals are checked before any text is inspected.
    if not tokens.isdisjoint(cls._NON_RETRYABLE_429_ERROR_TOKENS):
        return False

    lowered = (response.content or "").lower()
    for marker in cls._NON_RETRYABLE_429_TEXT_MARKERS:
        if marker in lowered:
            return False

    # The two checks below return the same value as the default; they are
    # kept to make the positive "retry" signals explicit.
    if not tokens.isdisjoint(cls._RETRYABLE_429_ERROR_TOKENS):
        return True
    for marker in cls._RETRYABLE_429_TEXT_MARKERS:
        if marker in lowered:
            return True

    # Unknown 429 defaults to WAIT+retry.
    return True
|
||||
|
||||
@staticmethod
|
||||
def _strip_image_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]] | None:
|
||||
"""Replace image_url blocks with text placeholder. Returns None if no images found."""
|
||||
|
||||
@ -621,6 +621,19 @@ class OpenAICompatProvider(LLMProvider):
|
||||
def _extract_error_metadata(cls, e: Exception) -> dict[str, Any]:
|
||||
response = getattr(e, "response", None)
|
||||
headers = getattr(response, "headers", None)
|
||||
payload = (
|
||||
getattr(e, "body", None)
|
||||
or getattr(e, "doc", None)
|
||||
or getattr(response, "text", None)
|
||||
)
|
||||
if payload is None and response is not None:
|
||||
response_json = getattr(response, "json", None)
|
||||
if callable(response_json):
|
||||
try:
|
||||
payload = response_json()
|
||||
except Exception:
|
||||
payload = None
|
||||
error_type, error_code = LLMProvider._extract_error_type_code(payload)
|
||||
|
||||
status_code = getattr(e, "status_code", None)
|
||||
if status_code is None and response is not None:
|
||||
@ -646,6 +659,8 @@ class OpenAICompatProvider(LLMProvider):
|
||||
return {
|
||||
"error_status_code": int(status_code) if status_code is not None else None,
|
||||
"error_kind": error_kind,
|
||||
"error_type": error_type,
|
||||
"error_code": error_code,
|
||||
"error_retry_after_s": cls._parse_retry_after_headers(headers),
|
||||
"error_should_retry": should_retry,
|
||||
}
|
||||
|
||||
@ -26,14 +26,16 @@ def test_openai_handle_error_extracts_structured_metadata() -> None:
|
||||
err.response = _fake_response(
|
||||
status_code=409,
|
||||
headers={"retry-after-ms": "250", "x-should-retry": "false"},
|
||||
text='{"error":"conflict"}',
|
||||
text='{"error":{"type":"rate_limit_exceeded","code":"rate_limit_exceeded"}}',
|
||||
)
|
||||
err.body = {"error": "conflict"}
|
||||
err.body = {"error": {"type": "rate_limit_exceeded", "code": "rate_limit_exceeded"}}
|
||||
|
||||
response = OpenAICompatProvider._handle_error(err)
|
||||
|
||||
assert response.finish_reason == "error"
|
||||
assert response.error_status_code == 409
|
||||
assert response.error_type == "rate_limit_exceeded"
|
||||
assert response.error_code == "rate_limit_exceeded"
|
||||
assert response.error_retry_after_s == 0.25
|
||||
assert response.error_should_retry is False
|
||||
|
||||
@ -58,11 +60,13 @@ def test_anthropic_error_response_extracts_structured_metadata() -> None:
|
||||
status_code=408,
|
||||
headers={"retry-after": "1.5", "x-should-retry": "true"},
|
||||
)
|
||||
err.body = {"type": "error", "error": {"type": "rate_limit_error"}}
|
||||
|
||||
response = AnthropicProvider._error_response(err)
|
||||
|
||||
assert response.finish_reason == "error"
|
||||
assert response.error_status_code == 408
|
||||
assert response.error_type == "rate_limit_error"
|
||||
assert response.error_retry_after_s == 1.5
|
||||
assert response.error_should_retry is True
|
||||
|
||||
|
||||
@ -297,6 +297,59 @@ async def test_chat_with_retry_retries_structured_status_code_without_keyword(mo
|
||||
assert delays == [1]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_chat_with_retry_stops_on_429_quota_exhausted(monkeypatch) -> None:
    """A 429 tagged ``insufficient_quota`` is returned after one call with no sleep."""
    quota_error = LLMResponse(
        content='{"error":{"type":"insufficient_quota","code":"insufficient_quota"}}',
        finish_reason="error",
        error_status_code=429,
        error_type="insufficient_quota",
        error_code="insufficient_quota",
    )
    # The second scripted response must never be consumed.
    provider = ScriptedProvider([quota_error, LLMResponse(content="ok")])
    recorded_sleeps: list[float] = []

    async def _record_sleep(delay: float) -> None:
        recorded_sleeps.append(delay)

    monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _record_sleep)

    result = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])

    assert result.finish_reason == "error"
    assert provider.calls == 1
    assert recorded_sleeps == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_chat_with_retry_retries_429_transient_rate_limit(monkeypatch) -> None:
    """A 429 tagged ``rate_limit_exceeded`` is retried once after a 0.2 s sleep."""
    rate_limited = LLMResponse(
        content='{"error":{"type":"rate_limit_exceeded","code":"rate_limit_exceeded"}}',
        finish_reason="error",
        error_status_code=429,
        error_type="rate_limit_exceeded",
        error_code="rate_limit_exceeded",
        error_retry_after_s=0.2,
    )
    provider = ScriptedProvider([rate_limited, LLMResponse(content="ok")])
    recorded_sleeps: list[float] = []

    async def _record_sleep(delay: float) -> None:
        recorded_sleeps.append(delay)

    monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _record_sleep)

    result = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])

    assert result.content == "ok"
    assert provider.calls == 2
    # The single recorded sleep matches error_retry_after_s on the first response.
    assert recorded_sleeps == [0.2]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_chat_with_retry_retries_structured_timeout_kind(monkeypatch) -> None:
|
||||
provider = ScriptedProvider([
|
||||
@ -389,4 +442,3 @@ async def test_persistent_retry_aborts_after_ten_identical_transient_errors(monk
|
||||
assert response.content == "429 rate limit"
|
||||
assert provider.calls == 10
|
||||
assert delays == [1, 2, 4, 4, 4, 4, 4, 4, 4]
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user