mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-03 16:25:53 +00:00
Merge PR #2762: fix: make app-layer retry classification structured
fix: make app-layer retry classification structured (408/409/timeout/connection)
This commit is contained in:
commit
f65f788ab1
@ -52,6 +52,62 @@ class AnthropicProvider(LLMProvider):
|
|||||||
client_kw["max_retries"] = 0
|
client_kw["max_retries"] = 0
|
||||||
self._client = AsyncAnthropic(**client_kw)
|
self._client = AsyncAnthropic(**client_kw)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _handle_error(cls, e: Exception) -> LLMResponse:
|
||||||
|
response = getattr(e, "response", None)
|
||||||
|
headers = getattr(response, "headers", None)
|
||||||
|
payload = (
|
||||||
|
getattr(e, "body", None)
|
||||||
|
or getattr(e, "doc", None)
|
||||||
|
or getattr(response, "text", None)
|
||||||
|
)
|
||||||
|
if payload is None and response is not None:
|
||||||
|
response_json = getattr(response, "json", None)
|
||||||
|
if callable(response_json):
|
||||||
|
try:
|
||||||
|
payload = response_json()
|
||||||
|
except Exception:
|
||||||
|
payload = None
|
||||||
|
payload_text = payload if isinstance(payload, str) else str(payload) if payload is not None else ""
|
||||||
|
msg = f"Error: {payload_text.strip()[:500]}" if payload_text.strip() else f"Error calling LLM: {e}"
|
||||||
|
retry_after = cls._extract_retry_after_from_headers(headers)
|
||||||
|
if retry_after is None:
|
||||||
|
retry_after = LLMProvider._extract_retry_after(msg)
|
||||||
|
|
||||||
|
status_code = getattr(e, "status_code", None)
|
||||||
|
if status_code is None and response is not None:
|
||||||
|
status_code = getattr(response, "status_code", None)
|
||||||
|
|
||||||
|
should_retry: bool | None = None
|
||||||
|
if headers is not None:
|
||||||
|
raw = headers.get("x-should-retry")
|
||||||
|
if isinstance(raw, str):
|
||||||
|
lowered = raw.strip().lower()
|
||||||
|
if lowered == "true":
|
||||||
|
should_retry = True
|
||||||
|
elif lowered == "false":
|
||||||
|
should_retry = False
|
||||||
|
|
||||||
|
error_kind: str | None = None
|
||||||
|
error_name = e.__class__.__name__.lower()
|
||||||
|
if "timeout" in error_name:
|
||||||
|
error_kind = "timeout"
|
||||||
|
elif "connection" in error_name:
|
||||||
|
error_kind = "connection"
|
||||||
|
error_type, error_code = LLMProvider._extract_error_type_code(payload)
|
||||||
|
|
||||||
|
return LLMResponse(
|
||||||
|
content=msg,
|
||||||
|
finish_reason="error",
|
||||||
|
retry_after=retry_after,
|
||||||
|
error_status_code=int(status_code) if status_code is not None else None,
|
||||||
|
error_kind=error_kind,
|
||||||
|
error_type=error_type,
|
||||||
|
error_code=error_code,
|
||||||
|
error_retry_after_s=retry_after,
|
||||||
|
error_should_retry=should_retry,
|
||||||
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _strip_prefix(model: str) -> str:
|
def _strip_prefix(model: str) -> str:
|
||||||
if model.startswith("anthropic/"):
|
if model.startswith("anthropic/"):
|
||||||
@ -404,15 +460,6 @@ class AnthropicProvider(LLMProvider):
|
|||||||
# Public API
|
# Public API
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _handle_error(e: Exception) -> LLMResponse:
|
|
||||||
msg = f"Error calling LLM: {e}"
|
|
||||||
response = getattr(e, "response", None)
|
|
||||||
retry_after = LLMProvider._extract_retry_after_from_headers(getattr(response, "headers", None))
|
|
||||||
if retry_after is None:
|
|
||||||
retry_after = LLMProvider._extract_retry_after(msg)
|
|
||||||
return LLMResponse(content=msg, finish_reason="error", retry_after=retry_after)
|
|
||||||
|
|
||||||
async def chat(
|
async def chat(
|
||||||
self,
|
self,
|
||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
@ -474,6 +521,7 @@ class AnthropicProvider(LLMProvider):
|
|||||||
f"{idle_timeout_s} seconds"
|
f"{idle_timeout_s} seconds"
|
||||||
),
|
),
|
||||||
finish_reason="error",
|
finish_reason="error",
|
||||||
|
error_kind="timeout",
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return self._handle_error(e)
|
return self._handle_error(e)
|
||||||
|
|||||||
@ -54,6 +54,13 @@ class LLMResponse:
|
|||||||
retry_after: float | None = None # Provider supplied retry wait in seconds.
|
retry_after: float | None = None # Provider supplied retry wait in seconds.
|
||||||
reasoning_content: str | None = None # Kimi, DeepSeek-R1, MiMo etc.
|
reasoning_content: str | None = None # Kimi, DeepSeek-R1, MiMo etc.
|
||||||
thinking_blocks: list[dict] | None = None # Anthropic extended thinking
|
thinking_blocks: list[dict] | None = None # Anthropic extended thinking
|
||||||
|
# Structured error metadata used by retry policy when finish_reason == "error".
|
||||||
|
error_status_code: int | None = None
|
||||||
|
error_kind: str | None = None # e.g. "timeout", "connection"
|
||||||
|
error_type: str | None = None # Provider/type semantic, e.g. insufficient_quota.
|
||||||
|
error_code: str | None = None # Provider/code semantic, e.g. rate_limit_exceeded.
|
||||||
|
error_retry_after_s: float | None = None
|
||||||
|
error_should_retry: bool | None = None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def has_tool_calls(self) -> bool:
|
def has_tool_calls(self) -> bool:
|
||||||
@ -91,6 +98,52 @@ class LLMProvider(ABC):
|
|||||||
"server error",
|
"server error",
|
||||||
"temporarily unavailable",
|
"temporarily unavailable",
|
||||||
)
|
)
|
||||||
|
_RETRYABLE_STATUS_CODES = frozenset({408, 409, 429})
|
||||||
|
_TRANSIENT_ERROR_KINDS = frozenset({"timeout", "connection"})
|
||||||
|
_NON_RETRYABLE_429_ERROR_TOKENS = frozenset({
|
||||||
|
"insufficient_quota",
|
||||||
|
"quota_exceeded",
|
||||||
|
"quota_exhausted",
|
||||||
|
"billing_hard_limit_reached",
|
||||||
|
"insufficient_balance",
|
||||||
|
"credit_balance_too_low",
|
||||||
|
"billing_not_active",
|
||||||
|
"payment_required",
|
||||||
|
})
|
||||||
|
_RETRYABLE_429_ERROR_TOKENS = frozenset({
|
||||||
|
"rate_limit_exceeded",
|
||||||
|
"rate_limit_error",
|
||||||
|
"too_many_requests",
|
||||||
|
"request_limit_exceeded",
|
||||||
|
"requests_limit_exceeded",
|
||||||
|
"overloaded_error",
|
||||||
|
})
|
||||||
|
_NON_RETRYABLE_429_TEXT_MARKERS = (
|
||||||
|
"insufficient_quota",
|
||||||
|
"insufficient quota",
|
||||||
|
"quota exceeded",
|
||||||
|
"quota exhausted",
|
||||||
|
"billing hard limit",
|
||||||
|
"billing_hard_limit_reached",
|
||||||
|
"billing not active",
|
||||||
|
"insufficient balance",
|
||||||
|
"insufficient_balance",
|
||||||
|
"credit balance too low",
|
||||||
|
"payment required",
|
||||||
|
"out of credits",
|
||||||
|
"out of quota",
|
||||||
|
"exceeded your current quota",
|
||||||
|
)
|
||||||
|
_RETRYABLE_429_TEXT_MARKERS = (
|
||||||
|
"rate limit",
|
||||||
|
"rate_limit",
|
||||||
|
"too many requests",
|
||||||
|
"retry after",
|
||||||
|
"try again in",
|
||||||
|
"temporarily unavailable",
|
||||||
|
"overloaded",
|
||||||
|
"concurrency limit",
|
||||||
|
)
|
||||||
|
|
||||||
_SENTINEL = object()
|
_SENTINEL = object()
|
||||||
|
|
||||||
@ -226,6 +279,80 @@ class LLMProvider(ABC):
|
|||||||
err = (content or "").lower()
|
err = (content or "").lower()
|
||||||
return any(marker in err for marker in cls._TRANSIENT_ERROR_MARKERS)
|
return any(marker in err for marker in cls._TRANSIENT_ERROR_MARKERS)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _is_transient_response(cls, response: LLMResponse) -> bool:
|
||||||
|
"""Prefer structured error metadata, fallback to text markers for legacy providers."""
|
||||||
|
if response.error_should_retry is not None:
|
||||||
|
return bool(response.error_should_retry)
|
||||||
|
|
||||||
|
if response.error_status_code is not None:
|
||||||
|
status = int(response.error_status_code)
|
||||||
|
if status == 429:
|
||||||
|
return cls._is_retryable_429_response(response)
|
||||||
|
if status in cls._RETRYABLE_STATUS_CODES or status >= 500:
|
||||||
|
return True
|
||||||
|
|
||||||
|
kind = (response.error_kind or "").strip().lower()
|
||||||
|
if kind in cls._TRANSIENT_ERROR_KINDS:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return cls._is_transient_error(response.content)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _normalize_error_token(value: Any) -> str | None:
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
token = str(value).strip().lower()
|
||||||
|
return token or None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_error_type_code(cls, payload: Any) -> tuple[str | None, str | None]:
|
||||||
|
data: dict[str, Any] | None = None
|
||||||
|
if isinstance(payload, dict):
|
||||||
|
data = payload
|
||||||
|
elif isinstance(payload, str):
|
||||||
|
text = payload.strip()
|
||||||
|
if text:
|
||||||
|
try:
|
||||||
|
parsed = json.loads(text)
|
||||||
|
except Exception:
|
||||||
|
parsed = None
|
||||||
|
if isinstance(parsed, dict):
|
||||||
|
data = parsed
|
||||||
|
if not isinstance(data, dict):
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
error_obj = data.get("error")
|
||||||
|
type_value = data.get("type")
|
||||||
|
code_value = data.get("code")
|
||||||
|
if isinstance(error_obj, dict):
|
||||||
|
type_value = error_obj.get("type") or type_value
|
||||||
|
code_value = error_obj.get("code") or code_value
|
||||||
|
|
||||||
|
return cls._normalize_error_token(type_value), cls._normalize_error_token(code_value)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _is_retryable_429_response(cls, response: LLMResponse) -> bool:
|
||||||
|
type_token = cls._normalize_error_token(response.error_type)
|
||||||
|
code_token = cls._normalize_error_token(response.error_code)
|
||||||
|
semantic_tokens = {
|
||||||
|
token for token in (type_token, code_token)
|
||||||
|
if token is not None
|
||||||
|
}
|
||||||
|
if any(token in cls._NON_RETRYABLE_429_ERROR_TOKENS for token in semantic_tokens):
|
||||||
|
return False
|
||||||
|
|
||||||
|
content = (response.content or "").lower()
|
||||||
|
if any(marker in content for marker in cls._NON_RETRYABLE_429_TEXT_MARKERS):
|
||||||
|
return False
|
||||||
|
|
||||||
|
if any(token in cls._RETRYABLE_429_ERROR_TOKENS for token in semantic_tokens):
|
||||||
|
return True
|
||||||
|
if any(marker in content for marker in cls._RETRYABLE_429_TEXT_MARKERS):
|
||||||
|
return True
|
||||||
|
# Unknown 429 defaults to WAIT+retry.
|
||||||
|
return True
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _strip_image_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]] | None:
|
def _strip_image_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]] | None:
|
||||||
"""Replace image_url blocks with text placeholder. Returns None if no images found."""
|
"""Replace image_url blocks with text placeholder. Returns None if no images found."""
|
||||||
@ -397,14 +524,28 @@ class LLMProvider(ABC):
|
|||||||
def _extract_retry_after_from_headers(cls, headers: Any) -> float | None:
|
def _extract_retry_after_from_headers(cls, headers: Any) -> float | None:
|
||||||
if not headers:
|
if not headers:
|
||||||
return None
|
return None
|
||||||
retry_after: Any = None
|
|
||||||
if hasattr(headers, "get"):
|
def _header_value(name: str) -> Any:
|
||||||
retry_after = headers.get("retry-after") or headers.get("Retry-After")
|
if hasattr(headers, "get"):
|
||||||
if retry_after is None and isinstance(headers, dict):
|
value = headers.get(name) or headers.get(name.title())
|
||||||
for key, value in headers.items():
|
if value is not None:
|
||||||
if isinstance(key, str) and key.lower() == "retry-after":
|
return value
|
||||||
retry_after = value
|
if isinstance(headers, dict):
|
||||||
break
|
for key, value in headers.items():
|
||||||
|
if isinstance(key, str) and key.lower() == name.lower():
|
||||||
|
return value
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
retry_ms = _header_value("retry-after-ms")
|
||||||
|
if retry_ms is not None:
|
||||||
|
value = float(retry_ms) / 1000.0
|
||||||
|
if value > 0:
|
||||||
|
return value
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
retry_after = _header_value("retry-after")
|
||||||
if retry_after is None:
|
if retry_after is None:
|
||||||
return None
|
return None
|
||||||
retry_after_text = str(retry_after).strip()
|
retry_after_text = str(retry_after).strip()
|
||||||
@ -421,6 +562,14 @@ class LLMProvider(ABC):
|
|||||||
remaining = (retry_at - datetime.now(retry_at.tzinfo)).total_seconds()
|
remaining = (retry_at - datetime.now(retry_at.tzinfo)).total_seconds()
|
||||||
return max(0.1, remaining)
|
return max(0.1, remaining)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_retry_after_from_response(cls, response: LLMResponse) -> float | None:
|
||||||
|
if response.error_retry_after_s is not None and response.error_retry_after_s > 0:
|
||||||
|
return response.error_retry_after_s
|
||||||
|
if response.retry_after is not None and response.retry_after > 0:
|
||||||
|
return response.retry_after
|
||||||
|
return cls._extract_retry_after(response.content)
|
||||||
|
|
||||||
async def _sleep_with_heartbeat(
|
async def _sleep_with_heartbeat(
|
||||||
self,
|
self,
|
||||||
delay: float,
|
delay: float,
|
||||||
@ -469,7 +618,7 @@ class LLMProvider(ABC):
|
|||||||
last_error_key = error_key
|
last_error_key = error_key
|
||||||
identical_error_count = 1 if error_key else 0
|
identical_error_count = 1 if error_key else 0
|
||||||
|
|
||||||
if not self._is_transient_error(response.content):
|
if not self._is_transient_response(response):
|
||||||
stripped = self._strip_image_content(original_messages)
|
stripped = self._strip_image_content(original_messages)
|
||||||
if stripped is not None and stripped != kw["messages"]:
|
if stripped is not None and stripped != kw["messages"]:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
@ -492,7 +641,7 @@ class LLMProvider(ABC):
|
|||||||
break
|
break
|
||||||
|
|
||||||
base_delay = delays[min(attempt - 1, len(delays) - 1)]
|
base_delay = delays[min(attempt - 1, len(delays) - 1)]
|
||||||
delay = response.retry_after or self._extract_retry_after(response.content) or base_delay
|
delay = self._extract_retry_after_from_response(response) or base_delay
|
||||||
if persistent:
|
if persistent:
|
||||||
delay = min(delay, self._PERSISTENT_MAX_DELAY)
|
delay = min(delay, self._PERSISTENT_MAX_DELAY)
|
||||||
|
|
||||||
|
|||||||
@ -634,16 +634,73 @@ class OpenAICompatProvider(LLMProvider):
|
|||||||
reasoning_content="".join(reasoning_parts) or None,
|
reasoning_content="".join(reasoning_parts) or None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_error_metadata(cls, e: Exception) -> dict[str, Any]:
|
||||||
|
response = getattr(e, "response", None)
|
||||||
|
headers = getattr(response, "headers", None)
|
||||||
|
payload = (
|
||||||
|
getattr(e, "body", None)
|
||||||
|
or getattr(e, "doc", None)
|
||||||
|
or getattr(response, "text", None)
|
||||||
|
)
|
||||||
|
if payload is None and response is not None:
|
||||||
|
response_json = getattr(response, "json", None)
|
||||||
|
if callable(response_json):
|
||||||
|
try:
|
||||||
|
payload = response_json()
|
||||||
|
except Exception:
|
||||||
|
payload = None
|
||||||
|
error_type, error_code = LLMProvider._extract_error_type_code(payload)
|
||||||
|
|
||||||
|
status_code = getattr(e, "status_code", None)
|
||||||
|
if status_code is None and response is not None:
|
||||||
|
status_code = getattr(response, "status_code", None)
|
||||||
|
|
||||||
|
should_retry: bool | None = None
|
||||||
|
if headers is not None:
|
||||||
|
raw = headers.get("x-should-retry")
|
||||||
|
if isinstance(raw, str):
|
||||||
|
lowered = raw.strip().lower()
|
||||||
|
if lowered == "true":
|
||||||
|
should_retry = True
|
||||||
|
elif lowered == "false":
|
||||||
|
should_retry = False
|
||||||
|
|
||||||
|
error_kind: str | None = None
|
||||||
|
error_name = e.__class__.__name__.lower()
|
||||||
|
if "timeout" in error_name:
|
||||||
|
error_kind = "timeout"
|
||||||
|
elif "connection" in error_name:
|
||||||
|
error_kind = "connection"
|
||||||
|
|
||||||
|
return {
|
||||||
|
"error_status_code": int(status_code) if status_code is not None else None,
|
||||||
|
"error_kind": error_kind,
|
||||||
|
"error_type": error_type,
|
||||||
|
"error_code": error_code,
|
||||||
|
"error_retry_after_s": cls._extract_retry_after_from_headers(headers),
|
||||||
|
"error_should_retry": should_retry,
|
||||||
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _handle_error(e: Exception) -> LLMResponse:
|
def _handle_error(e: Exception) -> LLMResponse:
|
||||||
|
body = (
|
||||||
|
getattr(e, "doc", None)
|
||||||
|
or getattr(e, "body", None)
|
||||||
|
or getattr(getattr(e, "response", None), "text", None)
|
||||||
|
)
|
||||||
|
body_text = body if isinstance(body, str) else str(body) if body is not None else ""
|
||||||
|
msg = f"Error: {body_text.strip()[:500]}" if body_text.strip() else f"Error calling LLM: {e}"
|
||||||
response = getattr(e, "response", None)
|
response = getattr(e, "response", None)
|
||||||
body = getattr(e, "doc", None) or getattr(response, "text", None)
|
|
||||||
body_text = str(body).strip() if body is not None else ""
|
|
||||||
msg = f"Error: {body_text[:500]}" if body_text else f"Error calling LLM: {e}"
|
|
||||||
retry_after = LLMProvider._extract_retry_after_from_headers(getattr(response, "headers", None))
|
retry_after = LLMProvider._extract_retry_after_from_headers(getattr(response, "headers", None))
|
||||||
if retry_after is None:
|
if retry_after is None:
|
||||||
retry_after = LLMProvider._extract_retry_after(msg)
|
retry_after = LLMProvider._extract_retry_after(msg)
|
||||||
return LLMResponse(content=msg, finish_reason="error", retry_after=retry_after)
|
return LLMResponse(
|
||||||
|
content=msg,
|
||||||
|
finish_reason="error",
|
||||||
|
retry_after=retry_after,
|
||||||
|
**OpenAICompatProvider._extract_error_metadata(e),
|
||||||
|
)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Public API
|
# Public API
|
||||||
@ -711,6 +768,7 @@ class OpenAICompatProvider(LLMProvider):
|
|||||||
f"{idle_timeout_s} seconds"
|
f"{idle_timeout_s} seconds"
|
||||||
),
|
),
|
||||||
finish_reason="error",
|
finish_reason="error",
|
||||||
|
error_kind="timeout",
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return self._handle_error(e)
|
return self._handle_error(e)
|
||||||
|
|||||||
81
tests/providers/test_provider_error_metadata.py
Normal file
81
tests/providers/test_provider_error_metadata.py
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
|
from nanobot.providers.anthropic_provider import AnthropicProvider
|
||||||
|
from nanobot.providers.openai_compat_provider import OpenAICompatProvider
|
||||||
|
|
||||||
|
|
||||||
|
def _fake_response(
|
||||||
|
*,
|
||||||
|
status_code: int,
|
||||||
|
headers: dict[str, str] | None = None,
|
||||||
|
text: str = "",
|
||||||
|
) -> SimpleNamespace:
|
||||||
|
return SimpleNamespace(
|
||||||
|
status_code=status_code,
|
||||||
|
headers=headers or {},
|
||||||
|
text=text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_openai_handle_error_extracts_structured_metadata() -> None:
|
||||||
|
class FakeStatusError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
err = FakeStatusError("boom")
|
||||||
|
err.status_code = 409
|
||||||
|
err.response = _fake_response(
|
||||||
|
status_code=409,
|
||||||
|
headers={"retry-after-ms": "250", "x-should-retry": "false"},
|
||||||
|
text='{"error":{"type":"rate_limit_exceeded","code":"rate_limit_exceeded"}}',
|
||||||
|
)
|
||||||
|
err.body = {"error": {"type": "rate_limit_exceeded", "code": "rate_limit_exceeded"}}
|
||||||
|
|
||||||
|
response = OpenAICompatProvider._handle_error(err)
|
||||||
|
|
||||||
|
assert response.finish_reason == "error"
|
||||||
|
assert response.error_status_code == 409
|
||||||
|
assert response.error_type == "rate_limit_exceeded"
|
||||||
|
assert response.error_code == "rate_limit_exceeded"
|
||||||
|
assert response.error_retry_after_s == 0.25
|
||||||
|
assert response.error_should_retry is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_openai_handle_error_marks_timeout_kind() -> None:
|
||||||
|
class FakeTimeoutError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
response = OpenAICompatProvider._handle_error(FakeTimeoutError("timeout"))
|
||||||
|
|
||||||
|
assert response.finish_reason == "error"
|
||||||
|
assert response.error_kind == "timeout"
|
||||||
|
|
||||||
|
|
||||||
|
def test_anthropic_handle_error_extracts_structured_metadata() -> None:
|
||||||
|
class FakeStatusError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
err = FakeStatusError("boom")
|
||||||
|
err.status_code = 408
|
||||||
|
err.response = _fake_response(
|
||||||
|
status_code=408,
|
||||||
|
headers={"retry-after": "1.5", "x-should-retry": "true"},
|
||||||
|
)
|
||||||
|
err.body = {"type": "error", "error": {"type": "rate_limit_error"}}
|
||||||
|
|
||||||
|
response = AnthropicProvider._handle_error(err)
|
||||||
|
|
||||||
|
assert response.finish_reason == "error"
|
||||||
|
assert response.error_status_code == 408
|
||||||
|
assert response.error_type == "rate_limit_error"
|
||||||
|
assert response.error_retry_after_s == 1.5
|
||||||
|
assert response.error_should_retry is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_anthropic_handle_error_marks_connection_kind() -> None:
|
||||||
|
class FakeConnectionError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
response = AnthropicProvider._handle_error(FakeConnectionError("connection"))
|
||||||
|
|
||||||
|
assert response.finish_reason == "error"
|
||||||
|
assert response.error_kind == "connection"
|
||||||
@ -254,6 +254,14 @@ def test_extract_retry_after_from_headers_supports_numeric_and_http_date() -> No
|
|||||||
) == 0.1
|
) == 0.1
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_retry_after_from_headers_supports_retry_after_ms() -> None:
|
||||||
|
assert LLMProvider._extract_retry_after_from_headers({"retry-after-ms": "250"}) == 0.25
|
||||||
|
assert LLMProvider._extract_retry_after_from_headers({"Retry-After-Ms": "1000"}) == 1.0
|
||||||
|
assert LLMProvider._extract_retry_after_from_headers(
|
||||||
|
{"retry-after-ms": "500", "retry-after": "10"},
|
||||||
|
) == 0.5
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_chat_with_retry_prefers_structured_retry_after_when_present(monkeypatch) -> None:
|
async def test_chat_with_retry_prefers_structured_retry_after_when_present(monkeypatch) -> None:
|
||||||
provider = ScriptedProvider([
|
provider = ScriptedProvider([
|
||||||
@ -273,6 +281,153 @@ async def test_chat_with_retry_prefers_structured_retry_after_when_present(monke
|
|||||||
assert delays == [9.0]
|
assert delays == [9.0]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_chat_with_retry_retries_structured_status_code_without_keyword(monkeypatch) -> None:
|
||||||
|
provider = ScriptedProvider([
|
||||||
|
LLMResponse(
|
||||||
|
content="request failed",
|
||||||
|
finish_reason="error",
|
||||||
|
error_status_code=409,
|
||||||
|
),
|
||||||
|
LLMResponse(content="ok"),
|
||||||
|
])
|
||||||
|
delays: list[float] = []
|
||||||
|
|
||||||
|
async def _fake_sleep(delay: float) -> None:
|
||||||
|
delays.append(delay)
|
||||||
|
|
||||||
|
monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
|
||||||
|
|
||||||
|
response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
|
||||||
|
|
||||||
|
assert response.content == "ok"
|
||||||
|
assert provider.calls == 2
|
||||||
|
assert delays == [1]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_chat_with_retry_stops_on_429_quota_exhausted(monkeypatch) -> None:
|
||||||
|
provider = ScriptedProvider([
|
||||||
|
LLMResponse(
|
||||||
|
content='{"error":{"type":"insufficient_quota","code":"insufficient_quota"}}',
|
||||||
|
finish_reason="error",
|
||||||
|
error_status_code=429,
|
||||||
|
error_type="insufficient_quota",
|
||||||
|
error_code="insufficient_quota",
|
||||||
|
),
|
||||||
|
LLMResponse(content="ok"),
|
||||||
|
])
|
||||||
|
delays: list[float] = []
|
||||||
|
|
||||||
|
async def _fake_sleep(delay: float) -> None:
|
||||||
|
delays.append(delay)
|
||||||
|
|
||||||
|
monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
|
||||||
|
|
||||||
|
response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
|
||||||
|
|
||||||
|
assert response.finish_reason == "error"
|
||||||
|
assert provider.calls == 1
|
||||||
|
assert delays == []
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_chat_with_retry_retries_429_transient_rate_limit(monkeypatch) -> None:
|
||||||
|
provider = ScriptedProvider([
|
||||||
|
LLMResponse(
|
||||||
|
content='{"error":{"type":"rate_limit_exceeded","code":"rate_limit_exceeded"}}',
|
||||||
|
finish_reason="error",
|
||||||
|
error_status_code=429,
|
||||||
|
error_type="rate_limit_exceeded",
|
||||||
|
error_code="rate_limit_exceeded",
|
||||||
|
error_retry_after_s=0.2,
|
||||||
|
),
|
||||||
|
LLMResponse(content="ok"),
|
||||||
|
])
|
||||||
|
delays: list[float] = []
|
||||||
|
|
||||||
|
async def _fake_sleep(delay: float) -> None:
|
||||||
|
delays.append(delay)
|
||||||
|
|
||||||
|
monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
|
||||||
|
|
||||||
|
response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
|
||||||
|
|
||||||
|
assert response.content == "ok"
|
||||||
|
assert provider.calls == 2
|
||||||
|
assert delays == [0.2]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_chat_with_retry_retries_structured_timeout_kind(monkeypatch) -> None:
|
||||||
|
provider = ScriptedProvider([
|
||||||
|
LLMResponse(
|
||||||
|
content="request failed",
|
||||||
|
finish_reason="error",
|
||||||
|
error_kind="timeout",
|
||||||
|
),
|
||||||
|
LLMResponse(content="ok"),
|
||||||
|
])
|
||||||
|
delays: list[float] = []
|
||||||
|
|
||||||
|
async def _fake_sleep(delay: float) -> None:
|
||||||
|
delays.append(delay)
|
||||||
|
|
||||||
|
monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
|
||||||
|
|
||||||
|
response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
|
||||||
|
|
||||||
|
assert response.content == "ok"
|
||||||
|
assert provider.calls == 2
|
||||||
|
assert delays == [1]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_chat_with_retry_structured_should_retry_false_disables_retry(monkeypatch) -> None:
|
||||||
|
provider = ScriptedProvider([
|
||||||
|
LLMResponse(
|
||||||
|
content="429 rate limit",
|
||||||
|
finish_reason="error",
|
||||||
|
error_should_retry=False,
|
||||||
|
),
|
||||||
|
])
|
||||||
|
delays: list[float] = []
|
||||||
|
|
||||||
|
async def _fake_sleep(delay: float) -> None:
|
||||||
|
delays.append(delay)
|
||||||
|
|
||||||
|
monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
|
||||||
|
|
||||||
|
response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
|
||||||
|
|
||||||
|
assert response.finish_reason == "error"
|
||||||
|
assert provider.calls == 1
|
||||||
|
assert delays == []
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_chat_with_retry_prefers_structured_retry_after(monkeypatch) -> None:
|
||||||
|
provider = ScriptedProvider([
|
||||||
|
LLMResponse(
|
||||||
|
content="429 rate limit, retry after 99s",
|
||||||
|
finish_reason="error",
|
||||||
|
error_retry_after_s=0.2,
|
||||||
|
),
|
||||||
|
LLMResponse(content="ok"),
|
||||||
|
])
|
||||||
|
delays: list[float] = []
|
||||||
|
|
||||||
|
async def _fake_sleep(delay: float) -> None:
|
||||||
|
delays.append(delay)
|
||||||
|
|
||||||
|
monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
|
||||||
|
|
||||||
|
response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
|
||||||
|
|
||||||
|
assert response.content == "ok"
|
||||||
|
assert delays == [0.2]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_persistent_retry_aborts_after_ten_identical_transient_errors(monkeypatch) -> None:
|
async def test_persistent_retry_aborts_after_ten_identical_transient_errors(monkeypatch) -> None:
|
||||||
provider = ScriptedProvider([
|
provider = ScriptedProvider([
|
||||||
@ -295,4 +450,3 @@ async def test_persistent_retry_aborts_after_ten_identical_transient_errors(monk
|
|||||||
assert response.content == "429 rate limit"
|
assert response.content == "429 rate limit"
|
||||||
assert provider.calls == 10
|
assert provider.calls == 10
|
||||||
assert delays == [1, 2, 4, 4, 4, 4, 4, 4, 4]
|
assert delays == [1, 2, 4, 4, 4, 4, 4, 4, 4]
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user