diff --git a/nanobot/providers/anthropic_provider.py b/nanobot/providers/anthropic_provider.py
index a46cc32e9..2c6aa531e 100644
--- a/nanobot/providers/anthropic_provider.py
+++ b/nanobot/providers/anthropic_provider.py
@@ -537,6 +537,13 @@ class AnthropicProvider(LLMProvider):
     # Public API
     # ------------------------------------------------------------------
 
+    @staticmethod
+    def _is_streaming_required_error(e: Exception) -> bool:
+        """Return True when ``e`` is a ValueError mentioning 'streaming is required'.
+        The Anthropic SDK rejects long non-stream requests with such an error;
+        the case-insensitive substring match tolerates future wording tweaks."""
+        return isinstance(e, ValueError) and "streaming is required" in str(e).lower()
+
     async def chat(
         self,
         messages: list[dict[str, Any]],
@@ -555,6 +562,21 @@ class AnthropicProvider(LLMProvider):
             response = await self._client.messages.create(**kwargs)
             return self._parse_response(response)
         except Exception as e:
+            if self._is_streaming_required_error(e):
+                # Anthropic SDK refuses non-stream calls when max_tokens (plus
+                # extended thinking budget) could push the request past the
+                # 10-minute server-side timeout (#2709). Transparently retry
+                # via the streaming path so callers don't need to know the
+                # provider-specific limit.
+                return await self.chat_stream(
+                    messages=messages,
+                    tools=tools,
+                    model=model,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                    reasoning_effort=reasoning_effort,
+                    tool_choice=tool_choice,
+                )
             return self._handle_error(e)
 
     async def chat_stream(
diff --git a/tests/providers/test_anthropic_long_request_fallback.py b/tests/providers/test_anthropic_long_request_fallback.py
new file mode 100644
index 000000000..6af968e6f
--- /dev/null
+++ b/tests/providers/test_anthropic_long_request_fallback.py
@@ -0,0 +1,105 @@
+"""Regression test for #2709: Anthropic non-stream long-request fallback.
+
+When ``messages.create`` raises the Anthropic SDK's client-side
+``ValueError("Streaming is required for operations that may take longer
+than 10 minutes...")``, ``AnthropicProvider.chat`` should transparently
+retry via ``chat_stream`` instead of surfacing the error.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.providers.anthropic_provider import AnthropicProvider
+from nanobot.providers.base import LLMResponse
+
+
+_LONG_REQUEST_MESSAGE = (
+    "Streaming is required for operations that may take longer than 10 minutes. "
+    "See https://github.com/anthropics/anthropic-sdk-python#long-requests for more details"
+)
+
+
+def _make_provider() -> AnthropicProvider:
+    provider = AnthropicProvider(api_key="test-key")
+    provider._client = MagicMock()
+    return provider
+
+
+def test_is_streaming_required_error_matches_value_error() -> None:
+    assert AnthropicProvider._is_streaming_required_error(
+        ValueError(_LONG_REQUEST_MESSAGE)
+    ) is True
+
+
+def test_is_streaming_required_error_ignores_other_value_errors() -> None:
+    assert AnthropicProvider._is_streaming_required_error(
+        ValueError("something else went wrong")
+    ) is False
+
+
+def test_is_streaming_required_error_ignores_other_exception_types() -> None:
+    assert AnthropicProvider._is_streaming_required_error(
+        RuntimeError(_LONG_REQUEST_MESSAGE)
+    ) is False
+
+
+@pytest.mark.asyncio
+async def test_chat_falls_back_to_stream_on_long_request_error() -> None:
+    provider = _make_provider()
+    provider._client.messages.create = AsyncMock(
+        side_effect=ValueError(_LONG_REQUEST_MESSAGE)
+    )
+
+    expected = LLMResponse(content="streamed result", finish_reason="stop")
+    captured: dict[str, Any] = {}
+
+    async def _fake_chat_stream(**kwargs: Any) -> LLMResponse:
+        captured.update(kwargs)
+        return expected
+
+    provider.chat_stream = _fake_chat_stream  # type: ignore[method-assign]
+
+    result = await provider.chat(
+        messages=[{"role": "user", "content": "hi"}],
+        max_tokens=64_000,
+        temperature=0.5,
+        reasoning_effort="high",
+        tool_choice="auto",
+    )
+
+    assert result is expected
+    assert captured["messages"] == [{"role": "user", "content": "hi"}]
+    assert captured["max_tokens"] == 64_000
+    assert captured["temperature"] == 0.5
+    assert captured["reasoning_effort"] == "high"
+    assert captured["tool_choice"] == "auto"
+    # The fallback must NOT pass an on_content_delta — chat() callers don't
+    # expect streaming side-effects.
+    assert "on_content_delta" not in captured
+
+
+@pytest.mark.asyncio
+async def test_chat_does_not_fall_back_on_unrelated_value_error() -> None:
+    provider = _make_provider()
+    provider._client.messages.create = AsyncMock(
+        side_effect=ValueError("some other validation failure")
+    )
+
+    called = False
+
+    async def _should_not_be_called(**_kwargs: Any) -> LLMResponse:
+        nonlocal called
+        called = True
+        return LLMResponse(content="x", finish_reason="stop")
+
+    provider.chat_stream = _should_not_be_called  # type: ignore[method-assign]
+
+    result = await provider.chat(messages=[{"role": "user", "content": "hi"}])
+
+    assert called is False
+    # Generic ValueError flows through _handle_error and surfaces as an error response.
+    assert result.finish_reason == "error" or "Error" in (result.content or "")