fix(anthropic): auto-fallback to stream on long-request error

The Anthropic SDK raises a client-side ValueError when a non-streaming `messages.create` call could exceed the 10-minute server timeout (e.g. high `max_tokens` combined with an extended thinking budget). The error text "Streaming is required for operations that may take longer than 10 minutes" was bubbling up to the user as an opaque LLM error in channels that use the non-stream path (e.g. wecom in #2709).

Detect this specific ValueError in `chat()` and transparently retry through `chat_stream()` (without `on_content_delta`, so behavior matches the non-stream contract). Other ValueErrors continue to flow through `_handle_error` unchanged.

Closes #2709

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 18:12:32 +00:00 · 2026-05-01 21:40:20 +08:00 · 2026-05-01 21:40:20 +08:00 · 4c54a2b153
commit 4c54a2b153
parent 4860a9a6c9
2 changed files with 127 additions and 0 deletions
--- a/nanobot/providers/anthropic_provider.py
+++ b/nanobot/providers/anthropic_provider.py
@ -537,6 +537,13 @@ class AnthropicProvider(LLMProvider):
    # Public API
    # ------------------------------------------------------------------
    @staticmethod
    def _is_streaming_required_error(e: Exception) -> bool:
        """Anthropic SDK rejects long non-stream requests with a ValueError
        whose message starts with 'Streaming is required'. Match defensively
        on substring so a future SDK message tweak doesn't break detection."""
        return isinstance(e, ValueError) and "streaming is required" in str(e).lower()
    async def chat(
        self,
        messages: list[dict[str, Any]],
@ -555,6 +562,21 @@ class AnthropicProvider(LLMProvider):
            response = await self._client.messages.create(**kwargs)
            return self._parse_response(response)
        except Exception as e:
            if self._is_streaming_required_error(e):
                # Anthropic SDK refuses non-stream calls when max_tokens (plus
                # extended thinking budget) could push the request past the
                # 10-minute server-side timeout (#2709). Transparently retry
                # via the streaming path so callers don't need to know the
                # provider-specific limit.
                return await self.chat_stream(
                    messages=messages,
                    tools=tools,
                    model=model,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    reasoning_effort=reasoning_effort,
                    tool_choice=tool_choice,
                )
            return self._handle_error(e)
    async def chat_stream(
--- a/tests/providers/test_anthropic_long_request_fallback.py
+++ b/tests/providers/test_anthropic_long_request_fallback.py
@ -0,0 +1,105 @@
 """Regression test for #2709: Anthropic non-stream long-request fallback.
 When ``messages.create`` raises the Anthropic SDK's client-side
 ``ValueError("Streaming is required for operations that may take longer
 than 10 minutes...")``, ``AnthropicProvider.chat`` should transparently
 retry via ``chat_stream`` instead of surfacing the error.
 """
 from __future__ import annotations
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock
 import pytest
 from nanobot.providers.anthropic_provider import AnthropicProvider
 from nanobot.providers.base import LLMResponse
 # Error text the tests below wrap in a ValueError (or another exception type)
 # to simulate the SDK's long-request rejection described in the module docstring.
 _LONG_REQUEST_MESSAGE = (
    "Streaming is required for operations that may take longer than 10 minutes. "
    "See https://github.com/anthropics/anthropic-sdk-python#long-requests for more details"
 )
 def _make_provider() -> AnthropicProvider:
    """Return an AnthropicProvider whose ``_client`` is replaced by a MagicMock."""
    stubbed = AnthropicProvider(api_key="test-key")
    stubbed._client = MagicMock()
    return stubbed
 def test_is_streaming_required_error_matches_value_error() -> None:
    """A ValueError carrying the long-request message is recognized."""
    long_request_error = ValueError(_LONG_REQUEST_MESSAGE)
    detected = AnthropicProvider._is_streaming_required_error(long_request_error)
    assert detected is True
 def test_is_streaming_required_error_ignores_other_value_errors() -> None:
    """A ValueError with an unrelated message is not treated as a match."""
    unrelated_error = ValueError("something else went wrong")
    detected = AnthropicProvider._is_streaming_required_error(unrelated_error)
    assert detected is False
 def test_is_streaming_required_error_ignores_other_exception_types() -> None:
    """The long-request message on a non-ValueError exception does not match."""
    wrong_type_error = RuntimeError(_LONG_REQUEST_MESSAGE)
    detected = AnthropicProvider._is_streaming_required_error(wrong_type_error)
    assert detected is False
@pytest.mark.asyncio
 async def test_chat_falls_back_to_stream_on_long_request_error() -> None:
    provider = _make_provider()
    provider._client.messages.create = AsyncMock(
        side_effect=ValueError(_LONG_REQUEST_MESSAGE)
    )
    expected = LLMResponse(content="streamed result", finish_reason="stop")
    captured: dict[str, Any] = {}
    async def _fake_chat_stream(**kwargs: Any) -> LLMResponse:
        captured.update(kwargs)
        return expected
    provider.chat_stream = _fake_chat_stream  # type: ignore[method-assign]
    result = await provider.chat(
        messages=[{"role": "user", "content": "hi"}],
        max_tokens=64_000,
        temperature=0.5,
        reasoning_effort="high",
        tool_choice="auto",
    )
    assert result is expected
    assert captured["messages"] == [{"role": "user", "content": "hi"}]
    assert captured["max_tokens"] == 64_000
    assert captured["temperature"] == 0.5
    assert captured["reasoning_effort"] == "high"
    assert captured["tool_choice"] == "auto"
    # The fallback must NOT pass an on_content_delta — chat() callers don't
    # expect streaming side-effects.
    assert "on_content_delta" not in captured
@pytest.mark.asyncio
 async def test_chat_does_not_fall_back_on_unrelated_value_error() -> None:
    provider = _make_provider()
    provider._client.messages.create = AsyncMock(
        side_effect=ValueError("some other validation failure")
    )
    called = False
    async def _should_not_be_called(**_kwargs: Any) -> LLMResponse:
        nonlocal called
        called = True
        return LLMResponse(content="x", finish_reason="stop")
    provider.chat_stream = _should_not_be_called  # type: ignore[method-assign]
    result = await provider.chat(messages=[{"role": "user", "content": "hi"}])
    assert called is False
    # Generic ValueError flows through _handle_error and surfaces as an error response.
    assert result.finish_reason == "error" or "Error" in (result.content or "")