From 4c54a2b15342481a3e18f5571b3f0cd6850b2a59 Mon Sep 17 00:00:00 2001
From: coldxiangyu <coldxiangyu@163.com>
Date: Fri, 1 May 2026 21:40:20 +0800
Subject: [PATCH] fix(anthropic): auto-fallback to stream on long-request error

The Anthropic SDK raises a client-side ValueError when a non-streaming
`messages.create` call could exceed the 10-minute server timeout (e.g.
high `max_tokens` combined with extended thinking budget). The error
text "Streaming is required for operations that may take longer than
10 minutes" was bubbling up to the user as an opaque LLM error in
channels that use the non-stream path (e.g. wecom in #2709).

Detect this specific ValueError in `chat()` and transparently retry
through `chat_stream()` (without `on_content_delta` so behavior matches
the non-stream contract). Other ValueErrors continue to flow through
`_handle_error` unchanged.

Closes #2709

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 nanobot/providers/anthropic_provider.py       |  22 ++++
 .../test_anthropic_long_request_fallback.py   | 105 ++++++++++++++++++
 2 files changed, 127 insertions(+)
 create mode 100644 tests/providers/test_anthropic_long_request_fallback.py

diff --git a/nanobot/providers/anthropic_provider.py b/nanobot/providers/anthropic_provider.py
index a46cc32e9..2c6aa531e 100644
--- a/nanobot/providers/anthropic_provider.py
+++ b/nanobot/providers/anthropic_provider.py
@@ -537,6 +537,13 @@ class AnthropicProvider(LLMProvider):
     # Public API
     # ------------------------------------------------------------------
 
+    @staticmethod
+    def _is_streaming_required_error(e: Exception) -> bool:
+        """Anthropic SDK rejects long non-stream requests with a ValueError
+        whose message starts with 'Streaming is required'. Match a case-insensitive
+        substring so a future SDK message tweak doesn't break detection."""
+        return isinstance(e, ValueError) and "streaming is required" in str(e).lower()
+
     async def chat(
         self,
         messages: list[dict[str, Any]],
@@ -555,6 +562,21 @@ class AnthropicProvider(LLMProvider):
             response = await self._client.messages.create(**kwargs)
             return self._parse_response(response)
         except Exception as e:
+            if self._is_streaming_required_error(e):
+                # Anthropic SDK refuses non-stream calls when max_tokens (plus
+                # extended thinking budget) could push the request past the
+                # 10-minute server-side timeout (#2709). Transparently retry
+                # via the streaming path so callers don't need to know the
+                # provider-specific limit.
+                return await self.chat_stream(
+                    messages=messages,
+                    tools=tools,
+                    model=model,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                    reasoning_effort=reasoning_effort,
+                    tool_choice=tool_choice,
+                )
             return self._handle_error(e)
 
     async def chat_stream(
diff --git a/tests/providers/test_anthropic_long_request_fallback.py b/tests/providers/test_anthropic_long_request_fallback.py
new file mode 100644
index 000000000..6af968e6f
--- /dev/null
+++ b/tests/providers/test_anthropic_long_request_fallback.py
@@ -0,0 +1,105 @@
+"""Regression test for #2709: Anthropic non-stream long-request fallback.
+
+When ``messages.create`` raises the Anthropic SDK's client-side
+``ValueError("Streaming is required for operations that may take longer
+than 10 minutes...")``, ``AnthropicProvider.chat`` should transparently
+retry via ``chat_stream`` instead of surfacing the error.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.providers.anthropic_provider import AnthropicProvider
+from nanobot.providers.base import LLMResponse
+
+
+_LONG_REQUEST_MESSAGE = (
+    "Streaming is required for operations that may take longer than 10 minutes. "
+    "See https://github.com/anthropics/anthropic-sdk-python#long-requests for more details"
+)
+
+
+def _make_provider() -> AnthropicProvider:
+    provider = AnthropicProvider(api_key="test-key")
+    provider._client = MagicMock()
+    return provider
+
+
+def test_is_streaming_required_error_matches_value_error() -> None:
+    assert AnthropicProvider._is_streaming_required_error(
+        ValueError(_LONG_REQUEST_MESSAGE)
+    ) is True
+
+
+def test_is_streaming_required_error_ignores_other_value_errors() -> None:
+    assert AnthropicProvider._is_streaming_required_error(
+        ValueError("something else went wrong")
+    ) is False
+
+
+def test_is_streaming_required_error_ignores_other_exception_types() -> None:
+    assert AnthropicProvider._is_streaming_required_error(
+        RuntimeError(_LONG_REQUEST_MESSAGE)
+    ) is False
+
+
+@pytest.mark.asyncio
+async def test_chat_falls_back_to_stream_on_long_request_error() -> None:
+    provider = _make_provider()
+    provider._client.messages.create = AsyncMock(
+        side_effect=ValueError(_LONG_REQUEST_MESSAGE)
+    )
+
+    expected = LLMResponse(content="streamed result", finish_reason="stop")
+    captured: dict[str, Any] = {}
+
+    async def _fake_chat_stream(**kwargs: Any) -> LLMResponse:
+        captured.update(kwargs)
+        return expected
+
+    provider.chat_stream = _fake_chat_stream  # type: ignore[method-assign]
+
+    result = await provider.chat(
+        messages=[{"role": "user", "content": "hi"}],
+        max_tokens=64_000,
+        temperature=0.5,
+        reasoning_effort="high",
+        tool_choice="auto",
+    )
+
+    assert result is expected
+    assert captured["messages"] == [{"role": "user", "content": "hi"}]
+    assert captured["max_tokens"] == 64_000
+    assert captured["temperature"] == 0.5
+    assert captured["reasoning_effort"] == "high"
+    assert captured["tool_choice"] == "auto"
+    # The fallback must NOT pass an on_content_delta — chat() callers don't
+    # expect streaming side-effects.
+    assert "on_content_delta" not in captured
+
+
+@pytest.mark.asyncio
+async def test_chat_does_not_fall_back_on_unrelated_value_error() -> None:
+    provider = _make_provider()
+    provider._client.messages.create = AsyncMock(
+        side_effect=ValueError("some other validation failure")
+    )
+
+    called = False
+
+    async def _should_not_be_called(**_kwargs: Any) -> LLMResponse:
+        nonlocal called
+        called = True
+        return LLMResponse(content="x", finish_reason="stop")
+
+    provider.chat_stream = _should_not_be_called  # type: ignore[method-assign]
+
+    result = await provider.chat(messages=[{"role": "user", "content": "hi"}])
+
+    assert called is False
+    # Generic ValueError flows through _handle_error and surfaces as an error response.
+    assert result.finish_reason == "error" or "Error" in (result.content or "")