fix(provider): bound OpenAI-compatible request timeouts

2026-05-30 13:31:12 +00:00 · 2026-04-27 16:45:40 +08:00 · 2026-04-27 16:45:40 +08:00 · 9dc99d1b34
commit 9dc99d1b34
parent b8932bc041
2 changed files with 77 additions and 0 deletions
--- a/nanobot/providers/openai_compat_provider.py
+++ b/nanobot/providers/openai_compat_provider.py
@ -60,6 +60,7 @@ _KIMI_THINKING_MODELS: frozenset[str] = frozenset({
    "kimi-k2.6",
    "k2.6-code-preview",
 })
+_OPENAI_COMPAT_REQUEST_TIMEOUT_S = 120.0  # default request timeout, in seconds

 # Maps ProviderSpec.thinking_style → extra_body builder.
 # Each builder takes a bool (thinking_enabled) and returns the dict to
@ -90,6 +91,26 @@ def _is_kimi_thinking_model(model_name: str) -> bool:
    return False


+def _openai_compat_timeout_s() -> float:
+    """Return the request timeout in seconds for OpenAI-compatible providers (env-overridable)."""
+    return _float_env("NANOBOT_OPENAI_COMPAT_TIMEOUT_S", _OPENAI_COMPAT_REQUEST_TIMEOUT_S)
+
+
+def _float_env(name: str, default: float) -> float:
+    """Read a positive, finite float from env var ``name``, falling back to ``default``."""
+    if (raw := os.environ.get(name)) is None or not raw.strip():
+        return default  # unset or blank: fall back without a warning
+    try:
+        value = float(raw)
+    except (TypeError, ValueError):
+        logger.warning("Ignoring invalid {}={!r}; using {}", name, raw, default)
+        return default
+    if 0 < value < float("inf"):  # NaN fails both comparisons, so it is rejected too
+        return value
+    logger.warning("Ignoring non-positive or non-finite {}={!r}; using {}", name, raw, default)
+    return default
+
+
 def _short_tool_id() -> str:
    """9-char alphanumeric ID compatible with all providers (incl. Mistral)."""
    return "".join(secrets.choice(_ALNUM) for _ in range(9))
@ -251,10 +272,12 @@ class OpenAICompatProvider(LLMProvider):
        # opening a fresh connection for each request, which is cheap on a
        # LAN.  Cloud providers benefit from keepalive, so we leave the
        # default pool settings for them.
+        timeout_s = _openai_compat_timeout_s()
        http_client: httpx.AsyncClient | None = None
        if _is_local_endpoint(spec, effective_base):
            http_client = httpx.AsyncClient(
                limits=httpx.Limits(keepalive_expiry=0),
+                timeout=timeout_s,
            )

        self._client = AsyncOpenAI(
@ -262,6 +285,7 @@ class OpenAICompatProvider(LLMProvider):
            base_url=effective_base,
            default_headers=default_headers,
            max_retries=0,
+            timeout=timeout_s,
            http_client=http_client,
        )

--- a/tests/providers/test_openai_compat_timeout.py
+++ b/tests/providers/test_openai_compat_timeout.py
@ -0,0 +1,53 @@
+from unittest.mock import patch, sentinel
+
+from nanobot.providers.openai_compat_provider import OpenAICompatProvider
+from nanobot.providers.registry import ProviderSpec
+
+
+def _assert_openai_compat_timeout(timeout) -> None:
+    assert timeout == 120.0  # expected default when no env override is set
+
+
+def test_openai_compat_provider_sets_sdk_timeout(monkeypatch) -> None:
+    monkeypatch.delenv("NANOBOT_OPENAI_COMPAT_TIMEOUT_S", raising=False)  # ignore host override
+    with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI") as mock_async_openai:
+        OpenAICompatProvider(api_key="test-key", api_base="https://example.com/v1")
+    kwargs = mock_async_openai.call_args.kwargs
+    _assert_openai_compat_timeout(kwargs["timeout"])
+    assert kwargs["http_client"] is None
+
+
+def test_openai_compat_provider_sets_timeout_on_local_http_client(monkeypatch) -> None:
+    """Local endpoints get the default timeout on both the httpx client and the SDK client."""
+    monkeypatch.delenv("NANOBOT_OPENAI_COMPAT_TIMEOUT_S", raising=False)  # ignore host override
+    spec = ProviderSpec(
+        name="local",
+        keywords=(),
+        env_key="",
+        is_local=True,
+        default_api_base="http://127.0.0.1:11434/v1",
+    )
+    with (
+        patch("nanobot.providers.openai_compat_provider.AsyncOpenAI") as mock_async_openai,
+        patch(
+            "nanobot.providers.openai_compat_provider.httpx.AsyncClient",
+            return_value=sentinel.http_client,
+        ) as mock_http_client,
+    ):
+        OpenAICompatProvider(spec=spec)
+    client_kwargs = mock_http_client.call_args.kwargs
+    _assert_openai_compat_timeout(client_kwargs["timeout"])
+    assert client_kwargs["limits"].keepalive_expiry == 0
+
+    openai_kwargs = mock_async_openai.call_args.kwargs
+    _assert_openai_compat_timeout(openai_kwargs["timeout"])
+    assert openai_kwargs["http_client"] is sentinel.http_client
+
+
+def test_openai_compat_provider_timeout_can_be_overridden_by_env(monkeypatch) -> None:
+    """A valid NANOBOT_OPENAI_COMPAT_TIMEOUT_S value replaces the default SDK timeout."""
+    monkeypatch.setenv("NANOBOT_OPENAI_COMPAT_TIMEOUT_S", "45")
+    with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI") as sdk_ctor:
+        OpenAICompatProvider(api_key="test-key", api_base="https://example.com/v1")
+    sdk_kwargs = sdk_ctor.call_args.kwargs
+    assert sdk_kwargs["timeout"] == 45.0