mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-26 11:32:25 +00:00
fix(providers): disable HTTP keepalive for local/LAN endpoints
Local model servers (Ollama, llama.cpp, vLLM) often close idle HTTP connections before the client-side keepalive timer expires. When two LLM calls happen seconds apart — for example the heartbeat _decide() phase followed immediately by process_direct() — the second call grabs a now-dead pooled connection, causing a transient APIConnectionError on every first attempt.

The fix detects local endpoints via:
- ProviderSpec.is_local (Ollama, LM Studio, vLLM, OVMS)
- Private-network URL patterns (localhost, 127.x, 192.168.x, 10.x, 172.16-31.x, host.docker.internal, [::1])

For these endpoints, the AsyncOpenAI client is created with a custom httpx.AsyncClient that sets keepalive_expiry=0, forcing a fresh TCP connection for each request. This is cheap on LAN (sub-5ms connect) and eliminates the stale-connection retry tax entirely.

Cloud providers (OpenAI, Anthropic, OpenRouter, etc.) keep the default 5-second keepalive, which is fine for high-frequency API usage.

The private-network heuristic also covers the common case where users configure provider='openai' but point apiBase at a LAN IP running llama.cpp — the spec says is_local=False, but the URL clearly is local.
This commit is contained in:
parent
d0e1b1393a
commit
5943ab386d
@ -14,6 +14,7 @@ import uuid
|
|||||||
from collections.abc import Awaitable, Callable
|
from collections.abc import Awaitable, Callable
|
||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
|
import httpx
|
||||||
import json_repair
|
import json_repair
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
@ -159,6 +160,39 @@ _RESPONSES_FAILURE_THRESHOLD = 3
|
|||||||
_RESPONSES_PROBE_INTERVAL_S = 300 # 5 minutes
|
_RESPONSES_PROBE_INTERVAL_S = 300 # 5 minutes
|
||||||
|
|
||||||
|
|
||||||
|
def _is_local_endpoint(
|
||||||
|
spec: "ProviderSpec | None",
|
||||||
|
api_base: str | None,
|
||||||
|
) -> bool:
|
||||||
|
"""Return True when the endpoint is a local or LAN model server.
|
||||||
|
|
||||||
|
Matches either the provider spec's ``is_local`` flag or common private-
|
||||||
|
network patterns in the base URL (localhost, 127.x, 192.168.x, 10.x,
|
||||||
|
172.16-31.x, Docker ``host.docker.internal``).
|
||||||
|
"""
|
||||||
|
if spec and spec.is_local:
|
||||||
|
return True
|
||||||
|
if not api_base:
|
||||||
|
return False
|
||||||
|
host = api_base.strip().lower().rstrip("/")
|
||||||
|
private_patterns = (
|
||||||
|
"localhost",
|
||||||
|
"127.",
|
||||||
|
"192.168.",
|
||||||
|
"10.",
|
||||||
|
"host.docker.internal",
|
||||||
|
"[::1]",
|
||||||
|
)
|
||||||
|
if any(p in host for p in private_patterns):
|
||||||
|
return True
|
||||||
|
# 172.16.0.0 – 172.31.255.255
|
||||||
|
import re
|
||||||
|
m = re.search(r"172\.(\d+)\." , host)
|
||||||
|
if m and 16 <= int(m.group(1)) <= 31:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _is_direct_openai_base(api_base: str | None) -> bool:
|
def _is_direct_openai_base(api_base: str | None) -> bool:
|
||||||
"""Return True for direct OpenAI endpoints, not generic OpenAI-compatible gateways."""
|
"""Return True for direct OpenAI endpoints, not generic OpenAI-compatible gateways."""
|
||||||
if not api_base:
|
if not api_base:
|
||||||
@ -208,11 +242,27 @@ class OpenAICompatProvider(LLMProvider):
|
|||||||
if extra_headers:
|
if extra_headers:
|
||||||
default_headers.update(extra_headers)
|
default_headers.update(extra_headers)
|
||||||
|
|
||||||
|
# Local model servers (Ollama, llama.cpp, vLLM) often close idle
|
||||||
|
# HTTP connections before the client-side keepalive expires. When
|
||||||
|
# two LLM calls happen seconds apart (e.g. heartbeat _decide then
|
||||||
|
# process_direct), the second call may grab a now-dead pooled
|
||||||
|
# connection, causing a transient APIConnectionError on every first
|
||||||
|
# attempt. Disabling keepalive for local endpoints avoids this by
|
||||||
|
# opening a fresh connection for each request, which is cheap on a
|
||||||
|
# LAN. Cloud providers benefit from keepalive, so we leave the
|
||||||
|
# default pool settings for them.
|
||||||
|
http_client: httpx.AsyncClient | None = None
|
||||||
|
if _is_local_endpoint(spec, effective_base):
|
||||||
|
http_client = httpx.AsyncClient(
|
||||||
|
limits=httpx.Limits(keepalive_expiry=0),
|
||||||
|
)
|
||||||
|
|
||||||
self._client = AsyncOpenAI(
|
self._client = AsyncOpenAI(
|
||||||
api_key=api_key or "no-key",
|
api_key=api_key or "no-key",
|
||||||
base_url=effective_base,
|
base_url=effective_base,
|
||||||
default_headers=default_headers,
|
default_headers=default_headers,
|
||||||
max_retries=0,
|
max_retries=0,
|
||||||
|
http_client=http_client,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Responses API circuit breaker: skip after repeated failures,
|
# Responses API circuit breaker: skip after repeated failures,
|
||||||
|
|||||||
111
tests/providers/test_local_endpoint_detection.py
Normal file
111
tests/providers/test_local_endpoint_detection.py
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
"""Tests for _is_local_endpoint detection and keepalive configuration."""
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from nanobot.providers.openai_compat_provider import (
|
||||||
|
OpenAICompatProvider,
|
||||||
|
_is_local_endpoint,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_spec(is_local: bool = False) -> MagicMock:
|
||||||
|
spec = MagicMock()
|
||||||
|
spec.is_local = is_local
|
||||||
|
return spec
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsLocalEndpoint:
    """Exercise ``_is_local_endpoint`` across spec flags and base-URL shapes."""

    def test_spec_is_local_true(self):
        """A spec flagged local is enough, even without a base URL."""
        flagged = _make_spec(is_local=True)
        assert _is_local_endpoint(flagged, None) is True

    def test_spec_is_local_false_no_base(self):
        """An unflagged spec and no base URL is not local."""
        unflagged = _make_spec(is_local=False)
        assert _is_local_endpoint(unflagged, None) is False

    def test_no_spec_no_base(self):
        """With neither spec nor base URL there is nothing to match."""
        verdict = _is_local_endpoint(None, None)
        assert verdict is False

    def test_localhost(self):
        url = "http://localhost:1234/v1"
        assert _is_local_endpoint(None, url) is True

    def test_localhost_https(self):
        url = "https://localhost:8080/v1"
        assert _is_local_endpoint(None, url) is True

    def test_loopback_127(self):
        url = "http://127.0.0.1:11434/v1"
        assert _is_local_endpoint(None, url) is True

    def test_private_192_168(self):
        url = "http://192.168.8.188:1234/v1"
        assert _is_local_endpoint(None, url) is True

    def test_private_10(self):
        url = "http://10.0.0.5:8000/v1"
        assert _is_local_endpoint(None, url) is True

    def test_private_172_16(self):
        """Lower edge of the 172.16-31.x private range."""
        url = "http://172.16.0.1:1234/v1"
        assert _is_local_endpoint(None, url) is True

    def test_private_172_31(self):
        """Upper edge of the 172.16-31.x private range."""
        url = "http://172.31.255.255:1234/v1"
        assert _is_local_endpoint(None, url) is True

    def test_not_private_172_32(self):
        """172.32.x lies just outside the private range."""
        url = "http://172.32.0.1:1234/v1"
        assert _is_local_endpoint(None, url) is False

    def test_docker_internal(self):
        url = "http://host.docker.internal:11434/v1"
        assert _is_local_endpoint(None, url) is True

    def test_ipv6_loopback(self):
        url = "http://[::1]:1234/v1"
        assert _is_local_endpoint(None, url) is True

    def test_public_api(self):
        url = "https://api.openai.com/v1"
        assert _is_local_endpoint(None, url) is False

    def test_openrouter(self):
        url = "https://openrouter.ai/api/v1"
        assert _is_local_endpoint(None, url) is False

    def test_spec_overrides_public_url(self):
        """spec.is_local=True takes precedence even with a public-looking URL."""
        flagged = _make_spec(is_local=True)
        assert _is_local_endpoint(flagged, "https://api.example.com/v1") is True

    def test_case_insensitive(self):
        """An upper-case host name is still recognised."""
        url = "http://LOCALHOST:1234/v1"
        assert _is_local_endpoint(None, url) is True

    def test_trailing_slash(self):
        """A trailing slash on the base URL does not affect detection."""
        url = "http://192.168.1.1:8080/v1/"
        assert _is_local_endpoint(None, url) is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestLocalKeepaliveConfig:
    """Check the pool keepalive chosen for local versus cloud endpoints."""

    def test_local_spec_disables_keepalive(self):
        """A spec flagged local yields a pool with keepalive_expiry == 0."""
        local_spec = _make_spec(is_local=True)
        local_spec.env_key = ""
        local_spec.default_api_base = "http://localhost:11434/v1"

        provider = OpenAICompatProvider(
            api_key="test",
            api_base="http://localhost:11434/v1",
            spec=local_spec,
        )

        # Walks a private attribute chain down to the connection pool.
        expiry = provider._client._client._transport._pool._keepalive_expiry
        assert expiry == 0

    def test_lan_ip_disables_keepalive(self):
        """A generic 'openai' spec with a LAN IP should still disable keepalive."""
        generic_spec = _make_spec(is_local=False)
        generic_spec.env_key = ""
        generic_spec.default_api_base = None

        provider = OpenAICompatProvider(
            api_key="test",
            api_base="http://192.168.8.188:1234/v1",
            spec=generic_spec,
        )

        expiry = provider._client._client._transport._pool._keepalive_expiry
        assert expiry == 0

    def test_cloud_keeps_default_keepalive(self):
        """A non-local spec with a public default base keeps the default expiry."""
        cloud_spec = _make_spec(is_local=False)
        cloud_spec.env_key = ""
        cloud_spec.default_api_base = "https://api.openai.com/v1"

        provider = OpenAICompatProvider(
            api_key="test",
            api_base=None,
            spec=cloud_spec,
        )

        expiry = provider._client._client._transport._pool._keepalive_expiry
        # Default httpx keepalive is 5.0s
        assert expiry == 5.0
|
||||||
Loading…
x
Reference in New Issue
Block a user