From 5943ab386d8a47e29eea89baf6c6098762020505 Mon Sep 17 00:00:00 2001
From: hussein1362 <hussein.1362@gmail.com>
Date: Sat, 25 Apr 2026 17:36:58 +0300
Subject: [PATCH] fix(providers): disable HTTP keepalive for local/LAN
 endpoints
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Local model servers (Ollama, llama.cpp, vLLM) often close idle HTTP
connections before the client-side keepalive timer expires.  When two
LLM calls happen seconds apart — for example the heartbeat _decide()
phase followed immediately by process_direct() — the second call grabs
a now-dead pooled connection, causing a transient APIConnectionError
on every first attempt.

The fix detects local endpoints via:
- ProviderSpec.is_local (Ollama, LM Studio, vLLM, OVMS)
- Private-network URL patterns (localhost, 127.x, 192.168.x, 10.x,
  172.16-31.x, host.docker.internal, [::1])

For these endpoints, the AsyncOpenAI client is created with a custom
httpx.AsyncClient that sets keepalive_expiry=0, forcing a fresh TCP
connection for each request.  This is cheap on LAN (sub-5ms connect)
and eliminates the stale-connection retry tax entirely.

Cloud providers (OpenAI, Anthropic, OpenRouter, etc.) keep the default
5-second keepalive, which is fine for high-frequency API usage.

The private-network heuristic also covers the common case where users
configure provider='openai' but point apiBase at a LAN IP running
llama.cpp — the spec says is_local=False, but the URL is clearly local.
---
 nanobot/providers/openai_compat_provider.py   |  50 ++++++++
 .../test_local_endpoint_detection.py          | 111 ++++++++++++++++++
 2 files changed, 161 insertions(+)
 create mode 100644 tests/providers/test_local_endpoint_detection.py

diff --git a/nanobot/providers/openai_compat_provider.py b/nanobot/providers/openai_compat_provider.py
index f603b9e37..c59080abc 100644
--- a/nanobot/providers/openai_compat_provider.py
+++ b/nanobot/providers/openai_compat_provider.py
@@ -14,6 +14,7 @@ import uuid
 from collections.abc import Awaitable, Callable
 from typing import TYPE_CHECKING, Any
 
+import httpx
 import json_repair
 from loguru import logger
 
@@ -159,6 +160,39 @@ _RESPONSES_FAILURE_THRESHOLD = 3
 _RESPONSES_PROBE_INTERVAL_S = 300  # 5 minutes
 
 
+def _is_local_endpoint(
+    spec: "ProviderSpec | None",
+    api_base: str | None,
+) -> bool:
+    """Return True when the endpoint is a local or LAN model server.
+
+    True when ``spec.is_local`` is set, or when the hostname of ``api_base``
+    is ``localhost``/``*.localhost``, ``host.docker.internal``, or a loopback
+    or private IP literal (e.g. 127.x, 10.x, 172.16-31.x, 192.168.x, ``::1``).
+    Text elsewhere in the URL (``api10.example.com``) is never matched.
+    """
+    import ipaddress
+    from urllib.parse import urlsplit
+
+    if spec and spec.is_local:
+        return True
+    if not api_base:
+        return False
+    url = api_base.strip()
+    try:
+        # Scheme-less values ("localhost:11434/v1") need "//" to expose a netloc.
+        host = urlsplit(url if "//" in url else f"//{url}").hostname or ""
+    except ValueError:  # malformed URL, e.g. an unbalanced IPv6 bracket
+        return False
+    if host in ("localhost", "host.docker.internal") or host.endswith(".localhost"):
+        return True
+    try:
+        ip = ipaddress.ip_address(host)
+    except ValueError:  # a DNS name, not an IP literal
+        return False
+    return ip.is_loopback or ip.is_private
+
+
 def _is_direct_openai_base(api_base: str | None) -> bool:
     """Return True for direct OpenAI endpoints, not generic OpenAI-compatible gateways."""
     if not api_base:
@@ -208,11 +242,27 @@ class OpenAICompatProvider(LLMProvider):
         if extra_headers:
             default_headers.update(extra_headers)
 
+        # Local model servers (Ollama, llama.cpp, vLLM) often close idle
+        # HTTP connections before the client-side keepalive expires.  When
+        # two LLM calls happen seconds apart (e.g. heartbeat _decide then
+        # process_direct), the second call may grab a now-dead pooled
+        # connection, causing a transient APIConnectionError on every first
+        # attempt.  Disabling keepalive for local endpoints avoids this by
+        # opening a fresh connection for each request, which is cheap on a
+        # LAN.  Cloud providers benefit from keepalive, so we leave the
+        # default pool settings for them.
+        http_client: httpx.AsyncClient | None = None
+        if _is_local_endpoint(spec, effective_base):
+            http_client = httpx.AsyncClient(
+                limits=httpx.Limits(keepalive_expiry=0),
+            )
+
         self._client = AsyncOpenAI(
             api_key=api_key or "no-key",
             base_url=effective_base,
             default_headers=default_headers,
             max_retries=0,
+            http_client=http_client,
         )
 
         # Responses API circuit breaker: skip after repeated failures,
diff --git a/tests/providers/test_local_endpoint_detection.py b/tests/providers/test_local_endpoint_detection.py
new file mode 100644
index 000000000..2b27176be
--- /dev/null
+++ b/tests/providers/test_local_endpoint_detection.py
@@ -0,0 +1,111 @@
+"""Tests for _is_local_endpoint detection and keepalive configuration."""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from nanobot.providers.openai_compat_provider import (
+    OpenAICompatProvider,
+    _is_local_endpoint,
+)
+
+
+def _make_spec(is_local: bool = False) -> MagicMock:
+    """Return a MagicMock standing in for a ProviderSpec with ``is_local`` preset."""
+    stub = MagicMock(is_local=is_local)
+    return stub
+
+
+class TestIsLocalEndpoint:
+    """Cover _is_local_endpoint: spec flag, loopback/LAN hosts, and public URLs."""
+
+    def test_spec_is_local_true(self):
+        assert _is_local_endpoint(_make_spec(is_local=True), None) is True
+
+    def test_spec_is_local_false_no_base(self):
+        assert _is_local_endpoint(_make_spec(is_local=False), None) is False
+
+    def test_no_spec_no_base(self):
+        assert _is_local_endpoint(None, None) is False
+
+    def test_localhost(self):
+        assert _is_local_endpoint(None, "http://localhost:1234/v1") is True
+
+    def test_localhost_https(self):
+        assert _is_local_endpoint(None, "https://localhost:8080/v1") is True
+
+    def test_loopback_127(self):
+        assert _is_local_endpoint(None, "http://127.0.0.1:11434/v1") is True
+
+    def test_private_192_168(self):
+        assert _is_local_endpoint(None, "http://192.168.8.188:1234/v1") is True
+
+    def test_private_10(self):
+        assert _is_local_endpoint(None, "http://10.0.0.5:8000/v1") is True
+
+    def test_private_172_16(self):
+        assert _is_local_endpoint(None, "http://172.16.0.1:1234/v1") is True
+
+    def test_private_172_31(self):
+        assert _is_local_endpoint(None, "http://172.31.255.255:1234/v1") is True
+
+    def test_not_private_172_32(self):
+        assert _is_local_endpoint(None, "http://172.32.0.1:1234/v1") is False
+
+    def test_docker_internal(self):
+        assert _is_local_endpoint(None, "http://host.docker.internal:11434/v1") is True
+
+    def test_ipv6_loopback(self):
+        assert _is_local_endpoint(None, "http://[::1]:1234/v1") is True
+
+    def test_public_api(self):
+        assert _is_local_endpoint(None, "https://api.openai.com/v1") is False
+
+    def test_openrouter(self):
+        assert _is_local_endpoint(None, "https://openrouter.ai/api/v1") is False
+
+    def test_spec_overrides_public_url(self):
+        """spec.is_local=True takes precedence even with a public-looking URL."""
+        assert _is_local_endpoint(_make_spec(is_local=True), "https://api.example.com/v1") is True
+
+    def test_case_insensitive(self):
+        assert _is_local_endpoint(None, "http://LOCALHOST:1234/v1") is True
+
+    def test_trailing_slash(self):
+        assert _is_local_endpoint(None, "http://192.168.1.1:8080/v1/") is True
+
+
+class TestLocalKeepaliveConfig:
+    """Verify the httpx pool's keepalive_expiry: 0 for local endpoints, default otherwise."""
+
+    def test_local_spec_disables_keepalive(self):
+        spec = _make_spec(is_local=True)
+        spec.env_key = ""
+        spec.default_api_base = "http://localhost:11434/v1"
+        provider = OpenAICompatProvider(
+            api_key="test", api_base="http://localhost:11434/v1", spec=spec,
+        )
+        pool = provider._client._client._transport._pool
+        assert pool._keepalive_expiry == 0
+
+    def test_lan_ip_disables_keepalive(self):
+        """A generic 'openai' spec with a LAN IP should still disable keepalive."""
+        spec = _make_spec(is_local=False)
+        spec.env_key = ""
+        spec.default_api_base = None
+        provider = OpenAICompatProvider(
+            api_key="test", api_base="http://192.168.8.188:1234/v1", spec=spec,
+        )
+        pool = provider._client._client._transport._pool
+        assert pool._keepalive_expiry == 0
+
+    def test_cloud_keeps_default_keepalive(self):
+        spec = _make_spec(is_local=False)
+        spec.env_key = ""
+        spec.default_api_base = "https://api.openai.com/v1"
+        provider = OpenAICompatProvider(
+            api_key="test", api_base=None, spec=spec,
+        )
+        pool = provider._client._client._transport._pool
+        # No custom http_client for cloud endpoints, so httpx's default 5.0s expiry applies.
+        assert pool._keepalive_expiry == 5.0