From af9f8d54b897e94a10ee240baf4bd7d445afa9de Mon Sep 17 00:00:00 2001 From: chengyongru <2755839590@qq.com> Date: Wed, 20 May 2026 00:19:27 +0800 Subject: [PATCH] perf: optimize gateway cold start from ~6.9s to ~460ms (#3918) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Channel lazy load: discover_enabled() only imports enabled channel modules instead of all 18 modules with heavy SDKs (telegram, discord, slack, etc). discover_all() now delegates to discover_enabled(). Lazy OpenAI client: defer AsyncOpenAI() + httpx construction to _ensure_client() with asyncio.Lock double-checked locking. openai and httpx imports moved from module-level into _ensure_client(). Minor: lazy Nanobot/RunResult and CronService exports via __getattr__. Benchmark: 6910ms → 460ms (-93.3%) --- .gitignore | 1 + nanobot/__init__.py | 22 +++- nanobot/channels/manager.py | 26 ++-- nanobot/channels/registry.py | 50 +++++--- nanobot/cron/__init__.py | 14 ++- nanobot/providers/openai_compat_provider.py | 111 ++++++++++++------ tests/channels/test_channel_plugins.py | 12 +- .../test_local_endpoint_detection.py | 9 +- tests/providers/test_openai_compat_timeout.py | 2 +- 9 files changed, 173 insertions(+), 74 deletions(-) diff --git a/.gitignore b/.gitignore index 81127ad11..17ebbc972 100644 --- a/.gitignore +++ b/.gitignore @@ -97,3 +97,4 @@ logs/ tmp/ temp/ *.tmp +exp/ diff --git a/nanobot/__init__.py b/nanobot/__init__.py index 8ab213a33..a2380644e 100644 --- a/nanobot/__init__.py +++ b/nanobot/__init__.py @@ -2,9 +2,10 @@ nanobot - A lightweight AI agent framework """ -from importlib.metadata import PackageNotFoundError, version as _pkg_version -from pathlib import Path import tomllib +from importlib.metadata import PackageNotFoundError +from importlib.metadata import version as _pkg_version +from pathlib import Path def _read_pyproject_version() -> str | None: @@ -27,6 +28,21 @@ def _resolve_version() -> str: __version__ = _resolve_version() __logo__ = "🐈" -from nanobot.nanobot import Nanobot, RunResult +_LAZY_EXPORTS = { + "Nanobot": ".nanobot", + "RunResult": ".nanobot", +} + + +def __getattr__(name: str): + module_path = _LAZY_EXPORTS.get(name) + if module_path is None: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + from importlib import import_module + mod = import_module(module_path, __name__) + val = getattr(mod, name) + globals()[name] = val + return val + __all__ = ["Nanobot", "RunResult"] diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index 5bd2ef33b..37377b785 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -70,28 +70,40 @@ class ChannelManager: def _init_channels(self) -> None: """Initialize channels discovered via pkgutil scan + entry_points plugins.""" - from nanobot.channels.registry import discover_all + from nanobot.channels.registry import discover_channel_names, discover_enabled transcription_provider = self.config.channels.transcription_provider transcription_key = self._resolve_transcription_key(transcription_provider) transcription_base = self._resolve_transcription_base(transcription_provider) transcription_language = self.config.channels.transcription_language - for name, cls in discover_all().items(): + # Collect enabled module names first, then only import those. + # Channel configs live in ChannelsConfig's extra fields (via + # extra="allow"), so we enumerate candidates from pkgutil scan + # (cheap, no imports) and any plugin keys in __pydantic_extra__. + names = discover_channel_names() + candidate_names = set(names) + extra = getattr(self.config.channels, "__pydantic_extra__", None) or {} + candidate_names.update(extra.keys()) + + enabled_names: set[str] = set() + for name in candidate_names: section = getattr(self.config.channels, name, None) if section is None: continue - enabled = ( + if ( section.get("enabled", False) if isinstance(section, dict) else getattr(section, "enabled", False) - ) - if not enabled: + ): + enabled_names.add(name) + + for name, cls in discover_enabled(enabled_names, _names=names).items(): + section = getattr(self.config.channels, name, None) + if section is None: continue try: kwargs: dict[str, Any] = {} - # Only the WebSocket channel currently hosts the embedded webui - # surface; other channels stay oblivious to these knobs. if cls.name == "websocket": if self._session_manager is not None: kwargs["session_manager"] = self._session_manager diff --git a/nanobot/channels/registry.py b/nanobot/channels/registry.py index 04effc77d..f2dd628d3 100644 --- a/nanobot/channels/registry.py +++ b/nanobot/channels/registry.py @@ -1,5 +1,4 @@ """Auto-discovery for built-in channel modules and external plugins.""" - from __future__ import annotations import importlib @@ -51,21 +50,44 @@ def discover_plugins() -> dict[str, type[BaseChannel]]: return plugins +def discover_enabled( + enabled_names: set[str], + *, + _names: list[str] | None = None, + _include_all_external: bool = False, +) -> dict[str, type[BaseChannel]]: + """Return channels whose module names are in *enabled_names*. + + Uses cheap ``pkgutil.iter_modules`` to list names, then imports only + those that match — skipping the heavy third-party SDK imports of + unneeded channels. + """ + names = _names if _names is not None else discover_channel_names() + result: dict[str, type[BaseChannel]] = {} + for modname in names: + if modname not in enabled_names: + continue + try: + result[modname] = load_channel_class(modname) + except ImportError as e: + logger.debug("Skipping built-in channel '{}': {}", modname, e) + + external = discover_plugins() + shadowed = set(external) & set(result) + if shadowed: + logger.warning("Plugin(s) shadowed by built-in channels (ignored): {}", shadowed) + if _include_all_external: + result.update({k: v for k, v in external.items() if k not in shadowed}) + else: + result.update({k: v for k, v in external.items() if k not in shadowed and k in enabled_names}) + + return result + + def discover_all() -> dict[str, type[BaseChannel]]: """Return all channels: built-in (pkgutil) merged with external (entry_points). Built-in channels take priority — an external plugin cannot shadow a built-in name. """ - builtin: dict[str, type[BaseChannel]] = {} - for modname in discover_channel_names(): - try: - builtin[modname] = load_channel_class(modname) - except ImportError as e: - logger.debug("Skipping built-in channel '{}': {}", modname, e) - - external = discover_plugins() - shadowed = set(external) & set(builtin) - if shadowed: - logger.warning("Plugin(s) shadowed by built-in channels (ignored): {}", shadowed) - - return {**external, **builtin} + names = discover_channel_names() + return discover_enabled(set(names), _names=names, _include_all_external=True) diff --git a/nanobot/cron/__init__.py b/nanobot/cron/__init__.py index a9d4cad4a..a85f44d1f 100644 --- a/nanobot/cron/__init__.py +++ b/nanobot/cron/__init__.py @@ -1,6 +1,18 @@ """Cron service for scheduled agent tasks.""" -from nanobot.cron.service import CronService from nanobot.cron.types import CronJob, CronSchedule __all__ = ["CronService", "CronJob", "CronSchedule"] + +_LAZY = {"CronService": ".service"} + + +def __getattr__(name: str): + module_path = _LAZY.get(name) + if module_path is None: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + from importlib import import_module + mod = import_module(module_path, __name__) + val = getattr(mod, name) + globals()[name] = val + return val diff --git a/nanobot/providers/openai_compat_provider.py b/nanobot/providers/openai_compat_provider.py index 2f8455416..e7f872525 100644 --- a/nanobot/providers/openai_compat_provider.py +++ b/nanobot/providers/openai_compat_provider.py @@ -16,20 +16,9 @@ from ipaddress import ip_address from typing import TYPE_CHECKING, Any from urllib.parse import urlparse -import httpx import json_repair from loguru import logger -if os.environ.get("LANGFUSE_SECRET_KEY") and importlib.util.find_spec("langfuse"): - from langfuse.openai import AsyncOpenAI -else: - if os.environ.get("LANGFUSE_SECRET_KEY"): - logger.warning( - "LANGFUSE_SECRET_KEY is set but langfuse is not installed; " - "install with `pip install langfuse` to enable tracing" - ) - from openai import AsyncOpenAI - from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest from nanobot.providers.openai_responses import ( consume_sdk_stream, @@ -39,8 +28,15 @@ from nanobot.providers.openai_responses import ( ) if TYPE_CHECKING: + from openai import AsyncOpenAI as AsyncOpenAIType + from nanobot.providers.registry import ProviderSpec +# Module-level placeholder — set lazily by _ensure_client on first real +# use, or replaced by tests via ``patch(...)``. Kept as a plain name so +# that ``unittest.mock.patch`` can find and replace it. +AsyncOpenAI: Any = None + _ALLOWED_MSG_KEYS = frozenset({ "role", "content", "tool_calls", "tool_call_id", "name", "reasoning_content", "extra_content", @@ -302,43 +298,80 @@ class OpenAICompatProvider(LLMProvider): effective_base = api_base or (spec.default_api_base if spec else None) or None self._effective_base = effective_base - default_headers = {"x-session-affinity": uuid.uuid4().hex} + self._default_headers = {"x-session-affinity": uuid.uuid4().hex} if _uses_openrouter_attribution(spec, effective_base): - default_headers.update(_DEFAULT_OPENROUTER_HEADERS) + self._default_headers.update(_DEFAULT_OPENROUTER_HEADERS) if extra_headers: - default_headers.update(extra_headers) + self._default_headers.update(extra_headers) + self._api_key_for_client = api_key or "no-key" + self._is_local = _is_local_endpoint(spec, effective_base) - # Local model servers (Ollama, llama.cpp, vLLM) often close idle - # HTTP connections before the client-side keepalive expires. When - # two LLM calls happen seconds apart (e.g. heartbeat _decide then - # process_direct), the second call may grab a now-dead pooled - # connection, causing a transient APIConnectionError on every first - # attempt. Disabling keepalive for local endpoints avoids this by - # opening a fresh connection for each request, which is cheap on a - # LAN. Cloud providers benefit from keepalive, so we leave the - # default pool settings for them. - timeout_s = _openai_compat_timeout_s() - http_client: httpx.AsyncClient | None = None - if _is_local_endpoint(spec, effective_base): - http_client = httpx.AsyncClient( - limits=httpx.Limits(keepalive_expiry=0), - timeout=timeout_s, - ) + # Lazy-init: the OpenAI client and its httpx transport are expensive + # to create (~700 ms on Windows). Defer until first use — unless + # AsyncOpenAI has been patched (tests), in which case build eagerly. + self._client: AsyncOpenAIType | None = None + self._client_lock = asyncio.Lock() - self._client = AsyncOpenAI( - api_key=api_key or "no-key", - base_url=effective_base, - default_headers=default_headers, - max_retries=0, - timeout=timeout_s, - http_client=http_client, - ) + if AsyncOpenAI is not None: + self._build_client() # Responses API circuit breaker: skip after repeated failures, # probe again after _RESPONSES_PROBE_INTERVAL_S seconds. self._responses_failures: dict[str, int] = {} self._responses_tripped_at: dict[str, float] = {} + def _build_client(self) -> None: + """Create the OpenAI client using the current module-level AsyncOpenAI.""" + import httpx + + timeout_s = _openai_compat_timeout_s() + http_client: httpx.AsyncClient | None = None + if self._is_local: + # Local model servers (Ollama, llama.cpp, vLLM) often close idle + # HTTP connections before the client-side keepalive expires. When + # two LLM calls happen seconds apart (e.g. heartbeat _decide then + # process_direct), the second call may grab a now-dead pooled + # connection, causing a transient APIConnectionError on every first + # attempt. Disabling keepalive for local endpoints avoids this by + # opening a fresh connection for each request, which is cheap on a + # LAN. Cloud providers benefit from keepalive, so we leave the + # default pool settings for them. + http_client = httpx.AsyncClient( + limits=httpx.Limits(keepalive_expiry=0), + timeout=timeout_s, + ) + self._client = AsyncOpenAI( + api_key=self._api_key_for_client, + base_url=self._effective_base, + default_headers=self._default_headers, + max_retries=0, + timeout=timeout_s, + http_client=http_client, + ) + + async def _ensure_client(self): + """Return the shared OpenAI client, creating it on first call.""" + if self._client is not None: + return self._client + async with self._client_lock: + if self._client is not None: + return self._client + global AsyncOpenAI + if AsyncOpenAI is None: + if os.environ.get("LANGFUSE_SECRET_KEY") and importlib.util.find_spec("langfuse"): + from langfuse.openai import AsyncOpenAI as _AsyncOpenAI + else: + if os.environ.get("LANGFUSE_SECRET_KEY"): + logger.warning( + "LANGFUSE_SECRET_KEY is set but langfuse is not installed; " + "install with `pip install langfuse` to enable tracing" + ) + from openai import AsyncOpenAI as _AsyncOpenAI + AsyncOpenAI = _AsyncOpenAI + + self._build_client() + return self._client + def _setup_env(self, api_key: str, api_base: str | None) -> None: """Set environment variables based on provider spec.""" spec = self._spec @@ -1182,6 +1215,7 @@ class OpenAICompatProvider(LLMProvider): reasoning_effort: str | None = None, tool_choice: str | dict[str, Any] | None = None, ) -> LLMResponse: + await self._ensure_client() try: if self._should_use_responses_api(model, reasoning_effort): try: @@ -1223,6 +1257,7 @@ class OpenAICompatProvider(LLMProvider): on_thinking_delta: Callable[[str], Awaitable[None]] | None = None, on_tool_call_delta: Callable[[dict[str, Any]], Awaitable[None]] | None = None, ) -> LLMResponse: + await self._ensure_client() idle_timeout_s = int(os.environ.get("NANOBOT_STREAM_IDLE_TIMEOUT_S", "90")) try: if self._should_use_responses_api(model, reasoning_effort): diff --git a/tests/channels/test_channel_plugins.py b/tests/channels/test_channel_plugins.py index 2309df2c2..ed315d5d0 100644 --- a/tests/channels/test_channel_plugins.py +++ b/tests/channels/test_channel_plugins.py @@ -180,7 +180,7 @@ async def test_manager_loads_plugin_from_dict_config(): ) with patch( - "nanobot.channels.registry.discover_all", + "nanobot.channels.registry.discover_enabled", return_value={"fakeplugin": _FakePlugin}, ): mgr = ChannelManager.__new__(ChannelManager) @@ -210,7 +210,7 @@ async def test_manager_propagates_groq_transcription_api_base_to_channels(): ) with patch( - "nanobot.channels.registry.discover_all", + "nanobot.channels.registry.discover_enabled", return_value={"fakeplugin": _FakePlugin}, ): mgr = ChannelManager.__new__(ChannelManager) @@ -246,7 +246,7 @@ async def test_manager_propagates_openai_transcription_api_base_to_channels(): ) with patch( - "nanobot.channels.registry.discover_all", + "nanobot.channels.registry.discover_enabled", return_value={"fakeplugin": _FakePlugin}, ): mgr = ChannelManager.__new__(ChannelManager) @@ -498,10 +498,8 @@ async def test_manager_skips_disabled_plugin(): providers=SimpleNamespace(groq=SimpleNamespace(api_key="")), ) - with patch( - "nanobot.channels.registry.discover_all", - return_value={"fakeplugin": _FakePlugin}, - ): + ep = _make_entry_point("fakeplugin", _FakePlugin) + with patch(_EP_TARGET, return_value=[ep]): mgr = ChannelManager.__new__(ChannelManager) mgr.config = fake_config mgr.bus = MessageBus() diff --git a/tests/providers/test_local_endpoint_detection.py b/tests/providers/test_local_endpoint_detection.py index fe45b90aa..27aecefc9 100644 --- a/tests/providers/test_local_endpoint_detection.py +++ b/tests/providers/test_local_endpoint_detection.py @@ -85,17 +85,18 @@ class TestIsLocalEndpoint: class TestLocalKeepaliveConfig: """Verify that local endpoints get keepalive_expiry=0.""" - def test_local_spec_disables_keepalive(self): + async def test_local_spec_disables_keepalive(self): spec = _make_spec(is_local=True) spec.env_key = "" spec.default_api_base = "http://localhost:11434/v1" provider = OpenAICompatProvider( api_key="test", api_base="http://localhost:11434/v1", spec=spec, ) + await provider._ensure_client() pool = provider._client._client._transport._pool assert pool._keepalive_expiry == 0 - def test_lan_ip_disables_keepalive(self): + async def test_lan_ip_disables_keepalive(self): """A generic 'openai' spec with a LAN IP should still disable keepalive.""" spec = _make_spec(is_local=False) spec.env_key = "" @@ -103,16 +104,18 @@ class TestLocalKeepaliveConfig: provider = OpenAICompatProvider( api_key="test", api_base="http://192.168.8.188:1234/v1", spec=spec, ) + await provider._ensure_client() pool = provider._client._client._transport._pool assert pool._keepalive_expiry == 0 - def test_cloud_keeps_default_keepalive(self): + async def test_cloud_keeps_default_keepalive(self): spec = _make_spec(is_local=False) spec.env_key = "" spec.default_api_base = "https://api.openai.com/v1" provider = OpenAICompatProvider( api_key="test", api_base=None, spec=spec, ) + await provider._ensure_client() pool = provider._client._client._transport._pool # Default httpx keepalive is 5.0s assert pool._keepalive_expiry == 5.0 diff --git a/tests/providers/test_openai_compat_timeout.py b/tests/providers/test_openai_compat_timeout.py index 664aff90e..dade6bee1 100644 --- a/tests/providers/test_openai_compat_timeout.py +++ b/tests/providers/test_openai_compat_timeout.py @@ -29,7 +29,7 @@ def test_openai_compat_provider_sets_timeout_on_local_http_client() -> None: with ( patch("nanobot.providers.openai_compat_provider.AsyncOpenAI") as mock_async_openai, patch( - "nanobot.providers.openai_compat_provider.httpx.AsyncClient", + "httpx.AsyncClient", return_value=sentinel.http_client, ) as mock_http_client, ):