mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-19 16:12:30 +00:00
- Restrict fallback_models to only reference preset names in model_presets.
- Add schema validation to reject unknown preset names in fallback_models.
- Remove build_provider_for_model() since bare model fallback is no longer supported.
- Simplify make_provider_factory() to only look up presets by name.
- Update onboard UI to remove "Add custom model" option from fallback chain.
- Update tests to use preset names instead of bare model strings in fallback chains.
- Fix test imports referencing deleted _make_provider function.
468 lines
16 KiB
Python
468 lines
16 KiB
Python
"""End-to-end smoke tests for model presets + failover.

Uses a local aiohttp fake OpenAI server so requests are real HTTP,
not mocked at the provider level.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from nanobot.nanobot import Nanobot
|
|
from nanobot.providers.base import GenerationSettings, LLMProvider
|
|
from nanobot.providers.failover import ModelRouter
|
|
from nanobot.providers.openai_compat_provider import OpenAICompatProvider
|
|
|
|
# aiohttp is an optional test dependency: record whether it imported so the
# HTTP smoke tests can be skipped (via ``skipif``) when it is unavailable.
try:
    from aiohttp import web
    from aiohttp.test_utils import TestServer
except ImportError:
    HAS_AIOHTTP = False
else:
    HAS_AIOHTTP = True
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _disable_proxy_for_localhost_tests(monkeypatch):
    """Prevent httpx from routing localhost requests through a system proxy.

    Runs automatically for every test in this module (``autouse=True``).
    Proxy variables are removed in both upper- and lower-case spellings,
    because environment-based proxy discovery accepts either form; clearing
    only the upper-case names would leave a lower-case ``http_proxy`` active.
    ``monkeypatch`` restores the original environment after each test.
    """
    for name in ("ALL_PROXY", "HTTP_PROXY", "HTTPS_PROXY"):
        monkeypatch.delenv(name, raising=False)
        monkeypatch.delenv(name.lower(), raising=False)

    # Explicitly exempt the loopback hosts in case a proxy is still configured
    # by some other mechanism; set both spellings for the same reason as above.
    monkeypatch.setenv("NO_PROXY", "127.0.0.1,localhost")
    monkeypatch.setenv("no_proxy", "127.0.0.1,localhost")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Helpers (mock-level preset tests)
# ---------------------------------------------------------------------------
|
|
|
|
def _write_config(tmp_path: Path, **overrides) -> Path:
    """Write a baseline config to ``tmp_path / "config.json"`` and return that path.

    The baseline has ``providers``, ``agents`` and ``tools`` sections. Each
    keyword in *overrides* is a top-level key that replaces the baseline
    section of the same name wholesale (shallow merge) or adds a new one.
    """
    baseline = {
        "providers": {
            "openrouter": {"apiKey": "sk-test-key"},
            "openai": {"apiKey": "sk-openai-test"},
        },
        "agents": {"defaults": {"model": "openai/gpt-4.1"}},
        "tools": {"my": {"allowSet": True}},
    }
    merged = {**baseline, **overrides}
    target = tmp_path / "config.json"
    target.write_text(json.dumps(merged))
    return target
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# 1. Model Preset Mock Tests
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.asyncio
async def test_preset_loaded_at_startup(tmp_path: Path) -> None:
    """The preset named in ``agents.defaults.model_preset`` configures the loop at startup.

    The config also sets a plain ``model`` ("ignored-model"); the assertions
    require the preset's model to be the one in effect.
    """
    fast_preset = {
        "model": "gpt-4.1-mini",
        "provider": "openai",
        "max_tokens": 4096,
        "context_window_tokens": 128000,
        "temperature": 0.3,
    }
    cfg_file = _write_config(
        tmp_path,
        model_presets={"fast": fast_preset},
        agents={"defaults": {"model_preset": "fast", "model": "ignored-model"}},
    )

    # Replace the AsyncOpenAI name in the provider module with a mock while
    # the bot is being constructed.
    with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI"):
        bot = Nanobot.from_config(cfg_file, workspace=tmp_path)

    agent_loop = bot._loop
    assert agent_loop.model == "gpt-4.1-mini"
    assert agent_loop.context_window_tokens == 128000
    assert agent_loop.provider.generation.temperature == 0.3
    assert agent_loop.provider.generation.max_tokens == 4096
    assert agent_loop.model_preset == "fast"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_preset_runtime_switch_updates_all_fields(tmp_path: Path) -> None:
    """Setting ``model_preset`` through the ``my`` tool updates every preset-driven field.

    Starts on the "cheap" preset, switches to "power" at runtime, then checks
    model, context window, temperature, max tokens and the active preset name.
    """
    presets = {
        "cheap": {
            "model": "gpt-4.1-mini",
            "provider": "openai",
            "max_tokens": 2048,
            "context_window_tokens": 64000,
            "temperature": 0.5,
        },
        "power": {
            "model": "gpt-4.1",
            "provider": "openai",
            "max_tokens": 8192,
            "context_window_tokens": 256000,
            "temperature": 0.1,
        },
    }
    cfg_file = _write_config(
        tmp_path,
        model_presets=presets,
        agents={"defaults": {"model_preset": "cheap"}},
    )

    # Replace the AsyncOpenAI name in the provider module with a mock while
    # the bot is being constructed.
    with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI"):
        bot = Nanobot.from_config(cfg_file, workspace=tmp_path)

    agent_loop = bot._loop
    assert agent_loop.model == "gpt-4.1-mini"

    # Perform the switch through the tool registered under the name "my".
    switch_tool = agent_loop.tools.get("my")
    outcome = await switch_tool.execute(action="set", key="model_preset", value="power")
    assert "Error" not in outcome

    assert agent_loop.model == "gpt-4.1"
    assert agent_loop.context_window_tokens == 256000
    assert agent_loop.provider.generation.temperature == 0.1
    assert agent_loop.provider.generation.max_tokens == 8192
    assert agent_loop.model_preset == "power"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_preset_switch_unknown_returns_error(tmp_path: Path) -> None:
    """Switching to an undefined preset reports "not found" and leaves the loop unchanged."""
    cfg_file = _write_config(
        tmp_path,
        model_presets={"a": {"model": "model-a"}},
        agents={"defaults": {"model_preset": "a"}},
    )

    # Replace the AsyncOpenAI name in the provider module with a mock while
    # the bot is being constructed.
    with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI"):
        bot = Nanobot.from_config(cfg_file, workspace=tmp_path)

    agent_loop = bot._loop
    model_before = agent_loop.model

    switch_tool = agent_loop.tools.get("my")
    outcome = await switch_tool.execute(
        action="set", key="model_preset", value="nonexistent"
    )
    assert "not found" in outcome.lower()

    # The failed switch must not have modified the active model or preset.
    assert agent_loop.model == model_before
    assert agent_loop.model_preset == "a"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_preset_model_with_fallback_presets_in_config(tmp_path: Path) -> None:
    """Configuring ``fallback_presets`` makes the loop's provider a ``ModelRouter``.

    The router must carry the configured fallback preset names, and the loop's
    model must still be the primary preset's model.
    """
    presets = {
        "prod": {
            "model": "gpt-4.1",
            "provider": "openai",
            "max_tokens": 8192,
            "temperature": 0.1,
        },
        "fallback": {
            "model": "gpt-4.1-mini",
            "provider": "openai",
            "max_tokens": 4096,
            "temperature": 0.2,
        },
    }
    agent_defaults = {
        "model_preset": "prod",
        "fallback_presets": ["fallback"],
    }
    cfg_file = _write_config(
        tmp_path,
        model_presets=presets,
        agents={"defaults": agent_defaults},
    )

    # Replace the AsyncOpenAI name in the provider module with a mock while
    # the bot is being constructed.
    with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI"):
        bot = Nanobot.from_config(cfg_file, workspace=tmp_path)

    agent_loop = bot._loop
    assert agent_loop.model == "gpt-4.1"
    assert isinstance(agent_loop.provider, ModelRouter)
    assert agent_loop.provider.fallback_presets == ["fallback"]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_fallback_presets_wired_to_all_subsystems(tmp_path: Path) -> None:
    """With ``fallback_presets`` configured, every LLM-calling subsystem shares one router.

    The runner, subagent manager, consolidator and dream components must all
    hold the very same ``ModelRouter`` instance as the loop (identity check),
    not the raw primary provider.
    """
    presets = {
        "prod": {
            "model": "gpt-4.1",
            "provider": "openai",
            "max_tokens": 8192,
            "temperature": 0.1,
        },
        "fallback": {
            "model": "gpt-4.1-mini",
            "provider": "openai",
            "max_tokens": 4096,
            "temperature": 0.2,
        },
    }
    agent_defaults = {
        "model_preset": "prod",
        "fallback_presets": ["fallback"],
    }
    cfg_file = _write_config(
        tmp_path,
        model_presets=presets,
        agents={"defaults": agent_defaults},
    )

    # Replace the AsyncOpenAI name in the provider module with a mock while
    # the bot is being constructed.
    with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI"):
        bot = Nanobot.from_config(cfg_file, workspace=tmp_path)

    agent_loop = bot._loop
    shared_router = agent_loop.provider
    assert isinstance(shared_router, ModelRouter)

    # Identity (``is``), not equality: each subsystem must reference the same object.
    assert agent_loop.runner.provider is shared_router, "AgentRunner must use ModelRouter"
    assert agent_loop.subagents.provider is shared_router, "SubagentManager must use ModelRouter"
    assert agent_loop.consolidator.provider is shared_router, "Consolidator must use ModelRouter"
    assert agent_loop.dream.provider is shared_router, "Dream must use ModelRouter"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# 2. Real HTTP Smoke Tests (aiohttp fake OpenAI server)
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.skipif(not HAS_AIOHTTP, reason="aiohttp not installed")
@pytest.mark.asyncio
async def test_preset_generation_params_reach_http_request() -> None:
    """Values set on ``provider.generation`` must appear in the JSON body sent over HTTP."""
    seen_bodies: list[dict] = []

    async def fake_chat_completions(request: web.Request) -> web.Response:
        # Record the parsed request body, then answer with a minimal completion
        # that echoes the requested model name.
        payload = await request.json()
        seen_bodies.append(payload)
        assistant_choice = {
            "index": 0,
            "message": {"role": "assistant", "content": "pong"},
            "finish_reason": "stop",
        }
        return web.json_response({
            "id": "chatcmpl-test",
            "object": "chat.completion",
            "model": payload.get("model"),
            "choices": [assistant_choice],
        })

    fake_app = web.Application()
    fake_app.router.add_post("/chat/completions", fake_chat_completions)
    server = TestServer(fake_app)
    await server.start_server()
    try:
        provider = OpenAICompatProvider(
            api_key="test",
            api_base=str(server.make_url("/")),
            default_model="test-model",
        )
        provider.generation = GenerationSettings(temperature=0.42, max_tokens=1024)

        # Swap the provider's retry-delay table for a single zero entry
        # (presumably so any retry does not sleep — confirm against LLMProvider).
        with patch.object(LLMProvider, "_CHAT_RETRY_DELAYS", (0,)):
            reply = await provider.chat_with_retry(
                messages=[{"role": "user", "content": "ping"}],
            )

        assert reply.finish_reason != "error"
        assert len(seen_bodies) >= 1
        first_body = seen_bodies[0]
        assert first_body["model"] == "test-model"
        assert first_body["temperature"] == 0.42
        assert first_body["max_tokens"] == 1024
    finally:
        # Always shut the fake server down, even when an assertion fails.
        await server.close()
|
|
|
|
|
|
@pytest.mark.skipif(not HAS_AIOHTTP, reason="aiohttp not installed")
@pytest.mark.asyncio
async def test_failover_sends_second_request_to_fallback_model() -> None:
    """When the primary model gets a 503, the router must answer via the fallback.

    The fake server replies 503 to "primary-model" and 200 to any other model.
    Both models must appear in the request log, and the provider factory must
    be called exactly once with the fallback name.
    """
    seen_bodies: list[dict] = []

    async def fake_chat_completions(request: web.Request) -> web.Response:
        payload = await request.json()
        seen_bodies.append(payload)
        requested_model = payload.get("model")

        if requested_model != "primary-model":
            # Any non-primary model succeeds with a recognizable reply.
            return web.json_response({
                "id": "chatcmpl-test",
                "object": "chat.completion",
                "model": requested_model,
                "choices": [{
                    "index": 0,
                    "message": {"role": "assistant", "content": "fallback-ok"},
                    "finish_reason": "stop",
                }],
            })

        # The primary model always fails with a server-side error.
        return web.Response(
            status=503,
            body=json.dumps({"error": {"message": "overloaded", "type": "server_error"}}),
            content_type="application/json",
        )

    fake_app = web.Application()
    fake_app.router.add_post("/chat/completions", fake_chat_completions)
    server = TestServer(fake_app)
    await server.start_server()
    try:
        endpoint = str(server.make_url("/"))
        primary_provider = OpenAICompatProvider(
            api_key="test", api_base=endpoint, default_model="primary-model"
        )
        fallback_provider = OpenAICompatProvider(
            api_key="test", api_base=endpoint, default_model="fallback-model"
        )

        # The factory is a mock so the test can verify how the router asks for
        # the fallback provider.
        provider_factory = MagicMock(return_value=fallback_provider)

        router = ModelRouter(
            primary_provider=primary_provider,
            primary_model="primary-model",
            fallback_presets=["fallback-model"],
            provider_factory=provider_factory,
        )

        # Swap the retry-delay table for a single zero entry
        # (presumably so any retry does not sleep — confirm against LLMProvider).
        with patch.object(LLMProvider, "_CHAT_RETRY_DELAYS", (0,)):
            reply = await router.chat_with_retry(
                messages=[{"role": "user", "content": "hi"}],
            )

        assert reply.finish_reason != "error"
        assert reply.content == "fallback-ok"

        requested_models = [entry["model"] for entry in seen_bodies]
        assert "primary-model" in requested_models
        assert "fallback-model" in requested_models
        provider_factory.assert_called_once_with("fallback-model")
    finally:
        # Always shut the fake server down, even when an assertion fails.
        await server.close()
|
|
|
|
|
|
@pytest.mark.skipif(not HAS_AIOHTTP, reason="aiohttp not installed")
@pytest.mark.asyncio
async def test_failover_on_quota_429() -> None:
    """A quota-style 429 from the primary must still trigger failover.

    The fake server returns 429 "insufficient_quota" to every request, so the
    final response is an error. The test checks that the fallback was tried:
    the factory is called once and exactly two requests reach the server.
    """
    seen_bodies: list[dict] = []

    async def always_out_of_quota(request: web.Request) -> web.Response:
        seen_bodies.append(await request.json())
        quota_error = {
            "error": {
                "message": "insufficient quota",
                "type": "insufficient_quota",
                "code": "insufficient_quota",
            }
        }
        return web.Response(
            status=429,
            body=json.dumps(quota_error),
            content_type="application/json",
        )

    fake_app = web.Application()
    fake_app.router.add_post("/chat/completions", always_out_of_quota)
    server = TestServer(fake_app)
    await server.start_server()
    try:
        endpoint = str(server.make_url("/"))
        primary_provider = OpenAICompatProvider(
            api_key="test", api_base=endpoint, default_model="primary-model"
        )
        fallback_provider = OpenAICompatProvider(
            api_key="test", api_base=endpoint, default_model="fallback-model"
        )

        provider_factory = MagicMock(return_value=fallback_provider)

        router = ModelRouter(
            primary_provider=primary_provider,
            primary_model="primary-model",
            fallback_presets=["fallback-model"],
            provider_factory=provider_factory,
        )

        # Swap the retry-delay table for a single zero entry
        # (presumably so any retry does not sleep — confirm against LLMProvider).
        with patch.object(LLMProvider, "_CHAT_RETRY_DELAYS", (0,)):
            reply = await router.chat_with_retry(
                messages=[{"role": "user", "content": "hi"}],
            )

        # Quota 429 SHOULD trigger failover — another provider may still work.
        provider_factory.assert_called_once_with("fallback-model")
        assert reply.finish_reason == "error"
        # One request for the primary and one for the fallback.
        assert len(seen_bodies) == 2
    finally:
        # Always shut the fake server down, even when an assertion fails.
        await server.close()
|
|
|
|
|
|
@pytest.mark.skipif(not HAS_AIOHTTP, reason="aiohttp not installed")
@pytest.mark.asyncio
async def test_model_router_failover_integration() -> None:
    """ModelRouter over real HTTP: primary answers 503, fallback answers 200."""
    seen_bodies: list[dict] = []

    def _success_payload(model_name):
        # Minimal chat-completion body carrying a recognizable reply.
        return {
            "id": "chatcmpl-test",
            "object": "chat.completion",
            "model": model_name,
            "choices": [{
                "index": 0,
                "message": {"role": "assistant", "content": "fallback-ok"},
                "finish_reason": "stop",
            }],
        }

    async def fake_chat_completions(request: web.Request) -> web.Response:
        payload = await request.json()
        seen_bodies.append(payload)
        requested_model = payload.get("model")

        if requested_model == "primary-model":
            # Only the primary model is made to fail.
            overloaded = {"error": {"message": "overloaded", "type": "server_error"}}
            return web.Response(
                status=503,
                body=json.dumps(overloaded),
                content_type="application/json",
            )

        return web.json_response(_success_payload(requested_model))

    fake_app = web.Application()
    fake_app.router.add_post("/chat/completions", fake_chat_completions)
    server = TestServer(fake_app)
    await server.start_server()
    try:
        endpoint = str(server.make_url("/"))
        primary_provider = OpenAICompatProvider(
            api_key="test", api_base=endpoint, default_model="primary-model"
        )
        fallback_provider = OpenAICompatProvider(
            api_key="test", api_base=endpoint, default_model="fallback-model"
        )

        provider_factory = MagicMock(return_value=fallback_provider)

        router = ModelRouter(
            primary_provider=primary_provider,
            primary_model="primary-model",
            fallback_presets=["fallback-model"],
            provider_factory=provider_factory,
        )

        # Swap the retry-delay table for a single zero entry
        # (presumably so any retry does not sleep — confirm against LLMProvider).
        with patch.object(LLMProvider, "_CHAT_RETRY_DELAYS", (0,)):
            reply = await router.chat_with_retry(
                messages=[{"role": "user", "content": "hello"}],
            )

        assert reply.finish_reason != "error"
        assert reply.content == "fallback-ok"
        requested_models = [entry["model"] for entry in seen_bodies]
        assert "primary-model" in requested_models
        assert "fallback-model" in requested_models
        provider_factory.assert_called_once_with("fallback-model")
    finally:
        # Always shut the fake server down, even when an assertion fails.
        await server.close()
|