nanobot/tests/test_preset_failover_smoke.py
chengyongru 0bc42e2ab2 refactor: restrict fallback_models to preset-only and clean up provider factory
- Restrict fallback_models to only reference preset names in model_presets.
- Add schema validation to reject unknown preset names in fallback_models.
- Remove build_provider_for_model() since bare model fallback is no longer supported.
- Simplify make_provider_factory() to only look up presets by name.
- Update onboard UI to remove "Add custom model" option from fallback chain.
- Update tests to use preset names instead of bare model strings in fallback chains.
- Fix test imports referencing deleted _make_provider function.
2026-05-08 20:24:24 +08:00

468 lines
16 KiB
Python

"""End-to-end smoke tests for model presets + failover.
Uses a local aiohttp fake OpenAI server so requests are real HTTP,
not mocked at the provider level.
"""
from __future__ import annotations
import json
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from nanobot.nanobot import Nanobot
from nanobot.providers.base import GenerationSettings, LLMProvider
from nanobot.providers.failover import ModelRouter
from nanobot.providers.openai_compat_provider import OpenAICompatProvider
try:
from aiohttp import web
from aiohttp.test_utils import TestServer
HAS_AIOHTTP = True
except ImportError:
HAS_AIOHTTP = False
@pytest.fixture(autouse=True)
def _disable_proxy_for_localhost_tests(monkeypatch):
    """Keep requests to the local fake server from going through a system proxy.

    Every proxy variable is cleared in both its upper- and lower-case
    spelling, and the loopback hosts are exempted through both ``NO_PROXY``
    and ``no_proxy``.  The lower-case names must be handled as well because
    ``urllib.request.getproxies()`` prefers them when both spellings exist,
    so an ``http_proxy`` or ``no_proxy`` inherited from the developer's shell
    would otherwise survive this fixture.
    """
    for name in ("ALL_PROXY", "HTTP_PROXY", "HTTPS_PROXY"):
        monkeypatch.delenv(name, raising=False)
        monkeypatch.delenv(name.lower(), raising=False)
    for name in ("NO_PROXY", "no_proxy"):
        monkeypatch.setenv(name, "127.0.0.1,localhost")
# ---------------------------------------------------------------------------
# Helpers (mock-level preset tests)
# ---------------------------------------------------------------------------
def _write_config(tmp_path: Path, **overrides) -> Path:
data = {
"providers": {
"openrouter": {"apiKey": "sk-test-key"},
"openai": {"apiKey": "sk-openai-test"},
},
"agents": {"defaults": {"model": "openai/gpt-4.1"}},
"tools": {"my": {"allowSet": True}},
}
data.update(overrides)
config_path = tmp_path / "config.json"
config_path.write_text(json.dumps(data))
return config_path
# ---------------------------------------------------------------------------
# 1. Model Preset Mock Tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_preset_loaded_at_startup(tmp_path: Path) -> None:
    """The preset named in ``agents.defaults`` is applied when the bot is built."""
    fast_preset = {
        "model": "gpt-4.1-mini",
        "provider": "openai",
        "max_tokens": 4096,
        "context_window_tokens": 128000,
        "temperature": 0.3,
    }
    config_path = _write_config(
        tmp_path,
        model_presets={"fast": fast_preset},
        # The explicit model is expected to lose to the preset's model.
        agents={"defaults": {"model_preset": "fast", "model": "ignored-model"}},
    )

    # Replace the SDK client class while the provider is being constructed.
    with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI"):
        bot = Nanobot.from_config(config_path, workspace=tmp_path)

    agent_loop = bot._loop
    assert agent_loop.model == "gpt-4.1-mini"
    assert agent_loop.context_window_tokens == 128000
    assert agent_loop.provider.generation.temperature == 0.3
    assert agent_loop.provider.generation.max_tokens == 4096
    assert agent_loop.model_preset == "fast"
@pytest.mark.asyncio
async def test_preset_runtime_switch_updates_all_fields(tmp_path: Path) -> None:
    """Setting ``model_preset`` through the ``my`` tool swaps every preset field."""
    presets = {
        "cheap": {
            "model": "gpt-4.1-mini",
            "provider": "openai",
            "max_tokens": 2048,
            "context_window_tokens": 64000,
            "temperature": 0.5,
        },
        "power": {
            "model": "gpt-4.1",
            "provider": "openai",
            "max_tokens": 8192,
            "context_window_tokens": 256000,
            "temperature": 0.1,
        },
    }
    config_path = _write_config(
        tmp_path,
        model_presets=presets,
        agents={"defaults": {"model_preset": "cheap"}},
    )

    with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI"):
        bot = Nanobot.from_config(config_path, workspace=tmp_path)

    agent_loop = bot._loop
    # Sanity check: the bot starts out on the "cheap" preset.
    assert agent_loop.model == "gpt-4.1-mini"

    settings_tool = agent_loop.tools.get("my")
    outcome = await settings_tool.execute(
        action="set", key="model_preset", value="power"
    )
    assert "Error" not in outcome

    # Every field carried by the "power" preset must now be in effect.
    assert agent_loop.model == "gpt-4.1"
    assert agent_loop.context_window_tokens == 256000
    assert agent_loop.provider.generation.temperature == 0.1
    assert agent_loop.provider.generation.max_tokens == 8192
    assert agent_loop.model_preset == "power"
@pytest.mark.asyncio
async def test_preset_switch_unknown_returns_error(tmp_path: Path) -> None:
    """Selecting an unconfigured preset reports "not found" and changes nothing."""
    config_path = _write_config(
        tmp_path,
        model_presets={"a": {"model": "model-a"}},
        agents={"defaults": {"model_preset": "a"}},
    )

    with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI"):
        bot = Nanobot.from_config(config_path, workspace=tmp_path)

    agent_loop = bot._loop
    model_before = agent_loop.model

    settings_tool = agent_loop.tools.get("my")
    outcome = await settings_tool.execute(
        action="set", key="model_preset", value="nonexistent"
    )

    assert "not found" in outcome.lower()
    # The failed switch must leave the active model and preset untouched.
    assert agent_loop.model == model_before
    assert agent_loop.model_preset == "a"
@pytest.mark.asyncio
async def test_preset_model_with_fallback_presets_in_config(tmp_path: Path) -> None:
    """Configuring ``fallback_presets`` yields a ModelRouter carrying that chain."""
    presets = {
        "prod": {
            "model": "gpt-4.1",
            "provider": "openai",
            "max_tokens": 8192,
            "temperature": 0.1,
        },
        "fallback": {
            "model": "gpt-4.1-mini",
            "provider": "openai",
            "max_tokens": 4096,
            "temperature": 0.2,
        },
    }
    agent_defaults = {"model_preset": "prod", "fallback_presets": ["fallback"]}
    config_path = _write_config(
        tmp_path,
        model_presets=presets,
        agents={"defaults": agent_defaults},
    )

    with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI"):
        bot = Nanobot.from_config(config_path, workspace=tmp_path)

    agent_loop = bot._loop
    assert agent_loop.model == "gpt-4.1"
    assert isinstance(agent_loop.provider, ModelRouter)
    assert agent_loop.provider.fallback_presets == ["fallback"]
@pytest.mark.asyncio
async def test_fallback_presets_wired_to_all_subsystems(tmp_path: Path) -> None:
    """With ``fallback_presets`` set, all LLM consumers share one ModelRouter.

    The runner, subagent manager, consolidator and dream components must each
    hold the very same router object as the loop, not a separate provider.
    """
    presets = {
        "prod": {
            "model": "gpt-4.1",
            "provider": "openai",
            "max_tokens": 8192,
            "temperature": 0.1,
        },
        "fallback": {
            "model": "gpt-4.1-mini",
            "provider": "openai",
            "max_tokens": 4096,
            "temperature": 0.2,
        },
    }
    agent_defaults = {"model_preset": "prod", "fallback_presets": ["fallback"]}
    config_path = _write_config(
        tmp_path,
        model_presets=presets,
        agents={"defaults": agent_defaults},
    )

    with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI"):
        bot = Nanobot.from_config(config_path, workspace=tmp_path)

    agent_loop = bot._loop
    router = agent_loop.provider
    assert isinstance(router, ModelRouter)

    # Identity checks (not equality): each subsystem must hold the same object.
    assert agent_loop.runner.provider is router, "AgentRunner must use ModelRouter"
    assert agent_loop.subagents.provider is router, "SubagentManager must use ModelRouter"
    assert agent_loop.consolidator.provider is router, "Consolidator must use ModelRouter"
    assert agent_loop.dream.provider is router, "Dream must use ModelRouter"
# ---------------------------------------------------------------------------
# 2. Real HTTP Smoke Tests (aiohttp fake OpenAI server)
# ---------------------------------------------------------------------------
@pytest.mark.skipif(not HAS_AIOHTTP, reason="aiohttp not installed")
@pytest.mark.asyncio
async def test_preset_generation_params_reach_http_request() -> None:
    """The provider's ``generation`` settings are sent in the real HTTP request body."""
    captured: list[dict] = []

    async def record_and_reply(request: web.Request) -> web.Response:
        # Keep the decoded body for later inspection, then answer with a
        # minimal chat-completion payload.
        payload = await request.json()
        captured.append(payload)
        choice = {
            "index": 0,
            "message": {"role": "assistant", "content": "pong"},
            "finish_reason": "stop",
        }
        completion = {
            "id": "chatcmpl-test",
            "object": "chat.completion",
            "model": payload.get("model"),
            "choices": [choice],
        }
        return web.json_response(completion)

    fake_api = web.Application()
    fake_api.router.add_post("/chat/completions", record_and_reply)
    server = TestServer(fake_api)
    await server.start_server()
    try:
        provider = OpenAICompatProvider(
            api_key="test",
            api_base=str(server.make_url("/")),
            default_model="test-model",
        )
        provider.generation = GenerationSettings(temperature=0.42, max_tokens=1024)

        # Swap the retry schedule for a single zero-length delay
        # (presumably so the test never sleeps between attempts).
        with patch.object(LLMProvider, "_CHAT_RETRY_DELAYS", (0,)):
            response = await provider.chat_with_retry(
                messages=[{"role": "user", "content": "ping"}],
            )

        assert response.finish_reason != "error"
        assert captured
        first_request = captured[0]
        assert first_request["model"] == "test-model"
        assert first_request["temperature"] == 0.42
        assert first_request["max_tokens"] == 1024
    finally:
        await server.close()
@pytest.mark.skipif(not HAS_AIOHTTP, reason="aiohttp not installed")
@pytest.mark.asyncio
async def test_failover_sends_second_request_to_fallback_model() -> None:
    """A 503 for the primary model makes ModelRouter send a request for the fallback."""
    seen_bodies: list[dict] = []

    async def fake_completions(request: web.Request) -> web.Response:
        # Succeed for any model except the primary, which always gets a 503.
        payload = await request.json()
        seen_bodies.append(payload)
        requested = payload.get("model")
        if requested != "primary-model":
            completion = {
                "id": "chatcmpl-test",
                "object": "chat.completion",
                "model": requested,
                "choices": [{
                    "index": 0,
                    "message": {"role": "assistant", "content": "fallback-ok"},
                    "finish_reason": "stop",
                }],
            }
            return web.json_response(completion)
        overloaded = {"error": {"message": "overloaded", "type": "server_error"}}
        return web.Response(
            status=503,
            body=json.dumps(overloaded),
            content_type="application/json",
        )

    fake_api = web.Application()
    fake_api.router.add_post("/chat/completions", fake_completions)
    server = TestServer(fake_api)
    await server.start_server()
    try:
        endpoint = str(server.make_url("/"))
        primary_provider = OpenAICompatProvider(
            api_key="test", api_base=endpoint, default_model="primary-model"
        )
        fallback_provider = OpenAICompatProvider(
            api_key="test", api_base=endpoint, default_model="fallback-model"
        )
        # The factory hands back the pre-built fallback and records its calls.
        factory = MagicMock(return_value=fallback_provider)
        router = ModelRouter(
            primary_provider=primary_provider,
            primary_model="primary-model",
            fallback_presets=["fallback-model"],
            provider_factory=factory,
        )

        with patch.object(LLMProvider, "_CHAT_RETRY_DELAYS", (0,)):
            response = await router.chat_with_retry(
                messages=[{"role": "user", "content": "hi"}],
            )

        assert response.finish_reason != "error"
        assert response.content == "fallback-ok"
        requested_models = [entry["model"] for entry in seen_bodies]
        assert "primary-model" in requested_models
        assert "fallback-model" in requested_models
        factory.assert_called_once_with("fallback-model")
    finally:
        await server.close()
@pytest.mark.skipif(not HAS_AIOHTTP, reason="aiohttp not installed")
@pytest.mark.asyncio
async def test_failover_on_quota_429() -> None:
    """An insufficient-quota 429 from the primary still triggers failover.

    The fake server answers every request with the same quota error, so the
    final response is an error.  What the test pins down is that the fallback
    provider was built and was actually sent a request after the primary
    failed, since quota exhausted on one provider may not apply to another.
    """
    requests_log: list[dict] = []

    async def handler(request: web.Request) -> web.Response:
        # Record the request body, then reject it with a quota error.
        body = await request.json()
        requests_log.append(body)
        return web.Response(
            status=429,
            body=json.dumps({
                "error": {
                    "message": "insufficient quota",
                    "type": "insufficient_quota",
                    "code": "insufficient_quota",
                }
            }),
            content_type="application/json",
        )

    app = web.Application()
    app.router.add_post("/chat/completions", handler)
    server = TestServer(app)
    await server.start_server()
    try:
        base_url = str(server.make_url("/"))
        primary = OpenAICompatProvider(
            api_key="test", api_base=base_url, default_model="primary-model"
        )
        fallback = OpenAICompatProvider(
            api_key="test", api_base=base_url, default_model="fallback-model"
        )
        factory = MagicMock(return_value=fallback)
        router = ModelRouter(
            primary_provider=primary,
            primary_model="primary-model",
            fallback_presets=["fallback-model"],
            provider_factory=factory,
        )

        with patch.object(LLMProvider, "_CHAT_RETRY_DELAYS", (0,)):
            response = await router.chat_with_retry(
                messages=[{"role": "user", "content": "hi"}],
            )

        # Quota 429 SHOULD trigger failover — another provider may still work.
        factory.assert_called_once_with("fallback-model")
        assert response.finish_reason == "error"
        # Exactly one request per provider, primary first.  Checking the model
        # names (not just the count) proves the second request really went to
        # the fallback rather than being a second attempt on the primary.
        assert [entry["model"] for entry in requests_log] == [
            "primary-model",
            "fallback-model",
        ]
    finally:
        await server.close()
@pytest.mark.skipif(not HAS_AIOHTTP, reason="aiohttp not installed")
@pytest.mark.asyncio
async def test_model_router_failover_integration() -> None:
    """End-to-end failover over real HTTP: primary answers 503, fallback answers 200."""
    primary_name = "primary-model"
    fallback_name = "fallback-model"
    received: list[dict] = []

    async def serve_completion(request: web.Request) -> web.Response:
        payload = await request.json()
        received.append(payload)
        model_name = payload.get("model")
        if model_name == primary_name:
            # The primary model is permanently "overloaded".
            error_body = json.dumps(
                {"error": {"message": "overloaded", "type": "server_error"}}
            )
            return web.Response(
                status=503,
                body=error_body,
                content_type="application/json",
            )
        assistant_message = {"role": "assistant", "content": "fallback-ok"}
        return web.json_response({
            "id": "chatcmpl-test",
            "object": "chat.completion",
            "model": model_name,
            "choices": [{
                "index": 0,
                "message": assistant_message,
                "finish_reason": "stop",
            }],
        })

    application = web.Application()
    application.router.add_post("/chat/completions", serve_completion)
    server = TestServer(application)
    await server.start_server()
    try:
        api_base = str(server.make_url("/"))
        primary = OpenAICompatProvider(
            api_key="test", api_base=api_base, default_model=primary_name
        )
        fallback = OpenAICompatProvider(
            api_key="test", api_base=api_base, default_model=fallback_name
        )
        provider_factory = MagicMock(return_value=fallback)
        router = ModelRouter(
            primary_provider=primary,
            primary_model=primary_name,
            fallback_presets=[fallback_name],
            provider_factory=provider_factory,
        )

        with patch.object(LLMProvider, "_CHAT_RETRY_DELAYS", (0,)):
            response = await router.chat_with_retry(
                messages=[{"role": "user", "content": "hello"}],
            )

        assert response.finish_reason != "error"
        assert response.content == "fallback-ok"
        models_hit = [item["model"] for item in received]
        assert primary_name in models_hit
        assert fallback_name in models_hit
        provider_factory.assert_called_once_with(fallback_name)
    finally:
        await server.close()