feat(runner): support structured fallback models

Bind fallback model chains to the active model configuration so defaults and presets do not inherit or merge fallback behavior implicitly. Require explicit fallback providers while preserving per-fallback generation overrides and context-window safety.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Xubin Ren 2026-05-13 13:57:30 +00:00
parent eaa8ebd5d3
commit 02b059a616
5 changed files with 325 additions and 42 deletions

View File

@ -672,6 +672,12 @@ Existing configs do not need to change. If you do not set `modelPresets` or `age
"maxTokens": 8192,
"contextWindowTokens": 128000,
"temperature": 0.1,
"fallbackModels": [
{
"provider": "anthropic",
"model": "anthropic/claude-sonnet-4-6"
}
],
"modelPreset": null
}
},
@ -682,7 +688,17 @@ Existing configs do not need to change. If you do not set `modelPresets` or `age
"maxTokens": 4096,
"contextWindowTokens": 128000,
"temperature": 0.2,
"reasoningEffort": "low"
"reasoningEffort": "low",
"fallbackModels": [
{
"provider": "deepseek",
"model": "deepseek/deepseek-chat",
"maxTokens": 4096,
"contextWindowTokens": 64000,
"temperature": 0.1,
"reasoningEffort": null
}
]
},
"deep": {
"model": "anthropic/claude-opus-4-5",
@ -705,9 +721,53 @@ Existing configs do not need to change. If you do not set `modelPresets` or `age
| `contextWindowTokens` | Context window size used by prompt building and consolidation decisions. |
| `temperature` | Sampling temperature. |
| `reasoningEffort` | Optional reasoning/thinking setting. Provider support varies. |
| `fallbackModels` | Optional ordered fallback models for this active configuration only. |
`default` is reserved and always means the implicit preset built from `agents.defaults.*`; do not define `modelPresets.default`. Use `/model default` to switch back to `agents.defaults.*`.
### Model Fallbacks
`fallbackModels` belongs to the currently active model configuration. If the active configuration is `agents.defaults`, only `agents.defaults.fallbackModels` is used. If the active configuration is `modelPresets.fast`, only `modelPresets.fast.fallbackModels` is used. nanobot does not inherit or merge fallbacks between defaults and presets.
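Each fallback entry must include at least `provider` and `model`. The other fields are optional; omitted values inherit from the active primary configuration for that request. An explicit `null` is treated the same as an omitted field, so a fallback cannot use `null` to clear a value (for example `reasoningEffort`) that the primary configuration sets.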
```json
{
"modelPresets": {
"fast": {
"model": "MiniMax-M2.7-highspeed",
"provider": "minimaxAnthropic",
"maxTokens": 4096,
"contextWindowTokens": 262144,
"temperature": 0.1,
"reasoningEffort": null,
"fallbackModels": [
{
"provider": "deepseek",
"model": "deepseek-v4-pro",
"maxTokens": 4096,
"contextWindowTokens": 262144,
"temperature": 0.1,
"reasoningEffort": null
}
]
},
"deep": {
"model": "deepseek-v4-pro",
"provider": "deepseek",
"maxTokens": 4096,
"contextWindowTokens": 262144,
"temperature": 0.1,
"reasoningEffort": null
}
}
}
```
In this example, `/model fast` can fail over to DeepSeek, but `/model deep` has no fallback because the `deep` preset does not define `fallbackModels`.
Failover only runs when the primary model returns an error before any answer text has been streamed. Fallback models are tried in order. If a fallback has a smaller `contextWindowTokens`, nanobot uses the smallest window in the active chain when building context so the fallback can receive the same prompt.
Set `agents.defaults.modelPreset` to start with a named preset:
```json

View File

@ -74,6 +74,17 @@ class DreamConfig(Base):
return f"every {hours}h"
class ModelFallbackConfig(Base):
    """One entry in a model configuration's ordered ``fallback_models`` list.

    ``model`` and ``provider`` are required. Every other field defaults to
    ``None``.
    """

    # Required identity of the fallback.
    model: str
    provider: str

    # Optional per-fallback generation settings; all default to None.
    max_tokens: int | None = Field(default=None)
    context_window_tokens: int | None = Field(default=None)
    temperature: float | None = Field(default=None)
    reasoning_effort: str | None = Field(default=None)
class ModelPresetConfig(Base):
"""A named set of model + generation parameters for quick switching."""
@ -83,7 +94,7 @@ class ModelPresetConfig(Base):
context_window_tokens: int = 65_536
temperature: float = 0.1
reasoning_effort: str | None = None
fallback_models: list[str] = Field(default_factory=list)
fallback_models: list[ModelFallbackConfig] = Field(default_factory=list)
def to_generation_settings(self) -> Any:
from nanobot.providers.base import GenerationSettings
@ -107,6 +118,7 @@ class AgentDefaults(Base):
context_window_tokens: int = 65_536
context_block_limit: int | None = None
temperature: float = 0.1
fallback_models: list[ModelFallbackConfig] = Field(default_factory=list)
max_tool_iterations: int = 200
max_concurrent_subagents: int = Field(default=1, ge=1)
max_tool_result_chars: int = 16_000
@ -297,6 +309,7 @@ class Config(BaseSettings):
model=d.model, provider=d.provider, max_tokens=d.max_tokens,
context_window_tokens=d.context_window_tokens,
temperature=d.temperature, reasoning_effort=d.reasoning_effort,
fallback_models=d.fallback_models,
)
def resolve_preset(self, name: str | None = None) -> ModelPresetConfig:

View File

@ -5,7 +5,7 @@ from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from nanobot.config.schema import Config, ModelPresetConfig
from nanobot.config.schema import Config, ModelFallbackConfig, ModelPresetConfig
from nanobot.providers.base import LLMProvider
from nanobot.providers.fallback_provider import FallbackProvider
from nanobot.providers.registry import find_by_name
@ -104,6 +104,28 @@ def _make_provider_core(
return provider
def _fallback_preset(primary: ModelPresetConfig, fallback: ModelFallbackConfig) -> ModelPresetConfig:
    """Derive the preset used to build and configure one fallback model.

    ``model`` and ``provider`` always come from *fallback*. Each generation
    field (``max_tokens``, ``context_window_tokens``, ``temperature``,
    ``reasoning_effort``) is taken from *fallback* unless it is ``None``, in
    which case the value from *primary* is used instead.

    NOTE: because ``None`` means "inherit", a fallback cannot reset
    ``reasoning_effort`` to ``None`` when the primary sets one.
    """

    def _pick(field: str) -> object:
        # Prefer the fallback's own value; fall back to the primary's.
        own = getattr(fallback, field)
        return getattr(primary, field) if own is None else own

    return ModelPresetConfig(
        model=fallback.model,
        provider=fallback.provider,
        max_tokens=_pick("max_tokens"),
        context_window_tokens=_pick("context_window_tokens"),
        temperature=_pick("temperature"),
        reasoning_effort=_pick("reasoning_effort"),
    )
def make_provider(
config: Config,
*,
@ -120,12 +142,11 @@ def make_provider(
provider = _make_provider_core(config, preset_name=preset_name, preset=preset, model=model)
if resolved.fallback_models:
fb_preset = resolved.model_copy(update={"provider": "auto", "fallback_models": []})
provider = FallbackProvider(
primary=provider,
fallback_models=resolved.fallback_models,
provider_factory=lambda m: _make_provider_core(
config, preset_name=preset_name, preset=fb_preset, model=m
provider_factory=lambda fb: _make_provider_core(
config, preset_name=preset_name, preset=_fallback_preset(resolved, fb)
),
)
@ -138,9 +159,32 @@ def provider_signature(
preset_name: str | None = None,
preset: ModelPresetConfig | None = None,
) -> tuple[object, ...]:
"""Return the config fields that affect the primary LLM provider."""
"""Return the config fields that affect the active provider chain."""
resolved = _resolve_model_preset(config, preset_name=preset_name, preset=preset)
p = config.get_provider(resolved.model, preset=resolved)
def _fallback_signature(fallback: ModelFallbackConfig) -> tuple[object, ...]:
fallback_preset = _fallback_preset(resolved, fallback)
fp = config.get_provider(fallback.model, preset=fallback_preset)
return (
fallback.model,
fallback.provider,
fallback_preset.max_tokens,
fallback_preset.temperature,
fallback_preset.reasoning_effort,
fallback_preset.context_window_tokens,
config.get_provider_name(fallback.model, preset=fallback_preset),
config.get_api_key(fallback.model, preset=fallback_preset),
config.get_api_base(fallback.model, preset=fallback_preset),
fp.extra_headers if fp else None,
fp.extra_body if fp else None,
getattr(fp, "region", None) if fp else None,
getattr(fp, "profile", None) if fp else None,
)
fallback_signatures = tuple(
_fallback_signature(fallback) for fallback in resolved.fallback_models
)
return (
resolved.model,
resolved.provider,
@ -155,6 +199,7 @@ def provider_signature(
resolved.temperature,
resolved.reasoning_effort,
resolved.context_window_tokens,
fallback_signatures,
)
@ -165,10 +210,14 @@ def build_provider_snapshot(
preset: ModelPresetConfig | None = None,
) -> ProviderSnapshot:
resolved = _resolve_model_preset(config, preset_name=preset_name, preset=preset)
fallback_windows = [
_fallback_preset(resolved, fallback).context_window_tokens
for fallback in resolved.fallback_models
]
return ProviderSnapshot(
provider=make_provider(config, preset=resolved),
model=resolved.model,
context_window_tokens=resolved.context_window_tokens,
context_window_tokens=min([resolved.context_window_tokens, *fallback_windows]),
signature=provider_signature(config, preset=resolved),
)

View File

@ -24,7 +24,7 @@ class FallbackProvider(LLMProvider):
provider on-the-fly.
Key design:
- Failover is request-scoped (the wrapper itself is stateless between turns).
- Failover attempts are request-scoped; primary circuit state persists.
- Skipped when content was already streamed to avoid duplicate output.
- Recursive failover is prevented by the factory returning plain providers.
- Primary provider is circuit-broken after repeated failures to avoid
@ -34,8 +34,8 @@ class FallbackProvider(LLMProvider):
def __init__(
self,
primary: LLMProvider,
fallback_models: list[str],
provider_factory: Callable[[str], LLMProvider],
fallback_models: list[Any],
provider_factory: Callable[[Any], LLMProvider],
):
self._primary = primary
self._fallback_models = list(fallback_models)
@ -52,6 +52,10 @@ class FallbackProvider(LLMProvider):
def generation(self, value):
self._primary.generation = value
@property
def supports_progress_deltas(self) -> bool:
    """Mirror the primary provider's ``supports_progress_deltas`` flag.

    Returns ``False`` when the primary does not define the attribute.
    """
    primary_flag = getattr(self._primary, "supports_progress_deltas", False)
    return True if primary_flag else False
def get_default_model(self) -> str:
return self._primary.get_default_model()
@ -122,7 +126,8 @@ class FallbackProvider(LLMProvider):
last_response: LLMResponse | None = None
primary_skipped = not self._primary_available()
for idx, fallback_model in enumerate(self._fallback_models):
for idx, fallback in enumerate(self._fallback_models):
fallback_model = fallback.model
if has_streamed is not None and has_streamed[0]:
break
if idx == 0 and primary_skipped:
@ -138,25 +143,35 @@ class FallbackProvider(LLMProvider):
else:
logger.info(
"Fallback '{}' also failed, trying next fallback '{}'",
self._fallback_models[idx - 1], fallback_model,
self._fallback_models[idx - 1].model, fallback_model,
)
try:
fallback_provider = self._provider_factory(fallback_model)
fallback_provider = self._provider_factory(fallback)
except Exception as exc:
logger.warning(
"Failed to create provider for fallback '{}': {}", fallback_model, exc
)
continue
original_model = kwargs.get("model")
original_values = {
name: kwargs.get(name, LLMProvider._SENTINEL)
for name in ("model", "max_tokens", "temperature", "reasoning_effort")
}
kwargs["model"] = fallback_model
if fallback.max_tokens is not None:
kwargs["max_tokens"] = fallback.max_tokens
if fallback.temperature is not None:
kwargs["temperature"] = fallback.temperature
if fallback.reasoning_effort is not None:
kwargs["reasoning_effort"] = fallback.reasoning_effort
try:
fallback_response = await call(fallback_provider, kwargs)
finally:
if original_model is not None:
kwargs["model"] = original_model
else:
kwargs.pop("model", None)
for name, value in original_values.items():
if value is LLMProvider._SENTINEL:
kwargs.pop(name, None)
else:
kwargs[name] = value
if fallback_response.finish_reason != "error":
logger.info(

View File

@ -7,6 +7,7 @@ from unittest.mock import MagicMock
import pytest
from nanobot.config.schema import ModelFallbackConfig
from nanobot.providers.base import LLMProvider, LLMResponse
from nanobot.providers.fallback_provider import FallbackProvider
@ -24,6 +25,25 @@ def _error_response(content: str = "api error") -> LLMResponse:
return _make_response(content, finish_reason="error", error_kind="server_error")
def _fallback(
    model: str,
    provider: str = "fallback",
    *,
    max_tokens: int | None = None,
    context_window_tokens: int | None = None,
    temperature: float | None = None,
    reasoning_effort: str | None = None,
) -> ModelFallbackConfig:
    """Build a ``ModelFallbackConfig`` for tests.

    Generation fields that are not supplied are passed through as ``None``.
    """
    generation_fields = {
        "max_tokens": max_tokens,
        "context_window_tokens": context_window_tokens,
        "temperature": temperature,
        "reasoning_effort": reasoning_effort,
    }
    return ModelFallbackConfig(model=model, provider=provider, **generation_fields)
class _FakeProvider(LLMProvider):
"""Fake provider for testing."""
@ -60,17 +80,113 @@ def test_fallback_models_default_empty() -> None:
def test_fallback_models_accepts_list() -> None:
from nanobot.config.schema import ModelPresetConfig
p = ModelPresetConfig(model="test/primary", fallback_models=["test/a", "test/b"])
assert p.fallback_models == ["test/a", "test/b"]
p = ModelPresetConfig(
model="test/primary",
fallback_models=[{"provider": "test", "model": "test/a"}],
)
assert p.fallback_models == [_fallback("test/a", provider="test")]
def test_fallback_models_from_camel_case() -> None:
from nanobot.config.schema import ModelPresetConfig
p = ModelPresetConfig.model_validate({
"model": "test/primary",
"fallbackModels": ["test/a"],
"fallbackModels": [{"provider": "test", "model": "test/a"}],
})
assert p.fallback_models == ["test/a"]
assert p.fallback_models == [_fallback("test/a", provider="test")]
def test_provider_signature_tracks_fallback_models_and_provider_config() -> None:
    """The signature changes when a fallback entry or its provider API key changes."""
    from nanobot.config.schema import Config
    from nanobot.providers.factory import provider_signature

    def _signature(fallback_entry: dict, providers: dict) -> tuple:
        # Same "prod" preset every time; only the fallback entry and providers vary.
        raw = {
            "modelPresets": {
                "prod": {
                    "model": "openai/gpt-4.1",
                    "fallbackModels": [fallback_entry],
                }
            },
            "providers": providers,
        }
        return provider_signature(Config.model_validate(raw), preset_name="prod")

    anthropic_entry = {"provider": "anthropic", "model": "anthropic/claude-sonnet-4-6"}
    deepseek_entry = {"provider": "deepseek", "model": "deepseek/deepseek-chat"}

    baseline = _signature(
        anthropic_entry,
        {
            "openai": {"apiKey": "primary-key"},
            "anthropic": {"apiKey": "fallback-key"},
        },
    )
    with_other_fallback = _signature(
        deepseek_entry,
        {
            "openai": {"apiKey": "primary-key"},
            "anthropic": {"apiKey": "fallback-key"},
            "deepseek": {"apiKey": "deepseek-key"},
        },
    )
    with_rotated_key = _signature(
        anthropic_entry,
        {
            "openai": {"apiKey": "primary-key"},
            "anthropic": {"apiKey": "new-fallback-key"},
        },
    )

    assert baseline != with_other_fallback
    assert baseline != with_rotated_key
def test_agent_defaults_can_define_fallback_models() -> None:
    """``agents.defaults.fallbackModels`` shows up on the preset resolved with no name."""
    from nanobot.config.schema import Config

    defaults = {
        "model": "primary-model",
        "provider": "custom",
        "fallbackModels": [{"provider": "deepseek", "model": "deepseek-v4-pro"}],
    }
    config = Config.model_validate({"agents": {"defaults": defaults}})

    resolved = config.resolve_preset()
    expected = [_fallback("deepseek-v4-pro", provider="deepseek")]
    assert resolved.fallback_models == expected
def test_provider_snapshot_uses_smallest_fallback_context_window() -> None:
    """The snapshot reports the fallback's 64000-token window, not the primary's 128000."""
    from nanobot.config.schema import Config
    from nanobot.providers.factory import build_provider_snapshot

    small_window_fallback = {
        "provider": "deepseek",
        "model": "deepseek/deepseek-chat",
        "contextWindowTokens": 64000,
    }
    prod_preset = {
        "model": "openai/gpt-4.1",
        "provider": "openai",
        "contextWindowTokens": 128000,
        "fallbackModels": [small_window_fallback],
    }
    provider_keys = {
        "openai": {"apiKey": "primary-key"},
        "deepseek": {"apiKey": "fallback-key"},
    }
    config = Config.model_validate(
        {"modelPresets": {"prod": prod_preset}, "providers": provider_keys}
    )

    snapshot = build_provider_snapshot(config, preset_name="prod")

    assert snapshot.context_window_tokens == 64000
# -- FallbackProvider tests --
@ -83,7 +199,7 @@ class TestNoFallbackWhenPrimarySucceeds:
factory = MagicMock()
fb = FallbackProvider(
primary=primary,
fallback_models=["fallback-a"],
fallback_models=[_fallback("fallback-a")],
provider_factory=factory,
)
@ -102,14 +218,14 @@ class TestFallbackOnPrimaryError:
fb = FallbackProvider(
primary=primary,
fallback_models=["fallback-a"],
fallback_models=[_fallback("fallback-a")],
provider_factory=factory,
)
result = await fb.chat(messages=[{"role": "user", "content": "hi"}], model="primary-model")
assert result.content == "fallback ok"
assert result.finish_reason == "stop"
factory.assert_called_once_with("fallback-a")
factory.assert_called_once_with(_fallback("fallback-a"))
assert primary.chat_calls[0]["model"] == "primary-model"
assert fallback.chat_calls[0]["model"] == "fallback-a"
@ -121,7 +237,7 @@ class TestNoFallbackWhenContentStreamed:
factory = MagicMock()
fb = FallbackProvider(
primary=primary,
fallback_models=["fallback-a"],
fallback_models=[_fallback("fallback-a")],
provider_factory=factory,
)
@ -146,14 +262,14 @@ class TestFailoverOnTransientError:
factory = MagicMock(return_value=fallback)
fb = FallbackProvider(
primary=primary,
fallback_models=["fallback-a"],
fallback_models=[_fallback("fallback-a")],
provider_factory=factory,
)
result = await fb.chat(messages=[{"role": "user", "content": "hi"}])
assert result.content == "fallback ok"
assert result.finish_reason == "stop"
factory.assert_called_once_with("fallback-a")
factory.assert_called_once_with(_fallback("fallback-a"))
@pytest.mark.asyncio
async def test_timeout(self) -> None:
@ -165,14 +281,14 @@ class TestFailoverOnTransientError:
factory = MagicMock(return_value=fallback)
fb = FallbackProvider(
primary=primary,
fallback_models=["fallback-a"],
fallback_models=[_fallback("fallback-a")],
provider_factory=factory,
)
result = await fb.chat(messages=[{"role": "user", "content": "hi"}])
assert result.content == "fallback ok"
assert result.finish_reason == "stop"
factory.assert_called_once_with("fallback-a")
factory.assert_called_once_with(_fallback("fallback-a"))
class TestFallbackTriesModelsInOrder:
@ -185,15 +301,15 @@ class TestFallbackTriesModelsInOrder:
fb = FallbackProvider(
primary=primary,
fallback_models=["fallback-a", "fallback-b"],
fallback_models=[_fallback("fallback-a"), _fallback("fallback-b")],
provider_factory=factory,
)
result = await fb.chat(messages=[{"role": "user", "content": "hi"}])
assert result.content == "b ok"
assert factory.call_count == 2
factory.assert_any_call("fallback-a")
factory.assert_any_call("fallback-b")
factory.assert_any_call(_fallback("fallback-a"))
factory.assert_any_call(_fallback("fallback-b"))
class TestAllFallbacksFail:
@ -205,7 +321,7 @@ class TestAllFallbacksFail:
fb = FallbackProvider(
primary=primary,
fallback_models=["fallback-a"],
fallback_models=[_fallback("fallback-a")],
provider_factory=factory,
)
@ -223,7 +339,7 @@ class TestFactoryExceptionSkipsModel:
fb = FallbackProvider(
primary=primary,
fallback_models=["fallback-a", "fallback-b"],
fallback_models=[_fallback("fallback-a"), _fallback("fallback-b")],
provider_factory=factory,
)
@ -242,13 +358,43 @@ class TestFallbackModelParameter:
fb = FallbackProvider(
primary=primary,
fallback_models=["fallback-model"],
fallback_models=[_fallback("fallback-model")],
provider_factory=factory,
)
await fb.chat(messages=[{"role": "user", "content": "hi"}], model="primary-model")
assert fallback.chat_calls[0]["model"] == "fallback-model"
@pytest.mark.asyncio
async def test_overrides_generation_fields_when_configured(self) -> None:
    """Generation fields set on the fallback entry replace the caller's values."""
    failing_primary = _FakeProvider("primary", _error_response())
    healthy_fallback = _FakeProvider("fallback", _make_response("ok"))
    entry = _fallback(
        "fallback-model",
        max_tokens=1234,
        temperature=0.4,
        reasoning_effort="low",
    )
    wrapper = FallbackProvider(
        primary=failing_primary,
        fallback_models=[entry],
        provider_factory=MagicMock(return_value=healthy_fallback),
    )

    # The caller's generation kwargs all differ from the fallback entry's.
    await wrapper.chat(
        messages=[{"role": "user", "content": "hi"}],
        model="primary-model",
        max_tokens=8192,
        temperature=0.1,
        reasoning_effort="high",
    )

    sent = healthy_fallback.chat_calls[0]
    assert sent["model"] == "fallback-model"
    assert sent["max_tokens"] == 1234
    assert sent["temperature"] == 0.4
    assert sent["reasoning_effort"] == "low"
class TestNoFallbackWhenEmptyList:
@pytest.mark.asyncio
@ -277,7 +423,7 @@ class TestChatStreamFailover:
fb = FallbackProvider(
primary=primary,
fallback_models=["fallback-a"],
fallback_models=[_fallback("fallback-a")],
provider_factory=factory,
)
@ -291,7 +437,7 @@ class TestGetDefaultModel:
primary = _FakeProvider("primary")
fb = FallbackProvider(
primary=primary,
fallback_models=["a"],
fallback_models=[_fallback("a")],
provider_factory=MagicMock(),
)
assert fb.get_default_model() == "primary/model"
@ -305,7 +451,7 @@ class TestCircuitBreaker:
factory = MagicMock(return_value=fallback)
fb = FallbackProvider(
primary=primary,
fallback_models=["fallback-a"],
fallback_models=[_fallback("fallback-a")],
provider_factory=factory,
)
@ -329,7 +475,7 @@ class TestCircuitBreaker:
factory = MagicMock(return_value=fallback)
fb = FallbackProvider(
primary=primary,
fallback_models=["fallback-a"],
fallback_models=[_fallback("fallback-a")],
provider_factory=factory,
)
@ -357,7 +503,7 @@ class TestGenerationForwarded:
primary.generation = GenerationSettings(temperature=0.5, max_tokens=1024)
fb = FallbackProvider(
primary=primary,
fallback_models=["a"],
fallback_models=[_fallback("a")],
provider_factory=MagicMock(),
)
assert fb.generation.temperature == 0.5