From 02b059a616dc6dc82ad15282102c7b27a5a34e40 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Wed, 13 May 2026 13:57:30 +0000 Subject: [PATCH] feat(runner): support structured fallback models Bind fallback model chains to the active model configuration so defaults and presets do not inherit or merge fallback behavior implicitly. Require explicit fallback providers while preserving per-fallback generation overrides and context-window safety. Co-authored-by: Cursor --- docs/configuration.md | 62 +++++++- nanobot/config/schema.py | 15 +- nanobot/providers/factory.py | 61 +++++++- nanobot/providers/fallback_provider.py | 37 +++-- tests/agent/test_runner_fallback.py | 192 ++++++++++++++++++++++--- 5 files changed, 325 insertions(+), 42 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 0123017d2..e208212cf 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -672,6 +672,12 @@ Existing configs do not need to change. If you do not set `modelPresets` or `age "maxTokens": 8192, "contextWindowTokens": 128000, "temperature": 0.1, + "fallbackModels": [ + { + "provider": "anthropic", + "model": "anthropic/claude-sonnet-4-6" + } + ], "modelPreset": null } }, @@ -682,7 +688,17 @@ Existing configs do not need to change. If you do not set `modelPresets` or `age "maxTokens": 4096, "contextWindowTokens": 128000, "temperature": 0.2, - "reasoningEffort": "low" + "reasoningEffort": "low", + "fallbackModels": [ + { + "provider": "deepseek", + "model": "deepseek/deepseek-chat", + "maxTokens": 4096, + "contextWindowTokens": 64000, + "temperature": 0.1, + "reasoningEffort": null + } + ] }, "deep": { "model": "anthropic/claude-opus-4-5", @@ -705,9 +721,53 @@ Existing configs do not need to change. If you do not set `modelPresets` or `age | `contextWindowTokens` | Context window size used by prompt building and consolidation decisions. | | `temperature` | Sampling temperature. | | `reasoningEffort` | Optional reasoning/thinking setting. Provider support varies. | +| `fallbackModels` | Optional ordered fallback models for this active configuration only. | `default` is reserved and always means the implicit preset built from `agents.defaults.*`; do not define `modelPresets.default`. Use `/model default` to switch back to `agents.defaults.*`. +### Model Fallbacks + +`fallbackModels` belongs to the currently active model configuration. If the active configuration is `agents.defaults`, only `agents.defaults.fallbackModels` is used. If the active configuration is `modelPresets.fast`, only `modelPresets.fast.fallbackModels` is used. nanobot does not inherit or merge fallbacks between defaults and presets. + +Each fallback entry must include at least `provider` and `model`. The other fields are optional; omitted values inherit from the active primary configuration for that request. + +```json +{ + "modelPresets": { + "fast": { + "model": "MiniMax-M2.7-highspeed", + "provider": "minimaxAnthropic", + "maxTokens": 4096, + "contextWindowTokens": 262144, + "temperature": 0.1, + "reasoningEffort": null, + "fallbackModels": [ + { + "provider": "deepseek", + "model": "deepseek-v4-pro", + "maxTokens": 4096, + "contextWindowTokens": 262144, + "temperature": 0.1, + "reasoningEffort": null + } + ] + }, + "deep": { + "model": "deepseek-v4-pro", + "provider": "deepseek", + "maxTokens": 4096, + "contextWindowTokens": 262144, + "temperature": 0.1, + "reasoningEffort": null + } + } +} +``` + +In this example, `/model fast` can fail over to DeepSeek, but `/model deep` has no fallback because the `deep` preset does not define `fallbackModels`. + +Failover only runs when the primary model returns an error before any answer text has been streamed. Fallback models are tried in order. If a fallback has a smaller `contextWindowTokens`, nanobot uses the smallest window in the active chain when building context so the fallback can receive the same prompt. + Set `agents.defaults.modelPreset` to start with a named preset: ```json diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index a112b932d..bdae26008 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -74,6 +74,17 @@ class DreamConfig(Base): return f"every {hours}h" +class ModelFallbackConfig(Base): + """A fallback model tied to one active model configuration.""" + + model: str + provider: str + max_tokens: int | None = None + context_window_tokens: int | None = None + temperature: float | None = None + reasoning_effort: str | None = None + + class ModelPresetConfig(Base): """A named set of model + generation parameters for quick switching.""" @@ -83,7 +94,7 @@ class ModelPresetConfig(Base): context_window_tokens: int = 65_536 temperature: float = 0.1 reasoning_effort: str | None = None - fallback_models: list[str] = Field(default_factory=list) + fallback_models: list[ModelFallbackConfig] = Field(default_factory=list) def to_generation_settings(self) -> Any: from nanobot.providers.base import GenerationSettings @@ -107,6 +118,7 @@ class AgentDefaults(Base): context_window_tokens: int = 65_536 context_block_limit: int | None = None temperature: float = 0.1 + fallback_models: list[ModelFallbackConfig] = Field(default_factory=list) max_tool_iterations: int = 200 max_concurrent_subagents: int = Field(default=1, ge=1) max_tool_result_chars: int = 16_000 @@ -297,6 +309,7 @@ class Config(BaseSettings): model=d.model, provider=d.provider, max_tokens=d.max_tokens, context_window_tokens=d.context_window_tokens, temperature=d.temperature, reasoning_effort=d.reasoning_effort, + fallback_models=d.fallback_models, ) def resolve_preset(self, name: str | None = None) -> ModelPresetConfig: diff --git a/nanobot/providers/factory.py b/nanobot/providers/factory.py index e4822b7f8..a3ae57daf 100644 --- a/nanobot/providers/factory.py +++ b/nanobot/providers/factory.py @@ -5,7 +5,7 @@ from __future__ import annotations from dataclasses import dataclass from pathlib import Path -from nanobot.config.schema import Config, ModelPresetConfig +from nanobot.config.schema import Config, ModelFallbackConfig, ModelPresetConfig from nanobot.providers.base import LLMProvider from nanobot.providers.fallback_provider import FallbackProvider from nanobot.providers.registry import find_by_name @@ -104,6 +104,28 @@ def _make_provider_core( return provider +def _fallback_preset(primary: ModelPresetConfig, fallback: ModelFallbackConfig) -> ModelPresetConfig: + """Build the effective provider/generation config for one fallback model.""" + return ModelPresetConfig( + model=fallback.model, + provider=fallback.provider, + max_tokens=fallback.max_tokens if fallback.max_tokens is not None else primary.max_tokens, + context_window_tokens=( + fallback.context_window_tokens + if fallback.context_window_tokens is not None + else primary.context_window_tokens + ), + temperature=( + fallback.temperature if fallback.temperature is not None else primary.temperature + ), + reasoning_effort=( + fallback.reasoning_effort + if fallback.reasoning_effort is not None + else primary.reasoning_effort + ), + ) + + def make_provider( config: Config, *, @@ -120,12 +142,11 @@ def make_provider( provider = _make_provider_core(config, preset_name=preset_name, preset=preset, model=model) if resolved.fallback_models: - fb_preset = resolved.model_copy(update={"provider": "auto", "fallback_models": []}) provider = FallbackProvider( primary=provider, fallback_models=resolved.fallback_models, - provider_factory=lambda m: _make_provider_core( - config, preset_name=preset_name, preset=fb_preset, model=m + provider_factory=lambda fb: _make_provider_core( + config, preset_name=preset_name, preset=_fallback_preset(resolved, fb) ), ) @@ -138,9 +159,32 @@ def provider_signature( preset_name: str | None = None, preset: ModelPresetConfig | None = None, ) -> tuple[object, ...]: - """Return the config fields that affect the primary LLM provider.""" + """Return the config fields that affect the active provider chain.""" resolved = _resolve_model_preset(config, preset_name=preset_name, preset=preset) p = config.get_provider(resolved.model, preset=resolved) + + def _fallback_signature(fallback: ModelFallbackConfig) -> tuple[object, ...]: + fallback_preset = _fallback_preset(resolved, fallback) + fp = config.get_provider(fallback.model, preset=fallback_preset) + return ( + fallback.model, + fallback.provider, + fallback_preset.max_tokens, + fallback_preset.temperature, + fallback_preset.reasoning_effort, + fallback_preset.context_window_tokens, + config.get_provider_name(fallback.model, preset=fallback_preset), + config.get_api_key(fallback.model, preset=fallback_preset), + config.get_api_base(fallback.model, preset=fallback_preset), + fp.extra_headers if fp else None, + fp.extra_body if fp else None, + getattr(fp, "region", None) if fp else None, + getattr(fp, "profile", None) if fp else None, + ) + + fallback_signatures = tuple( + _fallback_signature(fallback) for fallback in resolved.fallback_models + ) return ( resolved.model, resolved.provider, @@ -155,6 +199,7 @@ def provider_signature( resolved.temperature, resolved.reasoning_effort, resolved.context_window_tokens, + fallback_signatures, ) @@ -165,10 +210,14 @@ def build_provider_snapshot( preset: ModelPresetConfig | None = None, ) -> ProviderSnapshot: resolved = _resolve_model_preset(config, preset_name=preset_name, preset=preset) + fallback_windows = [ + _fallback_preset(resolved, fallback).context_window_tokens + for fallback in resolved.fallback_models + ] return ProviderSnapshot( provider=make_provider(config, preset=resolved), model=resolved.model, - context_window_tokens=resolved.context_window_tokens, + context_window_tokens=min([resolved.context_window_tokens, *fallback_windows]), signature=provider_signature(config, preset=resolved), ) diff --git a/nanobot/providers/fallback_provider.py b/nanobot/providers/fallback_provider.py index c0b137890..a62b619a0 100644 --- a/nanobot/providers/fallback_provider.py +++ b/nanobot/providers/fallback_provider.py @@ -24,7 +24,7 @@ class FallbackProvider(LLMProvider): provider on-the-fly. Key design: - - Failover is request-scoped (the wrapper itself is stateless between turns). + - Failover attempts are request-scoped; primary circuit state persists. - Skipped when content was already streamed to avoid duplicate output. - Recursive failover is prevented by the factory returning plain providers. - Primary provider is circuit-broken after repeated failures to avoid @@ -34,8 +34,8 @@ class FallbackProvider(LLMProvider): def __init__( self, primary: LLMProvider, - fallback_models: list[str], - provider_factory: Callable[[str], LLMProvider], + fallback_models: list[Any], + provider_factory: Callable[[Any], LLMProvider], ): self._primary = primary self._fallback_models = list(fallback_models) @@ -52,6 +52,10 @@ class FallbackProvider(LLMProvider): def generation(self, value): self._primary.generation = value + @property + def supports_progress_deltas(self) -> bool: + return bool(getattr(self._primary, "supports_progress_deltas", False)) + def get_default_model(self) -> str: return self._primary.get_default_model() @@ -122,7 +126,8 @@ class FallbackProvider(LLMProvider): last_response: LLMResponse | None = None primary_skipped = not self._primary_available() - for idx, fallback_model in enumerate(self._fallback_models): + for idx, fallback in enumerate(self._fallback_models): + fallback_model = fallback.model if has_streamed is not None and has_streamed[0]: break if idx == 0 and primary_skipped: @@ -138,25 +143,35 @@ class FallbackProvider(LLMProvider): else: logger.info( "Fallback '{}' also failed, trying next fallback '{}'", - self._fallback_models[idx - 1], fallback_model, + self._fallback_models[idx - 1].model, fallback_model, ) try: - fallback_provider = self._provider_factory(fallback_model) + fallback_provider = self._provider_factory(fallback) except Exception as exc: logger.warning( "Failed to create provider for fallback '{}': {}", fallback_model, exc ) continue - original_model = kwargs.get("model") + original_values = { + name: kwargs.get(name, LLMProvider._SENTINEL) + for name in ("model", "max_tokens", "temperature", "reasoning_effort") + } kwargs["model"] = fallback_model + if fallback.max_tokens is not None: + kwargs["max_tokens"] = fallback.max_tokens + if fallback.temperature is not None: + kwargs["temperature"] = fallback.temperature + if fallback.reasoning_effort is not None: + kwargs["reasoning_effort"] = fallback.reasoning_effort try: fallback_response = await call(fallback_provider, kwargs) finally: - if original_model is not None: - kwargs["model"] = original_model - else: - kwargs.pop("model", None) + for name, value in original_values.items(): + if value is LLMProvider._SENTINEL: + kwargs.pop(name, None) + else: + kwargs[name] = value if fallback_response.finish_reason != "error": logger.info( diff --git a/tests/agent/test_runner_fallback.py b/tests/agent/test_runner_fallback.py index 273bd6d6d..e15a29848 100644 --- a/tests/agent/test_runner_fallback.py +++ b/tests/agent/test_runner_fallback.py @@ -7,6 +7,7 @@ from unittest.mock import MagicMock import pytest +from nanobot.config.schema import ModelFallbackConfig from nanobot.providers.base import LLMProvider, LLMResponse from nanobot.providers.fallback_provider import FallbackProvider @@ -24,6 +25,25 @@ def _error_response(content: str = "api error") -> LLMResponse: return _make_response(content, finish_reason="error", error_kind="server_error") +def _fallback( + model: str, + provider: str = "fallback", + *, + max_tokens: int | None = None, + context_window_tokens: int | None = None, + temperature: float | None = None, + reasoning_effort: str | None = None, +) -> ModelFallbackConfig: + return ModelFallbackConfig( + model=model, + provider=provider, + max_tokens=max_tokens, + context_window_tokens=context_window_tokens, + temperature=temperature, + reasoning_effort=reasoning_effort, + ) + + class _FakeProvider(LLMProvider): """Fake provider for testing.""" @@ -60,17 +80,113 @@ def test_fallback_models_default_empty() -> None: def test_fallback_models_accepts_list() -> None: from nanobot.config.schema import ModelPresetConfig - p = ModelPresetConfig(model="test/primary", fallback_models=["test/a", "test/b"]) - assert p.fallback_models == ["test/a", "test/b"] + p = ModelPresetConfig( + model="test/primary", + fallback_models=[{"provider": "test", "model": "test/a"}], + ) + assert p.fallback_models == [_fallback("test/a", provider="test")] def test_fallback_models_from_camel_case() -> None: from nanobot.config.schema import ModelPresetConfig p = ModelPresetConfig.model_validate({ "model": "test/primary", - "fallbackModels": ["test/a"], + "fallbackModels": [{"provider": "test", "model": "test/a"}], }) - assert p.fallback_models == ["test/a"] + assert p.fallback_models == [_fallback("test/a", provider="test")] + + +def test_provider_signature_tracks_fallback_models_and_provider_config() -> None: + from nanobot.config.schema import Config + from nanobot.providers.factory import provider_signature + + base = { + "modelPresets": { + "prod": { + "model": "openai/gpt-4.1", + "fallbackModels": [ + {"provider": "anthropic", "model": "anthropic/claude-sonnet-4-6"} + ], + } + }, + "providers": { + "openai": {"apiKey": "primary-key"}, + "anthropic": {"apiKey": "fallback-key"}, + }, + } + changed_fallback = { + **base, + "modelPresets": { + "prod": { + "model": "openai/gpt-4.1", + "fallbackModels": [{"provider": "deepseek", "model": "deepseek/deepseek-chat"}], + } + }, + "providers": { + **base["providers"], + "deepseek": {"apiKey": "deepseek-key"}, + }, + } + changed_key = { + **base, + "providers": { + "openai": {"apiKey": "primary-key"}, + "anthropic": {"apiKey": "new-fallback-key"}, + }, + } + + signature = provider_signature(Config.model_validate(base), preset_name="prod") + + assert signature != provider_signature(Config.model_validate(changed_fallback), preset_name="prod") + assert signature != provider_signature(Config.model_validate(changed_key), preset_name="prod") + + +def test_agent_defaults_can_define_fallback_models() -> None: + from nanobot.config.schema import Config + + config = Config.model_validate({ + "agents": { + "defaults": { + "model": "primary-model", + "provider": "custom", + "fallbackModels": [{"provider": "deepseek", "model": "deepseek-v4-pro"}], + } + } + }) + + assert config.resolve_preset().fallback_models == [ + _fallback("deepseek-v4-pro", provider="deepseek") + ] + + +def test_provider_snapshot_uses_smallest_fallback_context_window() -> None: + from nanobot.config.schema import Config + from nanobot.providers.factory import build_provider_snapshot + + config = Config.model_validate({ + "modelPresets": { + "prod": { + "model": "openai/gpt-4.1", + "provider": "openai", + "contextWindowTokens": 128000, + "fallbackModels": [ + { + "provider": "deepseek", + "model": "deepseek/deepseek-chat", + "contextWindowTokens": 64000, + } + ], + } + }, + "providers": { + "openai": {"apiKey": "primary-key"}, + "deepseek": {"apiKey": "fallback-key"}, + }, + }) + + snapshot = build_provider_snapshot(config, preset_name="prod") + + assert snapshot.context_window_tokens == 64000 # -- FallbackProvider tests -- @@ -83,7 +199,7 @@ class TestNoFallbackWhenPrimarySucceeds: factory = MagicMock() fb = FallbackProvider( primary=primary, - fallback_models=["fallback-a"], + fallback_models=[_fallback("fallback-a")], provider_factory=factory, ) @@ -102,14 +218,14 @@ class TestFallbackOnPrimaryError: fb = FallbackProvider( primary=primary, - fallback_models=["fallback-a"], + fallback_models=[_fallback("fallback-a")], provider_factory=factory, ) result = await fb.chat(messages=[{"role": "user", "content": "hi"}], model="primary-model") assert result.content == "fallback ok" assert result.finish_reason == "stop" - factory.assert_called_once_with("fallback-a") + factory.assert_called_once_with(_fallback("fallback-a")) assert primary.chat_calls[0]["model"] == "primary-model" assert fallback.chat_calls[0]["model"] == "fallback-a" @@ -121,7 +237,7 @@ class TestNoFallbackWhenContentStreamed: factory = MagicMock() fb = FallbackProvider( primary=primary, - fallback_models=["fallback-a"], + fallback_models=[_fallback("fallback-a")], provider_factory=factory, ) @@ -146,14 +262,14 @@ class TestFailoverOnTransientError: factory = MagicMock(return_value=fallback) fb = FallbackProvider( primary=primary, - fallback_models=["fallback-a"], + fallback_models=[_fallback("fallback-a")], provider_factory=factory, ) result = await fb.chat(messages=[{"role": "user", "content": "hi"}]) assert result.content == "fallback ok" assert result.finish_reason == "stop" - factory.assert_called_once_with("fallback-a") + factory.assert_called_once_with(_fallback("fallback-a")) @pytest.mark.asyncio async def test_timeout(self) -> None: @@ -165,14 +281,14 @@ class TestFailoverOnTransientError: factory = MagicMock(return_value=fallback) fb = FallbackProvider( primary=primary, - fallback_models=["fallback-a"], + fallback_models=[_fallback("fallback-a")], provider_factory=factory, ) result = await fb.chat(messages=[{"role": "user", "content": "hi"}]) assert result.content == "fallback ok" assert result.finish_reason == "stop" - factory.assert_called_once_with("fallback-a") + factory.assert_called_once_with(_fallback("fallback-a")) class TestFallbackTriesModelsInOrder: @@ -185,15 +301,15 @@ class TestFallbackTriesModelsInOrder: fb = FallbackProvider( primary=primary, - fallback_models=["fallback-a", "fallback-b"], + fallback_models=[_fallback("fallback-a"), _fallback("fallback-b")], provider_factory=factory, ) result = await fb.chat(messages=[{"role": "user", "content": "hi"}]) assert result.content == "b ok" assert factory.call_count == 2 - factory.assert_any_call("fallback-a") - factory.assert_any_call("fallback-b") + factory.assert_any_call(_fallback("fallback-a")) + factory.assert_any_call(_fallback("fallback-b")) class TestAllFallbacksFail: @@ -205,7 +321,7 @@ class TestAllFallbacksFail: fb = FallbackProvider( primary=primary, - fallback_models=["fallback-a"], + fallback_models=[_fallback("fallback-a")], provider_factory=factory, ) @@ -223,7 +339,7 @@ class TestFactoryExceptionSkipsModel: fb = FallbackProvider( primary=primary, - fallback_models=["fallback-a", "fallback-b"], + fallback_models=[_fallback("fallback-a"), _fallback("fallback-b")], provider_factory=factory, ) @@ -242,13 +358,43 @@ class TestFallbackModelParameter: fb = FallbackProvider( primary=primary, - fallback_models=["fallback-model"], + fallback_models=[_fallback("fallback-model")], provider_factory=factory, ) await fb.chat(messages=[{"role": "user", "content": "hi"}], model="primary-model") assert fallback.chat_calls[0]["model"] == "fallback-model" + @pytest.mark.asyncio + async def test_overrides_generation_fields_when_configured(self) -> None: + primary = _FakeProvider("primary", _error_response()) + fallback = _FakeProvider("fallback", _make_response("ok")) + fb = FallbackProvider( + primary=primary, + fallback_models=[ + _fallback( + "fallback-model", + max_tokens=1234, + temperature=0.4, + reasoning_effort="low", + ) + ], + provider_factory=MagicMock(return_value=fallback), + ) + + await fb.chat( + messages=[{"role": "user", "content": "hi"}], + model="primary-model", + max_tokens=8192, + temperature=0.1, + reasoning_effort="high", + ) + + assert fallback.chat_calls[0]["model"] == "fallback-model" + assert fallback.chat_calls[0]["max_tokens"] == 1234 + assert fallback.chat_calls[0]["temperature"] == 0.4 + assert fallback.chat_calls[0]["reasoning_effort"] == "low" + class TestNoFallbackWhenEmptyList: @pytest.mark.asyncio @@ -277,7 +423,7 @@ class TestChatStreamFailover: fb = FallbackProvider( primary=primary, - fallback_models=["fallback-a"], + fallback_models=[_fallback("fallback-a")], provider_factory=factory, ) @@ -291,7 +437,7 @@ class TestGetDefaultModel: primary = _FakeProvider("primary") fb = FallbackProvider( primary=primary, - fallback_models=["a"], + fallback_models=[_fallback("a")], provider_factory=MagicMock(), ) assert fb.get_default_model() == "primary/model" @@ -305,7 +451,7 @@ class TestCircuitBreaker: factory = MagicMock(return_value=fallback) fb = FallbackProvider( primary=primary, - fallback_models=["fallback-a"], + fallback_models=[_fallback("fallback-a")], provider_factory=factory, ) @@ -329,7 +475,7 @@ class TestCircuitBreaker: factory = MagicMock(return_value=fallback) fb = FallbackProvider( primary=primary, - fallback_models=["fallback-a"], + fallback_models=[_fallback("fallback-a")], provider_factory=factory, ) @@ -357,7 +503,7 @@ class TestGenerationForwarded: primary.generation = GenerationSettings(temperature=0.5, max_tokens=1024) fb = FallbackProvider( primary=primary, - fallback_models=["a"], + fallback_models=[_fallback("a")], provider_factory=MagicMock(), ) assert fb.generation.temperature == 0.5