diff --git a/docs/configuration.md b/docs/configuration.md index e208212cf..0123017d2 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -672,12 +672,6 @@ Existing configs do not need to change. If you do not set `modelPresets` or `age "maxTokens": 8192, "contextWindowTokens": 128000, "temperature": 0.1, - "fallbackModels": [ - { - "provider": "anthropic", - "model": "anthropic/claude-sonnet-4-6" - } - ], "modelPreset": null } }, @@ -688,17 +682,7 @@ Existing configs do not need to change. If you do not set `modelPresets` or `age "maxTokens": 4096, "contextWindowTokens": 128000, "temperature": 0.2, - "reasoningEffort": "low", - "fallbackModels": [ - { - "provider": "deepseek", - "model": "deepseek/deepseek-chat", - "maxTokens": 4096, - "contextWindowTokens": 64000, - "temperature": 0.1, - "reasoningEffort": null - } - ] + "reasoningEffort": "low" }, "deep": { "model": "anthropic/claude-opus-4-5", @@ -721,53 +705,9 @@ Existing configs do not need to change. If you do not set `modelPresets` or `age | `contextWindowTokens` | Context window size used by prompt building and consolidation decisions. | | `temperature` | Sampling temperature. | | `reasoningEffort` | Optional reasoning/thinking setting. Provider support varies. | -| `fallbackModels` | Optional ordered fallback models for this active configuration only. | `default` is reserved and always means the implicit preset built from `agents.defaults.*`; do not define `modelPresets.default`. Use `/model default` to switch back to `agents.defaults.*`. -### Model Fallbacks - -`fallbackModels` belongs to the currently active model configuration. If the active configuration is `agents.defaults`, only `agents.defaults.fallbackModels` is used. If the active configuration is `modelPresets.fast`, only `modelPresets.fast.fallbackModels` is used. nanobot does not inherit or merge fallbacks between defaults and presets. - -Each fallback entry must include at least `provider` and `model`. 
The other fields are optional; omitted values inherit from the active primary configuration for that request. - -```json -{ - "modelPresets": { - "fast": { - "model": "MiniMax-M2.7-highspeed", - "provider": "minimaxAnthropic", - "maxTokens": 4096, - "contextWindowTokens": 262144, - "temperature": 0.1, - "reasoningEffort": null, - "fallbackModels": [ - { - "provider": "deepseek", - "model": "deepseek-v4-pro", - "maxTokens": 4096, - "contextWindowTokens": 262144, - "temperature": 0.1, - "reasoningEffort": null - } - ] - }, - "deep": { - "model": "deepseek-v4-pro", - "provider": "deepseek", - "maxTokens": 4096, - "contextWindowTokens": 262144, - "temperature": 0.1, - "reasoningEffort": null - } - } -} -``` - -In this example, `/model fast` can fail over to DeepSeek, but `/model deep` has no fallback because the `deep` preset does not define `fallbackModels`. - -Failover only runs when the primary model returns an error before any answer text has been streamed. Fallback models are tried in order. If a fallback has a smaller `contextWindowTokens`, nanobot uses the smallest window in the active chain when building context so the fallback can receive the same prompt. 
- Set `agents.defaults.modelPreset` to start with a named preset: ```json diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index bdae26008..a112b932d 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -74,17 +74,6 @@ class DreamConfig(Base): return f"every {hours}h" -class ModelFallbackConfig(Base): - """A fallback model tied to one active model configuration.""" - - model: str - provider: str - max_tokens: int | None = None - context_window_tokens: int | None = None - temperature: float | None = None - reasoning_effort: str | None = None - - class ModelPresetConfig(Base): """A named set of model + generation parameters for quick switching.""" @@ -94,7 +83,7 @@ class ModelPresetConfig(Base): context_window_tokens: int = 65_536 temperature: float = 0.1 reasoning_effort: str | None = None - fallback_models: list[ModelFallbackConfig] = Field(default_factory=list) + fallback_models: list[str] = Field(default_factory=list) def to_generation_settings(self) -> Any: from nanobot.providers.base import GenerationSettings @@ -118,7 +107,6 @@ class AgentDefaults(Base): context_window_tokens: int = 65_536 context_block_limit: int | None = None temperature: float = 0.1 - fallback_models: list[ModelFallbackConfig] = Field(default_factory=list) max_tool_iterations: int = 200 max_concurrent_subagents: int = Field(default=1, ge=1) max_tool_result_chars: int = 16_000 @@ -309,7 +297,6 @@ class Config(BaseSettings): model=d.model, provider=d.provider, max_tokens=d.max_tokens, context_window_tokens=d.context_window_tokens, temperature=d.temperature, reasoning_effort=d.reasoning_effort, - fallback_models=d.fallback_models, ) def resolve_preset(self, name: str | None = None) -> ModelPresetConfig: diff --git a/nanobot/providers/factory.py b/nanobot/providers/factory.py index a3ae57daf..e4822b7f8 100644 --- a/nanobot/providers/factory.py +++ b/nanobot/providers/factory.py @@ -5,7 +5,7 @@ from __future__ import annotations from dataclasses import 
dataclass from pathlib import Path -from nanobot.config.schema import Config, ModelFallbackConfig, ModelPresetConfig +from nanobot.config.schema import Config, ModelPresetConfig from nanobot.providers.base import LLMProvider from nanobot.providers.fallback_provider import FallbackProvider from nanobot.providers.registry import find_by_name @@ -104,28 +104,6 @@ def _make_provider_core( return provider -def _fallback_preset(primary: ModelPresetConfig, fallback: ModelFallbackConfig) -> ModelPresetConfig: - """Build the effective provider/generation config for one fallback model.""" - return ModelPresetConfig( - model=fallback.model, - provider=fallback.provider, - max_tokens=fallback.max_tokens if fallback.max_tokens is not None else primary.max_tokens, - context_window_tokens=( - fallback.context_window_tokens - if fallback.context_window_tokens is not None - else primary.context_window_tokens - ), - temperature=( - fallback.temperature if fallback.temperature is not None else primary.temperature - ), - reasoning_effort=( - fallback.reasoning_effort - if fallback.reasoning_effort is not None - else primary.reasoning_effort - ), - ) - - def make_provider( config: Config, *, @@ -142,11 +120,12 @@ def make_provider( provider = _make_provider_core(config, preset_name=preset_name, preset=preset, model=model) if resolved.fallback_models: + fb_preset = resolved.model_copy(update={"provider": "auto", "fallback_models": []}) provider = FallbackProvider( primary=provider, fallback_models=resolved.fallback_models, - provider_factory=lambda fb: _make_provider_core( - config, preset_name=preset_name, preset=_fallback_preset(resolved, fb) + provider_factory=lambda m: _make_provider_core( + config, preset_name=preset_name, preset=fb_preset, model=m ), ) @@ -159,32 +138,9 @@ def provider_signature( preset_name: str | None = None, preset: ModelPresetConfig | None = None, ) -> tuple[object, ...]: - """Return the config fields that affect the active provider chain.""" + """Return the 
config fields that affect the primary LLM provider.""" resolved = _resolve_model_preset(config, preset_name=preset_name, preset=preset) p = config.get_provider(resolved.model, preset=resolved) - - def _fallback_signature(fallback: ModelFallbackConfig) -> tuple[object, ...]: - fallback_preset = _fallback_preset(resolved, fallback) - fp = config.get_provider(fallback.model, preset=fallback_preset) - return ( - fallback.model, - fallback.provider, - fallback_preset.max_tokens, - fallback_preset.temperature, - fallback_preset.reasoning_effort, - fallback_preset.context_window_tokens, - config.get_provider_name(fallback.model, preset=fallback_preset), - config.get_api_key(fallback.model, preset=fallback_preset), - config.get_api_base(fallback.model, preset=fallback_preset), - fp.extra_headers if fp else None, - fp.extra_body if fp else None, - getattr(fp, "region", None) if fp else None, - getattr(fp, "profile", None) if fp else None, - ) - - fallback_signatures = tuple( - _fallback_signature(fallback) for fallback in resolved.fallback_models - ) return ( resolved.model, resolved.provider, @@ -199,7 +155,6 @@ def provider_signature( resolved.temperature, resolved.reasoning_effort, resolved.context_window_tokens, - fallback_signatures, ) @@ -210,14 +165,10 @@ def build_provider_snapshot( preset: ModelPresetConfig | None = None, ) -> ProviderSnapshot: resolved = _resolve_model_preset(config, preset_name=preset_name, preset=preset) - fallback_windows = [ - _fallback_preset(resolved, fallback).context_window_tokens - for fallback in resolved.fallback_models - ] return ProviderSnapshot( provider=make_provider(config, preset=resolved), model=resolved.model, - context_window_tokens=min([resolved.context_window_tokens, *fallback_windows]), + context_window_tokens=resolved.context_window_tokens, signature=provider_signature(config, preset=resolved), ) diff --git a/nanobot/providers/fallback_provider.py b/nanobot/providers/fallback_provider.py index a62b619a0..c0b137890 100644 
--- a/nanobot/providers/fallback_provider.py +++ b/nanobot/providers/fallback_provider.py @@ -24,7 +24,7 @@ class FallbackProvider(LLMProvider): provider on-the-fly. Key design: - - Failover attempts are request-scoped; primary circuit state persists. + - Failover attempts are request-scoped; the primary's circuit-breaker state persists between turns. - Skipped when content was already streamed to avoid duplicate output. - Recursive failover is prevented by the factory returning plain providers. - Primary provider is circuit-broken after repeated failures to avoid @@ -34,8 +34,8 @@ class FallbackProvider(LLMProvider): def __init__( self, primary: LLMProvider, - fallback_models: list[Any], - provider_factory: Callable[[Any], LLMProvider], + fallback_models: list[str], + provider_factory: Callable[[str], LLMProvider], ): self._primary = primary self._fallback_models = list(fallback_models) @@ -52,10 +52,6 @@ class FallbackProvider(LLMProvider): def generation(self, value): self._primary.generation = value - @property - def supports_progress_deltas(self) -> bool: - return bool(getattr(self._primary, "supports_progress_deltas", False)) - def get_default_model(self) -> str: return self._primary.get_default_model() @@ -126,8 +122,7 @@ class FallbackProvider(LLMProvider): last_response: LLMResponse | None = None primary_skipped = not self._primary_available() - for idx, fallback in enumerate(self._fallback_models): - fallback_model = fallback.model + for idx, fallback_model in enumerate(self._fallback_models): if has_streamed is not None and has_streamed[0]: break if idx == 0 and primary_skipped: @@ -143,35 +138,25 @@ class FallbackProvider(LLMProvider): else: logger.info( "Fallback '{}' also failed, trying next fallback '{}'", - self._fallback_models[idx - 1].model, fallback_model, + self._fallback_models[idx - 1], fallback_model, ) try: - fallback_provider = self._provider_factory(fallback) + fallback_provider = self._provider_factory(fallback_model) except Exception as exc: logger.warning( 
"Failed to create provider for fallback '{}': {}", fallback_model, exc ) continue - original_values = { - name: kwargs.get(name, LLMProvider._SENTINEL) - for name in ("model", "max_tokens", "temperature", "reasoning_effort") - } + original_model = kwargs.get("model") kwargs["model"] = fallback_model - if fallback.max_tokens is not None: - kwargs["max_tokens"] = fallback.max_tokens - if fallback.temperature is not None: - kwargs["temperature"] = fallback.temperature - if fallback.reasoning_effort is not None: - kwargs["reasoning_effort"] = fallback.reasoning_effort try: fallback_response = await call(fallback_provider, kwargs) finally: - for name, value in original_values.items(): - if value is LLMProvider._SENTINEL: - kwargs.pop(name, None) - else: - kwargs[name] = value + if original_model is not None: + kwargs["model"] = original_model + else: + kwargs.pop("model", None) if fallback_response.finish_reason != "error": logger.info( diff --git a/tests/agent/test_runner_fallback.py b/tests/agent/test_runner_fallback.py index e15a29848..273bd6d6d 100644 --- a/tests/agent/test_runner_fallback.py +++ b/tests/agent/test_runner_fallback.py @@ -7,7 +7,6 @@ from unittest.mock import MagicMock import pytest -from nanobot.config.schema import ModelFallbackConfig from nanobot.providers.base import LLMProvider, LLMResponse from nanobot.providers.fallback_provider import FallbackProvider @@ -25,25 +24,6 @@ def _error_response(content: str = "api error") -> LLMResponse: return _make_response(content, finish_reason="error", error_kind="server_error") -def _fallback( - model: str, - provider: str = "fallback", - *, - max_tokens: int | None = None, - context_window_tokens: int | None = None, - temperature: float | None = None, - reasoning_effort: str | None = None, -) -> ModelFallbackConfig: - return ModelFallbackConfig( - model=model, - provider=provider, - max_tokens=max_tokens, - context_window_tokens=context_window_tokens, - temperature=temperature, - 
reasoning_effort=reasoning_effort, - ) - - class _FakeProvider(LLMProvider): """Fake provider for testing.""" @@ -80,113 +60,17 @@ def test_fallback_models_default_empty() -> None: def test_fallback_models_accepts_list() -> None: from nanobot.config.schema import ModelPresetConfig - p = ModelPresetConfig( - model="test/primary", - fallback_models=[{"provider": "test", "model": "test/a"}], - ) - assert p.fallback_models == [_fallback("test/a", provider="test")] + p = ModelPresetConfig(model="test/primary", fallback_models=["test/a", "test/b"]) + assert p.fallback_models == ["test/a", "test/b"] def test_fallback_models_from_camel_case() -> None: from nanobot.config.schema import ModelPresetConfig p = ModelPresetConfig.model_validate({ "model": "test/primary", - "fallbackModels": [{"provider": "test", "model": "test/a"}], + "fallbackModels": ["test/a"], }) - assert p.fallback_models == [_fallback("test/a", provider="test")] - - -def test_provider_signature_tracks_fallback_models_and_provider_config() -> None: - from nanobot.config.schema import Config - from nanobot.providers.factory import provider_signature - - base = { - "modelPresets": { - "prod": { - "model": "openai/gpt-4.1", - "fallbackModels": [ - {"provider": "anthropic", "model": "anthropic/claude-sonnet-4-6"} - ], - } - }, - "providers": { - "openai": {"apiKey": "primary-key"}, - "anthropic": {"apiKey": "fallback-key"}, - }, - } - changed_fallback = { - **base, - "modelPresets": { - "prod": { - "model": "openai/gpt-4.1", - "fallbackModels": [{"provider": "deepseek", "model": "deepseek/deepseek-chat"}], - } - }, - "providers": { - **base["providers"], - "deepseek": {"apiKey": "deepseek-key"}, - }, - } - changed_key = { - **base, - "providers": { - "openai": {"apiKey": "primary-key"}, - "anthropic": {"apiKey": "new-fallback-key"}, - }, - } - - signature = provider_signature(Config.model_validate(base), preset_name="prod") - - assert signature != provider_signature(Config.model_validate(changed_fallback), 
preset_name="prod") - assert signature != provider_signature(Config.model_validate(changed_key), preset_name="prod") - - -def test_agent_defaults_can_define_fallback_models() -> None: - from nanobot.config.schema import Config - - config = Config.model_validate({ - "agents": { - "defaults": { - "model": "primary-model", - "provider": "custom", - "fallbackModels": [{"provider": "deepseek", "model": "deepseek-v4-pro"}], - } - } - }) - - assert config.resolve_preset().fallback_models == [ - _fallback("deepseek-v4-pro", provider="deepseek") - ] - - -def test_provider_snapshot_uses_smallest_fallback_context_window() -> None: - from nanobot.config.schema import Config - from nanobot.providers.factory import build_provider_snapshot - - config = Config.model_validate({ - "modelPresets": { - "prod": { - "model": "openai/gpt-4.1", - "provider": "openai", - "contextWindowTokens": 128000, - "fallbackModels": [ - { - "provider": "deepseek", - "model": "deepseek/deepseek-chat", - "contextWindowTokens": 64000, - } - ], - } - }, - "providers": { - "openai": {"apiKey": "primary-key"}, - "deepseek": {"apiKey": "fallback-key"}, - }, - }) - - snapshot = build_provider_snapshot(config, preset_name="prod") - - assert snapshot.context_window_tokens == 64000 + assert p.fallback_models == ["test/a"] # -- FallbackProvider tests -- @@ -199,7 +83,7 @@ class TestNoFallbackWhenPrimarySucceeds: factory = MagicMock() fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("fallback-a")], + fallback_models=["fallback-a"], provider_factory=factory, ) @@ -218,14 +102,14 @@ class TestFallbackOnPrimaryError: fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("fallback-a")], + fallback_models=["fallback-a"], provider_factory=factory, ) result = await fb.chat(messages=[{"role": "user", "content": "hi"}], model="primary-model") assert result.content == "fallback ok" assert result.finish_reason == "stop" - factory.assert_called_once_with(_fallback("fallback-a")) + 
factory.assert_called_once_with("fallback-a") assert primary.chat_calls[0]["model"] == "primary-model" assert fallback.chat_calls[0]["model"] == "fallback-a" @@ -237,7 +121,7 @@ class TestNoFallbackWhenContentStreamed: factory = MagicMock() fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("fallback-a")], + fallback_models=["fallback-a"], provider_factory=factory, ) @@ -262,14 +146,14 @@ class TestFailoverOnTransientError: factory = MagicMock(return_value=fallback) fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("fallback-a")], + fallback_models=["fallback-a"], provider_factory=factory, ) result = await fb.chat(messages=[{"role": "user", "content": "hi"}]) assert result.content == "fallback ok" assert result.finish_reason == "stop" - factory.assert_called_once_with(_fallback("fallback-a")) + factory.assert_called_once_with("fallback-a") @pytest.mark.asyncio async def test_timeout(self) -> None: @@ -281,14 +165,14 @@ class TestFailoverOnTransientError: factory = MagicMock(return_value=fallback) fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("fallback-a")], + fallback_models=["fallback-a"], provider_factory=factory, ) result = await fb.chat(messages=[{"role": "user", "content": "hi"}]) assert result.content == "fallback ok" assert result.finish_reason == "stop" - factory.assert_called_once_with(_fallback("fallback-a")) + factory.assert_called_once_with("fallback-a") class TestFallbackTriesModelsInOrder: @@ -301,15 +185,15 @@ class TestFallbackTriesModelsInOrder: fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("fallback-a"), _fallback("fallback-b")], + fallback_models=["fallback-a", "fallback-b"], provider_factory=factory, ) result = await fb.chat(messages=[{"role": "user", "content": "hi"}]) assert result.content == "b ok" assert factory.call_count == 2 - factory.assert_any_call(_fallback("fallback-a")) - factory.assert_any_call(_fallback("fallback-b")) + 
factory.assert_any_call("fallback-a") + factory.assert_any_call("fallback-b") class TestAllFallbacksFail: @@ -321,7 +205,7 @@ class TestAllFallbacksFail: fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("fallback-a")], + fallback_models=["fallback-a"], provider_factory=factory, ) @@ -339,7 +223,7 @@ class TestFactoryExceptionSkipsModel: fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("fallback-a"), _fallback("fallback-b")], + fallback_models=["fallback-a", "fallback-b"], provider_factory=factory, ) @@ -358,43 +242,13 @@ class TestFallbackModelParameter: fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("fallback-model")], + fallback_models=["fallback-model"], provider_factory=factory, ) await fb.chat(messages=[{"role": "user", "content": "hi"}], model="primary-model") assert fallback.chat_calls[0]["model"] == "fallback-model" - @pytest.mark.asyncio - async def test_overrides_generation_fields_when_configured(self) -> None: - primary = _FakeProvider("primary", _error_response()) - fallback = _FakeProvider("fallback", _make_response("ok")) - fb = FallbackProvider( - primary=primary, - fallback_models=[ - _fallback( - "fallback-model", - max_tokens=1234, - temperature=0.4, - reasoning_effort="low", - ) - ], - provider_factory=MagicMock(return_value=fallback), - ) - - await fb.chat( - messages=[{"role": "user", "content": "hi"}], - model="primary-model", - max_tokens=8192, - temperature=0.1, - reasoning_effort="high", - ) - - assert fallback.chat_calls[0]["model"] == "fallback-model" - assert fallback.chat_calls[0]["max_tokens"] == 1234 - assert fallback.chat_calls[0]["temperature"] == 0.4 - assert fallback.chat_calls[0]["reasoning_effort"] == "low" - class TestNoFallbackWhenEmptyList: @pytest.mark.asyncio @@ -423,7 +277,7 @@ class TestChatStreamFailover: fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("fallback-a")], + fallback_models=["fallback-a"], provider_factory=factory, ) @@ -437,7 
+291,7 @@ class TestGetDefaultModel: primary = _FakeProvider("primary") fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("a")], + fallback_models=["a"], provider_factory=MagicMock(), ) assert fb.get_default_model() == "primary/model" @@ -451,7 +305,7 @@ class TestCircuitBreaker: factory = MagicMock(return_value=fallback) fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("fallback-a")], + fallback_models=["fallback-a"], provider_factory=factory, ) @@ -475,7 +329,7 @@ class TestCircuitBreaker: factory = MagicMock(return_value=fallback) fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("fallback-a")], + fallback_models=["fallback-a"], provider_factory=factory, ) @@ -503,7 +357,7 @@ class TestGenerationForwarded: primary.generation = GenerationSettings(temperature=0.5, max_tokens=1024) fb = FallbackProvider( primary=primary, - fallback_models=[_fallback("a")], + fallback_models=["a"], provider_factory=MagicMock(), ) assert fb.generation.temperature == 0.5