mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-20 00:22:31 +00:00
Revert "feat(runner): support structured fallback models"
This reverts commit 02b059a616dc6dc82ad15282102c7b27a5a34e40.
This commit is contained in:
parent
02b059a616
commit
43db848db0
@ -672,12 +672,6 @@ Existing configs do not need to change. If you do not set `modelPresets` or `age
|
|||||||
"maxTokens": 8192,
|
"maxTokens": 8192,
|
||||||
"contextWindowTokens": 128000,
|
"contextWindowTokens": 128000,
|
||||||
"temperature": 0.1,
|
"temperature": 0.1,
|
||||||
"fallbackModels": [
|
|
||||||
{
|
|
||||||
"provider": "anthropic",
|
|
||||||
"model": "anthropic/claude-sonnet-4-6"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"modelPreset": null
|
"modelPreset": null
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -688,17 +682,7 @@ Existing configs do not need to change. If you do not set `modelPresets` or `age
|
|||||||
"maxTokens": 4096,
|
"maxTokens": 4096,
|
||||||
"contextWindowTokens": 128000,
|
"contextWindowTokens": 128000,
|
||||||
"temperature": 0.2,
|
"temperature": 0.2,
|
||||||
"reasoningEffort": "low",
|
"reasoningEffort": "low"
|
||||||
"fallbackModels": [
|
|
||||||
{
|
|
||||||
"provider": "deepseek",
|
|
||||||
"model": "deepseek/deepseek-chat",
|
|
||||||
"maxTokens": 4096,
|
|
||||||
"contextWindowTokens": 64000,
|
|
||||||
"temperature": 0.1,
|
|
||||||
"reasoningEffort": null
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"deep": {
|
"deep": {
|
||||||
"model": "anthropic/claude-opus-4-5",
|
"model": "anthropic/claude-opus-4-5",
|
||||||
@ -721,53 +705,9 @@ Existing configs do not need to change. If you do not set `modelPresets` or `age
|
|||||||
| `contextWindowTokens` | Context window size used by prompt building and consolidation decisions. |
|
| `contextWindowTokens` | Context window size used by prompt building and consolidation decisions. |
|
||||||
| `temperature` | Sampling temperature. |
|
| `temperature` | Sampling temperature. |
|
||||||
| `reasoningEffort` | Optional reasoning/thinking setting. Provider support varies. |
|
| `reasoningEffort` | Optional reasoning/thinking setting. Provider support varies. |
|
||||||
| `fallbackModels` | Optional ordered fallback models for this active configuration only. |
|
|
||||||
|
|
||||||
`default` is reserved and always means the implicit preset built from `agents.defaults.*`; do not define `modelPresets.default`. Use `/model default` to switch back to `agents.defaults.*`.
|
`default` is reserved and always means the implicit preset built from `agents.defaults.*`; do not define `modelPresets.default`. Use `/model default` to switch back to `agents.defaults.*`.
|
||||||
|
|
||||||
### Model Fallbacks
|
|
||||||
|
|
||||||
`fallbackModels` belongs to the currently active model configuration. If the active configuration is `agents.defaults`, only `agents.defaults.fallbackModels` is used. If the active configuration is `modelPresets.fast`, only `modelPresets.fast.fallbackModels` is used. nanobot does not inherit or merge fallbacks between defaults and presets.
|
|
||||||
|
|
||||||
Each fallback entry must include at least `provider` and `model`. The other fields are optional; omitted values inherit from the active primary configuration for that request.
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"modelPresets": {
|
|
||||||
"fast": {
|
|
||||||
"model": "MiniMax-M2.7-highspeed",
|
|
||||||
"provider": "minimaxAnthropic",
|
|
||||||
"maxTokens": 4096,
|
|
||||||
"contextWindowTokens": 262144,
|
|
||||||
"temperature": 0.1,
|
|
||||||
"reasoningEffort": null,
|
|
||||||
"fallbackModels": [
|
|
||||||
{
|
|
||||||
"provider": "deepseek",
|
|
||||||
"model": "deepseek-v4-pro",
|
|
||||||
"maxTokens": 4096,
|
|
||||||
"contextWindowTokens": 262144,
|
|
||||||
"temperature": 0.1,
|
|
||||||
"reasoningEffort": null
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"deep": {
|
|
||||||
"model": "deepseek-v4-pro",
|
|
||||||
"provider": "deepseek",
|
|
||||||
"maxTokens": 4096,
|
|
||||||
"contextWindowTokens": 262144,
|
|
||||||
"temperature": 0.1,
|
|
||||||
"reasoningEffort": null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
In this example, `/model fast` can fail over to DeepSeek, but `/model deep` has no fallback because the `deep` preset does not define `fallbackModels`.
|
|
||||||
|
|
||||||
Failover only runs when the primary model returns an error before any answer text has been streamed. Fallback models are tried in order. If a fallback has a smaller `contextWindowTokens`, nanobot uses the smallest window in the active chain when building context so the fallback can receive the same prompt.
|
|
||||||
|
|
||||||
Set `agents.defaults.modelPreset` to start with a named preset:
|
Set `agents.defaults.modelPreset` to start with a named preset:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
|
|||||||
@ -74,17 +74,6 @@ class DreamConfig(Base):
|
|||||||
return f"every {hours}h"
|
return f"every {hours}h"
|
||||||
|
|
||||||
|
|
||||||
class ModelFallbackConfig(Base):
|
|
||||||
"""A fallback model tied to one active model configuration."""
|
|
||||||
|
|
||||||
model: str
|
|
||||||
provider: str
|
|
||||||
max_tokens: int | None = None
|
|
||||||
context_window_tokens: int | None = None
|
|
||||||
temperature: float | None = None
|
|
||||||
reasoning_effort: str | None = None
|
|
||||||
|
|
||||||
|
|
||||||
class ModelPresetConfig(Base):
|
class ModelPresetConfig(Base):
|
||||||
"""A named set of model + generation parameters for quick switching."""
|
"""A named set of model + generation parameters for quick switching."""
|
||||||
|
|
||||||
@ -94,7 +83,7 @@ class ModelPresetConfig(Base):
|
|||||||
context_window_tokens: int = 65_536
|
context_window_tokens: int = 65_536
|
||||||
temperature: float = 0.1
|
temperature: float = 0.1
|
||||||
reasoning_effort: str | None = None
|
reasoning_effort: str | None = None
|
||||||
fallback_models: list[ModelFallbackConfig] = Field(default_factory=list)
|
fallback_models: list[str] = Field(default_factory=list)
|
||||||
|
|
||||||
def to_generation_settings(self) -> Any:
|
def to_generation_settings(self) -> Any:
|
||||||
from nanobot.providers.base import GenerationSettings
|
from nanobot.providers.base import GenerationSettings
|
||||||
@ -118,7 +107,6 @@ class AgentDefaults(Base):
|
|||||||
context_window_tokens: int = 65_536
|
context_window_tokens: int = 65_536
|
||||||
context_block_limit: int | None = None
|
context_block_limit: int | None = None
|
||||||
temperature: float = 0.1
|
temperature: float = 0.1
|
||||||
fallback_models: list[ModelFallbackConfig] = Field(default_factory=list)
|
|
||||||
max_tool_iterations: int = 200
|
max_tool_iterations: int = 200
|
||||||
max_concurrent_subagents: int = Field(default=1, ge=1)
|
max_concurrent_subagents: int = Field(default=1, ge=1)
|
||||||
max_tool_result_chars: int = 16_000
|
max_tool_result_chars: int = 16_000
|
||||||
@ -309,7 +297,6 @@ class Config(BaseSettings):
|
|||||||
model=d.model, provider=d.provider, max_tokens=d.max_tokens,
|
model=d.model, provider=d.provider, max_tokens=d.max_tokens,
|
||||||
context_window_tokens=d.context_window_tokens,
|
context_window_tokens=d.context_window_tokens,
|
||||||
temperature=d.temperature, reasoning_effort=d.reasoning_effort,
|
temperature=d.temperature, reasoning_effort=d.reasoning_effort,
|
||||||
fallback_models=d.fallback_models,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def resolve_preset(self, name: str | None = None) -> ModelPresetConfig:
|
def resolve_preset(self, name: str | None = None) -> ModelPresetConfig:
|
||||||
|
|||||||
@ -5,7 +5,7 @@ from __future__ import annotations
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from nanobot.config.schema import Config, ModelFallbackConfig, ModelPresetConfig
|
from nanobot.config.schema import Config, ModelPresetConfig
|
||||||
from nanobot.providers.base import LLMProvider
|
from nanobot.providers.base import LLMProvider
|
||||||
from nanobot.providers.fallback_provider import FallbackProvider
|
from nanobot.providers.fallback_provider import FallbackProvider
|
||||||
from nanobot.providers.registry import find_by_name
|
from nanobot.providers.registry import find_by_name
|
||||||
@ -104,28 +104,6 @@ def _make_provider_core(
|
|||||||
return provider
|
return provider
|
||||||
|
|
||||||
|
|
||||||
def _fallback_preset(primary: ModelPresetConfig, fallback: ModelFallbackConfig) -> ModelPresetConfig:
|
|
||||||
"""Build the effective provider/generation config for one fallback model."""
|
|
||||||
return ModelPresetConfig(
|
|
||||||
model=fallback.model,
|
|
||||||
provider=fallback.provider,
|
|
||||||
max_tokens=fallback.max_tokens if fallback.max_tokens is not None else primary.max_tokens,
|
|
||||||
context_window_tokens=(
|
|
||||||
fallback.context_window_tokens
|
|
||||||
if fallback.context_window_tokens is not None
|
|
||||||
else primary.context_window_tokens
|
|
||||||
),
|
|
||||||
temperature=(
|
|
||||||
fallback.temperature if fallback.temperature is not None else primary.temperature
|
|
||||||
),
|
|
||||||
reasoning_effort=(
|
|
||||||
fallback.reasoning_effort
|
|
||||||
if fallback.reasoning_effort is not None
|
|
||||||
else primary.reasoning_effort
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def make_provider(
|
def make_provider(
|
||||||
config: Config,
|
config: Config,
|
||||||
*,
|
*,
|
||||||
@ -142,11 +120,12 @@ def make_provider(
|
|||||||
provider = _make_provider_core(config, preset_name=preset_name, preset=preset, model=model)
|
provider = _make_provider_core(config, preset_name=preset_name, preset=preset, model=model)
|
||||||
|
|
||||||
if resolved.fallback_models:
|
if resolved.fallback_models:
|
||||||
|
fb_preset = resolved.model_copy(update={"provider": "auto", "fallback_models": []})
|
||||||
provider = FallbackProvider(
|
provider = FallbackProvider(
|
||||||
primary=provider,
|
primary=provider,
|
||||||
fallback_models=resolved.fallback_models,
|
fallback_models=resolved.fallback_models,
|
||||||
provider_factory=lambda fb: _make_provider_core(
|
provider_factory=lambda m: _make_provider_core(
|
||||||
config, preset_name=preset_name, preset=_fallback_preset(resolved, fb)
|
config, preset_name=preset_name, preset=fb_preset, model=m
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -159,32 +138,9 @@ def provider_signature(
|
|||||||
preset_name: str | None = None,
|
preset_name: str | None = None,
|
||||||
preset: ModelPresetConfig | None = None,
|
preset: ModelPresetConfig | None = None,
|
||||||
) -> tuple[object, ...]:
|
) -> tuple[object, ...]:
|
||||||
"""Return the config fields that affect the active provider chain."""
|
"""Return the config fields that affect the primary LLM provider."""
|
||||||
resolved = _resolve_model_preset(config, preset_name=preset_name, preset=preset)
|
resolved = _resolve_model_preset(config, preset_name=preset_name, preset=preset)
|
||||||
p = config.get_provider(resolved.model, preset=resolved)
|
p = config.get_provider(resolved.model, preset=resolved)
|
||||||
|
|
||||||
def _fallback_signature(fallback: ModelFallbackConfig) -> tuple[object, ...]:
|
|
||||||
fallback_preset = _fallback_preset(resolved, fallback)
|
|
||||||
fp = config.get_provider(fallback.model, preset=fallback_preset)
|
|
||||||
return (
|
|
||||||
fallback.model,
|
|
||||||
fallback.provider,
|
|
||||||
fallback_preset.max_tokens,
|
|
||||||
fallback_preset.temperature,
|
|
||||||
fallback_preset.reasoning_effort,
|
|
||||||
fallback_preset.context_window_tokens,
|
|
||||||
config.get_provider_name(fallback.model, preset=fallback_preset),
|
|
||||||
config.get_api_key(fallback.model, preset=fallback_preset),
|
|
||||||
config.get_api_base(fallback.model, preset=fallback_preset),
|
|
||||||
fp.extra_headers if fp else None,
|
|
||||||
fp.extra_body if fp else None,
|
|
||||||
getattr(fp, "region", None) if fp else None,
|
|
||||||
getattr(fp, "profile", None) if fp else None,
|
|
||||||
)
|
|
||||||
|
|
||||||
fallback_signatures = tuple(
|
|
||||||
_fallback_signature(fallback) for fallback in resolved.fallback_models
|
|
||||||
)
|
|
||||||
return (
|
return (
|
||||||
resolved.model,
|
resolved.model,
|
||||||
resolved.provider,
|
resolved.provider,
|
||||||
@ -199,7 +155,6 @@ def provider_signature(
|
|||||||
resolved.temperature,
|
resolved.temperature,
|
||||||
resolved.reasoning_effort,
|
resolved.reasoning_effort,
|
||||||
resolved.context_window_tokens,
|
resolved.context_window_tokens,
|
||||||
fallback_signatures,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -210,14 +165,10 @@ def build_provider_snapshot(
|
|||||||
preset: ModelPresetConfig | None = None,
|
preset: ModelPresetConfig | None = None,
|
||||||
) -> ProviderSnapshot:
|
) -> ProviderSnapshot:
|
||||||
resolved = _resolve_model_preset(config, preset_name=preset_name, preset=preset)
|
resolved = _resolve_model_preset(config, preset_name=preset_name, preset=preset)
|
||||||
fallback_windows = [
|
|
||||||
_fallback_preset(resolved, fallback).context_window_tokens
|
|
||||||
for fallback in resolved.fallback_models
|
|
||||||
]
|
|
||||||
return ProviderSnapshot(
|
return ProviderSnapshot(
|
||||||
provider=make_provider(config, preset=resolved),
|
provider=make_provider(config, preset=resolved),
|
||||||
model=resolved.model,
|
model=resolved.model,
|
||||||
context_window_tokens=min([resolved.context_window_tokens, *fallback_windows]),
|
context_window_tokens=resolved.context_window_tokens,
|
||||||
signature=provider_signature(config, preset=resolved),
|
signature=provider_signature(config, preset=resolved),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -24,7 +24,7 @@ class FallbackProvider(LLMProvider):
|
|||||||
provider on-the-fly.
|
provider on-the-fly.
|
||||||
|
|
||||||
Key design:
|
Key design:
|
||||||
- Failover attempts are request-scoped; primary circuit state persists.
|
- Failover is request-scoped (the wrapper itself is stateless between turns).
|
||||||
- Skipped when content was already streamed to avoid duplicate output.
|
- Skipped when content was already streamed to avoid duplicate output.
|
||||||
- Recursive failover is prevented by the factory returning plain providers.
|
- Recursive failover is prevented by the factory returning plain providers.
|
||||||
- Primary provider is circuit-broken after repeated failures to avoid
|
- Primary provider is circuit-broken after repeated failures to avoid
|
||||||
@ -34,8 +34,8 @@ class FallbackProvider(LLMProvider):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
primary: LLMProvider,
|
primary: LLMProvider,
|
||||||
fallback_models: list[Any],
|
fallback_models: list[str],
|
||||||
provider_factory: Callable[[Any], LLMProvider],
|
provider_factory: Callable[[str], LLMProvider],
|
||||||
):
|
):
|
||||||
self._primary = primary
|
self._primary = primary
|
||||||
self._fallback_models = list(fallback_models)
|
self._fallback_models = list(fallback_models)
|
||||||
@ -52,10 +52,6 @@ class FallbackProvider(LLMProvider):
|
|||||||
def generation(self, value):
|
def generation(self, value):
|
||||||
self._primary.generation = value
|
self._primary.generation = value
|
||||||
|
|
||||||
@property
|
|
||||||
def supports_progress_deltas(self) -> bool:
|
|
||||||
return bool(getattr(self._primary, "supports_progress_deltas", False))
|
|
||||||
|
|
||||||
def get_default_model(self) -> str:
|
def get_default_model(self) -> str:
|
||||||
return self._primary.get_default_model()
|
return self._primary.get_default_model()
|
||||||
|
|
||||||
@ -126,8 +122,7 @@ class FallbackProvider(LLMProvider):
|
|||||||
|
|
||||||
last_response: LLMResponse | None = None
|
last_response: LLMResponse | None = None
|
||||||
primary_skipped = not self._primary_available()
|
primary_skipped = not self._primary_available()
|
||||||
for idx, fallback in enumerate(self._fallback_models):
|
for idx, fallback_model in enumerate(self._fallback_models):
|
||||||
fallback_model = fallback.model
|
|
||||||
if has_streamed is not None and has_streamed[0]:
|
if has_streamed is not None and has_streamed[0]:
|
||||||
break
|
break
|
||||||
if idx == 0 and primary_skipped:
|
if idx == 0 and primary_skipped:
|
||||||
@ -143,35 +138,25 @@ class FallbackProvider(LLMProvider):
|
|||||||
else:
|
else:
|
||||||
logger.info(
|
logger.info(
|
||||||
"Fallback '{}' also failed, trying next fallback '{}'",
|
"Fallback '{}' also failed, trying next fallback '{}'",
|
||||||
self._fallback_models[idx - 1].model, fallback_model,
|
self._fallback_models[idx - 1], fallback_model,
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
fallback_provider = self._provider_factory(fallback)
|
fallback_provider = self._provider_factory(fallback_model)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Failed to create provider for fallback '{}': {}", fallback_model, exc
|
"Failed to create provider for fallback '{}': {}", fallback_model, exc
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
original_values = {
|
original_model = kwargs.get("model")
|
||||||
name: kwargs.get(name, LLMProvider._SENTINEL)
|
|
||||||
for name in ("model", "max_tokens", "temperature", "reasoning_effort")
|
|
||||||
}
|
|
||||||
kwargs["model"] = fallback_model
|
kwargs["model"] = fallback_model
|
||||||
if fallback.max_tokens is not None:
|
|
||||||
kwargs["max_tokens"] = fallback.max_tokens
|
|
||||||
if fallback.temperature is not None:
|
|
||||||
kwargs["temperature"] = fallback.temperature
|
|
||||||
if fallback.reasoning_effort is not None:
|
|
||||||
kwargs["reasoning_effort"] = fallback.reasoning_effort
|
|
||||||
try:
|
try:
|
||||||
fallback_response = await call(fallback_provider, kwargs)
|
fallback_response = await call(fallback_provider, kwargs)
|
||||||
finally:
|
finally:
|
||||||
for name, value in original_values.items():
|
if original_model is not None:
|
||||||
if value is LLMProvider._SENTINEL:
|
kwargs["model"] = original_model
|
||||||
kwargs.pop(name, None)
|
else:
|
||||||
else:
|
kwargs.pop("model", None)
|
||||||
kwargs[name] = value
|
|
||||||
|
|
||||||
if fallback_response.finish_reason != "error":
|
if fallback_response.finish_reason != "error":
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|||||||
@ -7,7 +7,6 @@ from unittest.mock import MagicMock
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from nanobot.config.schema import ModelFallbackConfig
|
|
||||||
from nanobot.providers.base import LLMProvider, LLMResponse
|
from nanobot.providers.base import LLMProvider, LLMResponse
|
||||||
from nanobot.providers.fallback_provider import FallbackProvider
|
from nanobot.providers.fallback_provider import FallbackProvider
|
||||||
|
|
||||||
@ -25,25 +24,6 @@ def _error_response(content: str = "api error") -> LLMResponse:
|
|||||||
return _make_response(content, finish_reason="error", error_kind="server_error")
|
return _make_response(content, finish_reason="error", error_kind="server_error")
|
||||||
|
|
||||||
|
|
||||||
def _fallback(
|
|
||||||
model: str,
|
|
||||||
provider: str = "fallback",
|
|
||||||
*,
|
|
||||||
max_tokens: int | None = None,
|
|
||||||
context_window_tokens: int | None = None,
|
|
||||||
temperature: float | None = None,
|
|
||||||
reasoning_effort: str | None = None,
|
|
||||||
) -> ModelFallbackConfig:
|
|
||||||
return ModelFallbackConfig(
|
|
||||||
model=model,
|
|
||||||
provider=provider,
|
|
||||||
max_tokens=max_tokens,
|
|
||||||
context_window_tokens=context_window_tokens,
|
|
||||||
temperature=temperature,
|
|
||||||
reasoning_effort=reasoning_effort,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class _FakeProvider(LLMProvider):
|
class _FakeProvider(LLMProvider):
|
||||||
"""Fake provider for testing."""
|
"""Fake provider for testing."""
|
||||||
|
|
||||||
@ -80,113 +60,17 @@ def test_fallback_models_default_empty() -> None:
|
|||||||
|
|
||||||
def test_fallback_models_accepts_list() -> None:
|
def test_fallback_models_accepts_list() -> None:
|
||||||
from nanobot.config.schema import ModelPresetConfig
|
from nanobot.config.schema import ModelPresetConfig
|
||||||
p = ModelPresetConfig(
|
p = ModelPresetConfig(model="test/primary", fallback_models=["test/a", "test/b"])
|
||||||
model="test/primary",
|
assert p.fallback_models == ["test/a", "test/b"]
|
||||||
fallback_models=[{"provider": "test", "model": "test/a"}],
|
|
||||||
)
|
|
||||||
assert p.fallback_models == [_fallback("test/a", provider="test")]
|
|
||||||
|
|
||||||
|
|
||||||
def test_fallback_models_from_camel_case() -> None:
|
def test_fallback_models_from_camel_case() -> None:
|
||||||
from nanobot.config.schema import ModelPresetConfig
|
from nanobot.config.schema import ModelPresetConfig
|
||||||
p = ModelPresetConfig.model_validate({
|
p = ModelPresetConfig.model_validate({
|
||||||
"model": "test/primary",
|
"model": "test/primary",
|
||||||
"fallbackModels": [{"provider": "test", "model": "test/a"}],
|
"fallbackModels": ["test/a"],
|
||||||
})
|
})
|
||||||
assert p.fallback_models == [_fallback("test/a", provider="test")]
|
assert p.fallback_models == ["test/a"]
|
||||||
|
|
||||||
|
|
||||||
def test_provider_signature_tracks_fallback_models_and_provider_config() -> None:
|
|
||||||
from nanobot.config.schema import Config
|
|
||||||
from nanobot.providers.factory import provider_signature
|
|
||||||
|
|
||||||
base = {
|
|
||||||
"modelPresets": {
|
|
||||||
"prod": {
|
|
||||||
"model": "openai/gpt-4.1",
|
|
||||||
"fallbackModels": [
|
|
||||||
{"provider": "anthropic", "model": "anthropic/claude-sonnet-4-6"}
|
|
||||||
],
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"providers": {
|
|
||||||
"openai": {"apiKey": "primary-key"},
|
|
||||||
"anthropic": {"apiKey": "fallback-key"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
changed_fallback = {
|
|
||||||
**base,
|
|
||||||
"modelPresets": {
|
|
||||||
"prod": {
|
|
||||||
"model": "openai/gpt-4.1",
|
|
||||||
"fallbackModels": [{"provider": "deepseek", "model": "deepseek/deepseek-chat"}],
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"providers": {
|
|
||||||
**base["providers"],
|
|
||||||
"deepseek": {"apiKey": "deepseek-key"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
changed_key = {
|
|
||||||
**base,
|
|
||||||
"providers": {
|
|
||||||
"openai": {"apiKey": "primary-key"},
|
|
||||||
"anthropic": {"apiKey": "new-fallback-key"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
signature = provider_signature(Config.model_validate(base), preset_name="prod")
|
|
||||||
|
|
||||||
assert signature != provider_signature(Config.model_validate(changed_fallback), preset_name="prod")
|
|
||||||
assert signature != provider_signature(Config.model_validate(changed_key), preset_name="prod")
|
|
||||||
|
|
||||||
|
|
||||||
def test_agent_defaults_can_define_fallback_models() -> None:
|
|
||||||
from nanobot.config.schema import Config
|
|
||||||
|
|
||||||
config = Config.model_validate({
|
|
||||||
"agents": {
|
|
||||||
"defaults": {
|
|
||||||
"model": "primary-model",
|
|
||||||
"provider": "custom",
|
|
||||||
"fallbackModels": [{"provider": "deepseek", "model": "deepseek-v4-pro"}],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
assert config.resolve_preset().fallback_models == [
|
|
||||||
_fallback("deepseek-v4-pro", provider="deepseek")
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def test_provider_snapshot_uses_smallest_fallback_context_window() -> None:
|
|
||||||
from nanobot.config.schema import Config
|
|
||||||
from nanobot.providers.factory import build_provider_snapshot
|
|
||||||
|
|
||||||
config = Config.model_validate({
|
|
||||||
"modelPresets": {
|
|
||||||
"prod": {
|
|
||||||
"model": "openai/gpt-4.1",
|
|
||||||
"provider": "openai",
|
|
||||||
"contextWindowTokens": 128000,
|
|
||||||
"fallbackModels": [
|
|
||||||
{
|
|
||||||
"provider": "deepseek",
|
|
||||||
"model": "deepseek/deepseek-chat",
|
|
||||||
"contextWindowTokens": 64000,
|
|
||||||
}
|
|
||||||
],
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"providers": {
|
|
||||||
"openai": {"apiKey": "primary-key"},
|
|
||||||
"deepseek": {"apiKey": "fallback-key"},
|
|
||||||
},
|
|
||||||
})
|
|
||||||
|
|
||||||
snapshot = build_provider_snapshot(config, preset_name="prod")
|
|
||||||
|
|
||||||
assert snapshot.context_window_tokens == 64000
|
|
||||||
|
|
||||||
|
|
||||||
# -- FallbackProvider tests --
|
# -- FallbackProvider tests --
|
||||||
@ -199,7 +83,7 @@ class TestNoFallbackWhenPrimarySucceeds:
|
|||||||
factory = MagicMock()
|
factory = MagicMock()
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("fallback-a")],
|
fallback_models=["fallback-a"],
|
||||||
provider_factory=factory,
|
provider_factory=factory,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -218,14 +102,14 @@ class TestFallbackOnPrimaryError:
|
|||||||
|
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("fallback-a")],
|
fallback_models=["fallback-a"],
|
||||||
provider_factory=factory,
|
provider_factory=factory,
|
||||||
)
|
)
|
||||||
|
|
||||||
result = await fb.chat(messages=[{"role": "user", "content": "hi"}], model="primary-model")
|
result = await fb.chat(messages=[{"role": "user", "content": "hi"}], model="primary-model")
|
||||||
assert result.content == "fallback ok"
|
assert result.content == "fallback ok"
|
||||||
assert result.finish_reason == "stop"
|
assert result.finish_reason == "stop"
|
||||||
factory.assert_called_once_with(_fallback("fallback-a"))
|
factory.assert_called_once_with("fallback-a")
|
||||||
assert primary.chat_calls[0]["model"] == "primary-model"
|
assert primary.chat_calls[0]["model"] == "primary-model"
|
||||||
assert fallback.chat_calls[0]["model"] == "fallback-a"
|
assert fallback.chat_calls[0]["model"] == "fallback-a"
|
||||||
|
|
||||||
@ -237,7 +121,7 @@ class TestNoFallbackWhenContentStreamed:
|
|||||||
factory = MagicMock()
|
factory = MagicMock()
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("fallback-a")],
|
fallback_models=["fallback-a"],
|
||||||
provider_factory=factory,
|
provider_factory=factory,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -262,14 +146,14 @@ class TestFailoverOnTransientError:
|
|||||||
factory = MagicMock(return_value=fallback)
|
factory = MagicMock(return_value=fallback)
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("fallback-a")],
|
fallback_models=["fallback-a"],
|
||||||
provider_factory=factory,
|
provider_factory=factory,
|
||||||
)
|
)
|
||||||
|
|
||||||
result = await fb.chat(messages=[{"role": "user", "content": "hi"}])
|
result = await fb.chat(messages=[{"role": "user", "content": "hi"}])
|
||||||
assert result.content == "fallback ok"
|
assert result.content == "fallback ok"
|
||||||
assert result.finish_reason == "stop"
|
assert result.finish_reason == "stop"
|
||||||
factory.assert_called_once_with(_fallback("fallback-a"))
|
factory.assert_called_once_with("fallback-a")
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_timeout(self) -> None:
|
async def test_timeout(self) -> None:
|
||||||
@ -281,14 +165,14 @@ class TestFailoverOnTransientError:
|
|||||||
factory = MagicMock(return_value=fallback)
|
factory = MagicMock(return_value=fallback)
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("fallback-a")],
|
fallback_models=["fallback-a"],
|
||||||
provider_factory=factory,
|
provider_factory=factory,
|
||||||
)
|
)
|
||||||
|
|
||||||
result = await fb.chat(messages=[{"role": "user", "content": "hi"}])
|
result = await fb.chat(messages=[{"role": "user", "content": "hi"}])
|
||||||
assert result.content == "fallback ok"
|
assert result.content == "fallback ok"
|
||||||
assert result.finish_reason == "stop"
|
assert result.finish_reason == "stop"
|
||||||
factory.assert_called_once_with(_fallback("fallback-a"))
|
factory.assert_called_once_with("fallback-a")
|
||||||
|
|
||||||
|
|
||||||
class TestFallbackTriesModelsInOrder:
|
class TestFallbackTriesModelsInOrder:
|
||||||
@ -301,15 +185,15 @@ class TestFallbackTriesModelsInOrder:
|
|||||||
|
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("fallback-a"), _fallback("fallback-b")],
|
fallback_models=["fallback-a", "fallback-b"],
|
||||||
provider_factory=factory,
|
provider_factory=factory,
|
||||||
)
|
)
|
||||||
|
|
||||||
result = await fb.chat(messages=[{"role": "user", "content": "hi"}])
|
result = await fb.chat(messages=[{"role": "user", "content": "hi"}])
|
||||||
assert result.content == "b ok"
|
assert result.content == "b ok"
|
||||||
assert factory.call_count == 2
|
assert factory.call_count == 2
|
||||||
factory.assert_any_call(_fallback("fallback-a"))
|
factory.assert_any_call("fallback-a")
|
||||||
factory.assert_any_call(_fallback("fallback-b"))
|
factory.assert_any_call("fallback-b")
|
||||||
|
|
||||||
|
|
||||||
class TestAllFallbacksFail:
|
class TestAllFallbacksFail:
|
||||||
@ -321,7 +205,7 @@ class TestAllFallbacksFail:
|
|||||||
|
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("fallback-a")],
|
fallback_models=["fallback-a"],
|
||||||
provider_factory=factory,
|
provider_factory=factory,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -339,7 +223,7 @@ class TestFactoryExceptionSkipsModel:
|
|||||||
|
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("fallback-a"), _fallback("fallback-b")],
|
fallback_models=["fallback-a", "fallback-b"],
|
||||||
provider_factory=factory,
|
provider_factory=factory,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -358,43 +242,13 @@ class TestFallbackModelParameter:
|
|||||||
|
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("fallback-model")],
|
fallback_models=["fallback-model"],
|
||||||
provider_factory=factory,
|
provider_factory=factory,
|
||||||
)
|
)
|
||||||
|
|
||||||
await fb.chat(messages=[{"role": "user", "content": "hi"}], model="primary-model")
|
await fb.chat(messages=[{"role": "user", "content": "hi"}], model="primary-model")
|
||||||
assert fallback.chat_calls[0]["model"] == "fallback-model"
|
assert fallback.chat_calls[0]["model"] == "fallback-model"
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_overrides_generation_fields_when_configured(self) -> None:
|
|
||||||
primary = _FakeProvider("primary", _error_response())
|
|
||||||
fallback = _FakeProvider("fallback", _make_response("ok"))
|
|
||||||
fb = FallbackProvider(
|
|
||||||
primary=primary,
|
|
||||||
fallback_models=[
|
|
||||||
_fallback(
|
|
||||||
"fallback-model",
|
|
||||||
max_tokens=1234,
|
|
||||||
temperature=0.4,
|
|
||||||
reasoning_effort="low",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
provider_factory=MagicMock(return_value=fallback),
|
|
||||||
)
|
|
||||||
|
|
||||||
await fb.chat(
|
|
||||||
messages=[{"role": "user", "content": "hi"}],
|
|
||||||
model="primary-model",
|
|
||||||
max_tokens=8192,
|
|
||||||
temperature=0.1,
|
|
||||||
reasoning_effort="high",
|
|
||||||
)
|
|
||||||
|
|
||||||
assert fallback.chat_calls[0]["model"] == "fallback-model"
|
|
||||||
assert fallback.chat_calls[0]["max_tokens"] == 1234
|
|
||||||
assert fallback.chat_calls[0]["temperature"] == 0.4
|
|
||||||
assert fallback.chat_calls[0]["reasoning_effort"] == "low"
|
|
||||||
|
|
||||||
|
|
||||||
class TestNoFallbackWhenEmptyList:
|
class TestNoFallbackWhenEmptyList:
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@ -423,7 +277,7 @@ class TestChatStreamFailover:
|
|||||||
|
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("fallback-a")],
|
fallback_models=["fallback-a"],
|
||||||
provider_factory=factory,
|
provider_factory=factory,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -437,7 +291,7 @@ class TestGetDefaultModel:
|
|||||||
primary = _FakeProvider("primary")
|
primary = _FakeProvider("primary")
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("a")],
|
fallback_models=["a"],
|
||||||
provider_factory=MagicMock(),
|
provider_factory=MagicMock(),
|
||||||
)
|
)
|
||||||
assert fb.get_default_model() == "primary/model"
|
assert fb.get_default_model() == "primary/model"
|
||||||
@ -451,7 +305,7 @@ class TestCircuitBreaker:
|
|||||||
factory = MagicMock(return_value=fallback)
|
factory = MagicMock(return_value=fallback)
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("fallback-a")],
|
fallback_models=["fallback-a"],
|
||||||
provider_factory=factory,
|
provider_factory=factory,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -475,7 +329,7 @@ class TestCircuitBreaker:
|
|||||||
factory = MagicMock(return_value=fallback)
|
factory = MagicMock(return_value=fallback)
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("fallback-a")],
|
fallback_models=["fallback-a"],
|
||||||
provider_factory=factory,
|
provider_factory=factory,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -503,7 +357,7 @@ class TestGenerationForwarded:
|
|||||||
primary.generation = GenerationSettings(temperature=0.5, max_tokens=1024)
|
primary.generation = GenerationSettings(temperature=0.5, max_tokens=1024)
|
||||||
fb = FallbackProvider(
|
fb = FallbackProvider(
|
||||||
primary=primary,
|
primary=primary,
|
||||||
fallback_models=[_fallback("a")],
|
fallback_models=["a"],
|
||||||
provider_factory=MagicMock(),
|
provider_factory=MagicMock(),
|
||||||
)
|
)
|
||||||
assert fb.generation.temperature == 0.5
|
assert fb.generation.temperature == 0.5
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user