mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-21 17:12:32 +00:00
refactor(providers): centralize gateway reasoning control
This commit is contained in:
parent
0cd2f626c0
commit
4f895e6307
@ -74,41 +74,43 @@ _THINKING_STYLE_MAP: dict[str, Any] = {
|
||||
"enable_thinking": lambda on: {"enable_thinking": on},
|
||||
"reasoning_split": lambda on: {"reasoning_split": on},
|
||||
}
|
||||
_GATEWAY_REASONING_STYLE_MAP: dict[str, Any] = {
|
||||
"reasoning_effort": lambda effort: {"reasoning": {"effort": effort}},
|
||||
}
|
||||
_MODEL_THINKING_STYLES: dict[str, str] = {
|
||||
**dict.fromkeys(_KIMI_THINKING_MODELS, "thinking_type"),
|
||||
**dict.fromkeys(_MIMO_THINKING_MODELS, "thinking_type"),
|
||||
}
|
||||
|
||||
|
||||
def _is_kimi_thinking_model(model_name: str) -> bool:
|
||||
"""Return True if model_name refers to a Kimi thinking-capable model.
|
||||
|
||||
Supports two forms:
|
||||
- Exact match: e.g. kimi-k2.5 / kimi-k2.6 in _KIMI_THINKING_MODELS
|
||||
- Slug match: moonshotai/kimi-k2.5 -> the part after the last "/"
|
||||
is checked against _KIMI_THINKING_MODELS
|
||||
|
||||
This covers both the native Moonshot provider (bare slug) and
|
||||
OpenRouter-style names (``"publisher/slug"``).
|
||||
"""
|
||||
name = model_name.lower()
|
||||
if name in _KIMI_THINKING_MODELS:
|
||||
return True
|
||||
if "/" in name and name.rsplit("/", 1)[1] in _KIMI_THINKING_MODELS:
|
||||
return True
|
||||
return False
|
||||
def _model_slug(model_name: str) -> str:
|
||||
return model_name.lower().rsplit("/", 1)[-1]
|
||||
|
||||
|
||||
def _is_mimo_thinking_model(model_name: str) -> bool:
|
||||
"""Return True if model_name refers to a MiMo thinking-capable model.
|
||||
def _model_thinking_style(model_name: str) -> str:
|
||||
return _MODEL_THINKING_STYLES.get(_model_slug(model_name), "")
|
||||
|
||||
Mirrors _is_kimi_thinking_model: gateway providers (e.g. OpenRouter
|
||||
routing ``xiaomi/mimo-v2.5-pro``) have no ``thinking_style`` on their
|
||||
spec, so the spec-driven branch in _build_kwargs misses them. The
|
||||
model-name path catches those cases.
|
||||
"""
|
||||
name = model_name.lower()
|
||||
if name in _MIMO_THINKING_MODELS:
|
||||
return True
|
||||
if "/" in name and name.rsplit("/", 1)[1] in _MIMO_THINKING_MODELS:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _thinking_styles_for(spec: ProviderSpec | None, model_name: str) -> list[str]:
|
||||
styles: list[str] = []
|
||||
if spec and spec.thinking_style:
|
||||
styles.append(spec.thinking_style)
|
||||
model_style = _model_thinking_style(model_name)
|
||||
if model_style and model_style not in styles:
|
||||
styles.append(model_style)
|
||||
return styles
|
||||
|
||||
|
||||
def _thinking_extra_body(style: str, thinking_enabled: bool) -> dict[str, Any] | None:
|
||||
builder = _THINKING_STYLE_MAP.get(style)
|
||||
return builder(thinking_enabled) if builder else None
|
||||
|
||||
|
||||
def _gateway_reasoning_extra_body(style: str, effort: str | None) -> dict[str, Any] | None:
|
||||
if not effort:
|
||||
return None
|
||||
builder = _GATEWAY_REASONING_STYLE_MAP.get(style)
|
||||
return builder(effort) if builder else None
|
||||
|
||||
|
||||
def _openai_compat_timeout_s() -> float:
|
||||
@ -581,60 +583,19 @@ class OpenAICompatProvider(LLMProvider):
|
||||
if wire_effort and semantic_effort != "none":
|
||||
kwargs["reasoning_effort"] = wire_effort
|
||||
|
||||
# Provider-specific thinking parameters.
|
||||
# Only sent when reasoning_effort is explicitly configured so that
|
||||
# the provider default is preserved otherwise.
|
||||
# The mapping is driven by ProviderSpec.thinking_style so that adding
|
||||
# a new provider never requires touching this function.
|
||||
if spec and spec.thinking_style and reasoning_effort is not None:
|
||||
# Only send thinking controls when reasoning_effort is explicit so
|
||||
# omitting the config preserves each provider's default.
|
||||
if reasoning_effort is not None:
|
||||
thinking_enabled = semantic_effort not in ("none", "minimal")
|
||||
extra = _THINKING_STYLE_MAP.get(spec.thinking_style, lambda _: None)(thinking_enabled)
|
||||
if extra:
|
||||
kwargs.setdefault("extra_body", {}).update(extra)
|
||||
|
||||
# Model-level thinking injection for Kimi thinking-capable models.
|
||||
# Strip any provider prefix (e.g. "moonshotai/") before the set lookup
|
||||
# so that OpenRouter-style names like "moonshotai/kimi-k2.5" are handled
|
||||
# identically to bare names like "kimi-k2.5".
|
||||
if reasoning_effort is not None and _is_kimi_thinking_model(model_name):
|
||||
thinking_enabled = semantic_effort not in ("none", "minimal")
|
||||
kwargs.setdefault("extra_body", {}).update(
|
||||
{"thinking": {"type": "enabled" if thinking_enabled else "disabled"}}
|
||||
)
|
||||
|
||||
# Model-level thinking injection for MiMo thinking-capable models.
|
||||
# Same shape as Kimi: gateway providers (OpenRouter, etc.) lack the
|
||||
# xiaomi_mimo spec's thinking_style, so the spec-driven branch above
|
||||
# misses them — match by model name to catch "xiaomi/mimo-v2.5-pro"
|
||||
# and friends. (Direct xiaomi_mimo requests are also covered here;
|
||||
# both branches write the same payload, so the dict update is a
|
||||
# safe no-op for already-handled cases.)
|
||||
if reasoning_effort is not None and _is_mimo_thinking_model(model_name):
|
||||
thinking_enabled = semantic_effort not in ("none", "minimal")
|
||||
kwargs.setdefault("extra_body", {}).update(
|
||||
{"thinking": {"type": "enabled" if thinking_enabled else "disabled"}}
|
||||
)
|
||||
|
||||
# OpenRouter uses its own unified `reasoning` field and does not
|
||||
# forward provider-specific thinking shapes (the Kimi/MiMo
|
||||
# extra_body.thinking above) to upstream. Reported as the follow-up
|
||||
# to #3845/#3851: MiMo via OR kept thinking despite our injection.
|
||||
# For known thinking-capable models routed via OR, mirror the
|
||||
# effort signal into reasoning.effort (OR's documented enum:
|
||||
# "none"|"minimal"|"low"|"medium"|"high"|"xhigh"), which OR
|
||||
# translates to the upstream model's native shape.
|
||||
if (
|
||||
spec
|
||||
and spec.name == "openrouter"
|
||||
and reasoning_effort is not None
|
||||
and (
|
||||
_is_kimi_thinking_model(model_name)
|
||||
or _is_mimo_thinking_model(model_name)
|
||||
)
|
||||
):
|
||||
kwargs.setdefault("extra_body", {}).update(
|
||||
{"reasoning": {"effort": semantic_effort}}
|
||||
)
|
||||
for thinking_style in _thinking_styles_for(spec, model_name):
|
||||
extra = _thinking_extra_body(thinking_style, thinking_enabled)
|
||||
if extra:
|
||||
kwargs.setdefault("extra_body", {}).update(extra)
|
||||
gateway_style = getattr(spec, "gateway_reasoning_style", "") if spec else ""
|
||||
if gateway_style and _model_thinking_style(model_name):
|
||||
extra = _gateway_reasoning_extra_body(gateway_style, semantic_effort)
|
||||
if extra:
|
||||
kwargs.setdefault("extra_body", {}).update(extra)
|
||||
|
||||
if tools:
|
||||
kwargs["tools"] = tools
|
||||
@ -649,8 +610,7 @@ class OpenAICompatProvider(LLMProvider):
|
||||
and semantic_effort not in ("none", "minimal")
|
||||
and (
|
||||
(spec and spec.thinking_style)
|
||||
or _is_kimi_thinking_model(model_name)
|
||||
or _is_mimo_thinking_model(model_name)
|
||||
or _model_thinking_style(model_name)
|
||||
)
|
||||
)
|
||||
implicit_deepseek_thinking = (
|
||||
|
||||
@ -71,6 +71,11 @@ class ProviderSpec:
|
||||
# "reasoning_split" — {"reasoning_split": true/false} (MiniMax)
|
||||
thinking_style: str = ""
|
||||
|
||||
# Gateway-native reasoning control to pair with model-level thinking styles.
|
||||
# "reasoning_effort" — {"reasoning": {"effort": <none|minimal|...>}}
|
||||
# (OpenRouter)
|
||||
gateway_reasoning_style: str = ""
|
||||
|
||||
# When True, treat the "reasoning" response field as formal content
|
||||
# when "content" is empty. Only set this for providers (e.g. StepFun)
|
||||
# whose API returns the actual answer in "reasoning" instead of "content".
|
||||
@ -142,6 +147,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
|
||||
detect_by_base_keyword="openrouter",
|
||||
default_api_base="https://openrouter.ai/api/v1",
|
||||
supports_prompt_caching=True,
|
||||
gateway_reasoning_style="reasoning_effort",
|
||||
),
|
||||
# Hugging Face Inference Providers: OpenAI-compatible router for chat models.
|
||||
ProviderSpec(
|
||||
|
||||
@ -32,7 +32,7 @@ def _mimo_spec():
|
||||
|
||||
|
||||
def _openrouter_spec():
|
||||
"""Return the registered OpenRouter ProviderSpec (no thinking_style)."""
|
||||
"""Return the registered OpenRouter ProviderSpec."""
|
||||
specs = {s.name: s for s in PROVIDERS}
|
||||
return specs["openrouter"]
|
||||
|
||||
@ -77,6 +77,13 @@ def test_xiaomi_mimo_uses_thinking_type_style():
|
||||
assert spec.default_api_base == "https://api.xiaomimimo.com/v1"
|
||||
|
||||
|
||||
def test_openrouter_declares_gateway_reasoning_style():
|
||||
"""OpenRouter uses its own reasoning.effort field for routed thinking models."""
|
||||
spec = _openrouter_spec()
|
||||
assert spec.thinking_style == ""
|
||||
assert spec.gateway_reasoning_style == "reasoning_effort"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _build_kwargs wire-format
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user