mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-22 17:42:24 +00:00
refactor(providers): centralize gateway reasoning control
This commit is contained in:
parent
0cd2f626c0
commit
4f895e6307
@ -74,41 +74,43 @@ _THINKING_STYLE_MAP: dict[str, Any] = {
|
|||||||
"enable_thinking": lambda on: {"enable_thinking": on},
|
"enable_thinking": lambda on: {"enable_thinking": on},
|
||||||
"reasoning_split": lambda on: {"reasoning_split": on},
|
"reasoning_split": lambda on: {"reasoning_split": on},
|
||||||
}
|
}
|
||||||
|
# Gateway reasoning style name -> builder for the ``extra_body`` fragment.
# Each builder takes the effort value and returns the dict to merge into the
# request's ``extra_body``.
#   "reasoning_effort" — {"reasoning": {"effort": <effort>}}
_GATEWAY_REASONING_STYLE_MAP: dict[str, Any] = {
    "reasoning_effort": lambda effort: {"reasoning": {"effort": effort}},
}
|
||||||
|
# Model slug -> thinking style, for models whose thinking control is selected
# by model name. Keys come from the Kimi and MiMo thinking-model collections;
# lookups use the lower-cased part of the model name after the last "/".
_MODEL_THINKING_STYLES: dict[str, str] = {
    **dict.fromkeys(_KIMI_THINKING_MODELS, "thinking_type"),
    **dict.fromkeys(_MIMO_THINKING_MODELS, "thinking_type"),
}
|
||||||
|
|
||||||
|
|
||||||
def _is_kimi_thinking_model(model_name: str) -> bool:
|
def _model_slug(model_name: str) -> str:
|
||||||
"""Return True if model_name refers to a Kimi thinking-capable model.
|
return model_name.lower().rsplit("/", 1)[-1]
|
||||||
|
|
||||||
Supports two forms:
|
|
||||||
- Exact match: e.g. kimi-k2.5 / kimi-k2.6 in _KIMI_THINKING_MODELS
|
|
||||||
- Slug match: moonshotai/kimi-k2.5 -> the part after the last "/"
|
|
||||||
is checked against _KIMI_THINKING_MODELS
|
|
||||||
|
|
||||||
This covers both the native Moonshot provider (bare slug) and
|
|
||||||
OpenRouter-style names (``"publisher/slug"``).
|
|
||||||
"""
|
|
||||||
name = model_name.lower()
|
|
||||||
if name in _KIMI_THINKING_MODELS:
|
|
||||||
return True
|
|
||||||
if "/" in name and name.rsplit("/", 1)[1] in _KIMI_THINKING_MODELS:
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def _is_mimo_thinking_model(model_name: str) -> bool:
|
def _model_thinking_style(model_name: str) -> str:
|
||||||
"""Return True if model_name refers to a MiMo thinking-capable model.
|
return _MODEL_THINKING_STYLES.get(_model_slug(model_name), "")
|
||||||
|
|
||||||
Mirrors _is_kimi_thinking_model: gateway providers (e.g. OpenRouter
|
|
||||||
routing ``xiaomi/mimo-v2.5-pro``) have no ``thinking_style`` on their
|
def _thinking_styles_for(spec: ProviderSpec | None, model_name: str) -> list[str]:
|
||||||
spec, so the spec-driven branch in _build_kwargs misses them. The
|
styles: list[str] = []
|
||||||
model-name path catches those cases.
|
if spec and spec.thinking_style:
|
||||||
"""
|
styles.append(spec.thinking_style)
|
||||||
name = model_name.lower()
|
model_style = _model_thinking_style(model_name)
|
||||||
if name in _MIMO_THINKING_MODELS:
|
if model_style and model_style not in styles:
|
||||||
return True
|
styles.append(model_style)
|
||||||
if "/" in name and name.rsplit("/", 1)[1] in _MIMO_THINKING_MODELS:
|
return styles
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
def _thinking_extra_body(style: str, thinking_enabled: bool) -> dict[str, Any] | None:
    """Build the ``extra_body`` fragment for a thinking style.

    Args:
        style: Key into ``_THINKING_STYLE_MAP``.
        thinking_enabled: Passed through to the style's builder.

    Returns:
        Whatever the registered builder returns for ``thinking_enabled``, or
        ``None`` when ``style`` has no registered builder.
    """
    builder = _THINKING_STYLE_MAP.get(style)
    if not builder:
        # Unknown (or empty) style: nothing to add to the request.
        return None
    return builder(thinking_enabled)
|
||||||
|
|
||||||
|
|
||||||
|
def _gateway_reasoning_extra_body(style: str, effort: str | None) -> dict[str, Any] | None:
|
||||||
|
if not effort:
|
||||||
|
return None
|
||||||
|
builder = _GATEWAY_REASONING_STYLE_MAP.get(style)
|
||||||
|
return builder(effort) if builder else None
|
||||||
|
|
||||||
|
|
||||||
def _openai_compat_timeout_s() -> float:
|
def _openai_compat_timeout_s() -> float:
|
||||||
@ -581,60 +583,19 @@ class OpenAICompatProvider(LLMProvider):
|
|||||||
if wire_effort and semantic_effort != "none":
|
if wire_effort and semantic_effort != "none":
|
||||||
kwargs["reasoning_effort"] = wire_effort
|
kwargs["reasoning_effort"] = wire_effort
|
||||||
|
|
||||||
# Provider-specific thinking parameters.
|
# Only send thinking controls when reasoning_effort is explicit so
|
||||||
# Only sent when reasoning_effort is explicitly configured so that
|
# omitting the config preserves each provider's default.
|
||||||
# the provider default is preserved otherwise.
|
if reasoning_effort is not None:
|
||||||
# The mapping is driven by ProviderSpec.thinking_style so that adding
|
|
||||||
# a new provider never requires touching this function.
|
|
||||||
if spec and spec.thinking_style and reasoning_effort is not None:
|
|
||||||
thinking_enabled = semantic_effort not in ("none", "minimal")
|
thinking_enabled = semantic_effort not in ("none", "minimal")
|
||||||
extra = _THINKING_STYLE_MAP.get(spec.thinking_style, lambda _: None)(thinking_enabled)
|
for thinking_style in _thinking_styles_for(spec, model_name):
|
||||||
if extra:
|
extra = _thinking_extra_body(thinking_style, thinking_enabled)
|
||||||
kwargs.setdefault("extra_body", {}).update(extra)
|
if extra:
|
||||||
|
kwargs.setdefault("extra_body", {}).update(extra)
|
||||||
# Model-level thinking injection for Kimi thinking-capable models.
|
gateway_style = getattr(spec, "gateway_reasoning_style", "") if spec else ""
|
||||||
# Strip any provider prefix (e.g. "moonshotai/") before the set lookup
|
if gateway_style and _model_thinking_style(model_name):
|
||||||
# so that OpenRouter-style names like "moonshotai/kimi-k2.5" are handled
|
extra = _gateway_reasoning_extra_body(gateway_style, semantic_effort)
|
||||||
# identically to bare names like "kimi-k2.5".
|
if extra:
|
||||||
if reasoning_effort is not None and _is_kimi_thinking_model(model_name):
|
kwargs.setdefault("extra_body", {}).update(extra)
|
||||||
thinking_enabled = semantic_effort not in ("none", "minimal")
|
|
||||||
kwargs.setdefault("extra_body", {}).update(
|
|
||||||
{"thinking": {"type": "enabled" if thinking_enabled else "disabled"}}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Model-level thinking injection for MiMo thinking-capable models.
|
|
||||||
# Same shape as Kimi: gateway providers (OpenRouter, etc.) lack the
|
|
||||||
# xiaomi_mimo spec's thinking_style, so the spec-driven branch above
|
|
||||||
# misses them — match by model name to catch "xiaomi/mimo-v2.5-pro"
|
|
||||||
# and friends. (Direct xiaomi_mimo requests are also covered here;
|
|
||||||
# both branches write the same payload, so the dict update is a
|
|
||||||
# safe no-op for already-handled cases.)
|
|
||||||
if reasoning_effort is not None and _is_mimo_thinking_model(model_name):
|
|
||||||
thinking_enabled = semantic_effort not in ("none", "minimal")
|
|
||||||
kwargs.setdefault("extra_body", {}).update(
|
|
||||||
{"thinking": {"type": "enabled" if thinking_enabled else "disabled"}}
|
|
||||||
)
|
|
||||||
|
|
||||||
# OpenRouter uses its own unified `reasoning` field and does not
|
|
||||||
# forward provider-specific thinking shapes (the Kimi/MiMo
|
|
||||||
# extra_body.thinking above) to upstream. Reported as the follow-up
|
|
||||||
# to #3845/#3851: MiMo via OR kept thinking despite our injection.
|
|
||||||
# For known thinking-capable models routed via OR, mirror the
|
|
||||||
# effort signal into reasoning.effort (OR's documented enum:
|
|
||||||
# "none"|"minimal"|"low"|"medium"|"high"|"xhigh"), which OR
|
|
||||||
# translates to the upstream model's native shape.
|
|
||||||
if (
|
|
||||||
spec
|
|
||||||
and spec.name == "openrouter"
|
|
||||||
and reasoning_effort is not None
|
|
||||||
and (
|
|
||||||
_is_kimi_thinking_model(model_name)
|
|
||||||
or _is_mimo_thinking_model(model_name)
|
|
||||||
)
|
|
||||||
):
|
|
||||||
kwargs.setdefault("extra_body", {}).update(
|
|
||||||
{"reasoning": {"effort": semantic_effort}}
|
|
||||||
)
|
|
||||||
|
|
||||||
if tools:
|
if tools:
|
||||||
kwargs["tools"] = tools
|
kwargs["tools"] = tools
|
||||||
@ -649,8 +610,7 @@ class OpenAICompatProvider(LLMProvider):
|
|||||||
and semantic_effort not in ("none", "minimal")
|
and semantic_effort not in ("none", "minimal")
|
||||||
and (
|
and (
|
||||||
(spec and spec.thinking_style)
|
(spec and spec.thinking_style)
|
||||||
or _is_kimi_thinking_model(model_name)
|
or _model_thinking_style(model_name)
|
||||||
or _is_mimo_thinking_model(model_name)
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
implicit_deepseek_thinking = (
|
implicit_deepseek_thinking = (
|
||||||
|
|||||||
@ -71,6 +71,11 @@ class ProviderSpec:
|
|||||||
# "reasoning_split" — {"reasoning_split": true/false} (MiniMax)
|
# "reasoning_split" — {"reasoning_split": true/false} (MiniMax)
|
||||||
thinking_style: str = ""
|
thinking_style: str = ""
|
||||||
|
|
||||||
|
# Gateway-native reasoning control to pair with model-level thinking styles.
|
||||||
|
# "reasoning_effort" — {"reasoning": {"effort": <none|minimal|...>}}
|
||||||
|
# (OpenRouter)
|
||||||
|
gateway_reasoning_style: str = ""
|
||||||
|
|
||||||
# When True, treat the "reasoning" response field as formal content
|
# When True, treat the "reasoning" response field as formal content
|
||||||
# when "content" is empty. Only set this for providers (e.g. StepFun)
|
# when "content" is empty. Only set this for providers (e.g. StepFun)
|
||||||
# whose API returns the actual answer in "reasoning" instead of "content".
|
# whose API returns the actual answer in "reasoning" instead of "content".
|
||||||
@ -142,6 +147,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
|
|||||||
detect_by_base_keyword="openrouter",
|
detect_by_base_keyword="openrouter",
|
||||||
default_api_base="https://openrouter.ai/api/v1",
|
default_api_base="https://openrouter.ai/api/v1",
|
||||||
supports_prompt_caching=True,
|
supports_prompt_caching=True,
|
||||||
|
gateway_reasoning_style="reasoning_effort",
|
||||||
),
|
),
|
||||||
# Hugging Face Inference Providers: OpenAI-compatible router for chat models.
|
# Hugging Face Inference Providers: OpenAI-compatible router for chat models.
|
||||||
ProviderSpec(
|
ProviderSpec(
|
||||||
|
|||||||
@ -32,7 +32,7 @@ def _mimo_spec():
|
|||||||
|
|
||||||
|
|
||||||
def _openrouter_spec():
|
def _openrouter_spec():
|
||||||
"""Return the registered OpenRouter ProviderSpec (no thinking_style)."""
|
"""Return the registered OpenRouter ProviderSpec."""
|
||||||
specs = {s.name: s for s in PROVIDERS}
|
specs = {s.name: s for s in PROVIDERS}
|
||||||
return specs["openrouter"]
|
return specs["openrouter"]
|
||||||
|
|
||||||
@ -77,6 +77,13 @@ def test_xiaomi_mimo_uses_thinking_type_style():
|
|||||||
assert spec.default_api_base == "https://api.xiaomimimo.com/v1"
|
assert spec.default_api_base == "https://api.xiaomimimo.com/v1"
|
||||||
|
|
||||||
|
|
||||||
|
def test_openrouter_declares_gateway_reasoning_style():
    """OpenRouter uses its own reasoning.effort field for routed thinking models."""
    spec = _openrouter_spec()
    # The registered OpenRouter spec carries no thinking_style of its own;
    # its reasoning control is declared via gateway_reasoning_style instead.
    assert spec.thinking_style == ""
    assert spec.gateway_reasoning_style == "reasoning_effort"
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# _build_kwargs wire-format
|
# _build_kwargs wire-format
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user