refactor(providers): centralize gateway reasoning control

2026-05-22 17:42:24 +00:00 · 2026-05-21 14:34:45 +08:00 · 2026-05-21 14:34:45 +08:00 · 4f895e6307
commit 4f895e6307
parent 0cd2f626c0
3 changed files with 59 additions and 86 deletions
--- a/nanobot/providers/openai_compat_provider.py
+++ b/nanobot/providers/openai_compat_provider.py
@ -74,41 +74,43 @@ _THINKING_STYLE_MAP: dict[str, Any] = {
    "enable_thinking": lambda on: {"enable_thinking": on},
    "reasoning_split": lambda on: {"reasoning_split": on},
 }
 _GATEWAY_REASONING_STYLE_MAP: dict[str, Any] = {
    "reasoning_effort": lambda effort: {"reasoning": {"effort": effort}},
 }
 _MODEL_THINKING_STYLES: dict[str, str] = {
    **dict.fromkeys(_KIMI_THINKING_MODELS, "thinking_type"),
    **dict.fromkeys(_MIMO_THINKING_MODELS, "thinking_type"),
 }
-def _is_kimi_thinking_model(model_name: str) -> bool:
+def _model_slug(model_name: str) -> str:
-    """Return True if model_name refers to a Kimi thinking-capable model.
+    return model_name.lower().rsplit("/", 1)[-1]
    Supports two forms:
    - Exact match: e.g. kimi-k2.5 / kimi-k2.6 in _KIMI_THINKING_MODELS
    - Slug match:  moonshotai/kimi-k2.5 -> the part after the last "/"
                   is checked against _KIMI_THINKING_MODELS
    This covers both the native Moonshot provider (bare slug) and
    OpenRouter-style names (``"publisher/slug"``).
    """
    name = model_name.lower()
    if name in _KIMI_THINKING_MODELS:
        return True
    if "/" in name and name.rsplit("/", 1)[1] in _KIMI_THINKING_MODELS:
        return True
    return False
-def _is_mimo_thinking_model(model_name: str) -> bool:
+def _model_thinking_style(model_name: str) -> str:
-    """Return True if model_name refers to a MiMo thinking-capable model.
+    return _MODEL_THINKING_STYLES.get(_model_slug(model_name), "")
-    Mirrors _is_kimi_thinking_model: gateway providers (e.g. OpenRouter
+
-    routing ``xiaomi/mimo-v2.5-pro``) have no ``thinking_style`` on their
+def _thinking_styles_for(spec: ProviderSpec | None, model_name: str) -> list[str]:
-    spec, so the spec-driven branch in _build_kwargs misses them. The
+    styles: list[str] = []
-    model-name path catches those cases.
+    if spec and spec.thinking_style:
-    """
+        styles.append(spec.thinking_style)
-    name = model_name.lower()
+    model_style = _model_thinking_style(model_name)
-    if name in _MIMO_THINKING_MODELS:
+    if model_style and model_style not in styles:
-        return True
+        styles.append(model_style)
-    if "/" in name and name.rsplit("/", 1)[1] in _MIMO_THINKING_MODELS:
+    return styles
-        return True
+
-    return False
+
 def _thinking_extra_body(style: str, thinking_enabled: bool) -> dict[str, Any] | None:
    """Build the ``extra_body`` fragment for a thinking style.

    Looks ``style`` up in ``_THINKING_STYLE_MAP`` and calls the registered
    builder with ``thinking_enabled``.

    Returns:
        The dict produced by the builder, or ``None`` when no builder is
        registered for ``style``.
    """
    make_payload = _THINKING_STYLE_MAP.get(style)
    if not make_payload:
        # Unknown (or empty) style: nothing to send.
        return None
    return make_payload(thinking_enabled)
 def _gateway_reasoning_extra_body(style: str, effort: str | None) -> dict[str, Any] | None:
    """Build the gateway-level reasoning fragment for ``style`` and ``effort``.

    The builder is taken from ``_GATEWAY_REASONING_STYLE_MAP`` and called
    with ``effort``.

    Returns:
        The dict produced by the builder, or ``None`` when ``effort`` is
        falsy or no builder is registered for ``style``.
    """
    # Skip the lookup entirely when there is no effort value to forward.
    make_payload = _GATEWAY_REASONING_STYLE_MAP.get(style) if effort else None
    if not make_payload:
        return None
    return make_payload(effort)
 def _openai_compat_timeout_s() -> float:
@ -581,60 +583,19 @@ class OpenAICompatProvider(LLMProvider):
        if wire_effort and semantic_effort != "none":
            kwargs["reasoning_effort"] = wire_effort
-        # Provider-specific thinking parameters.
+        # Only send thinking controls when reasoning_effort is explicit so
-        # Only sent when reasoning_effort is explicitly configured so that
+        # omitting the config preserves each provider's default.
-        # the provider default is preserved otherwise.
+        if reasoning_effort is not None:
        # The mapping is driven by ProviderSpec.thinking_style so that adding
        # a new provider never requires touching this function.
        if spec and spec.thinking_style and reasoning_effort is not None:
            thinking_enabled = semantic_effort not in ("none", "minimal")
-            extra = _THINKING_STYLE_MAP.get(spec.thinking_style, lambda _: None)(thinking_enabled)
+            for thinking_style in _thinking_styles_for(spec, model_name):
-            if extra:
+                extra = _thinking_extra_body(thinking_style, thinking_enabled)
-                kwargs.setdefault("extra_body", {}).update(extra)
+                if extra:
-
+                    kwargs.setdefault("extra_body", {}).update(extra)
-        # Model-level thinking injection for Kimi thinking-capable models.
+            gateway_style = getattr(spec, "gateway_reasoning_style", "") if spec else ""
-        # Strip any provider prefix (e.g. "moonshotai/") before the set lookup
+            if gateway_style and _model_thinking_style(model_name):
-        # so that OpenRouter-style names like "moonshotai/kimi-k2.5" are handled
+                extra = _gateway_reasoning_extra_body(gateway_style, semantic_effort)
-        # identically to bare names like "kimi-k2.5".
+                if extra:
-        if reasoning_effort is not None and _is_kimi_thinking_model(model_name):
+                    kwargs.setdefault("extra_body", {}).update(extra)
            thinking_enabled = semantic_effort not in ("none", "minimal")
            kwargs.setdefault("extra_body", {}).update(
                {"thinking": {"type": "enabled" if thinking_enabled else "disabled"}}
            )
        # Model-level thinking injection for MiMo thinking-capable models.
        # Same shape as Kimi: gateway providers (OpenRouter, etc.) lack the
        # xiaomi_mimo spec's thinking_style, so the spec-driven branch above
        # misses them — match by model name to catch "xiaomi/mimo-v2.5-pro"
        # and friends. (Direct xiaomi_mimo requests are also covered here;
        # both branches write the same payload, so the dict update is a
        # safe no-op for already-handled cases.)
        if reasoning_effort is not None and _is_mimo_thinking_model(model_name):
            thinking_enabled = semantic_effort not in ("none", "minimal")
            kwargs.setdefault("extra_body", {}).update(
                {"thinking": {"type": "enabled" if thinking_enabled else "disabled"}}
            )
        # OpenRouter uses its own unified `reasoning` field and does not
        # forward provider-specific thinking shapes (the Kimi/MiMo
        # extra_body.thinking above) to upstream. Reported as the follow-up
        # to #3845/#3851: MiMo via OR kept thinking despite our injection.
        # For known thinking-capable models routed via OR, mirror the
        # effort signal into reasoning.effort (OR's documented enum:
        # "none"|"minimal"|"low"|"medium"|"high"|"xhigh"), which OR
        # translates to the upstream model's native shape.
        if (
            spec
            and spec.name == "openrouter"
            and reasoning_effort is not None
            and (
                _is_kimi_thinking_model(model_name)
                or _is_mimo_thinking_model(model_name)
            )
        ):
            kwargs.setdefault("extra_body", {}).update(
                {"reasoning": {"effort": semantic_effort}}
            )
        if tools:
            kwargs["tools"] = tools
@ -649,8 +610,7 @@ class OpenAICompatProvider(LLMProvider):
            and semantic_effort not in ("none", "minimal")
            and (
                (spec and spec.thinking_style)
-                or _is_kimi_thinking_model(model_name)
+                or _model_thinking_style(model_name)
                or _is_mimo_thinking_model(model_name)
            )
        )
        implicit_deepseek_thinking = (
--- a/nanobot/providers/registry.py
+++ b/nanobot/providers/registry.py
@ -71,6 +71,11 @@ class ProviderSpec:
    # "reasoning_split" — {"reasoning_split": true/false}  (MiniMax)
    thinking_style: str = ""
    # Gateway-native reasoning control to pair with model-level thinking styles.
    # "reasoning_effort" — {"reasoning": {"effort": <none|minimal|...>}}
    #                      (OpenRouter)
    gateway_reasoning_style: str = ""
    # When True, treat the "reasoning" response field as formal content
    # when "content" is empty.  Only set this for providers (e.g. StepFun)
    # whose API returns the actual answer in "reasoning" instead of "content".
@ -142,6 +147,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
        detect_by_base_keyword="openrouter",
        default_api_base="https://openrouter.ai/api/v1",
        supports_prompt_caching=True,
        gateway_reasoning_style="reasoning_effort",
    ),
    # Hugging Face Inference Providers: OpenAI-compatible router for chat models.
    ProviderSpec(
--- a/tests/providers/test_xiaomi_mimo_thinking.py
+++ b/tests/providers/test_xiaomi_mimo_thinking.py
@ -32,7 +32,7 @@ def _mimo_spec():
 def _openrouter_spec():
-    """Return the registered OpenRouter ProviderSpec (no thinking_style)."""
+    """Return the registered OpenRouter ProviderSpec."""
    specs = {s.name: s for s in PROVIDERS}
    return specs["openrouter"]
@ -77,6 +77,13 @@ def test_xiaomi_mimo_uses_thinking_type_style():
    assert spec.default_api_base == "https://api.xiaomimimo.com/v1"
 def test_openrouter_declares_gateway_reasoning_style():
    """The OpenRouter spec sets a gateway reasoning style but no thinking style."""
    openrouter = _openrouter_spec()
    # No provider-level thinking payload is declared on the spec itself...
    assert openrouter.thinking_style == ""
    # ...while the gateway-native reasoning control is declared.
    assert openrouter.gateway_reasoning_style == "reasoning_effort"
 # ---------------------------------------------------------------------------
 # _build_kwargs wire-format
 # ---------------------------------------------------------------------------