refactor(providers): centralize gateway reasoning control

2026-05-21 17:12:32 +00:00 · 2026-05-21 14:34:45 +08:00 · 2026-05-21 14:34:45 +08:00 · 4f895e6307
commit 4f895e6307
parent 0cd2f626c0
3 changed files with 59 additions and 86 deletions
--- a/nanobot/providers/openai_compat_provider.py
+++ b/nanobot/providers/openai_compat_provider.py
@ -74,41 +74,43 @@ _THINKING_STYLE_MAP: dict[str, Any] = {
    "enable_thinking": lambda on: {"enable_thinking": on},
    "reasoning_split": lambda on: {"reasoning_split": on},
 }
+_GATEWAY_REASONING_STYLE_MAP: dict[str, Any] = {
+    "reasoning_effort": lambda effort: {"reasoning": {"effort": effort}},
+}
+_MODEL_THINKING_STYLES: dict[str, str] = {
+    **dict.fromkeys(_KIMI_THINKING_MODELS, "thinking_type"),
+    **dict.fromkeys(_MIMO_THINKING_MODELS, "thinking_type"),
+}


-def _is_kimi_thinking_model(model_name: str) -> bool:
-    """Return True if model_name refers to a Kimi thinking-capable model.
-
-    Supports two forms:
-    - Exact match: e.g. kimi-k2.5 / kimi-k2.6 in _KIMI_THINKING_MODELS
-    - Slug match:  moonshotai/kimi-k2.5 -> the part after the last "/"
-                   is checked against _KIMI_THINKING_MODELS
-
-    This covers both the native Moonshot provider (bare slug) and
-    OpenRouter-style names (``"publisher/slug"``).
-    """
-    name = model_name.lower()
-    if name in _KIMI_THINKING_MODELS:
-        return True
-    if "/" in name and name.rsplit("/", 1)[1] in _KIMI_THINKING_MODELS:
-        return True
-    return False
+def _model_slug(model_name: str) -> str:
+    return model_name.lower().rsplit("/", 1)[-1]


-def _is_mimo_thinking_model(model_name: str) -> bool:
-    """Return True if model_name refers to a MiMo thinking-capable model.
+def _model_thinking_style(model_name: str) -> str:
+    return _MODEL_THINKING_STYLES.get(_model_slug(model_name), "")

-    Mirrors _is_kimi_thinking_model: gateway providers (e.g. OpenRouter
-    routing ``xiaomi/mimo-v2.5-pro``) have no ``thinking_style`` on their
-    spec, so the spec-driven branch in _build_kwargs misses them. The
-    model-name path catches those cases.
-    """
-    name = model_name.lower()
-    if name in _MIMO_THINKING_MODELS:
-        return True
-    if "/" in name and name.rsplit("/", 1)[1] in _MIMO_THINKING_MODELS:
-        return True
-    return False
+
+def _thinking_styles_for(spec: ProviderSpec | None, model_name: str) -> list[str]:
+    styles: list[str] = []
+    if spec and spec.thinking_style:
+        styles.append(spec.thinking_style)
+    model_style = _model_thinking_style(model_name)
+    if model_style and model_style not in styles:
+        styles.append(model_style)
+    return styles
+
+
+def _thinking_extra_body(style: str, thinking_enabled: bool) -> dict[str, Any] | None:
+    builder = _THINKING_STYLE_MAP.get(style)
+    return builder(thinking_enabled) if builder else None
+
+
+def _gateway_reasoning_extra_body(style: str, effort: str | None) -> dict[str, Any] | None:
+    if not effort:
+        return None
+    builder = _GATEWAY_REASONING_STYLE_MAP.get(style)
+    return builder(effort) if builder else None


 def _openai_compat_timeout_s() -> float:
@ -581,60 +583,19 @@ class OpenAICompatProvider(LLMProvider):
        if wire_effort and semantic_effort != "none":
            kwargs["reasoning_effort"] = wire_effort

-        # Provider-specific thinking parameters.
-        # Only sent when reasoning_effort is explicitly configured so that
-        # the provider default is preserved otherwise.
-        # The mapping is driven by ProviderSpec.thinking_style so that adding
-        # a new provider never requires touching this function.
-        if spec and spec.thinking_style and reasoning_effort is not None:
+        # Only send thinking controls when reasoning_effort is explicit so
+        # omitting the config preserves each provider's default.
+        if reasoning_effort is not None:
            thinking_enabled = semantic_effort not in ("none", "minimal")
-            extra = _THINKING_STYLE_MAP.get(spec.thinking_style, lambda _: None)(thinking_enabled)
-            if extra:
-                kwargs.setdefault("extra_body", {}).update(extra)
-
-        # Model-level thinking injection for Kimi thinking-capable models.
-        # Strip any provider prefix (e.g. "moonshotai/") before the set lookup
-        # so that OpenRouter-style names like "moonshotai/kimi-k2.5" are handled
-        # identically to bare names like "kimi-k2.5".
-        if reasoning_effort is not None and _is_kimi_thinking_model(model_name):
-            thinking_enabled = semantic_effort not in ("none", "minimal")
-            kwargs.setdefault("extra_body", {}).update(
-                {"thinking": {"type": "enabled" if thinking_enabled else "disabled"}}
-            )
-
-        # Model-level thinking injection for MiMo thinking-capable models.
-        # Same shape as Kimi: gateway providers (OpenRouter, etc.) lack the
-        # xiaomi_mimo spec's thinking_style, so the spec-driven branch above
-        # misses them — match by model name to catch "xiaomi/mimo-v2.5-pro"
-        # and friends. (Direct xiaomi_mimo requests are also covered here;
-        # both branches write the same payload, so the dict update is a
-        # safe no-op for already-handled cases.)
-        if reasoning_effort is not None and _is_mimo_thinking_model(model_name):
-            thinking_enabled = semantic_effort not in ("none", "minimal")
-            kwargs.setdefault("extra_body", {}).update(
-                {"thinking": {"type": "enabled" if thinking_enabled else "disabled"}}
-            )
-
-        # OpenRouter uses its own unified `reasoning` field and does not
-        # forward provider-specific thinking shapes (the Kimi/MiMo
-        # extra_body.thinking above) to upstream. Reported as the follow-up
-        # to #3845/#3851: MiMo via OR kept thinking despite our injection.
-        # For known thinking-capable models routed via OR, mirror the
-        # effort signal into reasoning.effort (OR's documented enum:
-        # "none"|"minimal"|"low"|"medium"|"high"|"xhigh"), which OR
-        # translates to the upstream model's native shape.
-        if (
-            spec
-            and spec.name == "openrouter"
-            and reasoning_effort is not None
-            and (
-                _is_kimi_thinking_model(model_name)
-                or _is_mimo_thinking_model(model_name)
-            )
-        ):
-            kwargs.setdefault("extra_body", {}).update(
-                {"reasoning": {"effort": semantic_effort}}
-            )
+            for thinking_style in _thinking_styles_for(spec, model_name):
+                extra = _thinking_extra_body(thinking_style, thinking_enabled)
+                if extra:
+                    kwargs.setdefault("extra_body", {}).update(extra)
+            gateway_style = getattr(spec, "gateway_reasoning_style", "") if spec else ""
+            if gateway_style and _model_thinking_style(model_name):
+                extra = _gateway_reasoning_extra_body(gateway_style, semantic_effort)
+                if extra:
+                    kwargs.setdefault("extra_body", {}).update(extra)

        if tools:
            kwargs["tools"] = tools
@ -649,8 +610,7 @@ class OpenAICompatProvider(LLMProvider):
            and semantic_effort not in ("none", "minimal")
            and (
                (spec and spec.thinking_style)
-                or _is_kimi_thinking_model(model_name)
-                or _is_mimo_thinking_model(model_name)
+                or _model_thinking_style(model_name)
            )
        )
        implicit_deepseek_thinking = (
--- a/nanobot/providers/registry.py
+++ b/nanobot/providers/registry.py
@ -71,6 +71,11 @@ class ProviderSpec:
    # "reasoning_split" — {"reasoning_split": true/false}  (MiniMax)
    thinking_style: str = ""

+    # Gateway-native reasoning control to pair with model-level thinking styles.
+    # "reasoning_effort" — {"reasoning": {"effort": <none|minimal|...>}}
+    #                      (OpenRouter)
+    gateway_reasoning_style: str = ""
+
    # When True, treat the "reasoning" response field as formal content
    # when "content" is empty.  Only set this for providers (e.g. StepFun)
    # whose API returns the actual answer in "reasoning" instead of "content".
@ -142,6 +147,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
        detect_by_base_keyword="openrouter",
        default_api_base="https://openrouter.ai/api/v1",
        supports_prompt_caching=True,
+        gateway_reasoning_style="reasoning_effort",
    ),
    # Hugging Face Inference Providers: OpenAI-compatible router for chat models.
    ProviderSpec(
--- a/tests/providers/test_xiaomi_mimo_thinking.py
+++ b/tests/providers/test_xiaomi_mimo_thinking.py
@ -32,7 +32,7 @@ def _mimo_spec():


 def _openrouter_spec():
-    """Return the registered OpenRouter ProviderSpec (no thinking_style)."""
+    """Return the registered OpenRouter ProviderSpec."""
    specs = {s.name: s for s in PROVIDERS}
    return specs["openrouter"]

@ -77,6 +77,13 @@ def test_xiaomi_mimo_uses_thinking_type_style():
    assert spec.default_api_base == "https://api.xiaomimimo.com/v1"


+def test_openrouter_declares_gateway_reasoning_style():
+    """OpenRouter uses its own reasoning.effort field for routed thinking models."""
+    spec = _openrouter_spec()
+    assert spec.thinking_style == ""
+    assert spec.gateway_reasoning_style == "reasoning_effort"
+
+
 # ---------------------------------------------------------------------------
 # _build_kwargs wire-format
 # ---------------------------------------------------------------------------