fix(providers): wire MiMo thinking control on gateway providers (#3845)

The xiaomi_mimo ProviderSpec carries thinking_style="thinking_type", but gateway providers (OpenRouter etc.) route MiMo under their own spec which has no thinking_style. As a result, `reasoning_effort="none"` was silently ignored: `{"thinking": {"type": "disabled"}}` was never injected and responses still contained reasoning_content. Mirror the Kimi pattern that already handles the same problem: add an explicit _MIMO_THINKING_MODELS allowlist (mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro, mimo-v2-omni — per Xiaomi docs), an _is_mimo_thinking_model helper that strips publisher prefixes ("xiaomi/mimo-v2.5-pro" matches), and a sibling branch in _build_kwargs that injects the thinking payload by model name. mimo-v2-flash is intentionally excluded — it has no thinking mode. Also include MiMo in the explicit_thinking predicate so the reasoning_content backfill (#3554, #3584) covers the gateway path consistently with the direct path. Tests cover the gateway disable/enable signals, bare-slug fallback, flash exclusion, and a non-MiMo sanity check.
2026-05-19 16:12:30 +00:00 · 2026-05-15 17:32:33 -04:00 · 2026-05-15 17:32:33 -04:00 · 0ca0fe2221
commit 0ca0fe2221
parent 8a819dda1e
2 changed files with 124 additions and 1 deletions
--- a/nanobot/providers/openai_compat_provider.py
+++ b/nanobot/providers/openai_compat_provider.py
@ -59,6 +59,15 @@ _KIMI_THINKING_MODELS: frozenset[str] = frozenset({
    "kimi-k2.6",
    "k2.6-code-preview",
 })
 # Thinking-capable MiMo models per Xiaomi docs (see
 # tests/providers/test_xiaomi_mimo_thinking.py). mimo-v2-flash is omitted
 # because it does not support thinking.
 _MIMO_THINKING_MODELS: frozenset[str] = frozenset({
    "mimo-v2.5-pro",
    "mimo-v2.5",
    "mimo-v2-pro",
    "mimo-v2-omni",
 })
 _OPENAI_COMPAT_REQUEST_TIMEOUT_S = 120.0
 # Maps ProviderSpec.thinking_style → extra_body builder.
@ -90,6 +99,22 @@ def _is_kimi_thinking_model(model_name: str) -> bool:
    return False
 def _is_mimo_thinking_model(model_name: str) -> bool:
    """Tell whether *model_name* names a thinking-capable MiMo model.
    The name is lowercased and compared against ``_MIMO_THINKING_MODELS``
    twice: once as given, and once with everything up to the last ``/``
    dropped. The second comparison lets publisher-prefixed slugs such as
    ``xiaomi/mimo-v2.5-pro`` (the form used when a gateway like OpenRouter
    routes the model) match, since those requests run under a spec without
    ``thinking_style`` and are otherwise missed by the spec-driven branch
    in _build_kwargs.
    """
    lowered = model_name.lower()
    # rpartition yields the whole string as the last element when there is
    # no "/", so a bare slug simply contributes the same candidate twice.
    candidates = {lowered, lowered.rpartition("/")[2]}
    return not _MIMO_THINKING_MODELS.isdisjoint(candidates)
 def _openai_compat_timeout_s() -> float:
    """Resolve the bounded request timeout for OpenAI-compatible providers.
    Delegates to ``_float_env`` with the ``NANOBOT_OPENAI_COMPAT_TIMEOUT_S``
    key and ``_OPENAI_COMPAT_REQUEST_TIMEOUT_S`` as the second argument
    (presumably the fallback when the variable is unset — confirm against
    ``_float_env``).
    """
    env_key = "NANOBOT_OPENAI_COMPAT_TIMEOUT_S"
    fallback_s = _OPENAI_COMPAT_REQUEST_TIMEOUT_S
    return _float_env(env_key, fallback_s)
@ -548,6 +573,19 @@ class OpenAICompatProvider(LLMProvider):
                {"thinking": {"type": "enabled" if thinking_enabled else "disabled"}}
            )
        # Model-level thinking injection for MiMo thinking-capable models.
        # Same shape as Kimi: gateway providers (OpenRouter, etc.) lack the
        # xiaomi_mimo spec's thinking_style, so the spec-driven branch above
        # misses them — match by model name to catch "xiaomi/mimo-v2.5-pro"
        # and friends. (Direct xiaomi_mimo requests are also covered here;
        # both branches write the same payload, so the dict update is a
        # safe no-op for already-handled cases.)
        if reasoning_effort is not None and _is_mimo_thinking_model(model_name):
            thinking_enabled = semantic_effort not in ("none", "minimal")
            kwargs.setdefault("extra_body", {}).update(
                {"thinking": {"type": "enabled" if thinking_enabled else "disabled"}}
            )
        if tools:
            kwargs["tools"] = tools
            kwargs["tool_choice"] = tool_choice or "auto"
@ -559,7 +597,11 @@ class OpenAICompatProvider(LLMProvider):
        explicit_thinking = (
            reasoning_effort is not None
            and semantic_effort not in ("none", "minimal")
-            and ((spec and spec.thinking_style) or _is_kimi_thinking_model(model_name))
+            and (
                (spec and spec.thinking_style)
                or _is_kimi_thinking_model(model_name)
                or _is_mimo_thinking_model(model_name)
            )
        )
        implicit_deepseek_thinking = (
            spec is not None
--- a/tests/providers/test_xiaomi_mimo_thinking.py
+++ b/tests/providers/test_xiaomi_mimo_thinking.py
@ -31,6 +31,12 @@ def _mimo_spec():
    return specs["xiaomi_mimo"]
 def _openrouter_spec():
    """Fetch the ProviderSpec registered as "openrouter" (no thinking_style).
    Raises KeyError if PROVIDERS holds no spec with that name.
    """
    by_name = {}
    # Index every registered spec by name; a later duplicate overrides an
    # earlier one, exactly as a dict comprehension would.
    for candidate in PROVIDERS:
        by_name[candidate.name] = candidate
    return by_name["openrouter"]
 def _mimo_provider() -> OpenAICompatProvider:
    return OpenAICompatProvider(
        api_key="test-key",
@ -39,6 +45,15 @@ def _mimo_provider() -> OpenAICompatProvider:
    )
 def _openrouter_provider(default_model: str) -> OpenAICompatProvider:
    """Build an OpenAICompatProvider bound to the OpenRouter gateway spec.
    The spec comes from ``_openrouter_spec()`` (a gateway spec without
    thinking_style); *default_model* is forwarded unchanged and a dummy API
    key is supplied.
    """
    gateway_spec = _openrouter_spec()
    provider = OpenAICompatProvider(
        spec=gateway_spec,
        default_model=default_model,
        api_key="sk-or-test",
    )
    return provider
 def _simple_messages() -> list[dict[str, Any]]:
    return [{"role": "user", "content": "hello"}]
@ -119,3 +134,69 @@ def test_mimo_reasoning_effort_unset_preserves_provider_default():
    )
    assert "reasoning_effort" not in kwargs
    assert "extra_body" not in kwargs
 # ---------------------------------------------------------------------------
 # Gateway path: MiMo routed through OpenRouter (no spec.thinking_style)
 # ---------------------------------------------------------------------------
 def test_mimo_via_openrouter_reasoning_effort_none_disables_thinking():
    """Gateway path (#3845): OpenRouter addresses MiMo as
    "xiaomi/mimo-v2.5-pro" and its spec has no thinking_style, so
    reasoning_effort="none" has to be translated by the model-name path
    into the thinking-disabled payload, with no reasoning_effort key."""
    gateway = _openrouter_provider("xiaomi/mimo-v2.5-pro")
    request = gateway._build_kwargs(
        messages=_simple_messages(),
        tools=None,
        model=None,
        max_tokens=100,
        temperature=0.7,
        reasoning_effort="none",
        tool_choice=None,
    )
    expected_extra_body = {"thinking": {"type": "disabled"}}
    assert "reasoning_effort" not in request
    assert request["extra_body"] == expected_extra_body
 def test_mimo_via_openrouter_reasoning_effort_medium_enables_thinking():
    """Gateway path mirrors the direct path: an effort other than
    none/minimal (here "medium") is forwarded as reasoning_effort and
    produces the thinking-enabled payload."""
    gateway = _openrouter_provider("xiaomi/mimo-v2.5-pro")
    request = gateway._build_kwargs(
        messages=_simple_messages(),
        tools=None,
        model=None,
        max_tokens=100,
        temperature=0.7,
        reasoning_effort="medium",
        tool_choice=None,
    )
    expected_extra_body = {"thinking": {"type": "enabled"}}
    assert request.get("reasoning_effort") == "medium"
    assert request["extra_body"] == expected_extra_body
 def test_mimo_via_openrouter_bare_slug_also_matches():
    """The allowlist must match the unprefixed slug "mimo-v2.5-pro" too,
    because gateways sometimes accept either form."""
    gateway = _openrouter_provider("mimo-v2.5-pro")
    request = gateway._build_kwargs(
        messages=_simple_messages(),
        tools=None,
        model=None,
        max_tokens=100,
        temperature=0.7,
        reasoning_effort="none",
        tool_choice=None,
    )
    expected_extra_body = {"thinking": {"type": "disabled"}}
    assert request["extra_body"] == expected_extra_body
 def test_mimo_flash_via_openrouter_does_not_inject_thinking():
    """Per Xiaomi docs mimo-v2-flash has no thinking mode and is left out
    of the allowlist, so the gateway path must not add any extra_body."""
    gateway = _openrouter_provider("xiaomi/mimo-v2-flash")
    request = gateway._build_kwargs(
        messages=_simple_messages(),
        tools=None,
        model=None,
        max_tokens=100,
        temperature=0.7,
        reasoning_effort="none",
        tool_choice=None,
    )
    assert "extra_body" not in request
 def test_non_mimo_model_via_openrouter_unaffected():
    """Sanity check: routing a model that is neither MiMo nor Kimi
    ("openai/gpt-4o") through OpenRouter adds no extra_body."""
    gateway = _openrouter_provider("openai/gpt-4o")
    request = gateway._build_kwargs(
        messages=_simple_messages(),
        tools=None,
        model=None,
        max_tokens=100,
        temperature=0.7,
        reasoning_effort="none",
        tool_choice=None,
    )
    assert "extra_body" not in request