fix(providers): wire MiMo thinking control on gateway providers (#3845)

The xiaomi_mimo ProviderSpec carries thinking_style="thinking_type", but
gateway providers (OpenRouter etc.) route MiMo under their own spec
which has no thinking_style. As a result, `reasoning_effort="none"` was
silently ignored: `{"thinking": {"type": "disabled"}}` was never
injected and responses still contained reasoning_content.

Mirror the Kimi pattern that already handles the same problem: add an
explicit _MIMO_THINKING_MODELS allowlist (mimo-v2.5-pro, mimo-v2.5,
mimo-v2-pro, mimo-v2-omni — per Xiaomi docs), an _is_mimo_thinking_model
helper that strips publisher prefixes ("xiaomi/mimo-v2.5-pro" matches),
and a sibling branch in _build_kwargs that injects the thinking payload
by model name. mimo-v2-flash is intentionally excluded — it has no
thinking mode.

Also include MiMo in the explicit_thinking predicate so the
reasoning_content backfill (#3554, #3584) covers the gateway path
consistently with the direct path.

Tests cover the gateway disable/enable signals, bare-slug fallback,
flash exclusion, and a non-MiMo sanity check.
This commit is contained in:
olgagaga 2026-05-15 17:32:33 -04:00 committed by Xubin Ren
parent 8a819dda1e
commit 0ca0fe2221
2 changed files with 124 additions and 1 deletions

View File

@ -59,6 +59,15 @@ _KIMI_THINKING_MODELS: frozenset[str] = frozenset({
"kimi-k2.6", "kimi-k2.6",
"k2.6-code-preview", "k2.6-code-preview",
}) })
# Allowlist of MiMo model slugs that expose a thinking mode, following the
# Xiaomi docs (exercised by tests/providers/test_xiaomi_mimo_thinking.py).
# Entries are lowercase bare slugs with no publisher prefix.
# mimo-v2-flash is deliberately absent: it has no thinking mode.
_MIMO_THINKING_MODELS: frozenset[str] = frozenset(
    (
        "mimo-v2.5-pro",
        "mimo-v2.5",
        "mimo-v2-pro",
        "mimo-v2-omni",
    )
)
_OPENAI_COMPAT_REQUEST_TIMEOUT_S = 120.0 _OPENAI_COMPAT_REQUEST_TIMEOUT_S = 120.0
# Maps ProviderSpec.thinking_style → extra_body builder. # Maps ProviderSpec.thinking_style → extra_body builder.
@ -90,6 +99,22 @@ def _is_kimi_thinking_model(model_name: str) -> bool:
return False return False
def _is_mimo_thinking_model(model_name: str) -> bool:
    """Tell whether ``model_name`` names a thinking-capable MiMo model.

    The comparison is case-insensitive. A name matches when either the
    whole lowercased string, or the part after its last ``/`` (so a
    publisher prefix such as ``xiaomi/`` is ignored), is listed in
    ``_MIMO_THINKING_MODELS``.

    This is the model-name counterpart to the spec-driven check: gateway
    providers (e.g. OpenRouter routing ``xiaomi/mimo-v2.5-pro``) carry no
    ``thinking_style`` on their spec, so only the name can identify them.
    """
    slug = model_name.lower()
    # For a name without "/", rsplit returns the name itself as the only
    # element, so the second membership test simply repeats the first.
    bare_slug = slug.rsplit("/", 1)[-1]
    return slug in _MIMO_THINKING_MODELS or bare_slug in _MIMO_THINKING_MODELS
def _openai_compat_timeout_s() -> float: def _openai_compat_timeout_s() -> float:
"""Return the bounded request timeout used for OpenAI-compatible providers.""" """Return the bounded request timeout used for OpenAI-compatible providers."""
return _float_env("NANOBOT_OPENAI_COMPAT_TIMEOUT_S", _OPENAI_COMPAT_REQUEST_TIMEOUT_S) return _float_env("NANOBOT_OPENAI_COMPAT_TIMEOUT_S", _OPENAI_COMPAT_REQUEST_TIMEOUT_S)
@ -548,6 +573,19 @@ class OpenAICompatProvider(LLMProvider):
{"thinking": {"type": "enabled" if thinking_enabled else "disabled"}} {"thinking": {"type": "enabled" if thinking_enabled else "disabled"}}
) )
# Model-level thinking injection for MiMo thinking-capable models.
# Same shape as Kimi: gateway providers (OpenRouter, etc.) lack the
# xiaomi_mimo spec's thinking_style, so the spec-driven branch above
# misses them — match by model name to catch "xiaomi/mimo-v2.5-pro"
# and friends. (Direct xiaomi_mimo requests are also covered here;
# both branches write the same payload, so the dict update is a
# safe no-op for already-handled cases.)
if reasoning_effort is not None and _is_mimo_thinking_model(model_name):
thinking_enabled = semantic_effort not in ("none", "minimal")
kwargs.setdefault("extra_body", {}).update(
{"thinking": {"type": "enabled" if thinking_enabled else "disabled"}}
)
if tools: if tools:
kwargs["tools"] = tools kwargs["tools"] = tools
kwargs["tool_choice"] = tool_choice or "auto" kwargs["tool_choice"] = tool_choice or "auto"
@ -559,7 +597,11 @@ class OpenAICompatProvider(LLMProvider):
explicit_thinking = ( explicit_thinking = (
reasoning_effort is not None reasoning_effort is not None
and semantic_effort not in ("none", "minimal") and semantic_effort not in ("none", "minimal")
and ((spec and spec.thinking_style) or _is_kimi_thinking_model(model_name)) and (
(spec and spec.thinking_style)
or _is_kimi_thinking_model(model_name)
or _is_mimo_thinking_model(model_name)
)
) )
implicit_deepseek_thinking = ( implicit_deepseek_thinking = (
spec is not None spec is not None

View File

@ -31,6 +31,12 @@ def _mimo_spec():
return specs["xiaomi_mimo"] return specs["xiaomi_mimo"]
def _openrouter_spec():
    """Fetch the ProviderSpec registered under the name ``"openrouter"``.

    Used by the gateway tests as a spec that carries no ``thinking_style``.
    Raises ``KeyError`` if no spec with that name is present in PROVIDERS.
    """
    by_name = dict((entry.name, entry) for entry in PROVIDERS)
    return by_name["openrouter"]
def _mimo_provider() -> OpenAICompatProvider: def _mimo_provider() -> OpenAICompatProvider:
return OpenAICompatProvider( return OpenAICompatProvider(
api_key="test-key", api_key="test-key",
@ -39,6 +45,15 @@ def _mimo_provider() -> OpenAICompatProvider:
) )
def _openrouter_provider(default_model: str) -> OpenAICompatProvider:
    """Build an OpenAICompatProvider wired to the OpenRouter gateway spec.

    ``default_model`` becomes the provider's default model; the API key is
    a fixed dummy value since these tests only inspect built kwargs.
    """
    gateway_spec = _openrouter_spec()
    return OpenAICompatProvider(
        api_key="sk-or-test",
        default_model=default_model,
        spec=gateway_spec,
    )
def _simple_messages() -> list[dict[str, Any]]: def _simple_messages() -> list[dict[str, Any]]:
return [{"role": "user", "content": "hello"}] return [{"role": "user", "content": "hello"}]
@ -119,3 +134,69 @@ def test_mimo_reasoning_effort_unset_preserves_provider_default():
) )
assert "reasoning_effort" not in kwargs assert "reasoning_effort" not in kwargs
assert "extra_body" not in kwargs assert "extra_body" not in kwargs
# ---------------------------------------------------------------------------
# Gateway path: MiMo routed through OpenRouter (no spec.thinking_style)
# ---------------------------------------------------------------------------
def test_mimo_via_openrouter_reasoning_effort_none_disables_thinking():
    """Gateway disable signal (#3845).

    OpenRouter addresses MiMo as "xiaomi/mimo-v2.5-pro" and its spec has no
    thinking_style, so the "disabled" payload has to be produced by the
    model-name path.
    """
    gateway = _openrouter_provider("xiaomi/mimo-v2.5-pro")
    request = dict(
        messages=_simple_messages(),
        tools=None,
        model=None,
        max_tokens=100,
        temperature=0.7,
        reasoning_effort="none",
        tool_choice=None,
    )
    built = gateway._build_kwargs(**request)

    assert "reasoning_effort" not in built
    assert built["extra_body"] == {"thinking": {"type": "disabled"}}
def test_mimo_via_openrouter_reasoning_effort_medium_enables_thinking():
    """Gateway enable signal: an effort other than none/minimal turns
    thinking on, matching the direct xiaomi_mimo path."""
    gateway = _openrouter_provider("xiaomi/mimo-v2.5-pro")
    request = dict(
        messages=_simple_messages(),
        tools=None,
        model=None,
        max_tokens=100,
        temperature=0.7,
        reasoning_effort="medium",
        tool_choice=None,
    )
    built = gateway._build_kwargs(**request)

    assert built.get("reasoning_effort") == "medium"
    assert built["extra_body"] == {"thinking": {"type": "enabled"}}
def test_mimo_via_openrouter_bare_slug_also_matches():
    """The un-prefixed slug "mimo-v2.5-pro" must hit the allowlist too,
    because gateways sometimes accept either spelling."""
    gateway = _openrouter_provider("mimo-v2.5-pro")
    request = dict(
        messages=_simple_messages(),
        tools=None,
        model=None,
        max_tokens=100,
        temperature=0.7,
        reasoning_effort="none",
        tool_choice=None,
    )
    built = gateway._build_kwargs(**request)

    assert built["extra_body"] == {"thinking": {"type": "disabled"}}
def test_mimo_flash_via_openrouter_does_not_inject_thinking():
    """mimo-v2-flash is left off the allowlist (no thinking mode per the
    Xiaomi docs), so the gateway path must not add a thinking field."""
    gateway = _openrouter_provider("xiaomi/mimo-v2-flash")
    request = dict(
        messages=_simple_messages(),
        tools=None,
        model=None,
        max_tokens=100,
        temperature=0.7,
        reasoning_effort="none",
        tool_choice=None,
    )
    built = gateway._build_kwargs(**request)

    assert "extra_body" not in built
def test_non_mimo_model_via_openrouter_unaffected():
    """Sanity check: a model that is neither MiMo nor Kimi, routed through
    OpenRouter, gets no extra_body."""
    gateway = _openrouter_provider("openai/gpt-4o")
    request = dict(
        messages=_simple_messages(),
        tools=None,
        model=None,
        max_tokens=100,
        temperature=0.7,
        reasoning_effort="none",
        tool_choice=None,
    )
    built = gateway._build_kwargs(**request)

    assert "extra_body" not in built