fix(providers): inject OpenRouter reasoning.effort for thinking models

Follow-up to #3851: that PR added `extra_body.thinking={type: disabled}`
for MiMo via OpenRouter (OR), but OR doesn't forward provider-specific
thinking shapes to upstream — it strips unknown extra_body fields and
uses its own unified `reasoning` parameter. So MiMo via OR kept
thinking enabled despite the injection (reproduced by @ClearPlume on #3851
with identical kwargs but provider switched from openrouter → xiaomi_mimo).

For known thinking-capable models (Kimi, MiMo) routed via the
openrouter spec, when a reasoning effort is set, also inject
`extra_body.reasoning = {effort: <effort>}`, where <effort> is a value
from OR's documented enum ("none"|"minimal"|"low"|"medium"|"high"|"xhigh").
OR translates this to the upstream model's native shape.

Existing tests are updated to expect both fields on the OR path. The
direct xiaomi_mimo and moonshot paths are unchanged (the new branch is
gated on spec.name == "openrouter"). MiMo Flash and models on OR that are
neither Kimi nor MiMo thinking models continue to receive no injection.
This commit is contained in:
olgagaga 2026-05-16 12:19:30 -04:00 committed by Xubin Ren
parent e2b51fa5dc
commit 0cd2f626c0
3 changed files with 72 additions and 12 deletions

View File

@ -615,6 +615,27 @@ class OpenAICompatProvider(LLMProvider):
{"thinking": {"type": "enabled" if thinking_enabled else "disabled"}} {"thinking": {"type": "enabled" if thinking_enabled else "disabled"}}
) )
# OpenRouter uses its own unified `reasoning` field and does not
# forward provider-specific thinking shapes (the Kimi/MiMo
# extra_body.thinking above) to upstream. Reported as the follow-up
# to #3845/#3851: MiMo via OR kept thinking despite our injection.
# For known thinking-capable models routed via OR, mirror the
# effort signal into reasoning.effort (OR's documented enum:
# "none"|"minimal"|"low"|"medium"|"high"|"xhigh"), which OR
# translates to the upstream model's native shape.
if (
spec
and spec.name == "openrouter"
and reasoning_effort is not None
and (
_is_kimi_thinking_model(model_name)
or _is_mimo_thinking_model(model_name)
)
):
kwargs.setdefault("extra_body", {}).update(
{"reasoning": {"effort": semantic_effort}}
)
if tools: if tools:
kwargs["tools"] = tools kwargs["tools"] = tools
kwargs["tool_choice"] = tool_choice or "auto" kwargs["tool_choice"] = tool_choice or "auto"

View File

@ -1391,9 +1391,16 @@ def test_kimi_k25_no_extra_body_when_reasoning_effort_none() -> None:
def test_kimi_k25_thinking_enabled_with_openrouter_prefix() -> None: def test_kimi_k25_thinking_enabled_with_openrouter_prefix() -> None:
"""OpenRouter-style model names like moonshotai/kimi-k2.5 must trigger thinking.""" """OpenRouter-style model names like moonshotai/kimi-k2.5 must trigger thinking.
OR drops upstream-provider `thinking` fields, so the same intent also has
to go through OR's `reasoning.effort` shape (#3851 follow-up).
"""
kw = _build_kwargs_for("openrouter", "moonshotai/kimi-k2.5", reasoning_effort="medium") kw = _build_kwargs_for("openrouter", "moonshotai/kimi-k2.5", reasoning_effort="medium")
assert kw.get("extra_body") == {"thinking": {"type": "enabled"}} assert kw.get("extra_body") == {
"thinking": {"type": "enabled"},
"reasoning": {"effort": "medium"},
}
def test_kimi_k26_thinking_enabled() -> None: def test_kimi_k26_thinking_enabled() -> None:
@ -1403,9 +1410,13 @@ def test_kimi_k26_thinking_enabled() -> None:
def test_kimi_k26_thinking_enabled_with_openrouter_prefix() -> None: def test_kimi_k26_thinking_enabled_with_openrouter_prefix() -> None:
"""OpenRouter-style names like moonshotai/kimi-k2.6 must trigger thinking.""" """OpenRouter-style names like moonshotai/kimi-k2.6 must trigger thinking
via both upstream `thinking` and OR's `reasoning.effort`."""
kw = _build_kwargs_for("openrouter", "moonshotai/kimi-k2.6", reasoning_effort="medium") kw = _build_kwargs_for("openrouter", "moonshotai/kimi-k2.6", reasoning_effort="medium")
assert kw.get("extra_body") == {"thinking": {"type": "enabled"}} assert kw.get("extra_body") == {
"thinking": {"type": "enabled"},
"reasoning": {"effort": "medium"},
}
def test_moonshot_kimi_k26_temperature_override() -> None: def test_moonshot_kimi_k26_temperature_override() -> None:

View File

@ -142,9 +142,11 @@ def test_mimo_reasoning_effort_unset_preserves_provider_default():
def test_mimo_via_openrouter_reasoning_effort_none_disables_thinking(): def test_mimo_via_openrouter_reasoning_effort_none_disables_thinking():
"""OpenRouter routes MiMo as "xiaomi/mimo-v2.5-pro"; the openrouter spec """OpenRouter routes MiMo as "xiaomi/mimo-v2.5-pro" and does NOT forward
has no thinking_style, so the disable signal must come from the extra_body.thinking to upstream, so a disable signal must also reach OR
model-name path (#3845).""" in its own `reasoning.effort` shape. Verifies both the upstream-MiMo
payload (#3845) and the OR-native payload (#3851 follow-up) are sent.
"""
provider = _openrouter_provider("xiaomi/mimo-v2.5-pro") provider = _openrouter_provider("xiaomi/mimo-v2.5-pro")
kwargs = provider._build_kwargs( kwargs = provider._build_kwargs(
messages=_simple_messages(), messages=_simple_messages(),
@ -152,11 +154,15 @@ def test_mimo_via_openrouter_reasoning_effort_none_disables_thinking():
temperature=0.7, reasoning_effort="none", tool_choice=None, temperature=0.7, reasoning_effort="none", tool_choice=None,
) )
assert "reasoning_effort" not in kwargs assert "reasoning_effort" not in kwargs
assert kwargs["extra_body"] == {"thinking": {"type": "disabled"}} assert kwargs["extra_body"] == {
"thinking": {"type": "disabled"},
"reasoning": {"effort": "none"},
}
def test_mimo_via_openrouter_reasoning_effort_medium_enables_thinking(): def test_mimo_via_openrouter_reasoning_effort_medium_enables_thinking():
"""Same as the direct path: any non-none/minimal effort enables thinking.""" """Non-none/minimal effort enables thinking and the OR `reasoning.effort`
field mirrors the requested effort level."""
provider = _openrouter_provider("xiaomi/mimo-v2.5-pro") provider = _openrouter_provider("xiaomi/mimo-v2.5-pro")
kwargs = provider._build_kwargs( kwargs = provider._build_kwargs(
messages=_simple_messages(), messages=_simple_messages(),
@ -164,7 +170,10 @@ def test_mimo_via_openrouter_reasoning_effort_medium_enables_thinking():
temperature=0.7, reasoning_effort="medium", tool_choice=None, temperature=0.7, reasoning_effort="medium", tool_choice=None,
) )
assert kwargs.get("reasoning_effort") == "medium" assert kwargs.get("reasoning_effort") == "medium"
assert kwargs["extra_body"] == {"thinking": {"type": "enabled"}} assert kwargs["extra_body"] == {
"thinking": {"type": "enabled"},
"reasoning": {"effort": "medium"},
}
def test_mimo_via_openrouter_bare_slug_also_matches(): def test_mimo_via_openrouter_bare_slug_also_matches():
@ -176,12 +185,16 @@ def test_mimo_via_openrouter_bare_slug_also_matches():
tools=None, model=None, max_tokens=100, tools=None, model=None, max_tokens=100,
temperature=0.7, reasoning_effort="none", tool_choice=None, temperature=0.7, reasoning_effort="none", tool_choice=None,
) )
assert kwargs["extra_body"] == {"thinking": {"type": "disabled"}} assert kwargs["extra_body"] == {
"thinking": {"type": "disabled"},
"reasoning": {"effort": "none"},
}
def test_mimo_flash_via_openrouter_does_not_inject_thinking(): def test_mimo_flash_via_openrouter_does_not_inject_thinking():
"""mimo-v2-flash has no thinking mode per Xiaomi docs; the allowlist """mimo-v2-flash has no thinking mode per Xiaomi docs; the allowlist
excludes it, so no thinking field should be injected on the gateway path.""" excludes it, so neither the upstream `thinking` field nor OR's
`reasoning.effort` should be injected on the gateway path."""
provider = _openrouter_provider("xiaomi/mimo-v2-flash") provider = _openrouter_provider("xiaomi/mimo-v2-flash")
kwargs = provider._build_kwargs( kwargs = provider._build_kwargs(
messages=_simple_messages(), messages=_simple_messages(),
@ -200,3 +213,18 @@ def test_non_mimo_model_via_openrouter_unaffected():
temperature=0.7, reasoning_effort="none", tool_choice=None, temperature=0.7, reasoning_effort="none", tool_choice=None,
) )
assert "extra_body" not in kwargs assert "extra_body" not in kwargs
def test_kimi_via_openrouter_also_injects_reasoning_effort():
    """Kimi routed through OpenRouter gets both thinking payloads.

    With ``reasoning_effort="none"`` the built kwargs must carry the
    upstream-style ``thinking`` field and OpenRouter's own
    ``reasoning.effort`` field side by side in ``extra_body``.
    """
    expected_extra_body = {
        "thinking": {"type": "disabled"},
        "reasoning": {"effort": "none"},
    }
    kimi_provider = _openrouter_provider("moonshotai/kimi-k2.5")
    built = kimi_provider._build_kwargs(
        messages=_simple_messages(),
        tools=None,
        model=None,
        max_tokens=100,
        temperature=0.7,
        reasoning_effort="none",
        tool_choice=None,
    )
    assert built["extra_body"] == expected_extra_body