From 0cd2f626c054572f9eb0787daa9c817c470b3249 Mon Sep 17 00:00:00 2001 From: olgagaga Date: Sat, 16 May 2026 12:19:30 -0400 Subject: [PATCH] fix(providers): inject OpenRouter `reasoning.effort` for thinking models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #3851: that PR added `extra_body.thinking={type: disabled}` for MiMo via OpenRouter, but OR doesn't forward provider-specific thinking shapes to upstream — it strips unknown extra_body fields and uses its own unified `reasoning` parameter. So MiMo via OR kept thinking despite the injection (reproduced by @ClearPlume on #3851 with identical kwargs but provider switched from openrouter → xiaomi_mimo). For known thinking-capable models (Kimi, MiMo) routed via the openrouter spec, also inject `extra_body.reasoning = {effort: EFFORT}`, where EFFORT is a value from OR's documented enum ("none"|"minimal"|"low"|"medium"|"high"|"xhigh"). OR translates this to the upstream model's native shape. Existing tests updated to expect both fields on the OR path. The direct xiaomi_mimo and moonshot paths are unchanged (the new branch is gated on spec.name == "openrouter"). Flash and non-MiMo models on OR continue to receive no injection. --- nanobot/providers/openai_compat_provider.py | 21 ++++++++++ tests/providers/test_litellm_kwargs.py | 19 +++++++-- tests/providers/test_xiaomi_mimo_thinking.py | 44 ++++++++++++++++---- 3 files changed, 72 insertions(+), 12 deletions(-) diff --git a/nanobot/providers/openai_compat_provider.py b/nanobot/providers/openai_compat_provider.py index 03ab35a0e..222159dda 100644 --- a/nanobot/providers/openai_compat_provider.py +++ b/nanobot/providers/openai_compat_provider.py @@ -615,6 +615,27 @@ class OpenAICompatProvider(LLMProvider): {"thinking": {"type": "enabled" if thinking_enabled else "disabled"}} ) + # OpenRouter uses its own unified `reasoning` field and does not + # forward provider-specific thinking shapes (the Kimi/MiMo + # extra_body.thinking above) to upstream. 
Reported as the follow-up + # to #3845/#3851: MiMo via OR kept thinking despite our injection. + # For known thinking-capable models routed via OR, mirror the + # effort signal into reasoning.effort (OR's documented enum: + # "none"|"minimal"|"low"|"medium"|"high"|"xhigh"), which OR + # translates to the upstream model's native shape. + if ( + spec + and spec.name == "openrouter" + and reasoning_effort is not None + and ( + _is_kimi_thinking_model(model_name) + or _is_mimo_thinking_model(model_name) + ) + ): + kwargs.setdefault("extra_body", {}).update( + {"reasoning": {"effort": semantic_effort}} + ) + if tools: kwargs["tools"] = tools kwargs["tool_choice"] = tool_choice or "auto" diff --git a/tests/providers/test_litellm_kwargs.py b/tests/providers/test_litellm_kwargs.py index 5f2ffec59..461913c93 100644 --- a/tests/providers/test_litellm_kwargs.py +++ b/tests/providers/test_litellm_kwargs.py @@ -1391,9 +1391,16 @@ def test_kimi_k25_no_extra_body_when_reasoning_effort_none() -> None: def test_kimi_k25_thinking_enabled_with_openrouter_prefix() -> None: - """OpenRouter-style model names like moonshotai/kimi-k2.5 must trigger thinking.""" + """OpenRouter-style model names like moonshotai/kimi-k2.5 must trigger thinking. + + OR drops upstream-provider `thinking` fields, so the same intent also has + to go through OR's `reasoning.effort` shape (#3851 follow-up). 
+ """ kw = _build_kwargs_for("openrouter", "moonshotai/kimi-k2.5", reasoning_effort="medium") - assert kw.get("extra_body") == {"thinking": {"type": "enabled"}} + assert kw.get("extra_body") == { + "thinking": {"type": "enabled"}, + "reasoning": {"effort": "medium"}, + } def test_kimi_k26_thinking_enabled() -> None: @@ -1403,9 +1410,13 @@ def test_kimi_k26_thinking_enabled() -> None: def test_kimi_k26_thinking_enabled_with_openrouter_prefix() -> None: - """OpenRouter-style names like moonshotai/kimi-k2.6 must trigger thinking.""" + """OpenRouter-style names like moonshotai/kimi-k2.6 must trigger thinking + via both upstream `thinking` and OR's `reasoning.effort`.""" kw = _build_kwargs_for("openrouter", "moonshotai/kimi-k2.6", reasoning_effort="medium") - assert kw.get("extra_body") == {"thinking": {"type": "enabled"}} + assert kw.get("extra_body") == { + "thinking": {"type": "enabled"}, + "reasoning": {"effort": "medium"}, + } def test_moonshot_kimi_k26_temperature_override() -> None: diff --git a/tests/providers/test_xiaomi_mimo_thinking.py b/tests/providers/test_xiaomi_mimo_thinking.py index 68ca6dd80..43dfec537 100644 --- a/tests/providers/test_xiaomi_mimo_thinking.py +++ b/tests/providers/test_xiaomi_mimo_thinking.py @@ -142,9 +142,11 @@ def test_mimo_reasoning_effort_unset_preserves_provider_default(): def test_mimo_via_openrouter_reasoning_effort_none_disables_thinking(): - """OpenRouter routes MiMo as "xiaomi/mimo-v2.5-pro"; the openrouter spec - has no thinking_style, so the disable signal must come from the - model-name path (#3845).""" + """OpenRouter routes MiMo as "xiaomi/mimo-v2.5-pro" and does NOT forward + extra_body.thinking to upstream, so a disable signal must also reach OR + in its own `reasoning.effort` shape. Verifies both the upstream-MiMo + payload (#3845) and the OR-native payload (#3851 follow-up) are sent. 
+ """ provider = _openrouter_provider("xiaomi/mimo-v2.5-pro") kwargs = provider._build_kwargs( messages=_simple_messages(), @@ -152,11 +154,15 @@ def test_mimo_via_openrouter_reasoning_effort_none_disables_thinking(): temperature=0.7, reasoning_effort="none", tool_choice=None, ) assert "reasoning_effort" not in kwargs - assert kwargs["extra_body"] == {"thinking": {"type": "disabled"}} + assert kwargs["extra_body"] == { + "thinking": {"type": "disabled"}, + "reasoning": {"effort": "none"}, + } def test_mimo_via_openrouter_reasoning_effort_medium_enables_thinking(): - """Same as the direct path: any non-none/minimal effort enables thinking.""" + """Non-none/minimal effort enables thinking and the OR `reasoning.effort` + field mirrors the requested effort level.""" provider = _openrouter_provider("xiaomi/mimo-v2.5-pro") kwargs = provider._build_kwargs( messages=_simple_messages(), @@ -164,7 +170,10 @@ def test_mimo_via_openrouter_reasoning_effort_medium_enables_thinking(): temperature=0.7, reasoning_effort="medium", tool_choice=None, ) assert kwargs.get("reasoning_effort") == "medium" - assert kwargs["extra_body"] == {"thinking": {"type": "enabled"}} + assert kwargs["extra_body"] == { + "thinking": {"type": "enabled"}, + "reasoning": {"effort": "medium"}, + } def test_mimo_via_openrouter_bare_slug_also_matches(): @@ -176,12 +185,16 @@ def test_mimo_via_openrouter_bare_slug_also_matches(): tools=None, model=None, max_tokens=100, temperature=0.7, reasoning_effort="none", tool_choice=None, ) - assert kwargs["extra_body"] == {"thinking": {"type": "disabled"}} + assert kwargs["extra_body"] == { + "thinking": {"type": "disabled"}, + "reasoning": {"effort": "none"}, + } def test_mimo_flash_via_openrouter_does_not_inject_thinking(): """mimo-v2-flash has no thinking mode per Xiaomi docs; the allowlist - excludes it, so no thinking field should be injected on the gateway path.""" + excludes it, so neither the upstream `thinking` field nor OR's + `reasoning.effort` should be 
injected on the gateway path.""" provider = _openrouter_provider("xiaomi/mimo-v2-flash") kwargs = provider._build_kwargs( messages=_simple_messages(), @@ -200,3 +213,18 @@ def test_non_mimo_model_via_openrouter_unaffected(): temperature=0.7, reasoning_effort="none", tool_choice=None, ) assert "extra_body" not in kwargs + + +def test_kimi_via_openrouter_also_injects_reasoning_effort(): + """Kimi has the same gateway problem as MiMo: OR drops the upstream + `thinking` field. The same OR-reasoning injection should fire.""" + provider = _openrouter_provider("moonshotai/kimi-k2.5") + kwargs = provider._build_kwargs( + messages=_simple_messages(), + tools=None, model=None, max_tokens=100, + temperature=0.7, reasoning_effort="none", tool_choice=None, + ) + assert kwargs["extra_body"] == { + "thinking": {"type": "disabled"}, + "reasoning": {"effort": "none"}, + }