From 9b2f452b6e4b8e96a9e893bb2ed0ec676591f1f7 Mon Sep 17 00:00:00 2001
From: "A.G. Bocsardi" <bocsardi.gergely@gmail.com>
Date: Thu, 21 May 2026 15:44:47 +0200
Subject: [PATCH] fix: drop redundant reasoning_effort for Kimi thinking models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Moonshot's API rejects requests that carry both 'reasoning_effort'
(top-level kwarg) and 'thinking' (extra_body) at the same time.
After the unified thinking-style injection loop has added the native
'thinking' param for kimi models, pop 'reasoning_effort' from kwargs,
since it is redundant there and causes a 400 error.

Uses _model_slug() + _KIMI_THINKING_MODELS lookup to stay consistent
with the refactored code (the old _is_kimi_thinking_model helper was
removed in 4f895e63).

Existing kimi tests are updated to assert that 'reasoning_effort' is
absent. Xiaomi MiMo models are unaffected — their API accepts both
params.

Closes #3939
---
 nanobot/providers/openai_compat_provider.py | 8 ++++++++
 tests/providers/test_litellm_kwargs.py      | 9 +++++++++
 2 files changed, 17 insertions(+)

diff --git a/nanobot/providers/openai_compat_provider.py b/nanobot/providers/openai_compat_provider.py
index 3c1bf9b8f..8281d7d20 100644
--- a/nanobot/providers/openai_compat_provider.py
+++ b/nanobot/providers/openai_compat_provider.py
@@ -633,6 +633,14 @@ class OpenAICompatProvider(LLMProvider):
                 if extra:
                     kwargs.setdefault("extra_body", {}).update(extra)
 
+            # Moonshot rejects requests that carry both 'reasoning_effort'
+            # and the native 'thinking' param.  We already expressed the
+            # user's intent via the provider-native shape, so drop the
+            # redundant wire-level kwarg.  Only kimi models need this —
+            # Xiaomi's API accepts both params.
+            if _model_slug(model_name) in _KIMI_THINKING_MODELS:
+                kwargs.pop("reasoning_effort", None)
+
         if tools:
             kwargs["tools"] = tools
             kwargs["tool_choice"] = tool_choice or "auto"
diff --git a/tests/providers/test_litellm_kwargs.py b/tests/providers/test_litellm_kwargs.py
index dddc70054..924ee0060 100644
--- a/tests/providers/test_litellm_kwargs.py
+++ b/tests/providers/test_litellm_kwargs.py
@@ -1420,12 +1420,15 @@ def test_kimi_k25_thinking_enabled() -> None:
     """kimi-k2.5 with reasoning_effort set should opt in to thinking."""
     kw = _build_kwargs_for("moonshot", "kimi-k2.5", reasoning_effort="medium")
     assert kw.get("extra_body") == {"thinking": {"type": "enabled"}}
+    # Moonshot rejects both 'reasoning_effort' and 'thinking' (#3939)
+    assert "reasoning_effort" not in kw
 
 
 def test_kimi_k25_thinking_disabled_for_minimal() -> None:
     """reasoning_effort='minimal' maps to thinking disabled for kimi-k2.5."""
     kw = _build_kwargs_for("moonshot", "kimi-k2.5", reasoning_effort="minimal")
     assert kw.get("extra_body") == {"thinking": {"type": "disabled"}}
+    assert "reasoning_effort" not in kw
 
 
 def test_kimi_k25_no_extra_body_when_reasoning_effort_none() -> None:
@@ -1445,12 +1448,15 @@ def test_kimi_k25_thinking_enabled_with_openrouter_prefix() -> None:
         "thinking": {"type": "enabled"},
         "reasoning": {"effort": "medium"},
     }
+    # Even via OpenRouter, reasoning_effort wire kwarg is dropped for kimi models
+    assert "reasoning_effort" not in kw
 
 
 def test_kimi_k26_thinking_enabled() -> None:
     """kimi-k2.6 with reasoning_effort set should opt in to thinking."""
     kw = _build_kwargs_for("moonshot", "kimi-k2.6", reasoning_effort="medium")
     assert kw.get("extra_body") == {"thinking": {"type": "enabled"}}
+    assert "reasoning_effort" not in kw
 
 
 def test_kimi_k26_thinking_enabled_with_openrouter_prefix() -> None:
@@ -1461,6 +1467,7 @@ def test_kimi_k26_thinking_enabled_with_openrouter_prefix() -> None:
         "thinking": {"type": "enabled"},
         "reasoning": {"effort": "medium"},
     }
+    assert "reasoning_effort" not in kw
 
 
 def test_moonshot_kimi_k26_temperature_override() -> None:
@@ -1479,6 +1486,7 @@ def test_kimi_k26_code_preview_thinking_enabled() -> None:
     """k2.6-code-preview also supports thinking; should behave like k2.5."""
     kw = _build_kwargs_for("moonshot", "k2.6-code-preview", reasoning_effort="high")
     assert kw.get("extra_body") == {"thinking": {"type": "enabled"}}
+    assert "reasoning_effort" not in kw
 
 
 def test_kimi_k2_series_no_thinking_injection() -> None:
@@ -1508,6 +1516,7 @@ def test_kimi_k25_thinking_disabled_for_none_string() -> None:
     """reasoning_effort='none' maps to thinking disabled for kimi-k2.5."""
     kw = _build_kwargs_for("moonshot", "kimi-k2.5", reasoning_effort="none")
     assert kw.get("extra_body") == {"thinking": {"type": "disabled"}}
+    assert "reasoning_effort" not in kw
 
 
 def test_dashscope_thinking_disabled_for_none_string() -> None: