diff --git a/nanobot/providers/openai_compat_provider.py b/nanobot/providers/openai_compat_provider.py index decdccb3a..555d0e5e0 100644 --- a/nanobot/providers/openai_compat_provider.py +++ b/nanobot/providers/openai_compat_provider.py @@ -449,59 +449,6 @@ class OpenAICompatProvider(LLMProvider): clean["content"] = self._coerce_content_to_string(clean.get("content")) return self._enforce_role_alternation(sanitized) - def _drop_deepseek_incomplete_reasoning_history( - self, - messages: list[dict[str, Any]], - model_name: str, - reasoning_effort: str | None, - ) -> list[dict[str, Any]]: - if ( - not self._spec - or self._spec.name != "deepseek" - ): - return messages - - semantic_effort = reasoning_effort.lower() if isinstance(reasoning_effort, str) else None - if semantic_effort in {"none", "minimal", "minimum"}: - return messages - - # DeepSeek-V4 can require reasoning_content even when the config did - # not explicitly request reasoning_effort. Keep that implicit-thinking - # cleanup scoped to known thinking-capable DeepSeek models so normal - # deepseek-chat history is not trimmed. - if semantic_effort is None: - model_lower = model_name.lower() - if not any(token in model_lower for token in ("deepseek-v4", "deepseek-reasoner")): - return messages - - bad_idx = None - for idx, msg in enumerate(messages): - if ( - msg.get("role") == "assistant" - and msg.get("tool_calls") - and not msg.get("reasoning_content") - ): - bad_idx = idx - if bad_idx is None: - return messages - - keep_from = None - for idx in range(bad_idx + 1, len(messages)): - if messages[idx].get("role") == "user": - keep_from = idx - break - - if keep_from is None: - trimmed = messages[:bad_idx] - else: - prefix = [msg for msg in messages[:keep_from] if msg.get("role") == "system"] - trimmed = prefix + messages[keep_from:] - logger.warning( - "Dropped {} DeepSeek thinking history message(s) with incomplete reasoning_content", - len(messages) - len(trimmed), - ) - return trimmed - # ------------------------------------------------------------------ # Build kwargs # ------------------------------------------------------------------ @@ -542,11 +489,6 @@ class OpenAICompatProvider(LLMProvider): if spec and spec.strip_model_prefix: model_name = model_name.split("/")[-1] - messages = self._drop_deepseek_incomplete_reasoning_history( - messages, - model_name, - reasoning_effort, - ) kwargs: dict[str, Any] = { "model": model_name, "messages": self._sanitize_messages(self._sanitize_empty_content(messages)), @@ -611,22 +553,22 @@ class OpenAICompatProvider(LLMProvider): kwargs["tools"] = tools kwargs["tool_choice"] = tool_choice or "auto" - # Backfill reasoning_content on legacy assistant messages. - # DeepSeek V4 (and potentially others) rejects thinking-mode - # requests that contain assistant messages without reasoning_content - # — even on turns that had no tool calls. This happens when a - # session was started with a non-thinking model or without - # reasoning_effort, then the user switches thinking mode on - # mid-session. Injecting an empty string satisfies the API - # without altering semantics (the model treats it as "no - # thinking happened on that turn"). - thinking_active = ( - (spec and spec.thinking_style and reasoning_effort is not None - and semantic_effort not in ("none", "minimal")) - or (reasoning_effort is not None and _is_kimi_thinking_model(model_name) - and semantic_effort not in ("none", "minimal")) + # Backfill reasoning_content="" on assistants missing it: DeepSeek + # thinking mode rejects history otherwise (#3554, #3584); "" reads + # as "no thinking that turn". DeepSeek-V4/reasoner reason natively, + # so backfill even without explicit reasoning_effort. + explicit_thinking = ( + reasoning_effort is not None + and semantic_effort not in ("none", "minimal") + and ((spec and spec.thinking_style) or _is_kimi_thinking_model(model_name)) ) - if thinking_active: + implicit_deepseek_thinking = ( + spec is not None + and spec.name == "deepseek" + and semantic_effort not in ("none", "minimal", "minimum") + and any(t in model_name.lower() for t in ("deepseek-v4", "deepseek-reasoner")) + ) + if explicit_thinking or implicit_deepseek_thinking: for msg in kwargs["messages"]: if msg.get("role") == "assistant" and "reasoning_content" not in msg: msg["reasoning_content"] = "" diff --git a/tests/providers/test_litellm_kwargs.py b/tests/providers/test_litellm_kwargs.py index a3b624171..94455fd40 100644 --- a/tests/providers/test_litellm_kwargs.py +++ b/tests/providers/test_litellm_kwargs.py @@ -620,7 +620,8 @@ def _tool_call(call_id: str) -> dict: } -def test_deepseek_thinking_drops_tool_history_missing_reasoning_content() -> None: +def test_deepseek_thinking_backfills_missing_reasoning_content_on_tool_history() -> None: + """Backfill reasoning_content="" instead of dropping the turn (#3554, #3584).""" kwargs = _deepseek_kwargs([ {"role": "system", "content": "system"}, {"role": "user", "content": "can we use wechat?"}, @@ -629,10 +630,12 @@ def test_deepseek_thinking_drops_tool_history_missing_reasoning_content() -> Non {"role": "user", "content": "continue"}, ]) - assert kwargs["messages"] == [ - {"role": "system", "content": "system"}, - {"role": "user", "content": "continue"}, + assert [m["role"] for m in kwargs["messages"]] == [ + "system", "user", "assistant", "tool", "user", ] + assistant = kwargs["messages"][2] + assert assistant["reasoning_content"] == "" + assert assistant["tool_calls"][0]["function"]["name"] == "my" def test_deepseek_thinking_keeps_tool_history_with_reasoning_content() -> None: @@ -654,20 +657,6 @@ def test_deepseek_thinking_keeps_tool_history_with_reasoning_content() -> None: assert kwargs["messages"][2]["role"] == "tool" -def test_deepseek_thinking_drops_current_bad_tool_turn_without_followup_user() -> None: - kwargs = _deepseek_kwargs([ - {"role": "system", "content": "system"}, - {"role": "user", "content": "can we use wechat?"}, - {"role": "assistant", "content": "", "tool_calls": [_tool_call("call_bad")]}, - {"role": "tool", "tool_call_id": "call_bad", "name": "my", "content": "channels"}, - ]) - - assert kwargs["messages"] == [ - {"role": "system", "content": "system"}, - {"role": "user", "content": "can we use wechat?"}, - ] - - def test_openai_compat_keeps_tool_calls_after_consecutive_assistant_messages() -> None: with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI"): provider = OpenAICompatProvider() @@ -937,8 +926,8 @@ def test_backfill_does_not_touch_messages_when_thinking_explicitly_off() -> None assert "reasoning_content" not in msg -def test_deepseek_v4_drops_incomplete_reasoning_history_when_effort_implicit() -> None: - """DeepSeek-V4 may default to thinking, so incomplete legacy history is trimmed.""" +def test_deepseek_v4_backfills_incomplete_reasoning_history_when_effort_implicit() -> None: + """DeepSeek-V4 reasons natively: backfill even without explicit reasoning_effort.""" spec = find_by_name("deepseek") with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI"): p = OpenAICompatProvider(api_key="k", default_model="deepseek-v4-pro", spec=spec) @@ -958,12 +947,16 @@ def test_deepseek_v4_drops_incomplete_reasoning_history_when_effort_implicit() - reasoning_effort=None, tool_choice=None, ) - assert [msg["role"] for msg in kw["messages"]] == ["system", "user"] + assert [msg["role"] for msg in kw["messages"]] == [ + "system", "user", "assistant", "tool", "user", + ] + assert kw["messages"][2]["reasoning_content"] == "" assert kw["messages"][-1]["content"] == "thanks" def test_deepseek_chat_keeps_tool_history_when_effort_implicit() -> None: - """Implicit cleanup must not trim non-thinking DeepSeek chat models.""" + """Non-thinking deepseek-chat must keep history untouched and must NOT + receive backfilled reasoning_content (#3554, #3584).""" spec = find_by_name("deepseek") with patch("nanobot.providers.openai_compat_provider.AsyncOpenAI"): p = OpenAICompatProvider(api_key="k", default_model="deepseek-chat", spec=spec) @@ -985,6 +978,7 @@ def test_deepseek_chat_keeps_tool_history_when_effort_implicit() -> None: roles = [msg["role"] for msg in kw["messages"]] assert roles == ["user", "assistant", "tool", "user"] assert kw["messages"][1]["tool_calls"] + assert "reasoning_content" not in kw["messages"][1] def test_deepseek_coerces_list_content_to_string() -> None: