fix(providers): make max_tokens and max_completion_tokens mutually exclusive (#2491)

* fix(providers): make max_tokens and max_completion_tokens mutually exclusive

* docs: document supports_max_completion_tokens ProviderSpec option
This commit is contained in:
Flo 2026-03-27 13:10:04 +03:00 committed by Xubin Ren
parent ace3fd6049
commit 1331084873
3 changed files with 7 additions and 2 deletions

View File

@ -1157,6 +1157,7 @@ That's it! Environment variables, model routing, config matching, and `nanobot s
| `detect_by_key_prefix` | Detect gateway by API key prefix | `"sk-or-"` |
| `detect_by_base_keyword` | Detect gateway by API base URL | `"openrouter"` |
| `strip_model_prefix` | Strip provider prefix before sending to gateway | `True` (for AiHubMix) |
| `supports_max_completion_tokens` | Use `max_completion_tokens` instead of `max_tokens`; required for providers that reject both being set simultaneously (e.g. VolcEngine) | `True` |
</details>

View File

@ -243,11 +243,14 @@ class OpenAICompatProvider(LLMProvider):
kwargs: dict[str, Any] = {
"model": model_name,
"messages": self._sanitize_messages(self._sanitize_empty_content(messages)),
"max_tokens": max(1, max_tokens),
"max_completion_tokens": max(1, max_tokens),
"temperature": temperature,
}
if spec and getattr(spec, "supports_max_completion_tokens", False):
kwargs["max_completion_tokens"] = max(1, max_tokens)
else:
kwargs["max_tokens"] = max(1, max_tokens)
if spec:
model_lower = model_name.lower()
for pattern, overrides in spec.model_overrides:

View File

@ -49,6 +49,7 @@ class ProviderSpec:
# gateway behavior
strip_model_prefix: bool = False # strip "provider/" before sending to gateway
supports_max_completion_tokens: bool = False
# per-model param overrides, e.g. (("kimi-k2.5", {"temperature": 1.0}),)
model_overrides: tuple[tuple[str, dict[str, Any]], ...] = ()