diff --git a/README.md b/README.md index 7f686b683..8929d3612 100644 --- a/README.md +++ b/README.md @@ -1157,6 +1157,7 @@ That's it! Environment variables, model routing, config matching, and `nanobot s | `detect_by_key_prefix` | Detect gateway by API key prefix | `"sk-or-"` | | `detect_by_base_keyword` | Detect gateway by API base URL | `"openrouter"` | | `strip_model_prefix` | Strip provider prefix before sending to gateway | `True` (for AiHubMix) | +| `supports_max_completion_tokens` | Use `max_completion_tokens` instead of `max_tokens`; required for providers that reject both being set simultaneously (e.g. VolcEngine) | `True` | diff --git a/nanobot/providers/openai_compat_provider.py b/nanobot/providers/openai_compat_provider.py index e9a6ad871..397b8e797 100644 --- a/nanobot/providers/openai_compat_provider.py +++ b/nanobot/providers/openai_compat_provider.py @@ -243,11 +243,14 @@ class OpenAICompatProvider(LLMProvider): kwargs: dict[str, Any] = { "model": model_name, "messages": self._sanitize_messages(self._sanitize_empty_content(messages)), - "max_tokens": max(1, max_tokens), - "max_completion_tokens": max(1, max_tokens), "temperature": temperature, } + if spec and getattr(spec, "supports_max_completion_tokens", False): + kwargs["max_completion_tokens"] = max(1, max_tokens) + else: + kwargs["max_tokens"] = max(1, max_tokens) + if spec: model_lower = model_name.lower() for pattern, overrides in spec.model_overrides: diff --git a/nanobot/providers/registry.py b/nanobot/providers/registry.py index e42e1f95e..5644fc51d 100644 --- a/nanobot/providers/registry.py +++ b/nanobot/providers/registry.py @@ -49,6 +49,7 @@ class ProviderSpec: # gateway behavior strip_model_prefix: bool = False # strip "provider/" before sending to gateway + supports_max_completion_tokens: bool = False # per-model param overrides, e.g. (("kimi-k2.5", {"temperature": 1.0}),) model_overrides: tuple[tuple[str, dict[str, Any]], ...] = ()