fix(transcription): normalize chat-style apiBase to audio endpoint (#3637)

This commit is contained in:
04cb 2026-05-23 06:00:28 +08:00 committed by Xubin Ren
parent 5b71f61f55
commit ef2ef4f789
2 changed files with 66 additions and 9 deletions

View File

@ -7,6 +7,25 @@ from pathlib import Path
import httpx
from loguru import logger
_TRANSCRIPTIONS_PATH = "audio/transcriptions"
def _resolve_transcription_url(api_base: str | None, default_url: str) -> str:
"""Resolve the full transcription endpoint URL.
Accepts either a chat-style base (e.g. ``https://api.groq.com/openai/v1``)
or a complete URL already ending in ``/audio/transcriptions``. A chat-style
base the form users naturally copy from their LLM provider config gets
the path appended instead of being POSTed verbatim and 404ing (#3637).
"""
if not api_base:
return default_url
base = api_base.rstrip("/")
if base.endswith(_TRANSCRIPTIONS_PATH):
return base
return f"{base}/{_TRANSCRIPTIONS_PATH}"
# Up to 3 retries (4 attempts total) with exponential backoff on transient
# failures. Whisper endpoints occasionally return 502/503 under load, and
# mobile-network transcription callers hit sporadic connect/read errors.
@ -127,12 +146,12 @@ class OpenAITranscriptionProvider:
language: str | None = None,
):
self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
self.api_url = (
api_base
or os.environ.get("OPENAI_TRANSCRIPTION_BASE_URL")
or "https://api.openai.com/v1/audio/transcriptions"
self.api_url = _resolve_transcription_url(
api_base or os.environ.get("OPENAI_TRANSCRIPTION_BASE_URL"),
"https://api.openai.com/v1/audio/transcriptions",
)
self.language = language or None
logger.debug("OpenAI transcription endpoint: {}", self.api_url)
async def transcribe(self, file_path: str | Path) -> str:
if not self.api_key:
@ -166,12 +185,12 @@ class GroqTranscriptionProvider:
language: str | None = None,
):
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
self.api_url = (
api_base
or os.environ.get("GROQ_BASE_URL")
or "https://api.groq.com/openai/v1/audio/transcriptions"
self.api_url = _resolve_transcription_url(
api_base or os.environ.get("GROQ_BASE_URL"),
"https://api.groq.com/openai/v1/audio/transcriptions",
)
self.language = language or None
logger.debug("Groq transcription endpoint: {}", self.api_url)
async def transcribe(self, file_path: str | Path) -> str:
"""

View File

@ -8,7 +8,11 @@ from unittest.mock import AsyncMock, patch
import httpx
import pytest
from nanobot.providers.transcription import GroqTranscriptionProvider, OpenAITranscriptionProvider
from nanobot.providers.transcription import (
GroqTranscriptionProvider,
OpenAITranscriptionProvider,
_resolve_transcription_url,
)
@pytest.fixture
@ -290,3 +294,37 @@ async def test_retries_on_every_advertised_transient_exception(
result = await provider.transcribe(audio_file)
assert result == "recovered"
assert post.await_count == 2
# ---------------------------------------------------------------------------
# apiBase normalization (#3637): a chat-style base must not be POSTed verbatim
# ---------------------------------------------------------------------------
def test_resolve_transcription_url_falls_back_to_default() -> None:
    """An unset apiBase (``None`` or an empty string) resolves to the default URL."""
    fallback = "https://api.openai.com/v1/audio/transcriptions"
    for unset in (None, ""):
        assert _resolve_transcription_url(unset, fallback) == fallback
def test_resolve_transcription_url_appends_path_to_chat_style_base() -> None:
    """A chat-style base gains the audio path, with or without a trailing slash."""
    unused_default = "https://x/audio/transcriptions"
    expected = "https://api.groq.com/openai/v1/audio/transcriptions"

    without_slash = _resolve_transcription_url("https://api.groq.com/openai/v1", unused_default)
    assert without_slash == expected

    # Trailing slash must not produce a doubled separator.
    with_slash = _resolve_transcription_url("https://api.groq.com/openai/v1/", unused_default)
    assert with_slash == expected
def test_resolve_transcription_url_keeps_full_endpoint() -> None:
    """A URL that already names the audio endpoint is returned unchanged."""
    endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    resolved = _resolve_transcription_url(endpoint, "https://x/audio/transcriptions")
    assert resolved == endpoint
def test_groq_provider_normalizes_chat_style_api_base() -> None:
    """Regression for #3637: apiBase set to the v1 base resolves to the audio endpoint."""
    chat_base = "https://api.groq.com/openai/v1"
    groq = GroqTranscriptionProvider(api_key="gsk-test", api_base=chat_base)
    resolved = groq.api_url
    assert resolved == "https://api.groq.com/openai/v1/audio/transcriptions"