mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-06-13 22:34:06 +00:00
fix(transcription): normalize chat-style apiBase to audio endpoint (#3637)
This commit is contained in:
parent
5b71f61f55
commit
ef2ef4f789
@ -7,6 +7,25 @@ from pathlib import Path
|
||||
import httpx
|
||||
from loguru import logger
|
||||
|
||||
_TRANSCRIPTIONS_PATH = "audio/transcriptions"


def _resolve_transcription_url(api_base: str | None, default_url: str) -> str:
    """Resolve the full transcription endpoint URL.

    Accepts either a chat-style base (e.g. ``https://api.groq.com/openai/v1``)
    or a complete URL already ending in ``/audio/transcriptions``. A chat-style
    base — the form users naturally copy from their LLM provider config — gets
    the path appended instead of being POSTed verbatim and 404ing (#3637).

    Surrounding whitespace is ignored, and any query string or fragment
    (e.g. ``?api-version=...``) is kept at the end of the resolved URL rather
    than ending up in the middle of the path.

    Args:
        api_base: User-supplied base or full endpoint URL. ``None``, an empty
            string, or a whitespace-only string means "not configured".
        default_url: Endpoint returned when ``api_base`` is not configured.

    Returns:
        The URL to POST transcription requests to.
    """
    base = (api_base or "").strip()
    if not base:
        return default_url

    # Separate the query string / fragment so the endpoint check and the path
    # append operate on the path only; the suffix is re-attached untouched.
    cut = min(
        (pos for pos in (base.find("?"), base.find("#")) if pos != -1),
        default=len(base),
    )
    location = base[:cut].rstrip("/")
    suffix = base[cut:]

    if location.endswith(_TRANSCRIPTIONS_PATH):
        return f"{location}{suffix}"
    return f"{location}/{_TRANSCRIPTIONS_PATH}{suffix}"
|
||||
|
||||
|
||||
# Up to 3 retries (4 attempts total) with exponential backoff on transient
|
||||
# failures. Whisper endpoints occasionally return 502/503 under load, and
|
||||
# mobile-network transcription callers hit sporadic connect/read errors.
|
||||
@ -127,12 +146,12 @@ class OpenAITranscriptionProvider:
|
||||
language: str | None = None,
|
||||
):
|
||||
self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
|
||||
self.api_url = (
|
||||
api_base
|
||||
or os.environ.get("OPENAI_TRANSCRIPTION_BASE_URL")
|
||||
or "https://api.openai.com/v1/audio/transcriptions"
|
||||
self.api_url = _resolve_transcription_url(
|
||||
api_base or os.environ.get("OPENAI_TRANSCRIPTION_BASE_URL"),
|
||||
"https://api.openai.com/v1/audio/transcriptions",
|
||||
)
|
||||
self.language = language or None
|
||||
logger.debug("OpenAI transcription endpoint: {}", self.api_url)
|
||||
|
||||
async def transcribe(self, file_path: str | Path) -> str:
|
||||
if not self.api_key:
|
||||
@ -166,12 +185,12 @@ class GroqTranscriptionProvider:
|
||||
language: str | None = None,
|
||||
):
|
||||
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
|
||||
self.api_url = (
|
||||
api_base
|
||||
or os.environ.get("GROQ_BASE_URL")
|
||||
or "https://api.groq.com/openai/v1/audio/transcriptions"
|
||||
self.api_url = _resolve_transcription_url(
|
||||
api_base or os.environ.get("GROQ_BASE_URL"),
|
||||
"https://api.groq.com/openai/v1/audio/transcriptions",
|
||||
)
|
||||
self.language = language or None
|
||||
logger.debug("Groq transcription endpoint: {}", self.api_url)
|
||||
|
||||
async def transcribe(self, file_path: str | Path) -> str:
|
||||
"""
|
||||
|
||||
@ -8,7 +8,11 @@ from unittest.mock import AsyncMock, patch
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from nanobot.providers.transcription import GroqTranscriptionProvider, OpenAITranscriptionProvider
|
||||
from nanobot.providers.transcription import (
|
||||
GroqTranscriptionProvider,
|
||||
OpenAITranscriptionProvider,
|
||||
_resolve_transcription_url,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -290,3 +294,37 @@ async def test_retries_on_every_advertised_transient_exception(
|
||||
result = await provider.transcribe(audio_file)
|
||||
assert result == "recovered"
|
||||
assert post.await_count == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# apiBase normalization (#3637): a chat-style base must not be POSTed verbatim
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_resolve_transcription_url_falls_back_to_default() -> None:
    """An unset or empty ``api_base`` resolves to the supplied default URL."""
    fallback = "https://api.openai.com/v1/audio/transcriptions"
    for unset in (None, ""):
        assert _resolve_transcription_url(unset, fallback) == fallback
|
||||
|
||||
|
||||
def test_resolve_transcription_url_appends_path_to_chat_style_base() -> None:
    """A chat-style base gains the audio path, with or without a trailing slash."""
    unused_default = "https://x/audio/transcriptions"
    expected = "https://api.groq.com/openai/v1/audio/transcriptions"

    resolved = _resolve_transcription_url("https://api.groq.com/openai/v1", unused_default)
    assert resolved == expected

    # Trailing slash must not produce a doubled separator.
    resolved = _resolve_transcription_url("https://api.groq.com/openai/v1/", unused_default)
    assert resolved == expected
|
||||
|
||||
|
||||
def test_resolve_transcription_url_keeps_full_endpoint() -> None:
    """A URL that already names the audio endpoint is returned unchanged."""
    endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    resolved = _resolve_transcription_url(endpoint, "https://x/audio/transcriptions")
    assert resolved == endpoint
|
||||
|
||||
|
||||
def test_groq_provider_normalizes_chat_style_api_base() -> None:
    """Regression for #3637: a v1-style apiBase must resolve to the audio endpoint."""
    chat_style_base = "https://api.groq.com/openai/v1"
    groq = GroqTranscriptionProvider(api_key="gsk-test", api_base=chat_style_base)
    expected_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    assert groq.api_url == expected_endpoint
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user