mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-06-13 22:34:06 +00:00
fix(transcription): normalize chat-style apiBase to audio endpoint (#3637)
This commit is contained in:
parent
5b71f61f55
commit
ef2ef4f789
@ -7,6 +7,25 @@ from pathlib import Path
|
|||||||
import httpx
|
import httpx
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
_TRANSCRIPTIONS_PATH = "audio/transcriptions"
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_transcription_url(api_base: str | None, default_url: str) -> str:
|
||||||
|
"""Resolve the full transcription endpoint URL.
|
||||||
|
|
||||||
|
Accepts either a chat-style base (e.g. ``https://api.groq.com/openai/v1``)
|
||||||
|
or a complete URL already ending in ``/audio/transcriptions``. A chat-style
|
||||||
|
base — the form users naturally copy from their LLM provider config — gets
|
||||||
|
the path appended instead of being POSTed verbatim and 404ing (#3637).
|
||||||
|
"""
|
||||||
|
if not api_base:
|
||||||
|
return default_url
|
||||||
|
base = api_base.rstrip("/")
|
||||||
|
if base.endswith(_TRANSCRIPTIONS_PATH):
|
||||||
|
return base
|
||||||
|
return f"{base}/{_TRANSCRIPTIONS_PATH}"
|
||||||
|
|
||||||
|
|
||||||
# Up to 3 retries (4 attempts total) with exponential backoff on transient
|
# Up to 3 retries (4 attempts total) with exponential backoff on transient
|
||||||
# failures. Whisper endpoints occasionally return 502/503 under load, and
|
# failures. Whisper endpoints occasionally return 502/503 under load, and
|
||||||
# mobile-network transcription callers hit sporadic connect/read errors.
|
# mobile-network transcription callers hit sporadic connect/read errors.
|
||||||
@ -127,12 +146,12 @@ class OpenAITranscriptionProvider:
|
|||||||
language: str | None = None,
|
language: str | None = None,
|
||||||
):
|
):
|
||||||
self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
|
self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
|
||||||
self.api_url = (
|
self.api_url = _resolve_transcription_url(
|
||||||
api_base
|
api_base or os.environ.get("OPENAI_TRANSCRIPTION_BASE_URL"),
|
||||||
or os.environ.get("OPENAI_TRANSCRIPTION_BASE_URL")
|
"https://api.openai.com/v1/audio/transcriptions",
|
||||||
or "https://api.openai.com/v1/audio/transcriptions"
|
|
||||||
)
|
)
|
||||||
self.language = language or None
|
self.language = language or None
|
||||||
|
logger.debug("OpenAI transcription endpoint: {}", self.api_url)
|
||||||
|
|
||||||
async def transcribe(self, file_path: str | Path) -> str:
|
async def transcribe(self, file_path: str | Path) -> str:
|
||||||
if not self.api_key:
|
if not self.api_key:
|
||||||
@ -166,12 +185,12 @@ class GroqTranscriptionProvider:
|
|||||||
language: str | None = None,
|
language: str | None = None,
|
||||||
):
|
):
|
||||||
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
|
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
|
||||||
self.api_url = (
|
self.api_url = _resolve_transcription_url(
|
||||||
api_base
|
api_base or os.environ.get("GROQ_BASE_URL"),
|
||||||
or os.environ.get("GROQ_BASE_URL")
|
"https://api.groq.com/openai/v1/audio/transcriptions",
|
||||||
or "https://api.groq.com/openai/v1/audio/transcriptions"
|
|
||||||
)
|
)
|
||||||
self.language = language or None
|
self.language = language or None
|
||||||
|
logger.debug("Groq transcription endpoint: {}", self.api_url)
|
||||||
|
|
||||||
async def transcribe(self, file_path: str | Path) -> str:
|
async def transcribe(self, file_path: str | Path) -> str:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -8,7 +8,11 @@ from unittest.mock import AsyncMock, patch
|
|||||||
import httpx
|
import httpx
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from nanobot.providers.transcription import GroqTranscriptionProvider, OpenAITranscriptionProvider
|
from nanobot.providers.transcription import (
|
||||||
|
GroqTranscriptionProvider,
|
||||||
|
OpenAITranscriptionProvider,
|
||||||
|
_resolve_transcription_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
@ -290,3 +294,37 @@ async def test_retries_on_every_advertised_transient_exception(
|
|||||||
result = await provider.transcribe(audio_file)
|
result = await provider.transcribe(audio_file)
|
||||||
assert result == "recovered"
|
assert result == "recovered"
|
||||||
assert post.await_count == 2
|
assert post.await_count == 2
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# apiBase normalization (#3637): a chat-style base must not be POSTed verbatim
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_transcription_url_falls_back_to_default() -> None:
    """A missing or empty ``api_base`` resolves to the supplied default URL."""
    fallback = "https://api.openai.com/v1/audio/transcriptions"
    for unset in (None, ""):
        assert _resolve_transcription_url(unset, fallback) == fallback
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_transcription_url_appends_path_to_chat_style_base() -> None:
    """A chat-style base gains the audio path, with or without a trailing slash."""
    fallback = "https://x/audio/transcriptions"
    expected = "https://api.groq.com/openai/v1/audio/transcriptions"
    chat_bases = (
        "https://api.groq.com/openai/v1",
        # Trailing slash must not produce a doubled separator.
        "https://api.groq.com/openai/v1/",
    )
    for chat_base in chat_bases:
        assert _resolve_transcription_url(chat_base, fallback) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_transcription_url_keeps_full_endpoint() -> None:
    """A URL that already ends in the audio path is returned unchanged."""
    endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    resolved = _resolve_transcription_url(endpoint, "https://x/audio/transcriptions")
    assert resolved == endpoint
|
||||||
|
|
||||||
|
|
||||||
|
def test_groq_provider_normalizes_chat_style_api_base() -> None:
    """Regression for #3637: apiBase set to the v1 base resolves to the audio endpoint."""
    expected_url = "https://api.groq.com/openai/v1/audio/transcriptions"
    groq = GroqTranscriptionProvider(
        api_key="gsk-test",
        api_base="https://api.groq.com/openai/v1",
    )
    assert groq.api_url == expected_url
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user