mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-04-26 21:05:49 +00:00
- Move media_paths assignment before voice message handling to prevent NameError at runtime
- Fix broken import layout in transcription.py (httpx/loguru after class)
- Add error logging to OpenAITranscriptionProvider matching Groq style
- Add regression tests for voice transcription and no-media fallback

Made-with: Cursor
95 lines
3.1 KiB
Python
95 lines
3.1 KiB
Python
"""Voice transcription providers (Groq and OpenAI Whisper)."""
|
|
|
|
import os
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
from loguru import logger
|
|
|
|
|
|
class OpenAITranscriptionProvider:
    """Transcribe audio files with OpenAI's Whisper API.

    All failure modes (missing key, missing file, HTTP or network errors)
    are logged and collapsed into an empty-string result so callers never
    have to handle transcription exceptions.
    """

    def __init__(self, api_key: str | None = None):
        # Prefer an explicitly supplied key; otherwise fall back to the
        # OPENAI_API_KEY environment variable.
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
        self.api_url = "https://api.openai.com/v1/audio/transcriptions"

    async def transcribe(self, file_path: str | Path) -> str:
        """Return the transcript of the audio file at *file_path*.

        Args:
            file_path: Location of the audio file to transcribe.

        Returns:
            The transcribed text, or ``""`` on any failure.
        """
        if not self.api_key:
            logger.warning("OpenAI API key not configured for transcription")
            return ""

        audio = Path(file_path)
        if not audio.exists():
            logger.error("Audio file not found: {}", file_path)
            return ""

        try:
            async with httpx.AsyncClient() as client:
                with open(audio, "rb") as fh:
                    # Multipart body: the audio payload plus the model name
                    # as a plain form field (no filename).
                    form_parts = {
                        "file": (audio.name, fh),
                        "model": (None, "whisper-1"),
                    }
                    auth_header = {"Authorization": f"Bearer {self.api_key}"}
                    resp = await client.post(
                        self.api_url,
                        headers=auth_header,
                        files=form_parts,
                        timeout=60.0,
                    )
                resp.raise_for_status()
                return resp.json().get("text", "")
        except Exception as exc:
            # Best-effort contract: log and degrade to an empty transcript.
            logger.error("OpenAI transcription error: {}", exc)
            return ""
|
|
|
|
|
|
class GroqTranscriptionProvider:
    """
    Voice transcription provider backed by Groq's Whisper API.

    Groq offers extremely fast transcription with a generous free tier.
    Every failure path (missing key, missing file, request error) is
    logged and reported to the caller as an empty string.
    """

    def __init__(self, api_key: str | None = None):
        # Explicit key wins; otherwise read GROQ_API_KEY from the environment.
        self.api_key = api_key or os.environ.get("GROQ_API_KEY")
        self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions"

    async def transcribe(self, file_path: str | Path) -> str:
        """
        Transcribe an audio file using Groq.

        Args:
            file_path: Path to the audio file.

        Returns:
            Transcribed text.
        """
        if not self.api_key:
            logger.warning("Groq API key not configured for transcription")
            return ""

        audio = Path(file_path)
        if not audio.exists():
            logger.error("Audio file not found: {}", file_path)
            return ""

        try:
            async with httpx.AsyncClient() as client:
                with open(audio, "rb") as fh:
                    # Multipart upload: audio payload plus the model name
                    # sent as a bare form field.
                    form_parts = {
                        "file": (audio.name, fh),
                        "model": (None, "whisper-large-v3"),
                    }
                    auth_header = {
                        "Authorization": f"Bearer {self.api_key}",
                    }

                    resp = await client.post(
                        self.api_url,
                        headers=auth_header,
                        files=form_parts,
                        timeout=60.0
                    )

                resp.raise_for_status()
                return resp.json().get("text", "")

        except Exception as exc:
            # Best-effort: swallow the error, log it, return no text.
            logger.error("Groq transcription error: {}", exc)
            return ""
|