nanobot/nanobot/providers/transcription.py
Xubin Ren 7b7a3e5748 fix: media_paths NameError, import order, add error logging and tests
- Move media_paths assignment before voice message handling to prevent
  NameError at runtime
- Fix broken import layout in transcription.py (httpx/loguru after class)
- Add error logging to OpenAITranscriptionProvider matching Groq style
- Add regression tests for voice transcription and no-media fallback

Made-with: Cursor
2026-04-06 06:01:14 +00:00

95 lines
3.1 KiB
Python

"""Voice transcription providers (Groq and OpenAI Whisper)."""
import os
from pathlib import Path
import httpx
from loguru import logger
class OpenAITranscriptionProvider:
"""Voice transcription provider using OpenAI's Whisper API."""
def __init__(self, api_key: str | None = None):
self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
self.api_url = "https://api.openai.com/v1/audio/transcriptions"
async def transcribe(self, file_path: str | Path) -> str:
if not self.api_key:
logger.warning("OpenAI API key not configured for transcription")
return ""
path = Path(file_path)
if not path.exists():
logger.error("Audio file not found: {}", file_path)
return ""
try:
async with httpx.AsyncClient() as client:
with open(path, "rb") as f:
files = {"file": (path.name, f), "model": (None, "whisper-1")}
headers = {"Authorization": f"Bearer {self.api_key}"}
response = await client.post(
self.api_url, headers=headers, files=files, timeout=60.0,
)
response.raise_for_status()
return response.json().get("text", "")
except Exception as e:
logger.error("OpenAI transcription error: {}", e)
return ""
class GroqTranscriptionProvider:
"""
Voice transcription provider using Groq's Whisper API.
Groq offers extremely fast transcription with a generous free tier.
"""
def __init__(self, api_key: str | None = None):
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions"
async def transcribe(self, file_path: str | Path) -> str:
"""
Transcribe an audio file using Groq.
Args:
file_path: Path to the audio file.
Returns:
Transcribed text.
"""
if not self.api_key:
logger.warning("Groq API key not configured for transcription")
return ""
path = Path(file_path)
if not path.exists():
logger.error("Audio file not found: {}", file_path)
return ""
try:
async with httpx.AsyncClient() as client:
with open(path, "rb") as f:
files = {
"file": (path.name, f),
"model": (None, "whisper-large-v3"),
}
headers = {
"Authorization": f"Bearer {self.api_key}",
}
response = await client.post(
self.api_url,
headers=headers,
files=files,
timeout=60.0
)
response.raise_for_status()
data = response.json()
return data.get("text", "")
except Exception as e:
logger.error("Groq transcription error: {}", e)
return ""