From db50dd8a772326e8425ba6581e75f757670db1f9 Mon Sep 17 00:00:00 2001 From: comadreja Date: Thu, 26 Mar 2026 21:46:31 -0500 Subject: [PATCH 1/5] feat(whatsapp): add voice message transcription via OpenAI/Groq Whisper Automatically transcribe WhatsApp voice messages using OpenAI Whisper or Groq. Configurable via transcriptionProvider and transcriptionApiKey. Config: "whatsapp": { "transcriptionProvider": "openai", "transcriptionApiKey": "sk-..." } --- nanobot/channels/base.py | 12 ++++++++---- nanobot/channels/whatsapp.py | 19 ++++++++++++++----- nanobot/providers/transcription.py | 30 +++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 10 deletions(-) diff --git a/nanobot/channels/base.py b/nanobot/channels/base.py index 86e991344..e0bb62c0f 100644 --- a/nanobot/channels/base.py +++ b/nanobot/channels/base.py @@ -37,13 +37,17 @@ class BaseChannel(ABC): self._running = False async def transcribe_audio(self, file_path: str | Path) -> str: - """Transcribe an audio file via Groq Whisper. Returns empty string on failure.""" + """Transcribe an audio file via Whisper (OpenAI or Groq). Returns empty string on failure.""" if not self.transcription_api_key: return "" try: - from nanobot.providers.transcription import GroqTranscriptionProvider - - provider = GroqTranscriptionProvider(api_key=self.transcription_api_key) + provider_name = getattr(self, "transcription_provider", "groq") + if provider_name == "openai": + from nanobot.providers.transcription import OpenAITranscriptionProvider + provider = OpenAITranscriptionProvider(api_key=self.transcription_api_key) + else: + from nanobot.providers.transcription import GroqTranscriptionProvider + provider = GroqTranscriptionProvider(api_key=self.transcription_api_key) return await provider.transcribe(file_path) except Exception as e: logger.warning("{}: audio transcription failed: {}", self.name, e) diff --git a/nanobot/channels/whatsapp.py b/nanobot/channels/whatsapp.py index 95bde46e9..63a9b69d0 100644 --- a/nanobot/channels/whatsapp.py +++ b/nanobot/channels/whatsapp.py @@ -26,6 +26,8 @@ class WhatsAppConfig(Base): bridge_url: str = "ws://localhost:3001" bridge_token: str = "" allow_from: list[str] = Field(default_factory=list) + transcription_provider: str = "openai" # openai or groq + transcription_api_key: str = "" group_policy: Literal["open", "mention"] = "open" # "open" responds to all, "mention" only when @mentioned @@ -51,6 +53,8 @@ class WhatsAppChannel(BaseChannel): self._ws = None self._connected = False self._processed_message_ids: OrderedDict[str, None] = OrderedDict() + self.transcription_api_key = config.transcription_api_key + self.transcription_provider = config.transcription_provider async def login(self, force: bool = False) -> bool: """ @@ -203,11 +207,16 @@ class WhatsAppChannel(BaseChannel): # Handle voice transcription if it's a voice message if content == "[Voice Message]": - logger.info( - "Voice message received from {}, but direct download from bridge is not yet supported.", - sender_id, - ) - content = "[Voice Message: Transcription not available for WhatsApp yet]" + if media_paths: + logger.info("Transcribing voice message from {}...", sender_id) + transcription = await self.transcribe_audio(media_paths[0]) + if transcription: + content = transcription + logger.info("Transcribed voice from {}: {}...", sender_id, transcription[:50]) + else: + content = "[Voice Message: Transcription failed]" + else: + content = "[Voice Message: Audio not available]" # Extract media paths (images/documents/videos downloaded by the bridge) media_paths = data.get("media") or [] diff --git a/nanobot/providers/transcription.py b/nanobot/providers/transcription.py index 1c8cb6a3f..d432d24fd 100644 --- a/nanobot/providers/transcription.py +++ b/nanobot/providers/transcription.py @@ -1,8 +1,36 @@ -"""Voice transcription provider using Groq.""" +"""Voice transcription providers (Groq and OpenAI Whisper).""" import os from pathlib import Path + +class OpenAITranscriptionProvider: + """Voice transcription provider using OpenAI's Whisper API.""" + + def __init__(self, api_key: str | None = None): + self.api_key = api_key or os.environ.get("OPENAI_API_KEY") + self.api_url = "https://api.openai.com/v1/audio/transcriptions" + + async def transcribe(self, file_path: str | Path) -> str: + if not self.api_key: + return "" + path = Path(file_path) + if not path.exists(): + return "" + try: + import httpx + async with httpx.AsyncClient() as client: + with open(path, "rb") as f: + files = {"file": (path.name, f), "model": (None, "whisper-1")} + headers = {"Authorization": f"Bearer {self.api_key}"} + response = await client.post( + self.api_url, headers=headers, files=files, timeout=60.0, + ) + response.raise_for_status() + return response.json().get("text", "") + except Exception: + return "" + import httpx from loguru import logger From 7b7a3e5748194e0a542ce6298281f5e192c815a0 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Mon, 6 Apr 2026 06:01:14 +0000 Subject: [PATCH 2/5] fix: media_paths NameError, import order, add error logging and tests - Move media_paths assignment before voice message handling to prevent NameError at runtime - Fix broken import layout in transcription.py (httpx/loguru after class) - Add error logging to OpenAITranscriptionProvider matching Groq style - Add regression tests for voice transcription and no-media fallback Made-with: Cursor --- nanobot/channels/whatsapp.py | 6 ++-- nanobot/providers/transcription.py | 12 ++++--- tests/channels/test_whatsapp_channel.py | 48 +++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 8 deletions(-) diff --git a/nanobot/channels/whatsapp.py b/nanobot/channels/whatsapp.py index 2d2552344..f0c07d105 100644 --- a/nanobot/channels/whatsapp.py +++ b/nanobot/channels/whatsapp.py @@ -236,6 +236,9 @@ class WhatsAppChannel(BaseChannel): sender_id = user_id.split("@")[0] if "@" in user_id else user_id logger.info("Sender {}", sender) + # Extract media paths (images/documents/videos downloaded by the bridge) + media_paths = data.get("media") or [] + # Handle voice transcription if it's a voice message if content == "[Voice Message]": if media_paths: @@ -249,9 +252,6 @@ class WhatsAppChannel(BaseChannel): else: content = "[Voice Message: Audio not available]" - # Extract media paths (images/documents/videos downloaded by the bridge) - media_paths = data.get("media") or [] - # Build content tags matching Telegram's pattern: [image: /path] or [file: /path] if media_paths: for p in media_paths: diff --git a/nanobot/providers/transcription.py b/nanobot/providers/transcription.py index d432d24fd..aca9693ee 100644 --- a/nanobot/providers/transcription.py +++ b/nanobot/providers/transcription.py @@ -3,6 +3,9 @@ import os from pathlib import Path +import httpx +from loguru import logger + class OpenAITranscriptionProvider: """Voice transcription provider using OpenAI's Whisper API.""" @@ -13,12 +16,13 @@ class OpenAITranscriptionProvider: async def transcribe(self, file_path: str | Path) -> str: if not self.api_key: + logger.warning("OpenAI API key not configured for transcription") return "" path = Path(file_path) if not path.exists(): + logger.error("Audio file not found: {}", file_path) return "" try: - import httpx async with httpx.AsyncClient() as client: with open(path, "rb") as f: files = {"file": (path.name, f), "model": (None, "whisper-1")} @@ -28,12 +32,10 @@ class OpenAITranscriptionProvider: ) response.raise_for_status() return response.json().get("text", "") - except Exception: + except Exception as e: + logger.error("OpenAI transcription error: {}", e) return "" -import httpx -from loguru import logger - class GroqTranscriptionProvider: """ diff --git a/tests/channels/test_whatsapp_channel.py b/tests/channels/test_whatsapp_channel.py index 8223fdff3..b1abb7b03 100644 --- a/tests/channels/test_whatsapp_channel.py +++ b/tests/channels/test_whatsapp_channel.py @@ -163,6 +163,54 @@ async def test_group_policy_mention_accepts_mentioned_group_message(): assert kwargs["sender_id"] == "user" +@pytest.mark.asyncio +async def test_voice_message_transcription_uses_media_path(): + """Voice messages are transcribed when media path is available.""" + ch = WhatsAppChannel( + {"enabled": True, "transcriptionProvider": "openai", "transcriptionApiKey": "sk-test"}, + MagicMock(), + ) + ch._handle_message = AsyncMock() + ch.transcribe_audio = AsyncMock(return_value="Hello world") + + await ch._handle_bridge_message( + json.dumps({ + "type": "message", + "id": "v1", + "sender": "12345@s.whatsapp.net", + "pn": "", + "content": "[Voice Message]", + "timestamp": 1, + "media": ["/tmp/voice.ogg"], + }) + ) + + ch.transcribe_audio.assert_awaited_once_with("/tmp/voice.ogg") + kwargs = ch._handle_message.await_args.kwargs + assert kwargs["content"].startswith("Hello world") + + +@pytest.mark.asyncio +async def test_voice_message_no_media_shows_not_available(): + """Voice messages without media produce a fallback placeholder.""" + ch = WhatsAppChannel({"enabled": True}, MagicMock()) + ch._handle_message = AsyncMock() + + await ch._handle_bridge_message( + json.dumps({ + "type": "message", + "id": "v2", + "sender": "12345@s.whatsapp.net", + "pn": "", + "content": "[Voice Message]", + "timestamp": 1, + }) + ) + + kwargs = ch._handle_message.await_args.kwargs + assert kwargs["content"] == "[Voice Message: Audio not available]" + + def test_load_or_create_bridge_token_persists_generated_secret(tmp_path): token_path = tmp_path / "whatsapp-auth" / "bridge-token" From 35dde8a30eb708067a5c6c6b09a8c2422fde1208 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Mon, 6 Apr 2026 06:07:30 +0000 Subject: [PATCH 3/5] refactor: unify voice transcription config across all channels - Move transcriptionProvider to global channels config (not per-channel) - ChannelManager auto-resolves API key from matching provider config - BaseChannel gets transcription_provider attribute, no more getattr hack - Remove redundant transcription fields from WhatsAppConfig - Update README: document transcriptionProvider, update provider table Made-with: Cursor --- README.md | 8 +++++--- nanobot/channels/base.py | 4 ++-- nanobot/channels/manager.py | 12 ++++++++++-- nanobot/channels/whatsapp.py | 4 ---- nanobot/config/schema.py | 1 + tests/channels/test_whatsapp_channel.py | 7 +++---- 6 files changed, 21 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 1858e1672..e42a6efe9 100644 --- a/README.md +++ b/README.md @@ -900,7 +900,7 @@ IMAP_PASSWORD=your-password-here ### Providers > [!TIP] -> - **Groq** provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed. +> - **Voice transcription**: Voice messages (Telegram, WhatsApp) are automatically transcribed using Whisper. By default Groq is used (free tier). Set `"transcriptionProvider": "openai"` under `channels` to use OpenAI Whisper instead — the API key is picked from the matching provider config. > - **MiniMax Coding Plan**: Exclusive discount links for the nanobot community: [Overseas](https://platform.minimax.io/subscribe/coding-plan?code=9txpdXw04g&source=link) · [Mainland China](https://platform.minimaxi.com/subscribe/token-plan?code=GILTJpMTqZ&source=link) > - **MiniMax (Mainland China)**: If your API key is from MiniMax's mainland China platform (minimaxi.com), set `"apiBase": "https://api.minimaxi.com/v1"` in your minimax provider config. > - **VolcEngine / BytePlus Coding Plan**: Use dedicated providers `volcengineCodingPlan` or `byteplusCodingPlan` instead of the pay-per-use `volcengine` / `byteplus` providers. @@ -916,9 +916,9 @@ IMAP_PASSWORD=your-password-here | `byteplus` | LLM (VolcEngine international, pay-per-use) | [Coding Plan](https://www.byteplus.com/en/activity/codingplan?utm_campaign=nanobot&utm_content=nanobot&utm_medium=devrel&utm_source=OWO&utm_term=nanobot) · [byteplus.com](https://www.byteplus.com) | | `anthropic` | LLM (Claude direct) | [console.anthropic.com](https://console.anthropic.com) | | `azure_openai` | LLM (Azure OpenAI) | [portal.azure.com](https://portal.azure.com) | -| `openai` | LLM (GPT direct) | [platform.openai.com](https://platform.openai.com) | +| `openai` | LLM + Voice transcription (Whisper) | [platform.openai.com](https://platform.openai.com) | | `deepseek` | LLM (DeepSeek direct) | [platform.deepseek.com](https://platform.deepseek.com) | -| `groq` | LLM + **Voice transcription** (Whisper) | [console.groq.com](https://console.groq.com) | +| `groq` | LLM + Voice transcription (Whisper, default) | [console.groq.com](https://console.groq.com) | | `minimax` | LLM (MiniMax direct) | [platform.minimaxi.com](https://platform.minimaxi.com) | | `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) | | `aihubmix` | LLM (API gateway, access to all models) | [aihubmix.com](https://aihubmix.com) | @@ -1233,6 +1233,7 @@ Global settings that apply to all channels. Configure under the `channels` secti "sendProgress": true, "sendToolHints": false, "sendMaxRetries": 3, + "transcriptionProvider": "groq", "telegram": { ... } } } @@ -1243,6 +1244,7 @@ Global settings that apply to all channels. Configure under the `channels` secti | `sendProgress` | `true` | Stream agent's text progress to the channel | | `sendToolHints` | `false` | Stream tool-call hints (e.g. `read_file("…")`) | | `sendMaxRetries` | `3` | Max delivery attempts per outbound message, including the initial send (0-10 configured, minimum 1 actual attempt) | +| `transcriptionProvider` | `"groq"` | Voice transcription backend: `"groq"` (free tier, default) or `"openai"`. API key is auto-resolved from the matching provider config. | #### Retry Behavior diff --git a/nanobot/channels/base.py b/nanobot/channels/base.py index e0bb62c0f..dd29c0851 100644 --- a/nanobot/channels/base.py +++ b/nanobot/channels/base.py @@ -22,6 +22,7 @@ class BaseChannel(ABC): name: str = "base" display_name: str = "Base" + transcription_provider: str = "groq" transcription_api_key: str = "" def __init__(self, config: Any, bus: MessageBus): @@ -41,8 +42,7 @@ class BaseChannel(ABC): if not self.transcription_api_key: return "" try: - provider_name = getattr(self, "transcription_provider", "groq") - if provider_name == "openai": + if self.transcription_provider == "openai": from nanobot.providers.transcription import OpenAITranscriptionProvider provider = OpenAITranscriptionProvider(api_key=self.transcription_api_key) else: diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index 1f26f4d7a..b52c38ca3 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -39,7 +39,8 @@ class ChannelManager: """Initialize channels discovered via pkgutil scan + entry_points plugins.""" from nanobot.channels.registry import discover_all - groq_key = self.config.providers.groq.api_key + transcription_provider = self.config.channels.transcription_provider + transcription_key = self._resolve_transcription_key(transcription_provider) for name, cls in discover_all().items(): section = getattr(self.config.channels, name, None) @@ -54,7 +55,8 @@ class ChannelManager: continue try: channel = cls(section, self.bus) - channel.transcription_api_key = groq_key + channel.transcription_provider = transcription_provider + channel.transcription_api_key = transcription_key self.channels[name] = channel logger.info("{} channel enabled", cls.display_name) except Exception as e: @@ -62,6 +64,12 @@ class ChannelManager: self._validate_allow_from() + def _resolve_transcription_key(self, provider: str) -> str: + """Pick the API key for the configured transcription provider.""" + if provider == "openai": + return self.config.providers.openai.api_key + return self.config.providers.groq.api_key + def _validate_allow_from(self) -> None: for name, ch in self.channels.items(): if getattr(ch.config, "allow_from", None) == []: diff --git a/nanobot/channels/whatsapp.py b/nanobot/channels/whatsapp.py index f0c07d105..1b46d6e97 100644 --- a/nanobot/channels/whatsapp.py +++ b/nanobot/channels/whatsapp.py @@ -27,8 +27,6 @@ class WhatsAppConfig(Base): bridge_url: str = "ws://localhost:3001" bridge_token: str = "" allow_from: list[str] = Field(default_factory=list) - transcription_provider: str = "openai" # openai or groq - transcription_api_key: str = "" group_policy: Literal["open", "mention"] = "open" # "open" responds to all, "mention" only when @mentioned @@ -77,8 +75,6 @@ class WhatsAppChannel(BaseChannel): self._ws = None self._connected = False self._processed_message_ids: OrderedDict[str, None] = OrderedDict() - self.transcription_api_key = config.transcription_api_key - self.transcription_provider = config.transcription_provider self._bridge_token: str | None = None def _effective_bridge_token(self) -> str: diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index dfb91c528..f147434e7 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -28,6 +28,7 @@ class ChannelsConfig(Base): send_progress: bool = True # stream agent's text progress to the channel send_tool_hints: bool = False # stream tool-call hints (e.g. read_file("…")) send_max_retries: int = Field(default=3, ge=0, le=10) # Max delivery attempts (initial send included) + transcription_provider: str = "groq" # Voice transcription backend: "groq" or "openai" class DreamConfig(Base): diff --git a/tests/channels/test_whatsapp_channel.py b/tests/channels/test_whatsapp_channel.py index b1abb7b03..f285e4dbe 100644 --- a/tests/channels/test_whatsapp_channel.py +++ b/tests/channels/test_whatsapp_channel.py @@ -166,10 +166,9 @@ async def test_group_policy_mention_accepts_mentioned_group_message(): @pytest.mark.asyncio async def test_voice_message_transcription_uses_media_path(): """Voice messages are transcribed when media path is available.""" - ch = WhatsAppChannel( - {"enabled": True, "transcriptionProvider": "openai", "transcriptionApiKey": "sk-test"}, - MagicMock(), - ) + ch = WhatsAppChannel({"enabled": True}, MagicMock()) + ch.transcription_provider = "openai" + ch.transcription_api_key = "sk-test" ch._handle_message = AsyncMock() ch.transcribe_audio = AsyncMock(return_value="Hello world") From 3bf1fa52253750b4d0f639e5765d3b713841ca09 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Mon, 6 Apr 2026 06:10:08 +0000 Subject: [PATCH 4/5] feat: auto-fallback to other transcription provider on failure When the primary transcription provider fails (bad key, API error, etc.), automatically try the other provider if its API key is available. Made-with: Cursor --- nanobot/channels/base.py | 24 ++++++++++++++++++------ nanobot/channels/manager.py | 12 +++++++++--- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/nanobot/channels/base.py b/nanobot/channels/base.py index dd29c0851..27d0b07a8 100644 --- a/nanobot/channels/base.py +++ b/nanobot/channels/base.py @@ -24,6 +24,7 @@ class BaseChannel(ABC): display_name: str = "Base" transcription_provider: str = "groq" transcription_api_key: str = "" + _transcription_fallback_key: str = "" def __init__(self, config: Any, bus: MessageBus): """ @@ -38,19 +39,30 @@ class BaseChannel(ABC): self._running = False async def transcribe_audio(self, file_path: str | Path) -> str: - """Transcribe an audio file via Whisper (OpenAI or Groq). Returns empty string on failure.""" + """Transcribe an audio file via Whisper. Falls back to the other provider on failure.""" if not self.transcription_api_key: return "" + result = await self._try_transcribe(self.transcription_provider, self.transcription_api_key, file_path) + if result: + return result + fallback = "groq" if self.transcription_provider == "openai" else "openai" + if self._transcription_fallback_key: + logger.info("{}: trying {} fallback for transcription", self.name, fallback) + return await self._try_transcribe(fallback, self._transcription_fallback_key, file_path) + return "" + + async def _try_transcribe(self, provider: str, api_key: str, file_path: str | Path) -> str: + """Attempt transcription with a single provider. Returns empty string on failure.""" try: - if self.transcription_provider == "openai": + if provider == "openai": from nanobot.providers.transcription import OpenAITranscriptionProvider - provider = OpenAITranscriptionProvider(api_key=self.transcription_api_key) + p = OpenAITranscriptionProvider(api_key=api_key) else: from nanobot.providers.transcription import GroqTranscriptionProvider - provider = GroqTranscriptionProvider(api_key=self.transcription_api_key) - return await provider.transcribe(file_path) + p = GroqTranscriptionProvider(api_key=api_key) + return await p.transcribe(file_path) except Exception as e: - logger.warning("{}: audio transcription failed: {}", self.name, e) + logger.warning("{}: {} transcription failed: {}", self.name, provider, e) return "" async def login(self, force: bool = False) -> bool: diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index b52c38ca3..d7bb4ef2d 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -41,6 +41,8 @@ class ChannelManager: transcription_provider = self.config.channels.transcription_provider transcription_key = self._resolve_transcription_key(transcription_provider) + fallback_provider = "groq" if transcription_provider == "openai" else "openai" + fallback_key = self._resolve_transcription_key(fallback_provider) for name, cls in discover_all().items(): section = getattr(self.config.channels, name, None) @@ -57,6 +59,7 @@ class ChannelManager: channel = cls(section, self.bus) channel.transcription_provider = transcription_provider channel.transcription_api_key = transcription_key + channel._transcription_fallback_key = fallback_key self.channels[name] = channel logger.info("{} channel enabled", cls.display_name) except Exception as e: @@ -66,9 +69,12 @@ class ChannelManager: def _resolve_transcription_key(self, provider: str) -> str: """Pick the API key for the configured transcription provider.""" - if provider == "openai": - return self.config.providers.openai.api_key - return self.config.providers.groq.api_key + try: + if provider == "openai": + return self.config.providers.openai.api_key + return self.config.providers.groq.api_key + except AttributeError: + return "" def _validate_allow_from(self) -> None: for name, ch in self.channels.items(): From 019eaff2251c940dc10af2bfbe197eb7c2f9eb07 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Mon, 6 Apr 2026 06:13:43 +0000 Subject: [PATCH 5/5] simplify: remove transcription fallback, respect explicit config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Configured provider is the only one used — no silent fallback. Made-with: Cursor --- nanobot/channels/base.py | 24 ++++++------------------ nanobot/channels/manager.py | 3 --- 2 files changed, 6 insertions(+), 21 deletions(-) diff --git a/nanobot/channels/base.py b/nanobot/channels/base.py index 27d0b07a8..dd29c0851 100644 --- a/nanobot/channels/base.py +++ b/nanobot/channels/base.py @@ -24,7 +24,6 @@ class BaseChannel(ABC): display_name: str = "Base" transcription_provider: str = "groq" transcription_api_key: str = "" - _transcription_fallback_key: str = "" def __init__(self, config: Any, bus: MessageBus): """ @@ -39,30 +38,19 @@ class BaseChannel(ABC): self._running = False async def transcribe_audio(self, file_path: str | Path) -> str: - """Transcribe an audio file via Whisper. Falls back to the other provider on failure.""" + """Transcribe an audio file via Whisper (OpenAI or Groq). Returns empty string on failure.""" if not self.transcription_api_key: return "" - result = await self._try_transcribe(self.transcription_provider, self.transcription_api_key, file_path) - if result: - return result - fallback = "groq" if self.transcription_provider == "openai" else "openai" - if self._transcription_fallback_key: - logger.info("{}: trying {} fallback for transcription", self.name, fallback) - return await self._try_transcribe(fallback, self._transcription_fallback_key, file_path) - return "" - - async def _try_transcribe(self, provider: str, api_key: str, file_path: str | Path) -> str: - """Attempt transcription with a single provider. Returns empty string on failure.""" try: - if provider == "openai": + if self.transcription_provider == "openai": from nanobot.providers.transcription import OpenAITranscriptionProvider - p = OpenAITranscriptionProvider(api_key=api_key) + provider = OpenAITranscriptionProvider(api_key=self.transcription_api_key) else: from nanobot.providers.transcription import GroqTranscriptionProvider - p = GroqTranscriptionProvider(api_key=api_key) - return await p.transcribe(file_path) + provider = GroqTranscriptionProvider(api_key=self.transcription_api_key) + return await provider.transcribe(file_path) except Exception as e: - logger.warning("{}: {} transcription failed: {}", self.name, provider, e) + logger.warning("{}: audio transcription failed: {}", self.name, e) return "" async def login(self, force: bool = False) -> bool: diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index d7bb4ef2d..aaec5e335 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -41,8 +41,6 @@ class ChannelManager: transcription_provider = self.config.channels.transcription_provider transcription_key = self._resolve_transcription_key(transcription_provider) - fallback_provider = "groq" if transcription_provider == "openai" else "openai" - fallback_key = self._resolve_transcription_key(fallback_provider) for name, cls in discover_all().items(): section = getattr(self.config.channels, name, None) @@ -59,7 +57,6 @@ class ChannelManager: channel = cls(section, self.bus) channel.transcription_provider = transcription_provider channel.transcription_api_key = transcription_key - channel._transcription_fallback_key = fallback_key self.channels[name] = channel logger.info("{} channel enabled", cls.display_name) except Exception as e: