diff --git a/nanobot/channels/whatsapp.py b/nanobot/channels/whatsapp.py index 2d2552344..f0c07d105 100644 --- a/nanobot/channels/whatsapp.py +++ b/nanobot/channels/whatsapp.py @@ -236,6 +236,9 @@ class WhatsAppChannel(BaseChannel): sender_id = user_id.split("@")[0] if "@" in user_id else user_id logger.info("Sender {}", sender) + # Extract media paths (images/documents/videos downloaded by the bridge) + media_paths = data.get("media") or [] + # Handle voice transcription if it's a voice message if content == "[Voice Message]": if media_paths: @@ -249,9 +252,6 @@ class WhatsAppChannel(BaseChannel): else: content = "[Voice Message: Audio not available]" - # Extract media paths (images/documents/videos downloaded by the bridge) - media_paths = data.get("media") or [] - # Build content tags matching Telegram's pattern: [image: /path] or [file: /path] if media_paths: for p in media_paths: diff --git a/nanobot/providers/transcription.py b/nanobot/providers/transcription.py index d432d24fd..aca9693ee 100644 --- a/nanobot/providers/transcription.py +++ b/nanobot/providers/transcription.py @@ -3,6 +3,9 @@ import os from pathlib import Path +import httpx +from loguru import logger + class OpenAITranscriptionProvider: """Voice transcription provider using OpenAI's Whisper API.""" @@ -13,12 +16,13 @@ class OpenAITranscriptionProvider: async def transcribe(self, file_path: str | Path) -> str: if not self.api_key: + logger.warning("OpenAI API key not configured for transcription") return "" path = Path(file_path) if not path.exists(): + logger.error("Audio file not found: {}", file_path) return "" try: - import httpx async with httpx.AsyncClient() as client: with open(path, "rb") as f: files = {"file": (path.name, f), "model": (None, "whisper-1")} @@ -28,12 +32,10 @@ class OpenAITranscriptionProvider: ) response.raise_for_status() return response.json().get("text", "") - except Exception: + except Exception as e: + logger.error("OpenAI transcription error: {}", e) return "" -import httpx -from loguru import logger - class GroqTranscriptionProvider: """ diff --git a/tests/channels/test_whatsapp_channel.py b/tests/channels/test_whatsapp_channel.py index 8223fdff3..b1abb7b03 100644 --- a/tests/channels/test_whatsapp_channel.py +++ b/tests/channels/test_whatsapp_channel.py @@ -163,6 +163,54 @@ async def test_group_policy_mention_accepts_mentioned_group_message(): assert kwargs["sender_id"] == "user" +@pytest.mark.asyncio +async def test_voice_message_transcription_uses_media_path(): + """Voice messages are transcribed when media path is available.""" + ch = WhatsAppChannel( + {"enabled": True, "transcriptionProvider": "openai", "transcriptionApiKey": "sk-test"}, + MagicMock(), + ) + ch._handle_message = AsyncMock() + ch.transcribe_audio = AsyncMock(return_value="Hello world") + + await ch._handle_bridge_message( + json.dumps({ + "type": "message", + "id": "v1", + "sender": "12345@s.whatsapp.net", + "pn": "", + "content": "[Voice Message]", + "timestamp": 1, + "media": ["/tmp/voice.ogg"], + }) + ) + + ch.transcribe_audio.assert_awaited_once_with("/tmp/voice.ogg") + kwargs = ch._handle_message.await_args.kwargs + assert kwargs["content"].startswith("Hello world") + + +@pytest.mark.asyncio +async def test_voice_message_no_media_shows_not_available(): + """Voice messages without media produce a fallback placeholder.""" + ch = WhatsAppChannel({"enabled": True}, MagicMock()) + ch._handle_message = AsyncMock() + + await ch._handle_bridge_message( + json.dumps({ + "type": "message", + "id": "v2", + "sender": "12345@s.whatsapp.net", + "pn": "", + "content": "[Voice Message]", + "timestamp": 1, + }) + ) + + kwargs = ch._handle_message.await_args.kwargs + assert kwargs["content"] == "[Voice Message: Audio not available]" + + def test_load_or_create_bridge_token_persists_generated_secret(tmp_path): token_path = tmp_path / "whatsapp-auth" / "bridge-token"