Merge PR #2531: fix(whatsapp): detect phone vs LID by JID suffix, not field name

fix(whatsapp): detect phone vs LID by JID suffix, not field name
2026-05-23 18:12:32 +00:00 · 2026-04-06 14:21:06 +08:00 · 2026-04-06 14:21:06 +08:00 · 219c9c6137
commit 219c9c6137
parent 4c6a4321e0 897d5a7e58
2 changed files with 77 additions and 3 deletions
--- a/nanobot/channels/whatsapp.py
+++ b/nanobot/channels/whatsapp.py
@@ -75,6 +75,7 @@ class WhatsAppChannel(BaseChannel):
        self._ws = None
        self._connected = False
        self._processed_message_ids: OrderedDict[str, None] = OrderedDict()
+        self._lid_to_phone: dict[str, str] = {}
        self._bridge_token: str | None = None

    def _effective_bridge_token(self) -> str:
@@ -228,9 +229,28 @@ class WhatsAppChannel(BaseChannel):
                if not was_mentioned:
                    return

-            user_id = pn if pn else sender
-            sender_id = user_id.split("@")[0] if "@" in user_id else user_id
-            logger.info("Sender {}", sender)
+            # Classify by JID suffix: @s.whatsapp.net = phone, @lid.whatsapp.net = LID
+            # The bridge's pn/sender fields don't consistently map to phone/LID across versions.
+            raw_a = pn or ""
+            raw_b = sender or ""
+            id_a = raw_a.split("@")[0] if "@" in raw_a else raw_a
+            id_b = raw_b.split("@")[0] if "@" in raw_b else raw_b
+
+            phone_id = ""
+            lid_id = ""
+            for raw, extracted in [(raw_a, id_a), (raw_b, id_b)]:
+                if "@s.whatsapp.net" in raw:
+                    phone_id = extracted
+                elif "@lid.whatsapp.net" in raw:
+                    lid_id = extracted
+                elif extracted and not phone_id:
+                    phone_id = extracted  # best guess for bare values
+
+            if phone_id and lid_id:
+                self._lid_to_phone[lid_id] = phone_id
+            sender_id = phone_id or self._lid_to_phone.get(lid_id, "") or lid_id or id_a or id_b
+
+            logger.info("Sender phone={} lid={} → sender_id={}", phone_id or "(empty)", lid_id or "(empty)", sender_id)

            # Extract media paths (images/documents/videos downloaded by the bridge)
            media_paths = data.get("media") or []
--- a/tests/channels/test_whatsapp_channel.py
+++ b/tests/channels/test_whatsapp_channel.py
@@ -163,6 +163,60 @@ async def test_group_policy_mention_accepts_mentioned_group_message():
    assert kwargs["sender_id"] == "user"


+@pytest.mark.asyncio
+async def test_sender_id_prefers_phone_jid_over_lid():
+    """sender_id should resolve to phone number when @s.whatsapp.net JID is present."""
+    ch = WhatsAppChannel({"enabled": True}, MagicMock())
+    ch._handle_message = AsyncMock()
+
+    await ch._handle_bridge_message(
+        json.dumps({
+            "type": "message",
+            "id": "lid1",
+            "sender": "ABC123@lid.whatsapp.net",
+            "pn": "5551234@s.whatsapp.net",
+            "content": "hi",
+            "timestamp": 1,
+        })
+    )
+
+    kwargs = ch._handle_message.await_args.kwargs
+    assert kwargs["sender_id"] == "5551234"
+
+
+@pytest.mark.asyncio
+async def test_lid_to_phone_cache_resolves_lid_only_messages():
+    """When only LID is present, a cached LID→phone mapping should be used."""
+    ch = WhatsAppChannel({"enabled": True}, MagicMock())
+    ch._handle_message = AsyncMock()
+
+    # First message: both phone and LID → builds cache
+    await ch._handle_bridge_message(
+        json.dumps({
+            "type": "message",
+            "id": "c1",
+            "sender": "LID99@lid.whatsapp.net",
+            "pn": "5559999@s.whatsapp.net",
+            "content": "first",
+            "timestamp": 1,
+        })
+    )
+    # Second message: only LID, no phone
+    await ch._handle_bridge_message(
+        json.dumps({
+            "type": "message",
+            "id": "c2",
+            "sender": "LID99@lid.whatsapp.net",
+            "pn": "",
+            "content": "second",
+            "timestamp": 2,
+        })
+    )
+
+    second_kwargs = ch._handle_message.await_args_list[1].kwargs
+    assert second_kwargs["sender_id"] == "5559999"
+
+
@pytest.mark.asyncio
 async def test_voice_message_transcription_uses_media_path():
    """Voice messages are transcribed when media path is available."""