fix(weixin): align full_url AES key handling and quoted media fallback logic with reference

1. Require an AES key on the full_url path for non-image media and skip the download when the key is missing, instead of persisting encrypted bytes as valid media. 2. Trigger the quoted-media fallback only when no top-level media item exists, not when a top-level media download or decryption fails.
2026-04-25 12:26:00 +00:00 · 2026-03-29 20:27:23 +08:00 · 2026-03-29 20:27:23 +08:00 · ed2ca759e7
commit ed2ca759e7
parent 79a915307c
2 changed files with 83 additions and 1 deletions
--- a/nanobot/channels/weixin.py
+++ b/nanobot/channels/weixin.py
@ -116,6 +116,12 @@ _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp", ".tiff", ".ico"
 _VIDEO_EXTS = {".mp4", ".avi", ".mov", ".mkv", ".webm", ".flv"}
 def _has_downloadable_media_locator(media: dict[str, Any] | None) -> bool:
    if not isinstance(media, dict):
        return False
    return bool(str(media.get("encrypt_query_param", "") or "") or str(media.get("full_url", "") or "").strip())
 class WeixinConfig(Base):
    """Personal WeChat channel configuration."""
@ -611,6 +617,7 @@ class WeixinChannel(BaseChannel):
        item_list: list[dict] = msg.get("item_list") or []
        content_parts: list[str] = []
        media_paths: list[str] = []
        has_top_level_downloadable_media = False
        for item in item_list:
            item_type = item.get("type", 0)
@ -647,6 +654,8 @@ class WeixinChannel(BaseChannel):
            elif item_type == ITEM_IMAGE:
                image_item = item.get("image_item") or {}
                if _has_downloadable_media_locator(image_item.get("media")):
                    has_top_level_downloadable_media = True
                file_path = await self._download_media_item(image_item, "image")
                if file_path:
                    content_parts.append(f"[image]\n[Image: source: {file_path}]")
@ -661,6 +670,8 @@ class WeixinChannel(BaseChannel):
                if voice_text:
                    content_parts.append(f"[voice] {voice_text}")
                else:
                    if _has_downloadable_media_locator(voice_item.get("media")):
                        has_top_level_downloadable_media = True
                    file_path = await self._download_media_item(voice_item, "voice")
                    if file_path:
                        transcription = await self.transcribe_audio(file_path)
@ -674,6 +685,8 @@ class WeixinChannel(BaseChannel):
            elif item_type == ITEM_FILE:
                file_item = item.get("file_item") or {}
                if _has_downloadable_media_locator(file_item.get("media")):
                    has_top_level_downloadable_media = True
                file_name = file_item.get("file_name", "unknown")
                file_path = await self._download_media_item(
                    file_item,
@ -688,6 +701,8 @@ class WeixinChannel(BaseChannel):
            elif item_type == ITEM_VIDEO:
                video_item = item.get("video_item") or {}
                if _has_downloadable_media_locator(video_item.get("media")):
                    has_top_level_downloadable_media = True
                file_path = await self._download_media_item(video_item, "video")
                if file_path:
                    content_parts.append(f"[video]\n[Video: source: {file_path}]")
@ -698,7 +713,7 @@ class WeixinChannel(BaseChannel):
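        # Fallback: only when nothing was downloaded AND the message carries no top-level
        # downloadable media item (a failed top-level download does not count), try
        # quoted/referenced media.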
        # This aligns with the reference plugin behavior that checks ref_msg.message_item
        # when main item_list has no downloadable media.
-        if not media_paths:
+        if not media_paths and not has_top_level_downloadable_media:
            ref_media_item: dict[str, Any] | None = None
            for item in item_list:
                if item.get("type", 0) != ITEM_TEXT:
@ -793,6 +808,12 @@ class WeixinChannel(BaseChannel):
            elif media_aes_key_b64:
                aes_key_b64 = media_aes_key_b64
            # Reference protocol behavior: VOICE/FILE/VIDEO require aes_key;
            # only IMAGE may be downloaded as plain bytes when key is missing.
            if media_type != "image" and not aes_key_b64:
                logger.debug("Missing AES key for {} item, skip media download", media_type)
                return None
            # Prefer server-provided full_url, fallback to encrypted_query_param URL construction.
            if full_url:
                cdn_url = full_url
--- a/tests/channels/test_weixin_channel.py
+++ b/tests/channels/test_weixin_channel.py
@ -250,6 +250,46 @@ async def test_process_message_does_not_use_referenced_fallback_when_top_level_m
    assert "/tmp/ref.jpg" not in inbound.content
@pytest.mark.asyncio
async def test_process_message_does_not_fallback_when_top_level_media_exists_but_download_fails() -> None:
    """A failed top-level image download must not trigger the quoted-media fallback."""
    channel, bus = _make_channel()

    # First await yields None (the top-level download fails); the second value
    # would only be consumed if the referenced-media fallback were triggered.
    download_mock = AsyncMock(side_effect=[None, "/tmp/ref.jpg"])
    channel._download_media_item = download_mock

    top_level_image = {
        "type": ITEM_IMAGE,
        "image_item": {"media": {"encrypt_query_param": "top-enc"}},
    }
    quoted_image = {
        "type": ITEM_IMAGE,
        "image_item": {"media": {"encrypt_query_param": "ref-enc"}},
    }
    text_with_quote = {
        "type": ITEM_TEXT,
        "text_item": {"text": "quoted has media"},
        "ref_msg": {"message_item": quoted_image},
    }
    message = {
        "message_type": 1,
        "message_id": "m3-ref-no-fallback-on-failure",
        "from_user_id": "wx-user",
        "context_token": "ctx-3-ref-no-fallback-on-failure",
        "item_list": [top_level_image, text_with_quote],
    }

    await channel._process_message(message)
    inbound = await asyncio.wait_for(bus.consume_inbound(), timeout=1.0)

    # Exactly one download attempt, and it is for the top-level item only.
    # The expected argument is a fresh literal so the comparison does not
    # depend on the dict object that was handed to the channel.
    download_mock.assert_awaited_once_with(
        {"media": {"encrypt_query_param": "top-enc"}},
        "image",
    )

    assert inbound.media == []
    assert "[image]" in inbound.content
    assert "/tmp/ref.jpg" not in inbound.content
@pytest.mark.asyncio
 async def test_send_without_context_token_does_not_send_text() -> None:
    channel, _bus = _make_channel()
@ -613,3 +653,24 @@ async def test_download_media_item_falls_back_to_encrypt_query_param(tmp_path) -
    assert Path(saved_path).read_bytes() == b"fallback-bytes"
    called_url = channel._client.get.await_args_list[0].args[0]
    assert called_url.startswith(f"{channel.config.cdn_base_url}/download?encrypted_query_param=enc-fallback")
@pytest.mark.asyncio
async def test_download_media_item_non_image_requires_aes_key_even_with_full_url(tmp_path) -> None:
    """A voice item with only ``full_url`` and no AES key is skipped without any HTTP request."""
    channel, _bus = _make_channel()

    full_url = "https://cdn.example.test/download/voice"
    channel._client = SimpleNamespace(
        get=AsyncMock(return_value=_DummyDownloadResponse(content=b"ciphertext-or-unknown"))
    )
    # No aes_key anywhere in the item: only the server-provided URL is present.
    item = {"media": {"full_url": full_url}}

    # Patch the module-level media-dir lookup for this test only and restore it
    # afterwards, so the override (pointing at this test's tmp_path) does not
    # leak into tests that run later in the same session.
    original_get_media_dir = weixin_mod.get_media_dir
    weixin_mod.get_media_dir = lambda _name: tmp_path
    try:
        saved_path = await channel._download_media_item(item, "voice")
    finally:
        weixin_mod.get_media_dir = original_get_media_dir

    assert saved_path is None
    channel._client.get.assert_not_awaited()