mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-04-02 09:22:36 +00:00
fix(weixin): align full_url AES key handling and quoted media fallback logic with reference
1. Fix full_url path for non-image media to require AES key and skip download when missing, instead of persisting encrypted bytes as valid media. 2. Restrict quoted media fallback trigger to only when no top-level media item exists, not when top-level media download/decryption fails.
This commit is contained in:
parent
345c393e53
commit
0514233217
@ -116,6 +116,12 @@ _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp", ".tiff", ".ico"
|
||||
_VIDEO_EXTS = {".mp4", ".avi", ".mov", ".mkv", ".webm", ".flv"}
|
||||
|
||||
|
||||
def _has_downloadable_media_locator(media: dict[str, Any] | None) -> bool:
|
||||
if not isinstance(media, dict):
|
||||
return False
|
||||
return bool(str(media.get("encrypt_query_param", "") or "") or str(media.get("full_url", "") or "").strip())
|
||||
|
||||
|
||||
class WeixinConfig(Base):
|
||||
"""Personal WeChat channel configuration."""
|
||||
|
||||
@ -611,6 +617,7 @@ class WeixinChannel(BaseChannel):
|
||||
item_list: list[dict] = msg.get("item_list") or []
|
||||
content_parts: list[str] = []
|
||||
media_paths: list[str] = []
|
||||
has_top_level_downloadable_media = False
|
||||
|
||||
for item in item_list:
|
||||
item_type = item.get("type", 0)
|
||||
@ -647,6 +654,8 @@ class WeixinChannel(BaseChannel):
|
||||
|
||||
elif item_type == ITEM_IMAGE:
|
||||
image_item = item.get("image_item") or {}
|
||||
if _has_downloadable_media_locator(image_item.get("media")):
|
||||
has_top_level_downloadable_media = True
|
||||
file_path = await self._download_media_item(image_item, "image")
|
||||
if file_path:
|
||||
content_parts.append(f"[image]\n[Image: source: {file_path}]")
|
||||
@ -661,6 +670,8 @@ class WeixinChannel(BaseChannel):
|
||||
if voice_text:
|
||||
content_parts.append(f"[voice] {voice_text}")
|
||||
else:
|
||||
if _has_downloadable_media_locator(voice_item.get("media")):
|
||||
has_top_level_downloadable_media = True
|
||||
file_path = await self._download_media_item(voice_item, "voice")
|
||||
if file_path:
|
||||
transcription = await self.transcribe_audio(file_path)
|
||||
@ -674,6 +685,8 @@ class WeixinChannel(BaseChannel):
|
||||
|
||||
elif item_type == ITEM_FILE:
|
||||
file_item = item.get("file_item") or {}
|
||||
if _has_downloadable_media_locator(file_item.get("media")):
|
||||
has_top_level_downloadable_media = True
|
||||
file_name = file_item.get("file_name", "unknown")
|
||||
file_path = await self._download_media_item(
|
||||
file_item,
|
||||
@ -688,6 +701,8 @@ class WeixinChannel(BaseChannel):
|
||||
|
||||
elif item_type == ITEM_VIDEO:
|
||||
video_item = item.get("video_item") or {}
|
||||
if _has_downloadable_media_locator(video_item.get("media")):
|
||||
has_top_level_downloadable_media = True
|
||||
file_path = await self._download_media_item(video_item, "video")
|
||||
if file_path:
|
||||
content_parts.append(f"[video]\n[Video: source: {file_path}]")
|
||||
@ -698,7 +713,7 @@ class WeixinChannel(BaseChannel):
|
||||
# Fallback: only when no top-level item carries downloadable media (not when its download merely failed), try quoted/referenced media.
|
||||
# This aligns with the reference plugin behavior that checks ref_msg.message_item
|
||||
# when main item_list has no downloadable media.
|
||||
if not media_paths:
|
||||
if not media_paths and not has_top_level_downloadable_media:
|
||||
ref_media_item: dict[str, Any] | None = None
|
||||
for item in item_list:
|
||||
if item.get("type", 0) != ITEM_TEXT:
|
||||
@ -793,6 +808,12 @@ class WeixinChannel(BaseChannel):
|
||||
elif media_aes_key_b64:
|
||||
aes_key_b64 = media_aes_key_b64
|
||||
|
||||
# Reference protocol behavior: VOICE/FILE/VIDEO require aes_key;
|
||||
# only IMAGE may be downloaded as plain bytes when key is missing.
|
||||
if media_type != "image" and not aes_key_b64:
|
||||
logger.debug("Missing AES key for {} item, skip media download", media_type)
|
||||
return None
|
||||
|
||||
# Prefer server-provided full_url, fallback to encrypted_query_param URL construction.
|
||||
if full_url:
|
||||
cdn_url = full_url
|
||||
|
||||
@ -250,6 +250,46 @@ async def test_process_message_does_not_use_referenced_fallback_when_top_level_m
|
||||
assert "/tmp/ref.jpg" not in inbound.content
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_process_message_does_not_fallback_when_top_level_media_exists_but_download_fails() -> None:
    """A failed top-level image download must not trigger the quoted-media fallback."""
    channel, bus = _make_channel()
    # The first await (top-level image) yields None; a second await would hand
    # back the referenced image path and thereby expose an unwanted fallback.
    download_mock = AsyncMock(side_effect=[None, "/tmp/ref.jpg"])
    channel._download_media_item = download_mock

    top_level_image = {
        "type": ITEM_IMAGE,
        "image_item": {"media": {"encrypt_query_param": "top-enc"}},
    }
    quoted_image = {
        "type": ITEM_IMAGE,
        "image_item": {"media": {"encrypt_query_param": "ref-enc"}},
    }
    text_quoting_image = {
        "type": ITEM_TEXT,
        "text_item": {"text": "quoted has media"},
        "ref_msg": {"message_item": quoted_image},
    }
    message = {
        "message_type": 1,
        "message_id": "m3-ref-no-fallback-on-failure",
        "from_user_id": "wx-user",
        "context_token": "ctx-3-ref-no-fallback-on-failure",
        "item_list": [top_level_image, text_quoting_image],
    }

    await channel._process_message(message)

    inbound = await asyncio.wait_for(bus.consume_inbound(), timeout=1.0)

    # Exactly one download attempt, and it targets the top-level image item.
    download_mock.assert_awaited_once_with(
        {"media": {"encrypt_query_param": "top-enc"}},
        "image",
    )
    assert inbound.media == []
    assert "[image]" in inbound.content
    assert "/tmp/ref.jpg" not in inbound.content
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_without_context_token_does_not_send_text() -> None:
|
||||
channel, _bus = _make_channel()
|
||||
@ -613,3 +653,24 @@ async def test_download_media_item_falls_back_to_encrypt_query_param(tmp_path) -
|
||||
assert Path(saved_path).read_bytes() == b"fallback-bytes"
|
||||
called_url = channel._client.get.await_args_list[0].args[0]
|
||||
assert called_url.startswith(f"{channel.config.cdn_base_url}/download?encrypted_query_param=enc-fallback")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_download_media_item_non_image_requires_aes_key_even_with_full_url(tmp_path) -> None:
    """A voice item with ``full_url`` but no AES key is skipped without any HTTP call."""
    channel, _bus = _make_channel()

    # Put the module-level hook back afterwards so the patched media directory
    # does not leak into tests that run later in the same session.
    original_get_media_dir = weixin_mod.get_media_dir
    weixin_mod.get_media_dir = lambda _name: tmp_path
    try:
        full_url = "https://cdn.example.test/download/voice"
        channel._client = SimpleNamespace(
            get=AsyncMock(return_value=_DummyDownloadResponse(content=b"ciphertext-or-unknown"))
        )

        # No aes_key anywhere in the item: only a direct URL is provided.
        item = {
            "media": {
                "full_url": full_url,
            },
        }
        saved_path = await channel._download_media_item(item, "voice")

        assert saved_path is None
        channel._client.get.assert_not_awaited()
    finally:
        weixin_mod.get_media_dir = original_get_media_dir
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user