From 4b0fdffe395f4a710b9e83b789f49f59adba1558 Mon Sep 17 00:00:00 2001
From: chengyongru <chengyongru.ai@gmail.com>
Date: Fri, 10 Apr 2026 16:33:57 +0800
Subject: [PATCH] fix(wecom): harden upload/download, extract media type helper

- Use asyncio.to_thread for file I/O to avoid blocking event loop
- Add 200MB upload size limit with early rejection
- Fix file handle leak by using context manager
- Use memoryview for upload chunking to reduce peak memory
- Add inbound download size check to prevent OOM
- Use asyncio.to_thread for write_bytes in download path
- Extract inline media_type detection to _guess_wecom_media_type()
---
 nanobot/channels/wecom.py | 43 +++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/nanobot/channels/wecom.py b/nanobot/channels/wecom.py
index d285d4bcd..910f02489 100644
--- a/nanobot/channels/wecom.py
+++ b/nanobot/channels/wecom.py
@@ -35,6 +35,23 @@ def _sanitize_filename(name: str) -> str:
     name = _SAFE_NAME_RE.sub("_", name).strip("._ ")
     return name
 
+
+_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}
+_VIDEO_EXTS = {".mp4", ".avi", ".mov"}
+_AUDIO_EXTS = {".amr", ".mp3", ".wav", ".ogg"}
+
+
+def _guess_wecom_media_type(filename: str) -> str:
+    """Classify file extension as WeCom media_type string."""
+    ext = Path(filename).suffix.lower()
+    if ext in _IMAGE_EXTS:
+        return "image"
+    if ext in _VIDEO_EXTS:
+        return "video"
+    if ext in _AUDIO_EXTS:
+        return "voice"
+    return "file"
+
 class WecomConfig(Base):
     """WeCom (Enterprise WeChat) AI Bot channel configuration."""
 
@@ -342,13 +359,21 @@ class WecomChannel(BaseChannel):
                 logger.warning("Failed to download media from WeCom")
                 return None
 
+            if len(data) > WECOM_UPLOAD_MAX_BYTES:
+                logger.warning(
+                    "WeCom inbound media too large: {} bytes (max {})",
+                    len(data),
+                    WECOM_UPLOAD_MAX_BYTES,
+                )
+                return None
+
             media_dir = get_media_dir("wecom")
             if not filename:
                 filename = fname or f"{media_type}_{hash(file_url) % 100000}"
             filename = _sanitize_filename(filename)
 
             file_path = media_dir / filename
-            file_path.write_bytes(data)
+            await asyncio.to_thread(file_path.write_bytes, data)
             logger.debug("Downloaded {} to {}", media_type, file_path)
             return str(file_path)
 
@@ -374,16 +399,7 @@ class WecomChannel(BaseChannel):
 
         try:
             fname = os.path.basename(file_path)
-            ext = os.path.splitext(fname)[1].lower()
-
-            if ext in (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"):
-                media_type = "image"
-            elif ext in (".mp4", ".avi", ".mov"):
-                media_type = "video"
-            elif ext in (".amr", ".mp3", ".wav", ".ogg"):
-                media_type = "voice"
-            else:
-                media_type = "file"
+            media_type = _guess_wecom_media_type(fname)
 
             # Read file size and data in a thread to avoid blocking the event loop
             def _read_file():
@@ -400,9 +416,10 @@ class WecomChannel(BaseChannel):
             md5_hash = hashlib.md5(data).hexdigest()
 
             CHUNK_SIZE = 512 * 1024  # 512 KB raw (before base64)
-            chunk_list = [data[i : i + CHUNK_SIZE] for i in range(0, file_size, CHUNK_SIZE)]
+            mv = memoryview(data)
+            chunk_list = [bytes(mv[i : i + CHUNK_SIZE]) for i in range(0, file_size, CHUNK_SIZE)]
             n_chunks = len(chunk_list)
-            del data  # free raw bytes early
+            del mv, data
 
             # Step 1: init
             req_id = _gen_req_id("upload_init")