fix(wecom): harden upload and inbound media handling

- Use asyncio.to_thread for file I/O to avoid blocking event loop
- Add 200MB upload size limit with early rejection
- Fix file handle leak by using context manager
- Free raw bytes early after chunking to reduce memory pressure
- Add file attachments to media_paths (was text-only, inconsistent with image)
- Use robust _sanitize_filename() instead of os.path.basename() for path safety
- Remove re-raise in send() for consistency with QQ channel
- Fix truncated media_id logging for short IDs
This commit is contained in:
chengyongru 2026-04-10 16:27:23 +08:00 committed by chengyongru
parent 973b888d39
commit 4fe23a01c9

View File

@ -5,7 +5,9 @@ import base64
import hashlib import hashlib
import importlib.util import importlib.util
import os import os
import re
from collections import OrderedDict from collections import OrderedDict
from pathlib import Path
from typing import Any from typing import Any
from loguru import logger from loguru import logger
@ -19,6 +21,20 @@ from pydantic import Field
WECOM_AVAILABLE = importlib.util.find_spec("wecom_aibot_sdk") is not None WECOM_AVAILABLE = importlib.util.find_spec("wecom_aibot_sdk") is not None
# Upload safety limits (matching QQ channel defaults)
# Largest local file, in bytes, that the upload path accepts; the size is
# checked with os.path.getsize() before the file contents are read.
WECOM_UPLOAD_MAX_BYTES = 1024 * 1024 * 200 # 200 MiB (209,715,200 bytes)
# Replace unsafe characters with "_", keep Chinese and common safe punctuation.
_SAFE_NAME_RE = re.compile(r"[^\w.\-()\[\]()【】\u4e00-\u9fff]+", re.UNICODE)
def _sanitize_filename(name: str) -> str:
"""Sanitize filename to avoid traversal and problematic chars."""
name = (name or "").strip()
name = Path(name).name
name = _SAFE_NAME_RE.sub("_", name).strip("._ ")
return name
class WecomConfig(Base): class WecomConfig(Base):
"""WeCom (Enterprise WeChat) AI Bot channel configuration.""" """WeCom (Enterprise WeChat) AI Bot channel configuration."""
@ -260,7 +276,8 @@ class WecomChannel(BaseChannel):
if file_url and aes_key: if file_url and aes_key:
file_path = await self._download_and_save_media(file_url, aes_key, "file", file_name) file_path = await self._download_and_save_media(file_url, aes_key, "file", file_name)
if file_path: if file_path:
content_parts.append(f"[file: {file_name}]\n[File: source: {file_path}]") content_parts.append(f"[file: {file_name}]")
media_paths.append(file_path)
else: else:
content_parts.append(f"[file: {file_name}: download failed]") content_parts.append(f"[file: {file_name}: download failed]")
else: else:
@ -328,7 +345,7 @@ class WecomChannel(BaseChannel):
media_dir = get_media_dir("wecom") media_dir = get_media_dir("wecom")
if not filename: if not filename:
filename = fname or f"{media_type}_{hash(file_url) % 100000}" filename = fname or f"{media_type}_{hash(file_url) % 100000}"
filename = os.path.basename(filename) filename = _sanitize_filename(filename)
file_path = media_dir / filename file_path = media_dir / filename
file_path.write_bytes(data) file_path.write_bytes(data)
@ -368,13 +385,24 @@ class WecomChannel(BaseChannel):
else: else:
media_type = "file" media_type = "file"
data = open(file_path, "rb").read() # noqa: SIM115 # Read file size and data in a thread to avoid blocking the event loop
file_size = len(data) def _read_file():
md5_hash = hashlib.md5(data).hexdigest() # noqa: S324 file_size = os.path.getsize(file_path)
if file_size > WECOM_UPLOAD_MAX_BYTES:
raise ValueError(
f"File too large: {file_size} bytes (max {WECOM_UPLOAD_MAX_BYTES})"
)
with open(file_path, "rb") as f:
return file_size, f.read()
file_size, data = await asyncio.to_thread(_read_file)
# MD5 is used for file integrity only, not cryptographic security
md5_hash = hashlib.md5(data).hexdigest()
CHUNK_SIZE = 512 * 1024 # 512 KB raw (before base64) CHUNK_SIZE = 512 * 1024 # 512 KB raw (before base64)
chunk_list = [data[i : i + CHUNK_SIZE] for i in range(0, file_size, CHUNK_SIZE)] chunk_list = [data[i : i + CHUNK_SIZE] for i in range(0, file_size, CHUNK_SIZE)]
n_chunks = len(chunk_list) n_chunks = len(chunk_list)
del data # free raw bytes early
# Step 1: init # Step 1: init
req_id = _gen_req_id("upload_init") req_id = _gen_req_id("upload_init")
@ -419,9 +447,13 @@ class WecomChannel(BaseChannel):
logger.warning("WeCom upload finish: no media_id in response body={}", resp.body) logger.warning("WeCom upload finish: no media_id in response body={}", resp.body)
return None, None return None, None
logger.debug("WeCom uploaded {} ({}) → media_id={}", fname, media_type, media_id[:16] + "...") suffix = "..." if len(media_id) > 16 else ""
logger.debug("WeCom uploaded {} ({}) → media_id={}", fname, media_type, media_id[:16] + suffix)
return media_id, media_type return media_id, media_type
except ValueError as e:
logger.warning("WeCom upload skipped for {}: {}", file_path, e)
return None, None
except Exception as e: except Exception as e:
logger.error("WeCom _upload_media_ws error for {}: {}", file_path, e) logger.error("WeCom _upload_media_ws error for {}: {}", file_path, e)
return None, None return None, None
@ -489,6 +521,5 @@ class WecomChannel(BaseChannel):
}) })
logger.info("WeCom proactive send to {}", msg.chat_id) logger.info("WeCom proactive send to {}", msg.chat_id)
except Exception as e: except Exception:
logger.error("Error sending WeCom message: {}", e) logger.exception("Error sending WeCom message to chat_id={}", msg.chat_id)
raise