chengyongru e4137736f6 fix(qq): handle file:// URI on Windows in _read_media_bytes
urlparse on Windows puts the path in netloc, not path. Use
(parsed.path or parsed.netloc) to get the correct raw path.
2026-03-23 15:48:31 +08:00

640 lines
22 KiB
Python

"""QQ channel implementation using botpy SDK.
Inbound:
- Parse QQ botpy messages (C2C / Group)
- Download attachments to media dir using chunked streaming write (memory-safe)
- Publish to Nanobot bus via BaseChannel._handle_message()
- Content includes a clear, actionable "Received files:" list with local paths
Outbound:
- Send attachments (msg.media) first via QQ rich media API (base64 upload + msg_type=7)
- Then send text (plain or markdown)
- msg.media supports local paths, file:// paths, and http(s) URLs
Notes:
- QQ restricts many audio/video formats. We conservatively classify as image vs file.
- Attachment structures differ across botpy versions; we try multiple field candidates.
"""
from __future__ import annotations
import asyncio
import base64
import mimetypes
import os
import re
import time
from collections import deque
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal
from urllib.parse import unquote, urlparse
import aiohttp
from loguru import logger
from pydantic import Field
from nanobot.bus.events import OutboundMessage
from nanobot.bus.queue import MessageBus
from nanobot.channels.base import BaseChannel
from nanobot.config.schema import Base
from nanobot.security.network import validate_url_target
try:
from nanobot.config.paths import get_media_dir
except Exception: # pragma: no cover
get_media_dir = None # type: ignore
try:
import botpy
from botpy.http import Route
QQ_AVAILABLE = True
except ImportError: # pragma: no cover
QQ_AVAILABLE = False
botpy = None
Route = None
if TYPE_CHECKING:
from botpy.message import BaseMessage, C2CMessage, GroupMessage
from botpy.types.message import Media
# QQ rich media file_type: 1=image, 4=file
# (2=voice, 3=video are restricted; we only use image vs file)
QQ_FILE_TYPE_IMAGE = 1
QQ_FILE_TYPE_FILE = 4
_IMAGE_EXTS = {
".png",
".jpg",
".jpeg",
".gif",
".bmp",
".webp",
".tif",
".tiff",
".ico",
".svg",
}
# Replace unsafe characters with "_", keep Chinese and common safe punctuation.
_SAFE_NAME_RE = re.compile(r"[^\w.\-()\[\]()【】\u4e00-\u9fff]+", re.UNICODE)
def _sanitize_filename(name: str) -> str:
"""Sanitize filename to avoid traversal and problematic chars."""
name = (name or "").strip()
name = Path(name).name
name = _SAFE_NAME_RE.sub("_", name).strip("._ ")
return name
def _is_image_name(name: str) -> bool:
return Path(name).suffix.lower() in _IMAGE_EXTS
def _guess_send_file_type(filename: str) -> int:
"""Conservative send type: images -> 1, else -> 4."""
ext = Path(filename).suffix.lower()
mime, _ = mimetypes.guess_type(filename)
if ext in _IMAGE_EXTS or (mime and mime.startswith("image/")):
return QQ_FILE_TYPE_IMAGE
return QQ_FILE_TYPE_FILE
def _make_bot_class(channel: QQChannel) -> type[botpy.Client]:
"""Create a botpy Client subclass bound to the given channel."""
intents = botpy.Intents(public_messages=True, direct_message=True)
class _Bot(botpy.Client):
def __init__(self):
# Disable botpy's file log — nanobot uses loguru; default "botpy.log" fails on read-only fs
super().__init__(intents=intents, ext_handlers=False)
async def on_ready(self):
logger.info("QQ bot ready: {}", self.robot.name)
async def on_c2c_message_create(self, message: C2CMessage):
await channel._on_message(message, is_group=False)
async def on_group_at_message_create(self, message: GroupMessage):
await channel._on_message(message, is_group=True)
async def on_direct_message_create(self, message):
await channel._on_message(message, is_group=False)
return _Bot
class QQConfig(Base):
"""QQ channel configuration using botpy SDK."""
enabled: bool = False
app_id: str = ""
secret: str = ""
allow_from: list[str] = Field(default_factory=list)
msg_format: Literal["plain", "markdown"] = "plain"
# Optional: directory to save inbound attachments. If empty, use nanobot get_media_dir("qq").
media_dir: str = ""
# Download tuning
download_chunk_size: int = 1024 * 256 # 256KB
download_max_bytes: int = 1024 * 1024 * 200 # 200MB safety limit
class QQChannel(BaseChannel):
"""QQ channel using botpy SDK with WebSocket connection."""
name = "qq"
display_name = "QQ"
@classmethod
def default_config(cls) -> dict[str, Any]:
return QQConfig().model_dump(by_alias=True)
def __init__(self, config: Any, bus: MessageBus):
if isinstance(config, dict):
config = QQConfig.model_validate(config)
super().__init__(config, bus)
self.config: QQConfig = config
self._client: botpy.Client | None = None
self._http: aiohttp.ClientSession | None = None
self._processed_ids: deque[str] = deque(maxlen=1000)
self._msg_seq: int = 1 # used to avoid QQ API dedup
self._chat_type_cache: dict[str, str] = {}
self._media_root: Path = self._init_media_root()
# ---------------------------
# Lifecycle
# ---------------------------
def _init_media_root(self) -> Path:
"""Choose a directory for saving inbound attachments."""
if self.config.media_dir:
root = Path(self.config.media_dir).expanduser()
elif get_media_dir:
try:
root = Path(get_media_dir("qq"))
except Exception:
root = Path.home() / ".nanobot" / "media" / "qq"
else:
root = Path.home() / ".nanobot" / "media" / "qq"
root.mkdir(parents=True, exist_ok=True)
logger.info("QQ media directory: {}", str(root))
return root
async def start(self) -> None:
"""Start the QQ bot with auto-reconnect loop."""
if not QQ_AVAILABLE:
logger.error("QQ SDK not installed. Run: pip install qq-botpy")
return
if not self.config.app_id or not self.config.secret:
logger.error("QQ app_id and secret not configured")
return
self._running = True
self._http = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=120))
self._client = _make_bot_class(self)()
logger.info("QQ bot started (C2C & Group supported)")
await self._run_bot()
async def _run_bot(self) -> None:
"""Run the bot connection with auto-reconnect."""
while self._running:
try:
await self._client.start(appid=self.config.app_id, secret=self.config.secret)
except Exception as e:
logger.warning("QQ bot error: {}", e)
if self._running:
logger.info("Reconnecting QQ bot in 5 seconds...")
await asyncio.sleep(5)
async def stop(self) -> None:
"""Stop bot and cleanup resources."""
self._running = False
if self._client:
try:
await self._client.close()
except Exception:
pass
self._client = None
if self._http:
try:
await self._http.close()
except Exception:
pass
self._http = None
logger.info("QQ bot stopped")
# ---------------------------
# Outbound (send)
# ---------------------------
async def send(self, msg: OutboundMessage) -> None:
"""Send attachments first, then text."""
if not self._client:
logger.warning("QQ client not initialized")
return
msg_id = msg.metadata.get("message_id")
chat_type = self._chat_type_cache.get(msg.chat_id, "c2c")
is_group = chat_type == "group"
# 1) Send media
for media_ref in msg.media or []:
ok = await self._send_media(
chat_id=msg.chat_id,
media_ref=media_ref,
msg_id=msg_id,
is_group=is_group,
)
if not ok:
filename = (
os.path.basename(urlparse(media_ref).path)
or os.path.basename(media_ref)
or "file"
)
await self._send_text_only(
chat_id=msg.chat_id,
is_group=is_group,
msg_id=msg_id,
content=f"[Attachment send failed: {filename}]",
)
# 2) Send text
if msg.content and msg.content.strip():
await self._send_text_only(
chat_id=msg.chat_id,
is_group=is_group,
msg_id=msg_id,
content=msg.content.strip(),
)
async def _send_text_only(
self,
chat_id: str,
is_group: bool,
msg_id: str | None,
content: str,
) -> None:
"""Send a plain/markdown text message."""
if not self._client:
return
self._msg_seq += 1
use_markdown = self.config.msg_format == "markdown"
payload: dict[str, Any] = {
"msg_type": 2 if use_markdown else 0,
"msg_id": msg_id,
"msg_seq": self._msg_seq,
}
if use_markdown:
payload["markdown"] = {"content": content}
else:
payload["content"] = content
if is_group:
await self._client.api.post_group_message(group_openid=chat_id, **payload)
else:
await self._client.api.post_c2c_message(openid=chat_id, **payload)
async def _send_media(
self,
chat_id: str,
media_ref: str,
msg_id: str | None,
is_group: bool,
) -> bool:
"""Read bytes -> base64 upload -> msg_type=7 send."""
if not self._client:
return False
data, filename = await self._read_media_bytes(media_ref)
if not data or not filename:
return False
try:
file_type = _guess_send_file_type(filename)
file_data_b64 = base64.b64encode(data).decode()
media_obj = await self._post_base64file(
chat_id=chat_id,
is_group=is_group,
file_type=file_type,
file_data=file_data_b64,
file_name=filename,
srv_send_msg=False,
)
if not media_obj:
logger.error("QQ media upload failed: empty response")
return False
self._msg_seq += 1
if is_group:
await self._client.api.post_group_message(
group_openid=chat_id,
msg_type=7,
msg_id=msg_id,
msg_seq=self._msg_seq,
media=media_obj,
)
else:
await self._client.api.post_c2c_message(
openid=chat_id,
msg_type=7,
msg_id=msg_id,
msg_seq=self._msg_seq,
media=media_obj,
)
logger.info("QQ media sent: {}", filename)
return True
except Exception as e:
logger.error("QQ send media failed filename={} err={}", filename, e)
return False
async def _read_media_bytes(self, media_ref: str) -> tuple[bytes | None, str | None]:
"""Read bytes from http(s) or local file path; return (data, filename)."""
media_ref = (media_ref or "").strip()
if not media_ref:
return None, None
# Local file: plain path or file:// URI
if not media_ref.startswith("http://") and not media_ref.startswith("https://"):
try:
if media_ref.startswith("file://"):
parsed = urlparse(media_ref)
# Windows: path in netloc; Unix: path in path
raw = parsed.path or parsed.netloc
local_path = Path(unquote(raw))
else:
local_path = Path(os.path.expanduser(media_ref))
if not local_path.is_file():
logger.warning("QQ outbound media file not found: {}", str(local_path))
return None, None
data = await asyncio.to_thread(local_path.read_bytes)
return data, local_path.name
except Exception as e:
logger.warning("QQ outbound media read error ref={} err={}", media_ref, e)
return None, None
# Remote URL
ok, err = validate_url_target(media_ref)
if not ok:
logger.warning("QQ outbound media URL validation failed url={} err={}", media_ref, err)
return None, None
if not self._http:
self._http = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=120))
try:
async with self._http.get(media_ref, allow_redirects=True) as resp:
if resp.status >= 400:
logger.warning(
"QQ outbound media download failed status={} url={}",
resp.status,
media_ref,
)
return None, None
data = await resp.read()
if not data:
return None, None
filename = os.path.basename(urlparse(media_ref).path) or "file.bin"
return data, filename
except Exception as e:
logger.warning("QQ outbound media download error url={} err={}", media_ref, e)
return None, None
# https://github.com/tencent-connect/botpy/issues/198
# https://bot.q.qq.com/wiki/develop/api-v2/server-inter/message/send-receive/rich-media.html
async def _post_base64file(
self,
chat_id: str,
is_group: bool,
file_type: int,
file_data: str,
file_name: str | None = None,
srv_send_msg: bool = False,
) -> Media:
"""Upload base64-encoded file and return Media object."""
if not self._client:
raise RuntimeError("QQ client not initialized")
if is_group:
endpoint = "/v2/groups/{group_openid}/files"
id_key = "group_openid"
else:
endpoint = "/v2/users/{openid}/files"
id_key = "openid"
payload = {
id_key: chat_id,
"file_type": file_type,
"file_data": file_data,
"file_name": file_name,
"srv_send_msg": srv_send_msg,
}
route = Route("POST", endpoint, **{id_key: chat_id})
return await self._client.api._http.request(route, json=payload)
# ---------------------------
# Inbound (receive)
# ---------------------------
async def _on_message(self, data: C2CMessage | GroupMessage, is_group: bool = False) -> None:
"""Parse inbound message, download attachments, and publish to the bus."""
if data.id in self._processed_ids:
return
self._processed_ids.append(data.id)
if is_group:
chat_id = data.group_openid
user_id = data.author.member_openid
self._chat_type_cache[chat_id] = "group"
else:
chat_id = str(
getattr(data.author, "id", None) or getattr(data.author, "user_openid", "unknown")
)
user_id = chat_id
self._chat_type_cache[chat_id] = "c2c"
content = (data.content or "").strip()
# the data used by tests don't contain attachments property
# so we use getattr with a default of [] to avoid AttributeError in tests
attachments = getattr(data, "attachments", None) or []
media_paths, recv_lines, att_meta = await self._handle_attachments(attachments)
# Compose content that always contains actionable saved paths
if recv_lines:
tag = "[Image]" if any(_is_image_name(Path(p).name) for p in media_paths) else "[File]"
file_block = "Received files:\n" + "\n".join(recv_lines)
content = f"{content}\n\n{file_block}".strip() if content else f"{tag}\n{file_block}"
if not content and not media_paths:
return
await self._handle_message(
sender_id=user_id,
chat_id=chat_id,
content=content,
media=media_paths if media_paths else None,
metadata={
"message_id": data.id,
"attachments": att_meta,
},
)
async def _handle_attachments(
self,
attachments: list[BaseMessage._Attachments],
) -> tuple[list[str], list[str], list[dict[str, Any]]]:
"""Extract, download (chunked), and format attachments for agent consumption."""
media_paths: list[str] = []
recv_lines: list[str] = []
att_meta: list[dict[str, Any]] = []
if not attachments:
return media_paths, recv_lines, att_meta
for att in attachments:
url, filename, ctype = att.url, att.filename, att.content_type
logger.info("Downloading file from QQ: {}", filename or url)
local_path = await self._download_to_media_dir_chunked(url, filename_hint=filename)
att_meta.append(
{
"url": url,
"filename": filename,
"content_type": ctype,
"saved_path": local_path,
}
)
if local_path:
media_paths.append(local_path)
shown_name = filename or os.path.basename(local_path)
recv_lines.append(f"- {shown_name}\n saved: {local_path}")
else:
shown_name = filename or url
recv_lines.append(f"- {shown_name}\n saved: [download failed]")
return media_paths, recv_lines, att_meta
async def _download_to_media_dir_chunked(
self,
url: str,
filename_hint: str = "",
) -> str | None:
"""Download an inbound attachment using streaming chunk write.
Uses chunked streaming to avoid loading large files into memory.
Enforces a max download size and writes to a .part temp file
that is atomically renamed on success.
"""
if not self._http:
self._http = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=120))
safe = _sanitize_filename(filename_hint)
ts = int(time.time() * 1000)
tmp_path: Path | None = None
try:
async with self._http.get(
url,
timeout=aiohttp.ClientTimeout(total=120),
allow_redirects=True,
) as resp:
if resp.status != 200:
logger.warning("QQ download failed: status={} url={}", resp.status, url)
return None
ctype = (resp.headers.get("Content-Type") or "").lower()
# Infer extension: url -> filename_hint -> content-type -> fallback
ext = Path(urlparse(url).path).suffix
if not ext:
ext = Path(filename_hint).suffix
if not ext:
if "png" in ctype:
ext = ".png"
elif "jpeg" in ctype or "jpg" in ctype:
ext = ".jpg"
elif "gif" in ctype:
ext = ".gif"
elif "webp" in ctype:
ext = ".webp"
elif "pdf" in ctype:
ext = ".pdf"
else:
ext = ".bin"
if safe:
if not Path(safe).suffix:
safe = safe + ext
filename = safe
else:
filename = f"qq_file_{ts}{ext}"
target = self._media_root / filename
if target.exists():
target = self._media_root / f"{target.stem}_{ts}{target.suffix}"
tmp_path = target.with_suffix(target.suffix + ".part")
# Stream write
downloaded = 0
chunk_size = max(1024, int(self.config.download_chunk_size or 262144))
max_bytes = max(
1024 * 1024, int(self.config.download_max_bytes or (200 * 1024 * 1024))
)
def _open_tmp():
tmp_path.parent.mkdir(parents=True, exist_ok=True)
return open(tmp_path, "wb") # noqa: SIM115
f = await asyncio.to_thread(_open_tmp)
try:
async for chunk in resp.content.iter_chunked(chunk_size):
if not chunk:
continue
downloaded += len(chunk)
if downloaded > max_bytes:
logger.warning(
"QQ download exceeded max_bytes={} url={} -> abort",
max_bytes,
url,
)
return None
await asyncio.to_thread(f.write, chunk)
finally:
await asyncio.to_thread(f.close)
# Atomic rename
await asyncio.to_thread(os.replace, tmp_path, target)
tmp_path = None # mark as moved
logger.info("QQ file saved: {}", str(target))
return str(target)
except Exception as e:
logger.error("QQ download error: {}", e)
return None
finally:
# Cleanup partial file
if tmp_path is not None:
try:
tmp_path.unlink(missing_ok=True)
except Exception:
pass