mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-04-07 03:33:42 +00:00
urlparse on Windows puts the path in netloc, not path. Use (parsed.path or parsed.netloc) to get the correct raw path.
640 lines
22 KiB
Python
640 lines
22 KiB
Python
"""QQ channel implementation using botpy SDK.

Inbound:
- Parse QQ botpy messages (C2C / Group)
- Download attachments to media dir using chunked streaming write (memory-safe)
- Publish to Nanobot bus via BaseChannel._handle_message()
- Content includes a clear, actionable "Received files:" list with local paths

Outbound:
- Send attachments (msg.media) first via QQ rich media API (base64 upload + msg_type=7)
- Then send text (plain or markdown)
- msg.media supports local paths, file:// paths, and http(s) URLs

Notes:
- QQ restricts many audio/video formats. We conservatively classify as image vs file.
- Attachment structures differ across botpy versions; we try multiple field candidates.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import base64
|
|
import mimetypes
|
|
import os
|
|
import re
|
|
import time
|
|
from collections import deque
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING, Any, Literal
|
|
from urllib.parse import unquote, urlparse
|
|
|
|
import aiohttp
|
|
from loguru import logger
|
|
from pydantic import Field
|
|
|
|
from nanobot.bus.events import OutboundMessage
|
|
from nanobot.bus.queue import MessageBus
|
|
from nanobot.channels.base import BaseChannel
|
|
from nanobot.config.schema import Base
|
|
from nanobot.security.network import validate_url_target
|
|
|
|
try:
|
|
from nanobot.config.paths import get_media_dir
|
|
except Exception: # pragma: no cover
|
|
get_media_dir = None # type: ignore
|
|
|
|
try:
|
|
import botpy
|
|
from botpy.http import Route
|
|
|
|
QQ_AVAILABLE = True
|
|
except ImportError: # pragma: no cover
|
|
QQ_AVAILABLE = False
|
|
botpy = None
|
|
Route = None
|
|
|
|
if TYPE_CHECKING:
|
|
from botpy.message import BaseMessage, C2CMessage, GroupMessage
|
|
from botpy.types.message import Media
|
|
|
|
|
|
# QQ rich media ``file_type`` codes used by this module:
#   1 = image, 4 = generic file.
# Codes 2 (voice) and 3 (video) are restricted, so everything that is not an
# image is sent as a plain file.
QQ_FILE_TYPE_IMAGE = 1
QQ_FILE_TYPE_FILE = 4

# Lower-case file suffixes (leading dot included) that count as images.
_IMAGE_EXTS = {
    ".png", ".jpg", ".jpeg", ".gif", ".bmp",
    ".webp", ".tif", ".tiff", ".ico", ".svg",
}
|
|
|
|
# Matches every run of characters that is NOT on the whitelist (word characters,
# ".", "-", round/square/CJK brackets, CJK ideographs). Each run becomes one "_".
_SAFE_NAME_RE = re.compile(r"[^\w.\-()\[\]()【】\u4e00-\u9fff]+", re.UNICODE)


def _sanitize_filename(name: str) -> str:
    """Reduce *name* to a single safe path component.

    Steps: trim whitespace, keep only the last path component (drops any
    directory part such as ``../``), replace each run of non-whitelisted
    characters with ``_``, then strip leading/trailing ``.``, ``_`` and spaces.

    A falsy *name* (including ``None``) yields ``""``; the result may also be
    empty when nothing safe remains.
    """
    last_component = Path((name or "").strip()).name
    return _SAFE_NAME_RE.sub("_", last_component).strip("._ ")
|
|
|
|
|
|
def _is_image_name(name: str) -> bool:
    """Return True when *name*'s suffix (case-insensitive) is in ``_IMAGE_EXTS``."""
    suffix = Path(name).suffix
    return suffix.lower() in _IMAGE_EXTS
|
|
|
|
|
|
def _guess_send_file_type(filename: str) -> int:
    """Pick the QQ rich-media ``file_type`` for an outbound file.

    Returns ``QQ_FILE_TYPE_IMAGE`` when either the suffix is a known image
    suffix or ``mimetypes`` guesses an ``image/*`` type; otherwise
    ``QQ_FILE_TYPE_FILE``.
    """
    suffix = Path(filename).suffix.lower()
    guessed_mime = mimetypes.guess_type(filename)[0]
    looks_like_image = suffix in _IMAGE_EXTS or (
        guessed_mime and guessed_mime.startswith("image/")
    )
    return QQ_FILE_TYPE_IMAGE if looks_like_image else QQ_FILE_TYPE_FILE
|
|
|
|
|
|
def _make_bot_class(channel: QQChannel) -> type[botpy.Client]:
    """Build a ``botpy.Client`` subclass whose event handlers forward to *channel*.

    The returned class takes no constructor arguments. Each message event is
    passed to ``channel._on_message`` with the matching ``is_group`` flag.
    """
    bot_intents = botpy.Intents(public_messages=True, direct_message=True)

    class _Bot(botpy.Client):
        def __init__(self):
            # Disable botpy's file log: nanobot logs through loguru, and the
            # default "botpy.log" fails on a read-only filesystem.
            super().__init__(intents=bot_intents, ext_handlers=False)

        async def on_ready(self):
            logger.info("QQ bot ready: {}", self.robot.name)

        # C2C (one-to-one) message.
        async def on_c2c_message_create(self, message: C2CMessage):
            await channel._on_message(message, is_group=False)

        # Group message that @-mentions the bot.
        async def on_group_at_message_create(self, message: GroupMessage):
            await channel._on_message(message, is_group=True)

        # Direct message; handled like C2C.
        async def on_direct_message_create(self, message):
            await channel._on_message(message, is_group=False)

    return _Bot
|
|
|
|
|
|
class QQConfig(Base):
    """Settings for the QQ channel (botpy SDK)."""

    enabled: bool = False
    app_id: str = ""
    secret: str = ""
    allow_from: list[str] = Field(default_factory=list)
    # "markdown" sends msg_type=2 with a markdown payload; "plain" sends msg_type=0.
    msg_format: Literal["plain", "markdown"] = "plain"

    # Directory for inbound attachments. Empty means: fall back to
    # nanobot's get_media_dir("qq").
    media_dir: str = ""

    # Inbound download tuning.
    download_chunk_size: int = 256 * 1024  # 256KB per streamed chunk
    download_max_bytes: int = 200 * 1024 * 1024  # 200MB safety limit
|
|
|
|
|
|
class QQChannel(BaseChannel):
    """QQ channel using botpy SDK with WebSocket connection.

    Inbound C2C / group messages are de-duplicated by message id, their
    attachments are downloaded into ``self._media_root`` and the result is
    forwarded through ``self._handle_message()``. Outbound messages send each
    media reference first (base64 upload + ``msg_type=7``), then the text.
    """

    name = "qq"
    display_name = "QQ"

    # Total timeout (seconds) for the shared aiohttp session and for downloads.
    _HTTP_TIMEOUT_S = 120

    @classmethod
    def default_config(cls) -> dict[str, Any]:
        """Return a default ``QQConfig`` dumped to a dict (alias keys)."""
        return QQConfig().model_dump(by_alias=True)

    def __init__(self, config: Any, bus: MessageBus):
        """Create the channel.

        Args:
            config: A ``QQConfig`` or a dict that validates as one.
            bus: Message bus handed to ``BaseChannel``.
        """
        if isinstance(config, dict):
            config = QQConfig.model_validate(config)
        super().__init__(config, bus)
        self.config: QQConfig = config

        self._client: botpy.Client | None = None
        self._http: aiohttp.ClientSession | None = None

        # Recently seen inbound message ids (bounded) used for de-duplication.
        self._processed_ids: deque[str] = deque(maxlen=1000)
        self._msg_seq: int = 1  # used to avoid QQ API dedup
        # chat_id -> "group" | "c2c", learned from inbound messages.
        self._chat_type_cache: dict[str, str] = {}

        self._media_root: Path = self._init_media_root()

    # ---------------------------
    # Lifecycle
    # ---------------------------

    def _init_media_root(self) -> Path:
        """Choose (and create) the directory for saving inbound attachments.

        Priority: ``config.media_dir`` -> ``get_media_dir("qq")`` when that
        helper was importable -> ``~/.nanobot/media/qq``.
        """
        if self.config.media_dir:
            root = Path(self.config.media_dir).expanduser()
        elif get_media_dir:
            try:
                root = Path(get_media_dir("qq"))
            except Exception:
                root = Path.home() / ".nanobot" / "media" / "qq"
        else:
            root = Path.home() / ".nanobot" / "media" / "qq"

        root.mkdir(parents=True, exist_ok=True)
        logger.info("QQ media directory: {}", str(root))
        return root

    def _ensure_http(self) -> aiohttp.ClientSession:
        """Return the shared HTTP session, creating it on first use."""
        if not self._http:
            self._http = aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(total=self._HTTP_TIMEOUT_S)
            )
        return self._http

    async def start(self) -> None:
        """Start the QQ bot with auto-reconnect loop.

        Returns early (after logging an error) when the SDK is missing or
        ``app_id`` / ``secret`` are not configured. Otherwise this coroutine
        keeps running until ``stop()`` clears ``self._running``.
        """
        if not QQ_AVAILABLE:
            logger.error("QQ SDK not installed. Run: pip install qq-botpy")
            return

        if not self.config.app_id or not self.config.secret:
            logger.error("QQ app_id and secret not configured")
            return

        self._running = True
        self._http = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=self._HTTP_TIMEOUT_S)
        )

        self._client = _make_bot_class(self)()
        logger.info("QQ bot started (C2C & Group supported)")
        await self._run_bot()

    async def _run_bot(self) -> None:
        """Run the bot connection, reconnecting after 5 seconds while running."""
        while self._running:
            try:
                await self._client.start(appid=self.config.app_id, secret=self.config.secret)
            except Exception as e:
                logger.warning("QQ bot error: {}", e)
            if self._running:
                logger.info("Reconnecting QQ bot in 5 seconds...")
                await asyncio.sleep(5)

    async def stop(self) -> None:
        """Stop bot and cleanup resources.

        Closing the client and the HTTP session is best-effort: failures are
        logged at debug level and never raised.
        """
        self._running = False
        if self._client:
            try:
                await self._client.close()
            except Exception as e:
                logger.debug("QQ client close failed (ignored): {}", e)
            self._client = None

        if self._http:
            try:
                await self._http.close()
            except Exception as e:
                logger.debug("QQ HTTP session close failed (ignored): {}", e)
            self._http = None

        logger.info("QQ bot stopped")

    # ---------------------------
    # Outbound (send)
    # ---------------------------

    async def send(self, msg: OutboundMessage) -> None:
        """Send attachments first, then text.

        The chat type is looked up in ``_chat_type_cache`` and defaults to
        C2C. A failed attachment is reported to the chat as a short text
        message instead of aborting the rest of the send.
        """
        if not self._client:
            logger.warning("QQ client not initialized")
            return

        # Tolerate a missing metadata mapping.
        msg_id = (msg.metadata or {}).get("message_id")
        chat_type = self._chat_type_cache.get(msg.chat_id, "c2c")
        is_group = chat_type == "group"

        # 1) Send media
        for media_ref in msg.media or []:
            ok = await self._send_media(
                chat_id=msg.chat_id,
                media_ref=media_ref,
                msg_id=msg_id,
                is_group=is_group,
            )
            if not ok:
                filename = (
                    os.path.basename(urlparse(media_ref).path)
                    or os.path.basename(media_ref)
                    or "file"
                )
                await self._send_text_only(
                    chat_id=msg.chat_id,
                    is_group=is_group,
                    msg_id=msg_id,
                    content=f"[Attachment send failed: {filename}]",
                )

        # 2) Send text
        if msg.content and msg.content.strip():
            await self._send_text_only(
                chat_id=msg.chat_id,
                is_group=is_group,
                msg_id=msg_id,
                content=msg.content.strip(),
            )

    async def _send_text_only(
        self,
        chat_id: str,
        is_group: bool,
        msg_id: str | None,
        content: str,
    ) -> None:
        """Send a plain/markdown text message.

        ``config.msg_format`` selects ``msg_type`` 2 (markdown payload) or
        0 (plain ``content``). ``_msg_seq`` is bumped for every message sent.
        """
        if not self._client:
            return

        self._msg_seq += 1
        use_markdown = self.config.msg_format == "markdown"
        payload: dict[str, Any] = {
            "msg_type": 2 if use_markdown else 0,
            "msg_id": msg_id,
            "msg_seq": self._msg_seq,
        }
        if use_markdown:
            payload["markdown"] = {"content": content}
        else:
            payload["content"] = content

        if is_group:
            await self._client.api.post_group_message(group_openid=chat_id, **payload)
        else:
            await self._client.api.post_c2c_message(openid=chat_id, **payload)

    async def _send_media(
        self,
        chat_id: str,
        media_ref: str,
        msg_id: str | None,
        is_group: bool,
    ) -> bool:
        """Read bytes -> base64 upload -> msg_type=7 send.

        Returns:
            True when the media message was posted; False when the bytes could
            not be read, the upload returned nothing, or any step raised.
        """
        if not self._client:
            return False

        data, filename = await self._read_media_bytes(media_ref)
        if not data or not filename:
            return False

        try:
            file_type = _guess_send_file_type(filename)
            file_data_b64 = base64.b64encode(data).decode()

            media_obj = await self._post_base64file(
                chat_id=chat_id,
                is_group=is_group,
                file_type=file_type,
                file_data=file_data_b64,
                file_name=filename,
                srv_send_msg=False,
            )
            if not media_obj:
                logger.error("QQ media upload failed: empty response")
                return False

            self._msg_seq += 1
            if is_group:
                await self._client.api.post_group_message(
                    group_openid=chat_id,
                    msg_type=7,
                    msg_id=msg_id,
                    msg_seq=self._msg_seq,
                    media=media_obj,
                )
            else:
                await self._client.api.post_c2c_message(
                    openid=chat_id,
                    msg_type=7,
                    msg_id=msg_id,
                    msg_seq=self._msg_seq,
                    media=media_obj,
                )

            logger.info("QQ media sent: {}", filename)
            return True
        except Exception as e:
            logger.error("QQ send media failed filename={} err={}", filename, e)
            return False

    async def _read_media_bytes(self, media_ref: str) -> tuple[bytes | None, str | None]:
        """Read bytes from http(s) or local file path; return (data, filename).

        Anything that does not start with ``http://`` / ``https://`` is treated
        as a local path (plain or ``file://``). Remote URLs must pass
        ``validate_url_target`` before being fetched. On any failure the
        result is ``(None, None)``.
        """
        media_ref = (media_ref or "").strip()
        if not media_ref:
            return None, None

        # Local file: plain path or file:// URI
        if not media_ref.startswith(("http://", "https://")):
            try:
                if media_ref.startswith("file://"):
                    parsed = urlparse(media_ref)
                    # Windows: path in netloc; Unix: path in path
                    raw = parsed.path or parsed.netloc
                    local_path = Path(unquote(raw))
                else:
                    local_path = Path(os.path.expanduser(media_ref))

                if not local_path.is_file():
                    logger.warning("QQ outbound media file not found: {}", str(local_path))
                    return None, None

                data = await asyncio.to_thread(local_path.read_bytes)
                return data, local_path.name
            except Exception as e:
                logger.warning("QQ outbound media read error ref={} err={}", media_ref, e)
                return None, None

        # Remote URL
        ok, err = validate_url_target(media_ref)
        if not ok:
            logger.warning("QQ outbound media URL validation failed url={} err={}", media_ref, err)
            return None, None

        http = self._ensure_http()
        try:
            async with http.get(media_ref, allow_redirects=True) as resp:
                if resp.status >= 400:
                    logger.warning(
                        "QQ outbound media download failed status={} url={}",
                        resp.status,
                        media_ref,
                    )
                    return None, None
                data = await resp.read()
                if not data:
                    return None, None
                # Percent-decode before taking the basename so "my%20file.png"
                # is uploaded as "my file.png", matching the file:// branch.
                filename = os.path.basename(unquote(urlparse(media_ref).path)) or "file.bin"
                return data, filename
        except Exception as e:
            logger.warning("QQ outbound media download error url={} err={}", media_ref, e)
            return None, None

    # https://github.com/tencent-connect/botpy/issues/198
    # https://bot.q.qq.com/wiki/develop/api-v2/server-inter/message/send-receive/rich-media.html
    async def _post_base64file(
        self,
        chat_id: str,
        is_group: bool,
        file_type: int,
        file_data: str,
        file_name: str | None = None,
        srv_send_msg: bool = False,
    ) -> Media:
        """Upload base64-encoded file and return Media object.

        Posts to ``/v2/groups/{group_openid}/files`` or
        ``/v2/users/{openid}/files`` through the botpy client's HTTP layer.

        Raises:
            RuntimeError: If the client has not been created yet.
        """
        if not self._client:
            raise RuntimeError("QQ client not initialized")

        if is_group:
            endpoint = "/v2/groups/{group_openid}/files"
            id_key = "group_openid"
        else:
            endpoint = "/v2/users/{openid}/files"
            id_key = "openid"

        payload = {
            id_key: chat_id,
            "file_type": file_type,
            "file_data": file_data,
            "file_name": file_name,
            "srv_send_msg": srv_send_msg,
        }
        route = Route("POST", endpoint, **{id_key: chat_id})
        return await self._client.api._http.request(route, json=payload)

    # ---------------------------
    # Inbound (receive)
    # ---------------------------

    async def _on_message(self, data: C2CMessage | GroupMessage, is_group: bool = False) -> None:
        """Parse inbound message, download attachments, and publish to the bus.

        Messages whose id was already seen are dropped. Messages with neither
        text nor downloaded media are dropped as well.
        """
        if data.id in self._processed_ids:
            return
        self._processed_ids.append(data.id)

        if is_group:
            chat_id = data.group_openid
            user_id = data.author.member_openid
            self._chat_type_cache[chat_id] = "group"
        else:
            chat_id = str(
                getattr(data.author, "id", None) or getattr(data.author, "user_openid", "unknown")
            )
            user_id = chat_id
            self._chat_type_cache[chat_id] = "c2c"

        content = (data.content or "").strip()

        # the data used by tests don't contain attachments property
        # so we use getattr with a default of [] to avoid AttributeError in tests
        attachments = getattr(data, "attachments", None) or []
        media_paths, recv_lines, att_meta = await self._handle_attachments(attachments)

        # Compose content that always contains actionable saved paths
        if recv_lines:
            tag = "[Image]" if any(_is_image_name(Path(p).name) for p in media_paths) else "[File]"
            file_block = "Received files:\n" + "\n".join(recv_lines)
            content = f"{content}\n\n{file_block}".strip() if content else f"{tag}\n{file_block}"

        if not content and not media_paths:
            return

        await self._handle_message(
            sender_id=user_id,
            chat_id=chat_id,
            content=content,
            media=media_paths if media_paths else None,
            metadata={
                "message_id": data.id,
                "attachments": att_meta,
            },
        )

    async def _handle_attachments(
        self,
        attachments: list[BaseMessage._Attachments],
    ) -> tuple[list[str], list[str], list[dict[str, Any]]]:
        """Extract, download (chunked), and format attachments for agent consumption.

        Returns:
            ``(media_paths, recv_lines, att_meta)``: local paths of successful
            downloads, one human-readable line per attachment (success or
            failure), and one metadata dict per attachment.
        """
        media_paths: list[str] = []
        recv_lines: list[str] = []
        att_meta: list[dict[str, Any]] = []

        if not attachments:
            return media_paths, recv_lines, att_meta

        for att in attachments:
            # Fields may be absent or None depending on the attachment object,
            # so read them defensively instead of failing the whole message.
            url = getattr(att, "url", None)
            filename = getattr(att, "filename", None)
            ctype = getattr(att, "content_type", None)

            if url:
                logger.info("Downloading file from QQ: {}", filename or url)
                local_path = await self._download_to_media_dir_chunked(
                    url, filename_hint=filename or ""
                )
            else:
                logger.warning("QQ attachment has no url, skipping download: {}", filename)
                local_path = None

            att_meta.append(
                {
                    "url": url,
                    "filename": filename,
                    "content_type": ctype,
                    "saved_path": local_path,
                }
            )

            if local_path:
                media_paths.append(local_path)
                shown_name = filename or os.path.basename(local_path)
                recv_lines.append(f"- {shown_name}\n saved: {local_path}")
            else:
                shown_name = filename or url or "unknown"
                recv_lines.append(f"- {shown_name}\n saved: [download failed]")

        return media_paths, recv_lines, att_meta

    @staticmethod
    def _infer_extension(url: str, filename_hint: str, ctype: str) -> str:
        """Infer a file extension: url -> filename_hint -> content-type -> ".bin"."""
        ext = Path(urlparse(url).path).suffix or Path(filename_hint).suffix
        if ext:
            return ext
        # Substring match on the lower-cased Content-Type; order is significant.
        for needles, guessed in (
            (("png",), ".png"),
            (("jpeg", "jpg"), ".jpg"),
            (("gif",), ".gif"),
            (("webp",), ".webp"),
            (("pdf",), ".pdf"),
        ):
            if any(needle in ctype for needle in needles):
                return guessed
        return ".bin"

    async def _download_to_media_dir_chunked(
        self,
        url: str,
        filename_hint: str = "",
    ) -> str | None:
        """Download an inbound attachment using streaming chunk write.

        Uses chunked streaming to avoid loading large files into memory.
        Enforces a max download size and writes to a .part temp file
        that is renamed onto the final name (``os.replace``) on success.

        Args:
            url: Attachment URL.
            filename_hint: Suggested file name; ``None`` is treated as ``""``.

        Returns:
            The saved file path as a string, or ``None`` on a non-200
            response, size overrun, or any error.
        """
        http = self._ensure_http()

        # A None hint used to crash Path(filename_hint) and fail the download.
        filename_hint = filename_hint or ""
        safe = _sanitize_filename(filename_hint)
        ts = int(time.time() * 1000)
        tmp_path: Path | None = None

        try:
            async with http.get(
                url,
                timeout=aiohttp.ClientTimeout(total=self._HTTP_TIMEOUT_S),
                allow_redirects=True,
            ) as resp:
                if resp.status != 200:
                    logger.warning("QQ download failed: status={} url={}", resp.status, url)
                    return None

                ctype = (resp.headers.get("Content-Type") or "").lower()
                ext = self._infer_extension(url, filename_hint, ctype)

                if safe:
                    if not Path(safe).suffix:
                        safe = safe + ext
                    filename = safe
                else:
                    filename = f"qq_file_{ts}{ext}"

                target = self._media_root / filename
                if target.exists():
                    # Avoid overwriting an earlier file with the same name.
                    target = self._media_root / f"{target.stem}_{ts}{target.suffix}"

                tmp_path = target.with_suffix(target.suffix + ".part")

                # Stream write; configured values are clamped to sane minimums.
                downloaded = 0
                chunk_size = max(1024, int(self.config.download_chunk_size or 262144))
                max_bytes = max(
                    1024 * 1024, int(self.config.download_max_bytes or (200 * 1024 * 1024))
                )

                def _open_tmp():
                    tmp_path.parent.mkdir(parents=True, exist_ok=True)
                    return open(tmp_path, "wb")  # noqa: SIM115

                f = await asyncio.to_thread(_open_tmp)
                try:
                    async for chunk in resp.content.iter_chunked(chunk_size):
                        if not chunk:
                            continue
                        downloaded += len(chunk)
                        if downloaded > max_bytes:
                            logger.warning(
                                "QQ download exceeded max_bytes={} url={} -> abort",
                                max_bytes,
                                url,
                            )
                            return None
                        await asyncio.to_thread(f.write, chunk)
                finally:
                    await asyncio.to_thread(f.close)

                # Atomic rename
                await asyncio.to_thread(os.replace, tmp_path, target)
                tmp_path = None  # mark as moved
                logger.info("QQ file saved: {}", str(target))
                return str(target)

        except Exception as e:
            logger.error("QQ download error: {}", e)
            return None
        finally:
            # Cleanup partial file (best-effort)
            if tmp_path is not None:
                try:
                    tmp_path.unlink(missing_ok=True)
                except Exception as e:
                    logger.debug("QQ partial file cleanup failed (ignored): {}", e)
|