fix(feishu): confine downloaded media filenames

This commit is contained in:
hinotoi-agent 2026-05-14 23:43:06 +08:00 committed by Xubin Ren
parent 26665823e3
commit 39db5c4846
2 changed files with 60 additions and 2 deletions

View File

@ -22,6 +22,7 @@ from nanobot.bus.queue import MessageBus
from nanobot.channels.base import BaseChannel
from nanobot.config.paths import get_media_dir
from nanobot.config.schema import Base
from nanobot.utils.helpers import safe_filename
from nanobot.utils.logging_bridge import redirect_lib_logging
# True when the optional ``lark_oapi`` package can be located by the import
# system; find_spec() only looks the module up, it does not import it.
FEISHU_AVAILABLE = importlib.util.find_spec("lark_oapi") is not None
@ -1044,6 +1045,19 @@ class FeishuChannel(BaseChannel):
self.logger.exception("Error downloading {} {}", resource_type, file_key)
return None, None
@staticmethod
def _safe_media_filename(filename: str | None, fallback: str) -> str:
    """Return a local-only filename for downloaded Feishu media.

    ``filename`` is tried first and ``fallback`` second. Each one is reduced
    to its final path component (POSIX and Windows separators are both
    treated as path boundaries) and then passed through the shared
    ``safe_filename`` sanitizer. The first result that is a usable name wins.

    Args:
        filename: Name reported for the downloaded resource; may be ``None``
            or empty.
        fallback: Name to use when ``filename`` is missing or does not
            sanitize to a usable name.

    Returns:
        A name that is never empty, ``"."`` or ``".."``. If neither input
        yields such a name, a random ``uuid4`` hex string is returned.
    """
    # Feishu/Lark filenames come from message metadata, and the fallback is
    # built from message keys, so neither is trusted: both go through the
    # same basename + sanitizer + dot-name checks so a download cannot
    # escape (or resolve to) the channel media dir.
    for raw_name in (filename, fallback):
        if not raw_name:
            continue
        candidate = os.path.basename(raw_name.replace("\\", "/"))
        candidate = safe_filename(candidate)
        if candidate not in ("", ".", ".."):
            return candidate
    # Nothing usable survived sanitizing; generate a unique local name.
    return uuid.uuid4().hex
async def _download_and_save_media(
self, msg_type: str, content_json: dict, message_id: str | None = None
) -> tuple[str | None, str]:
@ -1057,15 +1071,17 @@ class FeishuChannel(BaseChannel):
media_dir = get_media_dir("feishu")
data, filename = None, None
fallback_filename = uuid.uuid4().hex
if msg_type == "image":
image_key = content_json.get("image_key")
if image_key and message_id:
fallback_filename = f"{image_key[:16]}.jpg"
data, filename = await loop.run_in_executor(
None, self._download_image_sync, message_id, image_key
)
if not filename:
filename = f"{image_key[:16]}.jpg"
filename = fallback_filename
elif msg_type in ("audio", "file", "media"):
file_key = content_json.get("file_key")
@ -1076,6 +1092,7 @@ class FeishuChannel(BaseChannel):
self.logger.warning("{} message missing message_id", msg_type)
return None, f"[{msg_type}: missing message_id]"
fallback_filename = file_key[:16]
data, filename = await loop.run_in_executor(
None, self._download_file_sync, message_id, file_key, msg_type
)
@ -1085,7 +1102,9 @@ class FeishuChannel(BaseChannel):
return None, f"[{msg_type}: download failed]"
if not filename:
filename = file_key[:16]
filename = fallback_filename
filename = self._safe_media_filename(filename, fallback_filename)
# Feishu voice messages are opus in OGG container.
# Use .ogg extension for better Whisper compatibility.
@ -1094,6 +1113,7 @@ class FeishuChannel(BaseChannel):
filename = f"{filename}.ogg"
if data and filename:
filename = self._safe_media_filename(filename, fallback_filename)
file_path = media_dir / filename
file_path.write_bytes(data)
path_str = str(file_path)

View File

@ -0,0 +1,38 @@
from pathlib import Path
from types import SimpleNamespace
import pytest
from nanobot.channels import feishu as feishu_module
from nanobot.channels.feishu import FeishuChannel
@pytest.mark.asyncio
async def test_feishu_downloaded_media_filename_cannot_escape_media_dir(monkeypatch, tmp_path):
    """A traversal-style download filename must still be saved inside the media dir."""
    sandbox = tmp_path / "media"
    sandbox.mkdir()
    # Where "../escaped.txt" would land if the filename were used verbatim.
    escape_target = tmp_path / "escaped.txt"
    monkeypatch.setattr(feishu_module, "get_media_dir", lambda _channel: sandbox)

    def _silent(*args, **kwargs):
        return None

    def _stub_download(_message_id, _file_key, _resource_type):
        return b"owned", "../escaped.txt"

    # __new__ creates the instance without running __init__, so only the
    # attributes assigned below exist on the channel.
    feishu = FeishuChannel.__new__(FeishuChannel)
    feishu.logger = SimpleNamespace(debug=_silent, warning=_silent)
    feishu._download_file_sync = _stub_download

    path_str, content = await feishu._download_and_save_media(
        "file", {"file_key": "fk_123"}, "msg_123"
    )
    written = Path(path_str)

    assert not escape_target.exists()
    assert written.parent == sandbox
    assert written.name == "escaped.txt"
    assert written.read_bytes() == b"owned"
    assert content == f"[file: {written}]"