fix(dingtalk): block SSRF in outbound media fetches

This commit is contained in:
hinotoi-agent 2026-05-01 12:33:24 +08:00 committed by Xubin Ren
parent 0284174df9
commit ad952e0da2
2 changed files with 420 additions and 35 deletions

View File

@ -9,7 +9,7 @@ import zipfile
from io import BytesIO
from pathlib import Path
from typing import Any
from urllib.parse import unquote, urlparse
from urllib.parse import unquote, urljoin, urlparse
import httpx
from loguru import logger
@ -19,6 +19,10 @@ from nanobot.bus.events import OutboundMessage
from nanobot.bus.queue import MessageBus
from nanobot.channels.base import BaseChannel
from nanobot.config.schema import Base
from nanobot.security.network import validate_resolved_url, validate_url_target
DINGTALK_MAX_REMOTE_MEDIA_BYTES = 20 * 1024 * 1024
DINGTALK_MAX_REMOTE_MEDIA_REDIRECTS = 3
try:
from dingtalk_stream import (
@ -155,6 +159,8 @@ class DingTalkConfig(Base):
client_id: str = ""
client_secret: str = ""
allow_from: list[str] = Field(default_factory=list)
allow_remote_media_redirects: bool = False
remote_media_redirect_allowed_hosts: list[str] = Field(default_factory=list)
class DingTalkChannel(BaseChannel):
@ -281,9 +287,12 @@ class DingTalkChannel(BaseChannel):
def _guess_upload_type(self, media_ref: str) -> str:
ext = Path(urlparse(media_ref).path).suffix.lower()
if ext in self._IMAGE_EXTS: return "image"
if ext in self._AUDIO_EXTS: return "voice"
if ext in self._VIDEO_EXTS: return "video"
if ext in self._IMAGE_EXTS:
return "image"
if ext in self._AUDIO_EXTS:
return "voice"
if ext in self._VIDEO_EXTS:
return "video"
return "file"
def _guess_filename(self, media_ref: str, upload_type: str) -> str:
@ -315,6 +324,146 @@ class DingTalkChannel(BaseChannel):
return self._zip_bytes(filename, data)
return data, filename, content_type
def _validate_remote_media_url(self, media_ref: str) -> bool:
ok, err = validate_url_target(media_ref)
if not ok:
logger.warning("DingTalk remote media URL blocked ref={} reason={}", media_ref, err)
return False
return True
def _redirect_host_allowed(self, current_url: str, next_url: str) -> bool:
current_host = (urlparse(current_url).hostname or "").lower()
next_host = (urlparse(next_url).hostname or "").lower()
if not next_host:
return False
if next_host == current_host:
return True
allowed_hosts = {host.lower() for host in self.config.remote_media_redirect_allowed_hosts}
return next_host in allowed_hosts
def _next_remote_media_url(self, current_url: str, location: str | None) -> str | None:
if not self.config.allow_remote_media_redirects:
logger.warning("DingTalk media download redirect refused ref={}", current_url)
return None
if not location:
logger.warning("DingTalk media download redirect without Location ref={}", current_url)
return None
next_url = urljoin(current_url, location)
if not self._redirect_host_allowed(current_url, next_url):
logger.warning(
"DingTalk media download cross-host redirect refused ref={} next={}",
current_url,
next_url,
)
return None
if not self._validate_remote_media_url(next_url):
return None
return next_url
async def _fetch_remote_media_bytes(
self,
media_ref: str,
) -> tuple[bytes | None, str | None]:
"""Fetch a remote media URL with SSRF, redirect, and size checks."""
if not self._http:
return None, None
if not self._validate_remote_media_url(media_ref):
return None, None
try:
# Prefer streaming with a running byte cap so large responses are not
# materialized before the limit is enforced. Test fakes may only
# implement get(), so keep a small compatibility fallback below.
stream = getattr(self._http, "stream", None)
if stream is not None:
current_url = media_ref
for _ in range(DINGTALK_MAX_REMOTE_MEDIA_REDIRECTS + 1):
async with stream("GET", current_url, follow_redirects=False) as resp:
final_ok, final_err = validate_resolved_url(str(resp.url))
if not final_ok:
logger.warning(
"DingTalk remote media redirect blocked ref={} final={} reason={}",
media_ref,
resp.url,
final_err,
)
return None, None
if 300 <= resp.status_code < 400:
next_url = self._next_remote_media_url(
str(resp.url), resp.headers.get("location")
)
if not next_url:
return None, None
current_url = next_url
continue
if resp.status_code >= 400:
logger.warning(
"DingTalk media download failed status={} ref={}",
resp.status_code,
current_url,
)
return None, None
chunks: list[bytes] = []
total = 0
async for chunk in resp.aiter_bytes():
total += len(chunk)
if total > DINGTALK_MAX_REMOTE_MEDIA_BYTES:
logger.warning(
"DingTalk media download too large ref={} bytes>{}",
current_url,
DINGTALK_MAX_REMOTE_MEDIA_BYTES,
)
return None, None
chunks.append(chunk)
return b"".join(chunks), (resp.headers.get("content-type") or "")
logger.warning("DingTalk media download exceeded redirect limit ref={}", media_ref)
return None, None
current_url = media_ref
for _ in range(DINGTALK_MAX_REMOTE_MEDIA_REDIRECTS + 1):
resp = await self._http.get(current_url, follow_redirects=False)
final_ok, final_err = validate_resolved_url(str(getattr(resp, "url", current_url)))
if not final_ok:
logger.warning(
"DingTalk remote media redirect blocked ref={} final={} reason={}",
media_ref,
getattr(resp, "url", current_url),
final_err,
)
return None, None
if 300 <= resp.status_code < 400:
next_url = self._next_remote_media_url(
str(getattr(resp, "url", current_url)), resp.headers.get("location")
)
if not next_url:
return None, None
current_url = next_url
continue
if resp.status_code >= 400:
logger.warning(
"DingTalk media download failed status={} ref={}",
resp.status_code,
current_url,
)
return None, None
if len(resp.content) > DINGTALK_MAX_REMOTE_MEDIA_BYTES:
logger.warning(
"DingTalk media download too large ref={} bytes>{}",
current_url,
DINGTALK_MAX_REMOTE_MEDIA_BYTES,
)
return None, None
return resp.content, (resp.headers.get("content-type") or "")
logger.warning("DingTalk media download exceeded redirect limit ref={}", media_ref)
return None, None
except httpx.TransportError as e:
logger.error("DingTalk media download network error ref={} err={}", media_ref, e)
raise
except Exception as e:
logger.error("DingTalk media download error ref={} err={}", media_ref, e)
return None, None
async def _read_media_bytes(
self,
media_ref: str,
@ -323,26 +472,12 @@ class DingTalkChannel(BaseChannel):
return None, None, None
if self._is_http_url(media_ref):
if not self._http:
return None, None, None
try:
resp = await self._http.get(media_ref, follow_redirects=True)
if resp.status_code >= 400:
logger.warning(
"DingTalk media download failed status={} ref={}",
resp.status_code,
media_ref,
)
return None, None, None
content_type = (resp.headers.get("content-type") or "").split(";")[0].strip()
filename = self._guess_filename(media_ref, self._guess_upload_type(media_ref))
return resp.content, filename, content_type or None
except httpx.TransportError as e:
logger.error("DingTalk media download network error ref={} err={}", media_ref, e)
raise
except Exception as e:
logger.error("DingTalk media download error ref={} err={}", media_ref, e)
data, raw_content_type = await self._fetch_remote_media_bytes(media_ref)
if data is None:
return None, None, None
content_type = (raw_content_type or "").split(";")[0].strip()
filename = self._guess_filename(media_ref, self._guess_upload_type(media_ref))
return data, filename, content_type or None
try:
if media_ref.startswith("file://"):
@ -435,8 +570,10 @@ class DingTalkChannel(BaseChannel):
if resp.status_code != 200:
logger.error("DingTalk send failed msgKey={} status={} body={}", msg_key, resp.status_code, body[:500])
return False
try: result = resp.json()
except Exception: result = {}
try:
result = resp.json()
except Exception:
result = {}
errcode = result.get("errcode")
if errcode not in (None, 0):
logger.error("DingTalk send api error msgKey={} errcode={} body={}", msg_key, errcode, body[:500])

View File

@ -2,7 +2,6 @@ import asyncio
import zipfile
from io import BytesIO
from types import SimpleNamespace
from unittest.mock import AsyncMock
import httpx
import pytest
@ -17,19 +16,27 @@ except ImportError:
if not DINGTALK_AVAILABLE:
pytest.skip("DingTalk dependencies not installed (dingtalk-stream)", allow_module_level=True)
from nanobot.bus.queue import MessageBus
import nanobot.channels.dingtalk as dingtalk_module
from nanobot.channels.dingtalk import DingTalkChannel, NanobotDingTalkHandler
from nanobot.channels.dingtalk import DingTalkConfig
from nanobot.bus.queue import MessageBus
from nanobot.channels.dingtalk import DingTalkChannel, DingTalkConfig, NanobotDingTalkHandler
class _FakeResponse:
def __init__(self, status_code: int = 200, json_body: dict | None = None) -> None:
def __init__(
self,
status_code: int = 200,
json_body: dict | None = None,
*,
content: bytes = b"",
headers: dict[str, str] | None = None,
url: str = "https://example.com/file",
) -> None:
self.status_code = status_code
self._json_body = json_body or {}
self.text = "{}"
self.content = b""
self.headers = {"content-type": "application/json"}
self.text = content.decode("utf-8", errors="replace") if content else "{}"
self.content = content
self.headers = headers or {"content-type": "application/json"}
self.url = httpx.URL(url)
def json(self) -> dict:
return self._json_body
@ -46,11 +53,13 @@ class _FakeHttp:
return _FakeResponse()
async def post(self, url: str, json=None, headers=None, **kwargs):
self.calls.append({"method": "POST", "url": url, "json": json, "headers": headers})
self.calls.append(
{"method": "POST", "url": url, "json": json, "headers": headers, "kwargs": kwargs}
)
return self._next_response()
async def get(self, url: str, **kwargs):
self.calls.append({"method": "GET", "url": url})
self.calls.append({"method": "GET", "url": url, "kwargs": kwargs})
return self._next_response()
@ -242,6 +251,245 @@ async def test_download_dingtalk_file(tmp_path, monkeypatch) -> None:
assert channel._http.calls[1]["method"] == "GET"
@pytest.mark.asyncio
async def test_read_media_bytes_rejects_private_http_target_before_fetch() -> None:
"""Remote media fetches must not reach loopback/private addresses."""
channel = DingTalkChannel(
DingTalkConfig(client_id="app", client_secret="secret", allow_from=["*"]),
MessageBus(),
)
channel._http = _FakeHttp(
responses=[
_FakeResponse(
200,
content=b"internal secret",
headers={"content-type": "text/plain"},
url="http://127.0.0.1/admin.txt",
)
]
)
data, filename, content_type = await channel._read_media_bytes("http://127.0.0.1/admin.txt")
assert (data, filename, content_type) == (None, None, None)
assert channel._http.calls == []
@pytest.mark.asyncio
async def test_read_media_bytes_rejects_private_redirect_result() -> None:
"""A public-looking media URL must not be accepted after redirecting private."""
channel = DingTalkChannel(
DingTalkConfig(client_id="app", client_secret="secret", allow_from=["*"]),
MessageBus(),
)
channel._http = _FakeHttp(
responses=[
_FakeResponse(
200,
content=b"metadata bytes",
headers={"content-type": "text/plain"},
url="http://127.0.0.1/metadata",
)
]
)
data, filename, content_type = await channel._read_media_bytes("https://example.com/safe.txt")
assert (data, filename, content_type) == (None, None, None)
assert len(channel._http.calls) == 1
@pytest.mark.asyncio
async def test_read_media_bytes_rejects_oversized_remote_response(monkeypatch) -> None:
"""DingTalk media downloads should enforce a byte cap before upload."""
monkeypatch.setattr(dingtalk_module, "DINGTALK_MAX_REMOTE_MEDIA_BYTES", 8, raising=False)
channel = DingTalkChannel(
DingTalkConfig(client_id="app", client_secret="secret", allow_from=["*"]),
MessageBus(),
)
channel._http = _FakeHttp(
responses=[
_FakeResponse(
200,
content=b"123456789",
headers={"content-type": "text/plain"},
url="https://example.com/large.txt",
)
]
)
data, filename, content_type = await channel._read_media_bytes("https://example.com/large.txt")
assert (data, filename, content_type) == (None, None, None)
@pytest.mark.asyncio
async def test_read_media_bytes_does_not_follow_remote_redirects_by_default() -> None:
"""Redirects are refused by default instead of followed into internal networks."""
channel = DingTalkChannel(
DingTalkConfig(client_id="app", client_secret="secret", allow_from=["*"]),
MessageBus(),
)
channel._http = _FakeHttp(
responses=[
_FakeResponse(
302,
headers={"location": "http://127.0.0.1/metadata"},
url="https://example.com/redirect.txt",
)
]
)
data, filename, content_type = await channel._read_media_bytes("https://example.com/redirect.txt")
assert (data, filename, content_type) == (None, None, None)
assert channel._http.calls[0]["kwargs"]["follow_redirects"] is False
@pytest.mark.asyncio
async def test_read_media_bytes_follows_safe_redirect_when_explicitly_enabled() -> None:
"""Operators can opt in to public redirects without enabling private redirects."""
channel = DingTalkChannel(
DingTalkConfig(
client_id="app",
client_secret="secret",
allow_from=["*"],
allow_remote_media_redirects=True,
),
MessageBus(),
)
channel._http = _FakeHttp(
responses=[
_FakeResponse(
302,
headers={"location": "https://example.com/final.txt"},
url="https://example.com/redirect.txt",
),
_FakeResponse(
200,
content=b"redirected media",
headers={"content-type": "text/plain"},
url="https://example.com/final.txt",
),
]
)
data, filename, content_type = await channel._read_media_bytes("https://example.com/redirect.txt")
assert (data, filename, content_type) == (b"redirected media", "redirect.txt", "text/plain")
assert [call["url"] for call in channel._http.calls] == [
"https://example.com/redirect.txt",
"https://example.com/final.txt",
]
assert all(call["kwargs"]["follow_redirects"] is False for call in channel._http.calls)
@pytest.mark.asyncio
async def test_read_media_bytes_blocks_cross_host_redirect_without_allowlist() -> None:
"""Redirect opt-in should not allow arbitrary cross-host redirects by default."""
channel = DingTalkChannel(
DingTalkConfig(
client_id="app",
client_secret="secret",
allow_from=["*"],
allow_remote_media_redirects=True,
),
MessageBus(),
)
channel._http = _FakeHttp(
responses=[
_FakeResponse(
302,
headers={"location": "https://example.org/final.txt"},
url="https://example.com/redirect.txt",
),
_FakeResponse(
200,
content=b"cross-host media",
headers={"content-type": "text/plain"},
url="https://example.org/final.txt",
),
]
)
data, filename, content_type = await channel._read_media_bytes("https://example.com/redirect.txt")
assert (data, filename, content_type) == (None, None, None)
assert [call["url"] for call in channel._http.calls] == ["https://example.com/redirect.txt"]
@pytest.mark.asyncio
async def test_read_media_bytes_allows_cross_host_redirect_when_allowlisted() -> None:
"""Operators can explicitly allow a known CDN/download host for redirects."""
channel = DingTalkChannel(
DingTalkConfig(
client_id="app",
client_secret="secret",
allow_from=["*"],
allow_remote_media_redirects=True,
remote_media_redirect_allowed_hosts=["example.org"],
),
MessageBus(),
)
channel._http = _FakeHttp(
responses=[
_FakeResponse(
302,
headers={"location": "https://example.org/final.txt"},
url="https://example.com/redirect.txt",
),
_FakeResponse(
200,
content=b"cross-host media",
headers={"content-type": "text/plain"},
url="https://example.org/final.txt",
),
]
)
data, filename, content_type = await channel._read_media_bytes("https://example.com/redirect.txt")
assert (data, filename, content_type) == (b"cross-host media", "redirect.txt", "text/plain")
assert [call["url"] for call in channel._http.calls] == [
"https://example.com/redirect.txt",
"https://example.org/final.txt",
]
@pytest.mark.asyncio
async def test_read_media_bytes_blocks_private_redirect_even_when_redirects_enabled() -> None:
"""Redirect opt-in must still validate each hop before fetching it."""
channel = DingTalkChannel(
DingTalkConfig(
client_id="app",
client_secret="secret",
allow_from=["*"],
allow_remote_media_redirects=True,
),
MessageBus(),
)
channel._http = _FakeHttp(
responses=[
_FakeResponse(
302,
headers={"location": "http://127.0.0.1/metadata"},
url="https://example.com/redirect.txt",
),
_FakeResponse(
200,
content=b"internal secret",
headers={"content-type": "text/plain"},
url="http://127.0.0.1/metadata",
),
]
)
data, filename, content_type = await channel._read_media_bytes("https://example.com/redirect.txt")
assert (data, filename, content_type) == (None, None, None)
assert [call["url"] for call in channel._http.calls] == ["https://example.com/redirect.txt"]
def test_normalize_upload_payload_zips_html_attachment() -> None:
channel = DingTalkChannel(
DingTalkConfig(client_id="app", client_secret="secret", allow_from=["*"]),