From ad952e0da2e6852821afd64a0d06febbcd8d81d2 Mon Sep 17 00:00:00 2001 From: hinotoi-agent Date: Fri, 1 May 2026 12:33:24 +0800 Subject: [PATCH] fix(dingtalk): block SSRF in outbound media fetches --- nanobot/channels/dingtalk.py | 187 ++++++++++++++--- tests/channels/test_dingtalk_channel.py | 268 +++++++++++++++++++++++- 2 files changed, 420 insertions(+), 35 deletions(-) diff --git a/nanobot/channels/dingtalk.py b/nanobot/channels/dingtalk.py index a863ba0df..609a7fa54 100644 --- a/nanobot/channels/dingtalk.py +++ b/nanobot/channels/dingtalk.py @@ -9,7 +9,7 @@ import zipfile from io import BytesIO from pathlib import Path from typing import Any -from urllib.parse import unquote, urlparse +from urllib.parse import unquote, urljoin, urlparse import httpx from loguru import logger @@ -19,6 +19,10 @@ from nanobot.bus.events import OutboundMessage from nanobot.bus.queue import MessageBus from nanobot.channels.base import BaseChannel from nanobot.config.schema import Base +from nanobot.security.network import validate_resolved_url, validate_url_target + +DINGTALK_MAX_REMOTE_MEDIA_BYTES = 20 * 1024 * 1024 +DINGTALK_MAX_REMOTE_MEDIA_REDIRECTS = 3 try: from dingtalk_stream import ( @@ -155,6 +159,8 @@ class DingTalkConfig(Base): client_id: str = "" client_secret: str = "" allow_from: list[str] = Field(default_factory=list) + allow_remote_media_redirects: bool = False + remote_media_redirect_allowed_hosts: list[str] = Field(default_factory=list) class DingTalkChannel(BaseChannel): @@ -281,9 +287,12 @@ class DingTalkChannel(BaseChannel): def _guess_upload_type(self, media_ref: str) -> str: ext = Path(urlparse(media_ref).path).suffix.lower() - if ext in self._IMAGE_EXTS: return "image" - if ext in self._AUDIO_EXTS: return "voice" - if ext in self._VIDEO_EXTS: return "video" + if ext in self._IMAGE_EXTS: + return "image" + if ext in self._AUDIO_EXTS: + return "voice" + if ext in self._VIDEO_EXTS: + return "video" return "file" def _guess_filename(self, media_ref: str, upload_type: str) -> str: @@ -315,6 +324,146 @@ class DingTalkChannel(BaseChannel): return self._zip_bytes(filename, data) return data, filename, content_type + def _validate_remote_media_url(self, media_ref: str) -> bool: + ok, err = validate_url_target(media_ref) + if not ok: + logger.warning("DingTalk remote media URL blocked ref={} reason={}", media_ref, err) + return False + return True + + def _redirect_host_allowed(self, current_url: str, next_url: str) -> bool: + current_host = (urlparse(current_url).hostname or "").lower() + next_host = (urlparse(next_url).hostname or "").lower() + if not next_host: + return False + if next_host == current_host: + return True + allowed_hosts = {host.lower() for host in self.config.remote_media_redirect_allowed_hosts} + return next_host in allowed_hosts + + def _next_remote_media_url(self, current_url: str, location: str | None) -> str | None: + if not self.config.allow_remote_media_redirects: + logger.warning("DingTalk media download redirect refused ref={}", current_url) + return None + if not location: + logger.warning("DingTalk media download redirect without Location ref={}", current_url) + return None + next_url = urljoin(current_url, location) + if not self._redirect_host_allowed(current_url, next_url): + logger.warning( + "DingTalk media download cross-host redirect refused ref={} next={}", + current_url, + next_url, + ) + return None + if not self._validate_remote_media_url(next_url): + return None + return next_url + + async def _fetch_remote_media_bytes( + self, + media_ref: str, + ) -> tuple[bytes | None, str | None]: + """Fetch a remote media URL with SSRF, redirect, and size checks.""" + if not self._http: + return None, None + + if not self._validate_remote_media_url(media_ref): + return None, None + + try: + # Prefer streaming with a running byte cap so large responses are not + # materialized before the limit is enforced. Test fakes may only + # implement get(), so keep a small compatibility fallback below. + stream = getattr(self._http, "stream", None) + if stream is not None: + current_url = media_ref + for _ in range(DINGTALK_MAX_REMOTE_MEDIA_REDIRECTS + 1): + async with stream("GET", current_url, follow_redirects=False) as resp: + final_ok, final_err = validate_resolved_url(str(resp.url)) + if not final_ok: + logger.warning( + "DingTalk remote media redirect blocked ref={} final={} reason={}", + media_ref, + resp.url, + final_err, + ) + return None, None + if 300 <= resp.status_code < 400: + next_url = self._next_remote_media_url( + str(resp.url), resp.headers.get("location") + ) + if not next_url: + return None, None + current_url = next_url + continue + if resp.status_code >= 400: + logger.warning( + "DingTalk media download failed status={} ref={}", + resp.status_code, + current_url, + ) + return None, None + chunks: list[bytes] = [] + total = 0 + async for chunk in resp.aiter_bytes(): + total += len(chunk) + if total > DINGTALK_MAX_REMOTE_MEDIA_BYTES: + logger.warning( + "DingTalk media download too large ref={} bytes>{}", + current_url, + DINGTALK_MAX_REMOTE_MEDIA_BYTES, + ) + return None, None + chunks.append(chunk) + return b"".join(chunks), (resp.headers.get("content-type") or "") + logger.warning("DingTalk media download exceeded redirect limit ref={}", media_ref) + return None, None + + current_url = media_ref + for _ in range(DINGTALK_MAX_REMOTE_MEDIA_REDIRECTS + 1): + resp = await self._http.get(current_url, follow_redirects=False) + final_ok, final_err = validate_resolved_url(str(getattr(resp, "url", current_url))) + if not final_ok: + logger.warning( + "DingTalk remote media redirect blocked ref={} final={} reason={}", + media_ref, + getattr(resp, "url", current_url), + final_err, + ) + return None, None + if 300 <= resp.status_code < 400: + next_url = self._next_remote_media_url( + str(getattr(resp, "url", current_url)), resp.headers.get("location") + ) + if not next_url: + return None, None + current_url = next_url + continue + if resp.status_code >= 400: + logger.warning( + "DingTalk media download failed status={} ref={}", + resp.status_code, + current_url, + ) + return None, None + if len(resp.content) > DINGTALK_MAX_REMOTE_MEDIA_BYTES: + logger.warning( + "DingTalk media download too large ref={} bytes>{}", + current_url, + DINGTALK_MAX_REMOTE_MEDIA_BYTES, + ) + return None, None + return resp.content, (resp.headers.get("content-type") or "") + logger.warning("DingTalk media download exceeded redirect limit ref={}", media_ref) + return None, None + except httpx.TransportError as e: + logger.error("DingTalk media download network error ref={} err={}", media_ref, e) + raise + except Exception as e: + logger.error("DingTalk media download error ref={} err={}", media_ref, e) + return None, None + async def _read_media_bytes( self, media_ref: str, @@ -323,26 +472,12 @@ class DingTalkChannel(BaseChannel): return None, None, None if self._is_http_url(media_ref): - if not self._http: - return None, None, None - try: - resp = await self._http.get(media_ref, follow_redirects=True) - if resp.status_code >= 400: - logger.warning( - "DingTalk media download failed status={} ref={}", - resp.status_code, - media_ref, - ) - return None, None, None - content_type = (resp.headers.get("content-type") or "").split(";")[0].strip() - filename = self._guess_filename(media_ref, self._guess_upload_type(media_ref)) - return resp.content, filename, content_type or None - except httpx.TransportError as e: - logger.error("DingTalk media download network error ref={} err={}", media_ref, e) - raise - except Exception as e: - logger.error("DingTalk media download error ref={} err={}", media_ref, e) + data, raw_content_type = await self._fetch_remote_media_bytes(media_ref) + if data is None: return None, None, None + content_type = (raw_content_type or "").split(";")[0].strip() + filename = self._guess_filename(media_ref, self._guess_upload_type(media_ref)) + return data, filename, content_type or None try: if media_ref.startswith("file://"): @@ -435,8 +570,10 @@ class DingTalkChannel(BaseChannel): if resp.status_code != 200: logger.error("DingTalk send failed msgKey={} status={} body={}", msg_key, resp.status_code, body[:500]) return False - try: result = resp.json() - except Exception: result = {} + try: + result = resp.json() + except Exception: + result = {} errcode = result.get("errcode") if errcode not in (None, 0): logger.error("DingTalk send api error msgKey={} errcode={} body={}", msg_key, errcode, body[:500]) diff --git a/tests/channels/test_dingtalk_channel.py b/tests/channels/test_dingtalk_channel.py index 86de99bb5..f14c81302 100644 --- a/tests/channels/test_dingtalk_channel.py +++ b/tests/channels/test_dingtalk_channel.py @@ -2,7 +2,6 @@ import asyncio import zipfile from io import BytesIO from types import SimpleNamespace -from unittest.mock import AsyncMock import httpx import pytest @@ -17,19 +16,27 @@ except ImportError: if not DINGTALK_AVAILABLE: pytest.skip("DingTalk dependencies not installed (dingtalk-stream)", allow_module_level=True) -from nanobot.bus.queue import MessageBus import nanobot.channels.dingtalk as dingtalk_module -from nanobot.channels.dingtalk import DingTalkChannel, NanobotDingTalkHandler -from nanobot.channels.dingtalk import DingTalkConfig +from nanobot.bus.queue import MessageBus +from nanobot.channels.dingtalk import DingTalkChannel, DingTalkConfig, NanobotDingTalkHandler class _FakeResponse: - def __init__(self, status_code: int = 200, json_body: dict | None = None) -> None: + def __init__( + self, + status_code: int = 200, + json_body: dict | None = None, + *, + content: bytes = b"", + headers: dict[str, str] | None = None, + url: str = "https://example.com/file", + ) -> None: self.status_code = status_code self._json_body = json_body or {} - self.text = "{}" - self.content = b"" - self.headers = {"content-type": "application/json"} + self.text = content.decode("utf-8", errors="replace") if content else "{}" + self.content = content + self.headers = headers or {"content-type": "application/json"} + self.url = httpx.URL(url) def json(self) -> dict: return self._json_body @@ -46,11 +53,13 @@ class _FakeHttp: return _FakeResponse() async def post(self, url: str, json=None, headers=None, **kwargs): - self.calls.append({"method": "POST", "url": url, "json": json, "headers": headers}) + self.calls.append( + {"method": "POST", "url": url, "json": json, "headers": headers, "kwargs": kwargs} + ) return self._next_response() async def get(self, url: str, **kwargs): - self.calls.append({"method": "GET", "url": url}) + self.calls.append({"method": "GET", "url": url, "kwargs": kwargs}) return self._next_response() @@ -242,6 +251,245 @@ async def test_download_dingtalk_file(tmp_path, monkeypatch) -> None: assert channel._http.calls[1]["method"] == "GET" +@pytest.mark.asyncio +async def test_read_media_bytes_rejects_private_http_target_before_fetch() -> None: + """Remote media fetches must not reach loopback/private addresses.""" + channel = DingTalkChannel( + DingTalkConfig(client_id="app", client_secret="secret", allow_from=["*"]), + MessageBus(), + ) + channel._http = _FakeHttp( + responses=[ + _FakeResponse( + 200, + content=b"internal secret", + headers={"content-type": "text/plain"}, + url="http://127.0.0.1/admin.txt", + ) + ] + ) + + data, filename, content_type = await channel._read_media_bytes("http://127.0.0.1/admin.txt") + + assert (data, filename, content_type) == (None, None, None) + assert channel._http.calls == [] + + +@pytest.mark.asyncio +async def test_read_media_bytes_rejects_private_redirect_result() -> None: + """A public-looking media URL must not be accepted after redirecting private.""" + channel = DingTalkChannel( + DingTalkConfig(client_id="app", client_secret="secret", allow_from=["*"]), + MessageBus(), + ) + channel._http = _FakeHttp( + responses=[ + _FakeResponse( + 200, + content=b"metadata bytes", + headers={"content-type": "text/plain"}, + url="http://127.0.0.1/metadata", + ) + ] + ) + + data, filename, content_type = await channel._read_media_bytes("https://example.com/safe.txt") + + assert (data, filename, content_type) == (None, None, None) + assert len(channel._http.calls) == 1 + + +@pytest.mark.asyncio +async def test_read_media_bytes_rejects_oversized_remote_response(monkeypatch) -> None: + """DingTalk media downloads should enforce a byte cap before upload.""" + monkeypatch.setattr(dingtalk_module, "DINGTALK_MAX_REMOTE_MEDIA_BYTES", 8, raising=False) + channel = DingTalkChannel( + DingTalkConfig(client_id="app", client_secret="secret", allow_from=["*"]), + MessageBus(), + ) + channel._http = _FakeHttp( + responses=[ + _FakeResponse( + 200, + content=b"123456789", + headers={"content-type": "text/plain"}, + url="https://example.com/large.txt", + ) + ] + ) + + data, filename, content_type = await channel._read_media_bytes("https://example.com/large.txt") + + assert (data, filename, content_type) == (None, None, None) + + +@pytest.mark.asyncio +async def test_read_media_bytes_does_not_follow_remote_redirects_by_default() -> None: + """Redirects are refused by default instead of followed into internal networks.""" + channel = DingTalkChannel( + DingTalkConfig(client_id="app", client_secret="secret", allow_from=["*"]), + MessageBus(), + ) + channel._http = _FakeHttp( + responses=[ + _FakeResponse( + 302, + headers={"location": "http://127.0.0.1/metadata"}, + url="https://example.com/redirect.txt", + ) + ] + ) + + data, filename, content_type = await channel._read_media_bytes("https://example.com/redirect.txt") + + assert (data, filename, content_type) == (None, None, None) + assert channel._http.calls[0]["kwargs"]["follow_redirects"] is False + + +@pytest.mark.asyncio +async def test_read_media_bytes_follows_safe_redirect_when_explicitly_enabled() -> None: + """Operators can opt in to public redirects without enabling private redirects.""" + channel = DingTalkChannel( + DingTalkConfig( + client_id="app", + client_secret="secret", + allow_from=["*"], + allow_remote_media_redirects=True, + ), + MessageBus(), + ) + channel._http = _FakeHttp( + responses=[ + _FakeResponse( + 302, + headers={"location": "https://example.com/final.txt"}, + url="https://example.com/redirect.txt", + ), + _FakeResponse( + 200, + content=b"redirected media", + headers={"content-type": "text/plain"}, + url="https://example.com/final.txt", + ), + ] + ) + + data, filename, content_type = await channel._read_media_bytes("https://example.com/redirect.txt") + + assert (data, filename, content_type) == (b"redirected media", "redirect.txt", "text/plain") + assert [call["url"] for call in channel._http.calls] == [ + "https://example.com/redirect.txt", + "https://example.com/final.txt", + ] + assert all(call["kwargs"]["follow_redirects"] is False for call in channel._http.calls) + + +@pytest.mark.asyncio +async def test_read_media_bytes_blocks_cross_host_redirect_without_allowlist() -> None: + """Redirect opt-in should not allow arbitrary cross-host redirects by default.""" + channel = DingTalkChannel( + DingTalkConfig( + client_id="app", + client_secret="secret", + allow_from=["*"], + allow_remote_media_redirects=True, + ), + MessageBus(), + ) + channel._http = _FakeHttp( + responses=[ + _FakeResponse( + 302, + headers={"location": "https://example.org/final.txt"}, + url="https://example.com/redirect.txt", + ), + _FakeResponse( + 200, + content=b"cross-host media", + headers={"content-type": "text/plain"}, + url="https://example.org/final.txt", + ), + ] + ) + + data, filename, content_type = await channel._read_media_bytes("https://example.com/redirect.txt") + + assert (data, filename, content_type) == (None, None, None) + assert [call["url"] for call in channel._http.calls] == ["https://example.com/redirect.txt"] + + +@pytest.mark.asyncio +async def test_read_media_bytes_allows_cross_host_redirect_when_allowlisted() -> None: + """Operators can explicitly allow a known CDN/download host for redirects.""" + channel = DingTalkChannel( + DingTalkConfig( + client_id="app", + client_secret="secret", + allow_from=["*"], + allow_remote_media_redirects=True, + remote_media_redirect_allowed_hosts=["example.org"], + ), + MessageBus(), + ) + channel._http = _FakeHttp( + responses=[ + _FakeResponse( + 302, + headers={"location": "https://example.org/final.txt"}, + url="https://example.com/redirect.txt", + ), + _FakeResponse( + 200, + content=b"cross-host media", + headers={"content-type": "text/plain"}, + url="https://example.org/final.txt", + ), + ] + ) + + data, filename, content_type = await channel._read_media_bytes("https://example.com/redirect.txt") + + assert (data, filename, content_type) == (b"cross-host media", "redirect.txt", "text/plain") + assert [call["url"] for call in channel._http.calls] == [ + "https://example.com/redirect.txt", + "https://example.org/final.txt", + ] + + +@pytest.mark.asyncio +async def test_read_media_bytes_blocks_private_redirect_even_when_redirects_enabled() -> None: + """Redirect opt-in must still validate each hop before fetching it.""" + channel = DingTalkChannel( + DingTalkConfig( + client_id="app", + client_secret="secret", + allow_from=["*"], + allow_remote_media_redirects=True, + ), + MessageBus(), + ) + channel._http = _FakeHttp( + responses=[ + _FakeResponse( + 302, + headers={"location": "http://127.0.0.1/metadata"}, + url="https://example.com/redirect.txt", + ), + _FakeResponse( + 200, + content=b"internal secret", + headers={"content-type": "text/plain"}, + url="http://127.0.0.1/metadata", + ), + ] + ) + + data, filename, content_type = await channel._read_media_bytes("https://example.com/redirect.txt") + + assert (data, filename, content_type) == (None, None, None) + assert [call["url"] for call in channel._http.calls] == ["https://example.com/redirect.txt"] + + def test_normalize_upload_payload_zips_html_attachment() -> None: channel = DingTalkChannel( DingTalkConfig(client_id="app", client_secret="secret", allow_from=["*"]),