nanobot/nanobot/utils/media_decode.py

56 lines
1.7 KiB
Python

"""Shared helpers for decoding ``data:...;base64,...`` URLs to disk.
Historically lived in ``nanobot.api.server``; now shared by the WebSocket
channel so the ``api`` + ``websocket`` ingress paths apply the same parsing,
size guard, and filesystem layout.
"""
from __future__ import annotations
import base64
import mimetypes
import re
import uuid
from pathlib import Path
from nanobot.utils.helpers import safe_filename
DEFAULT_MAX_BYTES = 10 * 1024 * 1024
MAX_FILE_SIZE = DEFAULT_MAX_BYTES
_DATA_URL_RE = re.compile(r"^data:([^;]+);base64,(.+)$", re.DOTALL)
class FileSizeExceeded(Exception):
"""Raised when a decoded payload exceeds the caller's size limit."""
def save_base64_data_url(
data_url: str,
media_dir: Path,
*,
max_bytes: int | None = None,
) -> str | None:
"""Decode a ``data:<mime>;base64,<payload>`` URL and persist it.
Returns the absolute path on success, ``None`` when the URL shape or the
base64 payload itself is malformed. Raises :class:`FileSizeExceeded`
when the decoded payload is larger than ``max_bytes`` (default 10 MB).
"""
m = _DATA_URL_RE.match(data_url)
if not m:
return None
mime_type, b64_payload = m.group(1), m.group(2)
try:
raw = base64.b64decode(b64_payload)
except Exception:
return None
limit = DEFAULT_MAX_BYTES if max_bytes is None else max_bytes
if len(raw) > limit:
raise FileSizeExceeded(f"File exceeds {limit // (1024 * 1024)}MB limit")
ext = mimetypes.guess_extension(mime_type) or ".bin"
filename = f"{uuid.uuid4().hex[:12]}{ext}"
dest = media_dir / safe_filename(filename)
dest.write_bytes(raw)
return str(dest)