feat(webui): render video media attachments

Add signed media URLs to live WebSocket replies and teach the WebUI to classify and render video attachments, so bot-sent videos can play inline in both live chats and session history.

Made-with: Cursor
This commit is contained in:
Xubin Ren 2026-04-24 19:17:58 +00:00 committed by Xubin Ren
parent be05189f39
commit e52fe2a8e2
10 changed files with 327 additions and 15 deletions

View File

@ -13,6 +13,7 @@ import json
import mimetypes
import re
import secrets
import shutil
import ssl
import time
import uuid
@ -33,6 +34,7 @@ from nanobot.bus.queue import MessageBus
from nanobot.channels.base import BaseChannel
from nanobot.config.paths import get_media_dir
from nanobot.config.schema import Base
from nanobot.utils.helpers import safe_filename
from nanobot.utils.media_decode import (
FileSizeExceeded,
save_base64_data_url,
@ -716,6 +718,33 @@ class WebSocketChannel(BaseChannel):
).digest()[:16]
return f"/api/media/{_b64url_encode(mac)}/{payload}"
def _sign_or_stage_media_path(self, path: Path) -> dict[str, str] | None:
"""Return a signed media URL payload for *path*.
Persisted inbound media already lives under ``get_media_dir`` and can
be signed directly. Outbound bot-generated files may live anywhere on
disk; copy those into the websocket media bucket first so the browser
can fetch them through the existing signed media route without
exposing arbitrary filesystem paths.
"""
signed = self._sign_media_path(path)
if signed is not None:
return {"url": signed, "name": path.name}
try:
if not path.is_file():
return None
media_dir = get_media_dir("websocket")
safe_name = safe_filename(path.name) or "attachment"
staged = media_dir / f"{uuid.uuid4().hex[:12]}-{safe_name}"
shutil.copyfile(path, staged)
except OSError as exc:
logger.warning("websocket: failed to stage outbound media {}: {}", path, exc)
return None
signed = self._sign_media_path(staged)
if signed is None:
return None
return {"url": signed, "name": path.name}
def _handle_media_fetch(self, sig: str, payload: str) -> Response:
"""Serve a single media file previously signed via
:meth:`_sign_media_path`. Validates the signature, decodes the
@ -1124,6 +1153,13 @@ class WebSocketChannel(BaseChannel):
}
if msg.media:
payload["media"] = msg.media
urls: list[dict[str, str]] = []
for entry in msg.media:
signed = self._sign_or_stage_media_path(Path(entry))
if signed is not None:
urls.append(signed)
if urls:
payload["media_urls"] = urls
if msg.reply_to:
payload["reply_to"] = msg.reply_to
# Mark intermediate agent breadcrumbs (tool-call hints, generic

View File

@ -190,6 +190,39 @@ async def test_send_delivers_json_message_with_media_and_reply() -> None:
assert payload["media"] == ["/tmp/a.png"]
@pytest.mark.asyncio
async def test_send_stages_external_media_as_signed_url(monkeypatch, tmp_path) -> None:
    """A file outside the media root is copied into the websocket bucket and
    advertised to the client through a ``media_urls`` entry."""
    media_root = tmp_path / "media"
    websocket_bucket = media_root / "websocket"
    websocket_bucket.mkdir(parents=True)
    outside_file = tmp_path / "clip.mp4"
    outside_file.write_bytes(b"video")

    def fake_media_dir(channel: str | None = None):
        if channel == "websocket":
            return websocket_bucket
        return media_root

    monkeypatch.setattr("nanobot.channels.websocket.get_media_dir", fake_media_dir)

    bus = MagicMock()
    channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
    socket = AsyncMock()
    channel._attach(socket, "chat-1")

    outbound = OutboundMessage(
        channel="websocket",
        chat_id="chat-1",
        content="video",
        media=[str(outside_file)],
    )
    await channel.send(outbound)

    payload = json.loads(socket.send.call_args[0][0])
    # The raw path list is still forwarded unchanged next to the signed URLs.
    assert payload["media"] == [str(outside_file)]
    first_url = payload["media_urls"][0]
    assert first_url["name"] == "clip.mp4"
    assert first_url["url"].startswith("/api/media/")
    staged_names = [entry.name for entry in websocket_bucket.iterdir()]
    assert any(name.endswith("-clip.mp4") for name in staged_names)
@pytest.mark.asyncio
async def test_send_missing_connection_is_noop_without_error() -> None:
bus = MagicMock()

View File

@ -1,11 +1,11 @@
import { useState } from "react";
import { ChevronRight, ImageIcon, Wrench } from "lucide-react";
import { ChevronRight, FileIcon, ImageIcon, PlaySquare, Wrench } from "lucide-react";
import { useTranslation } from "react-i18next";
import { ImageLightbox } from "@/components/ImageLightbox";
import { MarkdownText } from "@/components/MarkdownText";
import { cn } from "@/lib/utils";
import type { UIImage, UIMessage } from "@/lib/types";
import type { UIImage, UIMediaAttachment, UIMessage } from "@/lib/types";
interface MessageBubbleProps {
message: UIMessage;
@ -29,7 +29,9 @@ export function MessageBubble({ message }: MessageBubbleProps) {
if (message.role === "user") {
const images = message.images ?? [];
const media = message.media ?? [];
const hasImages = images.length > 0;
const hasMedia = media.length > 0;
const hasText = message.content.trim().length > 0;
return (
<div
@ -38,7 +40,10 @@ export function MessageBubble({ message }: MessageBubbleProps) {
baseAnim,
)}
>
{hasImages ? <UserImages images={images} /> : null}
{hasImages ? <UserImages images={images} align="right" /> : null}
{!hasImages && hasMedia ? (
<MessageMedia media={media} align="right" />
) : null}
{hasText ? (
<p
className={cn(
@ -54,6 +59,7 @@ export function MessageBubble({ message }: MessageBubbleProps) {
}
const empty = message.content.trim().length === 0;
const media = message.media ?? [];
return (
<div className={cn("w-full text-sm", baseAnim)} style={{ lineHeight: "var(--cjk-line-height)" }}>
{empty && message.isStreaming ? (
@ -62,12 +68,82 @@ export function MessageBubble({ message }: MessageBubbleProps) {
<>
<MarkdownText>{message.content}</MarkdownText>
{message.isStreaming && <StreamCursor />}
{media.length > 0 ? <MessageMedia media={media} align="left" /> : null}
</>
)}
</div>
);
}
/**
 * Attachment row for a message bubble.
 *
 * Image attachments are handed to ``UserImages`` as one group; every other
 * attachment gets its own ``MediaCell``. Renders nothing for an empty list.
 */
function MessageMedia({
  media,
  align,
}: {
  media: UIMediaAttachment[];
  align: "left" | "right";
}) {
  if (media.length === 0) return null;

  // Single pass: split images (reduced to url/name) from everything else.
  const images: UIImage[] = [];
  const others: UIMediaAttachment[] = [];
  for (const item of media) {
    if (item.kind === "image") {
      images.push({ url: item.url, name: item.name });
    } else {
      others.push(item);
    }
  }

  const justify = align === "right" ? "justify-end" : "justify-start";

  return (
    <div className={cn("mt-2 flex flex-wrap gap-2", justify)}>
      {images.length > 0 ? <UserImages images={images} align={align} /> : null}
      {others.map((item, index) => (
        <MediaCell
          key={`${item.url ?? item.name ?? item.kind}-${index}`}
          media={item}
        />
      ))}
    </div>
  );
}
/**
 * Renders one non-image attachment.
 *
 * A video with a non-empty URL becomes an inline ``<video>`` player with an
 * optional file-name caption. Anything else (a video without a URL, or a
 * generic file) becomes a compact icon + label chip.
 */
function MediaCell({ media }: { media: UIMediaAttachment }) {
  const { t } = useTranslation();
  const isVideo = media.kind === "video";
  const hasUrl = typeof media.url === "string" && media.url.length > 0;

  if (isVideo && hasUrl) {
    const videoLabel = t("message.videoAttachment", { defaultValue: "Video attachment" });
    const playerLabel = media.name ? `${videoLabel}: ${media.name}` : videoLabel;
    return (
      <figure className="max-w-[min(100%,32rem)] overflow-hidden rounded-[14px] border border-border/60 bg-muted/40">
        <video
          src={media.url}
          controls
          preload="metadata"
          className="block max-h-[26rem] w-full bg-black"
          aria-label={playerLabel}
        />
        {media.name ? (
          <figcaption className="truncate px-3 py-1.5 text-[11.5px] text-muted-foreground">
            {media.name}
          </figcaption>
        ) : null}
      </figure>
    );
  }

  // Fallback chip: the icon and label follow the attachment kind.
  const label = isVideo
    ? t("message.videoAttachment", { defaultValue: "Video attachment" })
    : t("message.fileAttachment", { defaultValue: "File attachment" });
  const Icon = isVideo ? PlaySquare : FileIcon;

  return (
    <div
      className="flex max-w-[18rem] items-center gap-2 rounded-[14px] border border-border/60 bg-muted/40 px-3 py-2 text-xs text-muted-foreground"
      title={media.name ?? undefined}
      aria-label={label}
    >
      <Icon className="h-4 w-4 flex-none" aria-hidden />
      <span className="truncate">{media.name ?? label}</span>
    </div>
  );
}
/**
* Preview row for message images: right-aligned by default (user turns), left-aligned when ``align`` is "left".
*
@ -82,7 +158,13 @@ export function MessageBubble({ message }: MessageBubbleProps) {
* have no URL (the backend strips data URLs before persisting), so we
* render a labelled placeholder tile instead of a broken ``<img>``.
*/
function UserImages({ images }: { images: UIImage[] }) {
function UserImages({
images,
align = "right",
}: {
images: UIImage[];
align?: "left" | "right";
}) {
const { t } = useTranslation();
// Only real-URL images can open in the lightbox; historical-replay
// placeholders (no URL) have nothing to zoom into.
@ -98,7 +180,12 @@ function UserImages({ images }: { images: UIImage[] }) {
return (
<>
<div className="ml-auto flex flex-wrap items-end justify-end gap-2">
<div
className={cn(
"flex flex-wrap items-end gap-2",
align === "right" ? "ml-auto justify-end" : "mr-auto justify-start",
)}
>
{images.map((img, i) => (
<UserImageCell
key={`${img.url ?? "placeholder"}-${i}`}

View File

@ -1,6 +1,7 @@
import { useCallback, useEffect, useRef, useState } from "react";
import { useClient } from "@/providers/ClientProvider";
import { toMediaAttachment } from "@/lib/media";
import type { StreamError } from "@/lib/nanobot-client";
import type {
InboundEvent,
@ -148,6 +149,10 @@ export function useNanobotStream(
return;
}
const media = ev.media_urls?.length
? ev.media_urls.map((m) => toMediaAttachment(m))
: ev.media?.map((url) => toMediaAttachment({ url }));
// A complete (non-streamed) assistant message. If a stream was in
// flight, drop the placeholder so we don't render the text twice.
const activeId = buffer.current?.messageId;
@ -162,6 +167,7 @@ export function useNanobotStream(
role: "assistant",
content: ev.text,
createdAt: Date.now(),
...(media && media.length > 0 ? { media } : {}),
},
];
});

View File

@ -9,6 +9,7 @@ import {
listSessions,
} from "@/lib/api";
import { deriveTitle } from "@/lib/format";
import { toMediaAttachment } from "@/lib/media";
import type { ChatSummary, UIMessage } from "@/lib/types";
const EMPTY_MESSAGES: UIMessage[] = [];
@ -123,17 +124,16 @@ export function useSessionHistory(key: string | null): {
const ui: UIMessage[] = body.messages.flatMap((m, idx) => {
if (m.role !== "user" && m.role !== "assistant") return [];
if (typeof m.content !== "string") return [];
// Hydrate signed media URLs into the bubble's ``images`` slot so
// historical user turns render real previews (the live-send path
// uses data URLs; both shapes converge on the same ``UIImage``).
// Hydrate signed media URLs into generic UI attachments. Image-only
// user turns still populate the legacy ``images`` slot so the
// existing optimistic-send and lightbox paths remain unchanged.
const media =
Array.isArray(m.media_urls) && m.media_urls.length > 0
? m.media_urls.map((mu) => toMediaAttachment(mu))
: undefined;
const images =
m.role === "user" &&
Array.isArray(m.media_urls) &&
m.media_urls.length > 0
? m.media_urls.map((mu) => ({
url: mu.url,
name: mu.name,
}))
m.role === "user" && media?.every((item) => item.kind === "image")
? media.map((item) => ({ url: item.url, name: item.name }))
: undefined;
return [
{
@ -142,6 +142,7 @@ export function useSessionHistory(key: string | null): {
content: m.content,
createdAt: m.timestamp ? Date.parse(m.timestamp) : Date.now(),
...(images ? { images } : {}),
...(media ? { media } : {}),
},
];
});

59
webui/src/lib/media.ts Normal file
View File

@ -0,0 +1,59 @@
import type { UIMediaAttachment, UIMediaKind } from "@/lib/types";
// File extensions classified as "image" by ``inferMediaKind``.
// Entries are lower-case and include the leading dot, matching the
// lower-cased, dot-prefixed value that ``extensionOf`` returns.
const IMAGE_EXTENSIONS = new Set([
".png",
".jpg",
".jpeg",
".gif",
".webp",
".bmp",
".ico",
".tif",
".tiff",
]);
// File extensions classified as "video" by ``inferMediaKind``; same
// lower-case, dot-prefixed format as ``IMAGE_EXTENSIONS``.
const VIDEO_EXTENSIONS = new Set([
".mp4",
".webm",
".mov",
".m4v",
".avi",
".mkv",
".3gp",
]);
/**
 * Lower-cases `value` after cutting it at the first `?` or `#`, so a query
 * string or fragment never leaks into the extension lookup.
 */
function cleanPath(value: string): string {
  const [beforeQueryOrHash] = value.split(/[?#]/, 1);
  if (beforeQueryOrHash === undefined) return "";
  return beforeQueryOrHash.toLowerCase();
}
/**
 * Returns the lower-cased extension of `value`, leading dot included, taken
 * from the last `.` in the part before any `?` / `#`. Returns `""` when
 * `value` is missing or empty, or when that part contains no dot.
 */
function extensionOf(value?: string): string {
  if (!value) return "";
  const normalized = cleanPath(value);
  const lastDot = normalized.lastIndexOf(".");
  return lastDot === -1 ? "" : normalized.slice(lastDot);
}
/**
 * Extension of a bare file name, lower-cased and including the leading dot.
 *
 * Unlike a URL, a file name has no query string or fragment, so `?` and `#`
 * are treated as ordinary characters here (`take#2.mp4` -> `.mp4`).
 */
function fileNameExtension(name?: string): string {
  if (!name) return "";
  const lowered = name.toLowerCase();
  const lastDot = lowered.lastIndexOf(".");
  return lastDot === -1 ? "" : lowered.slice(lastDot);
}

/**
 * Classifies an attachment as `"image"`, `"video"` or `"file"`.
 *
 * Resolution order:
 * 1. `data:image/` and `data:video/` URLs are decided by their prefix.
 * 2. The literal extension of `name`, if it is a known image/video extension.
 * 3. Otherwise the extension of `name` with any `?`/`#` suffix removed,
 *    falling back to the extension of `url` when `name` yields none.
 *
 * @param media - Attachment whose `url` and/or `name` carry the type hint.
 * @returns The inferred kind; `"file"` when nothing matches.
 */
export function inferMediaKind(media: { url?: string; name?: string }): UIMediaKind {
  const url = media.url ?? "";
  if (url.startsWith("data:image/")) return "image";
  if (url.startsWith("data:video/")) return "video";

  // Try the name as a plain file name first: stripping at `#` or `?` would
  // cut a name such as "take#2.mp4" down to "take" and lose its extension.
  const literalExt = fileNameExtension(media.name);
  const ext =
    IMAGE_EXTENSIONS.has(literalExt) || VIDEO_EXTENSIONS.has(literalExt)
      ? literalExt
      : extensionOf(media.name) || extensionOf(url);

  if (IMAGE_EXTENSIONS.has(ext)) return "image";
  if (VIDEO_EXTENSIONS.has(ext)) return "video";
  return "file";
}
/**
 * Normalises a loose `{ url, name, kind }` record into a `UIMediaAttachment`.
 *
 * An explicit `kind` is kept as-is; otherwise it is derived with
 * `inferMediaKind`. `url` and `name` are copied through unchanged.
 */
export function toMediaAttachment(media: {
  url?: string;
  name?: string;
  kind?: UIMediaKind;
}): UIMediaAttachment {
  const { url, name } = media;
  const kind = media.kind ?? inferMediaKind(media);
  return { kind, url, name };
}

View File

@ -22,6 +22,14 @@ export interface UIImage {
name?: string;
}
/** Coarse attachment category the UI uses to choose how to render it. */
export type UIMediaKind = "image" | "video" | "file";
/** One renderable attachment on a chat message. */
export interface UIMediaAttachment {
/** Category of the attachment: image, video, or generic file. */
kind: UIMediaKind;
/** Location of the media; optional, so consumers must handle its absence. */
url?: string;
/** Optional file name for the attachment. */
name?: string;
}
export interface UIMessage {
id: string;
role: Role;
@ -34,6 +42,8 @@ export interface UIMessage {
traces?: string[];
/** User turn: optimistic blob URLs for preview. Replay: placeholder chips. */
images?: UIImage[];
/** Signed or local UI-renderable media attachments. */
media?: UIMediaAttachment[];
}
export interface ChatSummary {
@ -71,6 +81,7 @@ export type InboundEvent =
text: string;
reply_to?: string;
media?: string[];
media_urls?: Array<{ url: string; name?: string }>;
/** Present when the frame is an agent breadcrumb (e.g. tool hint,
* generic progress line) rather than a conversational reply. */
kind?: "tool_hint" | "progress";

View File

@ -40,4 +40,28 @@ describe("MessageBubble", () => {
fireEvent.click(toggle);
expect(screen.queryByText('weather("get")')).not.toBeInTheDocument();
});
it("renders video media as an inline player", () => {
  const signedUrl = "/api/media/sig/payload";
  const message: UIMessage = {
    id: "a1",
    role: "assistant",
    content: "here is the clip",
    createdAt: Date.now(),
    media: [{ kind: "video", url: signedUrl, name: "demo.mp4" }],
  };

  const { container } = render(<MessageBubble message={message} />);

  // The text body must still render alongside the attachment.
  expect(screen.getByText("here is the clip")).toBeInTheDocument();

  // The attachment is a real <video> element with native controls.
  const player = screen.getByLabelText(/video attachment/i);
  expect(player.tagName).toBe("VIDEO");
  expect(player).toHaveAttribute("src", signedUrl);
  expect(container.querySelector("video[controls]")).toBeInTheDocument();
});
});

View File

@ -92,4 +92,25 @@ describe("useNanobotStream", () => {
expect(result.current.messages[1].role).toBe("assistant");
expect(result.current.messages[1].kind).toBeUndefined();
});
it("attaches assistant media_urls to complete messages", () => {
  const chatId = "chat-m";
  const signedClip = { url: "/api/media/sig/payload", name: "demo.mp4" };
  const fake = fakeClient();
  const { result } = renderHook(() => useNanobotStream(chatId, []), {
    wrapper: wrap(fake.client),
  });

  // Deliver one complete (non-streamed) assistant frame carrying a signed URL.
  act(() => {
    fake.emit(chatId, {
      event: "message",
      chat_id: chatId,
      text: "video ready",
      media_urls: [signedClip],
    });
  });

  const { messages } = result.current;
  expect(messages).toHaveLength(1);
  // The kind is inferred from the ".mp4" name.
  expect(messages[0].media).toEqual([
    { kind: "video", url: "/api/media/sig/payload", name: "demo.mp4" },
  ]);
});
});

View File

@ -130,12 +130,46 @@ describe("useSessions", () => {
{ url: "/api/media/sig-1/payload-1", name: "snap.png" },
{ url: "/api/media/sig-2/payload-2", name: "diag.jpg" },
]);
expect(first.media).toEqual([
{ kind: "image", url: "/api/media/sig-1/payload-1", name: "snap.png" },
{ kind: "image", url: "/api/media/sig-2/payload-2", name: "diag.jpg" },
]);
expect(second.role).toBe("assistant");
expect(second.images).toBeUndefined();
expect(third.role).toBe("user");
expect(third.images).toBeUndefined();
});
it("hydrates historical assistant video media_urls into media attachments", async () => {
  const sessionKey = "websocket:chat-video";
  const storedClip = { url: "/api/media/sig-v/payload-v", name: "clip.mp4" };
  vi.mocked(api.fetchSessionMessages).mockResolvedValue({
    key: sessionKey,
    created_at: "2026-04-20T10:00:00Z",
    updated_at: "2026-04-20T10:05:00Z",
    messages: [
      {
        role: "assistant",
        content: "clip ready",
        timestamp: "2026-04-20T10:00:01Z",
        media_urls: [storedClip],
      },
    ],
  });

  const { result } = renderHook(() => useSessionHistory(sessionKey), {
    wrapper: wrap(fakeClient()),
  });
  await waitFor(() => expect(result.current.loading).toBe(false));

  const [replayed] = result.current.messages;
  expect(replayed.role).toBe("assistant");
  // Assistant turns never populate the legacy image slot.
  expect(replayed.images).toBeUndefined();
  expect(replayed.media).toEqual([
    { kind: "video", url: "/api/media/sig-v/payload-v", name: "clip.mp4" },
  ]);
});
it("keeps the session in the list when delete fails", async () => {
vi.mocked(api.listSessions).mockResolvedValue([
{