mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-23 01:52:52 +00:00
feat(webui): render video media attachments
Add signed media URLs to live WebSocket replies and teach the WebUI to classify and render video attachments, so bot-sent videos can play inline in both live chats and session history. Made-with: Cursor
This commit is contained in:
parent
be05189f39
commit
e52fe2a8e2
@ -13,6 +13,7 @@ import json
|
||||
import mimetypes
|
||||
import re
|
||||
import secrets
|
||||
import shutil
|
||||
import ssl
|
||||
import time
|
||||
import uuid
|
||||
@ -33,6 +34,7 @@ from nanobot.bus.queue import MessageBus
|
||||
from nanobot.channels.base import BaseChannel
|
||||
from nanobot.config.paths import get_media_dir
|
||||
from nanobot.config.schema import Base
|
||||
from nanobot.utils.helpers import safe_filename
|
||||
from nanobot.utils.media_decode import (
|
||||
FileSizeExceeded,
|
||||
save_base64_data_url,
|
||||
@ -716,6 +718,33 @@ class WebSocketChannel(BaseChannel):
|
||||
).digest()[:16]
|
||||
return f"/api/media/{_b64url_encode(mac)}/{payload}"
|
||||
|
||||
def _sign_or_stage_media_path(self, path: Path) -> dict[str, str] | None:
    """Return a signed media URL payload for *path*, staging it if needed.

    Persisted inbound media already lives under ``get_media_dir`` and can
    be signed directly. Outbound bot-generated files may live anywhere on
    disk; copy those into the websocket media bucket first so the browser
    can fetch them through the existing signed media route without
    exposing arbitrary filesystem paths.

    Args:
        path: Location of the media file referenced by an outbound message.

    Returns:
        ``{"url": <signed url>, "name": <original file name>}`` on success,
        or ``None`` when *path* is not a regular file, staging fails, or
        the staged copy cannot be signed.

    Note:
        The copy is synchronous; callers on an event loop pay the full
        copy time for large files.
    """

    def _discard(candidate: Path | None) -> None:
        # Best-effort removal of a staged copy nobody will ever be able to
        # fetch; a cleanup failure must not mask the original problem.
        if candidate is None:
            return
        try:
            candidate.unlink(missing_ok=True)
        except OSError:
            pass

    signed = self._sign_media_path(path)
    if signed is not None:
        return {"url": signed, "name": path.name}

    staged: Path | None = None
    try:
        if not path.is_file():
            return None
        media_dir = get_media_dir("websocket")
        safe_name = safe_filename(path.name) or "attachment"
        # Random prefix keeps repeated sends of same-named files from
        # overwriting each other inside the shared bucket.
        staged = media_dir / f"{uuid.uuid4().hex[:12]}-{safe_name}"
        shutil.copyfile(path, staged)
    except OSError as exc:
        logger.warning("websocket: failed to stage outbound media {}: {}", path, exc)
        # copyfile may already have created a truncated destination file
        # (e.g. disk full, source vanished mid-copy); do not leave it behind.
        _discard(staged)
        return None

    signed = self._sign_media_path(staged)
    if signed is None:
        # No URL will be handed out for this copy, so it would only leak
        # disk space in the media bucket.
        _discard(staged)
        return None
    return {"url": signed, "name": path.name}
|
||||
|
||||
def _handle_media_fetch(self, sig: str, payload: str) -> Response:
|
||||
"""Serve a single media file previously signed via
|
||||
:meth:`_sign_media_path`. Validates the signature, decodes the
|
||||
@ -1124,6 +1153,13 @@ class WebSocketChannel(BaseChannel):
|
||||
}
|
||||
if msg.media:
|
||||
payload["media"] = msg.media
|
||||
urls: list[dict[str, str]] = []
|
||||
for entry in msg.media:
|
||||
signed = self._sign_or_stage_media_path(Path(entry))
|
||||
if signed is not None:
|
||||
urls.append(signed)
|
||||
if urls:
|
||||
payload["media_urls"] = urls
|
||||
if msg.reply_to:
|
||||
payload["reply_to"] = msg.reply_to
|
||||
# Mark intermediate agent breadcrumbs (tool-call hints, generic
|
||||
|
||||
@ -190,6 +190,39 @@ async def test_send_delivers_json_message_with_media_and_reply() -> None:
|
||||
assert payload["media"] == ["/tmp/a.png"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_send_stages_external_media_as_signed_url(monkeypatch, tmp_path) -> None:
    """A file outside the media dir is copied into the websocket bucket and
    surfaced to the client as a signed ``/api/media/`` URL."""
    # Layout: <tmp>/media/websocket is the bucket, <tmp>/clip.mp4 lives outside it.
    root_dir = tmp_path / "media"
    bucket_dir = root_dir / "websocket"
    bucket_dir.mkdir(parents=True)
    outside_file = tmp_path / "clip.mp4"
    outside_file.write_bytes(b"video")

    def fake_media_dir(channel: str | None = None):
        if channel == "websocket":
            return bucket_dir
        return root_dir

    monkeypatch.setattr("nanobot.channels.websocket.get_media_dir", fake_media_dir)

    bus = MagicMock()
    channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
    mock_ws = AsyncMock()
    channel._attach(mock_ws, "chat-1")

    outbound = OutboundMessage(
        channel="websocket",
        chat_id="chat-1",
        content="video",
        media=[str(outside_file)],
    )
    await channel.send(outbound)

    # First positional argument of the last ``send`` call is the JSON frame.
    frame = json.loads(mock_ws.send.call_args[0][0])
    first_url = frame["media_urls"][0]

    assert frame["media"] == [str(outside_file)]
    assert first_url["name"] == "clip.mp4"
    assert first_url["url"].startswith("/api/media/")
    staged_names = [entry.name for entry in bucket_dir.iterdir()]
    assert any(name.endswith("-clip.mp4") for name in staged_names)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_missing_connection_is_noop_without_error() -> None:
|
||||
bus = MagicMock()
|
||||
|
||||
@ -1,11 +1,11 @@
|
||||
import { useState } from "react";
|
||||
import { ChevronRight, ImageIcon, Wrench } from "lucide-react";
|
||||
import { ChevronRight, FileIcon, ImageIcon, PlaySquare, Wrench } from "lucide-react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
|
||||
import { ImageLightbox } from "@/components/ImageLightbox";
|
||||
import { MarkdownText } from "@/components/MarkdownText";
|
||||
import { cn } from "@/lib/utils";
|
||||
import type { UIImage, UIMessage } from "@/lib/types";
|
||||
import type { UIImage, UIMediaAttachment, UIMessage } from "@/lib/types";
|
||||
|
||||
interface MessageBubbleProps {
|
||||
message: UIMessage;
|
||||
@ -29,7 +29,9 @@ export function MessageBubble({ message }: MessageBubbleProps) {
|
||||
|
||||
if (message.role === "user") {
|
||||
const images = message.images ?? [];
|
||||
const media = message.media ?? [];
|
||||
const hasImages = images.length > 0;
|
||||
const hasMedia = media.length > 0;
|
||||
const hasText = message.content.trim().length > 0;
|
||||
return (
|
||||
<div
|
||||
@ -38,7 +40,10 @@ export function MessageBubble({ message }: MessageBubbleProps) {
|
||||
baseAnim,
|
||||
)}
|
||||
>
|
||||
{hasImages ? <UserImages images={images} /> : null}
|
||||
{hasImages ? <UserImages images={images} align="right" /> : null}
|
||||
{!hasImages && hasMedia ? (
|
||||
<MessageMedia media={media} align="right" />
|
||||
) : null}
|
||||
{hasText ? (
|
||||
<p
|
||||
className={cn(
|
||||
@ -54,6 +59,7 @@ export function MessageBubble({ message }: MessageBubbleProps) {
|
||||
}
|
||||
|
||||
const empty = message.content.trim().length === 0;
|
||||
const media = message.media ?? [];
|
||||
return (
|
||||
<div className={cn("w-full text-sm", baseAnim)} style={{ lineHeight: "var(--cjk-line-height)" }}>
|
||||
{empty && message.isStreaming ? (
|
||||
@ -62,12 +68,82 @@ export function MessageBubble({ message }: MessageBubbleProps) {
|
||||
<>
|
||||
<MarkdownText>{message.content}</MarkdownText>
|
||||
{message.isStreaming && <StreamCursor />}
|
||||
{media.length > 0 ? <MessageMedia media={media} align="left" /> : null}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Attachment row for a message: image attachments are delegated to
 * `UserImages`, every other attachment gets its own `MediaCell`.
 * Renders nothing when there are no attachments.
 */
function MessageMedia(props: {
  media: UIMediaAttachment[];
  align: "left" | "right";
}) {
  const { media, align } = props;
  if (media.length === 0) return null;

  const isImage = (item: UIMediaAttachment): boolean => item.kind === "image";
  // UserImages only needs the url/name pair, not the attachment kind.
  const imageTiles = media
    .filter(isImage)
    .map((item) => ({ url: item.url, name: item.name }));
  const otherMedia = media.filter((item) => !isImage(item));
  const justify = align === "right" ? "justify-end" : "justify-start";

  return (
    <div className={cn("mt-2 flex flex-wrap gap-2", justify)}>
      {imageTiles.length > 0 && <UserImages images={imageTiles} align={align} />}
      {otherMedia.map((item, i) => (
        <MediaCell key={`${item.url ?? item.name ?? item.kind}-${i}`} media={item} />
      ))}
    </div>
  );
}
|
||||
|
||||
/**
 * Renders one non-image attachment.
 *
 * - A video with a usable URL becomes an inline `<video>` player with an
 *   optional filename caption.
 * - Anything else (a file, or a video without a URL) becomes a compact
 *   icon + name chip.
 *
 * Both variants expose the same accessible label: the translated kind,
 * followed by the filename when one is known.
 */
function MediaCell({ media }: { media: UIMediaAttachment }) {
  const { t } = useTranslation();
  const isVideo = media.kind === "video";
  // Treat an empty-string name like a missing one so we never render a blank chip.
  const name = media.name || undefined;

  const kindLabel = isVideo
    ? t("message.videoAttachment", { defaultValue: "Video attachment" })
    : t("message.fileAttachment", { defaultValue: "File attachment" });
  const accessibleLabel = name ? `${kindLabel}: ${name}` : kindLabel;

  // Truthiness check also narrows `url` to a non-empty string for `src`.
  if (isVideo && media.url) {
    return (
      <figure className="max-w-[min(100%,32rem)] overflow-hidden rounded-[14px] border border-border/60 bg-muted/40">
        <video
          src={media.url}
          controls
          preload="metadata"
          className="block max-h-[26rem] w-full bg-black"
          aria-label={accessibleLabel}
        />
        {name ? (
          <figcaption className="truncate px-3 py-1.5 text-[11.5px] text-muted-foreground">
            {name}
          </figcaption>
        ) : null}
      </figure>
    );
  }

  const Icon = isVideo ? PlaySquare : FileIcon;

  return (
    <div
      className="flex max-w-[18rem] items-center gap-2 rounded-[14px] border border-border/60 bg-muted/40 px-3 py-2 text-xs text-muted-foreground"
      // A bare div has the generic role, on which aria-label is not a
      // valid naming mechanism; "group" makes the label apply.
      role="group"
      title={name}
      aria-label={accessibleLabel}
    >
      <Icon className="h-4 w-4 flex-none" aria-hidden />
      <span className="truncate">{name ?? kindLabel}</span>
    </div>
  );
}
|
||||
|
||||
/**
|
||||
* Right-aligned preview row for images attached to a user turn.
|
||||
*
|
||||
@ -82,7 +158,13 @@ export function MessageBubble({ message }: MessageBubbleProps) {
|
||||
* have no URL (the backend strips data URLs before persisting), so we
|
||||
* render a labelled placeholder tile instead of a broken ``<img>``.
|
||||
*/
|
||||
function UserImages({ images }: { images: UIImage[] }) {
|
||||
function UserImages({
|
||||
images,
|
||||
align = "right",
|
||||
}: {
|
||||
images: UIImage[];
|
||||
align?: "left" | "right";
|
||||
}) {
|
||||
const { t } = useTranslation();
|
||||
// Only real-URL images can open in the lightbox; historical-replay
|
||||
// placeholders (no URL) have nothing to zoom into.
|
||||
@ -98,7 +180,12 @@ function UserImages({ images }: { images: UIImage[] }) {
|
||||
|
||||
return (
|
||||
<>
|
||||
<div className="ml-auto flex flex-wrap items-end justify-end gap-2">
|
||||
<div
|
||||
className={cn(
|
||||
"flex flex-wrap items-end gap-2",
|
||||
align === "right" ? "ml-auto justify-end" : "mr-auto justify-start",
|
||||
)}
|
||||
>
|
||||
{images.map((img, i) => (
|
||||
<UserImageCell
|
||||
key={`${img.url ?? "placeholder"}-${i}`}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
|
||||
import { useClient } from "@/providers/ClientProvider";
|
||||
import { toMediaAttachment } from "@/lib/media";
|
||||
import type { StreamError } from "@/lib/nanobot-client";
|
||||
import type {
|
||||
InboundEvent,
|
||||
@ -148,6 +149,10 @@ export function useNanobotStream(
|
||||
return;
|
||||
}
|
||||
|
||||
const media = ev.media_urls?.length
|
||||
? ev.media_urls.map((m) => toMediaAttachment(m))
|
||||
: ev.media?.map((url) => toMediaAttachment({ url }));
|
||||
|
||||
// A complete (non-streamed) assistant message. If a stream was in
|
||||
// flight, drop the placeholder so we don't render the text twice.
|
||||
const activeId = buffer.current?.messageId;
|
||||
@ -162,6 +167,7 @@ export function useNanobotStream(
|
||||
role: "assistant",
|
||||
content: ev.text,
|
||||
createdAt: Date.now(),
|
||||
...(media && media.length > 0 ? { media } : {}),
|
||||
},
|
||||
];
|
||||
});
|
||||
|
||||
@ -9,6 +9,7 @@ import {
|
||||
listSessions,
|
||||
} from "@/lib/api";
|
||||
import { deriveTitle } from "@/lib/format";
|
||||
import { toMediaAttachment } from "@/lib/media";
|
||||
import type { ChatSummary, UIMessage } from "@/lib/types";
|
||||
|
||||
const EMPTY_MESSAGES: UIMessage[] = [];
|
||||
@ -123,17 +124,16 @@ export function useSessionHistory(key: string | null): {
|
||||
const ui: UIMessage[] = body.messages.flatMap((m, idx) => {
|
||||
if (m.role !== "user" && m.role !== "assistant") return [];
|
||||
if (typeof m.content !== "string") return [];
|
||||
// Hydrate signed media URLs into the bubble's ``images`` slot so
|
||||
// historical user turns render real previews (the live-send path
|
||||
// uses data URLs; both shapes converge on the same ``UIImage``).
|
||||
// Hydrate signed media URLs into generic UI attachments. Image-only
|
||||
// user turns still populate the legacy ``images`` slot so the
|
||||
// existing optimistic-send and lightbox paths remain unchanged.
|
||||
const media =
|
||||
Array.isArray(m.media_urls) && m.media_urls.length > 0
|
||||
? m.media_urls.map((mu) => toMediaAttachment(mu))
|
||||
: undefined;
|
||||
const images =
|
||||
m.role === "user" &&
|
||||
Array.isArray(m.media_urls) &&
|
||||
m.media_urls.length > 0
|
||||
? m.media_urls.map((mu) => ({
|
||||
url: mu.url,
|
||||
name: mu.name,
|
||||
}))
|
||||
m.role === "user" && media?.every((item) => item.kind === "image")
|
||||
? media.map((item) => ({ url: item.url, name: item.name }))
|
||||
: undefined;
|
||||
return [
|
||||
{
|
||||
@ -142,6 +142,7 @@ export function useSessionHistory(key: string | null): {
|
||||
content: m.content,
|
||||
createdAt: m.timestamp ? Date.parse(m.timestamp) : Date.now(),
|
||||
...(images ? { images } : {}),
|
||||
...(media ? { media } : {}),
|
||||
},
|
||||
];
|
||||
});
|
||||
|
||||
59
webui/src/lib/media.ts
Normal file
59
webui/src/lib/media.ts
Normal file
@ -0,0 +1,59 @@
|
||||
import type { UIMediaAttachment, UIMediaKind } from "@/lib/types";
|
||||
|
||||
/** Lowercase extensions (leading dot included) that render as inline images. */
const IMAGE_EXTENSIONS = new Set([
  ".png",
  ".jpg",
  ".jpeg",
  ".gif",
  ".webp",
  ".bmp",
  ".ico",
  ".tif",
  ".tiff",
]);

/** Lowercase extensions (leading dot included) that render as video attachments. */
const VIDEO_EXTENSIONS = new Set([
  ".mp4",
  ".webm",
  ".mov",
  ".m4v",
  ".avi",
  ".mkv",
  ".3gp",
]);

/** Length shared by the `data:image/` and `data:video/` prefixes. */
const DATA_MEDIA_PREFIX_LENGTH = "data:image/".length;

/** Lowercased path part of a URL: everything before the first `?` or `#`. */
function cleanPath(value: string): string {
  return value.split(/[?#]/, 1)[0]?.toLowerCase() ?? "";
}

/**
 * Lowercased extension (with leading dot) of the last path segment, or `""`.
 *
 * @param value - A URL or a plain file name.
 * @param isUrl - When true (default) the query string and fragment are
 *   stripped first. Pass false for display names, where `?` and `#` are
 *   ordinary filename characters (e.g. `take #3.mp4`).
 */
function extensionOf(value?: string, isUrl = true): string {
  if (!value) return "";
  const path = isUrl ? cleanPath(value) : value.toLowerCase();
  // Only the final segment can carry the file extension; a dot in a parent
  // directory must not be mistaken for one.
  const base = path.slice(path.lastIndexOf("/") + 1);
  const dot = base.lastIndexOf(".");
  return dot < 0 ? "" : base.slice(dot);
}

/** Maps a known extension to its media kind; `undefined` when unrecognised. */
function kindForExtension(ext: string): UIMediaKind | undefined {
  if (IMAGE_EXTENSIONS.has(ext)) return "image";
  if (VIDEO_EXTENSIONS.has(ext)) return "video";
  return undefined;
}

/**
 * Classifies an attachment as `"image"`, `"video"` or `"file"`.
 *
 * Resolution order:
 * 1. `data:image/…` / `data:video/…` URLs are decided by their MIME prefix.
 * 2. A recognised extension on `name`.
 * 3. A recognised extension on the URL path (skipped for other `data:` URLs,
 *    whose payload is not a path).
 * 4. Otherwise `"file"`.
 */
export function inferMediaKind(media: { url?: string; name?: string }): UIMediaKind {
  const url = media.url ?? "";
  // Scheme and MIME type are case-insensitive; only inspect the short prefix
  // so a large base64 payload is never copied.
  const head = url.slice(0, DATA_MEDIA_PREFIX_LENGTH).toLowerCase();
  if (head === "data:image/") return "image";
  if (head === "data:video/") return "video";

  const fromName = kindForExtension(extensionOf(media.name, false));
  if (fromName !== undefined) return fromName;

  const fromUrl = head.startsWith("data:")
    ? undefined
    : kindForExtension(extensionOf(url));
  return fromUrl ?? "file";
}

/**
 * Normalises a loosely-typed media descriptor into a `UIMediaAttachment`.
 * An explicit `kind` is kept as-is; otherwise it is inferred from the URL
 * and name via `inferMediaKind`.
 */
export function toMediaAttachment(media: {
  url?: string;
  name?: string;
  kind?: UIMediaKind;
}): UIMediaAttachment {
  return {
    kind: media.kind ?? inferMediaKind(media),
    url: media.url,
    name: media.name,
  };
}
|
||||
|
||||
@ -22,6 +22,14 @@ export interface UIImage {
|
||||
name?: string;
|
||||
}
|
||||
|
||||
/** Rendering category of an attachment: inline image, video, or generic file. */
export type UIMediaKind = "image" | "video" | "file";
|
||||
|
||||
/**
 * One UI-renderable attachment on a message.
 *
 * - `kind`: how the attachment should be rendered.
 * - `url`: optional source the browser can load; may be absent.
 * - `name`: optional display / file name.
 */
export interface UIMediaAttachment {
|
||||
kind: UIMediaKind;
|
||||
url?: string;
|
||||
name?: string;
|
||||
}
|
||||
|
||||
export interface UIMessage {
|
||||
id: string;
|
||||
role: Role;
|
||||
@ -34,6 +42,8 @@ export interface UIMessage {
|
||||
traces?: string[];
|
||||
/** User turn: optimistic blob URLs for preview. Replay: placeholder chips. */
|
||||
images?: UIImage[];
|
||||
/** Signed or local UI-renderable media attachments. */
|
||||
media?: UIMediaAttachment[];
|
||||
}
|
||||
|
||||
export interface ChatSummary {
|
||||
@ -71,6 +81,7 @@ export type InboundEvent =
|
||||
text: string;
|
||||
reply_to?: string;
|
||||
media?: string[];
|
||||
media_urls?: Array<{ url: string; name?: string }>;
|
||||
/** Present when the frame is an agent breadcrumb (e.g. tool hint,
|
||||
* generic progress line) rather than a conversational reply. */
|
||||
kind?: "tool_hint" | "progress";
|
||||
|
||||
@ -40,4 +40,28 @@ describe("MessageBubble", () => {
|
||||
fireEvent.click(toggle);
|
||||
expect(screen.queryByText('weather("get")')).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("renders video media as an inline player", () => {
|
||||
const message: UIMessage = {
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
content: "here is the clip",
|
||||
createdAt: Date.now(),
|
||||
media: [
|
||||
{
|
||||
kind: "video",
|
||||
url: "/api/media/sig/payload",
|
||||
name: "demo.mp4",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const { container } = render(<MessageBubble message={message} />);
|
||||
|
||||
expect(screen.getByText("here is the clip")).toBeInTheDocument();
|
||||
const video = screen.getByLabelText(/video attachment/i);
|
||||
expect(video.tagName).toBe("VIDEO");
|
||||
expect(video).toHaveAttribute("src", "/api/media/sig/payload");
|
||||
expect(container.querySelector("video[controls]")).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
@ -92,4 +92,25 @@ describe("useNanobotStream", () => {
|
||||
expect(result.current.messages[1].role).toBe("assistant");
|
||||
expect(result.current.messages[1].kind).toBeUndefined();
|
||||
});
|
||||
|
||||
it("attaches assistant media_urls to complete messages", () => {
|
||||
const fake = fakeClient();
|
||||
const { result } = renderHook(() => useNanobotStream("chat-m", []), {
|
||||
wrapper: wrap(fake.client),
|
||||
});
|
||||
|
||||
act(() => {
|
||||
fake.emit("chat-m", {
|
||||
event: "message",
|
||||
chat_id: "chat-m",
|
||||
text: "video ready",
|
||||
media_urls: [{ url: "/api/media/sig/payload", name: "demo.mp4" }],
|
||||
});
|
||||
});
|
||||
|
||||
expect(result.current.messages).toHaveLength(1);
|
||||
expect(result.current.messages[0].media).toEqual([
|
||||
{ kind: "video", url: "/api/media/sig/payload", name: "demo.mp4" },
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
@ -130,12 +130,46 @@ describe("useSessions", () => {
|
||||
{ url: "/api/media/sig-1/payload-1", name: "snap.png" },
|
||||
{ url: "/api/media/sig-2/payload-2", name: "diag.jpg" },
|
||||
]);
|
||||
expect(first.media).toEqual([
|
||||
{ kind: "image", url: "/api/media/sig-1/payload-1", name: "snap.png" },
|
||||
{ kind: "image", url: "/api/media/sig-2/payload-2", name: "diag.jpg" },
|
||||
]);
|
||||
expect(second.role).toBe("assistant");
|
||||
expect(second.images).toBeUndefined();
|
||||
expect(third.role).toBe("user");
|
||||
expect(third.images).toBeUndefined();
|
||||
});
|
||||
|
||||
it("hydrates historical assistant video media_urls into media attachments", async () => {
|
||||
vi.mocked(api.fetchSessionMessages).mockResolvedValue({
|
||||
key: "websocket:chat-video",
|
||||
created_at: "2026-04-20T10:00:00Z",
|
||||
updated_at: "2026-04-20T10:05:00Z",
|
||||
messages: [
|
||||
{
|
||||
role: "assistant",
|
||||
content: "clip ready",
|
||||
timestamp: "2026-04-20T10:00:01Z",
|
||||
media_urls: [
|
||||
{ url: "/api/media/sig-v/payload-v", name: "clip.mp4" },
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const { result } = renderHook(() => useSessionHistory("websocket:chat-video"), {
|
||||
wrapper: wrap(fakeClient()),
|
||||
});
|
||||
|
||||
await waitFor(() => expect(result.current.loading).toBe(false));
|
||||
|
||||
expect(result.current.messages[0].role).toBe("assistant");
|
||||
expect(result.current.messages[0].images).toBeUndefined();
|
||||
expect(result.current.messages[0].media).toEqual([
|
||||
{ kind: "video", url: "/api/media/sig-v/payload-v", name: "clip.mp4" },
|
||||
]);
|
||||
});
|
||||
|
||||
it("keeps the session in the list when delete fails", async () => {
|
||||
vi.mocked(api.listSessions).mockResolvedValue([
|
||||
{
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user