def _sign_or_stage_media_path(self, path: Path) -> dict[str, str] | None:
    """Return a signed media URL payload for *path*, staging a copy if needed.

    The path is first offered to :meth:`_sign_media_path` unchanged. When
    that yields no URL (presumably because the file lives outside the media
    root -- confirm against ``_sign_media_path``), the file is copied into
    the ``websocket`` media directory under a random, sanitised name and the
    copy is signed instead. This lets the browser fetch bot-generated files
    through the signed media route without exposing arbitrary filesystem
    paths.

    A staged copy that ends up unusable (the copy failed partway, or the
    copy could not be signed) is removed again so failed sends do not
    accumulate orphan files in the media directory.

    Args:
        path: Candidate attachment location on local disk.

    Returns:
        ``{"url": <signed url>, "name": <original file name>}``, or ``None``
        when *path* is not a regular file, staging raises ``OSError``, or
        the staged copy cannot be signed.
    """

    def _discard(target: Path | None) -> None:
        # Best-effort cleanup: a failure to delete must never mask the
        # original staging problem, so it is only logged at debug level.
        if target is None:
            return
        try:
            target.unlink(missing_ok=True)
        except OSError as cleanup_exc:
            logger.debug(
                "websocket: could not remove staged media {}: {}",
                target,
                cleanup_exc,
            )

    signed = self._sign_media_path(path)
    if signed is not None:
        return {"url": signed, "name": path.name}

    staged: Path | None = None
    try:
        if not path.is_file():
            return None
        media_dir = get_media_dir("websocket")
        safe_name = safe_filename(path.name) or "attachment"
        # Random prefix keeps two attachments with the same basename apart.
        staged = media_dir / f"{uuid.uuid4().hex[:12]}-{safe_name}"
        shutil.copyfile(path, staged)
    except OSError as exc:
        logger.warning("websocket: failed to stage outbound media {}: {}", path, exc)
        # copyfile may have left a truncated destination behind.
        _discard(staged)
        return None

    signed = self._sign_media_path(staged)
    if signed is None:
        # Nothing will ever reference the copy, so do not leave it on disk.
        _discard(staged)
        return None
    # Report the original file name, not the prefixed staging name.
    return {"url": signed, "name": path.name}
@pytest.mark.asyncio
async def test_send_stages_external_media_as_signed_url(monkeypatch, tmp_path) -> None:
    """A file outside the media root is copied into the bucket and signed."""
    # Layout: <tmp>/media/websocket is the bucket, <tmp>/clip.mp4 sits outside it.
    root_dir = tmp_path / "media"
    bucket_dir = root_dir / "websocket"
    bucket_dir.mkdir(parents=True)
    outside_file = tmp_path / "clip.mp4"
    outside_file.write_bytes(b"video")

    def fake_media_dir(channel: str | None = None):
        if channel == "websocket":
            return bucket_dir
        return root_dir

    monkeypatch.setattr("nanobot.channels.websocket.get_media_dir", fake_media_dir)

    socket = AsyncMock()
    ws_channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, MagicMock())
    ws_channel._attach(socket, "chat-1")

    outbound = OutboundMessage(
        channel="websocket",
        chat_id="chat-1",
        content="video",
        media=[str(outside_file)],
    )
    await ws_channel.send(outbound)

    frame = json.loads(socket.send.call_args[0][0])
    first_url = frame["media_urls"][0]

    # The raw path list is still forwarded untouched.
    assert frame["media"] == [str(outside_file)]
    # The signed entry keeps the original file name and points at the media route.
    assert first_url["name"] == "clip.mp4"
    assert first_url["url"].startswith("/api/media/")
    # A prefixed copy must now exist inside the websocket bucket.
    staged_names = [entry.name for entry in bucket_dir.iterdir()]
    assert any(name.endswith("-clip.mp4") for name in staged_names)
MagicMock() diff --git a/webui/src/components/MessageBubble.tsx b/webui/src/components/MessageBubble.tsx index 076c3000b..d16115871 100644 --- a/webui/src/components/MessageBubble.tsx +++ b/webui/src/components/MessageBubble.tsx @@ -1,11 +1,11 @@ import { useState } from "react"; -import { ChevronRight, ImageIcon, Wrench } from "lucide-react"; +import { ChevronRight, FileIcon, ImageIcon, PlaySquare, Wrench } from "lucide-react"; import { useTranslation } from "react-i18next"; import { ImageLightbox } from "@/components/ImageLightbox"; import { MarkdownText } from "@/components/MarkdownText"; import { cn } from "@/lib/utils"; -import type { UIImage, UIMessage } from "@/lib/types"; +import type { UIImage, UIMediaAttachment, UIMessage } from "@/lib/types"; interface MessageBubbleProps { message: UIMessage; @@ -29,7 +29,9 @@ export function MessageBubble({ message }: MessageBubbleProps) { if (message.role === "user") { const images = message.images ?? []; + const media = message.media ?? []; const hasImages = images.length > 0; + const hasMedia = media.length > 0; const hasText = message.content.trim().length > 0; return (
- {hasImages ? : null} + {hasImages ? : null} + {!hasImages && hasMedia ? ( + + ) : null} {hasText ? (

{empty && message.isStreaming ? ( @@ -62,12 +68,82 @@ export function MessageBubble({ message }: MessageBubbleProps) { <> {message.content} {message.isStreaming && } + {media.length > 0 ? : null} )}

); } +function MessageMedia({ + media, + align, +}: { + media: UIMediaAttachment[]; + align: "left" | "right"; +}) { + if (media.length === 0) return null; + const images = media + .filter((item) => item.kind === "image") + .map(({ url, name }) => ({ url, name })); + const nonImages = media.filter((item) => item.kind !== "image"); + + return ( +
+ {images.length > 0 ? : null} + {nonImages.map((item, i) => ( + + ))} +
+ ); +} + +function MediaCell({ media }: { media: UIMediaAttachment }) { + const { t } = useTranslation(); + const hasUrl = typeof media.url === "string" && media.url.length > 0; + + if (media.kind === "video" && hasUrl) { + return ( +
+
+ ); + } + + const label = + media.kind === "video" + ? t("message.videoAttachment", { defaultValue: "Video attachment" }) + : t("message.fileAttachment", { defaultValue: "File attachment" }); + const Icon = media.kind === "video" ? PlaySquare : FileIcon; + + return ( +
+ + {media.name ?? label} +
+ ); +} + /** * Right-aligned preview row for images attached to a user turn. * @@ -82,7 +158,13 @@ export function MessageBubble({ message }: MessageBubbleProps) { * have no URL (the backend strips data URLs before persisting), so we * render a labelled placeholder tile instead of a broken ````. */ -function UserImages({ images }: { images: UIImage[] }) { +function UserImages({ + images, + align = "right", +}: { + images: UIImage[]; + align?: "left" | "right"; +}) { const { t } = useTranslation(); // Only real-URL images can open in the lightbox; historical-replay // placeholders (no URL) have nothing to zoom into. @@ -98,7 +180,12 @@ function UserImages({ images }: { images: UIImage[] }) { return ( <> -
+
{images.map((img, i) => ( toMediaAttachment(m)) + : ev.media?.map((url) => toMediaAttachment({ url })); + // A complete (non-streamed) assistant message. If a stream was in // flight, drop the placeholder so we don't render the text twice. const activeId = buffer.current?.messageId; @@ -162,6 +167,7 @@ export function useNanobotStream( role: "assistant", content: ev.text, createdAt: Date.now(), + ...(media && media.length > 0 ? { media } : {}), }, ]; }); diff --git a/webui/src/hooks/useSessions.ts b/webui/src/hooks/useSessions.ts index ea51c2206..719d4ce16 100644 --- a/webui/src/hooks/useSessions.ts +++ b/webui/src/hooks/useSessions.ts @@ -9,6 +9,7 @@ import { listSessions, } from "@/lib/api"; import { deriveTitle } from "@/lib/format"; +import { toMediaAttachment } from "@/lib/media"; import type { ChatSummary, UIMessage } from "@/lib/types"; const EMPTY_MESSAGES: UIMessage[] = []; @@ -123,17 +124,16 @@ export function useSessionHistory(key: string | null): { const ui: UIMessage[] = body.messages.flatMap((m, idx) => { if (m.role !== "user" && m.role !== "assistant") return []; if (typeof m.content !== "string") return []; - // Hydrate signed media URLs into the bubble's ``images`` slot so - // historical user turns render real previews (the live-send path - // uses data URLs; both shapes converge on the same ``UIImage``). + // Hydrate signed media URLs into generic UI attachments. Image-only + // user turns still populate the legacy ``images`` slot so the + // existing optimistic-send and lightbox paths remain unchanged. + const media = + Array.isArray(m.media_urls) && m.media_urls.length > 0 + ? m.media_urls.map((mu) => toMediaAttachment(mu)) + : undefined; const images = - m.role === "user" && - Array.isArray(m.media_urls) && - m.media_urls.length > 0 - ? m.media_urls.map((mu) => ({ - url: mu.url, - name: mu.name, - })) + m.role === "user" && media?.every((item) => item.kind === "image") + ? 
import type { UIMediaAttachment, UIMediaKind } from "@/lib/types";

/** Lower-case file extensions (with leading dot) rendered as images. */
const IMAGE_EXTENSIONS = new Set([
  ".png",
  ".jpg",
  ".jpeg",
  ".gif",
  ".webp",
  ".bmp",
  ".ico",
  ".tif",
  ".tiff",
]);

/** Lower-case file extensions (with leading dot) rendered as videos. */
const VIDEO_EXTENSIONS = new Set([
  ".mp4",
  ".webm",
  ".mov",
  ".m4v",
  ".avi",
  ".mkv",
  ".3gp",
]);

/** Drop any query string / fragment and lower-case the remainder. */
function cleanPath(value: string): string {
  return value.split(/[?#]/, 1)[0]?.toLowerCase() ?? "";
}

/**
 * Return the lower-cased extension (including the dot) of the last path
 * segment of `value`, or `""` when there is none.
 *
 * Only the final segment is inspected so a dot in a directory or host part
 * (`/v1.2/download`) is not mistaken for the start of an extension.
 */
function extensionOf(value?: string): string {
  if (!value) return "";
  const path = cleanPath(value);
  const segment = path.slice(path.lastIndexOf("/") + 1);
  const dot = segment.lastIndexOf(".");
  if (dot < 0) return "";
  return segment.slice(dot);
}

/** Map a known extension to its media kind; `undefined` when unrecognised. */
function kindOfExtension(ext: string): UIMediaKind | undefined {
  if (IMAGE_EXTENSIONS.has(ext)) return "image";
  if (VIDEO_EXTENSIONS.has(ext)) return "video";
  return undefined;
}

/**
 * Classify an attachment as `"image"`, `"video"` or `"file"`.
 *
 * `data:image/` and `data:video/` URLs are decided by their prefix.
 * Otherwise the display name is consulted first, then the URL, and the
 * first one carrying a recognised extension wins. A name whose suffix is
 * not a known extension (e.g. `Screenshot at 10.30.15`) therefore no longer
 * prevents the URL from being checked. Anything unrecognised is `"file"`.
 */
export function inferMediaKind(media: { url?: string; name?: string }): UIMediaKind {
  const url = media.url ?? "";
  if (url.startsWith("data:image/")) return "image";
  if (url.startsWith("data:video/")) return "video";

  for (const candidate of [media.name, url]) {
    const kind = kindOfExtension(extensionOf(candidate));
    if (kind) return kind;
  }
  return "file";
}

/**
 * Normalise a loosely-typed media record into a `UIMediaAttachment`.
 *
 * An explicit `kind` is kept as-is; otherwise it is inferred via
 * `inferMediaKind`. `url` and `name` are passed through unchanged.
 */
export function toMediaAttachment(media: {
  url?: string;
  name?: string;
  kind?: UIMediaKind;
}): UIMediaAttachment {
  return {
    kind: media.kind ?? inferMediaKind(media),
    url: media.url,
    name: media.name,
  };
}
*/ kind?: "tool_hint" | "progress"; diff --git a/webui/src/tests/message-bubble.test.tsx b/webui/src/tests/message-bubble.test.tsx index 80c24018a..e8dec29ab 100644 --- a/webui/src/tests/message-bubble.test.tsx +++ b/webui/src/tests/message-bubble.test.tsx @@ -40,4 +40,28 @@ describe("MessageBubble", () => { fireEvent.click(toggle); expect(screen.queryByText('weather("get")')).not.toBeInTheDocument(); }); + + it("renders video media as an inline player", () => { + const message: UIMessage = { + id: "a1", + role: "assistant", + content: "here is the clip", + createdAt: Date.now(), + media: [ + { + kind: "video", + url: "/api/media/sig/payload", + name: "demo.mp4", + }, + ], + }; + + const { container } = render(); + + expect(screen.getByText("here is the clip")).toBeInTheDocument(); + const video = screen.getByLabelText(/video attachment/i); + expect(video.tagName).toBe("VIDEO"); + expect(video).toHaveAttribute("src", "/api/media/sig/payload"); + expect(container.querySelector("video[controls]")).toBeInTheDocument(); + }); }); diff --git a/webui/src/tests/useNanobotStream.test.tsx b/webui/src/tests/useNanobotStream.test.tsx index 91b3036c6..6485980cc 100644 --- a/webui/src/tests/useNanobotStream.test.tsx +++ b/webui/src/tests/useNanobotStream.test.tsx @@ -92,4 +92,25 @@ describe("useNanobotStream", () => { expect(result.current.messages[1].role).toBe("assistant"); expect(result.current.messages[1].kind).toBeUndefined(); }); + + it("attaches assistant media_urls to complete messages", () => { + const fake = fakeClient(); + const { result } = renderHook(() => useNanobotStream("chat-m", []), { + wrapper: wrap(fake.client), + }); + + act(() => { + fake.emit("chat-m", { + event: "message", + chat_id: "chat-m", + text: "video ready", + media_urls: [{ url: "/api/media/sig/payload", name: "demo.mp4" }], + }); + }); + + expect(result.current.messages).toHaveLength(1); + expect(result.current.messages[0].media).toEqual([ + { kind: "video", url: "/api/media/sig/payload", name: 
"demo.mp4" }, + ]); + }); }); diff --git a/webui/src/tests/useSessions.test.tsx b/webui/src/tests/useSessions.test.tsx index aaabe3d82..ad4f1c1af 100644 --- a/webui/src/tests/useSessions.test.tsx +++ b/webui/src/tests/useSessions.test.tsx @@ -130,12 +130,46 @@ describe("useSessions", () => { { url: "/api/media/sig-1/payload-1", name: "snap.png" }, { url: "/api/media/sig-2/payload-2", name: "diag.jpg" }, ]); + expect(first.media).toEqual([ + { kind: "image", url: "/api/media/sig-1/payload-1", name: "snap.png" }, + { kind: "image", url: "/api/media/sig-2/payload-2", name: "diag.jpg" }, + ]); expect(second.role).toBe("assistant"); expect(second.images).toBeUndefined(); expect(third.role).toBe("user"); expect(third.images).toBeUndefined(); }); + it("hydrates historical assistant video media_urls into media attachments", async () => { + vi.mocked(api.fetchSessionMessages).mockResolvedValue({ + key: "websocket:chat-video", + created_at: "2026-04-20T10:00:00Z", + updated_at: "2026-04-20T10:05:00Z", + messages: [ + { + role: "assistant", + content: "clip ready", + timestamp: "2026-04-20T10:00:01Z", + media_urls: [ + { url: "/api/media/sig-v/payload-v", name: "clip.mp4" }, + ], + }, + ], + }); + + const { result } = renderHook(() => useSessionHistory("websocket:chat-video"), { + wrapper: wrap(fakeClient()), + }); + + await waitFor(() => expect(result.current.loading).toBe(false)); + + expect(result.current.messages[0].role).toBe("assistant"); + expect(result.current.messages[0].images).toBeUndefined(); + expect(result.current.messages[0].media).toEqual([ + { kind: "video", url: "/api/media/sig-v/payload-v", name: "clip.mp4" }, + ]); + }); + it("keeps the session in the list when delete fails", async () => { vi.mocked(api.listSessions).mockResolvedValue([ {