diff --git a/nanobot/channels/websocket.py b/nanobot/channels/websocket.py
index 8fab785aa..a8e28317f 100644
--- a/nanobot/channels/websocket.py
+++ b/nanobot/channels/websocket.py
@@ -27,16 +27,16 @@ from websockets.exceptions import ConnectionClosed
 from websockets.http11 import Request as WsRequest
 from websockets.http11 import Response
 
-from nanobot.security.workspace_access import (
-    WORKSPACE_SCOPE_METADATA_KEY,
-    WorkspaceScopeError,
-)
 from nanobot.bus.events import OUTBOUND_META_AGENT_UI, OutboundMessage
 from nanobot.bus.queue import MessageBus
 from nanobot.channels.base import BaseChannel
 from nanobot.command.builtin import builtin_command_palette
 from nanobot.config.paths import get_media_dir, get_workspace_path
 from nanobot.config.schema import Base
+from nanobot.security.workspace_access import (
+    WORKSPACE_SCOPE_METADATA_KEY,
+    WorkspaceScopeError,
+)
 from nanobot.session.goal_state import goal_state_ws_blob
 from nanobot.session.webui_turns import websocket_turn_wall_started_at
 from nanobot.utils.media_decode import (
@@ -44,14 +44,14 @@ from nanobot.utils.media_decode import (
     save_base64_data_url,
 )
 from nanobot.utils.subagent_channel_display import scrub_subagent_messages_for_channel
-from nanobot.webui.settings_api import runtime_capabilities
 from nanobot.webui.cli_apps_api import normalize_cli_app_mentions
+from nanobot.webui.mcp_presets_api import normalize_mcp_preset_mentions
 from nanobot.webui.media_api import (
     serve_signed_media,
     sign_media_path,
     sign_or_stage_media_path,
 )
-from nanobot.webui.mcp_presets_api import normalize_mcp_preset_mentions
+from nanobot.webui.settings_api import runtime_capabilities
 from nanobot.webui.settings_routes import WebUISettingsRouter
 from nanobot.webui.sidebar_state import (
     read_webui_sidebar_state,
@@ -990,7 +990,8 @@ class WebSocketChannel(BaseChannel):
         scope = self._webui_workspaces.scope_for_session_key(decoded_key)
         data = build_webui_thread_response(
             decoded_key,
-            augment_user_media=self._augment_transcript_user_media,
+            augment_user_media=self._augment_transcript_media,
+            augment_assistant_media=self._augment_transcript_media,
             augment_assistant_text=lambda text: rewrite_local_markdown_images(
                 text,
                 workspace_path=scope.project_path,
@@ -1010,7 +1011,7 @@ class WebSocketChannel(BaseChannel):
         except (ValueError, TypeError) as e:
             self.logger.warning("webui transcript append failed: {}", e)
 
-    def _augment_transcript_user_media(self, paths: list[str]) -> list[dict[str, Any]]:
+    def _augment_transcript_media(self, paths: list[str]) -> list[dict[str, Any]]:
         out: list[dict[str, Any]] = []
         for pstr in paths:
             path = Path(pstr)
@@ -1018,7 +1019,12 @@ class WebSocketChannel(BaseChannel):
             if att is None:
                 continue
             mime, _ = mimetypes.guess_type(path.name)
-            kind = "video" if mime and mime.startswith("video/") else "image"
+            if mime and mime.startswith("video/"):
+                kind = "video"
+            elif mime and mime.startswith("image/"):
+                kind = "image"
+            else:
+                kind = "file"
             out.append(
                 {"kind": kind, "url": att["url"], "name": att.get("name", path.name)},
             )
diff --git a/nanobot/webui/transcript.py b/nanobot/webui/transcript.py
index dc888277d..9d7125bca 100644
--- a/nanobot/webui/transcript.py
+++ b/nanobot/webui/transcript.py
@@ -353,17 +353,36 @@ def _merge_unique_tool_trace_lines(
     return traces, added
 
 
+def _media_from_signed_urls(value: Any) -> list[dict[str, Any]]:
+    media: list[dict[str, Any]] = []
+    urls = value if isinstance(value, list) else []
+    for m in urls:
+        if isinstance(m, dict) and m.get("url"):
+            name = str(m.get("name") or "")
+            media.append(
+                {
+                    "kind": _media_kind_from_name(name),
+                    "url": str(m["url"]),
+                    "name": name,
+                },
+            )
+    return media
+
+
 def replay_transcript_to_ui_messages(
     lines: list[dict[str, Any]],
     *,
     augment_user_media: Callable[[list[str]], list[dict[str, Any]]] | None = None,
+    augment_assistant_media: Callable[[list[str]], list[dict[str, Any]]] | None = None,
     augment_assistant_text: Callable[[str], str] | None = None,
 ) -> list[dict[str, Any]]:
     """Fold JSONL records into ``UIMessage``-shaped dicts for the WebUI.
 
     Mirrors the core fold in ``useNanobotStream.ts`` (delta, reasoning,
     message+kind, turn_end). ``augment_user_media`` maps persisted filesystem
-    paths to ``{url, name?}`` / attachment dicts the client expects.
+    paths to ``{url, name?}`` / attachment dicts the client expects. Assistant
+    media gets a separate hook so replay can re-sign outbound attachments after
+    a gateway restart instead of reusing stale process-local signed URLs.
     """
     messages: list[dict[str, Any]] = []
     buffer_message_id: str | None = None
@@ -832,19 +851,14 @@ def replay_transcript_to_ui_messages(
                 buffer_parts = []
             text = rec.get("text")
             content_s = text if isinstance(text, str) else ""
-            media_urls = rec.get("media_urls")
             media: list[dict[str, Any]] = []
-            if isinstance(media_urls, list):
-                for m in media_urls:
-                    if isinstance(m, dict) and m.get("url"):
-                        name = str(m.get("name") or "")
-                        media.append(
-                            {
-                                "kind": _media_kind_from_name(name),
-                                "url": str(m["url"]),
-                                "name": name,
-                            },
-                        )
+            raw_media = rec.get("media")
+            raw_media_list = raw_media if isinstance(raw_media, list) else []
+            media_paths = [path for path in raw_media_list if isinstance(path, str) and path]
+            if media_paths and augment_assistant_media is not None:
+                media = augment_assistant_media(media_paths)
+            if not media and (not media_paths or augment_assistant_media is None):
+                media = _media_from_signed_urls(rec.get("media_urls"))
             extra: dict[str, Any] = {"content": content_s}
             if media:
                 extra["media"] = media
@@ -888,6 +902,7 @@ def build_webui_thread_response(
     session_key: str,
     *,
     augment_user_media: Callable[[list[str]], list[dict[str, Any]]] | None = None,
+    augment_assistant_media: Callable[[list[str]], list[dict[str, Any]]] | None = None,
     augment_assistant_text: Callable[[str], str] | None = None,
 ) -> dict[str, Any] | None:
     """Return a payload compatible with ``WebuiThreadPersistedPayload``."""
@@ -897,6 +912,7 @@ def build_webui_thread_response(
     msgs = replay_transcript_to_ui_messages(
         lines,
         augment_user_media=augment_user_media,
+        augment_assistant_media=augment_assistant_media,
         augment_assistant_text=augment_assistant_text,
     )
     return {
diff --git a/tests/channels/test_websocket_http_routes.py b/tests/channels/test_websocket_http_routes.py
index 980004953..ddf771c13 100644
--- a/tests/channels/test_websocket_http_routes.py
+++ b/tests/channels/test_websocket_http_routes.py
@@ -514,6 +514,66 @@ async def test_session_routes_accept_percent_encoded_websocket_keys(
         await server_task
 
 
+@pytest.mark.asyncio
+async def test_webui_thread_resigns_assistant_media_urls(
+    bus: MagicMock, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    from nanobot.webui.transcript import append_transcript_object
+
+    monkeypatch.setattr("nanobot.config.paths.get_data_dir", lambda: tmp_path)
+    media_root = tmp_path / "media"
+    websocket_media = media_root / "websocket"
+    websocket_media.mkdir(parents=True)
+    external = tmp_path / "clip.mp4"
+    external.write_bytes(b"video")
+
+    def fake_media_dir(channel: str | None = None) -> Path:
+        return websocket_media if channel == "websocket" else media_root
+
+    monkeypatch.setattr("nanobot.channels.websocket.get_media_dir", fake_media_dir)
+
+    append_transcript_object(
+        "websocket:video-replay",
+        {"event": "user", "chat_id": "video-replay", "text": "make a video"},
+    )
+    append_transcript_object(
+        "websocket:video-replay",
+        {
+            "event": "message",
+            "chat_id": "video-replay",
+            "text": "video ready",
+            "media": [str(external)],
+            "media_urls": [{"url": "/api/media/old-sig/old-payload", "name": "clip.mp4"}],
+        },
+    )
+
+    channel = _ch(bus, port=29914)
+    server_task = asyncio.create_task(channel.start())
+    await asyncio.sleep(0.3)
+    try:
+        boot = await _http_get("http://127.0.0.1:29914/webui/bootstrap")
+        token = boot.json()["token"]
+        auth = {"Authorization": f"Bearer {token}"}
+        resp = await _http_get(
+            "http://127.0.0.1:29914/api/sessions/websocket:video-replay/webui-thread",
+            headers=auth,
+        )
+        assert resp.status_code == 200
+        assistant = next(m for m in resp.json()["messages"] if m["role"] == "assistant")
+        media = assistant["media"]
+        assert media[0]["kind"] == "video"
+        assert media[0]["name"] == "clip.mp4"
+        assert media[0]["url"].startswith("/api/media/")
+        assert media[0]["url"] != "/api/media/old-sig/old-payload"
+
+        fetched = await _http_get(f"http://127.0.0.1:29914{media[0]['url']}")
+        assert fetched.status_code == 200
+        assert fetched.content == b"video"
+    finally:
+        await channel.stop()
+        await server_task
+
+
 @pytest.mark.asyncio
 async def test_session_routes_reject_non_websocket_keys(
     bus: MagicMock, tmp_path: Path
diff --git a/tests/utils/test_webui_transcript.py b/tests/utils/test_webui_transcript.py
index 2381c6c3a..167b6b4d9 100644
--- a/tests/utils/test_webui_transcript.py
+++ b/tests/utils/test_webui_transcript.py
@@ -84,6 +84,28 @@ def test_replay_infers_video_media_from_attachment_name() -> None:
     ]
 
 
+def test_replay_resigns_assistant_media_paths_before_stale_urls() -> None:
+    msgs = replay_transcript_to_ui_messages(
+        [
+            {"event": "user", "chat_id": "t-video-resign", "text": "render"},
+            {
+                "event": "message",
+                "chat_id": "t-video-resign",
+                "text": "video ready",
+                "media": ["/tmp/intro.mp4"],
+                "media_urls": [{"url": "/api/media/old-sig/old-payload", "name": "intro.mp4"}],
+            },
+        ],
+        augment_assistant_media=lambda paths: [
+            {"kind": "video", "url": f"/api/media/new-sig/{paths[0].split('/')[-1]}", "name": "intro.mp4"},
+        ],
+    )
+
+    assert msgs[1]["media"] == [
+        {"kind": "video", "url": "/api/media/new-sig/intro.mp4", "name": "intro.mp4"},
+    ]
+
+
 def test_replay_infers_svg_media_from_attachment_name() -> None:
     msgs = replay_transcript_to_ui_messages(
         [