fix(webui): re-sign replayed assistant media

This commit is contained in:
Xubin Ren 2026-06-02 15:49:12 +08:00
parent a371907809
commit 21c60b0c97
4 changed files with 126 additions and 22 deletions

View File

@ -27,16 +27,16 @@ from websockets.exceptions import ConnectionClosed
from websockets.http11 import Request as WsRequest
from websockets.http11 import Response
from nanobot.security.workspace_access import (
WORKSPACE_SCOPE_METADATA_KEY,
WorkspaceScopeError,
)
from nanobot.bus.events import OUTBOUND_META_AGENT_UI, OutboundMessage
from nanobot.bus.queue import MessageBus
from nanobot.channels.base import BaseChannel
from nanobot.command.builtin import builtin_command_palette
from nanobot.config.paths import get_media_dir, get_workspace_path
from nanobot.config.schema import Base
from nanobot.security.workspace_access import (
WORKSPACE_SCOPE_METADATA_KEY,
WorkspaceScopeError,
)
from nanobot.session.goal_state import goal_state_ws_blob
from nanobot.session.webui_turns import websocket_turn_wall_started_at
from nanobot.utils.media_decode import (
@ -44,14 +44,14 @@ from nanobot.utils.media_decode import (
save_base64_data_url,
)
from nanobot.utils.subagent_channel_display import scrub_subagent_messages_for_channel
from nanobot.webui.settings_api import runtime_capabilities
from nanobot.webui.cli_apps_api import normalize_cli_app_mentions
from nanobot.webui.mcp_presets_api import normalize_mcp_preset_mentions
from nanobot.webui.media_api import (
serve_signed_media,
sign_media_path,
sign_or_stage_media_path,
)
from nanobot.webui.mcp_presets_api import normalize_mcp_preset_mentions
from nanobot.webui.settings_api import runtime_capabilities
from nanobot.webui.settings_routes import WebUISettingsRouter
from nanobot.webui.sidebar_state import (
read_webui_sidebar_state,
@ -990,7 +990,8 @@ class WebSocketChannel(BaseChannel):
scope = self._webui_workspaces.scope_for_session_key(decoded_key)
data = build_webui_thread_response(
decoded_key,
augment_user_media=self._augment_transcript_user_media,
augment_user_media=self._augment_transcript_media,
augment_assistant_media=self._augment_transcript_media,
augment_assistant_text=lambda text: rewrite_local_markdown_images(
text,
workspace_path=scope.project_path,
@ -1010,7 +1011,7 @@ class WebSocketChannel(BaseChannel):
except (ValueError, TypeError) as e:
self.logger.warning("webui transcript append failed: {}", e)
def _augment_transcript_user_media(self, paths: list[str]) -> list[dict[str, Any]]:
def _augment_transcript_media(self, paths: list[str]) -> list[dict[str, Any]]:
out: list[dict[str, Any]] = []
for pstr in paths:
path = Path(pstr)
@ -1018,7 +1019,12 @@ class WebSocketChannel(BaseChannel):
if att is None:
continue
mime, _ = mimetypes.guess_type(path.name)
kind = "video" if mime and mime.startswith("video/") else "image"
if mime and mime.startswith("video/"):
kind = "video"
elif mime and mime.startswith("image/"):
kind = "image"
else:
kind = "file"
out.append(
{"kind": kind, "url": att["url"], "name": att.get("name", path.name)},
)

View File

@ -353,17 +353,36 @@ def _merge_unique_tool_trace_lines(
return traces, added
def _media_from_signed_urls(value: Any) -> list[dict[str, Any]]:
    """Convert persisted signed-URL records into UI media attachment dicts.

    ``value`` is only consulted when it is a list; any other type yields an
    empty result. Entries that are not dicts, or that have no truthy ``"url"``,
    are skipped. Every kept entry becomes ``{"kind", "url", "name"}``, with the
    kind derived from the attachment name via ``_media_kind_from_name`` and a
    missing/empty name normalised to ``""``.
    """
    if not isinstance(value, list):
        return []
    attachments: list[dict[str, Any]] = []
    for entry in value:
        # Ignore malformed records instead of failing the whole replay.
        if not isinstance(entry, dict) or not entry.get("url"):
            continue
        label = str(entry.get("name") or "")
        attachments.append(
            {
                "kind": _media_kind_from_name(label),
                "url": str(entry["url"]),
                "name": label,
            },
        )
    return attachments
def replay_transcript_to_ui_messages(
lines: list[dict[str, Any]],
*,
augment_user_media: Callable[[list[str]], list[dict[str, Any]]] | None = None,
augment_assistant_media: Callable[[list[str]], list[dict[str, Any]]] | None = None,
augment_assistant_text: Callable[[str], str] | None = None,
) -> list[dict[str, Any]]:
"""Fold JSONL records into ``UIMessage``-shaped dicts for the WebUI.
Mirrors the core fold in ``useNanobotStream.ts`` (delta, reasoning,
message+kind, turn_end). ``augment_user_media`` maps persisted filesystem
paths to ``{url, name?}`` / attachment dicts the client expects.
paths to ``{url, name?}`` / attachment dicts the client expects. Assistant
media gets a separate hook so replay can re-sign outbound attachments after
a gateway restart instead of reusing stale process-local signed URLs.
"""
messages: list[dict[str, Any]] = []
buffer_message_id: str | None = None
@ -832,19 +851,14 @@ def replay_transcript_to_ui_messages(
buffer_parts = []
text = rec.get("text")
content_s = text if isinstance(text, str) else ""
media_urls = rec.get("media_urls")
media: list[dict[str, Any]] = []
if isinstance(media_urls, list):
for m in media_urls:
if isinstance(m, dict) and m.get("url"):
name = str(m.get("name") or "")
media.append(
{
"kind": _media_kind_from_name(name),
"url": str(m["url"]),
"name": name,
},
)
raw_media = rec.get("media")
raw_media_list = raw_media if isinstance(raw_media, list) else []
media_paths = [path for path in raw_media_list if isinstance(path, str) and path]
if media_paths and augment_assistant_media is not None:
media = augment_assistant_media(media_paths)
if not media and (not media_paths or augment_assistant_media is None):
media = _media_from_signed_urls(rec.get("media_urls"))
extra: dict[str, Any] = {"content": content_s}
if media:
extra["media"] = media
@ -888,6 +902,7 @@ def build_webui_thread_response(
session_key: str,
*,
augment_user_media: Callable[[list[str]], list[dict[str, Any]]] | None = None,
augment_assistant_media: Callable[[list[str]], list[dict[str, Any]]] | None = None,
augment_assistant_text: Callable[[str], str] | None = None,
) -> dict[str, Any] | None:
"""Return a payload compatible with ``WebuiThreadPersistedPayload``."""
@ -897,6 +912,7 @@ def build_webui_thread_response(
msgs = replay_transcript_to_ui_messages(
lines,
augment_user_media=augment_user_media,
augment_assistant_media=augment_assistant_media,
augment_assistant_text=augment_assistant_text,
)
return {

View File

@ -514,6 +514,66 @@ async def test_session_routes_accept_percent_encoded_websocket_keys(
await server_task
@pytest.mark.asyncio
async def test_webui_thread_resigns_assistant_media_urls(
    bus: MagicMock, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """The webui-thread route returns a fresh, fetchable URL for assistant media.

    A transcript record carrying both a filesystem ``media`` path and an old
    ``media_urls`` entry is written, then the thread is fetched over HTTP. The
    returned attachment must keep its kind and name, use an ``/api/media/`` URL
    different from the stored one, and that URL must serve the file's bytes.
    """
    from nanobot.webui.transcript import append_transcript_object

    monkeypatch.setattr("nanobot.config.paths.get_data_dir", lambda: tmp_path)
    media_root = tmp_path / "media"
    ws_media_dir = media_root / "websocket"
    ws_media_dir.mkdir(parents=True)
    # The attachment deliberately lives outside the media root.
    clip_path = tmp_path / "clip.mp4"
    clip_path.write_bytes(b"video")

    def fake_media_dir(channel: str | None = None) -> Path:
        if channel == "websocket":
            return ws_media_dir
        return media_root

    monkeypatch.setattr("nanobot.channels.websocket.get_media_dir", fake_media_dir)

    append_transcript_object(
        "websocket:video-replay",
        {"event": "user", "chat_id": "video-replay", "text": "make a video"},
    )
    append_transcript_object(
        "websocket:video-replay",
        {
            "event": "message",
            "chat_id": "video-replay",
            "text": "video ready",
            "media": [str(clip_path)],
            # Stored URL that the replay is expected NOT to hand back.
            "media_urls": [{"url": "/api/media/old-sig/old-payload", "name": "clip.mp4"}],
        },
    )

    channel = _ch(bus, port=29914)
    server_task = asyncio.create_task(channel.start())
    # Give the server a moment to start listening before issuing requests.
    await asyncio.sleep(0.3)
    try:
        bootstrap = await _http_get("http://127.0.0.1:29914/webui/bootstrap")
        token = bootstrap.json()["token"]
        auth_headers = {"Authorization": f"Bearer {token}"}

        thread_resp = await _http_get(
            "http://127.0.0.1:29914/api/sessions/websocket:video-replay/webui-thread",
            headers=auth_headers,
        )
        assert thread_resp.status_code == 200

        assistant_msg = next(
            m for m in thread_resp.json()["messages"] if m["role"] == "assistant"
        )
        media = assistant_msg["media"]
        assert media[0]["kind"] == "video"
        assert media[0]["name"] == "clip.mp4"
        assert media[0]["url"].startswith("/api/media/")
        assert media[0]["url"] != "/api/media/old-sig/old-payload"

        # The new URL must actually resolve to the file contents.
        download = await _http_get(f"http://127.0.0.1:29914{media[0]['url']}")
        assert download.status_code == 200
        assert download.content == b"video"
    finally:
        await channel.stop()
        await server_task
@pytest.mark.asyncio
async def test_session_routes_reject_non_websocket_keys(
bus: MagicMock, tmp_path: Path

View File

@ -84,6 +84,28 @@ def test_replay_infers_video_media_from_attachment_name() -> None:
]
def test_replay_resigns_assistant_media_paths_before_stale_urls() -> None:
    """Hook output for ``media`` paths wins over the record's ``media_urls``.

    The assistant record carries both a filesystem path and an old signed URL;
    with ``augment_assistant_media`` supplied, the replayed message must expose
    exactly what the hook returned.
    """
    transcript = [
        {"event": "user", "chat_id": "t-video-resign", "text": "render"},
        {
            "event": "message",
            "chat_id": "t-video-resign",
            "text": "video ready",
            "media": ["/tmp/intro.mp4"],
            "media_urls": [{"url": "/api/media/old-sig/old-payload", "name": "intro.mp4"}],
        },
    ]

    def fresh_media(paths: list[str]) -> list[dict[str, str]]:
        # Build the new URL from the basename of the first persisted path.
        return [
            {"kind": "video", "url": f"/api/media/new-sig/{paths[0].split('/')[-1]}", "name": "intro.mp4"},
        ]

    msgs = replay_transcript_to_ui_messages(
        transcript,
        augment_assistant_media=fresh_media,
    )

    expected_media = [
        {"kind": "video", "url": "/api/media/new-sig/intro.mp4", "name": "intro.mp4"},
    ]
    assert msgs[1]["media"] == expected_media
def test_replay_infers_svg_media_from_attachment_name() -> None:
msgs = replay_transcript_to_ui_messages(
[