# Mirror of https://github.com/HKUDS/nanobot.git
# Synced 2026-06-15 07:14:08 +00:00
# 1888 lines, 67 KiB, Python
"""Append-only WebUI display transcript (JSONL), separate from agent session."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import binascii
|
|
import json
|
|
import os
|
|
import re
|
|
import shutil
|
|
import time
|
|
import uuid
|
|
from pathlib import Path
|
|
from typing import Any, Callable, Mapping, NamedTuple
|
|
from urllib.parse import unquote, urlparse
|
|
|
|
from loguru import logger
|
|
|
|
from nanobot.config.paths import get_webui_dir
|
|
from nanobot.session.manager import SessionManager
|
|
from nanobot.webui.metadata import WEBUI_MESSAGE_SOURCE_METADATA_KEY, WEBUI_TURN_METADATA_KEY
|
|
|
|
# Version stamp for the WebUI transcript format (not read anywhere in this part of the file).
WEBUI_TRANSCRIPT_SCHEMA_VERSION = 3
# Event name of the UI-only row that marks where a fork starts accepting new turns.
WEBUI_FORK_MARKER_EVENT = "fork_marker"
# 8 MiB: the active transcript is rotated once its file grows past this size, and no
# single serialized record may be larger than this.
_MAX_TRANSCRIPT_FILE_BYTES = 8 * 1024 * 1024
# Byte budget for the turns that stay in the active file after a rotation (half the cap).
_TARGET_ACTIVE_TRANSCRIPT_BYTES = _MAX_TRANSCRIPT_FILE_BYTES // 2
# Expected "version" value of the segments manifest; any other value triggers a rebuild.
_TRANSCRIPT_SEGMENT_MANIFEST_VERSION = 2
# Chunk id that stands for the active (not yet rotated) transcript file.
_TRANSCRIPT_ACTIVE_CHUNK_ID = "active"
# Archived segment files are named with exactly six digits, e.g. "000001.jsonl".
_TRANSCRIPT_SEGMENT_RE = re.compile(r"^\d{6}\.jsonl$")
# Page size used when the caller passes no limit.
_DEFAULT_TRANSCRIPT_PAGE_LIMIT = 160
# Upper bound that any requested page limit is clamped to.
_MAX_TRANSCRIPT_PAGE_LIMIT = 1000
# Client-supplied turn ids must match this pattern; anything else is replaced by a UUID.
_WEBUI_TURN_ID_RE = re.compile(r"^[A-Za-z0-9._:-]{1,128}$")
# Markdown embed: group 1 = alt text, group 2 = target (optionally wrapped in <...>),
# group 3 = optional quoted title including its leading whitespace.
_MARKDOWN_LOCAL_IMAGE_RE = re.compile(
    r"!\[([^\]]*)\]\((<[^>]+>|[^)\s]+)(\s+(?:\"[^\"]*\"|'[^']*'))?\)"
)
# Lower-case file extensions classified as images.
_INLINE_MARKDOWN_IMAGE_EXTS: frozenset[str] = frozenset({
    ".png",
    ".jpg",
    ".jpeg",
    ".webp",
    ".gif",
    ".svg",
})
# Lower-case file extensions classified as videos.
_INLINE_MARKDOWN_VIDEO_EXTS: frozenset[str] = frozenset({
    ".mp4",
    ".mov",
    ".webm",
})
# Only markdown targets with one of these extensions are rewritten to signed URLs.
_INLINE_MARKDOWN_MEDIA_EXTS = _INLINE_MARKDOWN_IMAGE_EXTS | _INLINE_MARKDOWN_VIDEO_EXTS
# NOTE(review): presumably the tool names whose calls are shown as file edits; the
# consumer is outside this part of the file -- confirm.
_FILE_EDIT_TOOL_NAMES: frozenset[str] = frozenset({
    "write_file",
    "edit_file",
    "apply_patch",
})
# Events that mark the start of displayed turn content; a backfilled user row is
# inserted immediately before the first record carrying one of these events.
_TURN_DISPLAY_EVENTS: frozenset[str] = frozenset({
    "reasoning_delta",
    "reasoning_end",
    "delta",
    "stream_end",
    "message",
    "file_edit",
    "turn_end",
})
|
|
|
|
|
|
def rewrite_local_markdown_images(
    text: str,
    *,
    workspace_path: Path,
    sign_path: Callable[[Path], Mapping[str, Any] | None],
) -> str:
    """Rewrite markdown media paths inside the workspace to signed WebUI media URLs.

    Args:
        text: Markdown that may contain ``![alt](target "title")`` embeds.
        workspace_path: Workspace root; only files resolving inside it are rewritten.
        sign_path: Callback that receives the resolved file path and returns a mapping
            with a ``"url"`` entry, or ``None`` when the file cannot be signed.

    Returns:
        ``text`` with each eligible embed pointing at its signed URL (alt text and
        title are preserved). Embeds are left untouched when the target is empty,
        a ``#`` anchor, already under ``/api/media/``, carries a scheme, host, query
        or fragment, has an unsupported extension, resolves outside the workspace,
        is not an existing file, or cannot be signed.
    """
    if "![" not in text:
        return text

    # Resolve the root once: candidates are resolved before the containment check,
    # so an unresolved (relative or symlinked) root would reject every file.
    try:
        workspace_root = workspace_path.resolve(strict=False)
    except OSError:
        workspace_root = workspace_path

    def resolve_url(raw_url: str) -> str | None:
        """Return the signed URL for a local media target, or ``None`` to keep it as is."""
        url = raw_url.strip()
        if url.startswith("<") and url.endswith(">"):
            # Markdown allows the target to be wrapped in angle brackets.
            url = url[1:-1].strip()
        if not url or url.startswith(("/api/media/", "#")):
            return None
        parsed = urlparse(url)
        if parsed.scheme or parsed.netloc or parsed.query or parsed.fragment:
            return None
        path_text = unquote(url)
        if Path(path_text).suffix.lower() not in _INLINE_MARKDOWN_MEDIA_EXTS:
            return None
        candidate = Path(path_text).expanduser()
        if not candidate.is_absolute():
            candidate = workspace_root / candidate
        try:
            resolved = candidate.resolve(strict=False)
            # Raises ValueError when the file escapes the workspace (e.g. via "..").
            resolved.relative_to(workspace_root)
        except (OSError, ValueError):
            return None
        if not resolved.is_file():
            return None
        signed = sign_path(resolved)
        return str(signed.get("url")) if signed and signed.get("url") else None

    def replace(match: re.Match[str]) -> str:
        """Rebuild one embed around its signed URL, or return it unchanged."""
        signed_url = resolve_url(match.group(2))
        if not signed_url:
            return match.group(0)
        # Group 3 already includes the whitespace that separates it from the target.
        title = match.group(3) or ""
        return f"![{match.group(1)}]({signed_url}{title})"

    return _MARKDOWN_LOCAL_IMAGE_RE.sub(replace, text)
|
|
|
|
|
|
def _media_kind_from_name(name: str) -> str:
    """Classify a file name as ``"image"``, ``"video"`` or ``"file"`` by its extension."""
    suffix = Path(name).suffix.lower()
    known_kinds = (
        ("image", _INLINE_MARKDOWN_IMAGE_EXTS),
        ("video", _INLINE_MARKDOWN_VIDEO_EXTS),
    )
    for kind, extensions in known_kinds:
        if suffix in extensions:
            return kind
    # Anything that is neither a known image nor a known video extension.
    return "file"
|
|
|
|
|
|
def webui_transcript_path(session_key: str) -> Path:
    """Return the path of the active JSONL transcript for ``session_key``."""
    file_name = f"{SessionManager.safe_key(session_key)}.jsonl"
    return get_webui_dir() / file_name
|
|
|
|
|
|
def webui_transcript_segments_dir(session_key: str) -> Path:
    """Return the directory that holds archived transcript segments for ``session_key``."""
    dir_name = f"{SessionManager.safe_key(session_key)}.segments"
    return get_webui_dir() / dir_name
|
|
|
|
|
|
def _webui_transcript_manifest_path(session_key: str) -> Path:
    """Return the path of ``manifest.json`` inside the session's segments directory."""
    segments_dir = webui_transcript_segments_dir(session_key)
    return segments_dir / "manifest.json"
|
|
|
|
|
|
def _legacy_webui_thread_path(session_key: str) -> Path:
    """Return the path of the legacy ``.json`` thread file for ``session_key``."""
    file_name = f"{SessionManager.safe_key(session_key)}.json"
    return get_webui_dir() / file_name
|
|
|
|
|
|
class _TranscriptTurnRef(NamedTuple):
    """One selected turn together with its position in the whole transcript."""

    # Zero-based index of the turn, counted across archived segments and the active file.
    ordinal: int
    # Transcript rows that make up the turn, in file order.
    records: list[dict[str, Any]]
|
|
|
|
|
|
class _TranscriptChunkRef(NamedTuple):
    """Summary of one transcript chunk (an archived segment or the active file)."""

    # Segment id such as "000001", or the active-chunk id for the live file.
    chunk_id: str
    # Ordinal of the chunk's first turn within the whole transcript.
    start_ordinal: int
    # Number of turns stored in the chunk.
    turn_count: int
    # Number of user rows stored in the chunk.
    user_count: int
|
|
|
|
|
|
def _record_json_line(record: dict[str, Any]) -> str:
    """Serialize one transcript row as compact JSON, keeping non-ASCII text unescaped."""
    compact_separators = (",", ":")
    return json.dumps(record, separators=compact_separators, ensure_ascii=False)
|
|
|
|
|
|
def _read_transcript_file(path: Path) -> list[dict[str, Any]]:
    """Load every JSON-object row from a JSONL transcript file, best effort.

    Blank lines are skipped, malformed JSON lines are logged and skipped, and JSON
    values that are not objects are dropped without a log entry.

    Args:
        path: The JSONL file to read.

    Returns:
        The parsed rows in file order, or ``[]`` when the file cannot be opened,
        read, or decoded as UTF-8 (rows parsed before the failure are discarded).
    """
    records: list[dict[str, Any]] = []
    try:
        with open(path, encoding="utf-8") as f:
            for line_no, raw_line in enumerate(f, start=1):
                stripped = raw_line.strip()
                if not stripped:
                    continue
                try:
                    parsed = json.loads(stripped)
                except json.JSONDecodeError:
                    logger.warning("bad jsonl at {} line {}", path, line_no)
                    continue
                if isinstance(parsed, dict):
                    records.append(parsed)
    # UnicodeDecodeError is a ValueError, not an OSError: without catching it a
    # transcript with invalid UTF-8 bytes would crash this otherwise lenient reader.
    except (OSError, UnicodeDecodeError) as e:
        logger.warning("read transcript failed {}: {}", path, e)
        return []
    return records
|
|
|
|
|
|
def _records_bytes(records: list[dict[str, Any]]) -> int:
    """Return the UTF-8 size of ``records`` as written to disk, one newline per row."""
    return sum(len(_record_json_line(row).encode("utf-8")) + 1 for row in records)
|
|
|
|
|
|
def _flatten_turns(turns: list[list[dict[str, Any]]]) -> list[dict[str, Any]]:
    """Concatenate the rows of all turns into one flat list, preserving order."""
    flat: list[dict[str, Any]] = []
    for turn in turns:
        flat.extend(turn)
    return flat
|
|
|
|
|
|
def _write_records_to_path(path: Path, rows: list[dict[str, Any]]) -> None:
    """Replace ``path`` with ``rows`` written as JSONL, via a temporary sibling file.

    The rows go to ``<path>.tmp`` first, are flushed and fsynced, and the temporary
    file is then moved over ``path`` with ``os.replace``.

    Raises:
        ValueError: If a single serialized row exceeds ``_MAX_TRANSCRIPT_FILE_BYTES``.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    staging_path = path.with_suffix(path.suffix + ".tmp")
    try:
        with open(staging_path, "w", encoding="utf-8") as handle:
            for record in rows:
                serialized = _record_json_line(record)
                if len(serialized.encode("utf-8")) > _MAX_TRANSCRIPT_FILE_BYTES:
                    raise ValueError("webui transcript line too large")
                handle.write(serialized + "\n")
            handle.flush()
            os.fsync(handle.fileno())
        os.replace(staging_path, path)
    except BaseException:
        # Remove the partial temporary file on any failure, including interrupts.
        staging_path.unlink(missing_ok=True)
        raise
|
|
|
|
|
|
def _segment_file_path(session_key: str, segment_id: str) -> Path:
    """Return the path of the archived segment ``<segment_id>.jsonl`` for the session."""
    segments_dir = webui_transcript_segments_dir(session_key)
    return segments_dir / f"{segment_id}.jsonl"
|
|
|
|
|
|
def _segment_ids_on_disk(session_key: str) -> list[str]:
    """List the ids of segment files present on disk, sorted ascending.

    Only regular files whose names match ``_TRANSCRIPT_SEGMENT_RE`` are counted;
    the result is empty when the segments directory does not exist.
    """
    directory = webui_transcript_segments_dir(session_key)
    if not directory.is_dir():
        return []
    segment_ids: list[str] = []
    for child in directory.iterdir():
        if child.is_file() and _TRANSCRIPT_SEGMENT_RE.fullmatch(child.name):
            segment_ids.append(child.stem)
    segment_ids.sort()
    return segment_ids
|
|
|
|
|
|
def _segment_manifest_entry(session_key: str, segment_id: str) -> dict[str, Any]:
    """Build the manifest entry for one segment by reading the segment file.

    The entry records the segment id, the file size in bytes (0 when the file is
    missing), the number of turns and the number of user rows.
    """
    segment_path = _segment_file_path(session_key, segment_id)
    rows = _read_transcript_file(segment_path)
    size_on_disk = segment_path.stat().st_size if segment_path.exists() else 0
    turn_total = len(_split_transcript_turns(rows))
    user_total = sum(1 for row in rows if _is_user_transcript_row(row))
    return {
        "id": segment_id,
        "bytes": size_on_disk,
        "turn_count": turn_total,
        "user_count": user_total,
    }
|
|
|
|
|
|
def _non_negative_int(value: Any) -> int | None:
    """Return ``value`` when it is an ``int`` >= 0 (booleans excluded), else ``None``."""
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
    if is_plain_int and value >= 0:
        return value
    return None
|
|
|
|
|
|
def _normalize_manifest_entry(session_key: str, entry: Any) -> dict[str, Any] | None:
    """Validate one raw manifest entry against the segment file on disk.

    Returns:
        A clean ``{"id", "bytes", "turn_count", "user_count"}`` dict, or ``None``
        when the entry is not a dict, its id is not a six-digit segment id, the
        segment file is missing, the recorded size differs from the file size, or a
        count is not a non-negative integer.
    """
    if not isinstance(entry, dict):
        return None
    segment_id = entry.get("id")
    if not isinstance(segment_id, str):
        return None
    if not _TRANSCRIPT_SEGMENT_RE.fullmatch(f"{segment_id}.jsonl"):
        return None
    segment_path = _segment_file_path(session_key, segment_id)
    recorded_bytes = _non_negative_int(entry.get("bytes"))
    turn_count = _non_negative_int(entry.get("turn_count"))
    user_count = _non_negative_int(entry.get("user_count"))
    # A size mismatch means the file changed after the manifest was written.
    if not segment_path.is_file() or recorded_bytes != segment_path.stat().st_size:
        return None
    if turn_count is None or user_count is None:
        return None
    return {
        "id": segment_id,
        "bytes": recorded_bytes,
        "turn_count": turn_count,
        "user_count": user_count,
    }
|
|
|
|
|
|
def _write_segment_manifest(session_key: str, segment_ids: list[str]) -> None:
    """Write ``manifest.json`` describing ``segment_ids``, via a temporary file.

    Each entry is recomputed from the segment file on disk. The manifest is
    written to ``manifest.json.tmp`` and then moved into place with ``os.replace``.
    """
    segments_dir = webui_transcript_segments_dir(session_key)
    segments_dir.mkdir(parents=True, exist_ok=True)
    entries = []
    for segment_id in segment_ids:
        entries.append(_segment_manifest_entry(session_key, segment_id))
    manifest = {
        "version": _TRANSCRIPT_SEGMENT_MANIFEST_VERSION,
        "segments": entries,
    }
    manifest_path = _webui_transcript_manifest_path(session_key)
    staging_path = manifest_path.with_suffix(".json.tmp")
    try:
        serialized = json.dumps(manifest, ensure_ascii=False, indent=2) + "\n"
        staging_path.write_text(serialized, encoding="utf-8")
        os.replace(staging_path, manifest_path)
    except BaseException:
        # Never leave a half-written temporary manifest behind.
        staging_path.unlink(missing_ok=True)
        raise
|
|
|
|
|
|
def _rebuild_segment_manifest(session_key: str) -> list[str]:
    """Regenerate the manifest from the segment files on disk and return their ids.

    When no segment files exist, the manifest file is removed instead.
    """
    segment_ids = _segment_ids_on_disk(session_key)
    if not segment_ids:
        _webui_transcript_manifest_path(session_key).unlink(missing_ok=True)
        return segment_ids
    _write_segment_manifest(session_key, segment_ids)
    return segment_ids
|
|
|
|
|
|
def _rebuilt_segment_manifest_entries(session_key: str) -> list[dict[str, Any]]:
    """Rebuild the manifest on disk and return freshly computed entries for it."""
    entries: list[dict[str, Any]] = []
    for segment_id in _rebuild_segment_manifest(session_key):
        entries.append(_segment_manifest_entry(session_key, segment_id))
    return entries
|
|
|
|
|
|
def _read_segment_manifest_entries(session_key: str) -> list[dict[str, Any]]:
    """Return validated manifest entries, rebuilding the manifest whenever it is suspect.

    The manifest is rebuilt from disk when it is missing, unreadable, not valid
    JSON, has the wrong version or shape, contains an entry that fails validation,
    or lists ids that differ from the segment files actually present. The result is
    empty when the segments directory does not exist.
    """
    segments_dir = webui_transcript_segments_dir(session_key)
    if not segments_dir.is_dir():
        return []
    manifest_path = _webui_transcript_manifest_path(session_key)
    if not manifest_path.is_file():
        return _rebuilt_segment_manifest_entries(session_key)
    try:
        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
        raw_segments = manifest.get("segments") if isinstance(manifest, dict) else None
        # A non-dict payload raises AttributeError here and is handled below.
        version_ok = manifest.get("version") == _TRANSCRIPT_SEGMENT_MANIFEST_VERSION
        if not (version_ok and isinstance(raw_segments, list)):
            return _rebuilt_segment_manifest_entries(session_key)
        validated: list[dict[str, Any]] = []
        for raw_entry in raw_segments:
            clean_entry = _normalize_manifest_entry(session_key, raw_entry)
            if clean_entry is None:
                return _rebuilt_segment_manifest_entries(session_key)
            validated.append(clean_entry)
        listed_ids = [item["id"] for item in validated]
        if listed_ids == _segment_ids_on_disk(session_key):
            return validated
        return _rebuilt_segment_manifest_entries(session_key)
    except (OSError, json.JSONDecodeError, TypeError, AttributeError):
        return _rebuilt_segment_manifest_entries(session_key)
|
|
|
|
|
|
def _read_segment_ids(session_key: str) -> list[str]:
    """Return the segment ids listed in the (validated) manifest, in manifest order."""
    segment_ids: list[str] = []
    for entry in _read_segment_manifest_entries(session_key):
        segment_ids.append(entry["id"])
    return segment_ids
|
|
|
|
|
|
def _append_segment_turns(session_key: str, turns: list[list[dict[str, Any]]]) -> None:
    """Archive ``turns`` into new numbered segment files and refresh the manifest.

    Turns are packed in order; a new segment is started whenever adding the next
    turn would push a non-empty batch past ``_MAX_TRANSCRIPT_FILE_BYTES``. Segment
    numbers continue after the highest existing id, starting at 1. Nothing happens
    when ``turns`` is empty.
    """
    if not turns:
        return
    segment_ids = _read_segment_ids(session_key)
    next_number = int(segment_ids[-1]) + 1 if segment_ids else 1

    def seal(batch: list[list[dict[str, Any]]], number: int) -> None:
        """Write ``batch`` as segment ``number`` and remember its id."""
        segment_id = f"{number:06d}"
        _write_records_to_path(_segment_file_path(session_key, segment_id), _flatten_turns(batch))
        segment_ids.append(segment_id)

    pending: list[list[dict[str, Any]]] = []
    pending_bytes = 0
    for turn in turns:
        turn_size = _records_bytes(turn)
        if pending and pending_bytes + turn_size > _MAX_TRANSCRIPT_FILE_BYTES:
            seal(pending, next_number)
            next_number += 1
            pending, pending_bytes = [], 0
        pending.append(turn)
        pending_bytes += turn_size
    if pending:
        seal(pending, next_number)
    _write_segment_manifest(session_key, segment_ids)
|
|
|
|
|
|
def _rotate_active_transcript_if_needed(session_key: str) -> None:
    """Move older turns out of an oversized active transcript into archived segments.

    Nothing happens unless the active file exists, is larger than
    ``_MAX_TRANSCRIPT_FILE_BYTES``, yields at least one row and holds more than one
    turn. The newest turn always stays; older turns stay while the kept total fits
    in ``_TARGET_ACTIVE_TRANSCRIPT_BYTES``. Everything before that is archived and
    the active file is rewritten with only the kept turns.
    """
    active_path = webui_transcript_path(session_key)
    if not active_path.is_file():
        return
    try:
        oversized = active_path.stat().st_size > _MAX_TRANSCRIPT_FILE_BYTES
    except OSError:
        return
    if not oversized:
        return

    rows = _read_transcript_file(active_path)
    if not rows:
        return
    turns = _split_transcript_turns(rows)
    if len(turns) <= 1:
        return

    # Walk from the newest turn backwards, extending the kept tail while it fits.
    newest = len(turns) - 1
    keep_start = newest
    kept_bytes = 0
    index = newest
    while index >= 0:
        turn_size = _records_bytes(turns[index])
        if index != newest and kept_bytes + turn_size > _TARGET_ACTIVE_TRANSCRIPT_BYTES:
            break
        keep_start = index
        kept_bytes += turn_size
        index -= 1

    archived, retained = turns[:keep_start], turns[keep_start:]
    if not archived:
        return
    _append_segment_turns(session_key, archived)
    _write_records_to_path(active_path, _flatten_turns(retained))
|
|
|
|
|
|
def _chunk_ids(session_key: str) -> list[str]:
    """Return all chunk ids oldest first: archived segments, then the active file if present.

    The active transcript is rotated first when it has grown too large.
    """
    _rotate_active_transcript_if_needed(session_key)
    chunk_ids = _read_segment_ids(session_key)
    has_active_file = webui_transcript_path(session_key).is_file()
    return [*chunk_ids, _TRANSCRIPT_ACTIVE_CHUNK_ID] if has_active_file else chunk_ids
|
|
|
|
|
|
def _read_chunk_turns(session_key: str, chunk_id: str) -> list[list[dict[str, Any]]]:
    """Read one chunk and split it into turns; returns ``[]`` when its file is missing."""
    chunk_path = (
        webui_transcript_path(session_key)
        if chunk_id == _TRANSCRIPT_ACTIVE_CHUNK_ID
        else _segment_file_path(session_key, chunk_id)
    )
    if chunk_path.is_file():
        return _split_transcript_turns(_read_transcript_file(chunk_path))
    return []
|
|
|
|
|
|
def _encode_page_cursor(before_turn_ordinal: int) -> str:
    """Encode a turn ordinal as an unpadded URL-safe base64 cursor.

    The payload is the compact JSON object ``{"before_turn": <ordinal>}``.
    """
    payload = {"before_turn": before_turn_ordinal}
    payload_json = json.dumps(payload, separators=(",", ":"), ensure_ascii=False)
    encoded = base64.urlsafe_b64encode(payload_json.encode("utf-8"))
    # Padding is stripped here and restored when the cursor is decoded.
    return encoded.decode("ascii").rstrip("=")
|
|
|
|
|
|
def _decode_page_cursor(value: str | None) -> int | None:
    """Decode a page cursor back into its ``before_turn`` ordinal.

    Returns:
        The non-negative integer ordinal, or ``None`` when the cursor is empty,
        cannot be decoded, is not a JSON object, or carries an invalid ordinal
        (missing, boolean, non-integer or negative).
    """
    if not value:
        return None
    try:
        missing_padding = -len(value) % 4
        raw = base64.urlsafe_b64decode((value + "=" * missing_padding).encode("ascii"))
        payload = json.loads(raw.decode("utf-8"))
    except (binascii.Error, json.JSONDecodeError, UnicodeDecodeError, ValueError):
        return None
    if not isinstance(payload, dict):
        return None
    before_turn = payload.get("before_turn")
    # bool is a subclass of int, so it has to be ruled out explicitly.
    is_plain_int = isinstance(before_turn, int) and not isinstance(before_turn, bool)
    if is_plain_int and before_turn >= 0:
        return before_turn
    return None
|
|
|
|
|
|
def _coerce_page_limit(limit: int | None) -> int:
    """Return the effective page limit: the default for ``None``, else clamped to 1..max."""
    if limit is None:
        return _DEFAULT_TRANSCRIPT_PAGE_LIMIT
    capped = min(_MAX_TRANSCRIPT_PAGE_LIMIT, int(limit))
    return max(1, capped)
|
|
|
|
|
|
def _chunk_turn_refs(session_key: str) -> list[_TranscriptChunkRef]:
    """Describe every non-empty chunk with its starting ordinal, turn count and user count.

    Archived segments take their counts from the manifest; the active file is read
    and counted directly. The active transcript is rotated first when it is too
    large. Chunks with no turns are left out.
    """
    _rotate_active_transcript_if_needed(session_key)
    refs: list[_TranscriptChunkRef] = []
    next_ordinal = 0
    for entry in _read_segment_manifest_entries(session_key):
        chunk_id = str(entry["id"])
        turn_count = int(entry["turn_count"])
        if turn_count <= 0:
            continue
        user_count = int(entry["user_count"])
        refs.append(_TranscriptChunkRef(chunk_id, next_ordinal, turn_count, user_count))
        next_ordinal += turn_count

    if not webui_transcript_path(session_key).is_file():
        return refs
    active_turns = _read_chunk_turns(session_key, _TRANSCRIPT_ACTIVE_CHUNK_ID)
    if active_turns:
        active_users = 0
        for turn in active_turns:
            for row in turn:
                if _is_user_transcript_row(row):
                    active_users += 1
        refs.append(
            _TranscriptChunkRef(
                _TRANSCRIPT_ACTIVE_CHUNK_ID,
                next_ordinal,
                len(active_turns),
                active_users,
            ),
        )
    return refs
|
|
|
|
|
|
def _count_user_messages_before_ordinal(
    session_key: str,
    chunks: list[_TranscriptChunkRef],
    before_ordinal: int,
) -> int:
    """Count the user rows in all turns whose ordinal is below ``before_ordinal``.

    Chunks that lie entirely before the boundary contribute their stored
    ``user_count``; only a chunk the boundary cuts through is read from disk.
    """
    total = 0
    for chunk in chunks:
        turns_before_boundary = before_ordinal - chunk.start_ordinal
        if turns_before_boundary <= 0:
            # This chunk and every later one start at or after the boundary.
            break
        local_end = min(chunk.turn_count, turns_before_boundary)
        if local_end <= 0:
            continue
        if local_end >= chunk.turn_count:
            total += chunk.user_count
            continue
        chunk_turns = _read_chunk_turns(session_key, chunk.chunk_id)
        for turn in chunk_turns[:local_end]:
            for row in turn:
                if _is_user_transcript_row(row):
                    total += 1
    return total
|
|
|
|
|
|
def _select_transcript_page(
    session_key: str,
    *,
    limit: int | None,
    before: str | None,
    _manifest_rebuilt: bool = False,
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Select the newest turns before a cursor until the page holds enough UI messages.

    Args:
        session_key: Session whose transcript is paged.
        limit: Requested number of UI messages; coerced by ``_coerce_page_limit``.
        before: Cursor from a previous page, or ``None`` to start at the end.
        _manifest_rebuilt: Internal guard so the manifest is rebuilt at most once.

    Returns:
        ``(rows, page)``: the rows of the selected turns in chronological order and
        a dict with ``before_cursor``, ``has_more_before``, ``loaded_message_count``
        and ``user_message_offset``. Whole turns are selected, so a page may hold
        more messages than the limit. ``loaded_message_count`` is always 0 here
        (presumably filled in by the caller -- verify).
    """
    page_limit = _coerce_page_limit(limit)
    chunks = _chunk_turn_refs(session_key)
    total_turns = 0
    for chunk in chunks:
        total_turns += chunk.turn_count
    cursor_ordinal = _decode_page_cursor(before)
    if cursor_ordinal is None:
        upper_ordinal = total_turns
    else:
        upper_ordinal = min(cursor_ordinal, total_turns)

    newest_first: list[_TranscriptTurnRef] = []
    message_count = 0

    for chunk in reversed(chunks):
        if chunk.start_ordinal >= upper_ordinal:
            continue
        local_upper = min(chunk.turn_count, upper_ordinal - chunk.start_ordinal)
        if local_upper <= 0:
            continue
        turns = _read_chunk_turns(session_key, chunk.chunk_id)
        manifest_is_stale = (
            chunk.chunk_id != _TRANSCRIPT_ACTIVE_CHUNK_ID
            and len(turns) != chunk.turn_count
        )
        if manifest_is_stale and not _manifest_rebuilt:
            # The manifest disagrees with the segment file: rebuild it and retry once.
            _rebuild_segment_manifest(session_key)
            return _select_transcript_page(
                session_key,
                limit=limit,
                before=before,
                _manifest_rebuilt=True,
            )
        local_upper = min(local_upper, len(turns))
        for turn_index in reversed(range(local_upper)):
            turn_records = turns[turn_index]
            newest_first.append(
                _TranscriptTurnRef(chunk.start_ordinal + turn_index, turn_records)
            )
            message_count += len(replay_transcript_to_ui_messages(turn_records))
            if message_count >= page_limit:
                break
        if message_count >= page_limit:
            break

    chronological = newest_first[::-1]
    lines: list[dict[str, Any]] = []
    for ref in chronological:
        lines.extend(ref.records)
    if not chronological:
        return [], {
            "before_cursor": None,
            "has_more_before": False,
            "loaded_message_count": 0,
            "user_message_offset": 0,
        }

    oldest_ref = chronological[0]
    has_more = oldest_ref.ordinal > 0
    page = {
        "before_cursor": _encode_page_cursor(oldest_ref.ordinal) if has_more else None,
        "has_more_before": has_more,
        "loaded_message_count": 0,
        "user_message_offset": _count_user_messages_before_ordinal(
            session_key,
            chunks,
            oldest_ref.ordinal,
        ),
    }
    return lines, page
|
|
|
|
|
|
def read_transcript_lines(session_key: str) -> list[dict[str, Any]]:
    """Return every transcript row for the session: archived segments, then the active file."""
    collected: list[dict[str, Any]] = []
    for chunk_id in _chunk_ids(session_key):
        chunk_path = (
            webui_transcript_path(session_key)
            if chunk_id == _TRANSCRIPT_ACTIVE_CHUNK_ID
            else _segment_file_path(session_key, chunk_id)
        )
        collected.extend(_read_transcript_file(chunk_path))
    return collected
|
|
|
|
|
|
def _write_transcript_lines(session_key: str, rows: list[dict[str, Any]]) -> None:
    """Replace the session's whole transcript with ``rows``.

    Existing transcript files and segments are deleted first; the new rows are
    written to the active file, which is then rotated if it is too large.
    """
    delete_webui_transcript(session_key)
    _write_records_to_path(webui_transcript_path(session_key), rows)
    _rotate_active_transcript_if_needed(session_key)
|
|
|
|
|
|
def _append_to_active_transcript(session_key: str, obj: dict[str, Any]) -> None:
    """Append one row to the active transcript file and fsync it.

    Raises:
        ValueError: If the serialized row exceeds ``_MAX_TRANSCRIPT_FILE_BYTES``.
    """
    serialized = _record_json_line(obj)
    if len(serialized.encode("utf-8")) > _MAX_TRANSCRIPT_FILE_BYTES:
        raise ValueError("webui transcript line too large")
    active_path = webui_transcript_path(session_key)
    active_path.parent.mkdir(parents=True, exist_ok=True)
    with open(active_path, "a", encoding="utf-8") as handle:
        handle.write(serialized + "\n")
        handle.flush()
        os.fsync(handle.fileno())
|
|
|
|
|
|
def append_transcript_object(session_key: str, obj: dict[str, Any]) -> None:
    """Append ``obj`` to the transcript; rotation is checked only after a ``turn_end`` row."""
    _append_to_active_transcript(session_key, obj)
    if obj.get("event") != "turn_end":
        return
    _rotate_active_transcript_if_needed(session_key)
|
|
|
|
|
|
def normalize_webui_turn_id(value: Any) -> str:
    """Return a usable turn id: the stripped input when valid, otherwise a new UUID4.

    A valid id is a string that, after stripping, fully matches ``_WEBUI_TURN_ID_RE``.
    """
    if not isinstance(value, str):
        return str(uuid.uuid4())
    stripped = value.strip()
    return stripped if _WEBUI_TURN_ID_RE.fullmatch(stripped) else str(uuid.uuid4())
|
|
|
|
|
|
def webui_message_source(metadata: dict[str, Any] | None) -> dict[str, str] | None:
    """Extract the message-source descriptor from ``metadata``; only cron sources count.

    Returns:
        ``{"kind": "cron"}``, plus a stripped ``"label"`` when a non-blank string
        label is present; ``None`` when there is no cron source entry.
    """
    raw = (metadata or {}).get(WEBUI_MESSAGE_SOURCE_METADATA_KEY)
    if isinstance(raw, dict) and raw.get("kind") == "cron":
        source: dict[str, str] = {"kind": "cron"}
        label = raw.get("label")
        cleaned_label = label.strip() if isinstance(label, str) else ""
        if cleaned_label:
            source["label"] = cleaned_label
        return source
    return None
|
|
|
|
|
|
class WebUITranscriptRecorder:
    """Prepare and persist WebUI wire events without leaking UI rules into channels."""

    def __init__(self, log: Any = logger) -> None:
        """Keep the logger used for failed appends and start with no turn counters."""
        self._log = log
        # (chat_id, turn_id) -> last sequence number handed out for that turn.
        self._turn_sequences: dict[tuple[str, str], int] = {}

    def client_turn_metadata(self, value: Any) -> dict[str, str]:
        """Return metadata carrying the normalized (or newly generated) turn id."""
        return {WEBUI_TURN_METADATA_KEY: normalize_webui_turn_id(value)}

    def prepare_event(
        self,
        chat_id: str,
        event: dict[str, Any],
        *,
        metadata: dict[str, Any] | None = None,
        phase: str | None = None,
        include_source: bool = False,
    ) -> None:
        """Annotate ``event`` in place with its source (optional) and turn fields."""
        if include_source:
            source = webui_message_source(metadata)
            if source:
                event["source"] = source
        self._annotate_turn(chat_id, event, metadata, phase)

    def prepare_and_append(
        self,
        chat_id: str,
        event: dict[str, Any],
        *,
        metadata: dict[str, Any] | None = None,
        phase: str | None = None,
        include_source: bool = False,
        transcript_overrides: dict[str, Any] | None = None,
    ) -> None:
        """Prepare ``event`` in place, then persist a copy of it.

        ``transcript_overrides`` are applied to the persisted copy only, so the
        event object the caller holds does not receive them.
        """
        self.prepare_event(
            chat_id,
            event,
            metadata=metadata,
            phase=phase,
            include_source=include_source,
        )
        if transcript_overrides:
            record = {**event, **transcript_overrides}
        else:
            record = dict(event)
        self.append(chat_id, record)

    def append_user_message(
        self,
        chat_id: str,
        text: str,
        *,
        metadata: dict[str, Any],
        media_paths: list[str] | None = None,
        cli_apps: list[dict[str, Any]] | None = None,
        mcp_presets: list[dict[str, Any]] | None = None,
    ) -> None:
        """Persist a user message row; a bare ``/stop`` without media is not recorded."""
        is_bare_stop = text.strip() == "/stop" and not media_paths
        if is_bare_stop:
            return
        payload = build_user_transcript_event(
            chat_id,
            text,
            media_paths=media_paths,
            cli_apps=cli_apps,
            mcp_presets=mcp_presets,
        )
        if payload is not None:
            self.prepare_and_append(chat_id, payload, metadata=metadata, phase="user")

    def append(self, chat_id: str, event: dict[str, Any]) -> None:
        """Write ``event`` to the chat's transcript, logging instead of raising on failure."""
        try:
            # A JSON round trip yields a detached, JSON-safe copy of the event.
            snapshot = json.loads(json.dumps(event, ensure_ascii=False))
            append_transcript_object(f"websocket:{chat_id}", snapshot)
        except (OSError, ValueError, TypeError) as exc:
            self._log.warning("webui transcript append failed: {}", exc)

    def _next_turn_seq(self, chat_id: str, turn_id: str) -> int:
        """Return the next 1-based sequence number for the given chat and turn."""
        counter_key = (chat_id, turn_id)
        next_seq = self._turn_sequences.get(counter_key, 0) + 1
        self._turn_sequences[counter_key] = next_seq
        return next_seq

    def _annotate_turn(
        self,
        chat_id: str,
        event: dict[str, Any],
        metadata: dict[str, Any] | None,
        phase: str | None,
    ) -> None:
        """Stamp ``turn_id``, ``turn_phase`` and ``turn_seq`` onto ``event``.

        Does nothing when ``phase`` is ``None`` or the metadata carries no non-empty
        string turn id. The turn's counter is dropped once ``phase`` is ``"complete"``.
        """
        if phase is None:
            return
        turn_id = (metadata or {}).get(WEBUI_TURN_METADATA_KEY)
        if not (isinstance(turn_id, str) and turn_id):
            return
        event["turn_id"] = turn_id
        event["turn_phase"] = phase
        event["turn_seq"] = self._next_turn_seq(chat_id, turn_id)
        if phase == "complete":
            self._turn_sequences.pop((chat_id, turn_id), None)
|
|
|
|
|
|
def _chat_id_from_session_key(session_key: str) -> str | None:
    """Return the chat id of a ``websocket:<chat_id>`` key, or ``None`` if there is none.

    The part after the prefix is stripped; a blank remainder also yields ``None``.
    """
    prefix = "websocket:"
    if not session_key.startswith(prefix):
        return None
    chat_id = session_key[len(prefix):].strip()
    return chat_id if chat_id else None
|
|
|
|
|
|
def _is_user_transcript_row(row: dict[str, Any]) -> bool:
    """Tell whether a row is a user message, by its ``event`` or its ``role`` field."""
    return any(row.get(field) == "user" for field in ("event", "role"))
|
|
|
|
|
|
def fork_transcript_before_user_index(
    source_key: str,
    target_key: str,
    before_user_index: int,
) -> bool:
    """Copy transcript rows before a zero-based global user-message index.

    ``before_user_index == user_count`` copies the full transcript prefix. WebUI
    uses that when forking from an assistant reply at the end of a chat.

    Fork-marker rows are never copied. Copied rows get the target chat id when the
    target key is a ``websocket:`` key.

    Returns:
        ``True`` when the prefix was written to the target transcript; ``False``
        when the index is negative, the source has no rows, or the source holds
        fewer user messages than ``before_user_index``.
    """
    if before_user_index < 0:
        return False
    source_rows = read_transcript_lines(source_key)
    if not source_rows:
        return False

    target_chat_id = _chat_id_from_session_key(target_key)
    prefix: list[dict[str, Any]] = []
    users_seen = 0
    for row in source_rows:
        if row.get("event") == WEBUI_FORK_MARKER_EVENT:
            continue
        if _is_user_transcript_row(row):
            if users_seen == before_user_index:
                reached_boundary = True
                break
            users_seen += 1
        clone = json.loads(json.dumps(row, ensure_ascii=False))
        if target_chat_id is not None:
            clone["chat_id"] = target_chat_id
        prefix.append(clone)
    else:
        # Ran off the end: valid only when the index equals the total user count.
        reached_boundary = users_seen == before_user_index

    if not reached_boundary:
        return False

    _write_transcript_lines(target_key, prefix)
    return True
|
|
|
|
|
|
def append_fork_marker(session_key: str) -> None:
    """Mark the UI-only boundary where a WebUI fork starts accepting new turns."""
    marker = {
        "event": WEBUI_FORK_MARKER_EVENT,
        "chat_id": _chat_id_from_session_key(session_key),
    }
    append_transcript_object(session_key, marker)
|
|
|
|
|
|
def write_session_messages_as_transcript(
    target_key: str,
    messages: list[dict[str, Any]],
) -> None:
    """Write a minimal WebUI transcript from already-truncated session messages.

    User messages become ``user`` rows (with media paths, CLI apps and MCP presets
    when present); assistant messages with non-blank string content become
    ``message`` rows. Every other message is skipped. The target's existing
    transcript is replaced.
    """
    target_chat_id = _chat_id_from_session_key(target_key)

    def non_empty_strings(items: list[Any]) -> list[str]:
        """Keep only the non-empty string items."""
        return [str(item) for item in items if isinstance(item, str) and item]

    rows: list[dict[str, Any]] = []
    for message in messages:
        role = message.get("role")
        content = message.get("content")
        text = content if isinstance(content, str) else ""
        if role == "user":
            user_row: dict[str, Any] = {"event": "user", "chat_id": target_chat_id, "text": text}
            media = message.get("media")
            if isinstance(media, list) and media:
                user_row["media_paths"] = non_empty_strings(media)
            for key in ("cli_apps", "mcp_presets"):
                value = message.get(key)
                if isinstance(value, list) and value:
                    # JSON round trip: store a detached copy of the nested data.
                    user_row[key] = json.loads(json.dumps(value, ensure_ascii=False))
            rows.append(user_row)
        elif role == "assistant" and text.strip():
            assistant_row: dict[str, Any] = {"event": "message", "chat_id": target_chat_id, "text": text}
            media = message.get("media")
            if isinstance(media, list) and media:
                assistant_row["media"] = non_empty_strings(media)
            rows.append(assistant_row)
    _write_transcript_lines(target_key, rows)
|
|
|
|
|
|
def delete_webui_transcript(session_key: str) -> bool:
    """Delete the session's active transcript, legacy thread file and segments directory.

    Failures are logged and do not stop the remaining deletions.

    Returns:
        ``True`` when at least one file or the segments directory was removed.
    """
    removed = False
    candidate_files = (
        webui_transcript_path(session_key),
        _legacy_webui_thread_path(session_key),
    )
    for candidate in candidate_files:
        if candidate.is_file():
            try:
                candidate.unlink()
            except OSError as exc:
                logger.warning("Failed to delete webui transcript {}: {}", candidate, exc)
            else:
                removed = True
    segments_dir = webui_transcript_segments_dir(session_key)
    if segments_dir.is_dir():
        try:
            shutil.rmtree(segments_dir)
        except OSError as exc:
            logger.warning("Failed to delete webui transcript segments {}: {}", segments_dir, exc)
        else:
            removed = True
    return removed
|
|
|
|
|
|
def build_user_transcript_event(
    chat_id: str,
    text: str,
    *,
    media_paths: list[Any] | None = None,
    cli_apps: list[Any] | None = None,
    mcp_presets: list[Any] | None = None,
) -> dict[str, Any] | None:
    """Build the ``user`` transcript row for a message, or ``None`` if it is empty.

    Falsy media paths are dropped and the rest stringified; ``cli_apps`` and
    ``mcp_presets`` keep only mapping items, each copied into a plain dict. The
    optional keys appear only when they have content.

    Returns:
        The event dict, or ``None`` when there is neither text nor a media path.
    """
    kept_paths = [str(item) for item in (media_paths or []) if item]
    if not (text or kept_paths):
        return None
    event: dict[str, Any] = {"event": "user", "chat_id": chat_id, "text": text}
    if kept_paths:
        event["media_paths"] = kept_paths
    for key, raw_items in (("cli_apps", cli_apps), ("mcp_presets", mcp_presets)):
        copied = [dict(item) for item in (raw_items or []) if isinstance(item, Mapping)]
        if copied:
            event[key] = copied
    return event
|
|
|
|
|
|
def _session_user_event(
    session_key: str,
    message: dict[str, Any],
) -> dict[str, Any] | None:
    """Convert a session ``user`` message into a ``user`` transcript event.

    The chat id is the part of ``session_key`` after the first ``:`` (or the whole
    key when it has none). Returns ``None`` for non-user messages and for user
    messages with neither text nor media.
    """
    if message.get("role") != "user":
        return None
    content = message.get("content")
    text = content if isinstance(content, str) else ""

    def list_or_none(field: str) -> list[Any] | None:
        """Return the field's value only when it is a list."""
        value = message.get(field)
        return value if isinstance(value, list) else None

    _, separator, remainder = session_key.partition(":")
    chat_id = remainder if separator else session_key
    return build_user_transcript_event(
        chat_id,
        text,
        media_paths=list_or_none("media"),
        cli_apps=list_or_none("cli_apps"),
        mcp_presets=list_or_none("mcp_presets"),
    )
|
|
|
|
|
|
def _assistant_text_signature(value: Any) -> str:
    """Return the stripped text for string values and ``""`` for anything else."""
    if isinstance(value, str):
        return value.strip()
    return ""
|
|
|
|
|
|
def _session_backfill_turns(
    session_key: str,
    session_messages: list[dict[str, Any]],
) -> list[tuple[dict[str, Any], tuple[str, ...]]]:
    """Group session messages into ``(user_event, assistant_texts)`` turns.

    A turn starts at each user message and collects the stripped, non-empty texts
    of the assistant messages that follow it. Turns without a usable user event or
    without any assistant text are dropped.
    """
    turns: list[tuple[dict[str, Any], tuple[str, ...]]] = []

    def commit(user_event: dict[str, Any] | None, texts: list[str]) -> None:
        """Record the pending turn when it has both a user event and assistant text."""
        if user_event is None:
            return
        signature = tuple(text for text in texts if text)
        if signature:
            turns.append((user_event, signature))

    pending_user: dict[str, Any] | None = None
    pending_texts: list[str] = []
    for message in session_messages:
        role = message.get("role")
        if role == "user":
            commit(pending_user, pending_texts)
            pending_user = _session_user_event(session_key, message)
            pending_texts = []
        elif role == "assistant" and pending_user is not None:
            text = _assistant_text_signature(message.get("content"))
            if text:
                pending_texts.append(text)
    commit(pending_user, pending_texts)
    return turns
|
|
|
|
|
|
def _split_transcript_turns(lines: list[dict[str, Any]]) -> list[list[dict[str, Any]]]:
    """Split transcript rows into turns, each ending with its ``turn_end`` row.

    Rows after the last ``turn_end`` form a final, still-open turn.
    """
    turns: list[list[dict[str, Any]]] = []
    open_turn: list[dict[str, Any]] = []
    for record in lines:
        open_turn.append(record)
        if record.get("event") != "turn_end":
            continue
        turns.append(open_turn)
        open_turn = []
    if open_turn:
        turns.append(open_turn)
    return turns
|
|
|
|
|
|
def _transcript_turn_signature(records: list[dict[str, Any]]) -> tuple[str, ...]:
    """Return the stripped, non-empty assistant texts of a replayed turn, in order.

    Messages of kind ``"trace"`` and non-assistant messages are ignored.
    """
    signature: list[str] = []
    for message in replay_transcript_to_ui_messages(records):
        is_assistant_reply = (
            message.get("role") == "assistant" and message.get("kind") != "trace"
        )
        if not is_assistant_reply:
            continue
        text = _assistant_text_signature(message.get("content"))
        if text:
            signature.append(text)
    return tuple(signature)
|
|
|
|
|
|
def _find_unique_session_turn(
    session_turns: list[tuple[dict[str, Any], tuple[str, ...]]],
    signature: tuple[str, ...],
    start: int,
) -> int | None:
    """Return the index of the only session turn at or after ``start`` with ``signature``.

    Returns ``None`` when the signature is empty, matches nothing, or matches more
    than one turn (an ambiguous match is treated as no match).
    """
    if not signature:
        return None
    matches = [
        index
        for index in range(start, len(session_turns))
        if session_turns[index][1] == signature
    ]
    return matches[0] if len(matches) == 1 else None
|
|
|
|
|
|
def _with_backfilled_user(
    records: list[dict[str, Any]],
    user_event: dict[str, Any],
) -> list[dict[str, Any]]:
    """Insert a copy of ``user_event`` before the first display event of a turn.

    Returns a new list with the insertion, or ``records`` itself when the turn
    holds no event from ``_TURN_DISPLAY_EVENTS``.
    """
    insert_at = next(
        (
            position
            for position, record in enumerate(records)
            if record.get("event") in _TURN_DISPLAY_EVENTS
        ),
        None,
    )
    if insert_at is None:
        return records
    return records[:insert_at] + [dict(user_event)] + records[insert_at:]
|
|
|
|
|
|
def inject_missing_user_events_from_session(
    session_key: str,
    lines: list[dict[str, Any]],
    session_messages: list[dict[str, Any]] | None,
) -> list[dict[str, Any]]:
    """Backfill user rows for legacy WebUI transcripts that only stored assistant streams.

    Each transcript turn is matched to a session turn by its assistant-text
    signature; the match must be unique among the session turns not yet consumed.
    A matched turn without a user row gets the session's user event inserted. A
    matched turn that already has one is kept as is, but still advances the
    matching position. Unmatched turns are passed through unchanged.

    Returns:
        ``lines`` itself when there is nothing to work with, otherwise a new list.
    """
    if not lines or not session_messages:
        return lines
    session_turns = _session_backfill_turns(session_key, session_messages)
    if not session_turns:
        return lines

    merged: list[dict[str, Any]] = []
    next_session_index = 0
    for turn in _split_transcript_turns(lines):
        already_has_user = any(record.get("event") == "user" for record in turn)
        signature = _transcript_turn_signature(turn)
        matched = _find_unique_session_turn(session_turns, signature, next_session_index)
        if matched is None:
            merged.extend(turn)
            continue
        if already_has_user:
            merged.extend(turn)
        else:
            merged.extend(_with_backfilled_user(turn, session_turns[matched][0]))
        next_session_index = matched + 1
    return merged
|
|
|
|
|
|
def _format_tool_call_trace(call: Any) -> str | None:
    """Render a tool call as ``name(arguments)`` for display.

    The name and arguments are taken from the nested ``"function"`` dict first and
    from the top-level keys as a fallback. Non-blank string arguments are used
    verbatim, non-empty dict arguments are JSON-encoded, and anything else renders
    as ``name()``.

    Returns:
        The trace line, or ``None`` when ``call`` is not a non-empty dict or no
        non-empty string name can be found.
    """
    if not call or not isinstance(call, dict):
        return None
    function = call.get("function")
    function_dict = function if isinstance(function, dict) else None
    name = function_dict.get("name") if function_dict is not None else None
    if not isinstance(name, str) or not name:
        fallback_name = call.get("name")
        name = fallback_name if isinstance(fallback_name, str) else ""
    if not name:
        return None
    nested_args = function_dict.get("arguments") if function_dict is not None else None
    args = nested_args or call.get("arguments")
    if isinstance(args, str) and args.strip():
        return f"{name}({args})"
    if args and isinstance(args, dict):
        return f"{name}({json.dumps(args, ensure_ascii=False)})"
    return f"{name}()"
|
|
|
|
|
|
def tool_trace_lines_from_events(events: Any) -> list[str]:
    """Build display trace lines from a list of structured tool events.

    Only non-empty dict events whose ``phase`` is ``start``, ``end`` or
    ``error`` are considered. Events sharing a non-empty string ``call_id``
    contribute at most once (the first occurrence wins). Events that cannot be
    formatted are dropped. A non-list input yields an empty list.
    """
    if not isinstance(events, list):
        return []
    accepted_phases = {"start", "end", "error"}
    rendered: list[str] = []
    seen_call_ids: set[str] = set()
    for entry in events:
        if not (entry and isinstance(entry, dict)):
            continue
        if entry.get("phase") not in accepted_phases:
            continue
        identifier = entry.get("call_id")
        if isinstance(identifier, str) and identifier:
            if identifier in seen_call_ids:
                continue
            seen_call_ids.add(identifier)
        trace_line = _format_tool_call_trace(entry)
        if trace_line:
            rendered.append(trace_line)
    return rendered
|
|
|
|
|
|
# Relative ordering of tool-event phases. _merge_tool_events only lets an
# incoming event overwrite a stored one when the incoming phase ranks at least
# as high; phases missing from this table are treated as rank 0 there.
_PHASE_RANK = {"start": 1, "end": 2, "error": 3}
|
|
|
|
|
|
def _normalize_tool_events(events: Any) -> list[dict[str, Any]]:
|
|
if not isinstance(events, list):
|
|
return []
|
|
out: list[dict[str, Any]] = []
|
|
for event in events:
|
|
if not event or not isinstance(event, dict):
|
|
continue
|
|
if event.get("phase") not in {"start", "end", "error"}:
|
|
continue
|
|
if not isinstance(event.get("name"), str):
|
|
fn = event.get("function")
|
|
if not (isinstance(fn, dict) and isinstance(fn.get("name"), str)):
|
|
continue
|
|
out.append(dict(event))
|
|
return out
|
|
|
|
|
|
def _tool_event_key(event: dict[str, Any]) -> str:
|
|
call_id = event.get("call_id")
|
|
if isinstance(call_id, str) and call_id:
|
|
return f"call:{call_id}"
|
|
return _format_tool_call_trace(event) or json.dumps(event, sort_keys=True, ensure_ascii=False)
|
|
|
|
|
|
def _tool_event_file_edit_key(event: dict[str, Any]) -> str | None:
|
|
call_id = event.get("call_id")
|
|
if not isinstance(call_id, str) or not call_id:
|
|
return None
|
|
name = event.get("name")
|
|
if not isinstance(name, str) or not name:
|
|
fn = event.get("function")
|
|
name = fn.get("name") if isinstance(fn, dict) else ""
|
|
if not isinstance(name, str) or name not in _FILE_EDIT_TOOL_NAMES:
|
|
return None
|
|
return f"{call_id}|{name}"
|
|
|
|
|
|
def _merge_tool_events(previous: Any, incoming: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
if not isinstance(previous, list) or not previous:
|
|
return incoming
|
|
if not incoming:
|
|
return [dict(event) for event in previous if isinstance(event, dict)]
|
|
merged = [dict(event) for event in previous if isinstance(event, dict)]
|
|
index_by_key = {_tool_event_key(event): idx for idx, event in enumerate(merged)}
|
|
for event in incoming:
|
|
key = _tool_event_key(event)
|
|
existing_index = index_by_key.get(key)
|
|
if existing_index is None:
|
|
index_by_key[key] = len(merged)
|
|
merged.append(event)
|
|
continue
|
|
existing = merged[existing_index]
|
|
incoming_rank = _PHASE_RANK.get(str(event.get("phase")), 0)
|
|
existing_rank = _PHASE_RANK.get(str(existing.get("phase")), 0)
|
|
if incoming_rank >= existing_rank:
|
|
merged[existing_index] = {**existing, **event}
|
|
return merged
|
|
|
|
|
|
def _file_edit_key(edit: dict[str, Any]) -> str:
|
|
call_id = str(edit.get("call_id") or "")
|
|
tool = str(edit.get("tool") or "")
|
|
if call_id:
|
|
return f"{call_id}|{tool}"
|
|
return f"{tool}|{edit.get('path') or ''}"
|
|
|
|
|
|
def _message_has_file_edit_for_tool_event(
    message: dict[str, Any],
    event: dict[str, Any],
) -> bool:
    """Tell whether ``message`` already lists a file edit for tool ``event``.

    True only when the event yields a file-edit key and one of the dict
    entries in ``message["fileEdits"]`` has the same key.
    """
    wanted = _tool_event_file_edit_key(event)
    if not wanted:
        return False
    recorded = message.get("fileEdits")
    if not isinstance(recorded, list):
        return False
    for candidate in recorded:
        if isinstance(candidate, dict) and _file_edit_key(candidate) == wanted:
            return True
    return False
|
|
|
|
|
|
def _filter_covered_file_edit_tool_events(
|
|
messages: list[dict[str, Any]],
|
|
events: list[dict[str, Any]],
|
|
) -> list[dict[str, Any]]:
|
|
if not events:
|
|
return events
|
|
return [
|
|
event
|
|
for event in events
|
|
if not any(_message_has_file_edit_for_tool_event(message, event) for message in messages)
|
|
]
|
|
|
|
|
|
def _strip_covered_file_edit_tool_hints(
|
|
message: dict[str, Any],
|
|
edits: list[dict[str, Any]],
|
|
) -> dict[str, Any]:
|
|
incoming_keys = {
|
|
_file_edit_key(edit)
|
|
for edit in edits
|
|
if isinstance(edit, dict)
|
|
}
|
|
events = message.get("toolEvents")
|
|
if not incoming_keys or not isinstance(events, list):
|
|
return message
|
|
|
|
kept_events: list[dict[str, Any]] = []
|
|
removed_trace_lines: set[str] = set()
|
|
changed = False
|
|
for event in events:
|
|
if not isinstance(event, dict):
|
|
continue
|
|
key = _tool_event_file_edit_key(event)
|
|
if key and key in incoming_keys:
|
|
changed = True
|
|
removed_trace_lines.update(tool_trace_lines_from_events([event]))
|
|
continue
|
|
kept_events.append(event)
|
|
if not changed:
|
|
return message
|
|
|
|
raw_traces = message.get("traces")
|
|
if isinstance(raw_traces, list):
|
|
previous_traces = [trace for trace in raw_traces if isinstance(trace, str)]
|
|
else:
|
|
content = message.get("content")
|
|
previous_traces = [content] if isinstance(content, str) and content else []
|
|
next_traces = [trace for trace in previous_traces if trace not in removed_trace_lines]
|
|
next_message = {
|
|
**message,
|
|
"traces": next_traces,
|
|
"content": next_traces[-1] if next_traces else "",
|
|
}
|
|
if kept_events:
|
|
next_message["toolEvents"] = kept_events
|
|
else:
|
|
next_message.pop("toolEvents", None)
|
|
return next_message
|
|
|
|
|
|
def _merge_unique_tool_trace_lines(
|
|
previous_traces: list[str],
|
|
lines: list[str],
|
|
) -> tuple[list[str], bool]:
|
|
seen_lines = set(previous_traces)
|
|
traces = list(previous_traces)
|
|
added = False
|
|
for line in lines:
|
|
if line in seen_lines:
|
|
continue
|
|
seen_lines.add(line)
|
|
traces.append(line)
|
|
added = True
|
|
return traces, added
|
|
|
|
|
|
def _media_from_signed_urls(value: Any) -> list[dict[str, Any]]:
|
|
media: list[dict[str, Any]] = []
|
|
urls = value if isinstance(value, list) else []
|
|
for m in urls:
|
|
if isinstance(m, dict) and m.get("url"):
|
|
name = str(m.get("name") or "")
|
|
media.append(
|
|
{
|
|
"kind": _media_kind_from_name(name),
|
|
"url": str(m["url"]),
|
|
"name": name,
|
|
},
|
|
)
|
|
return media
|
|
|
|
|
|
def replay_transcript_to_ui_messages(
    lines: list[dict[str, Any]],
    *,
    augment_user_media: Callable[[list[str]], list[dict[str, Any]]] | None = None,
    augment_assistant_media: Callable[[list[str]], list[dict[str, Any]]] | None = None,
    augment_assistant_text: Callable[[str], str] | None = None,
) -> list[dict[str, Any]]:
    """Fold JSONL records into ``UIMessage``-shaped dicts for the WebUI.

    Mirrors the core fold in ``useNanobotStream.ts`` (delta, reasoning,
    message+kind, turn_end). ``augment_user_media`` maps persisted filesystem
    paths to ``{url, name?}`` / attachment dicts the client expects. Assistant
    media gets a separate hook so replay can re-sign outbound attachments after
    a gateway restart instead of reusing stale process-local signed URLs.
    ``augment_assistant_text``, when given, is applied to the final string
    content of every non-trace assistant message.

    The transient ``isStreaming`` / ``reasoningStreaming`` flags are removed
    from every returned message, including messages rewritten by
    ``augment_assistant_text``.
    """
    messages: list[dict[str, Any]] = []
    # Id of the assistant message currently receiving ``delta`` chunks.
    buffer_message_id: str | None = None
    buffer_parts: list[str] = []
    # Set once a complete message with media was absorbed; streaming and
    # activity records are then ignored until the next ``turn_end``.
    suppress_until_turn_end = False
    active_activity_segment_id: str | None = None
    active_file_edit_segment_id: str | None = None
    activity_segment_counter = 0
    # ``createdAt`` values are synthesized as base + record index.
    _ts_base = int(time.time() * 1000)
    closed_turn_ids: set[str] = set()
    replay_turn_aliases: dict[str, str] = {}

    def _new_id(prefix: str, idx: int) -> str:
        return f"{prefix}-{idx}-{uuid.uuid4().hex[:8]}"

    def _new_activity_segment(*, activate: bool = True) -> str:
        nonlocal active_activity_segment_id, activity_segment_counter
        activity_segment_counter += 1
        segment_id = f"activity-{activity_segment_counter}"
        if activate:
            active_activity_segment_id = segment_id
        return segment_id

    def _turn_fields(rec: dict[str, Any], fallback_phase: str | None = None) -> dict[str, Any]:
        fields: dict[str, Any] = {}
        turn_id = rec.get("turn_id")
        if isinstance(turn_id, str) and turn_id:
            if turn_id in closed_turn_ids:
                # A turn id seen again after its ``turn_end`` gets an alias.
                # ``idx`` is the record index of the enclosing replay loop.
                fields["turnId"] = replay_turn_aliases.setdefault(
                    turn_id,
                    f"{turn_id}:replay:{idx}",
                )
            else:
                fields["turnId"] = turn_id
        phase = rec.get("turn_phase")
        if isinstance(phase, str) and phase:
            fields["turnPhase"] = phase
        elif fallback_phase:
            fields["turnPhase"] = fallback_phase
        seq = rec.get("turn_seq")
        if isinstance(seq, (int, float)):
            fields["turnSeq"] = int(seq)
        return fields

    def _source_fields(rec: dict[str, Any]) -> dict[str, Any]:
        source = rec.get("source")
        if not isinstance(source, dict) or source.get("kind") != "cron":
            return {}
        out: dict[str, Any] = {"source": {"kind": "cron"}}
        label = source.get("label")
        if isinstance(label, str) and label.strip():
            out["source"]["label"] = label.strip()
        return out

    def _same_turn(message: dict[str, Any], turn_fields: dict[str, Any]) -> bool:
        # Missing turn ids on either side are treated as compatible.
        turn_id = turn_fields.get("turnId")
        message_turn_id = message.get("turnId")
        return not turn_id or not message_turn_id or turn_id == message_turn_id

    def _ensure_activity_segment() -> str:
        return active_activity_segment_id or _new_activity_segment()

    def close_activity_for_answer() -> None:
        nonlocal active_activity_segment_id, active_file_edit_segment_id
        active_activity_segment_id = None
        active_file_edit_segment_id = None

    def close_file_edit_phase_before_activity() -> None:
        nonlocal active_activity_segment_id, active_file_edit_segment_id
        if active_file_edit_segment_id:
            active_activity_segment_id = None
            active_file_edit_segment_id = None

    def attach_reasoning_chunk(
        prev: list[dict[str, Any]],
        chunk: str,
        idx: int,
        turn_fields: dict[str, Any] | None = None,
    ) -> None:
        turn_fields = turn_fields or {}
        # Walk back to the nearest assistant message of this turn; a user row
        # or a trace row ends the search and forces a fresh placeholder.
        for i in range(len(prev) - 1, -1, -1):
            candidate = prev[i]
            if candidate.get("role") == "user":
                break
            if candidate.get("kind") == "trace":
                break
            if candidate.get("role") != "assistant":
                continue
            if not _same_turn(candidate, turn_fields):
                break
            content = str(candidate.get("content") or "")
            has_answer = len(content) > 0
            if (
                candidate.get("reasoningStreaming")
                or candidate.get("reasoning") is not None
                or has_answer
                or candidate.get("isStreaming")
            ):
                prev[i] = {
                    **candidate,
                    "reasoning": (str(candidate.get("reasoning") or "")) + chunk,
                    "reasoningStreaming": True,
                    "activitySegmentId": candidate.get("activitySegmentId") or _ensure_activity_segment(),
                    **turn_fields,
                }
                return
            if not has_answer and candidate.get("isStreaming"):
                prev[i] = {
                    **candidate,
                    "reasoning": chunk,
                    "reasoningStreaming": True,
                    "activitySegmentId": candidate.get("activitySegmentId") or _ensure_activity_segment(),
                    **turn_fields,
                }
                return
            break
        segment = _ensure_activity_segment()
        prev.append(
            {
                "id": _new_id("as", idx),
                "role": "assistant",
                "content": "",
                "isStreaming": True,
                "reasoning": chunk,
                "reasoningStreaming": True,
                "activitySegmentId": segment,
                **turn_fields,
                "createdAt": _ts_base + idx,
            },
        )

    def find_active_placeholder(
        prev: list[dict[str, Any]],
        turn_fields: dict[str, Any] | None = None,
    ) -> str | None:
        # The last message qualifies only if it is an empty, still-streaming,
        # non-trace assistant message of the same turn.
        turn_fields = turn_fields or {}
        last = prev[-1] if prev else None
        if not last:
            return None
        if last.get("role") != "assistant" or last.get("kind") == "trace":
            return None
        if str(last.get("content") or ""):
            return None
        if not last.get("isStreaming"):
            return None
        if not _same_turn(last, turn_fields):
            return None
        return str(last.get("id"))

    def demote_interrupted_assistant(segment: str) -> None:
        nonlocal buffer_message_id, buffer_parts
        # Streaming assistant text that is interrupted by activity is moved
        # into the message's reasoning; only the most recent one is demoted.
        for i in range(len(messages) - 1, -1, -1):
            candidate = messages[i]
            if candidate.get("role") == "user":
                break
            content = candidate.get("content")
            if (
                candidate.get("role") != "assistant"
                or candidate.get("kind") == "trace"
                or not candidate.get("isStreaming")
                or not isinstance(content, str)
                or not content.strip()
                or candidate.get("media")
            ):
                continue
            reasoning_parts = [
                part
                for part in (candidate.get("reasoning"), content)
                if isinstance(part, str) and part.strip()
            ]
            messages[i] = {
                **candidate,
                "content": "",
                "reasoning": "\n\n".join(reasoning_parts),
                "reasoningStreaming": False,
                "isStreaming": False,
                "activitySegmentId": candidate.get("activitySegmentId") or segment,
            }
            if buffer_message_id == candidate.get("id"):
                buffer_message_id = None
                buffer_parts = []
            return

    def close_reasoning(prev: list[dict[str, Any]]) -> None:
        for i in range(len(prev) - 1, -1, -1):
            if prev[i].get("reasoningStreaming"):
                prev[i] = {**prev[i], "reasoningStreaming": False}
                return

    def is_reasoning_only_placeholder(m: dict[str, Any]) -> bool:
        return (
            m.get("role") == "assistant"
            and m.get("kind") != "trace"
            and not str(m.get("content") or "").strip()
            and bool(m.get("reasoning"))
            and not m.get("reasoningStreaming")
            and not m.get("media")
        )

    def is_tool_trace_at(index: int) -> bool:
        m = messages[index] if 0 <= index < len(messages) else None
        return bool(m and m.get("kind") == "trace")

    def prune_reasoning_only() -> None:
        nonlocal messages
        # Reasoning-only placeholders survive only when directly followed by a
        # trace row.
        kept: list[dict[str, Any]] = []
        for i, m in enumerate(messages):
            if is_reasoning_only_placeholder(m) and not is_tool_trace_at(i + 1):
                continue
            kept.append(m)
        messages = kept

    def stamp_latency(latency_ms: int) -> None:
        for i in range(len(messages) - 1, -1, -1):
            if messages[i].get("role") == "assistant" and messages[i].get("kind") != "trace":
                messages[i] = {
                    **messages[i],
                    "latencyMs": latency_ms,
                    "isStreaming": False,
                }
                return

    def absorb_complete(extra: dict[str, Any], idx: int) -> None:
        nonlocal active_activity_segment_id, active_file_edit_segment_id
        last = messages[-1] if messages else None
        if last and is_reasoning_only_placeholder(last) and _same_turn(last, extra):
            messages[-1] = {
                **last,
                **extra,
                "isStreaming": False,
                "reasoningStreaming": False,
            }
        else:
            messages.append(
                {
                    "id": _new_id("as", idx),
                    "role": "assistant",
                    "createdAt": _ts_base + idx,
                    **extra,
                },
            )
        active_activity_segment_id = None
        active_file_edit_segment_id = None

    def find_file_edit_trace_index(
        segment: str | None,
        edits: list[dict[str, Any]],
    ) -> int | None:
        incoming_keys = {_file_edit_key(edit) for edit in edits if isinstance(edit, dict)}
        for i in range(len(messages) - 1, -1, -1):
            candidate = messages[i]
            if candidate.get("role") == "user":
                break
            if candidate.get("kind") != "trace":
                continue
            if segment and candidate.get("activitySegmentId") == segment:
                return i
            existing_edits = candidate.get("fileEdits")
            if isinstance(existing_edits, list):
                for existing in existing_edits:
                    if isinstance(existing, dict) and _file_edit_key(existing) in incoming_keys:
                        return i
            existing_tool_events = candidate.get("toolEvents")
            if isinstance(existing_tool_events, list):
                for event in existing_tool_events:
                    if not isinstance(event, dict):
                        continue
                    key = _tool_event_file_edit_key(event)
                    if key and key in incoming_keys:
                        return i
        return None

    def upsert_file_edits(
        edits: list[dict[str, Any]],
        idx: int,
        turn_fields: dict[str, Any] | None = None,
    ) -> None:
        nonlocal active_file_edit_segment_id
        turn_fields = turn_fields or {}
        if not edits:
            return
        segment = active_file_edit_segment_id
        if not segment:
            segment = _new_activity_segment(activate=False)
            active_file_edit_segment_id = segment
        demote_interrupted_assistant(segment)
        target_index = find_file_edit_trace_index(segment, edits)
        if target_index is not None:
            last = messages[target_index]
            segment = str(last.get("activitySegmentId") or segment or _new_activity_segment(activate=False))
            active_file_edit_segment_id = segment
            last = _strip_covered_file_edit_tool_hints(last, edits)
        else:
            if not segment:
                segment = _new_activity_segment(activate=False)
                active_file_edit_segment_id = segment
            messages.append(
                {
                    "id": _new_id("tr", idx),
                    "role": "tool",
                    "kind": "trace",
                    "content": "",
                    "traces": [],
                    "fileEdits": [],
                    "activitySegmentId": segment,
                    **turn_fields,
                    "createdAt": _ts_base + idx,
                },
            )
            target_index = len(messages) - 1
            last = messages[target_index]
        if not segment:
            segment = _new_activity_segment(activate=False)
            active_file_edit_segment_id = segment
        existing = list(last.get("fileEdits") or [])
        index_by_key = {
            _file_edit_key(edit): pos
            for pos, edit in enumerate(existing)
            if isinstance(edit, dict)
        }
        for edit in edits:
            if not isinstance(edit, dict):
                continue
            key = _file_edit_key(edit)
            if key in index_by_key:
                pos = index_by_key[key]
                merged = {**existing[pos], **edit}
                # An update that carries a path and is not itself pending
                # clears an earlier ``pending`` marker.
                if edit.get("path") and not edit.get("pending"):
                    merged.pop("pending", None)
                existing[pos] = merged
            else:
                index_by_key[key] = len(existing)
                existing.append(dict(edit))
        messages[target_index] = {
            **last,
            "fileEdits": existing,
            "activitySegmentId": last.get("activitySegmentId") or segment,
            **turn_fields,
        }

    for idx, rec in enumerate(lines):
        ev = rec.get("event")
        if ev == "user":
            active_activity_segment_id = None
            active_file_edit_segment_id = None
            text = rec.get("text")
            text_s = text if isinstance(text, str) else ""
            media_paths = rec.get("media_paths")
            paths: list[str] = []
            if isinstance(media_paths, list):
                paths = [str(p) for p in media_paths if p]
            media_att: list[dict[str, Any]] | None = None
            if paths and augment_user_media is not None:
                media_att = augment_user_media(paths)
            row: dict[str, Any] = {
                "id": _new_id("u", idx),
                "role": "user",
                "content": text_s,
                **_turn_fields(rec, "user"),
                "createdAt": _ts_base + idx,
            }
            if media_att:
                row["media"] = media_att
                if all(m.get("kind") == "image" for m in media_att):
                    row["images"] = [{"url": m.get("url"), "name": m.get("name")} for m in media_att]
            cli_apps = rec.get("cli_apps")
            if isinstance(cli_apps, list) and cli_apps:
                row["cliApps"] = [dict(app) for app in cli_apps if isinstance(app, dict)]
            mcp_presets = rec.get("mcp_presets")
            if isinstance(mcp_presets, list) and mcp_presets:
                row["mcpPresets"] = [
                    dict(preset) for preset in mcp_presets if isinstance(preset, dict)
                ]
            messages.append(row)
            continue

        if ev == "file_edit":
            raw_edits = rec.get("edits")
            if isinstance(raw_edits, list):
                upsert_file_edits(
                    [e for e in raw_edits if isinstance(e, dict)],
                    idx,
                    _turn_fields(rec, "activity"),
                )
            continue

        if ev == "delta":
            if suppress_until_turn_end:
                continue
            chunk = rec.get("text")
            if not isinstance(chunk, str):
                continue
            close_activity_for_answer()
            turn_fields = _turn_fields(rec, "answer")
            adopted = find_active_placeholder(messages, turn_fields) if buffer_message_id is None else None
            if buffer_message_id is None:
                if adopted:
                    buffer_message_id = adopted
                else:
                    buffer_message_id = _new_id("buf", idx)
                    messages.append(
                        {
                            "id": buffer_message_id,
                            "role": "assistant",
                            "content": "",
                            "isStreaming": True,
                            **_turn_fields(rec, "answer"),
                            "createdAt": _ts_base + idx,
                        },
                    )
            buffer_parts.append(chunk)
            combined = "".join(buffer_parts)
            for i, m in enumerate(messages):
                if m.get("id") == buffer_message_id:
                    messages[i] = {
                        **m,
                        "content": combined,
                        "isStreaming": True,
                        **_turn_fields(rec, "answer"),
                    }
                    break
            continue

        if ev == "stream_end":
            if suppress_until_turn_end:
                buffer_message_id = None
                buffer_parts = []
                continue
            final_text = rec.get("text")
            if isinstance(final_text, str):
                if buffer_message_id is None:
                    buffer_message_id = _new_id("buf", idx)
                    messages.append(
                        {
                            "id": buffer_message_id,
                            "role": "assistant",
                            "content": final_text,
                            "isStreaming": True,
                            **_turn_fields(rec, "answer"),
                            "createdAt": _ts_base + idx,
                        },
                    )
                else:
                    for i, m in enumerate(messages):
                        if m.get("id") == buffer_message_id:
                            messages[i] = {
                                **m,
                                "content": final_text,
                                "isStreaming": True,
                                **_turn_fields(rec, "answer"),
                            }
                            break
            buffer_message_id = None
            buffer_parts = []
            continue

        if ev == "reasoning_delta":
            if suppress_until_turn_end:
                continue
            chunk = rec.get("text")
            if not isinstance(chunk, str) or not chunk:
                continue
            close_file_edit_phase_before_activity()
            attach_reasoning_chunk(messages, chunk, idx, _turn_fields(rec, "reasoning"))
            continue

        if ev == "reasoning_end":
            if suppress_until_turn_end:
                continue
            close_reasoning(messages)
            continue

        if ev == "message":
            if suppress_until_turn_end and rec.get("kind") in (
                "tool_hint",
                "progress",
                "reasoning",
            ):
                continue
            kind = rec.get("kind")
            if kind == "reasoning":
                line = rec.get("text")
                if not isinstance(line, str) or not line:
                    continue
                close_file_edit_phase_before_activity()
                attach_reasoning_chunk(messages, line, idx, _turn_fields(rec, "reasoning"))
                close_reasoning(messages)
                continue
            if kind in ("tool_hint", "progress"):
                structured_events = _normalize_tool_events(rec.get("tool_events"))
                visible_structured_events = _filter_covered_file_edit_tool_events(messages, structured_events)
                structured = tool_trace_lines_from_events(visible_structured_events)
                text = rec.get("text")
                if structured:
                    trace_lines = structured
                elif structured_events:
                    # Structured events existed but yielded no visible lines:
                    # do not fall back to the free-text hint.
                    trace_lines = []
                elif isinstance(text, str) and text:
                    trace_lines = [text]
                else:
                    trace_lines = []
                if not trace_lines:
                    continue
                segment = _ensure_activity_segment()
                demote_interrupted_assistant(segment)
                last = messages[-1] if messages else None
                if (
                    last
                    and last.get("kind") == "trace"
                    and not last.get("isStreaming")
                    and (last.get("activitySegmentId") in (None, segment))
                ):
                    prev_traces = list(last.get("traces") or [last.get("content")])
                    if structured:
                        merged_traces, added = _merge_unique_tool_trace_lines(prev_traces, structured)
                        if not added and not visible_structured_events:
                            continue
                    else:
                        merged_traces = prev_traces + trace_lines
                    merged = {
                        **last,
                        "traces": merged_traces,
                        "content": merged_traces[-1],
                        "toolEvents": _merge_tool_events(last.get("toolEvents"), visible_structured_events)
                        if visible_structured_events
                        else last.get("toolEvents"),
                        "activitySegmentId": last.get("activitySegmentId") or segment,
                        **_turn_fields(rec, "activity"),
                    }
                    messages[-1] = merged
                else:
                    messages.append(
                        {
                            "id": _new_id("tr", idx),
                            "role": "tool",
                            "kind": "trace",
                            "content": trace_lines[-1],
                            "traces": trace_lines,
                            **({"toolEvents": visible_structured_events} if visible_structured_events else {}),
                            "activitySegmentId": segment,
                            **_turn_fields(rec, "activity"),
                            "createdAt": _ts_base + idx,
                        },
                    )
                continue

            # Any other message kind is a complete assistant message.
            buffer_message_id = None
            buffer_parts = []
            text = rec.get("text")
            content_s = text if isinstance(text, str) else ""
            media: list[dict[str, Any]] = []
            raw_media = rec.get("media")
            raw_media_list = raw_media if isinstance(raw_media, list) else []
            media_paths = [path for path in raw_media_list if isinstance(path, str) and path]
            if media_paths and augment_assistant_media is not None:
                media = augment_assistant_media(media_paths)
            # Persisted signed URLs are used only when the paths could not be
            # handed to the hook (no paths recorded, or no hook supplied).
            if not media and (not media_paths or augment_assistant_media is None):
                media = _media_from_signed_urls(rec.get("media_urls"))
            extra: dict[str, Any] = {"content": content_s}
            if media:
                extra["media"] = media
            lat = rec.get("latency_ms")
            if isinstance(lat, (int, float)) and lat >= 0:
                extra["latencyMs"] = int(lat)
            extra.update(_turn_fields(rec, "answer"))
            extra.update(_source_fields(rec))
            absorb_complete(extra, idx)
            if media:
                suppress_until_turn_end = True
            continue

        if ev == "turn_end":
            suppress_until_turn_end = False
            active_activity_segment_id = None
            active_file_edit_segment_id = None
            turn_id = rec.get("turn_id")
            if isinstance(turn_id, str) and turn_id:
                if turn_id in replay_turn_aliases:
                    replay_turn_aliases.pop(turn_id, None)
                else:
                    closed_turn_ids.add(turn_id)
            for i, m in enumerate(messages):
                if m.get("isStreaming"):
                    messages[i] = {**m, "isStreaming": False}
            prune_reasoning_only()
            lat = rec.get("latency_ms")
            if isinstance(lat, (int, float)) and lat >= 0:
                stamp_latency(int(lat))
            buffer_message_id = None
            buffer_parts = []
            continue

    for i, m in enumerate(messages):
        if (
            augment_assistant_text is not None
            and m.get("role") == "assistant"
            and m.get("kind") != "trace"
            and isinstance(m.get("content"), str)
        ):
            # Rebind ``m`` to the copy that is actually stored, so the flag
            # removal below applies to the message that gets returned rather
            # than to the discarded original.
            m = {**m, "content": augment_assistant_text(m["content"])}
            messages[i] = m
        m.pop("isStreaming", None)
        m.pop("reasoningStreaming", None)
    return messages
|
|
|
|
|
|
def fork_boundary_message_count(lines: list[dict[str, Any]]) -> int | None:
    """Return the replayed UI message count before the first fork marker, if any.

    The records preceding the first ``WEBUI_FORK_MARKER_EVENT`` record are
    replayed and the number of resulting UI messages is returned; ``None``
    means the transcript contains no fork marker.
    """
    marker_at = next(
        (
            pos
            for pos, entry in enumerate(lines)
            if entry.get("event") == WEBUI_FORK_MARKER_EVENT
        ),
        None,
    )
    if marker_at is None:
        return None
    return len(replay_transcript_to_ui_messages(lines[:marker_at]))
|
|
|
|
|
|
def has_pending_tool_calls(lines: list[dict[str, Any]]) -> bool:
    """Return True when the selected transcript tail looks like an unfinished turn.

    Records are inspected from the end. A ``turn_end`` or ``user`` record
    means nothing is pending; a ``message`` record is pending only for the
    ``tool_hint`` / ``progress`` / ``reasoning`` kinds; streaming, reasoning
    and file-edit records are pending. Any other record (fork markers
    included) is skipped. An exhausted transcript is not pending.
    """
    in_flight_events = {
        "delta",
        "stream_end",
        "reasoning_delta",
        "reasoning_end",
        "file_edit",
    }
    pending_message_kinds = {"tool_hint", "progress", "reasoning"}
    for entry in reversed(lines):
        event = entry.get("event")
        if event == "turn_end" or event == "user":
            return False
        if event == "message":
            return entry.get("kind") in pending_message_kinds
        if event in in_flight_events:
            return True
        if event == WEBUI_FORK_MARKER_EVENT:
            continue
    return False
|
|
|
|
|
|
def build_webui_thread_response(
    session_key: str,
    *,
    augment_user_media: Callable[[list[str]], list[dict[str, Any]]] | None = None,
    augment_assistant_media: Callable[[list[str]], list[dict[str, Any]]] | None = None,
    augment_assistant_text: Callable[[str], str] | None = None,
    session_messages: list[dict[str, Any]] | None = None,
    limit: int | None = None,
    direction: str | None = None,
    before: str | None = None,
) -> dict[str, Any] | None:
    """Return a payload compatible with ``WebuiThreadPersistedPayload``.

    Supplying any of ``limit``, ``direction`` or ``before`` selects a
    transcript page (only ``limit`` and ``before`` are forwarded to the page
    selection); otherwise the whole transcript is read. ``None`` is returned
    when there are no transcript lines. The payload always carries the schema
    version, session key, replayed messages and the pending-tool-call flag;
    ``page`` and ``fork_boundary_message_count`` are added when available.
    """
    page: dict[str, Any] | None = None
    wants_page = any(option is not None for option in (limit, direction, before))
    if not wants_page:
        lines = read_transcript_lines(session_key)
    else:
        lines, page = _select_transcript_page(session_key, limit=limit, before=before)
    if not lines:
        return None

    lines = inject_missing_user_events_from_session(session_key, lines, session_messages)
    fork_boundary = fork_boundary_message_count(lines)
    msgs = replay_transcript_to_ui_messages(
        lines,
        augment_user_media=augment_user_media,
        augment_assistant_media=augment_assistant_media,
        augment_assistant_text=augment_assistant_text,
    )

    payload: dict[str, Any] = {}
    payload["schemaVersion"] = WEBUI_TRANSCRIPT_SCHEMA_VERSION
    payload["sessionKey"] = session_key
    payload["messages"] = msgs
    payload["has_pending_tool_calls"] = has_pending_tool_calls(lines)
    if page is not None:
        page["loaded_message_count"] = len(msgs)
        payload["page"] = page
    if fork_boundary is not None:
        payload["fork_boundary_message_count"] = fork_boundary
    return payload
|