fix(memory): serialize cursor allocation in append_history (#4081)

This commit is contained in:
04cb 2026-06-02 08:12:24 +08:00 committed by Xubin Ren
parent 3e98a03188
commit ac226d66f9
2 changed files with 43 additions and 11 deletions

View File

@ -6,6 +6,7 @@ import asyncio
import json import json
import os import os
import re import re
import threading
import weakref import weakref
from contextlib import suppress from contextlib import suppress
from datetime import datetime from datetime import datetime
@ -61,6 +62,7 @@ class MemoryStore:
self._dream_cursor_file = self.memory_dir / ".dream_cursor" self._dream_cursor_file = self.memory_dir / ".dream_cursor"
self._corruption_logged = False # rate-limit non-int cursor warning self._corruption_logged = False # rate-limit non-int cursor warning
self._oversize_logged = False # rate-limit oversized-entry warning self._oversize_logged = False # rate-limit oversized-entry warning
self._append_lock = threading.Lock() # serialize cursor allocation + append
self._git = GitStore(workspace, tracked_files=[ self._git = GitStore(workspace, tracked_files=[
"SOUL.md", "USER.md", "memory/MEMORY.md", "memory/.dream_cursor", "SOUL.md", "USER.md", "memory/MEMORY.md", "memory/.dream_cursor",
]) ])
@ -248,7 +250,6 @@ class MemoryStore:
large writes (e.g. an LLM echoing its input back as a "summary"). large writes (e.g. an LLM echoing its input back as a "summary").
""" """
limit = max_chars if max_chars is not None else _HISTORY_ENTRY_HARD_CAP limit = max_chars if max_chars is not None else _HISTORY_ENTRY_HARD_CAP
cursor = self._next_cursor()
ts = datetime.now().strftime("%Y-%m-%d %H:%M") ts = datetime.now().strftime("%Y-%m-%d %H:%M")
raw = entry.rstrip() raw = entry.rstrip()
if len(raw) > limit: if len(raw) > limit:
@ -262,16 +263,20 @@ class MemoryStore:
) )
raw = truncate_text(raw, limit) raw = truncate_text(raw, limit)
content = strip_think(raw) content = strip_think(raw)
if raw and not content: # Cursor allocation and the append must be atomic: concurrent writers
logger.debug( # could otherwise read the same current cursor and emit duplicates.
"history entry {} stripped to empty (likely template leak); " with self._append_lock:
"persisting empty content to avoid re-polluting context", cursor = self._next_cursor()
cursor, if raw and not content:
) logger.debug(
record = {"cursor": cursor, "timestamp": ts, "content": content} "history entry {} stripped to empty (likely template leak); "
with open(self.history_file, "a", encoding="utf-8") as f: "persisting empty content to avoid re-polluting context",
f.write(json.dumps(record, ensure_ascii=False) + "\n") cursor,
self._cursor_file.write_text(str(cursor), encoding="utf-8") )
record = {"cursor": cursor, "timestamp": ts, "content": content}
with open(self.history_file, "a", encoding="utf-8") as f:
f.write(json.dumps(record, ensure_ascii=False) + "\n")
self._cursor_file.write_text(str(cursor), encoding="utf-8")
return cursor return cursor
@staticmethod @staticmethod

View File

@ -129,6 +129,33 @@ class TestHistoryWithCursor:
cursor = store.append_history("new event") cursor = store.append_history("new event")
assert cursor == 1 assert cursor == 1
def test_append_history_allocates_unique_cursors_under_concurrent_writes(self, store):
"""Regression: concurrent appends must not allocate duplicate cursors."""
import threading
writers = 16
start = threading.Barrier(writers)
cursors: list[int] = []
lock = threading.Lock()
def worker(i):
start.wait()
c = store.append_history(f"event {i}")
with lock:
cursors.append(c)
threads = [threading.Thread(target=worker, args=(i,)) for i in range(writers)]
for t in threads:
t.start()
for t in threads:
t.join()
assert len(cursors) == writers
assert len(set(cursors)) == writers, f"duplicate cursors: {sorted(cursors)}"
assert sorted(cursors) == list(range(1, writers + 1))
persisted = store.read_unprocessed_history(since_cursor=0)
assert sorted(e["cursor"] for e in persisted) == list(range(1, writers + 1))
def test_compact_history_drops_oldest(self, tmp_path): def test_compact_history_drops_oldest(self, tmp_path):
store = MemoryStore(tmp_path, max_history_entries=2) store = MemoryStore(tmp_path, max_history_entries=2)
store.append_history("event 1") store.append_history("event 1")