Claude: replace module-level file read states with per-loop per-session state class. fixes #3571

This commit is contained in:
LZDQ 2026-05-01 18:58:41 +08:00 committed by Xubin Ren
parent 6891a7a4d4
commit 58ae2d5b7e
7 changed files with 214 additions and 108 deletions

View File

@ -28,6 +28,7 @@ from nanobot.agent.tools.ask import (
pending_ask_user_id,
)
from nanobot.agent.tools.cron import CronTool
from nanobot.agent.tools.file_state import FileStates
from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool
from nanobot.agent.tools.message import MessageTool
from nanobot.agent.tools.notebook import NotebookEditTool
@ -247,6 +248,10 @@ class AgentLoop:
self.context = ContextBuilder(workspace, timezone=timezone, disabled_skills=disabled_skills)
self.sessions = session_manager or SessionManager(workspace)
self.tools = ToolRegistry()
# Per-session file-read/write tracker (issue #3571) — shared across
# the filesystem tools registered below so this AgentLoop does not
# leak read-dedup state into another loop's tools.
self._file_states = FileStates()
self.runner = AgentRunner(provider)
self.subagents = SubagentManager(
provider=provider,
@ -348,17 +353,24 @@ class AgentLoop:
self.workspace if (self.restrict_to_workspace or self.exec_config.sandbox) else None
)
extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None
# One FileStates per session so read-dedup and read-before-edit
# tracking does not leak across sessions sharing this process
# (issue #3571).
file_states = self._file_states
self.tools.register(AskUserTool())
self.tools.register(
ReadFileTool(
workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read
workspace=self.workspace,
allowed_dir=allowed_dir,
extra_allowed_dirs=extra_read,
file_states=file_states,
)
)
for cls in (WriteFileTool, EditFileTool, ListDirTool):
self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir))
self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
for cls in (GlobTool, GrepTool):
self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir))
self.tools.register(NotebookEditTool(workspace=self.workspace, allowed_dir=allowed_dir))
self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
self.tools.register(NotebookEditTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
if self.exec_config.enable:
self.tools.register(
ExecTool(

View File

@ -753,23 +753,28 @@ class Dream:
def _build_tools(self) -> ToolRegistry:
"""Build a minimal tool registry for the Dream agent."""
from nanobot.agent.skills import BUILTIN_SKILLS_DIR
from nanobot.agent.tools.file_state import FileStates
from nanobot.agent.tools.filesystem import EditFileTool, ReadFileTool, WriteFileTool
tools = ToolRegistry()
workspace = self.store.workspace
# Allow reading builtin skills for reference during skill creation
extra_read = [BUILTIN_SKILLS_DIR] if BUILTIN_SKILLS_DIR.exists() else None
# Dream gets its own FileStates so its caches stay isolated from the
# main loop's sessions (issue #3571).
file_states = FileStates()
tools.register(ReadFileTool(
workspace=workspace,
allowed_dir=workspace,
extra_allowed_dirs=extra_read,
file_states=file_states,
))
tools.register(EditFileTool(workspace=workspace, allowed_dir=workspace))
tools.register(EditFileTool(workspace=workspace, allowed_dir=workspace, file_states=file_states))
# write_file resolves relative paths from workspace root, but can only
# write under skills/ so the prompt can safely use skills/<name>/SKILL.md.
skills_dir = workspace / "skills"
skills_dir.mkdir(parents=True, exist_ok=True)
tools.register(WriteFileTool(workspace=workspace, allowed_dir=skills_dir))
tools.register(WriteFileTool(workspace=workspace, allowed_dir=skills_dir, file_states=file_states))
return tools
# -- skill listing --------------------------------------------------------

View File

@ -168,12 +168,16 @@ class SubagentManager:
tools = ToolRegistry()
allowed_dir = self.workspace if (self.restrict_to_workspace or self.exec_config.sandbox) else None
extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None
tools.register(ReadFileTool(workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read))
tools.register(WriteFileTool(workspace=self.workspace, allowed_dir=allowed_dir))
tools.register(EditFileTool(workspace=self.workspace, allowed_dir=allowed_dir))
tools.register(ListDirTool(workspace=self.workspace, allowed_dir=allowed_dir))
tools.register(GlobTool(workspace=self.workspace, allowed_dir=allowed_dir))
tools.register(GrepTool(workspace=self.workspace, allowed_dir=allowed_dir))
# Subagent gets its own FileStates so its read-dedup cache is
# isolated from the parent loop's sessions (issue #3571).
from nanobot.agent.tools.file_state import FileStates
file_states = FileStates()
tools.register(ReadFileTool(workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read, file_states=file_states))
tools.register(WriteFileTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
tools.register(EditFileTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
tools.register(ListDirTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
tools.register(GlobTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
tools.register(GrepTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
if self.exec_config.enable:
tools.register(ExecTool(
working_dir=str(self.workspace),

View File

@ -17,9 +17,6 @@ class ReadState:
can_dedup: bool
_state: dict[str, ReadState] = {}
def _hash_file(p: str) -> str | None:
try:
return hashlib.sha256(Path(p).read_bytes()).hexdigest()
@ -27,93 +24,141 @@ def _hash_file(p: str) -> str | None:
return None
class FileStates:
"""Per-session read/write tracker.
Owns its own state dict so read-dedup ("File unchanged since last read")
and read-before-edit warnings stay scoped to one agent session and do
not leak across sessions sharing this process.
"""
__slots__ = ("_state",)
def __init__(self) -> None:
self._state: dict[str, ReadState] = {}
def record_read(self, path: str | Path, offset: int = 1, limit: int | None = None) -> None:
"""Record that a file was read (called after successful read)."""
p = str(Path(path).resolve())
try:
mtime = os.path.getmtime(p)
except OSError:
return
self._state[p] = ReadState(
mtime=mtime,
offset=offset,
limit=limit,
content_hash=_hash_file(p),
can_dedup=True,
)
def record_write(self, path: str | Path) -> None:
"""Record that a file was written (updates mtime in state)."""
p = str(Path(path).resolve())
try:
mtime = os.path.getmtime(p)
except OSError:
self._state.pop(p, None)
return
self._state[p] = ReadState(
mtime=mtime,
offset=1,
limit=None,
content_hash=_hash_file(p),
can_dedup=False,
)
def check_read(self, path: str | Path) -> str | None:
"""Check if a file has been read and is fresh.
Returns None if OK, or a warning string.
When mtime changed but file content is identical (e.g. touch, editor save),
the check passes to avoid false-positive staleness warnings.
"""
p = str(Path(path).resolve())
entry = self._state.get(p)
if entry is None:
return "Warning: file has not been read yet. Read it first to verify content before editing."
try:
current_mtime = os.path.getmtime(p)
except OSError:
return None
if current_mtime != entry.mtime:
if entry.content_hash and _hash_file(p) == entry.content_hash:
entry.mtime = current_mtime
return None
return "Warning: file has been modified since last read. Re-read to verify content before editing."
# mtime unchanged - still check content hash to detect quick modifications
if entry.content_hash and _hash_file(p) != entry.content_hash:
return "Warning: file has been modified since last read. Re-read to verify content before editing."
return None
def is_unchanged(self, path: str | Path, offset: int = 1, limit: int | None = None) -> bool:
"""Return True if file was previously read with same params and content is unchanged."""
p = str(Path(path).resolve())
entry = self._state.get(p)
if entry is None:
return False
if not entry.can_dedup:
return False
if entry.offset != offset or entry.limit != limit:
return False
try:
current_mtime = os.path.getmtime(p)
except OSError:
return False
if current_mtime != entry.mtime:
# mtime changed - check if content also changed
current_hash = _hash_file(p)
if current_hash != entry.content_hash:
# Content actually changed - don't dedup
entry.can_dedup = False
return False
# Content identical despite mtime change (e.g. touch) - mark as not dedupable to force full read next time
entry.can_dedup = False
return True
# mtime unchanged - content must be identical
return True
def get(self, path: str | Path) -> ReadState | None:
"""Return the raw ReadState entry for a path, or None."""
return self._state.get(str(Path(path).resolve()))
def clear(self) -> None:
"""Clear all tracked state (useful for testing)."""
self._state.clear()
# Module-level default instance, retained for backward compatibility with
# tests and callers that reach in directly. Per-session callers should hold
# their own FileStates instance instead of touching this one.
_default = FileStates()
def record_read(path: str | Path, offset: int = 1, limit: int | None = None) -> None:
"""Record that a file was read (called after successful read)."""
p = str(Path(path).resolve())
try:
mtime = os.path.getmtime(p)
except OSError:
return
_state[p] = ReadState(
mtime=mtime,
offset=offset,
limit=limit,
content_hash=_hash_file(p),
can_dedup=True,
)
_default.record_read(path, offset=offset, limit=limit)
def record_write(path: str | Path) -> None:
"""Record that a file was written (updates mtime in state)."""
p = str(Path(path).resolve())
try:
mtime = os.path.getmtime(p)
except OSError:
_state.pop(p, None)
return
_state[p] = ReadState(
mtime=mtime,
offset=1,
limit=None,
content_hash=_hash_file(p),
can_dedup=False,
)
_default.record_write(path)
def check_read(path: str | Path) -> str | None:
"""Check if a file has been read and is fresh.
Returns None if OK, or a warning string.
When mtime changed but file content is identical (e.g. touch, editor save),
the check passes to avoid false-positive staleness warnings.
"""
p = str(Path(path).resolve())
entry = _state.get(p)
if entry is None:
return "Warning: file has not been read yet. Read it first to verify content before editing."
try:
current_mtime = os.path.getmtime(p)
except OSError:
return None
if current_mtime != entry.mtime:
if entry.content_hash and _hash_file(p) == entry.content_hash:
entry.mtime = current_mtime
return None
return "Warning: file has been modified since last read. Re-read to verify content before editing."
# mtime unchanged - still check content hash to detect quick modifications
if entry.content_hash and _hash_file(p) != entry.content_hash:
return "Warning: file has been modified since last read. Re-read to verify content before editing."
return None
return _default.check_read(path)
def is_unchanged(path: str | Path, offset: int = 1, limit: int | None = None) -> bool:
"""Return True if file was previously read with same params and content is unchanged."""
p = str(Path(path).resolve())
entry = _state.get(p)
if entry is None:
return False
if not entry.can_dedup:
return False
if entry.offset != offset or entry.limit != limit:
return False
try:
current_mtime = os.path.getmtime(p)
except OSError:
return False
if current_mtime != entry.mtime:
# mtime changed - check if content also changed
current_hash = _hash_file(p)
if current_hash != entry.content_hash:
# Content actually changed - don't dedup
entry.can_dedup = False
return False
# Content identical despite mtime change (e.g. touch) - mark as not dedupable to force full read next time
entry.can_dedup = False
return True
# mtime unchanged - content must be identical
return True
return _default.is_unchanged(path, offset=offset, limit=limit)
def clear() -> None:
"""Clear all tracked state (useful for testing)."""
_state.clear()
_default.clear()
# Legacy attribute for callers that reached into the module-level dict
# directly (filesystem.py used to do this). Kept as a property-like accessor
# so existing imports keep working.
def __getattr__(name: str):
if name == "_state":
return _default._state
raise AttributeError(name)

View File

@ -9,7 +9,7 @@ from typing import Any
from nanobot.agent.tools.base import Tool, tool_parameters
from nanobot.agent.tools.schema import BooleanSchema, IntegerSchema, StringSchema, tool_parameters_schema
from nanobot.agent.tools import file_state
from nanobot.agent.tools.file_state import FileStates, _hash_file
from nanobot.utils.helpers import build_image_content_blocks, detect_image_mime
from nanobot.config.paths import get_media_dir
@ -49,10 +49,15 @@ class _FsTool(Tool):
workspace: Path | None = None,
allowed_dir: Path | None = None,
extra_allowed_dirs: list[Path] | None = None,
file_states: FileStates | None = None,
):
self._workspace = workspace
self._allowed_dir = allowed_dir
self._extra_allowed_dirs = extra_allowed_dirs
# Read-dedup / read-before-edit state is scoped to one FileStates
# so it does not leak across sessions sharing this process
# (issue #3571). Bare constructions get a private instance.
self._file_states: FileStates = file_states if file_states is not None else FileStates()
def _resolve(self, path: str) -> Path:
return _resolve_path(path, self._workspace, self._allowed_dir, self._extra_allowed_dirs)
@ -184,7 +189,7 @@ class ReadFileTool(_FsTool):
# Read dedup: same path + offset + limit + unchanged mtime → stub
# Always check for external modifications before dedup
entry = file_state._state.get(str(fp.resolve()))
entry = self._file_states.get(fp)
try:
current_mtime = os.path.getmtime(fp)
except OSError:
@ -193,21 +198,21 @@ class ReadFileTool(_FsTool):
if current_mtime != entry.mtime:
# File was modified externally - force full read and mark as not dedupable
entry.can_dedup = False
file_state.record_read(fp, offset=offset, limit=limit) # Update state with new mtime
self._file_states.record_read(fp, offset=offset, limit=limit) # Update state with new mtime
# Continue to read full content (don't return dedup message)
else:
# File unchanged - return dedup message
# But only if content is actually unchanged (not just mtime)
current_hash = file_state._hash_file(str(fp))
current_hash = _hash_file(str(fp))
if current_hash == entry.content_hash:
return f"[File unchanged since last read: {path}]"
else:
# Content changed despite same mtime - force full read
entry.can_dedup = False
file_state.record_read(fp, offset=offset, limit=limit)
self._file_states.record_read(fp, offset=offset, limit=limit)
else:
# No previous state or marked as not dedupable - read full content
file_state.record_read(fp, offset=offset, limit=limit)
self._file_states.record_read(fp, offset=offset, limit=limit)
# Force full read by setting can_dedup to False for this read
if entry:
entry.can_dedup = False
@ -256,7 +261,7 @@ class ReadFileTool(_FsTool):
result += f"\n\n(Showing lines {offset}-{end} of {total}. Use offset={end + 1} to continue.)"
else:
result += f"\n\n(End of file — {total} lines total)"
file_state.record_read(fp, offset=offset, limit=limit)
self._file_states.record_read(fp, offset=offset, limit=limit)
return result
except PermissionError as e:
return f"Error: {e}"
@ -365,7 +370,7 @@ class WriteFileTool(_FsTool):
fp = self._resolve(path)
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content, encoding="utf-8")
file_state.record_write(fp)
self._file_states.record_write(fp)
return f"Successfully wrote {len(content)} characters to {fp}"
except PermissionError as e:
return f"Error: {e}"
@ -699,7 +704,7 @@ class EditFileTool(_FsTool):
if old_text == "":
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(new_text, encoding="utf-8")
file_state.record_write(fp)
self._file_states.record_write(fp)
return f"Successfully created {fp}"
return self._file_not_found_msg(path, fp)
@ -718,11 +723,11 @@ class EditFileTool(_FsTool):
if content.strip():
return f"Error: Cannot create file — {path} already exists and is not empty."
fp.write_text(new_text, encoding="utf-8")
file_state.record_write(fp)
self._file_states.record_write(fp)
return f"Successfully edited {fp}"
# Read-before-edit check
warning = file_state.check_read(fp)
warning = self._file_states.check_read(fp)
raw = fp.read_bytes()
uses_crlf = b"\r\n" in raw
@ -767,7 +772,7 @@ class EditFileTool(_FsTool):
new_content = new_content.replace("\n", "\r\n")
fp.write_bytes(new_content.encode("utf-8"))
file_state.record_write(fp)
self._file_states.record_write(fp)
msg = f"Successfully edited {fp}"
if warning:
msg = f"{warning}\n{msg}"

View File

@ -27,12 +27,16 @@ class TestEditReadTracking:
"""edit_file should warn when file hasn't been read first."""
@pytest.fixture()
def read_tool(self, tmp_path):
return ReadFileTool(workspace=tmp_path)
def file_states(self):
return file_state.FileStates()
@pytest.fixture()
def edit_tool(self, tmp_path):
return EditFileTool(workspace=tmp_path)
def read_tool(self, tmp_path, file_states):
return ReadFileTool(workspace=tmp_path, file_states=file_states)
@pytest.fixture()
def edit_tool(self, tmp_path, file_states):
return EditFileTool(workspace=tmp_path, file_states=file_states)
@pytest.mark.asyncio
async def test_edit_warns_if_file_not_read_first(self, edit_tool, tmp_path):

View File

@ -97,6 +97,37 @@ class TestReadDedup:
assert isinstance(second, list)
# ---------------------------------------------------------------------------
# Cross-session isolation (issue #3571)
# ---------------------------------------------------------------------------
# Each session must keep its own read cache. When session A reads a file,
# session B reading the same file must still receive the full content, not
# the "[File unchanged since last read]" dedup stub. The stub is only valid
# within the session that first cached the read.
class TestReadDedupSessionIsolation:
@pytest.mark.asyncio
async def test_separate_sessions_do_not_share_dedup_state(self, tmp_path):
f = tmp_path / "shared.txt"
f.write_text("\n".join(f"line {i}" for i in range(10)), encoding="utf-8")
session_a_tool = ReadFileTool(workspace=tmp_path)
session_b_tool = ReadFileTool(workspace=tmp_path)
first = await session_a_tool.execute(path=str(f))
assert "line 0" in first
# Session B has never read this file before — it must see the full
# content, not the dedup stub from session A.
second = await session_b_tool.execute(path=str(f))
assert "unchanged" not in second.lower(), (
"Session B should not inherit session A's read-dedup state. "
f"Got: {second!r}"
)
assert "line 0" in second
# ---------------------------------------------------------------------------
# PDF support
# ---------------------------------------------------------------------------