Claude: replace module-level file read states with per-loop per-session state class. fixes #3571

This commit is contained in:
LZDQ 2026-05-01 18:58:41 +08:00 committed by Xubin Ren
parent 6891a7a4d4
commit 58ae2d5b7e
7 changed files with 214 additions and 108 deletions

View File

@ -28,6 +28,7 @@ from nanobot.agent.tools.ask import (
pending_ask_user_id, pending_ask_user_id,
) )
from nanobot.agent.tools.cron import CronTool from nanobot.agent.tools.cron import CronTool
from nanobot.agent.tools.file_state import FileStates
from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool
from nanobot.agent.tools.message import MessageTool from nanobot.agent.tools.message import MessageTool
from nanobot.agent.tools.notebook import NotebookEditTool from nanobot.agent.tools.notebook import NotebookEditTool
@ -247,6 +248,10 @@ class AgentLoop:
self.context = ContextBuilder(workspace, timezone=timezone, disabled_skills=disabled_skills) self.context = ContextBuilder(workspace, timezone=timezone, disabled_skills=disabled_skills)
self.sessions = session_manager or SessionManager(workspace) self.sessions = session_manager or SessionManager(workspace)
self.tools = ToolRegistry() self.tools = ToolRegistry()
# Per-session file-read/write tracker (issue #3571) — shared across
# the filesystem tools registered below so this AgentLoop does not
# leak read-dedup state into another loop's tools.
self._file_states = FileStates()
self.runner = AgentRunner(provider) self.runner = AgentRunner(provider)
self.subagents = SubagentManager( self.subagents = SubagentManager(
provider=provider, provider=provider,
@ -348,17 +353,24 @@ class AgentLoop:
self.workspace if (self.restrict_to_workspace or self.exec_config.sandbox) else None self.workspace if (self.restrict_to_workspace or self.exec_config.sandbox) else None
) )
extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None
# One FileStates per session so read-dedup and read-before-edit
# tracking does not leak across sessions sharing this process
# (issue #3571).
file_states = self._file_states
self.tools.register(AskUserTool()) self.tools.register(AskUserTool())
self.tools.register( self.tools.register(
ReadFileTool( ReadFileTool(
workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read workspace=self.workspace,
allowed_dir=allowed_dir,
extra_allowed_dirs=extra_read,
file_states=file_states,
) )
) )
for cls in (WriteFileTool, EditFileTool, ListDirTool): for cls in (WriteFileTool, EditFileTool, ListDirTool):
self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir)) self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
for cls in (GlobTool, GrepTool): for cls in (GlobTool, GrepTool):
self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir)) self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
self.tools.register(NotebookEditTool(workspace=self.workspace, allowed_dir=allowed_dir)) self.tools.register(NotebookEditTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
if self.exec_config.enable: if self.exec_config.enable:
self.tools.register( self.tools.register(
ExecTool( ExecTool(

View File

@ -753,23 +753,28 @@ class Dream:
def _build_tools(self) -> ToolRegistry: def _build_tools(self) -> ToolRegistry:
"""Build a minimal tool registry for the Dream agent.""" """Build a minimal tool registry for the Dream agent."""
from nanobot.agent.skills import BUILTIN_SKILLS_DIR from nanobot.agent.skills import BUILTIN_SKILLS_DIR
from nanobot.agent.tools.file_state import FileStates
from nanobot.agent.tools.filesystem import EditFileTool, ReadFileTool, WriteFileTool from nanobot.agent.tools.filesystem import EditFileTool, ReadFileTool, WriteFileTool
tools = ToolRegistry() tools = ToolRegistry()
workspace = self.store.workspace workspace = self.store.workspace
# Allow reading builtin skills for reference during skill creation # Allow reading builtin skills for reference during skill creation
extra_read = [BUILTIN_SKILLS_DIR] if BUILTIN_SKILLS_DIR.exists() else None extra_read = [BUILTIN_SKILLS_DIR] if BUILTIN_SKILLS_DIR.exists() else None
# Dream gets its own FileStates so its caches stay isolated from the
# main loop's sessions (issue #3571).
file_states = FileStates()
tools.register(ReadFileTool( tools.register(ReadFileTool(
workspace=workspace, workspace=workspace,
allowed_dir=workspace, allowed_dir=workspace,
extra_allowed_dirs=extra_read, extra_allowed_dirs=extra_read,
file_states=file_states,
)) ))
tools.register(EditFileTool(workspace=workspace, allowed_dir=workspace)) tools.register(EditFileTool(workspace=workspace, allowed_dir=workspace, file_states=file_states))
# write_file resolves relative paths from workspace root, but can only # write_file resolves relative paths from workspace root, but can only
# write under skills/ so the prompt can safely use skills/<name>/SKILL.md. # write under skills/ so the prompt can safely use skills/<name>/SKILL.md.
skills_dir = workspace / "skills" skills_dir = workspace / "skills"
skills_dir.mkdir(parents=True, exist_ok=True) skills_dir.mkdir(parents=True, exist_ok=True)
tools.register(WriteFileTool(workspace=workspace, allowed_dir=skills_dir)) tools.register(WriteFileTool(workspace=workspace, allowed_dir=skills_dir, file_states=file_states))
return tools return tools
# -- skill listing -------------------------------------------------------- # -- skill listing --------------------------------------------------------

View File

@ -168,12 +168,16 @@ class SubagentManager:
tools = ToolRegistry() tools = ToolRegistry()
allowed_dir = self.workspace if (self.restrict_to_workspace or self.exec_config.sandbox) else None allowed_dir = self.workspace if (self.restrict_to_workspace or self.exec_config.sandbox) else None
extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None
tools.register(ReadFileTool(workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read)) # Subagent gets its own FileStates so its read-dedup cache is
tools.register(WriteFileTool(workspace=self.workspace, allowed_dir=allowed_dir)) # isolated from the parent loop's sessions (issue #3571).
tools.register(EditFileTool(workspace=self.workspace, allowed_dir=allowed_dir)) from nanobot.agent.tools.file_state import FileStates
tools.register(ListDirTool(workspace=self.workspace, allowed_dir=allowed_dir)) file_states = FileStates()
tools.register(GlobTool(workspace=self.workspace, allowed_dir=allowed_dir)) tools.register(ReadFileTool(workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read, file_states=file_states))
tools.register(GrepTool(workspace=self.workspace, allowed_dir=allowed_dir)) tools.register(WriteFileTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
tools.register(EditFileTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
tools.register(ListDirTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
tools.register(GlobTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
tools.register(GrepTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
if self.exec_config.enable: if self.exec_config.enable:
tools.register(ExecTool( tools.register(ExecTool(
working_dir=str(self.workspace), working_dir=str(self.workspace),

View File

@ -17,9 +17,6 @@ class ReadState:
can_dedup: bool can_dedup: bool
_state: dict[str, ReadState] = {}
def _hash_file(p: str) -> str | None: def _hash_file(p: str) -> str | None:
try: try:
return hashlib.sha256(Path(p).read_bytes()).hexdigest() return hashlib.sha256(Path(p).read_bytes()).hexdigest()
@ -27,93 +24,141 @@ def _hash_file(p: str) -> str | None:
return None return None
class FileStates:
"""Per-session read/write tracker.
Owns its own state dict so read-dedup ("File unchanged since last read")
and read-before-edit warnings stay scoped to one agent session and do
not leak across sessions sharing this process.
"""
__slots__ = ("_state",)
def __init__(self) -> None:
self._state: dict[str, ReadState] = {}
def record_read(self, path: str | Path, offset: int = 1, limit: int | None = None) -> None:
"""Record that a file was read (called after successful read)."""
p = str(Path(path).resolve())
try:
mtime = os.path.getmtime(p)
except OSError:
return
self._state[p] = ReadState(
mtime=mtime,
offset=offset,
limit=limit,
content_hash=_hash_file(p),
can_dedup=True,
)
def record_write(self, path: str | Path) -> None:
"""Record that a file was written (updates mtime in state)."""
p = str(Path(path).resolve())
try:
mtime = os.path.getmtime(p)
except OSError:
self._state.pop(p, None)
return
self._state[p] = ReadState(
mtime=mtime,
offset=1,
limit=None,
content_hash=_hash_file(p),
can_dedup=False,
)
def check_read(self, path: str | Path) -> str | None:
"""Check if a file has been read and is fresh.
Returns None if OK, or a warning string.
When mtime changed but file content is identical (e.g. touch, editor save),
the check passes to avoid false-positive staleness warnings.
"""
p = str(Path(path).resolve())
entry = self._state.get(p)
if entry is None:
return "Warning: file has not been read yet. Read it first to verify content before editing."
try:
current_mtime = os.path.getmtime(p)
except OSError:
return None
if current_mtime != entry.mtime:
if entry.content_hash and _hash_file(p) == entry.content_hash:
entry.mtime = current_mtime
return None
return "Warning: file has been modified since last read. Re-read to verify content before editing."
# mtime unchanged - still check content hash to detect quick modifications
if entry.content_hash and _hash_file(p) != entry.content_hash:
return "Warning: file has been modified since last read. Re-read to verify content before editing."
return None
def is_unchanged(self, path: str | Path, offset: int = 1, limit: int | None = None) -> bool:
"""Return True if file was previously read with same params and content is unchanged."""
p = str(Path(path).resolve())
entry = self._state.get(p)
if entry is None:
return False
if not entry.can_dedup:
return False
if entry.offset != offset or entry.limit != limit:
return False
try:
current_mtime = os.path.getmtime(p)
except OSError:
return False
if current_mtime != entry.mtime:
# mtime changed - check if content also changed
current_hash = _hash_file(p)
if current_hash != entry.content_hash:
# Content actually changed - don't dedup
entry.can_dedup = False
return False
# Content identical despite mtime change (e.g. touch) - mark as not dedupable to force full read next time
entry.can_dedup = False
return True
# mtime unchanged - content must be identical
return True
def get(self, path: str | Path) -> ReadState | None:
"""Return the raw ReadState entry for a path, or None."""
return self._state.get(str(Path(path).resolve()))
def clear(self) -> None:
"""Clear all tracked state (useful for testing)."""
self._state.clear()
# Module-level default instance, retained for backward compatibility with
# tests and callers that reach in directly. Per-session callers should hold
# their own FileStates instance instead of touching this one.
_default = FileStates()
def record_read(path: str | Path, offset: int = 1, limit: int | None = None) -> None: def record_read(path: str | Path, offset: int = 1, limit: int | None = None) -> None:
"""Record that a file was read (called after successful read).""" _default.record_read(path, offset=offset, limit=limit)
p = str(Path(path).resolve())
try:
mtime = os.path.getmtime(p)
except OSError:
return
_state[p] = ReadState(
mtime=mtime,
offset=offset,
limit=limit,
content_hash=_hash_file(p),
can_dedup=True,
)
def record_write(path: str | Path) -> None: def record_write(path: str | Path) -> None:
"""Record that a file was written (updates mtime in state).""" _default.record_write(path)
p = str(Path(path).resolve())
try:
mtime = os.path.getmtime(p)
except OSError:
_state.pop(p, None)
return
_state[p] = ReadState(
mtime=mtime,
offset=1,
limit=None,
content_hash=_hash_file(p),
can_dedup=False,
)
def check_read(path: str | Path) -> str | None: def check_read(path: str | Path) -> str | None:
"""Check if a file has been read and is fresh. return _default.check_read(path)
Returns None if OK, or a warning string.
When mtime changed but file content is identical (e.g. touch, editor save),
the check passes to avoid false-positive staleness warnings.
"""
p = str(Path(path).resolve())
entry = _state.get(p)
if entry is None:
return "Warning: file has not been read yet. Read it first to verify content before editing."
try:
current_mtime = os.path.getmtime(p)
except OSError:
return None
if current_mtime != entry.mtime:
if entry.content_hash and _hash_file(p) == entry.content_hash:
entry.mtime = current_mtime
return None
return "Warning: file has been modified since last read. Re-read to verify content before editing."
# mtime unchanged - still check content hash to detect quick modifications
if entry.content_hash and _hash_file(p) != entry.content_hash:
return "Warning: file has been modified since last read. Re-read to verify content before editing."
return None
def is_unchanged(path: str | Path, offset: int = 1, limit: int | None = None) -> bool: def is_unchanged(path: str | Path, offset: int = 1, limit: int | None = None) -> bool:
"""Return True if file was previously read with same params and content is unchanged.""" return _default.is_unchanged(path, offset=offset, limit=limit)
p = str(Path(path).resolve())
entry = _state.get(p)
if entry is None:
return False
if not entry.can_dedup:
return False
if entry.offset != offset or entry.limit != limit:
return False
try:
current_mtime = os.path.getmtime(p)
except OSError:
return False
if current_mtime != entry.mtime:
# mtime changed - check if content also changed
current_hash = _hash_file(p)
if current_hash != entry.content_hash:
# Content actually changed - don't dedup
entry.can_dedup = False
return False
# Content identical despite mtime change (e.g. touch) - mark as not dedupable to force full read next time
entry.can_dedup = False
return True
# mtime unchanged - content must be identical
return True
def clear() -> None: def clear() -> None:
"""Clear all tracked state (useful for testing).""" _default.clear()
_state.clear()
# Legacy attribute for callers that reached into the module-level dict
# directly (filesystem.py used to do this). Kept as a property-like accessor
# so existing imports keep working.
def __getattr__(name: str):
if name == "_state":
return _default._state
raise AttributeError(name)

View File

@ -9,7 +9,7 @@ from typing import Any
from nanobot.agent.tools.base import Tool, tool_parameters from nanobot.agent.tools.base import Tool, tool_parameters
from nanobot.agent.tools.schema import BooleanSchema, IntegerSchema, StringSchema, tool_parameters_schema from nanobot.agent.tools.schema import BooleanSchema, IntegerSchema, StringSchema, tool_parameters_schema
from nanobot.agent.tools import file_state from nanobot.agent.tools.file_state import FileStates, _hash_file
from nanobot.utils.helpers import build_image_content_blocks, detect_image_mime from nanobot.utils.helpers import build_image_content_blocks, detect_image_mime
from nanobot.config.paths import get_media_dir from nanobot.config.paths import get_media_dir
@ -49,10 +49,15 @@ class _FsTool(Tool):
workspace: Path | None = None, workspace: Path | None = None,
allowed_dir: Path | None = None, allowed_dir: Path | None = None,
extra_allowed_dirs: list[Path] | None = None, extra_allowed_dirs: list[Path] | None = None,
file_states: FileStates | None = None,
): ):
self._workspace = workspace self._workspace = workspace
self._allowed_dir = allowed_dir self._allowed_dir = allowed_dir
self._extra_allowed_dirs = extra_allowed_dirs self._extra_allowed_dirs = extra_allowed_dirs
# Read-dedup / read-before-edit state is scoped to one FileStates
# so it does not leak across sessions sharing this process
# (issue #3571). Bare constructions get a private instance.
self._file_states: FileStates = file_states if file_states is not None else FileStates()
def _resolve(self, path: str) -> Path: def _resolve(self, path: str) -> Path:
return _resolve_path(path, self._workspace, self._allowed_dir, self._extra_allowed_dirs) return _resolve_path(path, self._workspace, self._allowed_dir, self._extra_allowed_dirs)
@ -184,7 +189,7 @@ class ReadFileTool(_FsTool):
# Read dedup: same path + offset + limit + unchanged mtime → stub # Read dedup: same path + offset + limit + unchanged mtime → stub
# Always check for external modifications before dedup # Always check for external modifications before dedup
entry = file_state._state.get(str(fp.resolve())) entry = self._file_states.get(fp)
try: try:
current_mtime = os.path.getmtime(fp) current_mtime = os.path.getmtime(fp)
except OSError: except OSError:
@ -193,21 +198,21 @@ class ReadFileTool(_FsTool):
if current_mtime != entry.mtime: if current_mtime != entry.mtime:
# File was modified externally - force full read and mark as not dedupable # File was modified externally - force full read and mark as not dedupable
entry.can_dedup = False entry.can_dedup = False
file_state.record_read(fp, offset=offset, limit=limit) # Update state with new mtime self._file_states.record_read(fp, offset=offset, limit=limit) # Update state with new mtime
# Continue to read full content (don't return dedup message) # Continue to read full content (don't return dedup message)
else: else:
# File unchanged - return dedup message # File unchanged - return dedup message
# But only if content is actually unchanged (not just mtime) # But only if content is actually unchanged (not just mtime)
current_hash = file_state._hash_file(str(fp)) current_hash = _hash_file(str(fp))
if current_hash == entry.content_hash: if current_hash == entry.content_hash:
return f"[File unchanged since last read: {path}]" return f"[File unchanged since last read: {path}]"
else: else:
# Content changed despite same mtime - force full read # Content changed despite same mtime - force full read
entry.can_dedup = False entry.can_dedup = False
file_state.record_read(fp, offset=offset, limit=limit) self._file_states.record_read(fp, offset=offset, limit=limit)
else: else:
# No previous state or marked as not dedupable - read full content # No previous state or marked as not dedupable - read full content
file_state.record_read(fp, offset=offset, limit=limit) self._file_states.record_read(fp, offset=offset, limit=limit)
# Force full read by setting can_dedup to False for this read # Force full read by setting can_dedup to False for this read
if entry: if entry:
entry.can_dedup = False entry.can_dedup = False
@ -256,7 +261,7 @@ class ReadFileTool(_FsTool):
result += f"\n\n(Showing lines {offset}-{end} of {total}. Use offset={end + 1} to continue.)" result += f"\n\n(Showing lines {offset}-{end} of {total}. Use offset={end + 1} to continue.)"
else: else:
result += f"\n\n(End of file — {total} lines total)" result += f"\n\n(End of file — {total} lines total)"
file_state.record_read(fp, offset=offset, limit=limit) self._file_states.record_read(fp, offset=offset, limit=limit)
return result return result
except PermissionError as e: except PermissionError as e:
return f"Error: {e}" return f"Error: {e}"
@ -365,7 +370,7 @@ class WriteFileTool(_FsTool):
fp = self._resolve(path) fp = self._resolve(path)
fp.parent.mkdir(parents=True, exist_ok=True) fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content, encoding="utf-8") fp.write_text(content, encoding="utf-8")
file_state.record_write(fp) self._file_states.record_write(fp)
return f"Successfully wrote {len(content)} characters to {fp}" return f"Successfully wrote {len(content)} characters to {fp}"
except PermissionError as e: except PermissionError as e:
return f"Error: {e}" return f"Error: {e}"
@ -699,7 +704,7 @@ class EditFileTool(_FsTool):
if old_text == "": if old_text == "":
fp.parent.mkdir(parents=True, exist_ok=True) fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(new_text, encoding="utf-8") fp.write_text(new_text, encoding="utf-8")
file_state.record_write(fp) self._file_states.record_write(fp)
return f"Successfully created {fp}" return f"Successfully created {fp}"
return self._file_not_found_msg(path, fp) return self._file_not_found_msg(path, fp)
@ -718,11 +723,11 @@ class EditFileTool(_FsTool):
if content.strip(): if content.strip():
return f"Error: Cannot create file — {path} already exists and is not empty." return f"Error: Cannot create file — {path} already exists and is not empty."
fp.write_text(new_text, encoding="utf-8") fp.write_text(new_text, encoding="utf-8")
file_state.record_write(fp) self._file_states.record_write(fp)
return f"Successfully edited {fp}" return f"Successfully edited {fp}"
# Read-before-edit check # Read-before-edit check
warning = file_state.check_read(fp) warning = self._file_states.check_read(fp)
raw = fp.read_bytes() raw = fp.read_bytes()
uses_crlf = b"\r\n" in raw uses_crlf = b"\r\n" in raw
@ -767,7 +772,7 @@ class EditFileTool(_FsTool):
new_content = new_content.replace("\n", "\r\n") new_content = new_content.replace("\n", "\r\n")
fp.write_bytes(new_content.encode("utf-8")) fp.write_bytes(new_content.encode("utf-8"))
file_state.record_write(fp) self._file_states.record_write(fp)
msg = f"Successfully edited {fp}" msg = f"Successfully edited {fp}"
if warning: if warning:
msg = f"{warning}\n{msg}" msg = f"{warning}\n{msg}"

View File

@ -27,12 +27,16 @@ class TestEditReadTracking:
"""edit_file should warn when file hasn't been read first.""" """edit_file should warn when file hasn't been read first."""
@pytest.fixture() @pytest.fixture()
def read_tool(self, tmp_path): def file_states(self):
return ReadFileTool(workspace=tmp_path) return file_state.FileStates()
@pytest.fixture() @pytest.fixture()
def edit_tool(self, tmp_path): def read_tool(self, tmp_path, file_states):
return EditFileTool(workspace=tmp_path) return ReadFileTool(workspace=tmp_path, file_states=file_states)
@pytest.fixture()
def edit_tool(self, tmp_path, file_states):
return EditFileTool(workspace=tmp_path, file_states=file_states)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_edit_warns_if_file_not_read_first(self, edit_tool, tmp_path): async def test_edit_warns_if_file_not_read_first(self, edit_tool, tmp_path):

View File

@ -97,6 +97,37 @@ class TestReadDedup:
assert isinstance(second, list) assert isinstance(second, list)
# ---------------------------------------------------------------------------
# Cross-session isolation (issue #3571)
# ---------------------------------------------------------------------------
# Each session must keep its own read cache. When session A reads a file,
# session B reading the same file must still receive the full content, not
# the "[File unchanged since last read]" dedup stub. The stub is only valid
# within the session that first cached the read.
class TestReadDedupSessionIsolation:
@pytest.mark.asyncio
async def test_separate_sessions_do_not_share_dedup_state(self, tmp_path):
f = tmp_path / "shared.txt"
f.write_text("\n".join(f"line {i}" for i in range(10)), encoding="utf-8")
session_a_tool = ReadFileTool(workspace=tmp_path)
session_b_tool = ReadFileTool(workspace=tmp_path)
first = await session_a_tool.execute(path=str(f))
assert "line 0" in first
# Session B has never read this file before — it must see the full
# content, not the dedup stub from session A.
second = await session_b_tool.execute(path=str(f))
assert "unchanged" not in second.lower(), (
"Session B should not inherit session A's read-dedup state. "
f"Got: {second!r}"
)
assert "line 0" in second
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# PDF support # PDF support
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------