Claude: replace module-level file read states with per-loop per-session state class. fixes #3571

2026-05-30 13:31:12 +00:00 · 2026-05-01 18:58:41 +08:00 · 2026-05-01 18:58:41 +08:00 · 58ae2d5b7e
commit 58ae2d5b7e
parent 6891a7a4d4
7 changed files with 214 additions and 108 deletions
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@ -28,6 +28,7 @@ from nanobot.agent.tools.ask import (
    pending_ask_user_id,
 )
 from nanobot.agent.tools.cron import CronTool
+from nanobot.agent.tools.file_state import FileStates
 from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool
 from nanobot.agent.tools.message import MessageTool
 from nanobot.agent.tools.notebook import NotebookEditTool
@ -247,6 +248,10 @@ class AgentLoop:
        self.context = ContextBuilder(workspace, timezone=timezone, disabled_skills=disabled_skills)
        self.sessions = session_manager or SessionManager(workspace)
        self.tools = ToolRegistry()
+        # Per-session file-read/write tracker (issue #3571) — shared across
+        # the filesystem tools registered below so this AgentLoop does not
+        # leak read-dedup state into another loop's tools.
+        self._file_states = FileStates()
        self.runner = AgentRunner(provider)
        self.subagents = SubagentManager(
            provider=provider,
@ -348,17 +353,24 @@ class AgentLoop:
            self.workspace if (self.restrict_to_workspace or self.exec_config.sandbox) else None
        )
        extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None
+        # One FileStates per session so read-dedup and read-before-edit
+        # tracking does not leak across sessions sharing this process
+        # (issue #3571).
+        file_states = self._file_states
        self.tools.register(AskUserTool())
        self.tools.register(
            ReadFileTool(
-                workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read
+                workspace=self.workspace,
+                allowed_dir=allowed_dir,
+                extra_allowed_dirs=extra_read,
+                file_states=file_states,
            )
        )
        for cls in (WriteFileTool, EditFileTool, ListDirTool):
-            self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir))
+            self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
        for cls in (GlobTool, GrepTool):
-            self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir))
-        self.tools.register(NotebookEditTool(workspace=self.workspace, allowed_dir=allowed_dir))
+            self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
+        self.tools.register(NotebookEditTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
        if self.exec_config.enable:
            self.tools.register(
                ExecTool(
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@ -753,23 +753,28 @@ class Dream:
    def _build_tools(self) -> ToolRegistry:
        """Build a minimal tool registry for the Dream agent."""
        from nanobot.agent.skills import BUILTIN_SKILLS_DIR
+        from nanobot.agent.tools.file_state import FileStates
        from nanobot.agent.tools.filesystem import EditFileTool, ReadFileTool, WriteFileTool

        tools = ToolRegistry()
        workspace = self.store.workspace
        # Allow reading builtin skills for reference during skill creation
        extra_read = [BUILTIN_SKILLS_DIR] if BUILTIN_SKILLS_DIR.exists() else None
+        # Dream gets its own FileStates so its caches stay isolated from the
+        # main loop's sessions (issue #3571).
+        file_states = FileStates()
        tools.register(ReadFileTool(
            workspace=workspace,
            allowed_dir=workspace,
            extra_allowed_dirs=extra_read,
+            file_states=file_states,
        ))
-        tools.register(EditFileTool(workspace=workspace, allowed_dir=workspace))
+        tools.register(EditFileTool(workspace=workspace, allowed_dir=workspace, file_states=file_states))
        # write_file resolves relative paths from workspace root, but can only
        # write under skills/ so the prompt can safely use skills/<name>/SKILL.md.
        skills_dir = workspace / "skills"
        skills_dir.mkdir(parents=True, exist_ok=True)
-        tools.register(WriteFileTool(workspace=workspace, allowed_dir=skills_dir))
+        tools.register(WriteFileTool(workspace=workspace, allowed_dir=skills_dir, file_states=file_states))
        return tools

    # -- skill listing --------------------------------------------------------
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@ -168,12 +168,16 @@ class SubagentManager:
            tools = ToolRegistry()
            allowed_dir = self.workspace if (self.restrict_to_workspace or self.exec_config.sandbox) else None
            extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None
-            tools.register(ReadFileTool(workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read))
-            tools.register(WriteFileTool(workspace=self.workspace, allowed_dir=allowed_dir))
-            tools.register(EditFileTool(workspace=self.workspace, allowed_dir=allowed_dir))
-            tools.register(ListDirTool(workspace=self.workspace, allowed_dir=allowed_dir))
-            tools.register(GlobTool(workspace=self.workspace, allowed_dir=allowed_dir))
-            tools.register(GrepTool(workspace=self.workspace, allowed_dir=allowed_dir))
+            # Subagent gets its own FileStates so its read-dedup cache is
+            # isolated from the parent loop's sessions (issue #3571).
+            from nanobot.agent.tools.file_state import FileStates
+            file_states = FileStates()
+            tools.register(ReadFileTool(workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read, file_states=file_states))
+            tools.register(WriteFileTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
+            tools.register(EditFileTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
+            tools.register(ListDirTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
+            tools.register(GlobTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
+            tools.register(GrepTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states))
            if self.exec_config.enable:
                tools.register(ExecTool(
                    working_dir=str(self.workspace),
--- a/nanobot/agent/tools/file_state.py
+++ b/nanobot/agent/tools/file_state.py
@ -17,9 +17,6 @@ class ReadState:
    can_dedup: bool


-_state: dict[str, ReadState] = {}
-
-
 def _hash_file(p: str) -> str | None:
    try:
        return hashlib.sha256(Path(p).read_bytes()).hexdigest()
@ -27,93 +24,141 @@ def _hash_file(p: str) -> str | None:
        return None


+class FileStates:
+    """Per-session read/write tracker.
+
+    Owns its own state dict so read-dedup ("File unchanged since last read")
+    and read-before-edit warnings stay scoped to one agent session and do
+    not leak across sessions sharing this process.
+    """
+
+    __slots__ = ("_state",)
+
+    def __init__(self) -> None:
+        self._state: dict[str, ReadState] = {}
+
+    def record_read(self, path: str | Path, offset: int = 1, limit: int | None = None) -> None:
+        """Record that a file was read (called after successful read)."""
+        p = str(Path(path).resolve())
+        try:
+            mtime = os.path.getmtime(p)
+        except OSError:
+            return
+        self._state[p] = ReadState(
+            mtime=mtime,
+            offset=offset,
+            limit=limit,
+            content_hash=_hash_file(p),
+            can_dedup=True,
+        )
+
+    def record_write(self, path: str | Path) -> None:
+        """Record that a file was written (updates mtime in state)."""
+        p = str(Path(path).resolve())
+        try:
+            mtime = os.path.getmtime(p)
+        except OSError:
+            self._state.pop(p, None)
+            return
+        self._state[p] = ReadState(
+            mtime=mtime,
+            offset=1,
+            limit=None,
+            content_hash=_hash_file(p),
+            can_dedup=False,
+        )
+
+    def check_read(self, path: str | Path) -> str | None:
+        """Check if a file has been read and is fresh.
+
+        Returns None if OK, or a warning string.
+        When mtime changed but file content is identical (e.g. touch, editor save),
+        the check passes to avoid false-positive staleness warnings.
+        """
+        p = str(Path(path).resolve())
+        entry = self._state.get(p)
+        if entry is None:
+            return "Warning: file has not been read yet. Read it first to verify content before editing."
+        try:
+            current_mtime = os.path.getmtime(p)
+        except OSError:
+            return None
+        if current_mtime != entry.mtime:
+            if entry.content_hash and _hash_file(p) == entry.content_hash:
+                entry.mtime = current_mtime
+                return None
+            return "Warning: file has been modified since last read. Re-read to verify content before editing."
+        # mtime unchanged - still check content hash to detect quick modifications
+        if entry.content_hash and _hash_file(p) != entry.content_hash:
+            return "Warning: file has been modified since last read. Re-read to verify content before editing."
+        return None
+
+    def is_unchanged(self, path: str | Path, offset: int = 1, limit: int | None = None) -> bool:
+        """Return True if file was previously read with same params and content is unchanged."""
+        p = str(Path(path).resolve())
+        entry = self._state.get(p)
+        if entry is None:
+            return False
+        if not entry.can_dedup:
+            return False
+        if entry.offset != offset or entry.limit != limit:
+            return False
+        try:
+            current_mtime = os.path.getmtime(p)
+        except OSError:
+            return False
+        if current_mtime != entry.mtime:
+            # mtime changed - check if content also changed
+            current_hash = _hash_file(p)
+            if current_hash != entry.content_hash:
+                # Content actually changed - don't dedup
+                entry.can_dedup = False
+                return False
+            # Content identical despite mtime change (e.g. touch) - mark as not dedupable to force full read next time
+            entry.can_dedup = False
+            return True
+        # mtime unchanged - content must be identical
+        return True
+
+    def get(self, path: str | Path) -> ReadState | None:
+        """Return the raw ReadState entry for a path, or None."""
+        return self._state.get(str(Path(path).resolve()))
+
+    def clear(self) -> None:
+        """Clear all tracked state (useful for testing)."""
+        self._state.clear()
+
+
+# Module-level default instance, retained for backward compatibility with
+# tests and callers that reach in directly. Per-session callers should hold
+# their own FileStates instance instead of touching this one.
+_default = FileStates()
+
+
 def record_read(path: str | Path, offset: int = 1, limit: int | None = None) -> None:
-    """Record that a file was read (called after successful read)."""
-    p = str(Path(path).resolve())
-    try:
-        mtime = os.path.getmtime(p)
-    except OSError:
-        return
-    _state[p] = ReadState(
-        mtime=mtime,
-        offset=offset,
-        limit=limit,
-        content_hash=_hash_file(p),
-        can_dedup=True,
-    )
+    _default.record_read(path, offset=offset, limit=limit)


 def record_write(path: str | Path) -> None:
-    """Record that a file was written (updates mtime in state)."""
-    p = str(Path(path).resolve())
-    try:
-        mtime = os.path.getmtime(p)
-    except OSError:
-        _state.pop(p, None)
-        return
-    _state[p] = ReadState(
-        mtime=mtime,
-        offset=1,
-        limit=None,
-        content_hash=_hash_file(p),
-        can_dedup=False,
-    )
+    _default.record_write(path)


 def check_read(path: str | Path) -> str | None:
-    """Check if a file has been read and is fresh.
-
-    Returns None if OK, or a warning string.
-    When mtime changed but file content is identical (e.g. touch, editor save),
-    the check passes to avoid false-positive staleness warnings.
-    """
-    p = str(Path(path).resolve())
-    entry = _state.get(p)
-    if entry is None:
-        return "Warning: file has not been read yet. Read it first to verify content before editing."
-    try:
-        current_mtime = os.path.getmtime(p)
-    except OSError:
-        return None
-    if current_mtime != entry.mtime:
-        if entry.content_hash and _hash_file(p) == entry.content_hash:
-            entry.mtime = current_mtime
-            return None
-        return "Warning: file has been modified since last read. Re-read to verify content before editing."
-    # mtime unchanged - still check content hash to detect quick modifications
-    if entry.content_hash and _hash_file(p) != entry.content_hash:
-        return "Warning: file has been modified since last read. Re-read to verify content before editing."
-    return None
+    return _default.check_read(path)


 def is_unchanged(path: str | Path, offset: int = 1, limit: int | None = None) -> bool:
-    """Return True if file was previously read with same params and content is unchanged."""
-    p = str(Path(path).resolve())
-    entry = _state.get(p)
-    if entry is None:
-        return False
-    if not entry.can_dedup:
-        return False
-    if entry.offset != offset or entry.limit != limit:
-        return False
-    try:
-        current_mtime = os.path.getmtime(p)
-    except OSError:
-        return False
-    if current_mtime != entry.mtime:
-        # mtime changed - check if content also changed
-        current_hash = _hash_file(p)
-        if current_hash != entry.content_hash:
-            # Content actually changed - don't dedup
-            entry.can_dedup = False
-            return False
-        # Content identical despite mtime change (e.g. touch) - mark as not dedupable to force full read next time
-        entry.can_dedup = False
-        return True
-    # mtime unchanged - content must be identical
-    return True
+    return _default.is_unchanged(path, offset=offset, limit=limit)


 def clear() -> None:
-    """Clear all tracked state (useful for testing)."""
-    _state.clear()
+    _default.clear()
+
+
+# Legacy attribute for callers that reached into the module-level dict
+# directly (filesystem.py used to do this). Kept as a property-like accessor
+# so existing imports keep working.
+def __getattr__(name: str):
+    if name == "_state":
+        return _default._state
+    raise AttributeError(name)
--- a/nanobot/agent/tools/filesystem.py
+++ b/nanobot/agent/tools/filesystem.py
@ -9,7 +9,7 @@ from typing import Any

 from nanobot.agent.tools.base import Tool, tool_parameters
 from nanobot.agent.tools.schema import BooleanSchema, IntegerSchema, StringSchema, tool_parameters_schema
-from nanobot.agent.tools import file_state
+from nanobot.agent.tools.file_state import FileStates, _hash_file
 from nanobot.utils.helpers import build_image_content_blocks, detect_image_mime
 from nanobot.config.paths import get_media_dir

@ -49,10 +49,15 @@ class _FsTool(Tool):
        workspace: Path | None = None,
        allowed_dir: Path | None = None,
        extra_allowed_dirs: list[Path] | None = None,
+        file_states: FileStates | None = None,
    ):
        self._workspace = workspace
        self._allowed_dir = allowed_dir
        self._extra_allowed_dirs = extra_allowed_dirs
+        # Read-dedup / read-before-edit state is scoped to one FileStates
+        # so it does not leak across sessions sharing this process
+        # (issue #3571). Bare constructions get a private instance.
+        self._file_states: FileStates = file_states if file_states is not None else FileStates()

    def _resolve(self, path: str) -> Path:
        return _resolve_path(path, self._workspace, self._allowed_dir, self._extra_allowed_dirs)
@ -184,7 +189,7 @@ class ReadFileTool(_FsTool):

            # Read dedup: same path + offset + limit + unchanged mtime → stub
            # Always check for external modifications before dedup
-            entry = file_state._state.get(str(fp.resolve()))
+            entry = self._file_states.get(fp)
            try:
                current_mtime = os.path.getmtime(fp)
            except OSError:
@ -193,21 +198,21 @@ class ReadFileTool(_FsTool):
                if current_mtime != entry.mtime:
                    # File was modified externally - force full read and mark as not dedupable
                    entry.can_dedup = False
-                    file_state.record_read(fp, offset=offset, limit=limit)  # Update state with new mtime
+                    self._file_states.record_read(fp, offset=offset, limit=limit)  # Update state with new mtime
                    # Continue to read full content (don't return dedup message)
                else:
                    # File unchanged - return dedup message
                    # But only if content is actually unchanged (not just mtime)
-                    current_hash = file_state._hash_file(str(fp))
+                    current_hash = _hash_file(str(fp))
                    if current_hash == entry.content_hash:
                        return f"[File unchanged since last read: {path}]"
                    else:
                        # Content changed despite same mtime - force full read
                        entry.can_dedup = False
-                        file_state.record_read(fp, offset=offset, limit=limit)
+                        self._file_states.record_read(fp, offset=offset, limit=limit)
            else:
                # No previous state or marked as not dedupable - read full content
-                file_state.record_read(fp, offset=offset, limit=limit)
+                self._file_states.record_read(fp, offset=offset, limit=limit)
                # Force full read by setting can_dedup to False for this read
                if entry:
                    entry.can_dedup = False
@ -256,7 +261,7 @@ class ReadFileTool(_FsTool):
                result += f"\n\n(Showing lines {offset}-{end} of {total}. Use offset={end + 1} to continue.)"
            else:
                result += f"\n\n(End of file — {total} lines total)"
-            file_state.record_read(fp, offset=offset, limit=limit)
+            self._file_states.record_read(fp, offset=offset, limit=limit)
            return result
        except PermissionError as e:
            return f"Error: {e}"
@ -365,7 +370,7 @@ class WriteFileTool(_FsTool):
            fp = self._resolve(path)
            fp.parent.mkdir(parents=True, exist_ok=True)
            fp.write_text(content, encoding="utf-8")
-            file_state.record_write(fp)
+            self._file_states.record_write(fp)
            return f"Successfully wrote {len(content)} characters to {fp}"
        except PermissionError as e:
            return f"Error: {e}"
@ -699,7 +704,7 @@ class EditFileTool(_FsTool):
                if old_text == "":
                    fp.parent.mkdir(parents=True, exist_ok=True)
                    fp.write_text(new_text, encoding="utf-8")
-                    file_state.record_write(fp)
+                    self._file_states.record_write(fp)
                    return f"Successfully created {fp}"
                return self._file_not_found_msg(path, fp)

@ -718,11 +723,11 @@ class EditFileTool(_FsTool):
                if content.strip():
                    return f"Error: Cannot create file — {path} already exists and is not empty."
                fp.write_text(new_text, encoding="utf-8")
-                file_state.record_write(fp)
+                self._file_states.record_write(fp)
                return f"Successfully edited {fp}"

            # Read-before-edit check
-            warning = file_state.check_read(fp)
+            warning = self._file_states.check_read(fp)

            raw = fp.read_bytes()
            uses_crlf = b"\r\n" in raw
@ -767,7 +772,7 @@ class EditFileTool(_FsTool):
                new_content = new_content.replace("\n", "\r\n")

            fp.write_bytes(new_content.encode("utf-8"))
-            file_state.record_write(fp)
+            self._file_states.record_write(fp)
            msg = f"Successfully edited {fp}"
            if warning:
                msg = f"{warning}\n{msg}"
--- a/tests/tools/test_edit_enhancements.py
+++ b/tests/tools/test_edit_enhancements.py
@ -27,12 +27,16 @@ class TestEditReadTracking:
    """edit_file should warn when file hasn't been read first."""

    @pytest.fixture()
-    def read_tool(self, tmp_path):
-        return ReadFileTool(workspace=tmp_path)
+    def file_states(self):
+        return file_state.FileStates()

    @pytest.fixture()
-    def edit_tool(self, tmp_path):
-        return EditFileTool(workspace=tmp_path)
+    def read_tool(self, tmp_path, file_states):
+        return ReadFileTool(workspace=tmp_path, file_states=file_states)
+
+    @pytest.fixture()
+    def edit_tool(self, tmp_path, file_states):
+        return EditFileTool(workspace=tmp_path, file_states=file_states)

    @pytest.mark.asyncio
    async def test_edit_warns_if_file_not_read_first(self, edit_tool, tmp_path):
--- a/tests/tools/test_read_enhancements.py
+++ b/tests/tools/test_read_enhancements.py
@ -97,6 +97,37 @@ class TestReadDedup:
        assert isinstance(second, list)


+# ---------------------------------------------------------------------------
+# Cross-session isolation (issue #3571)
+# ---------------------------------------------------------------------------
+# Each session must keep its own read cache. When session A reads a file,
+# session B reading the same file must still receive the full content, not
+# the "[File unchanged since last read]" dedup stub. The stub is only valid
+# within the session that first cached the read.
+
+class TestReadDedupSessionIsolation:
+
+    @pytest.mark.asyncio
+    async def test_separate_sessions_do_not_share_dedup_state(self, tmp_path):
+        f = tmp_path / "shared.txt"
+        f.write_text("\n".join(f"line {i}" for i in range(10)), encoding="utf-8")
+
+        session_a_tool = ReadFileTool(workspace=tmp_path)
+        session_b_tool = ReadFileTool(workspace=tmp_path)
+
+        first = await session_a_tool.execute(path=str(f))
+        assert "line 0" in first
+
+        # Session B has never read this file before — it must see the full
+        # content, not the dedup stub from session A.
+        second = await session_b_tool.execute(path=str(f))
+        assert "unchanged" not in second.lower(), (
+            "Session B should not inherit session A's read-dedup state. "
+            f"Got: {second!r}"
+        )
+        assert "line 0" in second
+
+
 # ---------------------------------------------------------------------------
 # PDF support
 # ---------------------------------------------------------------------------