diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index cbddfc286..756391995 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -28,6 +28,7 @@ from nanobot.agent.tools.ask import ( pending_ask_user_id, ) from nanobot.agent.tools.cron import CronTool +from nanobot.agent.tools.file_state import FileStates from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool from nanobot.agent.tools.message import MessageTool from nanobot.agent.tools.notebook import NotebookEditTool @@ -247,6 +248,10 @@ class AgentLoop: self.context = ContextBuilder(workspace, timezone=timezone, disabled_skills=disabled_skills) self.sessions = session_manager or SessionManager(workspace) self.tools = ToolRegistry() + # Per-session file-read/write tracker (issue #3571) — shared across + # the filesystem tools registered below so this AgentLoop does not + # leak read-dedup state into another loop's tools. + self._file_states = FileStates() self.runner = AgentRunner(provider) self.subagents = SubagentManager( provider=provider, @@ -348,17 +353,24 @@ class AgentLoop: self.workspace if (self.restrict_to_workspace or self.exec_config.sandbox) else None ) extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None + # One FileStates per session so read-dedup and read-before-edit + # tracking does not leak across sessions sharing this process + # (issue #3571). + file_states = self._file_states self.tools.register(AskUserTool()) self.tools.register( ReadFileTool( - workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read + workspace=self.workspace, + allowed_dir=allowed_dir, + extra_allowed_dirs=extra_read, + file_states=file_states, ) ) for cls in (WriteFileTool, EditFileTool, ListDirTool): - self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir)) + self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states)) for cls in (GlobTool, GrepTool): - self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir)) - self.tools.register(NotebookEditTool(workspace=self.workspace, allowed_dir=allowed_dir)) + self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states)) + self.tools.register(NotebookEditTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states)) if self.exec_config.enable: self.tools.register( ExecTool( diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index 4bf79c356..80f6c580f 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -753,23 +753,28 @@ class Dream: def _build_tools(self) -> ToolRegistry: """Build a minimal tool registry for the Dream agent.""" from nanobot.agent.skills import BUILTIN_SKILLS_DIR + from nanobot.agent.tools.file_state import FileStates from nanobot.agent.tools.filesystem import EditFileTool, ReadFileTool, WriteFileTool tools = ToolRegistry() workspace = self.store.workspace # Allow reading builtin skills for reference during skill creation extra_read = [BUILTIN_SKILLS_DIR] if BUILTIN_SKILLS_DIR.exists() else None + # Dream gets its own FileStates so its caches stay isolated from the + # main loop's sessions (issue #3571). + file_states = FileStates() tools.register(ReadFileTool( workspace=workspace, allowed_dir=workspace, extra_allowed_dirs=extra_read, + file_states=file_states, )) - tools.register(EditFileTool(workspace=workspace, allowed_dir=workspace)) + tools.register(EditFileTool(workspace=workspace, allowed_dir=workspace, file_states=file_states)) # write_file resolves relative paths from workspace root, but can only # write under skills/ so the prompt can safely use skills//SKILL.md. skills_dir = workspace / "skills" skills_dir.mkdir(parents=True, exist_ok=True) - tools.register(WriteFileTool(workspace=workspace, allowed_dir=skills_dir)) + tools.register(WriteFileTool(workspace=workspace, allowed_dir=skills_dir, file_states=file_states)) return tools # -- skill listing -------------------------------------------------------- diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index e64dc8f97..6d16c7699 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -168,12 +168,16 @@ class SubagentManager: tools = ToolRegistry() allowed_dir = self.workspace if (self.restrict_to_workspace or self.exec_config.sandbox) else None extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None - tools.register(ReadFileTool(workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read)) - tools.register(WriteFileTool(workspace=self.workspace, allowed_dir=allowed_dir)) - tools.register(EditFileTool(workspace=self.workspace, allowed_dir=allowed_dir)) - tools.register(ListDirTool(workspace=self.workspace, allowed_dir=allowed_dir)) - tools.register(GlobTool(workspace=self.workspace, allowed_dir=allowed_dir)) - tools.register(GrepTool(workspace=self.workspace, allowed_dir=allowed_dir)) + # Subagent gets its own FileStates so its read-dedup cache is + # isolated from the parent loop's sessions (issue #3571). + from nanobot.agent.tools.file_state import FileStates + file_states = FileStates() + tools.register(ReadFileTool(workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read, file_states=file_states)) + tools.register(WriteFileTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states)) + tools.register(EditFileTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states)) + tools.register(ListDirTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states)) + tools.register(GlobTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states)) + tools.register(GrepTool(workspace=self.workspace, allowed_dir=allowed_dir, file_states=file_states)) if self.exec_config.enable: tools.register(ExecTool( working_dir=str(self.workspace), diff --git a/nanobot/agent/tools/file_state.py b/nanobot/agent/tools/file_state.py index 74bb9bf65..018581ac9 100644 --- a/nanobot/agent/tools/file_state.py +++ b/nanobot/agent/tools/file_state.py @@ -17,9 +17,6 @@ class ReadState: can_dedup: bool -_state: dict[str, ReadState] = {} - - def _hash_file(p: str) -> str | None: try: return hashlib.sha256(Path(p).read_bytes()).hexdigest() @@ -27,93 +24,141 @@ def _hash_file(p: str) -> str | None: return None +class FileStates: + """Per-session read/write tracker. + + Owns its own state dict so read-dedup ("File unchanged since last read") + and read-before-edit warnings stay scoped to one agent session and do + not leak across sessions sharing this process. + """ + + __slots__ = ("_state",) + + def __init__(self) -> None: + self._state: dict[str, ReadState] = {} + + def record_read(self, path: str | Path, offset: int = 1, limit: int | None = None) -> None: + """Record that a file was read (called after successful read).""" + p = str(Path(path).resolve()) + try: + mtime = os.path.getmtime(p) + except OSError: + return + self._state[p] = ReadState( + mtime=mtime, + offset=offset, + limit=limit, + content_hash=_hash_file(p), + can_dedup=True, + ) + + def record_write(self, path: str | Path) -> None: + """Record that a file was written (updates mtime in state).""" + p = str(Path(path).resolve()) + try: + mtime = os.path.getmtime(p) + except OSError: + self._state.pop(p, None) + return + self._state[p] = ReadState( + mtime=mtime, + offset=1, + limit=None, + content_hash=_hash_file(p), + can_dedup=False, + ) + + def check_read(self, path: str | Path) -> str | None: + """Check if a file has been read and is fresh. + + Returns None if OK, or a warning string. + When mtime changed but file content is identical (e.g. touch, editor save), + the check passes to avoid false-positive staleness warnings. + """ + p = str(Path(path).resolve()) + entry = self._state.get(p) + if entry is None: + return "Warning: file has not been read yet. Read it first to verify content before editing." + try: + current_mtime = os.path.getmtime(p) + except OSError: + return None + if current_mtime != entry.mtime: + if entry.content_hash and _hash_file(p) == entry.content_hash: + entry.mtime = current_mtime + return None + return "Warning: file has been modified since last read. Re-read to verify content before editing." + # mtime unchanged - still check content hash to detect quick modifications + if entry.content_hash and _hash_file(p) != entry.content_hash: + return "Warning: file has been modified since last read. Re-read to verify content before editing." + return None + + def is_unchanged(self, path: str | Path, offset: int = 1, limit: int | None = None) -> bool: + """Return True if file was previously read with same params and content is unchanged.""" + p = str(Path(path).resolve()) + entry = self._state.get(p) + if entry is None: + return False + if not entry.can_dedup: + return False + if entry.offset != offset or entry.limit != limit: + return False + try: + current_mtime = os.path.getmtime(p) + except OSError: + return False + if current_mtime != entry.mtime: + # mtime changed - check if content also changed + current_hash = _hash_file(p) + if current_hash != entry.content_hash: + # Content actually changed - don't dedup + entry.can_dedup = False + return False + # Content identical despite mtime change (e.g. touch) - mark as not dedupable to force full read next time + entry.can_dedup = False + return True + # mtime unchanged - content must be identical + return True + + def get(self, path: str | Path) -> ReadState | None: + """Return the raw ReadState entry for a path, or None.""" + return self._state.get(str(Path(path).resolve())) + + def clear(self) -> None: + """Clear all tracked state (useful for testing).""" + self._state.clear() + + +# Module-level default instance, retained for backward compatibility with +# tests and callers that reach in directly. Per-session callers should hold +# their own FileStates instance instead of touching this one. +_default = FileStates() + + def record_read(path: str | Path, offset: int = 1, limit: int | None = None) -> None: - """Record that a file was read (called after successful read).""" - p = str(Path(path).resolve()) - try: - mtime = os.path.getmtime(p) - except OSError: - return - _state[p] = ReadState( - mtime=mtime, - offset=offset, - limit=limit, - content_hash=_hash_file(p), - can_dedup=True, - ) + _default.record_read(path, offset=offset, limit=limit) def record_write(path: str | Path) -> None: - """Record that a file was written (updates mtime in state).""" - p = str(Path(path).resolve()) - try: - mtime = os.path.getmtime(p) - except OSError: - _state.pop(p, None) - return - _state[p] = ReadState( - mtime=mtime, - offset=1, - limit=None, - content_hash=_hash_file(p), - can_dedup=False, - ) + _default.record_write(path) def check_read(path: str | Path) -> str | None: - """Check if a file has been read and is fresh. - - Returns None if OK, or a warning string. - When mtime changed but file content is identical (e.g. touch, editor save), - the check passes to avoid false-positive staleness warnings. - """ - p = str(Path(path).resolve()) - entry = _state.get(p) - if entry is None: - return "Warning: file has not been read yet. Read it first to verify content before editing." - try: - current_mtime = os.path.getmtime(p) - except OSError: - return None - if current_mtime != entry.mtime: - if entry.content_hash and _hash_file(p) == entry.content_hash: - entry.mtime = current_mtime - return None - return "Warning: file has been modified since last read. Re-read to verify content before editing." - # mtime unchanged - still check content hash to detect quick modifications - if entry.content_hash and _hash_file(p) != entry.content_hash: - return "Warning: file has been modified since last read. Re-read to verify content before editing." - return None + return _default.check_read(path) def is_unchanged(path: str | Path, offset: int = 1, limit: int | None = None) -> bool: - """Return True if file was previously read with same params and content is unchanged.""" - p = str(Path(path).resolve()) - entry = _state.get(p) - if entry is None: - return False - if not entry.can_dedup: - return False - if entry.offset != offset or entry.limit != limit: - return False - try: - current_mtime = os.path.getmtime(p) - except OSError: - return False - if current_mtime != entry.mtime: - # mtime changed - check if content also changed - current_hash = _hash_file(p) - if current_hash != entry.content_hash: - # Content actually changed - don't dedup - entry.can_dedup = False - return False - # Content identical despite mtime change (e.g. touch) - mark as not dedupable to force full read next time - entry.can_dedup = False - return True - # mtime unchanged - content must be identical - return True + return _default.is_unchanged(path, offset=offset, limit=limit) def clear() -> None: - """Clear all tracked state (useful for testing).""" - _state.clear() + _default.clear() + + +# Legacy attribute for callers that reached into the module-level dict +# directly (filesystem.py used to do this). Kept as a property-like accessor +# so existing imports keep working. +def __getattr__(name: str): + if name == "_state": + return _default._state + raise AttributeError(name) diff --git a/nanobot/agent/tools/filesystem.py b/nanobot/agent/tools/filesystem.py index c0d628a71..500f0123b 100644 --- a/nanobot/agent/tools/filesystem.py +++ b/nanobot/agent/tools/filesystem.py @@ -9,7 +9,7 @@ from typing import Any from nanobot.agent.tools.base import Tool, tool_parameters from nanobot.agent.tools.schema import BooleanSchema, IntegerSchema, StringSchema, tool_parameters_schema -from nanobot.agent.tools import file_state +from nanobot.agent.tools.file_state import FileStates, _hash_file from nanobot.utils.helpers import build_image_content_blocks, detect_image_mime from nanobot.config.paths import get_media_dir @@ -49,10 +49,15 @@ class _FsTool(Tool): workspace: Path | None = None, allowed_dir: Path | None = None, extra_allowed_dirs: list[Path] | None = None, + file_states: FileStates | None = None, ): self._workspace = workspace self._allowed_dir = allowed_dir self._extra_allowed_dirs = extra_allowed_dirs + # Read-dedup / read-before-edit state is scoped to one FileStates + # so it does not leak across sessions sharing this process + # (issue #3571). Bare constructions get a private instance. + self._file_states: FileStates = file_states if file_states is not None else FileStates() def _resolve(self, path: str) -> Path: return _resolve_path(path, self._workspace, self._allowed_dir, self._extra_allowed_dirs) @@ -184,7 +189,7 @@ class ReadFileTool(_FsTool): # Read dedup: same path + offset + limit + unchanged mtime → stub # Always check for external modifications before dedup - entry = file_state._state.get(str(fp.resolve())) + entry = self._file_states.get(fp) try: current_mtime = os.path.getmtime(fp) except OSError: @@ -193,21 +198,21 @@ class ReadFileTool(_FsTool): if current_mtime != entry.mtime: # File was modified externally - force full read and mark as not dedupable entry.can_dedup = False - file_state.record_read(fp, offset=offset, limit=limit) # Update state with new mtime + self._file_states.record_read(fp, offset=offset, limit=limit) # Update state with new mtime # Continue to read full content (don't return dedup message) else: # File unchanged - return dedup message # But only if content is actually unchanged (not just mtime) - current_hash = file_state._hash_file(str(fp)) + current_hash = _hash_file(str(fp)) if current_hash == entry.content_hash: return f"[File unchanged since last read: {path}]" else: # Content changed despite same mtime - force full read entry.can_dedup = False - file_state.record_read(fp, offset=offset, limit=limit) + self._file_states.record_read(fp, offset=offset, limit=limit) else: # No previous state or marked as not dedupable - read full content - file_state.record_read(fp, offset=offset, limit=limit) + self._file_states.record_read(fp, offset=offset, limit=limit) # Force full read by setting can_dedup to False for this read if entry: entry.can_dedup = False @@ -256,7 +261,7 @@ class ReadFileTool(_FsTool): result += f"\n\n(Showing lines {offset}-{end} of {total}. Use offset={end + 1} to continue.)" else: result += f"\n\n(End of file — {total} lines total)" - file_state.record_read(fp, offset=offset, limit=limit) + self._file_states.record_read(fp, offset=offset, limit=limit) return result except PermissionError as e: return f"Error: {e}" @@ -365,7 +370,7 @@ class WriteFileTool(_FsTool): fp = self._resolve(path) fp.parent.mkdir(parents=True, exist_ok=True) fp.write_text(content, encoding="utf-8") - file_state.record_write(fp) + self._file_states.record_write(fp) return f"Successfully wrote {len(content)} characters to {fp}" except PermissionError as e: return f"Error: {e}" @@ -699,7 +704,7 @@ class EditFileTool(_FsTool): if old_text == "": fp.parent.mkdir(parents=True, exist_ok=True) fp.write_text(new_text, encoding="utf-8") - file_state.record_write(fp) + self._file_states.record_write(fp) return f"Successfully created {fp}" return self._file_not_found_msg(path, fp) @@ -718,11 +723,11 @@ class EditFileTool(_FsTool): if content.strip(): return f"Error: Cannot create file — {path} already exists and is not empty." fp.write_text(new_text, encoding="utf-8") - file_state.record_write(fp) + self._file_states.record_write(fp) return f"Successfully edited {fp}" # Read-before-edit check - warning = file_state.check_read(fp) + warning = self._file_states.check_read(fp) raw = fp.read_bytes() uses_crlf = b"\r\n" in raw @@ -767,7 +772,7 @@ class EditFileTool(_FsTool): new_content = new_content.replace("\n", "\r\n") fp.write_bytes(new_content.encode("utf-8")) - file_state.record_write(fp) + self._file_states.record_write(fp) msg = f"Successfully edited {fp}" if warning: msg = f"{warning}\n{msg}" diff --git a/tests/tools/test_edit_enhancements.py b/tests/tools/test_edit_enhancements.py index 7ad098960..1f22c963b 100644 --- a/tests/tools/test_edit_enhancements.py +++ b/tests/tools/test_edit_enhancements.py @@ -27,12 +27,16 @@ class TestEditReadTracking: """edit_file should warn when file hasn't been read first.""" @pytest.fixture() - def read_tool(self, tmp_path): - return ReadFileTool(workspace=tmp_path) + def file_states(self): + return file_state.FileStates() @pytest.fixture() - def edit_tool(self, tmp_path): - return EditFileTool(workspace=tmp_path) + def read_tool(self, tmp_path, file_states): + return ReadFileTool(workspace=tmp_path, file_states=file_states) + + @pytest.fixture() + def edit_tool(self, tmp_path, file_states): + return EditFileTool(workspace=tmp_path, file_states=file_states) @pytest.mark.asyncio async def test_edit_warns_if_file_not_read_first(self, edit_tool, tmp_path): diff --git a/tests/tools/test_read_enhancements.py b/tests/tools/test_read_enhancements.py index f7a62f05b..b5577f058 100644 --- a/tests/tools/test_read_enhancements.py +++ b/tests/tools/test_read_enhancements.py @@ -97,6 +97,37 @@ class TestReadDedup: assert isinstance(second, list) +# --------------------------------------------------------------------------- +# Cross-session isolation (issue #3571) +# --------------------------------------------------------------------------- +# Each session must keep its own read cache. When session A reads a file, +# session B reading the same file must still receive the full content, not +# the "[File unchanged since last read]" dedup stub. The stub is only valid +# within the session that first cached the read. + +class TestReadDedupSessionIsolation: + + @pytest.mark.asyncio + async def test_separate_sessions_do_not_share_dedup_state(self, tmp_path): + f = tmp_path / "shared.txt" + f.write_text("\n".join(f"line {i}" for i in range(10)), encoding="utf-8") + + session_a_tool = ReadFileTool(workspace=tmp_path) + session_b_tool = ReadFileTool(workspace=tmp_path) + + first = await session_a_tool.execute(path=str(f)) + assert "line 0" in first + + # Session B has never read this file before — it must see the full + # content, not the dedup stub from session A. + second = await session_b_tool.execute(path=str(f)) + assert "unchanged" not in second.lower(), ( + "Session B should not inherit session A's read-dedup state. " + f"Got: {second!r}" + ) + assert "line 0" in second + + # --------------------------------------------------------------------------- # PDF support # ---------------------------------------------------------------------------