From b2598270bfeff1ce56b9dddb623eed06942c0da5 Mon Sep 17 00:00:00 2001 From: chengyongru <61816729+chengyongru@users.noreply.github.com> Date: Thu, 2 Apr 2026 23:52:13 +0800 Subject: [PATCH] feat(memory): add git-backed version control for dream memory files (#2753) - Add GitStore class wrapping dulwich for memory file versioning - Auto-commit memory changes during Dream consolidation - Add /dream-log and /dream-restore commands for history browsing - Pass tracked_files as constructor param, generate .gitignore dynamically --- nanobot/agent/git_store.py | 299 ++++++++++++++++++++++++++++++++++ nanobot/agent/memory.py | 14 ++ nanobot/command/builtin.py | 95 +++++++++-- nanobot/utils/helpers.py | 11 ++ pyproject.toml | 1 + tests/agent/test_git_store.py | 225 +++++++++++++++++++++++++ 6 files changed, 631 insertions(+), 14 deletions(-) create mode 100644 nanobot/agent/git_store.py create mode 100644 tests/agent/test_git_store.py diff --git a/nanobot/agent/git_store.py b/nanobot/agent/git_store.py new file mode 100644 index 000000000..a09c7730f --- /dev/null +++ b/nanobot/agent/git_store.py @@ -0,0 +1,299 @@ +"""Git-backed version control for memory files, using dulwich.""" + +from __future__ import annotations + +import io +import time +from dataclasses import dataclass +from pathlib import Path + +from loguru import logger + + +@dataclass +class CommitInfo: + sha: str # Short SHA (8 chars) + message: str + timestamp: str # Formatted datetime + + def format(self, diff: str = "") -> str: + """Format this commit for display, optionally with a diff.""" + header = f"## {self.message.splitlines()[0]}\n`{self.sha}` — {self.timestamp}\n" + if diff: + return f"{header}\n```diff\n{diff}\n```" + return f"{header}\n(no file changes)" + + +class GitStore: + """Git-backed version control for memory files.""" + + def __init__(self, workspace: Path, tracked_files: list[str]): + self._workspace = workspace + self._tracked_files = tracked_files + + def is_initialized(self) -> bool: + """Check if the git repo has been initialized.""" + return (self._workspace / ".git").is_dir() + + # -- init ------------------------------------------------------------------ + + def init(self) -> bool: + """Initialize a git repo if not already initialized. + + Creates .gitignore and makes an initial commit. + Returns True if a new repo was created, False if already exists. + """ + if self.is_initialized(): + return False + + try: + from dulwich import porcelain + + porcelain.init(str(self._workspace)) + + # Write .gitignore + gitignore = self._workspace / ".gitignore" + gitignore.write_text(self._build_gitignore(), encoding="utf-8") + + # Ensure tracked files exist (touch them if missing) so the initial + # commit has something to track. + for rel in self._tracked_files: + p = self._workspace / rel + p.parent.mkdir(parents=True, exist_ok=True) + if not p.exists(): + p.write_text("", encoding="utf-8") + + # Initial commit + porcelain.add(str(self._workspace), paths=[".gitignore"] + self._tracked_files) + porcelain.commit( + str(self._workspace), + message=b"init: nanobot memory store", + author=b"nanobot ", + committer=b"nanobot ", + ) + logger.info("Git store initialized at {}", self._workspace) + return True + except Exception: + logger.warning("Git store init failed for {}", self._workspace) + return False + + # -- daily operations ------------------------------------------------------ + + def auto_commit(self, message: str) -> str | None: + """Stage tracked memory files and commit if there are changes. + + Returns the short commit SHA, or None if nothing to commit. + """ + if not self.is_initialized(): + return None + + try: + from dulwich import porcelain + + # .gitignore excludes everything except tracked files, + # so any staged/unstaged change must be in our files. + st = porcelain.status(str(self._workspace)) + if not st.unstaged and not any(st.staged.values()): + return None + + msg_bytes = message.encode("utf-8") if isinstance(message, str) else message + porcelain.add(str(self._workspace), paths=self._tracked_files) + sha_bytes = porcelain.commit( + str(self._workspace), + message=msg_bytes, + author=b"nanobot ", + committer=b"nanobot ", + ) + if sha_bytes is None: + return None + sha = sha_bytes.hex()[:8] + logger.debug("Git auto-commit: {} ({})", sha, message) + return sha + except Exception: + logger.warning("Git auto-commit failed: {}", message) + return None + + # -- internal helpers ------------------------------------------------------ + + def _resolve_sha(self, short_sha: str) -> bytes | None: + """Resolve a short SHA prefix to the full SHA bytes.""" + try: + from dulwich.repo import Repo + + with Repo(str(self._workspace)) as repo: + try: + sha = repo.refs[b"HEAD"] + except KeyError: + return None + + while sha: + if sha.hex().startswith(short_sha): + return sha + commit = repo[sha] + if commit.type_name != b"commit": + break + sha = commit.parents[0] if commit.parents else None + return None + except Exception: + return None + + def _build_gitignore(self) -> str: + """Generate .gitignore content from tracked files.""" + dirs: set[str] = set() + for f in self._tracked_files: + parent = str(Path(f).parent) + if parent != ".": + dirs.add(parent) + lines = ["/*"] + for d in sorted(dirs): + lines.append(f"!{d}/") + for f in self._tracked_files: + lines.append(f"!{f}") + lines.append("!.gitignore") + return "\n".join(lines) + "\n" + + # -- query ----------------------------------------------------------------- + + def log(self, max_entries: int = 20) -> list[CommitInfo]: + """Return simplified commit log.""" + if not self.is_initialized(): + return [] + + try: + from dulwich.repo import Repo + + entries: list[CommitInfo] = [] + with Repo(str(self._workspace)) as repo: + try: + head = repo.refs[b"HEAD"] + except KeyError: + return [] + + sha = head + while sha and len(entries) < max_entries: + commit = repo[sha] + if commit.type_name != b"commit": + break + ts = time.strftime( + "%Y-%m-%d %H:%M", + time.localtime(commit.commit_time), + ) + msg = commit.message.decode("utf-8", errors="replace").strip() + entries.append(CommitInfo( + sha=sha.hex()[:8], + message=msg, + timestamp=ts, + )) + sha = commit.parents[0] if commit.parents else None + + return entries + except Exception: + logger.warning("Git log failed") + return [] + + def diff_commits(self, sha1: str, sha2: str) -> str: + """Show diff between two commits.""" + if not self.is_initialized(): + return "" + + try: + from dulwich import porcelain + + full1 = self._resolve_sha(sha1) + full2 = self._resolve_sha(sha2) + if not full1 or not full2: + return "" + + out = io.BytesIO() + porcelain.diff( + str(self._workspace), + commit=full1, + commit2=full2, + outstream=out, + ) + return out.getvalue().decode("utf-8", errors="replace") + except Exception: + logger.warning("Git diff_commits failed") + return "" + + def find_commit(self, short_sha: str, max_entries: int = 20) -> CommitInfo | None: + """Find a commit by short SHA prefix match.""" + for c in self.log(max_entries=max_entries): + if c.sha.startswith(short_sha): + return c + return None + + def show_commit_diff(self, short_sha: str, max_entries: int = 20) -> tuple[CommitInfo, str] | None: + """Find a commit and return it with its diff vs the parent.""" + commits = self.log(max_entries=max_entries) + for i, c in enumerate(commits): + if c.sha.startswith(short_sha): + if i + 1 < len(commits): + diff = self.diff_commits(commits[i + 1].sha, c.sha) + else: + diff = "" + return c, diff + return None + + # -- restore --------------------------------------------------------------- + + def revert(self, commit: str) -> str | None: + """Restore all tracked memory files to their state at the given commit. + + This reads the file contents from the target commit, writes them back, + and creates a new commit. Does not require merge3. + Returns the new commit SHA, or None on failure. + """ + if not self.is_initialized(): + return None + + try: + from dulwich.repo import Repo + + full_sha = self._resolve_sha(commit) + if not full_sha: + logger.warning("Git revert: SHA not found: {}", commit) + return None + + restored: list[str] = [] + with Repo(str(self._workspace)) as repo: + commit_obj = repo[full_sha] + if commit_obj.type_name != b"commit": + return None + tree = repo[commit_obj.tree] + + for filepath in self._tracked_files: + content = self._read_blob_from_tree(repo, tree, filepath) + if content is not None: + dest = self._workspace / filepath + dest.write_text(content, encoding="utf-8") + restored.append(filepath) + + if not restored: + return None + + # Commit the restored state + msg = f"revert: restore to {commit}" + return self.auto_commit(msg) + except Exception: + logger.warning("Git revert failed for {}", commit) + return None + + @staticmethod + def _read_blob_from_tree(repo, tree, filepath: str) -> str | None: + """Read a blob's content from a tree object by walking path parts.""" + parts = Path(filepath).parts + current = tree + for part in parts: + try: + entry = current[part.encode()] + except KeyError: + return None + obj = repo[entry[1]] + if obj.type_name == b"blob": + return obj.data.decode("utf-8", errors="replace") + if obj.type_name == b"tree": + current = obj + else: + return None + return None diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index ff2bb0c23..65f3ccadb 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -15,6 +15,7 @@ from nanobot.utils.helpers import ensure_dir, estimate_message_tokens, estimate_ from nanobot.agent.runner import AgentRunSpec, AgentRunner from nanobot.agent.tools.registry import ToolRegistry +from nanobot.agent.git_store import GitStore if TYPE_CHECKING: from nanobot.providers.base import LLMProvider @@ -41,6 +42,13 @@ class MemoryStore: self._dream_log_file = self.memory_dir / ".dream-log.md" self._cursor_file = self.memory_dir / ".cursor" self._dream_cursor_file = self.memory_dir / ".dream_cursor" + self._git = GitStore(workspace, tracked_files=[ + "SOUL.md", "USER.md", "memory/MEMORY.md", + ]) + + @property + def git(self) -> GitStore: + return self._git # -- generic helpers ----------------------------------------------------- @@ -576,4 +584,10 @@ class Dream: else: self.store.append_dream_log(f"## {ts}\nNo changes.\n") + # Git auto-commit (only when there are actual changes) + if changelog and self.store.git.is_initialized(): + sha = self.store.git.auto_commit(f"dream: {ts}, {len(changelog)} change(s)") + if sha: + logger.info("Dream commit: {}", sha) + return True diff --git a/nanobot/command/builtin.py b/nanobot/command/builtin.py index 265f4c0ed..c8cf0c7f6 100644 --- a/nanobot/command/builtin.py +++ b/nanobot/command/builtin.py @@ -103,23 +103,86 @@ async def cmd_dream(ctx: CommandContext) -> OutboundMessage: async def cmd_dream_log(ctx: CommandContext) -> OutboundMessage: - """Show the Dream consolidation log.""" - loop = ctx.loop - store = loop.consolidator.store - log = store.read_dream_log() - if not log: - # Check if Dream has ever processed anything + """Show what the last Dream changed. + + Default: diff of the latest commit (HEAD~1 vs HEAD). + With /dream-log : diff of that specific commit. + """ + store = ctx.loop.consolidator.store + git = store.git + + if not git.is_initialized(): if store.get_last_dream_cursor() == 0: - content = "Dream has not run yet." + msg = "Dream has not run yet." else: - content = "No dream log yet." + msg = "Git not initialized for memory files." + return OutboundMessage( + channel=ctx.msg.channel, chat_id=ctx.msg.chat_id, + content=msg, metadata={"render_as": "text"}, + ) + + args = ctx.args.strip() + + if args: + # Show diff of a specific commit + sha = args.split()[0] + result = git.show_commit_diff(sha) + if not result: + content = f"Commit `{sha}` not found." + else: + commit, diff = result + content = commit.format(diff) else: - content = f"## Dream Log\n\n{log}" + # Default: show the latest commit's diff + result = git.show_commit_diff(git.log(max_entries=1)[0].sha) if git.log(max_entries=1) else None + if result: + commit, diff = result + content = commit.format(diff) + else: + content = "No commits yet." + return OutboundMessage( - channel=ctx.msg.channel, - chat_id=ctx.msg.chat_id, - content=content, - metadata={"render_as": "text"}, + channel=ctx.msg.channel, chat_id=ctx.msg.chat_id, + content=content, metadata={"render_as": "text"}, + ) + + +async def cmd_dream_restore(ctx: CommandContext) -> OutboundMessage: + """Restore memory files from a previous dream commit. + + Usage: + /dream-restore — list recent commits + /dream-restore — revert a specific commit + """ + store = ctx.loop.consolidator.store + git = store.git + if not git.is_initialized(): + return OutboundMessage( + channel=ctx.msg.channel, chat_id=ctx.msg.chat_id, + content="Git not initialized for memory files.", + ) + + args = ctx.args.strip() + if not args: + # Show recent commits for the user to pick + commits = git.log(max_entries=10) + if not commits: + content = "No commits found." + else: + lines = ["## Recent Dream Commits\n", "Use `/dream-restore ` to revert a commit.\n"] + for c in commits: + lines.append(f"- `{c.sha}` {c.message.splitlines()[0]} ({c.timestamp})") + content = "\n".join(lines) + else: + sha = args.split()[0] + new_sha = git.revert(sha) + if new_sha: + content = f"Reverted commit `{sha}` → new commit `{new_sha}`." + else: + content = f"Failed to revert commit `{sha}`. Check if the SHA is correct." + return OutboundMessage( + channel=ctx.msg.channel, chat_id=ctx.msg.chat_id, + content=content, metadata={"render_as": "text"}, ) @@ -142,7 +205,8 @@ def build_help_text() -> str: "/restart — Restart the bot", "/status — Show bot status", "/dream — Manually trigger Dream consolidation", - "/dream-log — Show Dream consolidation log", + "/dream-log — Show what the last Dream changed", + "/dream-restore — Revert memory to a previous state", "/help — Show available commands", ] return "\n".join(lines) @@ -157,4 +221,7 @@ def register_builtin_commands(router: CommandRouter) -> None: router.exact("/status", cmd_status) router.exact("/dream", cmd_dream) router.exact("/dream-log", cmd_dream_log) + router.prefix("/dream-log ", cmd_dream_log) + router.exact("/dream-restore", cmd_dream_restore) + router.prefix("/dream-restore ", cmd_dream_restore) router.exact("/help", cmd_help) diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py index badfc978d..1bcb519ce 100644 --- a/nanobot/utils/helpers.py +++ b/nanobot/utils/helpers.py @@ -303,4 +303,15 @@ def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str] from rich.console import Console for name in added: Console().print(f" [dim]Created {name}[/dim]") + + # Initialize git for memory version control + try: + from nanobot.agent.git_store import GitStore + gs = GitStore(workspace, tracked_files=[ + "SOUL.md", "USER.md", "memory/MEMORY.md", + ]) + gs.init() + except Exception: + pass + return added diff --git a/pyproject.toml b/pyproject.toml index 51d494668..a00cf6bc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ dependencies = [ "chardet>=3.0.2,<6.0.0", "openai>=2.8.0", "tiktoken>=0.12.0,<1.0.0", + "dulwich>=0.22.0,<1.0.0", ] [project.optional-dependencies] diff --git a/tests/agent/test_git_store.py b/tests/agent/test_git_store.py new file mode 100644 index 000000000..d374cfbad --- /dev/null +++ b/tests/agent/test_git_store.py @@ -0,0 +1,225 @@ +"""Tests for GitStore — git-backed version control for memory files.""" + +import pytest +from pathlib import Path + +from nanobot.agent.git_store import GitStore, CommitInfo + + +TRACKED = ["SOUL.md", "USER.md", "memory/MEMORY.md"] + + +@pytest.fixture +def git(tmp_path): + """Uninitialized GitStore.""" + return GitStore(tmp_path, tracked_files=TRACKED) + + +@pytest.fixture +def git_ready(git): + """Initialized GitStore with one initial commit.""" + git.init() + return git + + +class TestInit: + def test_not_initialized_by_default(self, git, tmp_path): + assert not git.is_initialized() + assert not (tmp_path / ".git").is_dir() + + def test_init_creates_git_dir(self, git, tmp_path): + assert git.init() + assert (tmp_path / ".git").is_dir() + + def test_init_idempotent(self, git_ready): + assert not git_ready.init() + + def test_init_creates_gitignore(self, git_ready): + gi = git_ready._workspace / ".gitignore" + assert gi.exists() + content = gi.read_text(encoding="utf-8") + for f in TRACKED: + assert f"!{f}" in content + + def test_init_touches_tracked_files(self, git_ready): + for f in TRACKED: + assert (git_ready._workspace / f).exists() + + def test_init_makes_initial_commit(self, git_ready): + commits = git_ready.log() + assert len(commits) == 1 + assert "init" in commits[0].message + + +class TestBuildGitignore: + def test_subdirectory_dirs(self, git): + content = git._build_gitignore() + assert "!memory/\n" in content + for f in TRACKED: + assert f"!{f}\n" in content + assert content.startswith("/*\n") + + def test_root_level_files_no_dir_entries(self, tmp_path): + gs = GitStore(tmp_path, tracked_files=["a.md", "b.md"]) + content = gs._build_gitignore() + assert "!a.md\n" in content + assert "!b.md\n" in content + dir_lines = [l for l in content.split("\n") if l.startswith("!") and l.endswith("/")] + assert dir_lines == [] + + +class TestAutoCommit: + def test_returns_none_when_not_initialized(self, git): + assert git.auto_commit("test") is None + + def test_commits_file_change(self, git_ready): + (git_ready._workspace / "SOUL.md").write_text("updated", encoding="utf-8") + sha = git_ready.auto_commit("update soul") + assert sha is not None + assert len(sha) == 8 + + def test_returns_none_when_no_changes(self, git_ready): + assert git_ready.auto_commit("no change") is None + + def test_commit_appears_in_log(self, git_ready): + ws = git_ready._workspace + (ws / "SOUL.md").write_text("v2", encoding="utf-8") + sha = git_ready.auto_commit("update soul") + commits = git_ready.log() + assert len(commits) == 2 + assert commits[0].sha == sha + + def test_does_not_create_empty_commits(self, git_ready): + git_ready.auto_commit("nothing 1") + git_ready.auto_commit("nothing 2") + assert len(git_ready.log()) == 1 # only init commit + + +class TestLog: + def test_empty_when_not_initialized(self, git): + assert git.log() == [] + + def test_newest_first(self, git_ready): + ws = git_ready._workspace + for i in range(3): + (ws / "SOUL.md").write_text(f"v{i}", encoding="utf-8") + git_ready.auto_commit(f"commit {i}") + + commits = git_ready.log() + assert len(commits) == 4 # init + 3 + assert "commit 2" in commits[0].message + assert "init" in commits[-1].message + + def test_max_entries(self, git_ready): + ws = git_ready._workspace + for i in range(10): + (ws / "SOUL.md").write_text(f"v{i}", encoding="utf-8") + git_ready.auto_commit(f"c{i}") + assert len(git_ready.log(max_entries=3)) == 3 + + def test_commit_info_fields(self, git_ready): + c = git_ready.log()[0] + assert isinstance(c, CommitInfo) + assert len(c.sha) == 8 + assert c.timestamp + assert c.message + + +class TestDiffCommits: + def test_empty_when_not_initialized(self, git): + assert git.diff_commits("a", "b") == "" + + def test_diff_between_two_commits(self, git_ready): + ws = git_ready._workspace + (ws / "SOUL.md").write_text("original", encoding="utf-8") + git_ready.auto_commit("v1") + (ws / "SOUL.md").write_text("modified", encoding="utf-8") + git_ready.auto_commit("v2") + + commits = git_ready.log() + diff = git_ready.diff_commits(commits[1].sha, commits[0].sha) + assert "modified" in diff + + def test_invalid_sha_returns_empty(self, git_ready): + assert git_ready.diff_commits("deadbeef", "cafebabe") == "" + + +class TestFindCommit: + def test_finds_by_prefix(self, git_ready): + ws = git_ready._workspace + (ws / "SOUL.md").write_text("v2", encoding="utf-8") + sha = git_ready.auto_commit("v2") + found = git_ready.find_commit(sha[:4]) + assert found is not None + assert found.sha == sha + + def test_returns_none_for_unknown(self, git_ready): + assert git_ready.find_commit("deadbeef") is None + + +class TestShowCommitDiff: + def test_returns_commit_with_diff(self, git_ready): + ws = git_ready._workspace + (ws / "SOUL.md").write_text("content", encoding="utf-8") + sha = git_ready.auto_commit("add content") + result = git_ready.show_commit_diff(sha) + assert result is not None + commit, diff = result + assert commit.sha == sha + assert "content" in diff + + def test_first_commit_has_empty_diff(self, git_ready): + init_sha = git_ready.log()[-1].sha + result = git_ready.show_commit_diff(init_sha) + assert result is not None + _, diff = result + assert diff == "" + + def test_returns_none_for_unknown(self, git_ready): + assert git_ready.show_commit_diff("deadbeef") is None + + +class TestCommitInfoFormat: + def test_format_with_diff(self): + from nanobot.agent.git_store import CommitInfo + c = CommitInfo(sha="abcd1234", message="test commit\nsecond line", timestamp="2026-04-02 12:00") + result = c.format(diff="some diff") + assert "test commit" in result + assert "`abcd1234`" in result + assert "some diff" in result + + def test_format_without_diff(self): + from nanobot.agent.git_store import CommitInfo + c = CommitInfo(sha="abcd1234", message="test", timestamp="2026-04-02 12:00") + result = c.format() + assert "(no file changes)" in result + + +class TestRevert: + def test_returns_none_when_not_initialized(self, git): + assert git.revert("abc") is None + + def test_reverts_file_content(self, git_ready): + ws = git_ready._workspace + (ws / "SOUL.md").write_text("v2 content", encoding="utf-8") + git_ready.auto_commit("v2") + + commits = git_ready.log() + new_sha = git_ready.revert(commits[1].sha) # revert to init + assert new_sha is not None + assert (ws / "SOUL.md").read_text(encoding="utf-8") == "" + + def test_invalid_sha_returns_none(self, git_ready): + assert git_ready.revert("deadbeef") is None + + +class TestMemoryStoreGitProperty: + def test_git_property_exposes_gitstore(self, tmp_path): + from nanobot.agent.memory import MemoryStore + store = MemoryStore(tmp_path) + assert isinstance(store.git, GitStore) + + def test_git_property_is_same_object(self, tmp_path): + from nanobot.agent.memory import MemoryStore + store = MemoryStore(tmp_path) + assert store.git is store._git