feat(memory): add git-backed version control for dream memory files (#2753)

- Add GitStore class wrapping dulwich for memory file versioning
- Auto-commit memory changes during Dream consolidation
- Add /dream-log and /dream-restore commands for history browsing
- Pass tracked_files as constructor param, generate .gitignore dynamically
This commit is contained in:
chengyongru 2026-04-02 23:52:13 +08:00 committed by GitHub
parent 9d07093e6d
commit b2598270bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 631 additions and 14 deletions

299
nanobot/agent/git_store.py Normal file
View File

@ -0,0 +1,299 @@
"""Git-backed version control for memory files, using dulwich."""
from __future__ import annotations
import io
import time
from dataclasses import dataclass
from pathlib import Path
from loguru import logger
@dataclass
class CommitInfo:
    """Display-oriented summary of a single commit."""

    sha: str  # Short SHA (8 chars)
    message: str
    timestamp: str  # Formatted datetime

    def format(self, diff: str = "") -> str:
        """Format this commit for display, optionally with a diff.

        Args:
            diff: Unified diff text to embed in a fenced ``diff`` block. When
                empty, a "(no file changes)" note is shown instead.

        Returns:
            A markdown snippet: the first line of the commit message as a
            heading, the SHA and timestamp, then the diff or the note.
        """
        # An empty message yields no lines at all; fall back to a placeholder
        # instead of raising IndexError on ``[0]``.
        message_lines = self.message.splitlines()
        title = message_lines[0] if message_lines else "(no message)"
        header = f"## {title}\n`{self.sha}` — {self.timestamp}\n"
        if diff:
            return f"{header}\n```diff\n{diff}\n```"
        return f"{header}\n(no file changes)"
class GitStore:
    """Git-backed version control for memory files.

    Wraps a dulwich repository rooted at the workspace directory. Only the
    files named in ``tracked_files`` (paths relative to the workspace) are
    staged and committed. dulwich is imported lazily inside each method, so
    constructing a GitStore never touches the library.
    """

    # Identity recorded as both author and committer on every commit.
    _IDENTITY = b"nanobot <nanobot@dream>"
    # Number of hex characters exposed as the "short SHA".
    _SHORT_SHA_LEN = 8

    def __init__(self, workspace: Path, tracked_files: list[str]):
        """Remember the workspace root and the workspace-relative files to track."""
        self._workspace = workspace
        self._tracked_files = tracked_files

    def is_initialized(self) -> bool:
        """Check if the git repo has been initialized."""
        return (self._workspace / ".git").is_dir()

    # -- init ------------------------------------------------------------------

    def init(self) -> bool:
        """Initialize a git repo if not already initialized.

        Creates .gitignore and makes an initial commit.

        Returns:
            True if a new repo was created, False if one already exists or
            initialization failed (the failure is logged, not raised).
        """
        if self.is_initialized():
            return False

        try:
            from dulwich import porcelain

            porcelain.init(str(self._workspace))

            # Write .gitignore
            gitignore = self._workspace / ".gitignore"
            gitignore.write_text(self._build_gitignore(), encoding="utf-8")

            # Ensure tracked files exist (touch them if missing) so the initial
            # commit has something to track.
            for rel in self._tracked_files:
                p = self._workspace / rel
                p.parent.mkdir(parents=True, exist_ok=True)
                if not p.exists():
                    p.write_text("", encoding="utf-8")

            # Initial commit
            porcelain.add(
                str(self._workspace),
                paths=[".gitignore", *self._tracked_files],
            )
            porcelain.commit(
                str(self._workspace),
                message=b"init: nanobot memory store",
                author=self._IDENTITY,
                committer=self._IDENTITY,
            )
            logger.info("Git store initialized at {}", self._workspace)
            return True
        except Exception:
            # Best-effort: keep the caller running, but record why it failed.
            logger.opt(exception=True).warning(
                "Git store init failed for {}", self._workspace
            )
            return False

    # -- daily operations ------------------------------------------------------

    def auto_commit(self, message: str) -> str | None:
        """Stage tracked memory files and commit if there are changes.

        Returns:
            The short commit SHA, or None if the repo is not initialized,
            nothing changed, or the commit failed (failure is logged).
        """
        if not self.is_initialized():
            return None

        try:
            from dulwich import porcelain

            # Only changes to already-indexed files show up as staged or
            # unstaged, and only tracked files are ever added to the index.
            st = porcelain.status(str(self._workspace))
            if not st.unstaged and not any(st.staged.values()):
                return None

            msg_bytes = message.encode("utf-8") if isinstance(message, str) else message
            porcelain.add(str(self._workspace), paths=self._tracked_files)
            sha_bytes = porcelain.commit(
                str(self._workspace),
                message=msg_bytes,
                author=self._IDENTITY,
                committer=self._IDENTITY,
            )
            if sha_bytes is None:
                return None
            sha = self._hex_sha(sha_bytes)[: self._SHORT_SHA_LEN]
            logger.debug("Git auto-commit: {} ({})", sha, message)
            return sha
        except Exception:
            logger.opt(exception=True).warning("Git auto-commit failed: {}", message)
            return None

    # -- internal helpers ------------------------------------------------------

    @staticmethod
    def _hex_sha(sha: bytes) -> str:
        """Return the hexadecimal text form of an object id.

        dulwich normally hands out ids as ASCII hex bytestrings (40 chars for
        SHA-1). Calling ``bytes.hex()`` on those hex-encodes the ASCII text
        itself, so the result is not a prefix of the real SHA. Raw binary
        digests (20 or 32 bytes) are still accepted and hex-encoded.
        """
        if len(sha) in (20, 32):
            return sha.hex()
        return sha.decode("ascii")

    def _resolve_sha(self, short_sha: str) -> bytes | None:
        """Resolve a short SHA prefix to the full object id.

        Walks first parents from HEAD and returns the first commit whose hex
        id starts with ``short_sha`` (case-insensitive). Returns None for an
        empty prefix, when nothing matches, or on any repository error.
        """
        prefix = short_sha.strip().lower()
        if not prefix:
            # An empty prefix would match HEAD; treat it as "not found".
            return None

        try:
            from dulwich.repo import Repo

            with Repo(str(self._workspace)) as repo:
                try:
                    sha = repo.refs[b"HEAD"]
                except KeyError:
                    return None

                while sha:
                    if self._hex_sha(sha).startswith(prefix):
                        return sha
                    commit = repo[sha]
                    if commit.type_name != b"commit":
                        break
                    sha = commit.parents[0] if commit.parents else None
            return None
        except Exception:
            logger.opt(exception=True).debug("Git SHA lookup failed for {}", short_sha)
            return None

    def _build_gitignore(self) -> str:
        """Generate .gitignore content from tracked files.

        Ignores every top-level entry (``/*``), then re-includes each ancestor
        directory of a tracked file, each tracked file, and ``.gitignore``.
        Every ancestor is listed because git cannot re-include a file whose
        parent directory is still excluded. Paths are written with forward
        slashes, as gitignore patterns require.
        """
        dirs: set[str] = set()
        for tracked in self._tracked_files:
            for ancestor in Path(tracked).parents:
                name = ancestor.as_posix()
                if name != ".":
                    dirs.add(name)

        lines = ["/*"]
        lines.extend(f"!{d}/" for d in sorted(dirs))
        lines.extend(f"!{Path(tracked).as_posix()}" for tracked in self._tracked_files)
        lines.append("!.gitignore")
        return "\n".join(lines) + "\n"

    # -- query -----------------------------------------------------------------

    def log(self, max_entries: int = 20) -> list[CommitInfo]:
        """Return simplified commit log, newest first.

        Follows first parents from HEAD and stops after ``max_entries``
        commits. Returns an empty list when the repo is not initialized, has
        no HEAD, or reading fails (failure is logged).
        """
        if not self.is_initialized():
            return []

        try:
            from dulwich.repo import Repo

            entries: list[CommitInfo] = []
            with Repo(str(self._workspace)) as repo:
                try:
                    sha = repo.refs[b"HEAD"]
                except KeyError:
                    return []

                while sha and len(entries) < max_entries:
                    commit = repo[sha]
                    if commit.type_name != b"commit":
                        break
                    ts = time.strftime(
                        "%Y-%m-%d %H:%M",
                        time.localtime(commit.commit_time),
                    )
                    msg = commit.message.decode("utf-8", errors="replace").strip()
                    entries.append(CommitInfo(
                        sha=self._hex_sha(sha)[: self._SHORT_SHA_LEN],
                        message=msg,
                        timestamp=ts,
                    ))
                    sha = commit.parents[0] if commit.parents else None
            return entries
        except Exception:
            logger.opt(exception=True).warning("Git log failed")
            return []

    def diff_commits(self, sha1: str, sha2: str) -> str:
        """Show diff between two commits.

        Both arguments are SHA prefixes. Returns an empty string when the repo
        is not initialized, either prefix cannot be resolved, or the diff
        fails (failure is logged).
        """
        if not self.is_initialized():
            return ""

        try:
            from dulwich import porcelain

            full1 = self._resolve_sha(sha1)
            full2 = self._resolve_sha(sha2)
            if not full1 or not full2:
                return ""

            out = io.BytesIO()
            porcelain.diff(
                str(self._workspace),
                commit=full1,
                commit2=full2,
                outstream=out,
            )
            return out.getvalue().decode("utf-8", errors="replace")
        except Exception:
            logger.opt(exception=True).warning("Git diff_commits failed")
            return ""

    def find_commit(self, short_sha: str, max_entries: int = 20) -> CommitInfo | None:
        """Find a commit by short SHA prefix among the newest ``max_entries``."""
        for c in self.log(max_entries=max_entries):
            if c.sha.startswith(short_sha):
                return c
        return None

    def show_commit_diff(self, short_sha: str, max_entries: int = 20) -> tuple[CommitInfo, str] | None:
        """Find a commit and return it with its diff vs the parent.

        Only the newest ``max_entries`` commits are searched. The diff is
        empty when the matching commit has no parent. Returns None when no
        commit matches.
        """
        # Fetch one extra entry so the oldest commit inside the search window
        # can still be diffed against its parent.
        commits = self.log(max_entries=max_entries + 1)
        for i, c in enumerate(commits[:max_entries]):
            if c.sha.startswith(short_sha):
                if i + 1 < len(commits):
                    diff = self.diff_commits(commits[i + 1].sha, c.sha)
                else:
                    diff = ""
                return c, diff
        return None

    # -- restore ---------------------------------------------------------------

    def revert(self, commit: str) -> str | None:
        """Restore all tracked memory files to their state at the given commit.

        Reads each tracked file's content from the target commit, writes it
        back to the workspace, and creates a new commit. Does not require
        merge3. Tracked files that are absent from the target commit are left
        untouched.

        Returns:
            The new commit's short SHA, or None on failure or when the
            restored content produces no change to commit.
        """
        if not self.is_initialized():
            return None

        try:
            from dulwich.repo import Repo

            full_sha = self._resolve_sha(commit)
            if not full_sha:
                logger.warning("Git revert: SHA not found: {}", commit)
                return None

            restored: list[str] = []
            with Repo(str(self._workspace)) as repo:
                commit_obj = repo[full_sha]
                if commit_obj.type_name != b"commit":
                    return None
                tree = repo[commit_obj.tree]

                for filepath in self._tracked_files:
                    data = self._read_blob_bytes(repo, tree, filepath)
                    if data is None:
                        continue
                    dest = self._workspace / filepath
                    dest.parent.mkdir(parents=True, exist_ok=True)
                    # Write the stored bytes verbatim: no lossy decode and no
                    # newline translation.
                    dest.write_bytes(data)
                    restored.append(filepath)

            if not restored:
                return None

            # Commit the restored state
            return self.auto_commit(f"revert: restore to {commit}")
        except Exception:
            logger.opt(exception=True).warning("Git revert failed for {}", commit)
            return None

    @staticmethod
    def _read_blob_bytes(repo, tree, filepath: str) -> bytes | None:
        """Return the raw bytes of the blob at ``filepath`` under ``tree``.

        Walks the path one component at a time. Returns None when a component
        is missing, when a blob appears before the final component, or when
        the path ends on a tree.
        """
        parts = Path(filepath).parts
        last_index = len(parts) - 1
        current = tree
        for index, part in enumerate(parts):
            try:
                entry = current[part.encode()]
            except KeyError:
                return None
            obj = repo[entry[1]]  # tree entries are (mode, sha)
            if obj.type_name == b"blob":
                # A blob is only the answer if it is the final path component.
                return obj.data if index == last_index else None
            if obj.type_name != b"tree":
                return None
            current = obj
        return None

    @staticmethod
    def _read_blob_from_tree(repo, tree, filepath: str) -> str | None:
        """Read a blob's content from a tree object by walking path parts.

        Returns the content decoded as UTF-8 (undecodable bytes replaced), or
        None when the path does not lead to a blob.
        """
        data = GitStore._read_blob_bytes(repo, tree, filepath)
        if data is None:
            return None
        return data.decode("utf-8", errors="replace")

View File

@ -15,6 +15,7 @@ from nanobot.utils.helpers import ensure_dir, estimate_message_tokens, estimate_
from nanobot.agent.runner import AgentRunSpec, AgentRunner
from nanobot.agent.tools.registry import ToolRegistry
from nanobot.agent.git_store import GitStore
if TYPE_CHECKING:
from nanobot.providers.base import LLMProvider
@ -41,6 +42,13 @@ class MemoryStore:
self._dream_log_file = self.memory_dir / ".dream-log.md"
self._cursor_file = self.memory_dir / ".cursor"
self._dream_cursor_file = self.memory_dir / ".dream_cursor"
self._git = GitStore(workspace, tracked_files=[
"SOUL.md", "USER.md", "memory/MEMORY.md",
])
@property
def git(self) -> GitStore:
    """Return the GitStore instance held by this store (``self._git``)."""
    return self._git
# -- generic helpers -----------------------------------------------------
@ -576,4 +584,10 @@ class Dream:
else:
self.store.append_dream_log(f"## {ts}\nNo changes.\n")
# Git auto-commit (only when there are actual changes)
if changelog and self.store.git.is_initialized():
sha = self.store.git.auto_commit(f"dream: {ts}, {len(changelog)} change(s)")
if sha:
logger.info("Dream commit: {}", sha)
return True

View File

@ -103,23 +103,86 @@ async def cmd_dream(ctx: CommandContext) -> OutboundMessage:
async def cmd_dream_log(ctx: CommandContext) -> OutboundMessage:
    """Show what the last Dream changed.

    Default: diff of the latest commit (HEAD~1 vs HEAD).
    With /dream-log <sha>: diff of that specific commit.

    When the memory git repo is not initialized, replies with
    "Dream has not run yet." if the store's last dream cursor is 0, otherwise
    with "Git not initialized for memory files.".
    """
    store = ctx.loop.consolidator.store
    git = store.git

    if not git.is_initialized():
        if store.get_last_dream_cursor() == 0:
            msg = "Dream has not run yet."
        else:
            msg = "Git not initialized for memory files."
        return OutboundMessage(
            channel=ctx.msg.channel, chat_id=ctx.msg.chat_id,
            content=msg, metadata={"render_as": "text"},
        )

    args = ctx.args.strip()

    if args:
        # Show diff of a specific commit
        sha = args.split()[0]
        result = git.show_commit_diff(sha)
        if not result:
            content = f"Commit `{sha}` not found."
        else:
            commit, diff = result
            content = commit.format(diff)
    else:
        # Default: show the latest commit's diff. Read the log once so the
        # emptiness check and the SHA lookup see the same snapshot.
        latest = git.log(max_entries=1)
        result = git.show_commit_diff(latest[0].sha) if latest else None
        if result:
            commit, diff = result
            content = commit.format(diff)
        else:
            content = "No commits yet."

    return OutboundMessage(
        channel=ctx.msg.channel, chat_id=ctx.msg.chat_id,
        content=content, metadata={"render_as": "text"},
    )
async def cmd_dream_restore(ctx: CommandContext) -> OutboundMessage:
    """Restore memory files from a previous dream commit.

    Usage:
        /dream-restore          list recent commits
        /dream-restore <sha>    restore memory files to their state at that
                                commit (recorded as a new commit)
    """
    store = ctx.loop.consolidator.store
    git = store.git

    if not git.is_initialized():
        # Same plain-text rendering hint as every other reply of this command.
        return OutboundMessage(
            channel=ctx.msg.channel, chat_id=ctx.msg.chat_id,
            content="Git not initialized for memory files.",
            metadata={"render_as": "text"},
        )

    args = ctx.args.strip()

    if not args:
        # Show recent commits for the user to pick
        commits = git.log(max_entries=10)
        if not commits:
            content = "No commits found."
        else:
            lines = ["## Recent Dream Commits\n", "Use `/dream-restore <sha>` to revert a commit.\n"]
            for c in commits:
                # A commit with an empty message has no first line to show.
                title = (c.message.splitlines() or [""])[0]
                lines.append(f"- `{c.sha}` {title} ({c.timestamp})")
            content = "\n".join(lines)
    else:
        sha = args.split()[0]
        new_sha = git.revert(sha)
        if new_sha:
            content = f"Reverted commit `{sha}` → new commit `{new_sha}`."
        else:
            content = f"Failed to revert commit `{sha}`. Check if the SHA is correct."

    return OutboundMessage(
        channel=ctx.msg.channel, chat_id=ctx.msg.chat_id,
        content=content, metadata={"render_as": "text"},
    )
@ -142,7 +205,8 @@ def build_help_text() -> str:
"/restart — Restart the bot",
"/status — Show bot status",
"/dream — Manually trigger Dream consolidation",
"/dream-log — Show Dream consolidation log",
"/dream-log — Show what the last Dream changed",
"/dream-restore — Revert memory to a previous state",
"/help — Show available commands",
]
return "\n".join(lines)
@ -157,4 +221,7 @@ def register_builtin_commands(router: CommandRouter) -> None:
router.exact("/status", cmd_status)
router.exact("/dream", cmd_dream)
router.exact("/dream-log", cmd_dream_log)
router.prefix("/dream-log ", cmd_dream_log)
router.exact("/dream-restore", cmd_dream_restore)
router.prefix("/dream-restore ", cmd_dream_restore)
router.exact("/help", cmd_help)

View File

@ -303,4 +303,15 @@ def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str]
from rich.console import Console
for name in added:
Console().print(f" [dim]Created {name}[/dim]")
# Initialize git for memory version control
try:
from nanobot.agent.git_store import GitStore
gs = GitStore(workspace, tracked_files=[
"SOUL.md", "USER.md", "memory/MEMORY.md",
])
gs.init()
except Exception:
pass
return added

View File

@ -48,6 +48,7 @@ dependencies = [
"chardet>=3.0.2,<6.0.0",
"openai>=2.8.0",
"tiktoken>=0.12.0,<1.0.0",
"dulwich>=0.22.0,<1.0.0",
]
[project.optional-dependencies]

View File

@ -0,0 +1,225 @@
"""Tests for GitStore — git-backed version control for memory files."""
import pytest
from pathlib import Path
from nanobot.agent.git_store import GitStore, CommitInfo
TRACKED = ["SOUL.md", "USER.md", "memory/MEMORY.md"]
@pytest.fixture
def git(tmp_path):
    """GitStore bound to a fresh temp directory; no repo has been created yet."""
    store = GitStore(tmp_path, tracked_files=TRACKED)
    return store
@pytest.fixture
def git_ready(git):
    """Initialized GitStore with one initial commit.

    Builds on the ``git`` fixture and calls ``init()`` on it before handing
    the same object to the test.
    """
    git.init()
    return git
class TestInit:
    """Repository bootstrap behaviour of ``GitStore.init()``."""

    def test_not_initialized_by_default(self, git, tmp_path):
        git_dir = tmp_path / ".git"
        assert not git.is_initialized()
        assert not git_dir.is_dir()

    def test_init_creates_git_dir(self, git, tmp_path):
        created = git.init()
        assert created
        assert (tmp_path / ".git").is_dir()

    def test_init_idempotent(self, git_ready):
        second_attempt = git_ready.init()
        assert not second_attempt

    def test_init_creates_gitignore(self, git_ready):
        ignore_file = git_ready._workspace / ".gitignore"
        assert ignore_file.exists()
        text = ignore_file.read_text(encoding="utf-8")
        for name in TRACKED:
            assert f"!{name}" in text

    def test_init_touches_tracked_files(self, git_ready):
        root = git_ready._workspace
        for name in TRACKED:
            assert (root / name).exists()

    def test_init_makes_initial_commit(self, git_ready):
        history = git_ready.log()
        assert len(history) == 1
        assert "init" in history[0].message
class TestBuildGitignore:
    """Shape of the generated .gitignore text."""

    def test_subdirectory_dirs(self, git):
        text = git._build_gitignore()
        assert "!memory/\n" in text
        for name in TRACKED:
            assert f"!{name}\n" in text
        assert text.startswith("/*\n")

    def test_root_level_files_no_dir_entries(self, tmp_path):
        store = GitStore(tmp_path, tracked_files=["a.md", "b.md"])
        text = store._build_gitignore()
        assert "!a.md\n" in text
        assert "!b.md\n" in text
        # With only root-level files there must be no "!<dir>/" re-include lines.
        reincluded_dirs = [
            line
            for line in text.split("\n")
            if line.startswith("!") and line.endswith("/")
        ]
        assert reincluded_dirs == []
class TestAutoCommit:
    """Behaviour of ``GitStore.auto_commit()``."""

    def test_returns_none_when_not_initialized(self, git):
        outcome = git.auto_commit("test")
        assert outcome is None

    def test_commits_file_change(self, git_ready):
        soul = git_ready._workspace / "SOUL.md"
        soul.write_text("updated", encoding="utf-8")
        short = git_ready.auto_commit("update soul")
        assert short is not None
        assert len(short) == 8

    def test_returns_none_when_no_changes(self, git_ready):
        outcome = git_ready.auto_commit("no change")
        assert outcome is None

    def test_commit_appears_in_log(self, git_ready):
        soul = git_ready._workspace / "SOUL.md"
        soul.write_text("v2", encoding="utf-8")
        short = git_ready.auto_commit("update soul")
        history = git_ready.log()
        assert len(history) == 2
        assert history[0].sha == short

    def test_does_not_create_empty_commits(self, git_ready):
        for label in ("nothing 1", "nothing 2"):
            git_ready.auto_commit(label)
        history = git_ready.log()
        assert len(history) == 1  # only init commit
class TestLog:
    """Behaviour of ``GitStore.log()``."""

    def test_empty_when_not_initialized(self, git):
        history = git.log()
        assert history == []

    def test_newest_first(self, git_ready):
        soul = git_ready._workspace / "SOUL.md"
        for n in range(3):
            soul.write_text(f"v{n}", encoding="utf-8")
            git_ready.auto_commit(f"commit {n}")
        history = git_ready.log()
        assert len(history) == 4  # init + 3
        newest, oldest = history[0], history[-1]
        assert "commit 2" in newest.message
        assert "init" in oldest.message

    def test_max_entries(self, git_ready):
        soul = git_ready._workspace / "SOUL.md"
        for n in range(10):
            soul.write_text(f"v{n}", encoding="utf-8")
            git_ready.auto_commit(f"c{n}")
        limited = git_ready.log(max_entries=3)
        assert len(limited) == 3

    def test_commit_info_fields(self, git_ready):
        head = git_ready.log()[0]
        assert isinstance(head, CommitInfo)
        assert len(head.sha) == 8
        assert head.timestamp
        assert head.message
class TestDiffCommits:
    """Behaviour of ``GitStore.diff_commits()``."""

    def test_empty_when_not_initialized(self, git):
        text = git.diff_commits("a", "b")
        assert text == ""

    def test_diff_between_two_commits(self, git_ready):
        soul = git_ready._workspace / "SOUL.md"
        soul.write_text("original", encoding="utf-8")
        git_ready.auto_commit("v1")
        soul.write_text("modified", encoding="utf-8")
        git_ready.auto_commit("v2")
        newest, previous = git_ready.log()[:2]
        text = git_ready.diff_commits(previous.sha, newest.sha)
        assert "modified" in text

    def test_invalid_sha_returns_empty(self, git_ready):
        text = git_ready.diff_commits("deadbeef", "cafebabe")
        assert text == ""
class TestFindCommit:
    """Behaviour of ``GitStore.find_commit()``."""

    def test_finds_by_prefix(self, git_ready):
        soul = git_ready._workspace / "SOUL.md"
        soul.write_text("v2", encoding="utf-8")
        short = git_ready.auto_commit("v2")
        match = git_ready.find_commit(short[:4])
        assert match is not None
        assert match.sha == short

    def test_returns_none_for_unknown(self, git_ready):
        match = git_ready.find_commit("deadbeef")
        assert match is None
class TestShowCommitDiff:
    """Behaviour of ``GitStore.show_commit_diff()``."""

    def test_returns_commit_with_diff(self, git_ready):
        soul = git_ready._workspace / "SOUL.md"
        soul.write_text("content", encoding="utf-8")
        short = git_ready.auto_commit("add content")
        found = git_ready.show_commit_diff(short)
        assert found is not None
        info, patch = found
        assert info.sha == short
        assert "content" in patch

    def test_first_commit_has_empty_diff(self, git_ready):
        oldest = git_ready.log()[-1]
        found = git_ready.show_commit_diff(oldest.sha)
        assert found is not None
        patch = found[1]
        assert patch == ""

    def test_returns_none_for_unknown(self, git_ready):
        found = git_ready.show_commit_diff("deadbeef")
        assert found is None
class TestCommitInfoFormat:
    """Rendering of ``CommitInfo.format()``."""

    def test_format_with_diff(self):
        info = CommitInfo(
            sha="abcd1234",
            message="test commit\nsecond line",
            timestamp="2026-04-02 12:00",
        )
        rendered = info.format(diff="some diff")
        for expected in ("test commit", "`abcd1234`", "some diff"):
            assert expected in rendered

    def test_format_without_diff(self):
        info = CommitInfo(sha="abcd1234", message="test", timestamp="2026-04-02 12:00")
        rendered = info.format()
        assert "(no file changes)" in rendered
class TestRevert:
    """Behaviour of ``GitStore.revert()``."""

    def test_returns_none_when_not_initialized(self, git):
        outcome = git.revert("abc")
        assert outcome is None

    def test_reverts_file_content(self, git_ready):
        soul = git_ready._workspace / "SOUL.md"
        soul.write_text("v2 content", encoding="utf-8")
        git_ready.auto_commit("v2")
        init_commit = git_ready.log()[1]
        restored_sha = git_ready.revert(init_commit.sha)  # revert to init
        assert restored_sha is not None
        assert soul.read_text(encoding="utf-8") == ""

    def test_invalid_sha_returns_none(self, git_ready):
        outcome = git_ready.revert("deadbeef")
        assert outcome is None
class TestMemoryStoreGitProperty:
    """``MemoryStore.git`` exposes the store's GitStore."""

    def test_git_property_exposes_gitstore(self, tmp_path):
        from nanobot.agent.memory import MemoryStore

        exposed = MemoryStore(tmp_path).git
        assert isinstance(exposed, GitStore)

    def test_git_property_is_same_object(self, tmp_path):
        from nanobot.agent.memory import MemoryStore

        memory = MemoryStore(tmp_path)
        assert memory.git is memory._git