diff --git a/docs/memory.md b/docs/memory.md index 763e0643d..38da6cc73 100644 --- a/docs/memory.md +++ b/docs/memory.md @@ -54,10 +54,7 @@ Dream reads: - the current `USER.md` - the current `memory/MEMORY.md` -Then it works in two phases: - -1. It studies what is new and what is already known. -2. It edits the long-term files surgically, not by rewriting everything, but by making the smallest honest change that keeps memory coherent. +Then it edits the long-term files surgically in a single pass — not by rewriting everything, but by making the smallest honest change that keeps memory coherent. This is why nanobot's memory is not just archival. It is interpretive. @@ -160,21 +157,17 @@ Dream is configured under `agents.defaults.dream`: | Field | Meaning | |-------|---------| | `intervalH` | How often Dream runs, in hours | -| `modelOverride` | Optional Dream-specific model override | -| `maxBatchSize` | How many history entries Dream processes per run | -| `maxIterations` | The tool budget for Dream's editing phase | +| `cron` | Cron expression override (takes precedence over `intervalH`) | +| `modelOverride` | Optional Dream-specific model override *(pending implementation)* | +| `maxBatchSize` | *(Deprecated — not used)* | +| `maxIterations` | *(Deprecated — not used)* | In practical terms: -- `modelOverride: null` means Dream uses the same model as the main agent. Set it only if you want Dream to run on a different model. -- `maxBatchSize` controls how many new `history.jsonl` entries Dream consumes in one run. Larger batches catch up faster; smaller batches are lighter and steadier. -- `maxIterations` limits how many read/edit steps Dream can take while updating `SOUL.md`, `USER.md`, and `MEMORY.md`. It is a safety budget, not a quality score. -- `intervalH` is the normal way to configure Dream. Internally it runs as an `every` schedule, not as a cron expression. - -Legacy note: - -- Older source-based configs may still contain `dream.cron`. 
nanobot continues to honor it for backward compatibility, but new configs should use `intervalH`. -- Older source-based configs may still contain `dream.model`. nanobot continues to honor it for backward compatibility, but new configs should use `modelOverride`. +- `intervalH` is the normal way to configure Dream frequency. Internally it runs as an `every` schedule. +- `cron` overrides `intervalH` when set, allowing precise cron expressions (e.g. `0 */4 * * *`). +- `modelOverride` is reserved for a future release. Currently Dream uses the same model as the main agent. +- `maxBatchSize` and `maxIterations` are preserved for config compatibility but no longer affect behavior. ## In Practice diff --git a/nanobot/agent/__init__.py b/nanobot/agent/__init__.py index 9eef5a0c6..7d3ab2af4 100644 --- a/nanobot/agent/__init__.py +++ b/nanobot/agent/__init__.py @@ -3,7 +3,7 @@ from nanobot.agent.context import ContextBuilder from nanobot.agent.hook import AgentHook, AgentHookContext, CompositeHook from nanobot.agent.loop import AgentLoop -from nanobot.agent.memory import Dream, MemoryStore +from nanobot.agent.memory import MemoryStore from nanobot.agent.skills import SkillsLoader from nanobot.agent.subagent import SubagentManager @@ -13,7 +13,6 @@ __all__ = [ "AgentLoop", "CompositeHook", "ContextBuilder", - "Dream", "MemoryStore", "SkillsLoader", "SubagentManager", diff --git a/nanobot/agent/autocompact.py b/nanobot/agent/autocompact.py index 4ad241170..f5a8401b1 100644 --- a/nanobot/agent/autocompact.py +++ b/nanobot/agent/autocompact.py @@ -16,6 +16,7 @@ if TYPE_CHECKING: class AutoCompact: _RECENT_SUFFIX_MESSAGES = 8 + _INTERNAL_SESSION_PREFIXES = ("dream:",) def __init__(self, sessions: SessionManager, consolidator: Consolidator, session_ttl_minutes: int = 0): @@ -37,13 +38,17 @@ class AutoCompact: def _format_summary(text: str, last_active: datetime) -> str: return f"Previous conversation summary (last active {last_active.isoformat()}):\n{text}" + @classmethod + def 
_is_internal_session(cls, key: str) -> bool: + return key.startswith(cls._INTERNAL_SESSION_PREFIXES) + def check_expired(self, schedule_background: Callable[[Coroutine], None], active_session_keys: Collection[str] = ()) -> None: """Schedule archival for idle sessions, skipping those with in-flight agent tasks.""" now = datetime.now() for info in self.sessions.list_sessions(): key = info.get("key", "") - if not key or key in self._archiving: + if not key or self._is_internal_session(key) or key in self._archiving: continue if key in active_session_keys: continue @@ -52,6 +57,9 @@ class AutoCompact: schedule_background(self._archive(key)) async def _archive(self, key: str) -> None: + if self._is_internal_session(key): + self._archiving.discard(key) + return try: summary = await self.consolidator.compact_idle_session( key, self._RECENT_SUFFIX_MESSAGES, @@ -70,6 +78,10 @@ class AutoCompact: self._archiving.discard(key) def prepare_session(self, session: Session, key: str) -> tuple[Session, str | None]: + if self._is_internal_session(key): + self._archiving.discard(key) + self._summaries.pop(key, None) + return session, None if key in self._archiving or self._is_expired(session.updated_at): logger.info("Auto-compact: reloading session {} (archiving={})", key, key in self._archiving) session = self.sessions.get_or_create(key) diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py index 9aa7395c3..d89f0c927 100644 --- a/nanobot/agent/context.py +++ b/nanobot/agent/context.py @@ -69,6 +69,7 @@ class ContextBuilder: channel: str | None = None, session_summary: str | None = None, workspace: Path | None = None, + include_memory_recent_history: bool = True, ) -> str: """Build the system prompt from identity, bootstrap files, memory, and skills.""" root = workspace or self.workspace @@ -94,14 +95,15 @@ class ContextBuilder: if skills_summary: parts.append(render_template("agent/skills_section.md", skills_summary=skills_summary)) - entries = 
self.memory.read_unprocessed_history(since_cursor=self.memory.get_last_dream_cursor()) - if entries: - capped = entries[-self._MAX_RECENT_HISTORY:] - history_text = "\n".join( - f"- [{e['timestamp']}] {e['content']}" for e in capped - ) - history_text = truncate_text(history_text, self._MAX_HISTORY_CHARS) - parts.append("# Recent History\n\n" + history_text) + if include_memory_recent_history: + entries = self.memory.read_unprocessed_history(since_cursor=self.memory.get_last_dream_cursor()) + if entries: + capped = entries[-self._MAX_RECENT_HISTORY:] + history_text = "\n".join( + f"- [{e['timestamp']}] {e['content']}" for e in capped + ) + history_text = truncate_text(history_text, self._MAX_HISTORY_CHARS) + parts.append("# Recent History\n\n" + history_text) if session_summary: parts.append(f"[Archived Context Summary]\n\n{session_summary}") @@ -193,6 +195,7 @@ class ContextBuilder: runtime_state: Any | None = None, inbound_message: Any | None = None, skip_runtime_lines: bool = False, + include_memory_recent_history: bool = True, ) -> list[dict[str, Any]]: """Build the complete message list for an LLM call.""" root = workspace or self.workspace @@ -228,6 +231,7 @@ class ContextBuilder: channel=channel, session_summary=session_summary, workspace=root, + include_memory_recent_history=include_memory_recent_history, ), }, *history, diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index ddc7399a1..f31589cb9 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -19,7 +19,7 @@ from nanobot.agent import model_presets as preset_helpers from nanobot.agent.autocompact import AutoCompact from nanobot.agent.context import ContextBuilder from nanobot.agent.hook import AgentHook, CompositeHook -from nanobot.agent.memory import Consolidator, Dream +from nanobot.agent.memory import Consolidator from nanobot.agent.progress_hook import AgentProgressHook from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN, AgentRunner, AgentRunSpec from 
nanobot.agent.subagent import SubagentManager @@ -123,6 +123,10 @@ class TurnContext: pending_queue: asyncio.Queue | None = None pending_summary: str | None = None + + ephemeral: bool = False + tools: ToolRegistry | None = None + turn_wall_started_at: float = field(default_factory=time.time) visible_run_started_at: float | None = None turn_latency_ms: int | None = None @@ -316,11 +320,6 @@ class AgentLoop: consolidator=self.consolidator, session_ttl_minutes=session_ttl_minutes, ) - self.dream = Dream( - store=self.context.memory, - provider=provider, - model=self.model, - ) self.model_presets: dict[str, ModelPresetConfig] = model_presets or {} self._active_preset: str | None = None if model_preset: @@ -409,7 +408,6 @@ class AgentLoop: self.runner.provider = provider self.subagents.set_provider(provider, model) self.consolidator.set_provider(provider, model, context_window_tokens) - self.dream.set_provider(provider, model) self._provider_signature = snapshot.signature if publish_update and self._runtime_model_publisher is not None: self._runtime_model_publisher( @@ -595,6 +593,7 @@ class AgentLoop: session: Session, history: list[dict[str, Any]], pending_summary: str | None, + include_memory_recent_history: bool = True, ) -> list[dict[str, Any]]: """Build the initial message list for the LLM turn.""" scope = self.workspace_scopes.for_message(msg, session.metadata) @@ -610,6 +609,7 @@ class AgentLoop: workspace=scope.project_path, runtime_state=self, inbound_message=msg, + include_memory_recent_history=include_memory_recent_history, ) async def _dispatch_command_inline( @@ -673,6 +673,8 @@ class AgentLoop: metadata: dict[str, Any] | None = None, session_key: str | None = None, pending_queue: asyncio.Queue | None = None, + ephemeral: bool = False, + tools: ToolRegistry | None = None, ) -> tuple[str | None, list[str], list[dict], str, bool]: """Run the agent iteration loop. 
@@ -698,9 +700,9 @@ class AgentLoop: set_tool_context=self._set_tool_context, on_iteration=lambda iteration: setattr(self, "_current_iteration", iteration), ) - hook: AgentHook = ( - CompositeHook([loop_hook] + self._extra_hooks) if self._extra_hooks else loop_hook - ) + hook: AgentHook = loop_hook + if not ephemeral and self._extra_hooks: + hook = CompositeHook([loop_hook] + self._extra_hooks) async def _checkpoint(payload: dict[str, Any]) -> None: if session is None: @@ -787,7 +789,7 @@ class AgentLoop: try: result = await self.runner.run(AgentRunSpec( initial_messages=initial_messages, - tools=self.tools, + tools=tools or self.tools, model=self.model, max_iterations=self.max_iterations, max_tool_result_chars=self.max_tool_result_chars, @@ -1186,6 +1188,8 @@ class AgentLoop: on_stream: Callable[[str], Awaitable[None]] | None = None, on_stream_end: Callable[..., Awaitable[None]] | None = None, pending_queue: asyncio.Queue | None = None, + ephemeral: bool = False, + tools: ToolRegistry | None = None, ) -> OutboundMessage | None: """Process a single inbound message and return the response.""" self._refresh_provider_snapshot() @@ -1216,6 +1220,8 @@ class AgentLoop: on_stream=on_stream, on_stream_end=on_stream_end, pending_queue=pending_queue, + ephemeral=ephemeral, + tools=tools, ) while ctx.state is not TurnState.DONE: @@ -1372,10 +1378,11 @@ class AgentLoop: return "dispatch" async def _state_build(self, ctx: TurnContext) -> str: - await self.consolidator.maybe_consolidate_by_tokens( - ctx.session, - replay_max_messages=self._max_messages, - ) + if not ctx.ephemeral: + await self.consolidator.maybe_consolidate_by_tokens( + ctx.session, + replay_max_messages=self._max_messages, + ) self._set_tool_context( ctx.msg.channel, ctx.msg.chat_id, @@ -1403,6 +1410,7 @@ class AgentLoop: ctx.session, ctx.history, ctx.pending_summary, + include_memory_recent_history=not ctx.ephemeral, ) ctx.user_persisted_early = self._persist_user_message_early( ctx.msg, ctx.session @@ -1437,6 
+1445,8 @@ class AgentLoop: metadata=ctx.msg.metadata, session_key=ctx.session_key, pending_queue=ctx.pending_queue, + ephemeral=ctx.ephemeral, + tools=ctx.tools, ) final_content, tools_used, all_msgs, stop_reason, had_injections = result ctx.final_content = final_content @@ -1471,16 +1481,17 @@ class AgentLoop: ctx.session_key, ctx.turn_latency_ms, ) - ctx.session.enforce_file_cap(on_archive=self.context.memory.raw_archive) + if not ctx.ephemeral: + ctx.session.enforce_file_cap(on_archive=self.context.memory.raw_archive) + self._schedule_background( + self.consolidator.maybe_consolidate_by_tokens( + ctx.session, + replay_max_messages=self._max_messages, + ) + ) self._clear_pending_user_turn(ctx.session) self._clear_runtime_checkpoint(ctx.session) self.sessions.save(ctx.session) - self._schedule_background( - self.consolidator.maybe_consolidate_by_tokens( - ctx.session, - replay_max_messages=self._max_messages, - ) - ) return "ok" async def _state_respond(self, ctx: TurnContext) -> str: @@ -1496,6 +1507,8 @@ class AgentLoop: ctx.on_stream, turn_latency_ms=ctx.turn_latency_ms, ) + if ctx.ephemeral and ctx.outbound is not None: + ctx.outbound.metadata["_stop_reason"] = ctx.stop_reason return "ok" def _sanitize_persisted_blocks( @@ -1720,6 +1733,8 @@ class AgentLoop: on_progress: Callable[..., Awaitable[None]] | None = None, on_stream: Callable[[str], Awaitable[None]] | None = None, on_stream_end: Callable[..., Awaitable[None]] | None = None, + ephemeral: bool = False, + tools: ToolRegistry | None = None, ) -> OutboundMessage | None: """Process a message directly and return the outbound payload.""" await self._connect_mcp() @@ -1731,12 +1746,18 @@ class AgentLoop: lock = self._session_locks.setdefault(session_key, asyncio.Lock()) try: async with lock: + kwargs: dict[str, Any] = { + "session_key": session_key, + "on_progress": on_progress, + "on_stream": on_stream, + "on_stream_end": on_stream_end, + "ephemeral": ephemeral, + } + if tools is not None: + kwargs["tools"] 
= tools return await self._process_message( msg, - session_key=session_key, - on_progress=on_progress, - on_stream=on_stream, - on_stream_end=on_stream_end, + **kwargs, ) finally: await self._runtime_events().run_status_changed(msg, session_key, "idle") diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index fc5480b97..5aedb511a 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -1,4 +1,4 @@ -"""Memory system: pure file I/O store, lightweight Consolidator, and Dream processor.""" +"""Memory system: pure file I/O store and lightweight Consolidator.""" from __future__ import annotations @@ -16,8 +16,6 @@ from typing import TYPE_CHECKING, Any, Callable, Iterator import tiktoken from loguru import logger -from nanobot.agent.runner import AgentRunner, AgentRunSpec -from nanobot.agent.tools.registry import ToolRegistry from nanobot.session.manager import Session from nanobot.utils.gitstore import GitStore from nanobot.utils.helpers import ( @@ -405,6 +403,78 @@ class MemoryStore: def set_last_dream_cursor(self, cursor: int) -> None: self._dream_cursor_file.write_text(str(cursor), encoding="utf-8") + def build_dream_prompt(self, *, max_entries: int = 20) -> tuple[str, int] | None: + """Build the Dream prompt with unprocessed history context. + + Returns ``(prompt, last_cursor)`` or ``None`` if nothing to process. 
+ """ + from nanobot.agent.skills import BUILTIN_SKILLS_DIR + + last_cursor = self.get_last_dream_cursor() + entries = self.read_unprocessed_history(since_cursor=last_cursor) + if not entries: + return None + + batch = entries[:max_entries] + history_text = "\n".join( + f"[{e['timestamp']}] {truncate_text(e['content'], 500)}" + for e in batch + ) + skill_creator_path = str(BUILTIN_SKILLS_DIR / "skill-creator" / "SKILL.md") + template = render_template( + "agent/dream.md", strip=True, skill_creator_path=skill_creator_path, + ) + prompt = f"{template}\n\n## Conversation History\n{history_text}" + return (prompt, batch[-1]["cursor"]) + + def build_dream_tools(self): + """Build the restricted tool registry used by Dream runs.""" + from nanobot.agent.skills import BUILTIN_SKILLS_DIR + from nanobot.agent.tools.apply_patch import ApplyPatchTool + from nanobot.agent.tools.file_state import FileStates + from nanobot.agent.tools.filesystem import EditFileTool, ReadFileTool, WriteFileTool + from nanobot.agent.tools.registry import ToolRegistry + + tools = ToolRegistry() + file_states = FileStates() + workspace = self.workspace + skills_dir = workspace / "skills" + skills_dir.mkdir(parents=True, exist_ok=True) + + extra_read = [BUILTIN_SKILLS_DIR] if BUILTIN_SKILLS_DIR.exists() else None + editable_roots = [self.soul_file, self.user_file, skills_dir] + + tools.register(ReadFileTool( + workspace=workspace, + allowed_dir=workspace, + extra_allowed_dirs=extra_read, + file_states=file_states, + )) + tools.register(EditFileTool( + workspace=workspace, + allowed_dir=self.memory_dir, + extra_allowed_dirs=editable_roots, + file_states=file_states, + )) + tools.register(ApplyPatchTool( + workspace=workspace, + allowed_dir=self.memory_dir, + extra_allowed_dirs=editable_roots, + file_states=file_states, + )) + tools.register(WriteFileTool( + workspace=workspace, + allowed_dir=skills_dir, + file_states=file_states, + )) + return tools + + @staticmethod + def dream_run_completed(resp: 
object | None) -> bool: + """Return True only when an ephemeral Dream agent turn completed cleanly.""" + metadata = getattr(resp, "metadata", None) + return isinstance(metadata, dict) and metadata.get("_stop_reason") == "completed" + # -- message formatting utility ------------------------------------------ @staticmethod @@ -431,13 +501,49 @@ class MemoryStore: "Memory consolidation degraded: raw-archived {} messages", len(messages) ) + # ------------------------------------------------------------------ + # Dream helpers + # ------------------------------------------------------------------ + + @staticmethod + def dream_session_key() -> str: + """Return a unique session key for a Dream run, e.g. ``dream:20260528-100000``.""" + return f"dream:{datetime.now():%Y%m%d-%H%M%S}" + + @staticmethod + def build_dream_commit_message(prefix: str, resp: object | None) -> str: + """Build a Dream auto-commit message, appending the LLM summary if present.""" + msg = prefix + if resp is not None and getattr(resp, "content", None): + msg = f"{msg}\n\n{resp.content.strip()}" + return msg + + @staticmethod + def prune_dream_sessions(sessions_dir: Path, *, keep: int = 10) -> None: + """Remove the oldest Dream session files, keeping only the N most recent. + + Only files matching ``dream_*.jsonl`` are considered. Non-dream session + files are never touched. 
+ """ + dream_files = sorted( + sessions_dir.glob("dream_*.jsonl"), key=lambda p: p.stat().st_mtime, + ) + if len(dream_files) <= keep: + return + + to_remove = dream_files[: len(dream_files) - keep] + for path in to_remove: + try: + path.unlink() + logger.debug("Pruned old dream session: {}", path.stem) + except OSError: + logger.warning("Failed to prune dream session {}", path) # --------------------------------------------------------------------------- # Consolidator — lightweight token-budget triggered consolidation # --------------------------------------------------------------------------- - # Individual history.jsonl writers cap their own payloads tightly; the # _HISTORY_ENTRY_HARD_CAP at append_history() is a belt-and-suspenders default # that catches any new caller that forgot to set its own cap. @@ -847,320 +953,3 @@ class Consolidator: ) return summary - - -# --------------------------------------------------------------------------- -# Dream — heavyweight cron-scheduled memory consolidation -# --------------------------------------------------------------------------- - - -# Single source of truth for the staleness threshold used in _annotate_with_ages -# *and* in the Phase 1 prompt template (passed as `stale_threshold_days`). -# Keep code and prompt aligned — if you bump this, the LLM's instruction string -# updates automatically. -_STALE_THRESHOLD_DAYS = 14 - - -class Dream: - """Two-phase memory processor: analyze history.jsonl, then edit files via AgentRunner. - - Phase 1 produces an analysis summary (plain LLM call). - Phase 2 delegates to AgentRunner with read_file / edit_file tools so the - LLM can make targeted, incremental edits instead of replacing entire files. - """ - - # Caps on prompt-bound inputs so Dream's LLM calls never exceed the model's - # context window just because a file (or a legacy large history entry) grew - # unexpectedly. 
Each file still appears in full via read_file when the agent - # needs it in Phase 2 — these caps only bound the Phase 1/2 prompt preview. - _MEMORY_FILE_MAX_CHARS = 32_000 - _SOUL_FILE_MAX_CHARS = 16_000 - _USER_FILE_MAX_CHARS = 16_000 - _HISTORY_ENTRY_PREVIEW_MAX_CHARS = 4_000 - - def __init__( - self, - store: MemoryStore, - provider: LLMProvider, - model: str, - max_batch_size: int = 20, - max_iterations: int = 10, - max_tool_result_chars: int = 16_000, - annotate_line_ages: bool = True, - ): - self.store = store - self.provider = provider - self.model = model - self.max_batch_size = max_batch_size - self.max_iterations = max_iterations - self.max_tool_result_chars = max_tool_result_chars - # Kill switch for the git-blame-based per-line age annotation in Phase 1. - # Default True keeps the #3212 behavior; set False to feed MEMORY.md raw - # (e.g. if a specific LLM reacts poorly to the `← Nd` suffix). - self.annotate_line_ages = annotate_line_ages - self._runner = AgentRunner(provider) - self._tools = self._build_tools() - - def set_provider(self, provider: LLMProvider, model: str) -> None: - self.provider = provider - self.model = model - self._runner.provider = provider - - # -- tool registry ------------------------------------------------------- - - def _build_tools(self) -> ToolRegistry: - """Build a minimal tool registry for the Dream agent.""" - from nanobot.agent.skills import BUILTIN_SKILLS_DIR - from nanobot.agent.tools.file_state import FileStates - from nanobot.agent.tools.filesystem import EditFileTool, ReadFileTool, WriteFileTool - - tools = ToolRegistry() - workspace = self.store.workspace - # Allow reading builtin skills for reference during skill creation - extra_read = [BUILTIN_SKILLS_DIR] if BUILTIN_SKILLS_DIR.exists() else None - # Dream gets its own FileStates so its caches stay isolated from the - # main loop's sessions (issue #3571). 
- file_states = FileStates() - tools.register(ReadFileTool( - workspace=workspace, - allowed_dir=workspace, - extra_allowed_dirs=extra_read, - file_states=file_states, - )) - tools.register(EditFileTool(workspace=workspace, allowed_dir=workspace, file_states=file_states)) - # write_file resolves relative paths from workspace root, but can only - # write under skills/ so the prompt can safely use skills//SKILL.md. - skills_dir = workspace / "skills" - skills_dir.mkdir(parents=True, exist_ok=True) - tools.register(WriteFileTool(workspace=workspace, allowed_dir=skills_dir, file_states=file_states)) - return tools - - # -- skill listing -------------------------------------------------------- - - def _list_existing_skills(self) -> list[str]: - """List existing skills as 'name — description' for dedup context.""" - import re as _re - - from nanobot.agent.skills import BUILTIN_SKILLS_DIR - - desc_re = _re.compile(r"^description:\s*(.+)$", _re.MULTILINE | _re.IGNORECASE) - entries: dict[str, str] = {} - for base in (self.store.workspace / "skills", BUILTIN_SKILLS_DIR): - if not base.exists(): - continue - for d in base.iterdir(): - if not d.is_dir(): - continue - skill_md = d / "SKILL.md" - if not skill_md.exists(): - continue - # Prefer workspace skills over builtin (same name) - if d.name in entries and base == BUILTIN_SKILLS_DIR: - continue - content = skill_md.read_text(encoding="utf-8")[:500] - m = desc_re.search(content) - desc = m.group(1).strip() if m else "(no description)" - entries[d.name] = desc - return [f"{name} — {desc}" for name, desc in sorted(entries.items())] - - # -- main entry ---------------------------------------------------------- - - def _annotate_with_ages(self, content: str) -> str: - """Append per-line age suffixes to MEMORY.md content. - - Each non-blank line whose age exceeds ``_STALE_THRESHOLD_DAYS`` gets a - suffix like ``← 30d`` indicating days since last modification. 
- Returns the original content unchanged if git is unavailable, - annotate fails, or the line count doesn't match the age count - (which can happen with an uncommitted working-tree edit — better to - skip annotation than to tag the wrong line). - SOUL.md and USER.md are never annotated. - """ - file_path = "memory/MEMORY.md" - try: - ages = self.store.git.line_ages(file_path) - except Exception: - logger.debug("line_ages failed for {}", file_path) - return content - if not ages: - return content - - had_trailing = content.endswith("\n") - lines = content.splitlines() - # If HEAD-blob line count disagrees with the working-tree content we - # received, ages would be assigned to the wrong lines — skip entirely - # and feed the LLM un-annotated content rather than misleading data. - if len(lines) != len(ages): - logger.debug( - "line_ages length mismatch for {} (lines={}, ages={}); skipping annotation", - file_path, len(lines), len(ages), - ) - return content - - annotated: list[str] = [] - for line, age in zip(lines, ages): - if not line.strip(): - annotated.append(line) - continue - if age.age_days > _STALE_THRESHOLD_DAYS: - annotated.append(f"{line} \u2190 {age.age_days}d") - else: - annotated.append(line) - result = "\n".join(annotated) - if had_trailing: - result += "\n" - return result - - async def run(self) -> bool: - """Process unprocessed history entries. Returns True if work was done.""" - from nanobot.agent.skills import BUILTIN_SKILLS_DIR - - last_cursor = self.store.get_last_dream_cursor() - entries = self.store.read_unprocessed_history(since_cursor=last_cursor) - if not entries: - return False - - batch = entries[: self.max_batch_size] - logger.info( - "Dream: processing {} entries (cursor {}→{}), batch={}", - len(entries), last_cursor, batch[-1]["cursor"], len(batch), - ) - - # Build history text for LLM — cap each entry so a legacy oversized - # record (e.g. pre-#3412 raw_archive dump) can't blow up the prompt. 
- history_text = "\n".join( - f"[{e['timestamp']}] " - f"{truncate_text(e['content'], self._HISTORY_ENTRY_PREVIEW_MAX_CHARS)}" - for e in batch - ) - - # Current file contents + per-line age annotations (MEMORY.md only). - # Each file is capped in the *prompt preview* only; Phase 2 still sees - # the full file via the read_file tool. - current_date = datetime.now().strftime("%Y-%m-%d") - raw_memory = self.store.read_memory() or "(empty)" - annotated_memory = ( - self._annotate_with_ages(raw_memory) - if self.annotate_line_ages - else raw_memory - ) - current_memory = truncate_text(annotated_memory, self._MEMORY_FILE_MAX_CHARS) - current_soul = truncate_text( - self.store.read_soul() or "(empty)", self._SOUL_FILE_MAX_CHARS, - ) - current_user = truncate_text( - self.store.read_user() or "(empty)", self._USER_FILE_MAX_CHARS, - ) - - file_context = ( - f"## Current Date\n{current_date}\n\n" - f"## Current MEMORY.md ({len(current_memory)} chars)\n{current_memory}\n\n" - f"## Current SOUL.md ({len(current_soul)} chars)\n{current_soul}\n\n" - f"## Current USER.md ({len(current_user)} chars)\n{current_user}" - ) - - # Phase 1: Analyze (no skills list — dedup is Phase 2's job) - phase1_prompt = ( - f"## Conversation History\n{history_text}\n\n{file_context}" - ) - - try: - phase1_response = await self.provider.chat_with_retry( - model=self.model, - messages=[ - { - "role": "system", - "content": render_template( - "agent/dream_phase1.md", - strip=True, - stale_threshold_days=_STALE_THRESHOLD_DAYS, - ), - }, - {"role": "user", "content": phase1_prompt}, - ], - tools=None, - tool_choice=None, - ) - analysis = phase1_response.content or "" - logger.debug("Dream Phase 1 analysis ({} chars): {}", len(analysis), analysis[:500]) - except Exception: - logger.exception("Dream Phase 1 failed") - return False - - # Phase 2: Delegate to AgentRunner with read_file / edit_file - existing_skills = self._list_existing_skills() - skills_section = "" - if existing_skills: - skills_section = 
( - "\n\n## Existing Skills\n" - + "\n".join(f"- {s}" for s in existing_skills) - ) - phase2_prompt = f"## Analysis Result\n{analysis}\n\n{file_context}{skills_section}" - - tools = self._tools - skill_creator_path = BUILTIN_SKILLS_DIR / "skill-creator" / "SKILL.md" - messages: list[dict[str, Any]] = [ - { - "role": "system", - "content": render_template( - "agent/dream_phase2.md", - strip=True, - skill_creator_path=str(skill_creator_path), - ), - }, - {"role": "user", "content": phase2_prompt}, - ] - - try: - result = await self._runner.run(AgentRunSpec( - initial_messages=messages, - tools=tools, - model=self.model, - max_iterations=self.max_iterations, - max_tool_result_chars=self.max_tool_result_chars, - fail_on_tool_error=False, - )) - logger.debug( - "Dream Phase 2 complete: stop_reason={}, tool_events={}", - result.stop_reason, len(result.tool_events), - ) - for ev in (result.tool_events or []): - logger.info("Dream tool_event: name={}, status={}, detail={}", ev.get("name"), ev.get("status"), ev.get("detail", "")[:200]) - except Exception: - logger.exception("Dream Phase 2 failed") - result = None - - # Build changelog from tool events - changelog: list[str] = [] - if result and result.tool_events: - for event in result.tool_events: - if event["status"] == "ok": - changelog.append(f"{event['name']}: {event['detail']}") - - # Only advance cursor on successful completion to prevent silent loss - if result and result.stop_reason == "completed": - new_cursor = batch[-1]["cursor"] - self.store.set_last_dream_cursor(new_cursor) - logger.info( - "Dream done: {} change(s), cursor advanced to {}", - len(changelog), new_cursor, - ) - else: - reason = result.stop_reason if result else "exception" - logger.warning( - "Dream incomplete ({}): cursor NOT advanced, will retry next cron cycle", - reason, - ) - - self.store.compact_history() - - # Git auto-commit (only when there are actual changes) - if changelog and self.store.git.is_initialized(): - ts = 
batch[-1]["timestamp"] - summary = f"dream: {ts}, {len(changelog)} change(s)" - commit_msg = f"{summary}\n\n{analysis.strip()}" - sha = self.store.git.auto_commit(commit_msg) - if sha: - logger.info("Dream commit: {}", sha) - - return True diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 2c8808d26..b57bd8a6b 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -984,11 +984,48 @@ def _run_gateway( # Dream is an internal job — run directly, not through the agent loop. if job.name == "dream": + from nanobot.agent.memory import MemoryStore + + dream_session_key = MemoryStore.dream_session_key + build_dream_commit_message = MemoryStore.build_dream_commit_message + prune_dream_sessions = MemoryStore.prune_dream_sessions + + store = agent.context.memory + resp = None try: - await agent.dream.run() - logger.info("Dream cron job completed") + result = store.build_dream_prompt() + if result is None: + logger.info("Dream: nothing to process") + return None + prompt, last_cursor = result + key = dream_session_key() + resp = await agent.process_direct( + prompt, + session_key=key, + ephemeral=True, + tools=store.build_dream_tools(), + on_progress=_silent, + ) + if MemoryStore.dream_run_completed(resp): + store.set_last_dream_cursor(last_cursor) + logger.info("Dream cron job completed, cursor advanced to {}", last_cursor) + else: + logger.warning( + "Dream cron job did not complete; cursor remains at {}", + store.get_last_dream_cursor(), + ) except Exception: logger.exception("Dream cron job failed") + finally: + if store.git.is_initialized(): + msg = build_dream_commit_message( + "dream: periodic memory consolidation", resp, + ) + sha = store.git.auto_commit(msg) + if sha: + logger.info("Dream commit: {}", sha) + store.compact_history() + prune_dream_sessions(agent.sessions.sessions_dir) return None # Heartbeat is a system job that checks HEARTBEAT.md for active tasks. 
@@ -1199,13 +1236,8 @@ def _run_gateway( async with server: await server.serve_forever() # Register Dream system job (idempotent on restart) - dream_cfg = config.agents.defaults.dream - if dream_cfg.model_override: - agent.dream.model = dream_cfg.model_override - agent.dream.max_batch_size = dream_cfg.max_batch_size - agent.dream.max_iterations = dream_cfg.max_iterations - agent.dream.annotate_line_ages = dream_cfg.annotate_line_ages from nanobot.cron.types import CronJob, CronPayload, CronSchedule + dream_cfg = config.agents.defaults.dream if dream_cfg.enabled: cron.register_system_job(CronJob( id="dream", diff --git a/nanobot/command/builtin.py b/nanobot/command/builtin.py index 997b7ca16..cfe487a05 100644 --- a/nanobot/command/builtin.py +++ b/nanobot/command/builtin.py @@ -305,17 +305,52 @@ async def cmd_dream(ctx: CommandContext) -> OutboundMessage: msg = ctx.msg async def _run_dream(): + from nanobot.agent.memory import MemoryStore + + dream_session_key = MemoryStore.dream_session_key + build_dream_commit_message = MemoryStore.build_dream_commit_message + prune_dream_sessions = MemoryStore.prune_dream_sessions + + store = loop.context.memory + content = "" + resp = None t0 = time.monotonic() try: - did_work = await loop.dream.run() + result = store.build_dream_prompt() + if result is None: + await loop.bus.publish_outbound(OutboundMessage( + channel=msg.channel, chat_id=msg.chat_id, + content="Dream: nothing to process.", + )) + return + prompt, last_cursor = result + key = dream_session_key() + resp = await loop.process_direct( + prompt, + session_key=key, + ephemeral=True, + tools=store.build_dream_tools(), + ) elapsed = time.monotonic() - t0 - if did_work: + if MemoryStore.dream_run_completed(resp): + store.set_last_dream_cursor(last_cursor) content = f"Dream completed in {elapsed:.1f}s." else: - content = "Dream: nothing to process." + content = ( + f"Dream did not complete after {elapsed:.1f}s; " + "memory cursor was not advanced." 
+ ) except Exception as e: elapsed = time.monotonic() - t0 content = f"Dream failed after {elapsed:.1f}s: {e}" + finally: + if store.git.is_initialized(): + commit_msg = build_dream_commit_message("dream: manual run", resp) + sha = store.git.auto_commit(commit_msg) + if sha: + content += f" (commit {sha})" + store.compact_history() + prune_dream_sessions(loop.sessions.sessions_dir) await loop.bus.publish_outbound(OutboundMessage( channel=msg.channel, chat_id=msg.chat_id, content=content, )) diff --git a/nanobot/config/loader.py b/nanobot/config/loader.py index 86f439cd8..545cd0bdc 100644 --- a/nanobot/config/loader.py +++ b/nanobot/config/loader.py @@ -92,10 +92,9 @@ _ENV_REF_PATTERN = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}") def resolve_config_env_vars(config: Config) -> Config: """Return *config* with ``${VAR}`` env-var references resolved. - Walks in place so fields declared with ``exclude=True`` (e.g. - ``DreamConfig.cron``) survive; returns the same instance when no - references are present. Raises ``ValueError`` if a referenced - variable is not set. + Walks in place so fields declared with ``exclude=True`` survive; + returns the same instance when no references are present. + Raises ``ValueError`` if a referenced variable is not set. 
""" return _resolve_in_place(config) diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 2a89d04c5..b9ebbd7ed 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -50,18 +50,14 @@ class DreamConfig(Base): enabled: bool = True # Register the periodic Dream consolidation job on startup interval_h: int = Field(default=2, ge=1) # Every 2 hours by default - cron: str | None = Field(default=None, exclude=True) # Legacy compatibility override + cron: str | None = Field(default=None, exclude=True) # Legacy cron expression override model_override: str | None = Field( default=None, validation_alias=AliasChoices("modelOverride", "model", "model_override"), - ) # Optional Dream-specific model override - max_batch_size: int = Field(default=20, ge=1) # Max history entries per run - # Bumped from 10 to 15 in #3212 (exp002: +30% dedup, no accuracy loss; >15 plateaus). - max_iterations: int = Field(default=15, ge=1) # Max tool calls per Phase 2 - # Per-line git-blame age annotation in Phase 1 prompt (see #3212). Default - # on — set to False to feed MEMORY.md raw if a specific LLM reacts poorly - # to the `← Nd` suffix or you want deterministic, git-independent prompts. - annotate_line_ages: bool = True + ) # Override model for Dream sessions (pending implementation) + max_batch_size: int = Field(default=20, ge=1) # Deprecated: no longer used + max_iterations: int = Field(default=15, ge=1) # Deprecated: no longer used + annotate_line_ages: bool = True # Deprecated: no longer used def build_schedule(self, timezone: str) -> CronSchedule: """Build the runtime schedule, preferring the legacy cron override if present.""" diff --git a/nanobot/templates/agent/consolidator_archive.md b/nanobot/templates/agent/consolidator_archive.md index 5073f4f44..688e3012f 100644 --- a/nanobot/templates/agent/consolidator_archive.md +++ b/nanobot/templates/agent/consolidator_archive.md @@ -1,13 +1,24 @@ -Extract key facts from this conversation. 
Only output items matching these categories, skip everything else: -- User facts: personal info, preferences, stated opinions, habits -- Decisions: choices made, conclusions reached -- Solutions: working approaches discovered through trial and error, especially non-obvious methods that succeeded after failed attempts -- Events: plans, deadlines, notable occurrences -- Preferences: communication style, tool preferences +Extract key facts from this conversation. For each fact, annotate its memory attributes. + +Only SNIP facts deserve a non-[skip] mark: +- Signal: would the user need to repeat this if forgotten? +- Novel: not just a restatement of another fact in this same conversation chunk +- Important: prevents rework or captures preferences / rules +- Persistent: still relevant after 2 weeks + +Output one fact per line in this format: +- [mark] fact content + +Marks (choose the best match): +- [permanent] Core preferences, personal traits, habits — never becomes stale +- [durable] Technical discoveries, project knowledge, config details — valid for months +- [ephemeral] Active task state, temporary decisions — may change in weeks +- [correction] Correction to a previous memory — state what changed +- [skip] Does not meet SNIP criteria, is conversational filler, is code/source facts derivable from the repo, or is only useful as an audit breadcrumb Priority: user corrections and preferences > solutions > decisions > events > environment facts. The most valuable memory prevents the user from having to repeat themselves. -Skip: code patterns derivable from source, git history, or anything already captured in existing memory. +Do not mark something [skip] merely because it might already exist in long-term memory; Dream handles cross-file deduplication later. -Output as concise bullet points, one fact per line. No preamble, no commentary. +Output concise bullet points only. No preamble, no commentary. 
If nothing noteworthy happened, output: (nothing) diff --git a/nanobot/templates/agent/dream.md b/nanobot/templates/agent/dream.md new file mode 100644 index 000000000..3f512bf2f --- /dev/null +++ b/nanobot/templates/agent/dream.md @@ -0,0 +1,105 @@ +You are a memory consolidation engine. Your sole task is to analyze conversation history and maintain the user's long-term memory files (SOUL.md, USER.md, MEMORY.md, SKILL.md). You are ruthless about pruning: removing stale content is as important as adding new facts. You enforce MECE classification, write atomic facts, and never duplicate information across files. + +## File routing +Do NOT guess paths. Route each fact to its canonical file: + +| File | Path | Content | +|------|------|---------| +| SOUL.md | `SOUL.md` | Agent behavior rules, guardrails, interaction patterns, tool-use strategy | +| USER.md | `USER.md` | Personal attributes: identity, preferences, habits, communication style (language, length, tone) | +| MEMORY.md | `memory/MEMORY.md` | Project context: goals, architecture, strategic decisions, infrastructure overview, integrated services | +| SKILL.md | `skills/<name>/SKILL.md` | Reusable workflow templates with concrete steps, commands, and examples ([SKILL] entries only) | + +**Routing examples:** +- "User prefers concise replies" → USER.md +- "Reply in Chinese" → USER.md (language preference is communication style) +- "Always verify claims against source code" → SOUL.md +- "When searching, prefer grep over file listing" → SOUL.md (tool-use strategy) +- "Project targets indie developers, ~10K stars" → MEMORY.md +- "Reverse proxy on port 8080 with user deploy" → MEMORY.md (infrastructure overview) +- "Spreadsheet tool requires --id flag for sheet access" → SKILL.md (not MEMORY.md) +- "API base URL is https://api.example.com" → SKILL.md (not MEMORY.md) + +**Communication boundary:** Language, length, and tone preferences go to USER.md. 
Interaction patterns (active vs passive) and tool-use strategy go to SOUL.md. + +Cross-boundary rule: no technical configs in USER.md, no user facts in SOUL.md, no operational details in MEMORY.md. If a fact fits multiple files, keep the most specific copy and remove the rest. + +## MECE enforcement +- USER.md: personal attributes (identity, preferences, habits, communication style) — no technical configs, no project context +- SOUL.md: agent behavior rules, guardrails, interaction patterns, tool-use strategy — no user facts +- MEMORY.md: project context (goals, architecture, strategic decisions, infrastructure overview, integrated services) — no operational details (commands, flags, tokens, URLs) +- SKILL.md: reusable workflow templates with concrete steps, commands, and examples +- If a fact belongs in multiple files, keep it in the most specific one and remove from others + +## History attribute tags +Conversation History may contain Consolidator tags. Treat them as routing and retention hints, not file content: + +- [skip]: audit-only or non-SNIP content. Do not write it to SOUL.md, USER.md, MEMORY.md, or SKILL.md. +- [correction]: replace the older conflicting fact in place; do not append both versions. +- [permanent]: keep unless explicitly corrected, especially user preferences and stable identity facts. +- [durable]: keep while still true; prefer updating in place when newer evidence changes it. +- [ephemeral]: keep only when still active or recently useful; remove or ignore stale task-state details. + +Always strip these bracketed tags from saved memory content. 
+ +## Skill-to-skill MECE +- If a new skill overlaps with an existing skill, merge the delta into the existing skill instead of creating a redundant one +- Check existing skill descriptions (listed above) before creating a new skill + +## Delete-or-keep + +**Always delete:** +- Same fact at multiple locations — keep canonical copy only +- Merged/closed PR notes, resolved incidents, superseded info +- Verbose entries restatable in fewer words +- Overlapping or nested sections covering the same topic +- Operational details (commands, flags, tokens, URLs) that belong in a skill file +- Facts easily discoverable via a quick web search (standard library APIs, common CLI flags, public documentation, generic tutorials) — memory is for context the user *can't* look up + +**Likely delete** (apply judgment): +- Same fact at different detail levels — keep most complete version only +- Debugging steps unlikely to recur +- Ephemeral facts past their useful life +- Tool/service details already captured in a skill or documented upstream +- Entries no longer referenced in recent conversations or superseded by newer facts +- Specific commit hashes, PR numbers, or issue IDs for resolved incidents + +**Migrate to SKILL.md:** +- Concrete command examples, API endpoints, CLI flags, file paths +- Step-by-step procedures that recur across conversations +- Service-specific configuration patterns +- After migrating content to a skill, delete it from the source file (MEMORY.md or USER.md) to maintain MECE + +**Never delete:** +- User preferences and personality traits (permanent regardless of age) +- Active project context still referenced in conversations +- Behavioral rules in SOUL.md + +**Age and decay rules:** +- Sprint goals and milestones: keep current + next sprint; archive completed ones after 30 days +- Architecture decisions: keep indefinitely unless explicitly superseded +- Infrastructure details: update in place when changed; do not keep obsolete configs +- Tool/service 
integrations: remove if the service is no longer used + +When removing: prefer deleting individual items over entire sections. + +## Fact extraction +- Atomic facts: "has a cat named Luna" not "discussed pet care" +- Corrections: edit the existing entry, don't append a new one +- Conflicts: if new information contradicts an existing entry, replace the old entry in place; do not keep both versions +- Capture confirmed approaches the user validated + +## Skill discovery & creation +Flag [SKILL] only when ALL are true: repeatable workflow appeared 2+ times, involves clear steps (not vague preferences), substantial enough for its own instruction set. Check existing skills to avoid redundancy. + +For [SKILL] entries: +- Create `skills/<name>/SKILL.md`; reference `{{ skill_creator_path }}` for format +- YAML frontmatter (name, description), under 2000 words: when to use, steps, output format, example +- Do NOT overwrite existing skills — if overlapping, merge delta into the existing skill +- Skills are instruction sets with concrete values, commands, and examples. MEMORY.md keeps strategic context and high-level facts only. + +## Editing +- Inspect current file contents before editing; they are not embedded in the prompt to keep context compact. +- Batch changes into as few calls as possible. Surgical edits only. + +Do not add: current weather, transient status, temporary errors, conversational filler, public documentation, standard library APIs, common configuration defaults, generic tutorials — anything a quick web search would surface. diff --git a/nanobot/templates/agent/dream_phase1.md b/nanobot/templates/agent/dream_phase1.md deleted file mode 100644 index 114db38c5..000000000 --- a/nanobot/templates/agent/dream_phase1.md +++ /dev/null @@ -1,40 +0,0 @@ -You have TWO equally important tasks: -1. Extract new facts from conversation history -2. 
Deduplicate existing memory files — find and flag redundant, overlapping, or stale content even if NOT mentioned in history - -Output one line per finding: -[FILE] atomic fact (not already in memory) -[FILE-REMOVE] reason for removal -[SKILL] kebab-case-name: one-line description of the reusable pattern - -Files: USER (identity, preferences), SOUL (bot behavior, tone), MEMORY (knowledge, project context) - -Rules: -- Atomic facts: "has a cat named Luna" not "discussed pet care" -- Corrections: [USER] location is Tokyo, not Osaka -- Capture confirmed approaches the user validated - -Deduplication — scan ALL memory files for these redundancy patterns: -- Same fact stated in multiple places (e.g., "communicates in Chinese" in both USER.md and multiple MEMORY.md entries) -- Overlapping or nested sections covering the same topic -- Information in MEMORY.md that is already captured in USER.md or SOUL.md (MEMORY.md should not duplicate permanent-file content) -- Verbose entries that can be condensed without losing information -For each duplicate found, output [FILE-REMOVE] for the less authoritative copy (prefer keeping facts in their canonical location) - -Staleness — MEMORY.md lines may have a ``← Nd`` suffix showing days since last modification: -- SOUL.md and USER.md have no age annotations — they are permanent, only update with corrections -- Age only indicates when content was last touched, not whether it should be removed -- Use content judgment: user habits/preferences/personality traits are permanent regardless of age -- Only prune content that is objectively outdated: passed events, resolved tracking, superseded approaches -- Lines with ``← Nd`` (N>{{ stale_threshold_days }}) deserve closer review but are NOT automatically removable -- When removing: prefer deleting individual items over entire sections - -Skill discovery — flag [SKILL] when ALL of these are true: -- A specific, repeatable workflow appeared 2+ times in the conversation history -- It involves 
clear steps (not vague preferences like "likes concise answers") -- It is substantial enough to warrant its own instruction set (not trivial like "read a file") -- Do not worry about duplicates — the next phase will check against existing skills - -Do not add: current weather, transient status, temporary errors, conversational filler. - -[SKIP] if nothing needs updating. diff --git a/nanobot/templates/agent/dream_phase2.md b/nanobot/templates/agent/dream_phase2.md deleted file mode 100644 index f833afb6a..000000000 --- a/nanobot/templates/agent/dream_phase2.md +++ /dev/null @@ -1,37 +0,0 @@ -Update memory files based on the analysis below. -- [FILE] entries: add the described content to the appropriate file -- [FILE-REMOVE] entries: delete the corresponding content from memory files -- [SKILL] entries: create a new skill under skills/<name>/SKILL.md using write_file - -## File paths (relative to workspace root) -- SOUL.md -- USER.md -- memory/MEMORY.md -- skills/<name>/SKILL.md (for [SKILL] entries only) - -Do NOT guess paths. - -## Editing rules -- Edit directly — file contents provided below, no read_file needed -- Use exact text as old_text, include surrounding blank lines for unique match -- Batch changes to the same file into one edit_file call -- For deletions: section header + all bullets as old_text, new_text empty -- Surgical edits only — never rewrite entire files -- If nothing to update, stop without calling tools - -## Skill creation rules (for [SKILL] entries) -- Use write_file to create skills/<name>/SKILL.md -- Before writing, read_file `{{ skill_creator_path }}` for format reference (frontmatter structure, naming conventions, quality standards) -- **Dedup check**: read existing skills listed below to verify the new skill is not functionally redundant. Skip creation if an existing skill already covers the same workflow. 
-- Include YAML frontmatter with name and description fields -- Keep SKILL.md under 2000 words — concise and actionable -- Include: when to use, steps, output format, at least one example -- Do NOT overwrite existing skills — skip if the skill directory already exists -- Reference specific tools the agent has access to (read_file, write_file, exec, web_search, etc.) -- Skills are instruction sets, not code — do not include implementation code - -## Quality -- Every line must carry standalone value -- Concise bullets under clear headers -- When reducing (not deleting): keep essential facts, drop verbose details -- If uncertain whether to delete, keep but add "(verify currency)" diff --git a/nanobot/webui/settings_api.py b/nanobot/webui/settings_api.py index 28d65d9ed..d476cb70b 100644 --- a/nanobot/webui/settings_api.py +++ b/nanobot/webui/settings_api.py @@ -742,9 +742,6 @@ def settings_payload( }, "dream": { "schedule": defaults.dream.describe_schedule(), - "max_batch_size": defaults.dream.max_batch_size, - "max_iterations": defaults.dream.max_iterations, - "annotate_line_ages": defaults.dream.annotate_line_ages, }, "unified_session": defaults.unified_session, }, diff --git a/tests/agent/test_auto_compact.py b/tests/agent/test_auto_compact.py index 9376289da..1e711bfd8 100644 --- a/tests/agent/test_auto_compact.py +++ b/tests/agent/test_auto_compact.py @@ -751,6 +751,27 @@ class TestProactiveAutoCompact: assert entry[0] == "User chatted about old things." 
await loop.close_mcp() + @pytest.mark.asyncio + async def test_proactive_archive_skips_dream_sessions(self, tmp_path): + """Internal Dream sessions should be left to Dream retention, not idle compact.""" + loop = _make_loop(tmp_path, session_ttl_minutes=15) + session = loop.sessions.get_or_create("dream:20260602-155256") + _add_turns(session, 6, prefix="dream") + session.updated_at = datetime.now() - timedelta(minutes=20) + loop.sessions.save(session) + + _fake_compact = _make_fake_compact(loop) + loop.consolidator.compact_idle_session = _fake_compact + + await self._run_check_expired(loop) + + session_after = loop.sessions.get_or_create("dream:20260602-155256") + assert len(session_after.messages) == 12 + assert _fake_compact.state["count"] == 0 + assert "dream:20260602-155256" not in loop.auto_compact._archiving + assert "dream:20260602-155256" not in loop.auto_compact._summaries + await loop.close_mcp() + @pytest.mark.asyncio async def test_no_proactive_archive_when_active(self, tmp_path): """Recently active session should NOT be archived on idle tick.""" diff --git a/tests/agent/test_autocompact_unit.py b/tests/agent/test_autocompact_unit.py index 1d3277a01..1fb1f20db 100644 --- a/tests/agent/test_autocompact_unit.py +++ b/tests/agent/test_autocompact_unit.py @@ -203,9 +203,15 @@ class TestCheckExpired: old_ts = (datetime.now() - timedelta(minutes=20)).isoformat() mock_sm.list_sessions.return_value = [{"key": "cli:old", "updated_at": old_ts}] ac.sessions = mock_sm - scheduler = MagicMock() + + scheduled = [] + + def scheduler(coro): + scheduled.append(coro) + coro.close() + ac.check_expired(scheduler) - scheduler.assert_called_once() + assert len(scheduled) == 1 assert "cli:old" in ac._archiving def test_active_session_key_skips(self): @@ -251,6 +257,22 @@ class TestCheckExpired: ac.check_expired(scheduler) scheduler.assert_not_called() + def test_dream_session_skips(self): + """Internal Dream sessions should not be scheduled for idle compact.""" + ac = 
_make_autocompact(ttl=15) + mock_sm = MagicMock(spec=SessionManager) + old_ts = (datetime.now() - timedelta(minutes=20)).isoformat() + mock_sm.list_sessions.return_value = [ + {"key": "dream:20260602-155256", "updated_at": old_ts}, + ] + ac.sessions = mock_sm + scheduler = MagicMock() + + ac.check_expired(scheduler) + + scheduler.assert_not_called() + assert "dream:20260602-155256" not in ac._archiving + # --------------------------------------------------------------------------- # _archive @@ -273,6 +295,17 @@ class TestArchiveDelegates: "cli:test", ac._RECENT_SUFFIX_MESSAGES, ) + @pytest.mark.asyncio + async def test_dream_session_is_ignored(self): + ac = _make_autocompact() + ac.consolidator.compact_idle_session = AsyncMock(return_value="Summary.") + ac._archiving.add("dream:20260602-155256") + + await ac._archive("dream:20260602-155256") + + ac.consolidator.compact_idle_session.assert_not_awaited() + assert "dream:20260602-155256" not in ac._archiving + @pytest.mark.asyncio async def test_populates_summaries_from_metadata(self): ac = _make_autocompact() @@ -416,6 +449,33 @@ class TestPrepareSession: assert result_session is session assert summary is None + def test_dream_session_skips_reload_and_summaries(self): + """Internal Dream sessions should not reload or receive compact summaries.""" + ac = _make_autocompact(ttl=15) + mock_sm = MagicMock(spec=SessionManager) + ac.sessions = mock_sm + key = "dream:20260602-155256" + ac._archiving.add(key) + ac._summaries[key] = ("Hot summary.", datetime(2026, 6, 2, 15, 52, 56)) + session = _make_session( + key=key, + updated_at=datetime.now() - timedelta(minutes=20), + metadata={ + "_last_summary": { + "text": "Cold summary.", + "last_active": "2026-06-02T15:52:56", + }, + }, + ) + + result_session, summary = ac.prepare_session(session, key) + + mock_sm.get_or_create.assert_not_called() + assert result_session is session + assert summary is None + assert key not in ac._archiving + assert key not in ac._summaries + def 
test_cold_path_metadata_not_dict_returns_none(self): """If metadata _last_summary is not a dict, should return None summary.""" ac = _make_autocompact() diff --git a/tests/agent/test_consolidator.py b/tests/agent/test_consolidator.py index 071ba9da2..7e3c31960 100644 --- a/tests/agent/test_consolidator.py +++ b/tests/agent/test_consolidator.py @@ -10,6 +10,7 @@ from nanobot.agent.memory import ( MemoryStore, ) from nanobot.session.manager import Session +from nanobot.utils.prompt_templates import render_template @pytest.fixture @@ -76,6 +77,17 @@ class TestConsolidatorSummarize: assert result is None +class TestConsolidatorPromptContract: + def test_archive_prompt_outputs_attribute_tags_without_missing_context_claims(self): + prompt = render_template("agent/consolidator_archive.md", strip=True) + + assert "SNIP" in prompt + for mark in ("[permanent]", "[durable]", "[ephemeral]", "[correction]", "[skip]"): + assert mark in prompt + assert "check context below" not in prompt.lower() + assert "Do not mark something [skip] merely because it might already exist" in prompt + + class TestConsolidatorArchiveErrorHandling: """archive() must fall back to raw_archive when the LLM returns an error response (finish_reason == 'error'), e.g. overloaded / quota exceeded. 
diff --git a/tests/agent/test_dream.py b/tests/agent/test_dream.py index 27e49fda5..412bb1439 100644 --- a/tests/agent/test_dream.py +++ b/tests/agent/test_dream.py @@ -1,309 +1,403 @@ -"""Tests for the Dream class — two-phase memory consolidation via AgentRunner.""" - -import json +"""Tests for Dream memory consolidation — build_dream_prompt and cursor management.""" import pytest -from unittest.mock import AsyncMock, MagicMock, patch - -from nanobot.agent.memory import Dream, MemoryStore -from nanobot.agent.runner import AgentRunResult -from nanobot.agent.skills import BUILTIN_SKILLS_DIR -from nanobot.utils.gitstore import LineAge +from nanobot.agent.memory import MemoryStore +from nanobot.providers.base import LLMResponse +from nanobot.utils.prompt_templates import render_template @pytest.fixture def store(tmp_path): s = MemoryStore(tmp_path) s.write_soul("# Soul\n- Helpful") - s.write_user("# User\n- Developer") s.write_memory("# Memory\n- Project X active") return s -@pytest.fixture -def mock_provider(): - p = MagicMock() - p.chat_with_retry = AsyncMock() - return p +class TestBuildDreamPrompt: + def test_returns_none_when_no_history(self, store): + assert store.build_dream_prompt() is None + def test_returns_prompt_with_history(self, store): + store.append_history("hello") + result = store.build_dream_prompt() + assert result is not None + prompt, cursor = result + assert cursor > 0 + assert "## Conversation History" in prompt + assert "hello" in prompt -@pytest.fixture -def mock_runner(): - return MagicMock() + def test_cursor_advances_only_new_entries(self, store): + store.append_history("first") + r1 = store.build_dream_prompt() + assert r1 is not None + _, c1 = r1 + # Cursor not yet advanced — same entries are still available + assert store.build_dream_prompt() is not None -@pytest.fixture -def dream(store, mock_provider, mock_runner): - d = Dream(store=store, provider=mock_provider, model="test-model", max_batch_size=5) - d._runner = mock_runner - return 
d + # Advance cursor + store.set_last_dream_cursor(c1) + # Now no new entries + assert store.build_dream_prompt() is None + # Add new entry + store.append_history("second") + r2 = store.build_dream_prompt() + assert r2 is not None + _, c2 = r2 + assert c2 > c1 -def _make_run_result( - stop_reason="completed", - final_content=None, - tool_events=None, - usage=None, -): - return AgentRunResult( - final_content=final_content or stop_reason, - stop_reason=stop_reason, - messages=[], - tools_used=[], - usage={}, - tool_events=tool_events or [], - ) + def test_prompt_includes_skill_creator_path(self, store): + store.append_history("test") + result = store.build_dream_prompt() + assert result is not None + prompt, _ = result + assert "skill-creator" in prompt + def test_truncates_long_entries(self, store): + long_content = "x" * 2000 + store.append_history(long_content) + result = store.build_dream_prompt() + assert result is not None + prompt, _ = result + # The full 2000 chars should not appear — truncated to 500 + assert long_content not in prompt + assert "x" * 500 in prompt -class TestDreamRun: - async def test_noop_when_no_unprocessed_history(self, dream, mock_provider, mock_runner, store): - """Dream should not call LLM when there's nothing to process.""" - result = await dream.run() - assert result is False - mock_provider.chat_with_retry.assert_not_called() - mock_runner.run.assert_not_called() + def test_batches_oldest_unprocessed_entries_first(self, store): + for i in range(25): + store.append_history(f"entry-{i + 1:02d}") - async def test_calls_runner_for_unprocessed_entries(self, dream, mock_provider, mock_runner, store): - """Dream should call AgentRunner when there are unprocessed history entries.""" - store.append_history("User prefers dark mode") - mock_provider.chat_with_retry.return_value = MagicMock(content="New fact") - mock_runner.run = AsyncMock(return_value=_make_run_result( - tool_events=[{"name": "edit_file", "status": "ok", "detail": 
"memory/MEMORY.md"}], - )) - result = await dream.run() - assert result is True - mock_runner.run.assert_called_once() - spec = mock_runner.run.call_args[0][0] - assert spec.max_iterations == 10 - assert spec.fail_on_tool_error is False + result = store.build_dream_prompt(max_entries=20) + assert result is not None + prompt, cursor = result - async def test_advances_dream_cursor(self, dream, mock_provider, mock_runner, store): - """Dream should advance the cursor after processing.""" - store.append_history("event 1") - store.append_history("event 2") - mock_provider.chat_with_retry.return_value = MagicMock(content="Nothing new") - mock_runner.run = AsyncMock(return_value=_make_run_result()) - await dream.run() - assert store.get_last_dream_cursor() == 2 + assert cursor == 20 + assert "entry-01" in prompt + assert "entry-20" in prompt + assert "entry-21" not in prompt - async def test_compacts_processed_history(self, dream, mock_provider, mock_runner, store): - """Dream should compact history after processing.""" - store.append_history("event 1") - store.append_history("event 2") - store.append_history("event 3") - mock_provider.chat_with_retry.return_value = MagicMock(content="Nothing new") - mock_runner.run = AsyncMock(return_value=_make_run_result()) - await dream.run() - # After Dream, cursor is advanced and 3, compact keeps last max_history_entries - entries = store.read_unprocessed_history(since_cursor=0) - assert all(e["cursor"] > 0 for e in entries) + store.set_last_dream_cursor(cursor) + next_result = store.build_dream_prompt(max_entries=20) + assert next_result is not None + next_prompt, next_cursor = next_result + assert next_cursor == 25 + assert "entry-21" in next_prompt + assert "entry-25" in next_prompt - async def test_skill_phase_uses_builtin_skill_creator_path(self, dream, mock_provider, mock_runner, store): - """Dream should point skill creation guidance at the builtin skill-creator template.""" - store.append_history("Repeated workflow one") - 
store.append_history("Repeated workflow two") - mock_provider.chat_with_retry.return_value = MagicMock(content="[SKILL] test-skill: test description") - mock_runner.run = AsyncMock(return_value=_make_run_result()) - - await dream.run() - - spec = mock_runner.run.call_args[0][0] - system_prompt = spec.initial_messages[0]["content"] - expected = str(BUILTIN_SKILLS_DIR / "skill-creator" / "SKILL.md") - assert expected in system_prompt - - async def test_skill_write_tool_accepts_workspace_relative_skill_path(self, dream, store): - """Dream skill creation should allow skills//SKILL.md relative to workspace root.""" - write_tool = dream._tools.get("write_file") - assert write_tool is not None - - result = await write_tool.execute( - path="skills/test-skill/SKILL.md", - content="---\nname: test-skill\ndescription: Test\n---\n", + def test_dream_prompt_consumes_consolidator_attribute_tags(self): + prompt = render_template( + "agent/dream.md", + strip=True, + skill_creator_path="skills/skill-creator/SKILL.md", ) - assert "Successfully wrote" in result - assert (store.workspace / "skills" / "test-skill" / "SKILL.md").exists() + assert "History attribute tags" in prompt + assert "[skip]: audit-only" in prompt + assert "[correction]: replace the older conflicting fact" in prompt + assert "Always strip these bracketed tags from saved memory content" in prompt - async def test_phase1_prompt_includes_line_age_annotations(self, dream, mock_provider, mock_runner, store): - """Phase 1 prompt should have per-line age suffixes in MEMORY.md when git is available.""" - store.append_history("some event") - mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]") - mock_runner.run = AsyncMock(return_value=_make_run_result()) - # Init git so line_ages works - store.git.init() - store.git.auto_commit("initial memory state") +class TestDreamTools: + def test_dream_tools_are_restricted_to_file_edits(self, store): + tools = store.build_dream_tools() - await dream.run() + assert 
set(tools.tool_names) == { + "apply_patch", + "edit_file", + "read_file", + "write_file", + } - # The MEMORY.md section should not crash and should contain the memory content - call_args = mock_provider.chat_with_retry.call_args - user_msg = call_args.kwargs.get("messages", call_args[1].get("messages"))[1]["content"] - assert "## Current MEMORY.md" in user_msg - async def test_phase1_annotates_only_memory_not_soul_or_user(self, dream, mock_provider, mock_runner, store): - """SOUL.md and USER.md should never have age annotations — they are permanent.""" - store.append_history("some event") - mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]") - mock_runner.run = AsyncMock(return_value=_make_run_result()) +class TestEphemeralDirect: + """Tests for the ephemeral flag that skips history.jsonl writes for Dream.""" + + @pytest.fixture + def _make_loop(self, tmp_path): + """Factory fixture that builds a minimal AgentLoop with mocked deps.""" + from unittest.mock import AsyncMock, MagicMock, patch + + from nanobot.agent.loop import AgentLoop + from nanobot.agent.memory import MemoryStore + from nanobot.bus.queue import MessageBus + + store = MemoryStore(tmp_path) + store.write_soul("# Soul") + store.write_memory("# Memory") + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + provider.supports_tools = True + provider.generation = MagicMock(max_tokens=4096) + provider.chat_with_retry = AsyncMock( + return_value=MagicMock( + content="done", finish_reason="stop", tool_calls=[], usage={}, + ) + ) + + with ( + patch("nanobot.agent.loop.SessionManager"), + patch("nanobot.agent.loop.SubagentManager") as mock_sub, + patch("nanobot.agent.loop.Consolidator") as mock_consolidator_cls, + ): + mock_sub.return_value.cancel_by_session = AsyncMock(return_value=0) + mock_consolidator_cls.return_value.maybe_consolidate_by_tokens = AsyncMock() + loop = AgentLoop( + bus=bus, + provider=provider, + workspace=tmp_path, + 
context_window_tokens=8000, + ) + + return loop, store + + async def test_ephemeral_skips_raw_archive(self, tmp_path, _make_loop): + """When ephemeral=True, raw_archive must not be called.""" + from unittest.mock import patch + + loop, store = _make_loop + + with patch.object(loop.context.memory, "raw_archive") as mock_archive: + await loop.process_direct( + "test", session_key="dream:test", ephemeral=True, + ) + mock_archive.assert_not_called() + + async def test_non_ephemeral_runs_normally(self, tmp_path, _make_loop): + """Without ephemeral, the normal path is untouched — no crash.""" + loop, store = _make_loop + await loop.process_direct("test", session_key="cli:normal") + + async def test_ephemeral_sets_ctx_flag(self, tmp_path, _make_loop): + """Verify that ephemeral=True is forwarded to TurnContext.""" + from unittest.mock import patch + + loop, store = _make_loop + + captured = {} + + original_save = loop._state_save + + async def patched_save(ctx): + captured["ephemeral"] = ctx.ephemeral + return await original_save(ctx) + + with patch.object(loop, "_state_save", side_effect=patched_save): + await loop.process_direct( + "test", session_key="dream:check", ephemeral=True, + ) + + assert captured.get("ephemeral") is True + + async def test_default_ephemeral_is_false(self, tmp_path, _make_loop): + """By default ephemeral is False in TurnContext.""" + from unittest.mock import patch + + loop, store = _make_loop + + captured = {} + + original_save = loop._state_save + + async def patched_save(ctx): + captured["ephemeral"] = ctx.ephemeral + return await original_save(ctx) + + with patch.object(loop, "_state_save", side_effect=patched_save): + await loop.process_direct("test", session_key="cli:normal") + + assert captured.get("ephemeral") is False + + async def test_ephemeral_skips_consolidator(self, tmp_path, _make_loop): + """When ephemeral=True, consolidator.maybe_consolidate_by_tokens is not called.""" + from unittest.mock import patch + + loop, store = 
_make_loop + + with patch.object( + loop.consolidator, "maybe_consolidate_by_tokens", + ) as mock_consolidate: + await loop.process_direct( + "test", session_key="dream:consolidate-test", ephemeral=True, + ) + mock_consolidate.assert_not_called() + + async def test_ephemeral_response_reports_stop_reason(self, tmp_path, _make_loop): + loop, store = _make_loop + loop.provider.chat_with_retry.return_value = LLMResponse( + content="provider error", + finish_reason="error", + ) + + resp = await loop.process_direct( + "test", session_key="dream:error", ephemeral=True, + ) + + assert resp is not None + assert resp.metadata["_stop_reason"] == "error" + assert MemoryStore.dream_run_completed(resp) is False + + async def test_dream_turn_can_skip_unbatched_recent_history(self, tmp_path): + """Dream must only see the batch selected by build_dream_prompt.""" + from unittest.mock import MagicMock + + from nanobot.agent.loop import AgentLoop + from nanobot.bus.queue import MessageBus + + store = MemoryStore(tmp_path) + for i in range(60): + store.append_history(f"entry-{i + 1:02d}") + + result = store.build_dream_prompt(max_entries=20) + assert result is not None + prompt, cursor = result + assert cursor == 20 + + captured: dict[str, list[dict]] = {} + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + provider.supports_tools = True + provider.generation = MagicMock(max_tokens=4096) + + async def chat_with_retry(**kwargs): + captured["messages"] = kwargs["messages"] + return LLMResponse(content="done", finish_reason="stop") + + provider.chat_with_retry = chat_with_retry + loop = AgentLoop( + bus=MessageBus(), + provider=provider, + workspace=tmp_path, + context_window_tokens=8000, + ) + + await loop.process_direct( + prompt, + session_key="dream:test", + ephemeral=True, + tools=store.build_dream_tools(), + ) + + messages = captured["messages"] + system_prompt = messages[0]["content"] + request_text = "\n".join(str(message.get("content", "")) for 
message in messages) + assert "# Recent History" not in system_prompt + assert "entry-01" in request_text + assert "entry-20" in request_text + assert "entry-21" not in request_text + assert "entry-60" not in request_text + + +class TestEphemeralHooks: + """When ephemeral=True, extra hooks must not fire.""" + + @pytest.fixture + def _make_loop_with_spy(self, tmp_path): + """Build an AgentLoop with a spy hook to verify hook firing behavior.""" + from unittest.mock import AsyncMock, MagicMock, patch + + from nanobot.agent.hook import AgentHook + from nanobot.agent.loop import AgentLoop + from nanobot.bus.queue import MessageBus + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + provider.supports_tools = True + provider.generation = MagicMock(max_tokens=4096) + provider.chat_with_retry = AsyncMock( + return_value=MagicMock( + content="done", finish_reason="stop", tool_calls=[], usage={}, + ) + ) + + spy = MagicMock(spec=AgentHook) + spy.wants_streaming.return_value = False + spy.before_iteration = AsyncMock() + spy.after_iteration = AsyncMock() + + with ( + patch("nanobot.agent.loop.SessionManager"), + patch("nanobot.agent.loop.SubagentManager") as mock_sub, + patch("nanobot.agent.loop.Consolidator") as mock_consolidator_cls, + ): + mock_sub.return_value.cancel_by_session = AsyncMock(return_value=0) + mock_consolidator_cls.return_value.maybe_consolidate_by_tokens = AsyncMock() + loop = AgentLoop( + bus=bus, + provider=provider, + workspace=tmp_path, + context_window_tokens=8000, + hooks=[spy], + ) + + return loop, spy + + async def test_extra_hooks_skipped_when_ephemeral(self, tmp_path, _make_loop_with_spy): + """When ephemeral=True, extra hooks must not fire.""" + loop, spy = _make_loop_with_spy + + await loop.process_direct( + "test", session_key="dream:hook-test", ephemeral=True, + ) + spy.before_iteration.assert_not_called() + spy.after_iteration.assert_not_called() + + async def 
test_extra_hooks_fire_for_normal_sessions(self, tmp_path, _make_loop_with_spy): + """Without ephemeral, extra hooks should fire normally.""" + loop, spy = _make_loop_with_spy + + await loop.process_direct("test", session_key="cli:normal") + spy.before_iteration.assert_called() + + +class TestDreamCommitMessage: + async def test_commit_includes_response_summary(self, tmp_path): + """Git auto-commit after Dream should include the LLM response in the body.""" + import subprocess + from unittest.mock import AsyncMock, MagicMock + + from nanobot.agent.memory import MemoryStore + + store = MemoryStore(tmp_path) + store.write_soul("# Soul") + store.write_memory("# Memory") + store.append_history("user discussed project goals") + + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + provider.supports_tools = True + provider.generation = MagicMock(max_tokens=4096) + provider.chat_with_retry = AsyncMock(return_value=MagicMock( + content="Identified 2 new facts about project goals", + finish_reason="stop", + tool_calls=[], + usage={}, + )) store.git.init() store.git.auto_commit("initial state") - await dream.run() - - call_args = mock_provider.chat_with_retry.call_args - user_msg = call_args.kwargs.get("messages", call_args[1].get("messages"))[1]["content"] - # The ← suffix should only appear in MEMORY.md section - memory_section = user_msg.split("## Current MEMORY.md")[1].split("## Current SOUL.md")[0] - soul_section = user_msg.split("## Current SOUL.md")[1].split("## Current USER.md")[0] - user_section = user_msg.split("## Current USER.md")[1] - # SOUL and USER should not contain age arrows - assert "\u2190" not in soul_section - assert "\u2190" not in user_section - - async def test_phase1_prompt_works_without_git(self, dream, mock_provider, mock_runner, store): - """Phase 1 should work fine even if git is not initialized (no age annotations).""" - store.append_history("some event") - mock_provider.chat_with_retry.return_value = 
MagicMock(content="[SKIP]") - mock_runner.run = AsyncMock(return_value=_make_run_result()) - - await dream.run() - - # Should still succeed — just without age annotations - mock_provider.chat_with_retry.assert_called_once() - call_args = mock_provider.chat_with_retry.call_args - user_msg = call_args.kwargs.get("messages", call_args[1].get("messages"))[1]["content"] - assert "## Current MEMORY.md" in user_msg - - async def test_phase1_prompt_carries_age_suffix_for_stale_lines( - self, dream, mock_provider, mock_runner, store, - ): - """End-to-end: ages >14d must appear verbatim in the LLM prompt, ages ≤14d must not.""" - # MEMORY.md fixture has 2 non-blank lines ("# Memory" and "- Project X active"). - # Inject four ages to cover threshold boundaries: >14 suffix, ==14 no suffix, <14 no suffix. - store.write_memory("# Memory\n- Project X active\n- fresh item\n- edge case line") - store.append_history("some event") - mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]") - mock_runner.run = AsyncMock(return_value=_make_run_result()) - - fake_ages = [ - LineAge(age_days=30), # "# Memory" → should get ← 30d - LineAge(age_days=20), # "- Project X..." → should get ← 20d - LineAge(age_days=14), # "- fresh item" → ==14, threshold is strictly >14, no suffix - LineAge(age_days=5), # "- edge case..." 
→ no suffix - ] - with patch.object(store.git, "line_ages", return_value=fake_ages): - await dream.run() - - call_args = mock_provider.chat_with_retry.call_args - user_msg = call_args.kwargs.get("messages", call_args[1].get("messages"))[1]["content"] - memory_section = user_msg.split("## Current MEMORY.md")[1].split("## Current SOUL.md")[0] - assert "\u2190 30d" in memory_section - assert "\u2190 20d" in memory_section - assert "\u2190 14d" not in memory_section - assert "\u2190 5d" not in memory_section - - async def test_phase1_skips_annotation_when_disabled( - self, dream, mock_provider, mock_runner, store, - ): - """`annotate_line_ages=False` must bypass the git lookup entirely and keep MEMORY.md raw.""" - store.append_history("some event") - mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]") - mock_runner.run = AsyncMock(return_value=_make_run_result()) - - dream.annotate_line_ages = False - # line_ages must be bypassed entirely — verify with a spy rather than a - # raising side_effect, because _annotate_with_ages catches Exception - # (which swallows AssertionError) and would hide an accidental call. - with patch.object(store.git, "line_ages") as mock_line_ages: - await dream.run() - mock_line_ages.assert_not_called() - - call_args = mock_provider.chat_with_retry.call_args - user_msg = call_args.kwargs.get("messages", call_args[1].get("messages"))[1]["content"] - assert "\u2190" not in user_msg - - async def test_phase1_skips_annotation_on_line_ages_length_mismatch( - self, dream, mock_provider, mock_runner, store, - ): - """If ages length != lines length (dirty working tree), skip annotation instead of mis-tagging.""" - # MEMORY.md has 2 non-blank lines but we hand back only 1 age → mismatch. 
- store.append_history("some event") - mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]") - mock_runner.run = AsyncMock(return_value=_make_run_result()) - - with patch.object(store.git, "line_ages", return_value=[LineAge(age_days=999)]): - await dream.run() - - call_args = mock_provider.chat_with_retry.call_args - user_msg = call_args.kwargs.get("messages", call_args[1].get("messages"))[1]["content"] - memory_section = user_msg.split("## Current MEMORY.md")[1].split("## Current SOUL.md")[0] - # No age arrow at all — we refused to annotate rather than tag the wrong line. - assert "\u2190" not in memory_section - - async def test_phase1_prompt_uses_threshold_from_template_var( - self, dream, mock_provider, mock_runner, store, - ): - """System prompt should reference the stale-threshold constant, not a hardcoded 14.""" - store.append_history("some event") - mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]") - mock_runner.run = AsyncMock(return_value=_make_run_result()) - - await dream.run() - - system_msg = mock_provider.chat_with_retry.call_args.kwargs["messages"][0]["content"] - # The template renders with stale_threshold_days=14 → LLM must see "N>14" - assert "N>14" in system_msg - - -class TestDreamPromptCaps: - """Dream's Phase 1/2 prompt must not be poisoned by a legacy oversized - history entry or a runaway MEMORY.md. Without caps, a single pre-#3412 - raw_archive dump in history.jsonl would make every subsequent Dream run - exceed the context window and silently advance the cursor past real work. 
- """ - - async def test_phase1_caps_huge_memory_file( - self, dream, mock_provider, mock_runner, store, - ): - """A MEMORY.md much larger than _MEMORY_FILE_MAX_CHARS must be truncated - in the prompt preview (full content is still reachable via read_file).""" - store.write_memory("M" * (dream._MEMORY_FILE_MAX_CHARS * 5)) - store.append_history("some event") - mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]") - mock_runner.run = AsyncMock(return_value=_make_run_result()) - - await dream.run() - - user_msg = mock_provider.chat_with_retry.call_args.kwargs["messages"][1]["content"] - memory_section = user_msg.split("## Current MEMORY.md")[1].split("## Current SOUL.md")[0] - assert len(memory_section) < dream._MEMORY_FILE_MAX_CHARS + 500 - - async def test_phase1_caps_huge_history_entry( - self, dream, mock_provider, mock_runner, store, - ): - """A legacy oversized history entry (e.g. pre-#3412 raw_archive dump) - must not explode the Phase 1 prompt — each entry is capped in the - preview, even though the JSONL record itself stays full-size.""" - # Bypass the append_history cap by writing directly, simulating a - # record that was written by an older nanobot build before any caps. - store.history_file.write_text( - json.dumps({ - "cursor": 1, - "timestamp": "2026-04-01 10:00", - "content": "H" * (dream._HISTORY_ENTRY_PREVIEW_MAX_CHARS * 8), - }) + "\n", - encoding="utf-8", + # Simulate what the cron handler does: produce a resp with content, + # build the commit message via the actual function, then commit. 
+ resp_content = "Identified 2 new facts about project goals" + resp = MagicMock(content=resp_content) + msg = MemoryStore.build_dream_commit_message( + "dream: periodic memory consolidation", resp, ) - mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]") - mock_runner.run = AsyncMock(return_value=_make_run_result()) - await dream.run() - - user_msg = mock_provider.chat_with_retry.call_args.kwargs["messages"][1]["content"] - history_section = user_msg.split("## Conversation History\n")[1].split("\n\n## Current Date")[0] - assert len(history_section) < dream._HISTORY_ENTRY_PREVIEW_MAX_CHARS + 500 + # Write a change so auto_commit has something to commit + store.write_memory("# Memory\n- Updated by Dream") + sha = store.git.auto_commit(msg) + assert sha is not None + log = subprocess.check_output( + ["git", "log", "-1", "--format=%B"], + cwd=str(tmp_path), text=True, + ).strip() + assert "dream: periodic memory consolidation" in log + assert "Identified 2 new facts" in log diff --git a/tests/agent/test_dream_session.py b/tests/agent/test_dream_session.py new file mode 100644 index 000000000..f1c42263e --- /dev/null +++ b/tests/agent/test_dream_session.py @@ -0,0 +1,64 @@ +"""Tests for Dream session key generation and rotation.""" +import time +from datetime import datetime + +from nanobot.agent.memory import MemoryStore + + +class TestDreamSessionKey: + def test_contains_timestamp(self): + key = MemoryStore.dream_session_key() + assert key.startswith("dream:") + ts_part = key.split(":", 1)[1] + datetime.strptime(ts_part, "%Y%m%d-%H%M%S") + + def test_unique_across_calls(self): + k1 = MemoryStore.dream_session_key() + time.sleep(1.1) + k2 = MemoryStore.dream_session_key() + assert k1 != k2 + + +class TestPruneDreamSessions: + def test_keeps_n_most_recent(self, tmp_path): + sessions_dir = tmp_path / "sessions" + sessions_dir.mkdir() + + for i in range(15): + key = f"dream:20260528-{100000 + i:06d}" + safe_key = key.replace(":", "_") + path = 
sessions_dir / f"{safe_key}.jsonl" + path.write_text( + f'{{"_type": "metadata", "key": "{key}", ' + f'"created_at": "2026-05-28T10:00:{i:02d}", ' + f'"updated_at": "2026-05-28T10:00:{i:02d}"}}\n', + encoding="utf-8", + ) + + normal_path = sessions_dir / "telegram_123.jsonl" + normal_path.write_text('{"_type": "metadata"}\n', encoding="utf-8") + + MemoryStore.prune_dream_sessions(sessions_dir, keep=10) + + dream_files = sorted(sessions_dir.glob("dream_*.jsonl")) + assert len(dream_files) == 10 + remaining_keys = [f.stem for f in dream_files] + assert "dream_20260528-100000" not in remaining_keys + assert "dream_20260528-100014" in remaining_keys + assert normal_path.exists() + + def test_noop_when_under_limit(self, tmp_path): + sessions_dir = tmp_path / "sessions" + sessions_dir.mkdir() + for i in range(3): + key = f"dream:20260528-{100000 + i:06d}" + safe_key = key.replace(":", "_") + (sessions_dir / f"{safe_key}.jsonl").write_text("{}", encoding="utf-8") + + MemoryStore.prune_dream_sessions(sessions_dir, keep=10) + assert len(list(sessions_dir.glob("dream_*.jsonl"))) == 3 + + def test_empty_dir_noop(self, tmp_path): + sessions_dir = tmp_path / "sessions" + sessions_dir.mkdir() + MemoryStore.prune_dream_sessions(sessions_dir, keep=10) diff --git a/tests/agent/test_hook_composite.py b/tests/agent/test_hook_composite.py index 9b6c2820d..315c3eeaa 100644 --- a/tests/agent/test_hook_composite.py +++ b/tests/agent/test_hook_composite.py @@ -299,8 +299,7 @@ def _make_loop(tmp_path, hooks=None): with patch("nanobot.agent.loop.ContextBuilder"), \ patch("nanobot.agent.loop.SessionManager"), \ patch("nanobot.agent.loop.SubagentManager") as mock_sub_mgr, \ - patch("nanobot.agent.loop.Consolidator"), \ - patch("nanobot.agent.loop.Dream"): + patch("nanobot.agent.loop.Consolidator"): mock_sub_mgr.return_value.cancel_by_session = AsyncMock(return_value=0) loop = AgentLoop( bus=bus, provider=provider, workspace=tmp_path, hooks=hooks, diff --git 
a/tests/agent/test_runtime_refresh.py b/tests/agent/test_runtime_refresh.py index 1c7ca01c5..18723f5f3 100644 --- a/tests/agent/test_runtime_refresh.py +++ b/tests/agent/test_runtime_refresh.py @@ -47,9 +47,6 @@ def test_provider_refresh_updates_all_model_dependents(tmp_path: Path) -> None: assert loop.consolidator.model == "new-model" assert loop.consolidator.context_window_tokens == 2000 assert loop.consolidator.max_completion_tokens == 456 - assert loop.dream.provider is new_provider - assert loop.dream.model == "new-model" - assert loop.dream._runner.provider is new_provider def test_llm_runtime_refreshes_provider_snapshot(tmp_path: Path) -> None: diff --git a/tests/agent/test_self_model_preset.py b/tests/agent/test_self_model_preset.py index 0f52f777b..1ba6f42e7 100644 --- a/tests/agent/test_self_model_preset.py +++ b/tests/agent/test_self_model_preset.py @@ -61,7 +61,6 @@ def test_model_preset_setter_updates_state(tmp_path) -> None: assert loop.consolidator.model == "openai/gpt-4.1" assert loop.consolidator.context_window_tokens == 32_768 assert loop.consolidator.max_completion_tokens == 4096 - assert loop.dream.model == "openai/gpt-4.1" def test_model_preset_setter_calls_runtime_model_publisher(tmp_path) -> None: @@ -112,8 +111,6 @@ def test_model_preset_setter_replaces_provider_from_snapshot(tmp_path) -> None: assert loop.subagents.provider is new_provider assert loop.subagents.runner.provider is new_provider assert loop.consolidator.provider is new_provider - assert loop.dream.provider is new_provider - assert loop.dream._runner.provider is new_provider assert loop.model == "anthropic/claude-opus-4-5" assert loop.context_window_tokens == 200_000 assert loop.consolidator.max_completion_tokens == 2048 @@ -140,7 +137,6 @@ def test_model_preset_setter_failure_leaves_old_state(tmp_path) -> None: assert loop.model == "base-model" assert loop.subagents.model == "base-model" assert loop.consolidator.model == "base-model" - assert loop.dream.model == "base-model" 
assert loop.context_window_tokens == 1000 assert loop.consolidator.max_completion_tokens == 123 diff --git a/tests/agent/test_unified_session.py b/tests/agent/test_unified_session.py index aa42c4e55..48fd91bdc 100644 --- a/tests/agent/test_unified_session.py +++ b/tests/agent/test_unified_session.py @@ -39,8 +39,7 @@ def _make_loop(tmp_path: Path, unified_session: bool = False) -> AgentLoop: provider.get_default_model.return_value = "test-model" with patch("nanobot.agent.loop.SessionManager"), \ - patch("nanobot.agent.loop.SubagentManager") as MockSubMgr, \ - patch("nanobot.agent.loop.Dream"): + patch("nanobot.agent.loop.SubagentManager") as MockSubMgr: MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0) loop = AgentLoop( bus=bus, diff --git a/tests/cli/test_commands.py b/tests/cli/test_commands.py index af16d7a67..353d04ecb 100644 --- a/tests/cli/test_commands.py +++ b/tests/cli/test_commands.py @@ -1607,14 +1607,6 @@ def test_gateway_health_endpoint_binds_and_serves_expected_responses( config.gateway.port = 18791 captured: dict[str, object] = {} - class _FakeDream: - model = None - max_batch_size = 0 - max_iterations = 0 - - async def run(self) -> None: - return None - class _FakeSessionManager: def flush_all(self) -> int: return 0 @@ -1626,7 +1618,6 @@ def test_gateway_health_endpoint_binds_and_serves_expected_responses( def __init__(self, **_kwargs) -> None: self.model = "test-model" self.provider = object() - self.dream = _FakeDream() self.sessions = _FakeSessionManager() def llm_runtime(self) -> None: diff --git a/tests/command/test_model_command.py b/tests/command/test_model_command.py index 173a27022..f95abee30 100644 --- a/tests/command/test_model_command.py +++ b/tests/command/test_model_command.py @@ -87,7 +87,6 @@ async def test_model_command_switches_preset(tmp_path) -> None: assert loop.model == "openai/gpt-4.1" assert loop.subagents.model == "openai/gpt-4.1" assert loop.consolidator.model == "openai/gpt-4.1" - assert 
loop.dream.model == "openai/gpt-4.1" @pytest.mark.asyncio diff --git a/tests/config/test_env_interpolation.py b/tests/config/test_env_interpolation.py index 4ed671975..a0e47d20b 100644 --- a/tests/config/test_env_interpolation.py +++ b/tests/config/test_env_interpolation.py @@ -82,38 +82,37 @@ class TestResolveConfig: assert saved["channels"]["telegram"]["token"] == "${MY_TOKEN}" def test_preserves_excluded_fields_when_no_env_refs(self, tmp_path): - """Regression: fields with ``exclude=True`` (e.g. DreamConfig.cron) + """Regression: fields with ``exclude=True`` (e.g. ProviderConfig.openai_codex) must survive ``resolve_config_env_vars`` when the config has no ``${VAR}`` references. Previously the unconditional dump→revalidate roundtrip silently dropped them.""" config_path = tmp_path / "config.json" config_path.write_text( json.dumps( - {"agents": {"defaults": {"dream": {"cron": "5 11 * * *"}}}} + {"providers": {"openaiCodex": {"apiKey": "secret"}}} ), encoding="utf-8", ) raw = load_config(config_path) - assert raw.agents.defaults.dream.cron == "5 11 * * *" + assert raw.providers.openai_codex.api_key == "secret" resolved = resolve_config_env_vars(raw) - assert resolved.agents.defaults.dream.cron == "5 11 * * *" - assert resolved.agents.defaults.dream.describe_schedule() == ( - "cron 5 11 * * * (legacy)" - ) + assert resolved.providers.openai_codex.api_key == "secret" def test_preserves_excluded_fields_with_env_refs(self, tmp_path, monkeypatch): """Excluded fields must also survive when the config contains - ``${VAR}`` refs elsewhere. An in-place walk preserves the legacy - ``cron`` override even as unrelated string fields are substituted.""" + ``${VAR}`` refs elsewhere. 
An in-place walk preserves the excluded + field even as unrelated string fields are substituted.""" monkeypatch.setenv("TEST_API_KEY", "resolved-key") config_path = tmp_path / "config.json" config_path.write_text( json.dumps( { - "agents": {"defaults": {"dream": {"cron": "5 11 * * *"}}}, - "providers": {"groq": {"apiKey": "${TEST_API_KEY}"}}, + "providers": { + "openaiCodex": {"apiKey": "secret"}, + "groq": {"apiKey": "${TEST_API_KEY}"}, + } } ), encoding="utf-8", @@ -123,7 +122,4 @@ class TestResolveConfig: resolved = resolve_config_env_vars(raw) assert resolved.providers.groq.api_key == "resolved-key" - assert resolved.agents.defaults.dream.cron == "5 11 * * *" - assert resolved.agents.defaults.dream.describe_schedule() == ( - "cron 5 11 * * * (legacy)" - ) + assert resolved.providers.openai_codex.api_key == "secret" diff --git a/tests/test_openai_api.py b/tests/test_openai_api.py index 7cb85a539..59df6889c 100644 --- a/tests/test_openai_api.py +++ b/tests/test_openai_api.py @@ -410,7 +410,7 @@ async def test_process_direct_accepts_media() -> None: captured_msg = None - async def fake_process(msg, *, session_key="", on_progress=None, on_stream=None, on_stream_end=None): + async def fake_process(msg, *, session_key="", on_progress=None, on_stream=None, on_stream_end=None, ephemeral=False): nonlocal captured_msg captured_msg = msg return None diff --git a/webui/src/lib/types.ts b/webui/src/lib/types.ts index 970808eac..b6924cf96 100644 --- a/webui/src/lib/types.ts +++ b/webui/src/lib/types.ts @@ -349,9 +349,6 @@ export interface SettingsPayload { }; dream: { schedule: string; - max_batch_size: number; - max_iterations: number; - annotate_line_ages: boolean; }; unified_session: boolean; }; diff --git a/webui/src/tests/app-layout.test.tsx b/webui/src/tests/app-layout.test.tsx index 7b1b3f1e9..c25154050 100644 --- a/webui/src/tests/app-layout.test.tsx +++ b/webui/src/tests/app-layout.test.tsx @@ -866,9 +866,6 @@ describe("App layout", () => { }, dream: { schedule: 
"every 2h", - max_batch_size: 20, - max_iterations: 15, - annotate_line_ages: true, }, unified_session: false, }, @@ -1218,9 +1215,6 @@ describe("App layout", () => { }, dream: { schedule: "every 2h", - max_batch_size: 20, - max_iterations: 15, - annotate_line_ages: true, }, unified_session: false, }, diff --git a/webui/src/tests/settings-view.test.tsx b/webui/src/tests/settings-view.test.tsx index cff71844a..0cfe70e0c 100644 --- a/webui/src/tests/settings-view.test.tsx +++ b/webui/src/tests/settings-view.test.tsx @@ -81,9 +81,6 @@ function settingsPayload(): SettingsPayload { }, dream: { schedule: "every 2h", - max_batch_size: 20, - max_iterations: 15, - annotate_line_ages: true, }, unified_session: false, },