mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-04-30 06:45:55 +00:00
#3412 stopped the headline raw_archive bloat but left four adjacent leaks on the same pollution chain: - archive() success path appended uncapped LLM summaries to history.jsonl, so a misbehaving LLM could re-open the #3412 bug from the happy path. - maybe_consolidate_by_tokens did not advance last_consolidated when archive() fell back to raw_archive, causing duplicate [RAW] dumps of the same chunk on every subsequent call. - Dream's Phase 1/2 prompt injected MEMORY.md / SOUL.md / USER.md and each history entry without caps, so any legacy oversized record (or an unbounded user edit) would blow past the context window every dream. - append_history itself had no default cap, leaving future new callers one forgotten-cap-away from the same vector. Changes: - Cap LLM-produced summaries at 8K chars (_ARCHIVE_SUMMARY_MAX_CHARS) before writing to history.jsonl. - Advance session.last_consolidated after archive() regardless of whether it summarized or raw-archived — both outcomes materialize the chunk; still break the round loop on fallback so a degraded LLM isn't hammered. - Truncate MEMORY.md / SOUL.md / USER.md and each history entry in Dream's Phase 1 prompt preview (Phase 2 still reaches full files via read_file). - Add _HISTORY_ENTRY_HARD_CAP (64K) as belt-and-suspenders default in append_history with a once-per-store warning, so any new caller that forgets its own tighter cap gets caught and observable. Layer the caps by scope: raw_archive=16K, archive summary=8K, append_history default=64K. Tight per-caller values cover expected payloads; the wide default only catches regressions. Tests: +9 regression tests covering each fix. Full suite: 2372 passed. Made-with: Cursor
310 lines
14 KiB
Python
310 lines
14 KiB
Python
"""Tests for the Dream class — two-phase memory consolidation via AgentRunner."""
|
|
|
|
import json
|
|
|
|
import pytest
|
|
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
from nanobot.agent.memory import Dream, MemoryStore
|
|
from nanobot.agent.runner import AgentRunResult
|
|
from nanobot.agent.skills import BUILTIN_SKILLS_DIR
|
|
from nanobot.utils.gitstore import LineAge
|
|
|
|
|
|
@pytest.fixture
|
|
def store(tmp_path):
|
|
s = MemoryStore(tmp_path)
|
|
s.write_soul("# Soul\n- Helpful")
|
|
s.write_user("# User\n- Developer")
|
|
s.write_memory("# Memory\n- Project X active")
|
|
return s
|
|
|
|
|
|
@pytest.fixture
|
|
def mock_provider():
|
|
p = MagicMock()
|
|
p.chat_with_retry = AsyncMock()
|
|
return p
|
|
|
|
|
|
@pytest.fixture
|
|
def mock_runner():
|
|
return MagicMock()
|
|
|
|
|
|
@pytest.fixture
|
|
def dream(store, mock_provider, mock_runner):
|
|
d = Dream(store=store, provider=mock_provider, model="test-model", max_batch_size=5)
|
|
d._runner = mock_runner
|
|
return d
|
|
|
|
|
|
def _make_run_result(
|
|
stop_reason="completed",
|
|
final_content=None,
|
|
tool_events=None,
|
|
usage=None,
|
|
):
|
|
return AgentRunResult(
|
|
final_content=final_content or stop_reason,
|
|
stop_reason=stop_reason,
|
|
messages=[],
|
|
tools_used=[],
|
|
usage={},
|
|
tool_events=tool_events or [],
|
|
)
|
|
|
|
|
|
class TestDreamRun:
|
|
async def test_noop_when_no_unprocessed_history(self, dream, mock_provider, mock_runner, store):
|
|
"""Dream should not call LLM when there's nothing to process."""
|
|
result = await dream.run()
|
|
assert result is False
|
|
mock_provider.chat_with_retry.assert_not_called()
|
|
mock_runner.run.assert_not_called()
|
|
|
|
async def test_calls_runner_for_unprocessed_entries(self, dream, mock_provider, mock_runner, store):
|
|
"""Dream should call AgentRunner when there are unprocessed history entries."""
|
|
store.append_history("User prefers dark mode")
|
|
mock_provider.chat_with_retry.return_value = MagicMock(content="New fact")
|
|
mock_runner.run = AsyncMock(return_value=_make_run_result(
|
|
tool_events=[{"name": "edit_file", "status": "ok", "detail": "memory/MEMORY.md"}],
|
|
))
|
|
result = await dream.run()
|
|
assert result is True
|
|
mock_runner.run.assert_called_once()
|
|
spec = mock_runner.run.call_args[0][0]
|
|
assert spec.max_iterations == 10
|
|
assert spec.fail_on_tool_error is False
|
|
|
|
async def test_advances_dream_cursor(self, dream, mock_provider, mock_runner, store):
|
|
"""Dream should advance the cursor after processing."""
|
|
store.append_history("event 1")
|
|
store.append_history("event 2")
|
|
mock_provider.chat_with_retry.return_value = MagicMock(content="Nothing new")
|
|
mock_runner.run = AsyncMock(return_value=_make_run_result())
|
|
await dream.run()
|
|
assert store.get_last_dream_cursor() == 2
|
|
|
|
async def test_compacts_processed_history(self, dream, mock_provider, mock_runner, store):
|
|
"""Dream should compact history after processing."""
|
|
store.append_history("event 1")
|
|
store.append_history("event 2")
|
|
store.append_history("event 3")
|
|
mock_provider.chat_with_retry.return_value = MagicMock(content="Nothing new")
|
|
mock_runner.run = AsyncMock(return_value=_make_run_result())
|
|
await dream.run()
|
|
# After Dream, cursor is advanced and 3, compact keeps last max_history_entries
|
|
entries = store.read_unprocessed_history(since_cursor=0)
|
|
assert all(e["cursor"] > 0 for e in entries)
|
|
|
|
async def test_skill_phase_uses_builtin_skill_creator_path(self, dream, mock_provider, mock_runner, store):
|
|
"""Dream should point skill creation guidance at the builtin skill-creator template."""
|
|
store.append_history("Repeated workflow one")
|
|
store.append_history("Repeated workflow two")
|
|
mock_provider.chat_with_retry.return_value = MagicMock(content="[SKILL] test-skill: test description")
|
|
mock_runner.run = AsyncMock(return_value=_make_run_result())
|
|
|
|
await dream.run()
|
|
|
|
spec = mock_runner.run.call_args[0][0]
|
|
system_prompt = spec.initial_messages[0]["content"]
|
|
expected = str(BUILTIN_SKILLS_DIR / "skill-creator" / "SKILL.md")
|
|
assert expected in system_prompt
|
|
|
|
async def test_skill_write_tool_accepts_workspace_relative_skill_path(self, dream, store):
|
|
"""Dream skill creation should allow skills/<name>/SKILL.md relative to workspace root."""
|
|
write_tool = dream._tools.get("write_file")
|
|
assert write_tool is not None
|
|
|
|
result = await write_tool.execute(
|
|
path="skills/test-skill/SKILL.md",
|
|
content="---\nname: test-skill\ndescription: Test\n---\n",
|
|
)
|
|
|
|
assert "Successfully wrote" in result
|
|
assert (store.workspace / "skills" / "test-skill" / "SKILL.md").exists()
|
|
|
|
async def test_phase1_prompt_includes_line_age_annotations(self, dream, mock_provider, mock_runner, store):
|
|
"""Phase 1 prompt should have per-line age suffixes in MEMORY.md when git is available."""
|
|
store.append_history("some event")
|
|
mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]")
|
|
mock_runner.run = AsyncMock(return_value=_make_run_result())
|
|
|
|
# Init git so line_ages works
|
|
store.git.init()
|
|
store.git.auto_commit("initial memory state")
|
|
|
|
await dream.run()
|
|
|
|
# The MEMORY.md section should not crash and should contain the memory content
|
|
call_args = mock_provider.chat_with_retry.call_args
|
|
user_msg = call_args.kwargs.get("messages", call_args[1].get("messages"))[1]["content"]
|
|
assert "## Current MEMORY.md" in user_msg
|
|
|
|
async def test_phase1_annotates_only_memory_not_soul_or_user(self, dream, mock_provider, mock_runner, store):
|
|
"""SOUL.md and USER.md should never have age annotations — they are permanent."""
|
|
store.append_history("some event")
|
|
mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]")
|
|
mock_runner.run = AsyncMock(return_value=_make_run_result())
|
|
|
|
store.git.init()
|
|
store.git.auto_commit("initial state")
|
|
|
|
await dream.run()
|
|
|
|
call_args = mock_provider.chat_with_retry.call_args
|
|
user_msg = call_args.kwargs.get("messages", call_args[1].get("messages"))[1]["content"]
|
|
# The ← suffix should only appear in MEMORY.md section
|
|
memory_section = user_msg.split("## Current MEMORY.md")[1].split("## Current SOUL.md")[0]
|
|
soul_section = user_msg.split("## Current SOUL.md")[1].split("## Current USER.md")[0]
|
|
user_section = user_msg.split("## Current USER.md")[1]
|
|
# SOUL and USER should not contain age arrows
|
|
assert "\u2190" not in soul_section
|
|
assert "\u2190" not in user_section
|
|
|
|
async def test_phase1_prompt_works_without_git(self, dream, mock_provider, mock_runner, store):
|
|
"""Phase 1 should work fine even if git is not initialized (no age annotations)."""
|
|
store.append_history("some event")
|
|
mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]")
|
|
mock_runner.run = AsyncMock(return_value=_make_run_result())
|
|
|
|
await dream.run()
|
|
|
|
# Should still succeed — just without age annotations
|
|
mock_provider.chat_with_retry.assert_called_once()
|
|
call_args = mock_provider.chat_with_retry.call_args
|
|
user_msg = call_args.kwargs.get("messages", call_args[1].get("messages"))[1]["content"]
|
|
assert "## Current MEMORY.md" in user_msg
|
|
|
|
async def test_phase1_prompt_carries_age_suffix_for_stale_lines(
|
|
self, dream, mock_provider, mock_runner, store,
|
|
):
|
|
"""End-to-end: ages >14d must appear verbatim in the LLM prompt, ages ≤14d must not."""
|
|
# MEMORY.md fixture has 2 non-blank lines ("# Memory" and "- Project X active").
|
|
# Inject four ages to cover threshold boundaries: >14 suffix, ==14 no suffix, <14 no suffix.
|
|
store.write_memory("# Memory\n- Project X active\n- fresh item\n- edge case line")
|
|
store.append_history("some event")
|
|
mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]")
|
|
mock_runner.run = AsyncMock(return_value=_make_run_result())
|
|
|
|
fake_ages = [
|
|
LineAge(age_days=30), # "# Memory" → should get ← 30d
|
|
LineAge(age_days=20), # "- Project X..." → should get ← 20d
|
|
LineAge(age_days=14), # "- fresh item" → ==14, threshold is strictly >14, no suffix
|
|
LineAge(age_days=5), # "- edge case..." → no suffix
|
|
]
|
|
with patch.object(store.git, "line_ages", return_value=fake_ages):
|
|
await dream.run()
|
|
|
|
call_args = mock_provider.chat_with_retry.call_args
|
|
user_msg = call_args.kwargs.get("messages", call_args[1].get("messages"))[1]["content"]
|
|
memory_section = user_msg.split("## Current MEMORY.md")[1].split("## Current SOUL.md")[0]
|
|
assert "\u2190 30d" in memory_section
|
|
assert "\u2190 20d" in memory_section
|
|
assert "\u2190 14d" not in memory_section
|
|
assert "\u2190 5d" not in memory_section
|
|
|
|
async def test_phase1_skips_annotation_when_disabled(
|
|
self, dream, mock_provider, mock_runner, store,
|
|
):
|
|
"""`annotate_line_ages=False` must bypass the git lookup entirely and keep MEMORY.md raw."""
|
|
store.append_history("some event")
|
|
mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]")
|
|
mock_runner.run = AsyncMock(return_value=_make_run_result())
|
|
|
|
dream.annotate_line_ages = False
|
|
# line_ages must be bypassed entirely — verify with a spy rather than a
|
|
# raising side_effect, because _annotate_with_ages catches Exception
|
|
# (which swallows AssertionError) and would hide an accidental call.
|
|
with patch.object(store.git, "line_ages") as mock_line_ages:
|
|
await dream.run()
|
|
mock_line_ages.assert_not_called()
|
|
|
|
call_args = mock_provider.chat_with_retry.call_args
|
|
user_msg = call_args.kwargs.get("messages", call_args[1].get("messages"))[1]["content"]
|
|
assert "\u2190" not in user_msg
|
|
|
|
async def test_phase1_skips_annotation_on_line_ages_length_mismatch(
|
|
self, dream, mock_provider, mock_runner, store,
|
|
):
|
|
"""If ages length != lines length (dirty working tree), skip annotation instead of mis-tagging."""
|
|
# MEMORY.md has 2 non-blank lines but we hand back only 1 age → mismatch.
|
|
store.append_history("some event")
|
|
mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]")
|
|
mock_runner.run = AsyncMock(return_value=_make_run_result())
|
|
|
|
with patch.object(store.git, "line_ages", return_value=[LineAge(age_days=999)]):
|
|
await dream.run()
|
|
|
|
call_args = mock_provider.chat_with_retry.call_args
|
|
user_msg = call_args.kwargs.get("messages", call_args[1].get("messages"))[1]["content"]
|
|
memory_section = user_msg.split("## Current MEMORY.md")[1].split("## Current SOUL.md")[0]
|
|
# No age arrow at all — we refused to annotate rather than tag the wrong line.
|
|
assert "\u2190" not in memory_section
|
|
|
|
async def test_phase1_prompt_uses_threshold_from_template_var(
|
|
self, dream, mock_provider, mock_runner, store,
|
|
):
|
|
"""System prompt should reference the stale-threshold constant, not a hardcoded 14."""
|
|
store.append_history("some event")
|
|
mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]")
|
|
mock_runner.run = AsyncMock(return_value=_make_run_result())
|
|
|
|
await dream.run()
|
|
|
|
system_msg = mock_provider.chat_with_retry.call_args.kwargs["messages"][0]["content"]
|
|
# The template renders with stale_threshold_days=14 → LLM must see "N>14"
|
|
assert "N>14" in system_msg
|
|
|
|
|
|
class TestDreamPromptCaps:
|
|
"""Dream's Phase 1/2 prompt must not be poisoned by a legacy oversized
|
|
history entry or a runaway MEMORY.md. Without caps, a single pre-#3412
|
|
raw_archive dump in history.jsonl would make every subsequent Dream run
|
|
exceed the context window and silently advance the cursor past real work.
|
|
"""
|
|
|
|
async def test_phase1_caps_huge_memory_file(
|
|
self, dream, mock_provider, mock_runner, store,
|
|
):
|
|
"""A MEMORY.md much larger than _MEMORY_FILE_MAX_CHARS must be truncated
|
|
in the prompt preview (full content is still reachable via read_file)."""
|
|
store.write_memory("M" * (dream._MEMORY_FILE_MAX_CHARS * 5))
|
|
store.append_history("some event")
|
|
mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]")
|
|
mock_runner.run = AsyncMock(return_value=_make_run_result())
|
|
|
|
await dream.run()
|
|
|
|
user_msg = mock_provider.chat_with_retry.call_args.kwargs["messages"][1]["content"]
|
|
memory_section = user_msg.split("## Current MEMORY.md")[1].split("## Current SOUL.md")[0]
|
|
assert len(memory_section) < dream._MEMORY_FILE_MAX_CHARS + 500
|
|
|
|
async def test_phase1_caps_huge_history_entry(
|
|
self, dream, mock_provider, mock_runner, store,
|
|
):
|
|
"""A legacy oversized history entry (e.g. pre-#3412 raw_archive dump)
|
|
must not explode the Phase 1 prompt — each entry is capped in the
|
|
preview, even though the JSONL record itself stays full-size."""
|
|
# Bypass the append_history cap by writing directly, simulating a
|
|
# record that was written by an older nanobot build before any caps.
|
|
store.history_file.write_text(
|
|
json.dumps({
|
|
"cursor": 1,
|
|
"timestamp": "2026-04-01 10:00",
|
|
"content": "H" * (dream._HISTORY_ENTRY_PREVIEW_MAX_CHARS * 8),
|
|
}) + "\n",
|
|
encoding="utf-8",
|
|
)
|
|
mock_provider.chat_with_retry.return_value = MagicMock(content="[SKIP]")
|
|
mock_runner.run = AsyncMock(return_value=_make_run_result())
|
|
|
|
await dream.run()
|
|
|
|
user_msg = mock_provider.chat_with_retry.call_args.kwargs["messages"][1]["content"]
|
|
history_section = user_msg.split("## Conversation History\n")[1].split("\n\n## Current Date")[0]
|
|
assert len(history_section) < dream._HISTORY_ENTRY_PREVIEW_MAX_CHARS + 500
|
|
|