"""Tests for the lightweight Consolidator — append-only to HISTORY.md.""" import pytest import asyncio from unittest.mock import AsyncMock, MagicMock, patch from nanobot.agent.memory import Consolidator, MemoryStore, _RAW_ARCHIVE_MAX_CHARS @pytest.fixture def store(tmp_path): return MemoryStore(tmp_path) @pytest.fixture def mock_provider(): p = MagicMock() p.chat_with_retry = AsyncMock() return p @pytest.fixture def consolidator(store, mock_provider): sessions = MagicMock() sessions.save = MagicMock() return Consolidator( store=store, provider=mock_provider, model="test-model", sessions=sessions, context_window_tokens=1000, build_messages=MagicMock(return_value=[]), get_tool_definitions=MagicMock(return_value=[]), max_completion_tokens=100, ) class TestConsolidatorSummarize: async def test_summarize_appends_to_history(self, consolidator, mock_provider, store): """Consolidator should call LLM to summarize, then append to HISTORY.md.""" mock_provider.chat_with_retry.return_value = MagicMock( content="User fixed a bug in the auth module." ) messages = [ {"role": "user", "content": "fix the auth bug"}, {"role": "assistant", "content": "Done, fixed the race condition."}, ] result = await consolidator.archive(messages) assert result == "User fixed a bug in the auth module." entries = store.read_unprocessed_history(since_cursor=0) assert len(entries) == 1 async def test_summarize_raw_dumps_on_llm_failure(self, consolidator, mock_provider, store): """On LLM failure, raw-dump messages to HISTORY.md.""" mock_provider.chat_with_retry.side_effect = Exception("API error") messages = [{"role": "user", "content": "hello"}] result = await consolidator.archive(messages) assert result is None # no summary on raw dump fallback entries = store.read_unprocessed_history(since_cursor=0) assert len(entries) == 1 assert "[RAW]" in entries[0]["content"] async def test_summarize_skips_empty_messages(self, consolidator): result = await consolidator.archive([]) assert result is None class TestConsolidatorArchiveErrorHandling: """archive() must fall back to raw_archive when the LLM returns an error response (finish_reason == 'error'), e.g. overloaded / quota exceeded. See https://github.com/HKUDS/nanobot/issues/3244 """ async def test_archive_falls_back_on_error_finish_reason(self, consolidator, mock_provider, store): """LLM returning finish_reason='error' should trigger raw_archive, not write error text.""" mock_provider.chat_with_retry.return_value = MagicMock( content="Error: {'type': 'error', 'error': {'type': 'overloaded_error', 'message': 'overloaded_error (529)'}}", finish_reason="error", ) messages = [ {"role": "user", "content": "fix the auth bug"}, {"role": "assistant", "content": "Done, fixed the race condition."}, ] result = await consolidator.archive(messages) assert result is None entries = store.read_unprocessed_history(since_cursor=0) assert len(entries) == 1 assert "[RAW]" in entries[0]["content"] assert "Error:" not in entries[0]["content"] async def test_archive_preserves_summary_on_success(self, consolidator, mock_provider, store): """Normal LLM response should still produce a proper summary entry.""" mock_provider.chat_with_retry.return_value = MagicMock( content="User fixed a bug in the auth module.", finish_reason="stop", ) messages = [ {"role": "user", "content": "fix the auth bug"}, {"role": "assistant", "content": "Done."}, ] result = await consolidator.archive(messages) assert result == "User fixed a bug in the auth module." entries = store.read_unprocessed_history(since_cursor=0) assert len(entries) == 1 assert "[RAW]" not in entries[0]["content"] class TestConsolidatorTokenBudget: async def test_prompt_below_threshold_does_not_consolidate(self, consolidator): """No consolidation when tokens are within budget.""" session = MagicMock() session.last_consolidated = 0 session.messages = [{"role": "user", "content": "hi"}] session.key = "test:key" consolidator.estimate_session_prompt_tokens = MagicMock(return_value=(100, "tiktoken")) consolidator.archive = AsyncMock(return_value=True) await consolidator.maybe_consolidate_by_tokens(session) consolidator.archive.assert_not_called() async def test_large_chunk_archived_without_cap(self, consolidator): """Without chunk cap, the full range from pick_consolidation_boundary is archived.""" consolidator._SAFETY_BUFFER = 0 session = MagicMock() session.last_consolidated = 0 session.key = "test:key" session.messages = [ { "role": "user" if i in {0, 50, 61} else "assistant", "content": f"m{i}", } for i in range(70) ] consolidator.estimate_session_prompt_tokens = MagicMock( side_effect=[(1200, "tiktoken"), (400, "tiktoken")] ) # Use real pick_consolidation_boundary — it will find boundary at idx=50 # (user message at 50, token budget met) consolidator.archive = AsyncMock(return_value=True) await consolidator.maybe_consolidate_by_tokens(session) archived_chunk = consolidator.archive.await_args.args[0] # pick_consolidation_boundary returns (50, tokens) — user turn at idx 50 assert archived_chunk[0]["content"] == "m0" assert session.last_consolidated > 0 async def test_boundary_respected_when_no_intermediate_user_turn(self, consolidator): """When boundary points past a long tool chain, the full chunk is archived.""" consolidator._SAFETY_BUFFER = 0 session = MagicMock() session.last_consolidated = 0 session.key = "test:key" session.messages = [ { "role": "user" if i in {0, 61} else "assistant", "content": f"m{i}", } for i in range(70) ] consolidator.estimate_session_prompt_tokens = MagicMock( side_effect=[(1200, "tiktoken"), (400, "tiktoken")] ) consolidator.archive = AsyncMock(return_value=True) await consolidator.maybe_consolidate_by_tokens(session) consolidator.archive.assert_awaited_once() # pick_consolidation_boundary finds the only boundary at idx=61 assert session.last_consolidated == 61 class TestRawArchiveTruncation: """raw_archive() must cap entry size to avoid bloating history.jsonl.""" def test_raw_archive_truncates_large_content(self, store): """Large messages should be truncated to _RAW_ARCHIVE_MAX_CHARS.""" big = "x" * 50_000 messages = [{"role": "user", "content": big}] store.raw_archive(messages) entries = store.read_unprocessed_history(since_cursor=0) assert len(entries) == 1 assert len(entries[0]["content"]) < 50_000 assert "[RAW]" in entries[0]["content"] def test_raw_archive_preserves_small_content(self, store): """Small messages should not be truncated.""" messages = [{"role": "user", "content": "hello"}] store.raw_archive(messages) entries = store.read_unprocessed_history(since_cursor=0) assert len(entries) == 1 assert "hello" in entries[0]["content"] def test_raw_archive_custom_max_chars(self, store): """max_chars parameter should override default limit.""" messages = [{"role": "user", "content": "a" * 200}] store.raw_archive(messages, max_chars=100) entries = store.read_unprocessed_history(since_cursor=0) assert len(entries[0]["content"]) < 200 class TestArchiveTruncation: """archive() must truncate formatted text before sending to consolidation LLM.""" async def test_archive_truncates_large_formatted_text(self, consolidator, mock_provider, store): """Large formatted text should be truncated to token budget before LLM call.""" # context_window_tokens=1000, max_completion_tokens=100, _SAFETY_BUFFER=1024 # budget = 1000 - 100 - 1024 = -124 → fallback via truncate_text(budget*4) big_messages = [{"role": "user", "content": "x" * 100_000}] mock_provider.chat_with_retry.return_value = MagicMock( content="Summary of large input.", finish_reason="stop" ) await consolidator.archive(big_messages) call_args = mock_provider.chat_with_retry.call_args user_content = call_args.kwargs["messages"][1]["content"] # Should be significantly shorter than 100K assert len(user_content) < 50_000 async def test_archive_truncates_with_small_token_budget(self, consolidator, mock_provider, store): """Small context window: truncation uses actual tokenizer count.""" consolidator.context_window_tokens = 500 big_messages = [{"role": "user", "content": "word " * 50_000}] mock_provider.chat_with_retry.return_value = MagicMock( content="Summary.", finish_reason="stop" ) await consolidator.archive(big_messages) sent_messages = mock_provider.chat_with_retry.call_args.kwargs["messages"] user_content = sent_messages[1]["content"] # budget = 500 - 100 - 1024 = negative, fallback char-based # Should be truncated assert len(user_content) < 250_000