nanobot/tests/agent/test_consolidator.py
Cheng Yongru aabc3d5017 fix(memory): fall back to raw_archive on LLM error response
When chat_with_retry returns an error response (finish_reason='error')
instead of raising an exception, archive() previously treated the error
message as a valid summary and wrote it to history.jsonl, while the
original session data was already cleared by /new — causing irreversible
data loss.

Fix: check finish_reason after the LLM call and raise RuntimeError on
error responses, which naturally falls through to the existing raw_archive
fallback. This preserves the original messages in history.jsonl instead
of losing them.

Fixes #3244
2026-04-17 20:15:07 +08:00

168 lines
6.7 KiB
Python

"""Tests for the lightweight Consolidator — append-only to HISTORY.md."""
import pytest
import asyncio
from unittest.mock import AsyncMock, MagicMock, patch
from nanobot.agent.memory import Consolidator, MemoryStore
@pytest.fixture
def store(tmp_path):
    """Provide a ``MemoryStore`` rooted at pytest's per-test temporary directory."""
    memory_store = MemoryStore(tmp_path)
    return memory_store
@pytest.fixture
def mock_provider():
    """Provide a stand-in LLM provider whose ``chat_with_retry`` can be awaited.

    Tests configure ``chat_with_retry.return_value`` or ``.side_effect`` to
    choose what the provider answers.
    """
    provider = MagicMock()
    # AsyncMock so that ``await provider.chat_with_retry(...)`` works.
    provider.chat_with_retry = AsyncMock()
    return provider
@pytest.fixture
def consolidator(store, mock_provider):
    """Provide a ``Consolidator`` wired to the temporary store and mocked provider.

    Every collaborator other than ``store`` is a mock; the token limits are
    small fixed numbers so tests can reason about them directly.
    """
    session_manager = MagicMock()
    session_manager.save = MagicMock()

    settings = {
        "store": store,
        "provider": mock_provider,
        "model": "test-model",
        "sessions": session_manager,
        "context_window_tokens": 1000,
        "build_messages": MagicMock(return_value=[]),
        "get_tool_definitions": MagicMock(return_value=[]),
        "max_completion_tokens": 100,
    }
    return Consolidator(**settings)
class TestConsolidatorSummarize:
    """Exercise ``Consolidator.archive`` on success, provider failure, and empty input."""

    async def test_summarize_appends_to_history(self, consolidator, mock_provider, store):
        """A successful LLM summary is returned and recorded as one history entry."""
        # finish_reason is set explicitly: archive() falls back to a raw dump
        # when finish_reason == "error", so leaving it as an auto-created
        # MagicMock attribute would make this test pass only by accident.
        mock_provider.chat_with_retry.return_value = MagicMock(
            content="User fixed a bug in the auth module.",
            finish_reason="stop",
        )
        messages = [
            {"role": "user", "content": "fix the auth bug"},
            {"role": "assistant", "content": "Done, fixed the race condition."},
        ]

        result = await consolidator.archive(messages)

        assert result == "User fixed a bug in the auth module."
        entries = store.read_unprocessed_history(since_cursor=0)
        assert len(entries) == 1
        # A real summary must not be tagged as a raw-dump fallback entry.
        assert "[RAW]" not in entries[0]["content"]

    async def test_summarize_raw_dumps_on_llm_failure(self, consolidator, mock_provider, store):
        """When the provider raises, the messages are raw-dumped to history instead."""
        mock_provider.chat_with_retry.side_effect = Exception("API error")
        messages = [{"role": "user", "content": "hello"}]

        result = await consolidator.archive(messages)

        assert result is None  # no summary on raw dump fallback
        entries = store.read_unprocessed_history(since_cursor=0)
        assert len(entries) == 1
        assert "[RAW]" in entries[0]["content"]

    async def test_summarize_skips_empty_messages(self, consolidator):
        """Archiving an empty message list yields no summary."""
        result = await consolidator.archive([])
        assert result is None
class TestConsolidatorArchiveErrorHandling:
    """Check how archive() reacts to the ``finish_reason`` of the LLM response.

    A response with ``finish_reason == 'error'`` (e.g. overloaded / quota
    exceeded) must lead to the raw_archive fallback rather than being stored
    as if it were a summary.
    See https://github.com/HKUDS/nanobot/issues/3244
    """

    async def test_archive_falls_back_on_error_finish_reason(self, consolidator, mock_provider, store):
        """An error response is not written as a summary; the raw messages are kept."""
        error_response = MagicMock(
            content="Error: {'type': 'error', 'error': {'type': 'overloaded_error', 'message': 'overloaded_error (529)'}}",
            finish_reason="error",
        )
        mock_provider.chat_with_retry.return_value = error_response
        conversation = [
            {"role": "user", "content": "fix the auth bug"},
            {"role": "assistant", "content": "Done, fixed the race condition."},
        ]

        summary = await consolidator.archive(conversation)

        assert summary is None
        history = store.read_unprocessed_history(since_cursor=0)
        assert len(history) == 1
        recorded = history[0]["content"]
        # The entry is the raw dump and carries none of the provider's error text.
        assert "[RAW]" in recorded
        assert "Error:" not in recorded

    async def test_archive_preserves_summary_on_success(self, consolidator, mock_provider, store):
        """A normal ('stop') response is still stored as a regular summary entry."""
        ok_response = MagicMock(
            content="User fixed a bug in the auth module.",
            finish_reason="stop",
        )
        mock_provider.chat_with_retry.return_value = ok_response
        conversation = [
            {"role": "user", "content": "fix the auth bug"},
            {"role": "assistant", "content": "Done."},
        ]

        summary = await consolidator.archive(conversation)

        assert summary == "User fixed a bug in the auth module."
        history = store.read_unprocessed_history(since_cursor=0)
        assert len(history) == 1
        recorded = history[0]["content"]
        assert "[RAW]" not in recorded
class TestConsolidatorTokenBudget:
    """Exercise ``maybe_consolidate_by_tokens`` with mocked estimates and boundaries.

    ``archive`` is replaced by an ``AsyncMock`` in every test, so no provider
    call is made and the archived chunk can be inspected directly.
    """

    async def test_prompt_below_threshold_does_not_consolidate(self, consolidator):
        """No consolidation when tokens are within budget."""
        session = MagicMock()
        session.last_consolidated = 0
        session.messages = [{"role": "user", "content": "hi"}]
        session.key = "test:key"
        # 100 estimated tokens against the fixture's 1000-token context window.
        consolidator.estimate_session_prompt_tokens = MagicMock(return_value=(100, "tiktoken"))
        consolidator.archive = AsyncMock(return_value=True)

        await consolidator.maybe_consolidate_by_tokens(session)

        consolidator.archive.assert_not_called()

    async def test_chunk_cap_preserves_user_turn_boundary(self, consolidator):
        """Chunk cap should rewind to the last user boundary within the cap."""
        # NOTE(review): presumably zeroed so the budget depends only on the
        # mocked estimates below; confirm against the implementation.
        consolidator._SAFETY_BUFFER = 0
        session = MagicMock()
        session.last_consolidated = 0
        session.key = "test:key"
        # 70 messages; user turns sit at indices 0, 50 and 61 only.
        session.messages = [
            {
                "role": "user" if i in {0, 50, 61} else "assistant",
                "content": f"m{i}",
            }
            for i in range(70)
        ]
        # First estimate is over the 1000-token window, the second is under it.
        consolidator.estimate_session_prompt_tokens = MagicMock(
            side_effect=[(1200, "tiktoken"), (400, "tiktoken")]
        )
        # The boundary picker proposes index 61; the test expects the chunk to
        # be cut back to the user turn at index 50 instead.
        consolidator.pick_consolidation_boundary = MagicMock(return_value=(61, 999))
        consolidator.archive = AsyncMock(return_value=True)

        await consolidator.maybe_consolidate_by_tokens(session)

        # Fail with a clear assertion if archive was never awaited; otherwise
        # await_args is None and the lookup below raises AttributeError.
        consolidator.archive.assert_awaited()
        archived_chunk = consolidator.archive.await_args.args[0]
        assert len(archived_chunk) == 50
        assert archived_chunk[0]["content"] == "m0"
        assert archived_chunk[-1]["content"] == "m49"
        assert session.last_consolidated == 50

    async def test_chunk_cap_skips_when_no_user_boundary_within_cap(self, consolidator):
        """If the cap would cut mid-turn, consolidation should skip that round."""
        consolidator._SAFETY_BUFFER = 0
        session = MagicMock()
        session.last_consolidated = 0
        session.key = "test:key"
        # Only indices 0 and 61 are user turns, so there is no user boundary
        # between the start and the proposed boundary at 61.
        session.messages = [
            {
                "role": "user" if i in {0, 61} else "assistant",
                "content": f"m{i}",
            }
            for i in range(70)
        ]
        consolidator.estimate_session_prompt_tokens = MagicMock(return_value=(1200, "tiktoken"))
        consolidator.pick_consolidation_boundary = MagicMock(return_value=(61, 999))
        consolidator.archive = AsyncMock(return_value=True)

        await consolidator.maybe_consolidate_by_tokens(session)

        consolidator.archive.assert_not_awaited()
        assert session.last_consolidated == 0