feat(agent): retain recent context during auto compact

Keep a legal recent suffix in idle auto-compacted sessions so resumed chats preserve their freshest live context while older messages are summarized. Recover persisted summaries even when retained messages remain, and document the new behavior.
This commit is contained in:
Xubin Ren 2026-04-11 07:25:50 +00:00 committed by Xubin Ren
parent d03458f034
commit 1cb28b39a3
3 changed files with 104 additions and 69 deletions

View File

@ -1505,7 +1505,7 @@ MCP tools are automatically discovered and registered on startup. The LLM can us
### Auto Compact
When a user is idle for longer than a configured TTL, nanobot **proactively** compresses the session context into a summary. This reduces token cost and first-token latency when the user returns — instead of re-processing a long stale context with an expired KV cache, the model receives a compact summary and fresh input.
When a user is idle for longer than a configured TTL, nanobot **proactively** compresses the older part of the session context into a summary while keeping a recent legal suffix of live messages. This reduces token cost and first-token latency when the user returns — instead of re-processing a long stale context with an expired KV cache, the model receives a compact summary, the most recent live context, and fresh input.
```json
{
@ -1523,8 +1523,8 @@ When a user is idle for longer than a configured TTL, nanobot **proactively** co
How it works:
1. **Idle detection**: On each idle tick (~1 s), checks all sessions for expiration.
2. **Background compaction**: Expired sessions are summarized via LLM, then cleared.
3. **Summary injection**: When the user returns, the summary is injected as runtime context (one-shot, not persisted).
2. **Background compaction**: Expired sessions summarize the older live prefix via LLM and keep the most recent legal suffix (currently 8 messages).
3. **Summary injection**: When the user returns, the summary is injected as runtime context (one-shot, not persisted) alongside the retained recent suffix.
> [!TIP]
> The summary survives bot restarts — it's stored in session metadata and recovered on the next message.

View File

@ -3,16 +3,18 @@
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING, Callable, Coroutine
from typing import TYPE_CHECKING, Any, Callable, Coroutine
from loguru import logger
from nanobot.session.manager import Session, SessionManager
if TYPE_CHECKING:
from nanobot.agent.memory import Consolidator
from nanobot.session.manager import Session, SessionManager
class AutoCompact:
_RECENT_SUFFIX_MESSAGES = 8
def __init__(self, sessions: SessionManager, consolidator: Consolidator,
session_ttl_minutes: int = 0):
self.sessions = sessions
@ -33,6 +35,27 @@ class AutoCompact:
idle_min = int((datetime.now() - last_active).total_seconds() / 60)
return f"Inactive for {idle_min} minutes.\nPrevious conversation summary: {text}"
def _split_unconsolidated(
self, session: Session,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
"""Split live session tail into archiveable prefix and retained recent suffix."""
tail = list(session.messages[session.last_consolidated:])
if not tail:
return [], []
probe = Session(
key=session.key,
messages=tail.copy(),
created_at=session.created_at,
updated_at=session.updated_at,
metadata={},
last_consolidated=0,
)
probe.retain_recent_legal_suffix(self._RECENT_SUFFIX_MESSAGES)
kept = probe.messages
cut = len(tail) - len(kept)
return tail[:cut], kept
def check_expired(self, schedule_background: Callable[[Coroutine], None]) -> None:
for info in self.sessions.list_sessions():
key = info.get("key", "")
@ -45,21 +68,31 @@ class AutoCompact:
try:
self.sessions.invalidate(key)
session = self.sessions.get_or_create(key)
msgs = session.messages[session.last_consolidated:]
if not msgs:
archive_msgs, kept_msgs = self._split_unconsolidated(session)
if not archive_msgs and not kept_msgs:
logger.debug("Auto-compact: skipping {}, no un-consolidated messages", key)
session.updated_at = datetime.now()
self.sessions.save(session)
return
n = len(msgs)
last_active = session.updated_at
summary = await self.consolidator.archive(msgs) or ""
summary = ""
if archive_msgs:
summary = await self.consolidator.archive(archive_msgs) or ""
if summary and summary != "(nothing)":
self._summaries[key] = (summary, last_active)
session.metadata["_last_summary"] = {"text": summary, "last_active": last_active.isoformat()}
session.clear()
session.messages = kept_msgs
session.last_consolidated = 0
session.updated_at = datetime.now()
self.sessions.save(session)
logger.info("Auto-compact: archived {} ({} messages, summary={})", key, n, bool(summary))
logger.info(
"Auto-compact: archived {} (archived={}, kept={}, summary={})",
key,
len(archive_msgs),
len(kept_msgs),
bool(summary),
)
except Exception:
logger.exception("Auto-compact: failed for {}", key)
finally:
@ -75,7 +108,7 @@ class AutoCompact:
if entry:
session.metadata.pop("_last_summary", None)
return session, self._format_summary(entry[0], entry[1])
if not session.messages and "_last_summary" in session.metadata:
if "_last_summary" in session.metadata:
meta = session.metadata.pop("_last_summary")
self.sessions.save(session)
return session, self._format_summary(meta["text"], datetime.fromisoformat(meta["last_active"]))

View File

@ -35,6 +35,13 @@ def _make_loop(tmp_path: Path, session_ttl_minutes: int = 15) -> AgentLoop:
return loop
def _add_turns(session, turns: int, *, prefix: str = "msg") -> None:
"""Append simple user/assistant turns to a session."""
for i in range(turns):
session.add_message("user", f"{prefix} user {i}")
session.add_message("assistant", f"{prefix} assistant {i}")
class TestSessionTTLConfig:
"""Test session TTL configuration."""
@ -113,13 +120,11 @@ class TestAutoCompact:
await loop.close_mcp()
@pytest.mark.asyncio
async def test_auto_compact_archives_and_clears(self, tmp_path):
"""_archive should archive un-consolidated messages and clear session."""
async def test_auto_compact_archives_prefix_and_keeps_recent_suffix(self, tmp_path):
"""_archive should summarize the old prefix and keep a recent legal suffix."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
for i in range(4):
session.add_message("user", f"msg{i}")
session.add_message("assistant", f"resp{i}")
_add_turns(session, 6)
loop.sessions.save(session)
archived_messages = []
@ -132,9 +137,11 @@ class TestAutoCompact:
await loop.auto_compact._archive("cli:test")
assert len(archived_messages) == 8
assert len(archived_messages) == 4
session_after = loop.sessions.get_or_create("cli:test")
assert len(session_after.messages) == 0
assert len(session_after.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES
assert session_after.messages[0]["content"] == "msg user 2"
assert session_after.messages[-1]["content"] == "msg assistant 5"
await loop.close_mcp()
@pytest.mark.asyncio
@ -142,8 +149,7 @@ class TestAutoCompact:
"""_archive should store the summary in _summaries."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "hello")
session.add_message("assistant", "hi there")
_add_turns(session, 6, prefix="hello")
loop.sessions.save(session)
async def _fake_archive(messages):
@ -157,7 +163,7 @@ class TestAutoCompact:
assert entry is not None
assert entry[0] == "User said hello."
session_after = loop.sessions.get_or_create("cli:test")
assert len(session_after.messages) == 0
assert len(session_after.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES
await loop.close_mcp()
@pytest.mark.asyncio
@ -187,9 +193,7 @@ class TestAutoCompact:
"""_archive should only archive un-consolidated messages."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
for i in range(10):
session.add_message("user", f"msg{i}")
session.add_message("assistant", f"resp{i}")
_add_turns(session, 14)
session.last_consolidated = 18
loop.sessions.save(session)
@ -232,7 +236,7 @@ class TestAutoCompactIdleDetection:
"""Proactive auto-new archives expired session; _process_message reloads it."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "old message")
_add_turns(session, 6, prefix="old")
session.updated_at = datetime.now() - timedelta(minutes=20)
loop.sessions.save(session)
@ -251,7 +255,8 @@ class TestAutoCompactIdleDetection:
await loop._process_message(msg)
session_after = loop.sessions.get_or_create("cli:test")
assert not any(m["content"] == "old message" for m in session_after.messages)
assert len(archived_messages) == 4
assert not any(m["content"] == "old user 0" for m in session_after.messages)
assert any(m["content"] == "new msg" for m in session_after.messages)
await loop.close_mcp()
@ -329,7 +334,7 @@ class TestAutoCompactSystemMessages:
"""Proactive auto-new archives expired session; system messages reload it."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "old message from subagent context")
_add_turns(session, 6, prefix="old")
session.updated_at = datetime.now() - timedelta(minutes=20)
loop.sessions.save(session)
@ -349,7 +354,7 @@ class TestAutoCompactSystemMessages:
session_after = loop.sessions.get_or_create("cli:test")
assert not any(
m["content"] == "old message from subagent context"
m["content"] == "old user 0"
for m in session_after.messages
)
await loop.close_mcp()
@ -363,8 +368,7 @@ class TestAutoCompactEdgeCases:
"""Auto-new should not inject when archive produces '(nothing)'."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "thanks")
session.add_message("assistant", "you're welcome")
_add_turns(session, 6, prefix="thanks")
session.updated_at = datetime.now() - timedelta(minutes=20)
loop.sessions.save(session)
@ -375,18 +379,18 @@ class TestAutoCompactEdgeCases:
await loop.auto_compact._archive("cli:test")
session_after = loop.sessions.get_or_create("cli:test")
assert len(session_after.messages) == 0
assert len(session_after.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES
# "(nothing)" summary should not be stored
assert "cli:test" not in loop.auto_compact._summaries
await loop.close_mcp()
@pytest.mark.asyncio
async def test_auto_compact_archive_failure_still_clears(self, tmp_path):
"""Auto-new should clear session even if LLM archive fails (raw_archive fallback)."""
async def test_auto_compact_archive_failure_still_keeps_recent_suffix(self, tmp_path):
"""Auto-new should keep the recent suffix even if LLM archive falls back to raw dump."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "important data")
_add_turns(session, 6, prefix="important")
session.updated_at = datetime.now() - timedelta(minutes=20)
loop.sessions.save(session)
@ -396,14 +400,13 @@ class TestAutoCompactEdgeCases:
await loop.auto_compact._archive("cli:test")
session_after = loop.sessions.get_or_create("cli:test")
# Session should be cleared (archive falls back to raw dump)
assert len(session_after.messages) == 0
assert len(session_after.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES
await loop.close_mcp()
@pytest.mark.asyncio
async def test_auto_compact_preserves_runtime_checkpoint_before_check(self, tmp_path):
"""Runtime checkpoint is restored; proactive archive handles the expired session."""
"""Short expired sessions keep recent messages; checkpoint restore still works on resume."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.metadata[AgentLoop._RUNTIME_CHECKPOINT_KEY] = {
@ -429,8 +432,10 @@ class TestAutoCompactEdgeCases:
msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="continue")
await loop._process_message(msg)
# The checkpoint-restored message should have been archived by proactive path
assert len(archived_messages) >= 1
session_after = loop.sessions.get_or_create("cli:test")
assert archived_messages == []
assert any(m["content"] == "previous message" for m in session_after.messages)
assert any(m["content"] == "interrupted response" for m in session_after.messages)
await loop.close_mcp()
@ -446,11 +451,17 @@ class TestAutoCompactIntegration:
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
# Phase 1: User has a conversation
# Phase 1: User has a conversation longer than the retained recent suffix
session.add_message("user", "I'm learning English, teach me past tense")
session.add_message("assistant", "Past tense is used for actions completed in the past...")
session.add_message("user", "Give me an example")
session.add_message("assistant", '"I walked to the store yesterday."')
session.add_message("user", "Give me another example")
session.add_message("assistant", '"She visited Paris last year."')
session.add_message("user", "Quiz me")
session.add_message("assistant", "What is the past tense of go?")
session.add_message("user", "I think it is went")
session.add_message("assistant", "Correct.")
loop.sessions.save(session)
# Phase 2: Time passes (simulate idle)
@ -474,7 +485,7 @@ class TestAutoCompactIntegration:
# Phase 4: Verify
session_after = loop.sessions.get_or_create("cli:test")
# Old messages should be gone
# The oldest messages should be trimmed from live session history
assert not any(
"past tense is used" in str(m.get("content", "")) for m in session_after.messages
)
@ -497,8 +508,8 @@ class TestAutoCompactIntegration:
await loop.close_mcp()
@pytest.mark.asyncio
async def test_multi_paragraph_user_message_preserved(self, tmp_path):
"""Multi-paragraph user messages must be fully preserved after auto-new."""
async def test_runtime_context_markers_not_persisted_for_multi_paragraph_turn(self, tmp_path):
"""Auto-compact resume context must not leak runtime markers into persisted session history."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "old message")
@ -520,16 +531,11 @@ class TestAutoCompactIntegration:
await loop._process_message(msg)
session_after = loop.sessions.get_or_create("cli:test")
user_msgs = [m for m in session_after.messages if m.get("role") == "user"]
assert len(user_msgs) >= 1
# All three paragraphs must be preserved
persisted = user_msgs[-1]["content"]
assert "Paragraph one" in persisted
assert "Paragraph two" in persisted
assert "Paragraph three" in persisted
# No runtime context markers in persisted message
assert "[Runtime Context" not in persisted
assert "[/Runtime Context]" not in persisted
assert any(m.get("content") == "old message" for m in session_after.messages)
for persisted in session_after.messages:
content = str(persisted.get("content", ""))
assert "[Runtime Context" not in content
assert "[/Runtime Context]" not in content
await loop.close_mcp()
@ -562,8 +568,7 @@ class TestProactiveAutoCompact:
"""Expired session should be archived during idle tick."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "old message")
session.add_message("assistant", "old response")
_add_turns(session, 5, prefix="old")
session.updated_at = datetime.now() - timedelta(minutes=20)
loop.sessions.save(session)
@ -578,7 +583,7 @@ class TestProactiveAutoCompact:
await self._run_check_expired(loop)
session_after = loop.sessions.get_or_create("cli:test")
assert len(session_after.messages) == 0
assert len(session_after.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES
assert len(archived_messages) == 2
entry = loop.auto_compact._summaries.get("cli:test")
assert entry is not None
@ -604,7 +609,7 @@ class TestProactiveAutoCompact:
"""Should not archive the same session twice if already in progress."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "old message")
_add_turns(session, 6, prefix="old")
session.updated_at = datetime.now() - timedelta(minutes=20)
loop.sessions.save(session)
@ -641,7 +646,7 @@ class TestProactiveAutoCompact:
"""Proactive archive failure should be caught and not block future ticks."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "old message")
_add_turns(session, 6, prefix="old")
session.updated_at = datetime.now() - timedelta(minutes=20)
loop.sessions.save(session)
@ -684,8 +689,7 @@ class TestProactiveAutoCompact:
"""Already-archived session should NOT be re-scheduled on subsequent ticks."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "old message")
session.add_message("assistant", "old response")
_add_turns(session, 5, prefix="old")
session.updated_at = datetime.now() - timedelta(minutes=20)
loop.sessions.save(session)
@ -738,8 +742,7 @@ class TestProactiveAutoCompact:
"""After successful compact + user sends new messages + idle again, should compact again."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "first conversation")
session.add_message("assistant", "first response")
_add_turns(session, 5, prefix="first")
session.updated_at = datetime.now() - timedelta(minutes=20)
loop.sessions.save(session)
@ -780,8 +783,7 @@ class TestSummaryPersistence:
"""After archive, _last_summary should be in session metadata."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "hello")
session.add_message("assistant", "hi there")
_add_turns(session, 6, prefix="hello")
session.updated_at = datetime.now() - timedelta(minutes=20)
loop.sessions.save(session)
@ -805,8 +807,7 @@ class TestSummaryPersistence:
"""Summary should be recovered from metadata when _summaries is empty (simulates restart)."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "hello")
session.add_message("assistant", "hi there")
_add_turns(session, 6, prefix="hello")
last_active = datetime.now() - timedelta(minutes=20)
session.updated_at = last_active
loop.sessions.save(session)
@ -825,6 +826,7 @@ class TestSummaryPersistence:
# prepare_session should recover summary from metadata
reloaded = loop.sessions.get_or_create("cli:test")
assert len(reloaded.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES
_, summary = loop.auto_compact.prepare_session(reloaded, "cli:test")
assert summary is not None
@ -839,7 +841,7 @@ class TestSummaryPersistence:
"""_last_summary should be removed from metadata after being consumed."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "hello")
_add_turns(session, 6, prefix="hello")
session.updated_at = datetime.now() - timedelta(minutes=20)
loop.sessions.save(session)
@ -870,7 +872,7 @@ class TestSummaryPersistence:
"""In-memory _summaries path should also clean up _last_summary from metadata."""
loop = _make_loop(tmp_path, session_ttl_minutes=15)
session = loop.sessions.get_or_create("cli:test")
session.add_message("user", "hello")
_add_turns(session, 6, prefix="hello")
session.updated_at = datetime.now() - timedelta(minutes=20)
loop.sessions.save(session)