From a6e993df25b828389cde85c07783a545d2ca0986 Mon Sep 17 00:00:00 2001
From: chengyongru <chengyongru.ai@gmail.com>
Date: Thu, 7 May 2026 16:17:27 +0800
Subject: [PATCH] fix(agent): move archived summary into system prompt for KV
 cache stability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Append [Archived Context Summary] to system prompt instead of injecting
  it into the user message runtime context, improving KV cache reuse across
  turns and avoiding consecutive same-role messages.
- _last_summary persists in metadata (no pop) for restart survival;
  summary is re-injected every turn via the stable system prompt.
- Remove dynamic "Inactive for X minutes" from _format_summary — use
  static last_active timestamp instead to preserve KV cache stability.
- Pass session_summary through build_messages() so both normal and
  ask_user paths receive the archived summary in the system prompt.
- estimate_session_prompt_tokens now reads _last_summary from metadata
  to include the summary in token budget estimation.
- Remove obsolete session_summary parameter from
  maybe_consolidate_by_tokens and estimate_session_prompt_tokens
  call sites in loop.py (summary flows through build_messages instead).
- Ensure /new (session.clear()) clears _last_summary from metadata.
---
 nanobot/agent/autocompact.py                  | 12 +--
 nanobot/agent/context.py                      | 21 ++--
 nanobot/agent/loop.py                         | 14 +--
 nanobot/agent/memory.py                       | 10 +-
 nanobot/session/manager.py                    |  1 +
 tests/agent/test_auto_compact.py              | 97 ++++++++++++++++---
 tests/agent/test_loop_consolidation_tokens.py |  4 +-
 tests/agent/test_unified_session.py           |  1 -
 8 files changed, 119 insertions(+), 41 deletions(-)

diff --git a/nanobot/agent/autocompact.py b/nanobot/agent/autocompact.py
index eabd86155..11e531039 100644
--- a/nanobot/agent/autocompact.py
+++ b/nanobot/agent/autocompact.py
@@ -7,6 +7,7 @@ from datetime import datetime
 from typing import TYPE_CHECKING, Any, Callable, Coroutine
 
 from loguru import logger
+
 from nanobot.session.manager import Session, SessionManager
 
 if TYPE_CHECKING:
@@ -34,8 +35,7 @@ class AutoCompact:
 
     @staticmethod
     def _format_summary(text: str, last_active: datetime) -> str:
-        idle_min = int((datetime.now() - last_active).total_seconds() / 60)
-        return f"Inactive for {idle_min} minutes.\nPrevious conversation summary: {text}"
+        return f"Previous conversation summary (last active {last_active.isoformat()}):\n{text}"
 
     def _split_unconsolidated(
         self, session: Session,
@@ -111,13 +111,11 @@ class AutoCompact:
             logger.info("Auto-compact: reloading session {} (archiving={})", key, key in self._archiving)
             session = self.sessions.get_or_create(key)
         # Hot path: summary from in-memory dict (process hasn't restarted).
-        # Also clean metadata copy so stale _last_summary never leaks to disk.
         entry = self._summaries.pop(key, None)
         if entry:
-            session.metadata.pop("_last_summary", None)
             return session, self._format_summary(entry[0], entry[1])
-        if "_last_summary" in session.metadata:
-            meta = session.metadata.pop("_last_summary")
-            self.sessions.save(session)
+        # Cold path: summary persisted in session metadata (process restarted).
+        meta = session.metadata.get("_last_summary")
+        if isinstance(meta, dict):
             return session, self._format_summary(meta["text"], datetime.fromisoformat(meta["last_active"]))
         return session, None
diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py
index ccd1b882e..7415cdfcd 100644
--- a/nanobot/agent/context.py
+++ b/nanobot/agent/context.py
@@ -10,7 +10,12 @@ from typing import Any
 
 from nanobot.agent.memory import MemoryStore
 from nanobot.agent.skills import SkillsLoader
-from nanobot.utils.helpers import build_assistant_message, current_time_str, detect_image_mime, truncate_text
+from nanobot.utils.helpers import (
+    build_assistant_message,
+    current_time_str,
+    detect_image_mime,
+    truncate_text,
+)
 from nanobot.utils.prompt_templates import render_template
 
 
@@ -33,6 +38,7 @@ class ContextBuilder:
         self,
         skill_names: list[str] | None = None,
         channel: str | None = None,
+        session_summary: str | None = None,
     ) -> str:
         """Build the system prompt from identity, bootstrap files, memory, and skills."""
         parts = [self._get_identity(channel=channel)]
@@ -64,6 +70,9 @@ class ContextBuilder:
             history_text = truncate_text(history_text, self._MAX_HISTORY_CHARS)
             parts.append("# Recent History\n\n" + history_text)
 
+        if session_summary:
+            parts.append(f"[Archived Context Summary]\n\n{session_summary}")
+
         return "\n\n---\n\n".join(parts)
 
     def _get_identity(self, channel: str | None = None) -> str:
@@ -83,7 +92,7 @@ class ContextBuilder:
     @staticmethod
     def _build_runtime_context(
         channel: str | None, chat_id: str | None, timezone: str | None = None,
-        session_summary: str | None = None, sender_id: str | None = None,
+        sender_id: str | None = None,
     ) -> str:
         """Build untrusted runtime metadata block for injection before the user message."""
         lines = [f"Current Time: {current_time_str(timezone)}"]
@@ -91,8 +100,6 @@ class ContextBuilder:
             lines += [f"Channel: {channel}", f"Chat ID: {chat_id}"]
         if sender_id:
             lines += [f"Sender ID: {sender_id}"]
-        if session_summary:
-            lines += ["", "[Resumed Session]", session_summary]
         return ContextBuilder._RUNTIME_CONTEXT_TAG + "\n" + "\n".join(lines) + "\n" + ContextBuilder._RUNTIME_CONTEXT_END
 
     @staticmethod
@@ -139,11 +146,11 @@ class ContextBuilder:
         channel: str | None = None,
         chat_id: str | None = None,
         current_role: str = "user",
-        session_summary: str | None = None,
         sender_id: str | None = None,
+        session_summary: str | None = None,
     ) -> list[dict[str, Any]]:
         """Build the complete message list for an LLM call."""
-        runtime_ctx = self._build_runtime_context(channel, chat_id, self.timezone, session_summary=session_summary, sender_id=sender_id)
+        runtime_ctx = self._build_runtime_context(channel, chat_id, self.timezone, sender_id=sender_id)
         user_content = self._build_user_content(current_message, media)
 
         # Merge runtime context and user content into a single user message
@@ -153,7 +160,7 @@ class ContextBuilder:
         else:
             merged = [{"type": "text", "text": runtime_ctx}] + user_content
         messages = [
-            {"role": "system", "content": self.build_system_prompt(skill_names, channel=channel)},
+            {"role": "system", "content": self.build_system_prompt(skill_names, channel=channel, session_summary=session_summary)},
             *history,
         ]
         if messages[-1].get("role") == current_role:
diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index d89ab3007..da05cfbf6 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -706,12 +706,16 @@ class AgentLoop:
         session: Session,
         history: list[dict[str, Any]],
         pending_ask_id: str | None,
-        pending_summary: Any,
+        pending_summary: str | None,
     ) -> list[dict[str, Any]]:
         """Build the initial message list for the LLM turn."""
         if pending_ask_id:
+            system_prompt = self.context.build_system_prompt(
+                channel=msg.channel,
+                session_summary=pending_summary,
+            )
             return ask_user_tool_result_messages(
-                self.context.build_system_prompt(channel=msg.channel),
+                system_prompt,
                 history,
                 pending_ask_id,
                 image_generation_prompt(msg.content, msg.metadata),
@@ -719,11 +723,11 @@ class AgentLoop:
         return self.context.build_messages(
             history=history,
             current_message=image_generation_prompt(msg.content, msg.metadata),
-            session_summary=pending_summary,
             media=msg.media if msg.media else None,
             channel=msg.channel,
             chat_id=self._runtime_chat_id(msg),
             sender_id=msg.sender_id,
+            session_summary=pending_summary,
         )
 
     async def _dispatch_command_inline(
@@ -1179,7 +1183,6 @@ class AgentLoop:
 
         await self.consolidator.maybe_consolidate_by_tokens(
             session,
-            session_summary=pending,
             replay_max_messages=self._max_messages,
         )
         is_subagent = msg.sender_id == "subagent"
@@ -1203,9 +1206,9 @@ class AgentLoop:
             current_message="" if is_subagent else msg.content,
             channel=channel,
             chat_id=chat_id,
-            session_summary=pending,
             current_role=current_role,
             sender_id=msg.sender_id,
+            session_summary=pending,
         )
         final_content, _, all_msgs, stop_reason, _ = await self._run_agent_loop(
             messages, session=session, channel=channel, chat_id=chat_id,
@@ -1413,7 +1416,6 @@ class AgentLoop:
     async def _state_build(self, ctx: TurnContext) -> str:
         await self.consolidator.maybe_consolidate_by_tokens(
             ctx.session,
-            session_summary=ctx.pending_summary,
             replay_max_messages=self._max_messages,
         )
         self._set_tool_context(
diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index 8eaf06daf..271fb3f65 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -590,19 +590,20 @@ class Consolidator:
     def estimate_session_prompt_tokens(
         self,
         session: Session,
-        *,
-        session_summary: str | None = None,
     ) -> tuple[int, str]:
         """Estimate prompt size from the full unconsolidated session tail."""
         history = self._full_unconsolidated_history(session, include_timestamps=True)
         channel, chat_id = (session.key.split(":", 1) if ":" in session.key else (None, None))
+        # Include archived summary in estimation so the budget accounts for it.
+        meta = session.metadata.get("_last_summary")
+        summary = meta.get("text") if isinstance(meta, dict) else (meta if isinstance(meta, str) else None)
         probe_messages = self._build_messages(
             history=history,
             current_message="[token-probe]",
             channel=channel,
             chat_id=chat_id,
-            session_summary=session_summary,
             sender_id=None,
+            session_summary=summary,
         )
         return estimate_prompt_tokens_chain(
             self.provider,
@@ -669,7 +670,6 @@ class Consolidator:
         self,
         session: Session,
         *,
-        session_summary: str | None = None,
         replay_max_messages: int | None = None,
     ) -> None:
         """Loop: archive old messages until prompt fits within safe budget.
@@ -691,7 +691,6 @@ class Consolidator:
             try:
                 estimated, source = self.estimate_session_prompt_tokens(
                     session,
-                    session_summary=session_summary,
                 )
             except Exception:
                 logger.exception("Token estimation failed for {}", session.key)
@@ -757,7 +756,6 @@ class Consolidator:
                 try:
                     estimated, source = self.estimate_session_prompt_tokens(
                         session,
-                        session_summary=session_summary,
                     )
                 except Exception:
                     logger.exception("Token estimation failed for {}", session.key)
diff --git a/nanobot/session/manager.py b/nanobot/session/manager.py
index 7aea7b63d..47d98976b 100644
--- a/nanobot/session/manager.py
+++ b/nanobot/session/manager.py
@@ -181,6 +181,7 @@ class Session:
         self.messages = []
         self.last_consolidated = 0
         self.updated_at = datetime.now()
+        self.metadata.pop("_last_summary", None)
 
     def retain_recent_legal_suffix(self, max_messages: int) -> None:
         """Keep a legal recent suffix constrained by a hard message cap."""
diff --git a/tests/agent/test_auto_compact.py b/tests/agent/test_auto_compact.py
index ecef55044..41d79f85b 100644
--- a/tests/agent/test_auto_compact.py
+++ b/tests/agent/test_auto_compact.py
@@ -1020,14 +1020,14 @@ class TestSummaryPersistence:
 
         assert summary is not None
         assert "User said hello." in summary
-        assert "Inactive for" in summary
-        # Metadata should be cleaned up after consumption
-        assert "_last_summary" not in reloaded.metadata
+        assert "Previous conversation summary" in summary
+        # _last_summary persists in metadata for restart survival.
+        assert "_last_summary" in reloaded.metadata
         await loop.close_mcp()
 
     @pytest.mark.asyncio
-    async def test_metadata_cleanup_no_leak(self, tmp_path):
-        """_last_summary should be removed from metadata after being consumed."""
+    async def test_metadata_persists_for_restart(self, tmp_path):
+        """_last_summary stays in metadata so it survives process restarts."""
         loop = _make_loop(tmp_path, session_ttl_minutes=15)
         session = loop.sessions.get_or_create("cli:test")
         _add_turns(session, 6, prefix="hello")
@@ -1046,14 +1046,14 @@ class TestSummaryPersistence:
         loop.sessions.invalidate("cli:test")
         reloaded = loop.sessions.get_or_create("cli:test")
 
-        # First call: consumes from metadata
+        # Every call returns the summary from metadata (no _consumed_keys gate)
         _, summary = loop.auto_compact.prepare_session(reloaded, "cli:test")
         assert summary is not None
-
-        # Second call: no summary (already consumed)
         _, summary2 = loop.auto_compact.prepare_session(reloaded, "cli:test")
-        assert summary2 is None
-        assert "_last_summary" not in reloaded.metadata
+        assert summary2 is not None
+        assert "Summary." in summary2
+        # _last_summary persists in metadata for restart survival.
+        assert "_last_summary" in reloaded.metadata
         await loop.close_mcp()
 
     @pytest.mark.asyncio
@@ -1081,6 +1081,79 @@ class TestSummaryPersistence:
         # In-memory path is taken (no restart)
         _, summary = loop.auto_compact.prepare_session(reloaded, "cli:test")
         assert summary is not None
-        # Metadata should also be cleaned up
-        assert "_last_summary" not in reloaded.metadata
+        # _last_summary persists in metadata for restart survival.
+        assert "_last_summary" in reloaded.metadata
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_new_summary_overrides_old(self, tmp_path):
+        """A fresh archive writes a new summary that replaces the old one."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6, prefix="hello")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        async def _fake_archive(messages):
+            return "First summary."
+
+        loop.consolidator.archive = _fake_archive
+        await loop.auto_compact._archive("cli:test")
+
+        # Consume the first summary via hot path
+        _, summary1 = loop.auto_compact.prepare_session(
+            loop.sessions.get_or_create("cli:test"), "cli:test"
+        )
+        assert summary1 is not None
+        assert "First summary." in summary1
+        assert "cli:test" not in loop.auto_compact._summaries  # popped by hot path
+
+        # Add new messages and archive again (simulating a later turn)
+        _add_turns(session, 4, prefix="world")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        async def _fake_archive2(messages):
+            return "Second summary."
+
+        loop.consolidator.archive = _fake_archive2
+        await loop.auto_compact._archive("cli:test")
+
+        # The second archive writes a new summary
+        assert "cli:test" in loop.auto_compact._summaries
+
+        # prepare_session must return the new summary
+        reloaded = loop.sessions.get_or_create("cli:test")
+        _, summary2 = loop.auto_compact.prepare_session(reloaded, "cli:test")
+        assert summary2 is not None
+        assert "Second summary." in summary2
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_new_command_clears_last_summary(self, tmp_path):
+        """/new should clear _last_summary so the new session starts fresh."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6, prefix="hello")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        async def _fake_archive(messages):
+            return "Old summary."
+
+        loop.consolidator.archive = _fake_archive
+        await loop.auto_compact._archive("cli:test")
+
+        # Verify summary exists before /new
+        reloaded = loop.sessions.get_or_create("cli:test")
+        assert "_last_summary" in reloaded.metadata
+
+        # Simulate /new command
+        session.clear()
+        loop.sessions.save(session)
+        loop.sessions.invalidate(session.key)
+
+        # After /new, metadata should no longer contain _last_summary
+        fresh = loop.sessions.get_or_create("cli:test")
+        assert "_last_summary" not in fresh.metadata
         await loop.close_mcp()
diff --git a/tests/agent/test_loop_consolidation_tokens.py b/tests/agent/test_loop_consolidation_tokens.py
index aeb67d8b3..3228bd6dd 100644
--- a/tests/agent/test_loop_consolidation_tokens.py
+++ b/tests/agent/test_loop_consolidation_tokens.py
@@ -190,7 +190,8 @@ async def test_consolidation_persists_summary_for_next_prepare_session(tmp_path,
     reloaded, pending = loop.auto_compact.prepare_session(reloaded, "cli:test")
     assert pending is not None
     assert "User discussed project status." in pending
-    assert "_last_summary" not in reloaded.metadata
+    # _last_summary persists for restart survival.
+    assert "_last_summary" in reloaded.metadata
 
 
 @pytest.mark.asyncio
@@ -207,7 +208,6 @@ async def test_preflight_consolidation_receives_pending_summary(tmp_path) -> Non
 
     loop.consolidator.maybe_consolidate_by_tokens.assert_any_await(
         session,
-        session_summary="Previous conversation summary: earlier context",
         replay_max_messages=loop._max_messages,
     )
 
diff --git a/tests/agent/test_unified_session.py b/tests/agent/test_unified_session.py
index 957c8ead2..839f62f57 100644
--- a/tests/agent/test_unified_session.py
+++ b/tests/agent/test_unified_session.py
@@ -399,7 +399,6 @@ class TestConsolidationUnaffectedByUnifiedSession:
         # estimate was called (consolidation was attempted)
         consolidator.estimate_session_prompt_tokens.assert_called_once_with(
             session,
-            session_summary=None,
         )
         # but archive was not called (no valid boundary)
         consolidator.archive.assert_not_called()