From 5a34504b76c109918c37560e93a158b1a24d4782 Mon Sep 17 00:00:00 2001
From: olgagaga <olga_kuzmich2005@tut.by>
Date: Sat, 16 May 2026 10:50:15 -0400
Subject: [PATCH 1/8] docs(configuration): expand "Environment Variables for
 Secrets" section

- Note that any string field supports ${VAR_NAME} and resolved values are
  never written back to disk.
- Document the failure mode for unset variables.
- Add MCP (stdio env + HTTP headers) and web-search examples.
- Add Docker, direnv, and secret-manager (1Password / pass / Bitwarden)
  delivery patterns alongside the existing systemd example.
- Replace plaintext apiKey values in tools.web.search examples (Brave,
  Tavily, Jina, Kagi, Olostep) with ${PROVIDER_API_KEY} placeholders so
  the docs stop modelling the anti-pattern.
- Cross-link from the Security section.

Refs: HKUDS/nanobot#2172
---
 docs/configuration.md | 86 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 80 insertions(+), 6 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index 338991a33..f9e116bd1 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -26,7 +26,52 @@ Instead of storing secrets directly in `config.json`, you can use `${VAR_NAME}`
 }
 ```
 
-For **systemd** deployments, use `EnvironmentFile=` in the service unit to load variables from a file that only the deploying user can read:
+Any string value in `config.json` can use `${VAR_NAME}`. Resolution runs once at startup, in memory only — resolved values are never written back to disk, so editing config through `nanobot onboard` or the WebUI preserves the placeholder.
+
+If a referenced variable is unset, nanobot fails fast at startup with `ValueError: Environment variable 'NAME' referenced in config is not set`.
+
+### More examples
+
+**MCP servers** — both stdio `env` and HTTP `headers`:
+
+```json
+{
+  "tools": {
+    "mcpServers": {
+      "github": {
+        "command": "npx",
+        "args": ["-y", "@modelcontextprotocol/server-github"],
+        "env": { "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}" }
+      },
+      "remote": {
+        "url": "https://example.com/mcp/",
+        "headers": { "Authorization": "Bearer ${REMOTE_MCP_TOKEN}" }
+      }
+    }
+  }
+}
+```
+
+**Web search providers:**
+
+```json
+{
+  "tools": {
+    "web": {
+      "search": {
+        "provider": "brave",
+        "apiKey": "${BRAVE_API_KEY}"
+      }
+    }
+  }
+}
+```
+
+### Loading variables at startup
+
+Pick whatever fits your deployment — nanobot only reads `os.environ` at startup, so any mechanism that populates the process environment works.
+
+**systemd** — use `EnvironmentFile=` in the service unit to load variables from a file that only the deploying user can read:
 
 ```ini
 # /etc/systemd/system/nanobot.service (excerpt)
@@ -42,6 +87,33 @@ TELEGRAM_TOKEN=your-token-here
 IMAP_PASSWORD=your-password-here
 ```
 
+**Docker** — `--env-file` (one `KEY=VALUE` per line) or `-e KEY=value`:
+
+```bash
+docker run --env-file=./nanobot.env nanobot/nanobot
+```
+
+**direnv** — drop a `.envrc` in your working directory and run `direnv allow`:
+
+```bash
+# .envrc (auto-loaded by direnv)
+export TELEGRAM_TOKEN=your-token-here
+export ANTHROPIC_API_KEY=...
+```
+
+**Secret managers (1Password, Bitwarden, pass)** — wrap the process so secrets only exist as env vars for the lifetime of the run, never on disk:
+
+```bash
+# 1Password — references in .env.tpl look like `op://Vault/Item/field`
+op run --env-file=.env.tpl -- nanobot agent
+
+# pass (passwordstore.org)
+ANTHROPIC_API_KEY="$(pass show api/anthropic)" nanobot agent
+
+# Bitwarden
+ANTHROPIC_API_KEY="$(bw get password api/anthropic)" nanobot agent
+```
+
 ## Providers
 
 > [!TIP]
@@ -917,7 +989,7 @@ By default, web search uses `duckduckgo`, and it works out of the box without an
     "web": {
       "search": {
         "provider": "brave",
-        "apiKey": "BSA..."
+        "apiKey": "${BRAVE_API_KEY}"
       }
     }
   }
@@ -931,7 +1003,7 @@ By default, web search uses `duckduckgo`, and it works out of the box without an
     "web": {
       "search": {
         "provider": "tavily",
-        "apiKey": "tvly-..."
+        "apiKey": "${TAVILY_API_KEY}"
       }
     }
   }
@@ -945,7 +1017,7 @@ By default, web search uses `duckduckgo`, and it works out of the box without an
     "web": {
       "search": {
         "provider": "jina",
-        "apiKey": "jina_..."
+        "apiKey": "${JINA_API_KEY}"
       }
     }
   }
@@ -959,7 +1031,7 @@ By default, web search uses `duckduckgo`, and it works out of the box without an
     "web": {
       "search": {
         "provider": "kagi",
-        "apiKey": "your-kagi-api-key"
+        "apiKey": "${KAGI_API_KEY}"
       }
     }
   }
@@ -973,7 +1045,7 @@ By default, web search uses `duckduckgo`, and it works out of the box without an
     "web": {
       "search": {
         "provider": "olostep",
-        "apiKey": "YOUR_OLOSTEP_API_KEY"
+        "apiKey": "${OLOSTEP_API_KEY}"
       }
     }
   }
@@ -1136,6 +1208,8 @@ MCP tools are automatically discovered and registered on startup. The LLM can us
 > [!TIP]
 > For production deployments, set `"restrictToWorkspace": true` and `"tools.exec.sandbox": "bwrap"` in your config to sandbox the agent.
 
+For API keys, tokens, and other secrets, see [Environment Variables for Secrets](#environment-variables-for-secrets) — avoid storing them directly in `config.json`.
+
 | Option | Default | Description |
 |--------|---------|-------------|
 | `tools.restrictToWorkspace` | `false` | When `true`, restricts **all** agent tools (shell, file read/write/edit, list) to the workspace directory. Prevents path traversal and out-of-scope access. |

From f017e209da1ed512ee4f352040e512fbc6e65ddb Mon Sep 17 00:00:00 2001
From: Xubin Ren <52506698+Re-bin@users.noreply.github.com>
Date: Mon, 18 May 2026 00:37:01 +0800
Subject: [PATCH 2/8] docs(configuration): align Docker env-file example

---
 docs/configuration.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index f9e116bd1..b5d74f7ca 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -87,10 +87,12 @@ TELEGRAM_TOKEN=your-token-here
 IMAP_PASSWORD=your-password-here
 ```
 
-**Docker** — `--env-file` (one `KEY=VALUE` per line) or `-e KEY=value`:
+**Docker** — pass an env file to the locally built image (one `KEY=VALUE` per line), or use `-e KEY=value`:
 
 ```bash
-docker run --env-file=./nanobot.env nanobot/nanobot
+docker run --rm --env-file=./nanobot.env \
+  -v ~/.nanobot:/home/nanobot/.nanobot \
+  nanobot agent -m "Hello"
 ```
 
 **direnv** — drop a `.envrc` in your working directory and run `direnv allow`:

From bf8a6e35fdd72a0fe29cd5a30bb020f63dd9919f Mon Sep 17 00:00:00 2001
From: voidborne-d <258577966+voidborne-d@users.noreply.github.com>
Date: Sun, 17 May 2026 16:39:54 +0800
Subject: [PATCH 3/8] docs(deployment): match docker run gateway example to
 docker-compose.yml (refs #3873)

The `docker run` example for `gateway` in `docs/deployment.md` had drifted from
the canonical configuration in `docker-compose.yml`:

- It omitted the security flags that `docker-compose.yml` already declares
  (`cap_drop: ALL` + `cap_add: SYS_ADMIN` + unconfined apparmor/seccomp).
  These are required whenever `tools.exec.sandbox: "bwrap"` is enabled, because
  bwrap needs CAP_SYS_ADMIN for user namespaces; without them bwrap exits with
  `clone3: Operation not permitted` and exec tools silently fail.
- It omitted `-p 8765:8765`, even though both the bundled `docker-compose.yml`
  and `Dockerfile` (`EXPOSE 18790 8765`) already expose the WebSocket channel
  / WebUI port; users following the docs would get a reachable gateway health
  endpoint but an unreachable WebUI.

This change keeps the two paths in sync so anyone reading deployment.md and
using `docker run` directly gets the same security posture and port surface
as the Compose path.

Also adds a short `!IMPORTANT` note documenting that `gateway.host` and
`channels.websocket.host` default to `127.0.0.1` (set in
`nanobot/config/schema.py:GatewayConfig`). Docker `-p` cannot forward to the
container's loopback interface, so the user must set both binds to `0.0.0.0`
in `config.json` for the published ports to actually be reachable. This is
the symptom reported as items 2 + 3 of #3873; items 1 + 4 of that issue are
already resolved on `main` (`Dockerfile` line 49 already exposes both ports,
and README.md lines 218-220 already reflect that the WebUI ships in the wheel).

Docs only, no code changes.

Signed-off-by: voidborne-d <258577966+voidborne-d@users.noreply.github.com>
---
 docs/deployment.md | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/docs/deployment.md b/docs/deployment.md
index 746c35218..8a2cd89eb 100644
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -10,6 +10,18 @@
 > [!IMPORTANT]
 > Official Docker usage currently means building from this repository with the included `Dockerfile`. Docker Hub images under third-party namespaces are not maintained or verified by HKUDS/nanobot; do not mount API keys or bot tokens into them unless you trust the publisher.
 
+> [!IMPORTANT]
+> The gateway and WebSocket channel default to `host: "127.0.0.1"` in `config.json` (set in `nanobot/config/schema.py`). Docker `-p` port forwarding cannot reach a container's loopback interface, so for the host or LAN to reach the exposed ports you must set both binds to `0.0.0.0` in `~/.nanobot/config.json` before starting the container:
+>
+> ```json
+> {
+>   "gateway":  { "host": "0.0.0.0" },
+>   "channels": { "websocket": { "host": "0.0.0.0" } }
+> }
+> ```
+>
+> When `host` is `0.0.0.0`, the gateway refuses to start unless `token` or `tokenIssueSecret` is also configured on the WebSocket channel — see [`webui/README.md`](../webui/README.md) for details.
+
 ### Docker Compose
 
 ```bash
@@ -36,8 +48,20 @@ docker run -v ~/.nanobot:/home/nanobot/.nanobot --rm nanobot onboard
 # Edit config on host to add API keys
 vim ~/.nanobot/config.json
 
-# Run gateway (connects to enabled channels, e.g. Telegram/Discord/Mochat)
-docker run -v ~/.nanobot:/home/nanobot/.nanobot -p 18790:18790 nanobot gateway
+# Run gateway (connects to enabled channels, e.g. Telegram/Discord/Mochat).
+# Mirrors the security caps and port mappings declared in docker-compose.yml:
+#   - `--cap-drop ALL --cap-add SYS_ADMIN` + unconfined apparmor/seccomp are required
+#     when `tools.exec.sandbox: "bwrap"` is enabled (bwrap needs CAP_SYS_ADMIN for
+#     user namespaces). Without them, `bwrap` exits with `clone3: Operation not permitted`.
+#   - `-p 8765:8765` exposes the WebSocket channel / WebUI alongside the gateway health
+#     endpoint on 18790.
+docker run \
+  --cap-drop ALL --cap-add SYS_ADMIN \
+  --security-opt apparmor=unconfined \
+  --security-opt seccomp=unconfined \
+  -v ~/.nanobot:/home/nanobot/.nanobot \
+  -p 18790:18790 -p 8765:8765 \
+  nanobot gateway
 
 # Or run a single command
 docker run -v ~/.nanobot:/home/nanobot/.nanobot --rm nanobot agent -m "Hello!"

From 48d35bd2d9830d0ae65307f69b8d9a8fa6fc8f34 Mon Sep 17 00:00:00 2001
From: chengyongru <2755839590@qq.com>
Date: Sun, 17 May 2026 21:02:18 +0800
Subject: [PATCH 4/8] feat(consolidator): add compact_idle_session method with
 lock-protected truncation

Add Consolidator.compact_idle_session(session_key, max_suffix=8) that
performs hard-truncation of idle sessions under the per-session
consolidation lock. This is the single lock-protected path for AutoCompact
to use instead of modifying session state directly, fixing the race
condition between AutoCompact and Consolidator.

Behavior:
- Acquires per-session consolidation lock
- Invalidates cache and reloads fresh from disk
- Splits unconsolidated tail into archive prefix and retained suffix
- Archives prefix via LLM (with raw_archive fallback on failure)
- Persists _last_summary in session metadata on success
- Returns summary text, None on LLM failure, or '' if nothing to archive

Tests: 6 new tests covering prefix archival, empty session timestamp
refresh, (nothing) summary exclusion, LLM failure fallback,
last_consolidated offset, and lock acquisition verification.
---
 nanobot/agent/memory.py          |  68 +++++++++++++
 tests/agent/test_consolidator.py | 162 +++++++++++++++++++++++++++++++
 2 files changed, 230 insertions(+)

diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index fd233bfa3..167f02284 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -769,6 +769,74 @@ class Consolidator:
             # the summary injection strategy with AutoCompact._archive().
             self._persist_last_summary(session, last_summary)
 
+    async def compact_idle_session(
+        self,
+        session_key: str,
+        max_suffix: int = 8,
+    ) -> str | None:
+        """Hard-truncate an idle session under the consolidation lock.
+
+        Used by AutoCompact so all session mutation goes through a single
+        lock-protected path.  Returns the summary text on success, ``None``
+        if the LLM failed (raw_archive fallback), or ``""`` if there was
+        nothing to archive.
+        """
+        lock = self.get_lock(session_key)
+        async with lock:
+            self.sessions.invalidate(session_key)
+            session = self.sessions.get_or_create(session_key)
+
+            tail = list(session.messages[session.last_consolidated:])
+            if not tail:
+                session.updated_at = datetime.now()
+                self.sessions.save(session)
+                return ""
+
+            probe = Session(
+                key=session.key,
+                messages=tail.copy(),
+                created_at=session.created_at,
+                updated_at=session.updated_at,
+                metadata={},
+                last_consolidated=0,
+            )
+            probe.retain_recent_legal_suffix(max_suffix)
+            kept = probe.messages
+            cut = len(tail) - len(kept)
+            archive_msgs = tail[:cut]
+
+            if not archive_msgs and not kept:
+                session.updated_at = datetime.now()
+                self.sessions.save(session)
+                return ""
+
+            last_active = session.updated_at
+            summary: str | None = ""
+            if archive_msgs:
+                summary = await self.archive(archive_msgs)
+
+            if summary and summary != "(nothing)":
+                session.metadata["_last_summary"] = {
+                    "text": summary,
+                    "last_active": last_active.isoformat(),
+                }
+
+            session.messages = kept
+            session.last_consolidated = 0
+            session.updated_at = datetime.now()
+            self.sessions.save(session)
+
+            if archive_msgs:
+                logger.info(
+                    "Idle-session compact for {}: archived={}, kept={}, summary={}",
+                    session_key,
+                    len(archive_msgs),
+                    len(kept),
+                    bool(summary),
+                )
+
+            return summary
+
 
 # ---------------------------------------------------------------------------
 # Dream — heavyweight cron-scheduled memory consolidation
diff --git a/tests/agent/test_consolidator.py b/tests/agent/test_consolidator.py
index 64ef9a886..49888b8a1 100644
--- a/tests/agent/test_consolidator.py
+++ b/tests/agent/test_consolidator.py
@@ -299,6 +299,168 @@ class TestConsolidatorTokenBudget:
         assert session.last_consolidated == 61
 
 
+class TestCompactIdleSession:
+    """Tests for Consolidator.compact_idle_session — lock-protected idle truncation."""
+
+    @pytest.fixture
+    def real_consolidator(self, store, mock_provider):
+        """Create a Consolidator with a real SessionManager (not a mock)."""
+        from nanobot.session.manager import SessionManager
+
+        sessions = SessionManager(store.workspace)
+        return Consolidator(
+            store=store,
+            provider=mock_provider,
+            model="test-model",
+            sessions=sessions,
+            context_window_tokens=1000,
+            build_messages=MagicMock(return_value=[]),
+            get_tool_definitions=MagicMock(return_value=[]),
+            max_completion_tokens=100,
+        )
+
+    @pytest.mark.asyncio
+    async def test_archives_prefix_keeps_suffix(self, real_consolidator, mock_provider):
+        """20 user/assistant turns → compact with max_suffix=8 → messages ≤ 8,
+        last_consolidated=0, _last_summary stored."""
+        mock_provider.chat_with_retry.return_value = MagicMock(
+            content="Summary of old conversation.", finish_reason="stop"
+        )
+        sessions = real_consolidator.sessions
+        session = sessions.get_or_create("cli:test")
+        for i in range(20):
+            session.add_message("user", f"user msg {i}")
+            session.add_message("assistant", f"assistant msg {i}")
+        sessions.save(session)
+
+        result = await real_consolidator.compact_idle_session("cli:test", max_suffix=8)
+        assert result == "Summary of old conversation."
+
+        reloaded = sessions.get_or_create("cli:test")
+        assert len(reloaded.messages) <= 8
+        assert reloaded.last_consolidated == 0
+        meta = reloaded.metadata.get("_last_summary")
+        assert meta is not None
+        assert meta["text"] == "Summary of old conversation."
+        assert "last_active" in meta
+
+    @pytest.mark.asyncio
+    async def test_empty_session_refreshes_timestamp(self, real_consolidator):
+        """Empty session with old updated_at → refreshed after call, returns ''."""
+        from datetime import datetime, timedelta
+
+        sessions = real_consolidator.sessions
+        session = sessions.get_or_create("cli:empty")
+        old_ts = datetime.now() - timedelta(hours=2)
+        session.updated_at = old_ts
+        sessions.save(session)
+
+        result = await real_consolidator.compact_idle_session("cli:empty")
+        assert result == ""
+
+        reloaded = sessions.get_or_create("cli:empty")
+        assert reloaded.updated_at > old_ts
+
+    @pytest.mark.asyncio
+    async def test_nothing_summary_not_stored(self, real_consolidator, mock_provider):
+        """LLM returns '(nothing)' → _last_summary NOT in metadata."""
+        mock_provider.chat_with_retry.return_value = MagicMock(
+            content="(nothing)", finish_reason="stop"
+        )
+        sessions = real_consolidator.sessions
+        session = sessions.get_or_create("cli:nothing")
+        for i in range(10):
+            session.add_message("user", f"u{i}")
+            session.add_message("assistant", f"a{i}")
+        sessions.save(session)
+
+        result = await real_consolidator.compact_idle_session("cli:nothing", max_suffix=4)
+        assert result == "(nothing)"
+
+        reloaded = sessions.get_or_create("cli:nothing")
+        assert "_last_summary" not in reloaded.metadata
+
+    @pytest.mark.asyncio
+    async def test_llm_failure_still_truncates(self, real_consolidator, mock_provider, store):
+        """LLM raises RuntimeError → raw_archive fires, session still truncated, returns None."""
+        mock_provider.chat_with_retry.side_effect = RuntimeError("LLM unavailable")
+        sessions = real_consolidator.sessions
+        session = sessions.get_or_create("cli:fail")
+        for i in range(10):
+            session.add_message("user", f"u{i}")
+            session.add_message("assistant", f"a{i}")
+        sessions.save(session)
+
+        result = await real_consolidator.compact_idle_session("cli:fail", max_suffix=4)
+        assert result is None
+
+        # raw_archive should have been called (history.jsonl gets an entry)
+        entries = store.read_unprocessed_history(since_cursor=0)
+        assert any("[RAW]" in e["content"] for e in entries)
+
+        # Session should still be truncated
+        reloaded = sessions.get_or_create("cli:fail")
+        assert len(reloaded.messages) <= 4
+
+    @pytest.mark.asyncio
+    async def test_respects_last_consolidated(self, real_consolidator, mock_provider):
+        """30 turns with last_consolidated=50 → only unconsolidated tail considered."""
+        mock_provider.chat_with_retry.return_value = MagicMock(
+            content="Tail summary.", finish_reason="stop"
+        )
+        sessions = real_consolidator.sessions
+        session = sessions.get_or_create("cli:offset")
+        for i in range(30):
+            session.add_message("user", f"u{i}")
+            session.add_message("assistant", f"a{i}")
+        session.last_consolidated = 50  # Only 10 messages unconsolidated
+        sessions.save(session)
+
+        result = await real_consolidator.compact_idle_session("cli:offset", max_suffix=4)
+        assert result == "Tail summary."
+
+        # Verify only the unconsolidated tail was processed:
+        # 10 unconsolidated messages (50-59), keep suffix of 4 → archive 6
+        archived_call = mock_provider.chat_with_retry.call_args
+        user_content = archived_call.kwargs["messages"][1]["content"]
+        # Should contain only tail messages, not early ones
+        assert "u0" not in user_content
+        assert "u25" in user_content or "a25" in user_content
+
+    @pytest.mark.asyncio
+    async def test_acquires_consolidation_lock(self, real_consolidator, mock_provider):
+        """Verify lock is held during execution."""
+        import asyncio
+
+        # Use a slow LLM response to ensure the lock is held while we check
+        started = asyncio.Event()
+
+        async def slow_chat(**kwargs):
+            started.set()
+            await asyncio.sleep(0.1)
+            return MagicMock(content="Summary.", finish_reason="stop")
+
+        mock_provider.chat_with_retry = slow_chat
+
+        sessions = real_consolidator.sessions
+        session = sessions.get_or_create("cli:lock")
+        for i in range(10):
+            session.add_message("user", f"u{i}")
+            session.add_message("assistant", f"a{i}")
+        sessions.save(session)
+
+        lock = real_consolidator.get_lock("cli:lock")
+        assert not lock.locked()
+
+        task = asyncio.ensure_future(
+            real_consolidator.compact_idle_session("cli:lock", max_suffix=4)
+        )
+        await started.wait()
+        assert lock.locked()
+        await task
+        assert not lock.locked()
+
+
 class TestRawArchiveTruncation:
     """raw_archive() must cap entry size to avoid bloating history.jsonl."""
 

From 888d54790db98dee7c04b357afa567bb6ac485af Mon Sep 17 00:00:00 2001
From: chengyongru <2755839590@qq.com>
Date: Sun, 17 May 2026 21:12:26 +0800
Subject: [PATCH 5/8] fix(memory): add session-refresh guard to
 maybe_consolidate_by_tokens

When background consolidation runs with a stale session reference (captured
before AutoCompact replaced the session via compact_idle_session), it could
operate on outdated data. Now, after acquiring the per-session lock, the
method refreshes its session reference from SessionManager.get_or_create().
If the session was replaced, it swaps in the fresh reference before doing
any consolidation work.

This prevents a race where AutoCompact truncates an idle session while a
background maybe_consolidate_by_tokens call is in flight with the old
session object.
---
 nanobot/agent/memory.py          |  5 +++
 tests/agent/test_consolidator.py | 64 ++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index 167f02284..b7a325a02 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -683,6 +683,11 @@ class Consolidator:
 
         lock = self.get_lock(session.key)
         async with lock:
+            # Refresh session reference: AutoCompact may have replaced it.
+            fresh = self.sessions.get_or_create(session.key)
+            if fresh is not session:
+                session = fresh
+
             budget = self._input_token_budget
             target = int(budget * self.consolidation_ratio)
             last_summary = await self._consolidate_replay_overflow(
diff --git a/tests/agent/test_consolidator.py b/tests/agent/test_consolidator.py
index 49888b8a1..159ec01d1 100644
--- a/tests/agent/test_consolidator.py
+++ b/tests/agent/test_consolidator.py
@@ -28,6 +28,12 @@ def mock_provider():
 def consolidator(store, mock_provider):
     sessions = MagicMock()
     sessions.save = MagicMock()
+    # When maybe_consolidate_by_tokens refreshes the session reference via
+    # get_or_create(session.key), it should get back the same object the test
+    # passed in.  Store sessions by key so the lookup is transparent.
+    _session_cache: dict[str, MagicMock] = {}
+    sessions.get_or_create = MagicMock(side_effect=lambda key: _session_cache.get(key, MagicMock()))
+    sessions._session_cache = _session_cache
     return Consolidator(
         store=store,
         provider=mock_provider,
@@ -117,6 +123,7 @@ class TestConsolidatorTokenBudget:
         session.last_consolidated = 0
         session.messages = [{"role": "user", "content": "hi"}]
         session.key = "test:key"
+        consolidator.sessions._session_cache[session.key] = session
         consolidator.estimate_session_prompt_tokens = MagicMock(return_value=(100, "tiktoken"))
         consolidator.archive = AsyncMock(return_value=True)
         await consolidator.maybe_consolidate_by_tokens(session)
@@ -152,6 +159,7 @@ class TestConsolidatorTokenBudget:
             session.add_message("user", f"u{i}")
             session.add_message("assistant", f"a{i}")
 
+        consolidator.sessions._session_cache[session.key] = session
         consolidator.estimate_session_prompt_tokens = MagicMock(return_value=(100, "tiktoken"))
         consolidator.archive = AsyncMock(return_value="old conversation summary")
 
@@ -184,6 +192,7 @@ class TestConsolidatorTokenBudget:
         session.add_message("tool", "tool result", tool_call_id="call-1", name="x")
         session.add_message("assistant", "final answer")
 
+        consolidator.sessions._session_cache[session.key] = session
         consolidator.estimate_session_prompt_tokens = MagicMock(return_value=(100, "tiktoken"))
         consolidator.archive = AsyncMock(return_value="tool turn summary")
 
@@ -210,6 +219,7 @@ class TestConsolidatorTokenBudget:
             }
             for i in range(70)
         ]
+        consolidator.sessions._session_cache[session.key] = session
         consolidator.estimate_session_prompt_tokens = MagicMock(
             side_effect=[(1200, "tiktoken"), (400, "tiktoken")]
         )
@@ -238,6 +248,7 @@ class TestConsolidatorTokenBudget:
             for i in range(70)
         ]
         session.metadata = {}
+        consolidator.sessions._session_cache[session.key] = session
         consolidator.estimate_session_prompt_tokens = MagicMock(
             side_effect=[(1200, "tiktoken"), (400, "tiktoken")]
         )
@@ -263,6 +274,7 @@ class TestConsolidatorTokenBudget:
             for i in range(70)
         ]
         session.metadata = {}
+        consolidator.sessions._session_cache[session.key] = session
         # Keep estimates high so the loop would otherwise run multiple rounds.
         consolidator.estimate_session_prompt_tokens = MagicMock(
             return_value=(1200, "tiktoken")
@@ -287,6 +299,7 @@ class TestConsolidatorTokenBudget:
             }
             for i in range(70)
         ]
+        consolidator.sessions._session_cache[session.key] = session
         consolidator.estimate_session_prompt_tokens = MagicMock(
             side_effect=[(1200, "tiktoken"), (400, "tiktoken")]
         )
@@ -461,6 +474,57 @@ class TestCompactIdleSession:
         assert not lock.locked()
 
 
+class TestConsolidatorSessionRefresh:
+    """Background consolidation must detect stale session references."""
+
+    @pytest.mark.asyncio
+    async def test_reloads_stale_session_after_compact(self, tmp_path):
+        """After compact_idle_session replaces the session, a concurrent
+        maybe_consolidate_by_tokens with the old reference should use the
+        fresh session from cache instead of overwriting."""
+        from nanobot.agent.memory import Consolidator, MemoryStore
+        from nanobot.session.manager import SessionManager
+
+        store = MemoryStore(tmp_path)
+        provider = MagicMock()
+        provider.chat_with_retry = AsyncMock(
+            return_value=MagicMock(content="summary", finish_reason="stop")
+        )
+        provider.generation.max_tokens = 4096
+        provider.estimate_prompt_tokens = MagicMock(return_value=(10, "test"))
+        sessions = SessionManager(tmp_path)
+        consolidator = Consolidator(
+            store=store,
+            provider=provider,
+            model="test-model",
+            sessions=sessions,
+            context_window_tokens=128_000,
+            build_messages=MagicMock(return_value=[]),
+            get_tool_definitions=MagicMock(return_value=[]),
+        )
+
+        # Populate session with many messages
+        session = sessions.get_or_create("cli:test")
+        for i in range(20):
+            session.add_message("user", f"u{i}")
+            session.add_message("assistant", f"a{i}")
+        sessions.save(session)
+
+        # Simulate: background consolidation captures old reference
+        old_ref = session
+
+        # AutoCompact runs first and truncates to 8
+        await consolidator.compact_idle_session("cli:test", max_suffix=8)
+
+        # Background consolidation runs with stale reference —
+        # should detect the session was replaced and not undo the compact.
+        await consolidator.maybe_consolidate_by_tokens(old_ref)
+
+        session_after = sessions.get_or_create("cli:test")
+        # Messages should still be truncated (not restored to 40)
+        assert len(session_after.messages) <= 8
+
+
 class TestRawArchiveTruncation:
     """raw_archive() must cap entry size to avoid bloating history.jsonl."""
 

From 5bb94edc99d24796ba069d1dda2b952b7b965e56 Mon Sep 17 00:00:00 2001
From: chengyongru <2755839590@qq.com>
Date: Sun, 17 May 2026 21:30:44 +0800
Subject: [PATCH 6/8] refactor(autocompact): delegate _archive to
 Consolidator.compact_idle_session

Replace AutoCompact._archive() direct session mutation with delegation
to Consolidator.compact_idle_session(). Remove _split_unconsolidated()
method since that logic now lives inside compact_idle_session.

All session mutation for idle compaction now goes through the
Consolidator's lock, eliminating the race condition between
background token consolidation and idle TTL compaction.

Changes:
- autocompact.py: rewrite _archive() to call compact_idle_session,
  remove _split_unconsolidated(), clean up unused imports
- test_autocompact_unit.py: replace TestArchive/TestSplitUnconsolidated
  with TestArchiveDelegates that verifies delegation behavior
- test_auto_compact.py: convert all consolidator.archive mocks to
  consolidator.compact_idle_session mocks via _make_fake_compact helper
---
 nanobot/agent/autocompact.py         |  59 +-----
 tests/agent/test_auto_compact.py     | 303 +++++++++++++--------------
 tests/agent/test_autocompact_unit.py | 175 +++-------------
 3 files changed, 183 insertions(+), 354 deletions(-)

diff --git a/nanobot/agent/autocompact.py b/nanobot/agent/autocompact.py
index 11e531039..4ad241170 100644
--- a/nanobot/agent/autocompact.py
+++ b/nanobot/agent/autocompact.py
@@ -4,7 +4,7 @@ from __future__ import annotations
 
 from collections.abc import Collection
 from datetime import datetime
-from typing import TYPE_CHECKING, Any, Callable, Coroutine
+from typing import TYPE_CHECKING, Callable, Coroutine
 
 from loguru import logger
 
@@ -37,27 +37,6 @@ class AutoCompact:
     def _format_summary(text: str, last_active: datetime) -> str:
         return f"Previous conversation summary (last active {last_active.isoformat()}):\n{text}"
 
-    def _split_unconsolidated(
-        self, session: Session,
-    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
-        """Split live session tail into archiveable prefix and retained recent suffix."""
-        tail = list(session.messages[session.last_consolidated:])
-        if not tail:
-            return [], []
-
-        probe = Session(
-            key=session.key,
-            messages=tail.copy(),
-            created_at=session.created_at,
-            updated_at=session.updated_at,
-            metadata={},
-            last_consolidated=0,
-        )
-        probe.retain_recent_legal_suffix(self._RECENT_SUFFIX_MESSAGES)
-        kept = probe.messages
-        cut = len(tail) - len(kept)
-        return tail[:cut], kept
-
     def check_expired(self, schedule_background: Callable[[Coroutine], None],
                       active_session_keys: Collection[str] = ()) -> None:
         """Schedule archival for idle sessions, skipping those with in-flight agent tasks."""
@@ -74,33 +53,17 @@ class AutoCompact:
 
     async def _archive(self, key: str) -> None:
         try:
-            self.sessions.invalidate(key)
-            session = self.sessions.get_or_create(key)
-            archive_msgs, kept_msgs = self._split_unconsolidated(session)
-            if not archive_msgs and not kept_msgs:
-                session.updated_at = datetime.now()
-                self.sessions.save(session)
-                return
-
-            last_active = session.updated_at
-            summary = ""
-            if archive_msgs:
-                summary = await self.consolidator.archive(archive_msgs) or ""
+            summary = await self.consolidator.compact_idle_session(
+                key, self._RECENT_SUFFIX_MESSAGES,
+            )
             if summary and summary != "(nothing)":
-                self._summaries[key] = (summary, last_active)
-                session.metadata["_last_summary"] = {"text": summary, "last_active": last_active.isoformat()}
-            session.messages = kept_msgs
-            session.last_consolidated = 0
-            session.updated_at = datetime.now()
-            self.sessions.save(session)
-            if archive_msgs:
-                logger.info(
-                    "Auto-compact: archived {} (archived={}, kept={}, summary={})",
-                    key,
-                    len(archive_msgs),
-                    len(kept_msgs),
-                    bool(summary),
-                )
+                session = self.sessions.get_or_create(key)
+                meta = session.metadata.get("_last_summary")
+                if isinstance(meta, dict):
+                    self._summaries[key] = (
+                        meta["text"],
+                        datetime.fromisoformat(meta["last_active"]),
+                    )
         except Exception:
             logger.exception("Auto-compact: failed for {}", key)
         finally:
diff --git a/tests/agent/test_auto_compact.py b/tests/agent/test_auto_compact.py
index 0bc02a694..37fcbfdae 100644
--- a/tests/agent/test_auto_compact.py
+++ b/tests/agent/test_auto_compact.py
@@ -45,6 +45,73 @@ def _add_turns(session, turns: int, *, prefix: str = "msg") -> None:
         session.add_message("assistant", f"{prefix} assistant {i}")
 
 
+def _make_fake_compact(
+    loop: AgentLoop,
+    *,
+    summary: str = "Summary.",
+    on_archive=None,
+    track_archived: list | None = None,
+    track_count: bool = False,
+):
+    """Return a fake compact_idle_session that mirrors the real method's session mutation."""
+    from nanobot.session.manager import Session as _Session
+
+    state = {"count": 0}
+
+    async def _fake_compact(key: str, max_suffix: int = 8) -> str:
+        state["count"] += 1
+        session = loop.sessions.get_or_create(key)
+
+        tail = list(session.messages[session.last_consolidated:])
+        if not tail:
+            session.updated_at = datetime.now()
+            loop.sessions.save(session)
+            return ""
+
+        probe = _Session(
+            key=session.key,
+            messages=tail.copy(),
+            created_at=session.created_at,
+            updated_at=session.updated_at,
+            metadata={},
+            last_consolidated=0,
+        )
+        probe.retain_recent_legal_suffix(max_suffix)
+        kept = probe.messages
+        cut = len(tail) - len(kept)
+        archive_msgs = tail[:cut]
+
+        if not archive_msgs and not kept:
+            session.updated_at = datetime.now()
+            loop.sessions.save(session)
+            return ""
+
+        last_active = session.updated_at
+        s = summary
+        if archive_msgs:
+            if on_archive:
+                result = on_archive(archive_msgs)
+                s = result if isinstance(result, str) else summary
+            if track_archived is not None:
+                track_archived.extend(archive_msgs)
+
+        if s and s != "(nothing)":
+            session.metadata["_last_summary"] = {
+                "text": s,
+                "last_active": last_active.isoformat(),
+            }
+
+        session.messages = kept
+        session.last_consolidated = 0
+        session.updated_at = datetime.now()
+        loop.sessions.save(session)
+        return s
+
+    # Attach state for count access
+    _fake_compact.state = state  # type: ignore[attr-defined]
+    return _fake_compact
+
+
 class TestSessionTTLConfig:
     """Test session TTL configuration."""
 
@@ -201,10 +268,7 @@ class TestAutoCompact:
         s2.add_message("user", "recent")
         loop.sessions.save(s2)
 
-        async def _fake_archive(messages):
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(loop)
         loop.auto_compact.check_expired(loop._schedule_background)
         await asyncio.sleep(0.1)
 
@@ -222,12 +286,9 @@ class TestAutoCompact:
         loop.sessions.save(session)
 
         archived_messages = []
-
-        async def _fake_archive(messages):
-            archived_messages.extend(messages)
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(
+            loop, track_archived=archived_messages,
+        )
 
         await loop.auto_compact._archive("cli:test")
 
@@ -246,10 +307,9 @@ class TestAutoCompact:
         _add_turns(session, 6, prefix="hello")
         loop.sessions.save(session)
 
-        async def _fake_archive(messages):
-            return "User said hello."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(
+            loop, summary="User said hello.",
+        )
 
         await loop.auto_compact._archive("cli:test")
 
@@ -262,23 +322,16 @@ class TestAutoCompact:
 
     @pytest.mark.asyncio
     async def test_auto_compact_empty_session(self, tmp_path):
-        """_archive on empty session should not archive."""
+        """_archive on empty session should not store a summary."""
         loop = _make_loop(tmp_path, session_ttl_minutes=15)
 
-        archive_called = False
-
-        async def _fake_archive(messages):
-            nonlocal archive_called
-            archive_called = True
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(loop)
 
         await loop.auto_compact._archive("cli:test")
 
-        assert not archive_called
         session_after = loop.sessions.get_or_create("cli:test")
         assert len(session_after.messages) == 0
+        assert "cli:test" not in loop.auto_compact._summaries
         await loop.close_mcp()
 
     @pytest.mark.asyncio
@@ -290,18 +343,14 @@ class TestAutoCompact:
         session.last_consolidated = 18
         loop.sessions.save(session)
 
-        archived_count = 0
-
-        async def _fake_archive(messages):
-            nonlocal archived_count
-            archived_count = len(messages)
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        archived_messages = []
+        loop.consolidator.compact_idle_session = _make_fake_compact(
+            loop, track_archived=archived_messages,
+        )
 
         await loop.auto_compact._archive("cli:test")
 
-        assert archived_count == 2
+        assert len(archived_messages) == 2
         await loop.close_mcp()
 
 
@@ -334,12 +383,9 @@ class TestAutoCompactIdleDetection:
         loop.sessions.save(session)
 
         archived_messages = []
-
-        async def _fake_archive(messages):
-            archived_messages.extend(messages)
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(
+            loop, track_archived=archived_messages,
+        )
 
         # Simulate proactive archive completing before message arrives
         await loop.auto_compact._archive("cli:test")
@@ -402,10 +448,7 @@ class TestAutoCompactIdleDetection:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        async def _fake_archive(messages):
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(loop)
 
         msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
         response = await loop._process_message(msg)
@@ -466,10 +509,7 @@ class TestAutoCompactSystemMessages:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        async def _fake_archive(messages):
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(loop)
 
         # Simulate proactive archive completing before system message arrives
         await loop.auto_compact._archive("cli:test")
@@ -547,12 +587,9 @@ class TestAutoCompactEdgeCases:
         loop.sessions.save(session)
 
         archived_messages = []
-
-        async def _fake_archive(messages):
-            archived_messages.extend(messages)
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(
+            loop, track_archived=archived_messages,
+        )
 
         # Simulate proactive archive completing before message arrives
         await loop.auto_compact._archive("cli:test")
@@ -644,10 +681,7 @@ class TestAutoCompactIntegration:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        async def _fake_archive(messages):
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(loop)
 
         # Simulate proactive archive completing before message arrives
         await loop.auto_compact._archive("cli:test")
@@ -704,12 +738,9 @@ class TestProactiveAutoCompact:
         loop.sessions.save(session)
 
         archived_messages = []
-
-        async def _fake_archive(messages):
-            archived_messages.extend(messages)
-            return "User chatted about old things."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(
+            loop, summary="User chatted about old things.", track_archived=archived_messages,
+        )
 
         await self._run_check_expired(loop)
 
@@ -748,14 +779,14 @@ class TestProactiveAutoCompact:
         started = asyncio.Event()
         block_forever = asyncio.Event()
 
-        async def _slow_archive(messages):
+        async def _slow_compact(key, max_suffix=8):
             nonlocal archive_count
             archive_count += 1
             started.set()
             await block_forever.wait()
             return "Summary."
 
-        loop.consolidator.archive = _slow_archive
+        loop.consolidator.compact_idle_session = _slow_compact
 
         # First call starts archiving via callback
         loop.auto_compact.check_expired(loop._schedule_background)
@@ -781,10 +812,10 @@ class TestProactiveAutoCompact:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        async def _failing_archive(messages):
+        async def _failing_compact(key, max_suffix=8):
             raise RuntimeError("LLM down")
 
-        loop.consolidator.archive = _failing_archive
+        loop.consolidator.compact_idle_session = _failing_compact
 
         # Should not raise
         await self._run_check_expired(loop)
@@ -795,24 +826,18 @@ class TestProactiveAutoCompact:
 
     @pytest.mark.asyncio
     async def test_proactive_archive_skips_empty_sessions(self, tmp_path):
-        """Proactive archive should not call LLM for sessions with no un-consolidated messages."""
+        """Proactive archive should not produce a summary for sessions with no messages."""
         loop = _make_loop(tmp_path, session_ttl_minutes=15)
         session = loop.sessions.get_or_create("cli:test")
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        archive_called = False
-
-        async def _fake_archive(messages):
-            nonlocal archive_called
-            archive_called = True
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(loop)
 
         await self._run_check_expired(loop)
 
-        assert not archive_called
+        # Empty session should not produce a summary
+        assert "cli:test" not in loop.auto_compact._summaries
         await loop.close_mcp()
 
     @pytest.mark.asyncio
@@ -824,18 +849,12 @@ class TestProactiveAutoCompact:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        archive_count = 0
-
-        async def _fake_archive(messages):
-            nonlocal archive_count
-            archive_count += 1
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        _fake_compact = _make_fake_compact(loop)
+        loop.consolidator.compact_idle_session = _fake_compact
 
         # Simulate an active agent task for this session
         await self._run_check_expired(loop, active_session_keys={"cli:test"})
-        assert archive_count == 0
+        assert _fake_compact.state["count"] == 0
 
         session_after = loop.sessions.get_or_create("cli:test")
         assert len(session_after.messages) == 12  # All messages preserved
@@ -851,22 +870,16 @@ class TestProactiveAutoCompact:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        archive_count = 0
-
-        async def _fake_archive(messages):
-            nonlocal archive_count
-            archive_count += 1
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        _fake_compact = _make_fake_compact(loop)
+        loop.consolidator.compact_idle_session = _fake_compact
 
         # First tick: active task, skip
         await self._run_check_expired(loop, active_session_keys={"cli:test"})
-        assert archive_count == 0
+        assert _fake_compact.state["count"] == 0
 
         # Second tick: task completed, should archive
         await self._run_check_expired(loop)
-        assert archive_count == 1
+        assert _fake_compact.state["count"] == 1
         await loop.close_mcp()
 
     @pytest.mark.asyncio
@@ -888,18 +901,12 @@ class TestProactiveAutoCompact:
         s3.add_message("user", "recent")
         loop.sessions.save(s3)
 
-        archive_count = 0
-
-        async def _fake_archive(messages):
-            nonlocal archive_count
-            archive_count += 1
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        _fake_compact = _make_fake_compact(loop)
+        loop.consolidator.compact_idle_session = _fake_compact
 
         await self._run_check_expired(loop, active_session_keys={"cli:expired_active"})
 
-        assert archive_count == 1
+        assert _fake_compact.state["count"] == 1
         s1_after = loop.sessions.get_or_create("cli:expired_idle")
         assert len(s1_after.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES
         s2_after = loop.sessions.get_or_create("cli:expired_active")
@@ -917,22 +924,16 @@ class TestProactiveAutoCompact:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        archive_count = 0
-
-        async def _fake_archive(messages):
-            nonlocal archive_count
-            archive_count += 1
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        _fake_compact = _make_fake_compact(loop)
+        loop.consolidator.compact_idle_session = _fake_compact
 
         # First tick: archives the session
         await self._run_check_expired(loop)
-        assert archive_count == 1
+        assert _fake_compact.state["count"] == 1
 
         # Second tick: should NOT re-schedule (updated_at is fresh after clear)
         await self._run_check_expired(loop)
-        assert archive_count == 1  # Still 1, not re-scheduled
+        assert _fake_compact.state["count"] == 1  # Still 1, not re-scheduled
         await loop.close_mcp()
 
     @pytest.mark.asyncio
@@ -943,22 +944,15 @@ class TestProactiveAutoCompact:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        archive_count = 0
-
-        async def _fake_archive(messages):
-            nonlocal archive_count
-            archive_count += 1
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(loop)
 
         # First tick: skips (no messages), refreshes updated_at
         await self._run_check_expired(loop)
-        assert archive_count == 0
+        assert "cli:test" not in loop.auto_compact._summaries
 
         # Second tick: should NOT re-schedule because updated_at is fresh
         await self._run_check_expired(loop)
-        assert archive_count == 0
+        assert "cli:test" not in loop.auto_compact._summaries
         await loop.close_mcp()
 
     @pytest.mark.asyncio
@@ -970,18 +964,12 @@ class TestProactiveAutoCompact:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        archive_count = 0
-
-        async def _fake_archive(messages):
-            nonlocal archive_count
-            archive_count += 1
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        _fake_compact = _make_fake_compact(loop)
+        loop.consolidator.compact_idle_session = _fake_compact
 
         # First compact cycle
         await loop.auto_compact._archive("cli:test")
-        assert archive_count == 1
+        assert _fake_compact.state["count"] == 1
 
         # User returns, sends new messages
         msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="second topic")
@@ -995,7 +983,7 @@ class TestProactiveAutoCompact:
 
         # Second compact cycle should succeed
         await loop.auto_compact._archive("cli:test")
-        assert archive_count == 2
+        assert _fake_compact.state["count"] == 2
         await loop.close_mcp()
 
 
@@ -1011,10 +999,9 @@ class TestSummaryPersistence:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        async def _fake_archive(messages):
-            return "User said hello."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(
+            loop, summary="User said hello.",
+        )
 
         await loop.auto_compact._archive("cli:test")
 
@@ -1036,10 +1023,9 @@ class TestSummaryPersistence:
         session.updated_at = last_active
         loop.sessions.save(session)
 
-        async def _fake_archive(messages):
-            return "User said hello."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(
+            loop, summary="User said hello.",
+        )
 
         # Archive
         await loop.auto_compact._archive("cli:test")
@@ -1069,10 +1055,7 @@ class TestSummaryPersistence:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        async def _fake_archive(messages):
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(loop)
 
         await loop.auto_compact._archive("cli:test")
 
@@ -1100,10 +1083,7 @@ class TestSummaryPersistence:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        async def _fake_archive(messages):
-            return "Summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(loop)
 
         await loop.auto_compact._archive("cli:test")
 
@@ -1129,10 +1109,9 @@ class TestSummaryPersistence:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        async def _fake_archive(messages):
-            return "First summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(
+            loop, summary="First summary.",
+        )
         await loop.auto_compact._archive("cli:test")
 
         # Consume the first summary via hot path
@@ -1148,10 +1127,9 @@ class TestSummaryPersistence:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        async def _fake_archive2(messages):
-            return "Second summary."
-
-        loop.consolidator.archive = _fake_archive2
+        loop.consolidator.compact_idle_session = _make_fake_compact(
+            loop, summary="Second summary.",
+        )
         await loop.auto_compact._archive("cli:test")
 
         # The second archive writes a new summary
@@ -1173,10 +1151,9 @@ class TestSummaryPersistence:
         session.updated_at = datetime.now() - timedelta(minutes=20)
         loop.sessions.save(session)
 
-        async def _fake_archive(messages):
-            return "Old summary."
-
-        loop.consolidator.archive = _fake_archive
+        loop.consolidator.compact_idle_session = _make_fake_compact(
+            loop, summary="Old summary.",
+        )
         await loop.auto_compact._archive("cli:test")
 
         # Verify summary exists before /new
diff --git a/tests/agent/test_autocompact_unit.py b/tests/agent/test_autocompact_unit.py
index d501770dd..1d3277a01 100644
--- a/tests/agent/test_autocompact_unit.py
+++ b/tests/agent/test_autocompact_unit.py
@@ -38,7 +38,7 @@ def _make_autocompact(
         sessions = MagicMock(spec=SessionManager)
     if consolidator is None:
         consolidator = MagicMock()
-        consolidator.archive = AsyncMock(return_value="Summary.")
+        consolidator.compact_idle_session = AsyncMock(return_value="Summary.")
     return AutoCompact(
         sessions=sessions,
         consolidator=consolidator,
@@ -178,62 +178,6 @@ class TestFormatSummary:
         assert result.startswith("Previous conversation summary (last active ")
 
 
-# ---------------------------------------------------------------------------
-# _split_unconsolidated
-# ---------------------------------------------------------------------------
-
-
-class TestSplitUnconsolidated:
-    """Test AutoCompact._split_unconsolidated splitting logic."""
-
-    def test_empty_session_returns_both_empty(self):
-        """Empty session should return ([], [])."""
-        ac = _make_autocompact()
-        session = _make_session(messages=[])
-        archive, kept = ac._split_unconsolidated(session)
-        assert archive == []
-        assert kept == []
-
-    def test_all_messages_archivable_when_more_than_suffix(self):
-        """Session with many messages should archive a prefix and keep suffix."""
-        ac = _make_autocompact()
-        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
-        session = _make_session(messages=msgs)
-        archive, kept = ac._split_unconsolidated(session)
-        assert len(archive) > 0
-        assert len(kept) <= AutoCompact._RECENT_SUFFIX_MESSAGES
-
-    def test_fewer_messages_than_suffix_returns_empty_archive(self):
-        """Session with fewer messages than suffix should have empty archive."""
-        ac = _make_autocompact()
-        msgs = [{"role": "user", "content": f"u{i}"} for i in range(3)]
-        session = _make_session(messages=msgs)
-        archive, kept = ac._split_unconsolidated(session)
-        assert archive == []
-        assert len(kept) == len(msgs)
-
-    def test_respects_last_consolidated_offset(self):
-        """Only messages after last_consolidated should be considered."""
-        ac = _make_autocompact()
-        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
-        # First 10 are already consolidated
-        session = _make_session(messages=msgs, last_consolidated=10)
-        archive, kept = ac._split_unconsolidated(session)
-        # Only the tail of 10 messages is considered for splitting
-        assert all(m["content"] in [f"u{i}" for i in range(10, 20)] for m in kept)
-        assert all(m["content"] in [f"u{i}" for i in range(10, 20)] for m in archive)
-
-    def test_retain_recent_legal_suffix_keeps_last_n(self):
-        """The kept suffix should be at most _RECENT_SUFFIX_MESSAGES long."""
-        ac = _make_autocompact()
-        # 20 user messages = 20 messages total, all after last_consolidated=0
-        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
-        session = _make_session(messages=msgs)
-        archive, kept = ac._split_unconsolidated(session)
-        assert len(kept) <= AutoCompact._RECENT_SUFFIX_MESSAGES
-        assert len(archive) == len(msgs) - len(kept)
-
-
 # ---------------------------------------------------------------------------
 # check_expired
 # ---------------------------------------------------------------------------
@@ -313,126 +257,71 @@ class TestCheckExpired:
 # ---------------------------------------------------------------------------
 
 
-class TestArchive:
-    """Test AutoCompact._archive async method."""
+class TestArchiveDelegates:
+    """_archive should delegate all session mutation to Consolidator."""
 
     @pytest.mark.asyncio
-    async def test_empty_session_updates_timestamp_no_archive_call(self):
-        """Empty session should refresh updated_at and not call consolidator.archive."""
+    async def test_calls_compact_idle_session(self):
         ac = _make_autocompact()
         mock_sm = MagicMock(spec=SessionManager)
-        empty_session = _make_session(messages=[])
-        mock_sm.get_or_create.return_value = empty_session
         ac.sessions = mock_sm
-        ac.consolidator.archive = AsyncMock(return_value="Summary.")
+        ac.consolidator.compact_idle_session = AsyncMock(return_value="Summary.")
 
         await ac._archive("cli:test")
 
-        ac.consolidator.archive.assert_not_called()
-        mock_sm.save.assert_called_once_with(empty_session)
-        # updated_at was refreshed
-        assert empty_session.updated_at > datetime.now() - timedelta(seconds=5)
+        ac.consolidator.compact_idle_session.assert_awaited_once_with(
+            "cli:test", ac._RECENT_SUFFIX_MESSAGES,
+        )
 
     @pytest.mark.asyncio
-    async def test_archive_returns_empty_string_no_summary_stored(self):
-        """If archive returns empty string, no summary should be stored."""
+    async def test_populates_summaries_from_metadata(self):
         ac = _make_autocompact()
         mock_sm = MagicMock(spec=SessionManager)
-        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
-        session = _make_session(messages=msgs)
+        session = _make_session(
+            metadata={"_last_summary": {"text": "Hello.", "last_active": "2026-05-13T10:00:00"}}
+        )
         mock_sm.get_or_create.return_value = session
         ac.sessions = mock_sm
-        ac.consolidator.archive = AsyncMock(return_value="")
+        ac.consolidator.compact_idle_session = AsyncMock(return_value="Hello.")
 
         await ac._archive("cli:test")
 
-        assert "cli:test" not in ac._summaries
-
-    @pytest.mark.asyncio
-    async def test_archive_returns_nothing_no_summary_stored(self):
-        """If archive returns '(nothing)', no summary should be stored."""
-        ac = _make_autocompact()
-        mock_sm = MagicMock(spec=SessionManager)
-        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
-        session = _make_session(messages=msgs)
-        mock_sm.get_or_create.return_value = session
-        ac.sessions = mock_sm
-        ac.consolidator.archive = AsyncMock(return_value="(nothing)")
-
-        await ac._archive("cli:test")
-
-        assert "cli:test" not in ac._summaries
-
-    @pytest.mark.asyncio
-    async def test_archive_exception_caught_key_removed_from_archiving(self):
-        """If archive raises, exception is caught and key removed from _archiving."""
-        ac = _make_autocompact()
-        mock_sm = MagicMock(spec=SessionManager)
-        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
-        session = _make_session(messages=msgs)
-        mock_sm.get_or_create.return_value = session
-        ac.sessions = mock_sm
-        ac.consolidator.archive = AsyncMock(side_effect=RuntimeError("LLM down"))
-
-        # Should not raise
-        await ac._archive("cli:test")
-
-        assert "cli:test" not in ac._archiving
-
-    @pytest.mark.asyncio
-    async def test_successful_archive_stores_summary_in_summaries_and_metadata(self):
-        """Successful archive should store summary in _summaries dict and metadata."""
-        ac = _make_autocompact()
-        mock_sm = MagicMock(spec=SessionManager)
-        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
-        last_active = datetime(2026, 5, 13, 10, 0, 0)
-        session = _make_session(messages=msgs, updated_at=last_active)
-        mock_sm.get_or_create.return_value = session
-        ac.sessions = mock_sm
-        ac.consolidator.archive = AsyncMock(return_value="User discussed AI.")
-
-        await ac._archive("cli:test")
-
-        # _summaries
         entry = ac._summaries.get("cli:test")
         assert entry is not None
-        assert entry[0] == "User discussed AI."
-        assert entry[1] == last_active
-        # metadata
-        meta = session.metadata.get("_last_summary")
-        assert meta is not None
-        assert meta["text"] == "User discussed AI."
-        assert "last_active" in meta
+        assert entry[0] == "Hello."
 
     @pytest.mark.asyncio
-    async def test_finally_block_always_removes_from_archiving(self):
-        """Finally block should always remove key from _archiving, even on error."""
+    async def test_no_summary_when_compact_returns_empty(self):
         ac = _make_autocompact()
         mock_sm = MagicMock(spec=SessionManager)
-        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
-        session = _make_session(messages=msgs)
-        mock_sm.get_or_create.return_value = session
         ac.sessions = mock_sm
-        ac.consolidator.archive = AsyncMock(side_effect=RuntimeError("fail"))
+        ac.consolidator.compact_idle_session = AsyncMock(return_value="")
 
-        # Pre-add key to archiving to verify it gets removed
-        ac._archiving.add("cli:test")
         await ac._archive("cli:test")
-        assert "cli:test" not in ac._archiving
+
+        assert "cli:test" not in ac._summaries
 
     @pytest.mark.asyncio
-    async def test_finally_removes_from_archiving_on_success(self):
-        """Finally block should remove key from _archiving on success too."""
+    async def test_no_summary_when_compact_returns_nothing(self):
         ac = _make_autocompact()
         mock_sm = MagicMock(spec=SessionManager)
-        msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)]
-        session = _make_session(messages=msgs)
-        mock_sm.get_or_create.return_value = session
         ac.sessions = mock_sm
-        ac.consolidator.archive = AsyncMock(return_value="Summary.")
+        ac.consolidator.compact_idle_session = AsyncMock(return_value="(nothing)")
+
+        await ac._archive("cli:test")
+
+        assert "cli:test" not in ac._summaries
+
+    @pytest.mark.asyncio
+    async def test_exception_still_removes_from_archiving(self):
+        ac = _make_autocompact()
+        mock_sm = MagicMock(spec=SessionManager)
+        ac.sessions = mock_sm
+        ac.consolidator.compact_idle_session = AsyncMock(side_effect=RuntimeError("fail"))
 
         ac._archiving.add("cli:test")
         await ac._archive("cli:test")
+
         assert "cli:test" not in ac._archiving
 
 

From c58a360b25335e146098cc2ab8bd57ebdbfcafaa Mon Sep 17 00:00:00 2001
From: chengyongru <2755839590@qq.com>
Date: Sun, 17 May 2026 21:41:30 +0800
Subject: [PATCH 7/8] fix(test): seed get_or_create mock for session-refresh
 guard compatibility

---
 tests/agent/test_unified_session.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/agent/test_unified_session.py b/tests/agent/test_unified_session.py
index 839f62f57..f22290ba6 100644
--- a/tests/agent/test_unified_session.py
+++ b/tests/agent/test_unified_session.py
@@ -387,6 +387,7 @@ class TestConsolidationUnaffectedByUnifiedSession:
 
         session = Session(key="unified:default")
         session.messages = [{"role": "user", "content": "msg"}]
+        sessions.get_or_create.return_value = session
 
         # Simulate over-budget: estimated > budget
         consolidator.estimate_session_prompt_tokens = MagicMock(return_value=(950, "tiktoken"))

From eb0ff3ad1d1250a7529eabc30314a53c2e1c5085 Mon Sep 17 00:00:00 2001
From: Xubin Ren <52506698+Re-bin@users.noreply.github.com>
Date: Mon, 18 May 2026 01:01:34 +0800
Subject: [PATCH 8/8] fix(memory): refresh session before empty guard

---
 nanobot/agent/memory.py          |  4 +++-
 tests/agent/test_consolidator.py | 41 ++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index b7a325a02..ffc9c5f0e 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -678,7 +678,7 @@ class Consolidator:
         The budget reserves space for completion tokens and a safety buffer
         so the LLM request never exceeds the context window.
         """
-        if not session.messages or self.context_window_tokens <= 0:
+        if self.context_window_tokens <= 0:
             return
 
         lock = self.get_lock(session.key)
@@ -687,6 +687,8 @@ class Consolidator:
             fresh = self.sessions.get_or_create(session.key)
             if fresh is not session:
                 session = fresh
+            if not session.messages:
+                return
 
             budget = self._input_token_budget
             target = int(budget * self.consolidation_ratio)
diff --git a/tests/agent/test_consolidator.py b/tests/agent/test_consolidator.py
index 159ec01d1..1fa05d3c8 100644
--- a/tests/agent/test_consolidator.py
+++ b/tests/agent/test_consolidator.py
@@ -477,6 +477,47 @@ class TestCompactIdleSession:
 class TestConsolidatorSessionRefresh:
     """Background consolidation must detect stale session references."""
 
+    @pytest.mark.asyncio
+    async def test_reloads_before_empty_session_guard(self, tmp_path):
+        """A stale empty reference must not skip a non-empty cached session."""
+        from nanobot.agent.memory import Consolidator, MemoryStore
+        from nanobot.session.manager import Session, SessionManager
+
+        store = MemoryStore(tmp_path)
+        provider = MagicMock()
+        provider.chat_with_retry = AsyncMock(
+            return_value=MagicMock(content="summary", finish_reason="stop")
+        )
+        provider.generation.max_tokens = 4096
+        provider.estimate_prompt_tokens = MagicMock(return_value=(10, "test"))
+        sessions = SessionManager(tmp_path)
+        consolidator = Consolidator(
+            store=store,
+            provider=provider,
+            model="test-model",
+            sessions=sessions,
+            context_window_tokens=128_000,
+            build_messages=MagicMock(return_value=[]),
+            get_tool_definitions=MagicMock(return_value=[]),
+        )
+
+        fresh = sessions.get_or_create("cli:test")
+        fresh.add_message("user", "fresh message")
+        sessions.save(fresh)
+        stale_empty = Session(key="cli:test")
+
+        seen: dict[str, Session] = {}
+
+        def estimate(session: Session):
+            seen["session"] = session
+            return 10, "test"
+
+        consolidator.estimate_session_prompt_tokens = MagicMock(side_effect=estimate)
+
+        await consolidator.maybe_consolidate_by_tokens(stale_empty)
+
+        assert seen["session"] is fresh
+
     @pytest.mark.asyncio
     async def test_reloads_stale_session_after_compact(self, tmp_path):
         """After compact_idle_session replaces the session, a concurrent