From 5a34504b76c109918c37560e93a158b1a24d4782 Mon Sep 17 00:00:00 2001 From: olgagaga Date: Sat, 16 May 2026 10:50:15 -0400 Subject: [PATCH 1/8] docs(configuration): expand "Environment Variables for Secrets" section - Note that any string field supports ${VAR_NAME} and resolved values are never written back to disk. - Document the failure mode for unset variables. - Add MCP (stdio env + HTTP headers) and web-search examples. - Add Docker, direnv, and secret-manager (1Password / pass / Bitwarden) delivery patterns alongside the existing systemd example. - Replace plaintext apiKey values in tools.web.search examples (Brave, Tavily, Jina, Kagi, Olostep) with ${PROVIDER_API_KEY} placeholders so the docs stop modelling the anti-pattern. - Cross-link from the Security section. Refs: HKUDS/nanobot#2172 --- docs/configuration.md | 86 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 6 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 338991a33..f9e116bd1 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -26,7 +26,52 @@ Instead of storing secrets directly in `config.json`, you can use `${VAR_NAME}` } ``` -For **systemd** deployments, use `EnvironmentFile=` in the service unit to load variables from a file that only the deploying user can read: +Any string value in `config.json` can use `${VAR_NAME}`. Resolution runs once at startup, in memory only — resolved values are never written back to disk, so editing config through `nanobot onboard` or the WebUI preserves the placeholder. + +If a referenced variable is unset, nanobot fails fast at startup with `ValueError: Environment variable 'NAME' referenced in config is not set`. + +### More examples + +**MCP servers** — both stdio `env` and HTTP `headers`: + +```json +{ + "tools": { + "mcpServers": { + "github": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-github"], + "env": { "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}" } + }, + "remote": { + "url": "https://example.com/mcp/", + "headers": { "Authorization": "Bearer ${REMOTE_MCP_TOKEN}" } + } + } + } +} +``` + +**Web search providers:** + +```json +{ + "tools": { + "web": { + "search": { + "provider": "brave", + "apiKey": "${BRAVE_API_KEY}" + } + } + } +} +``` + +### Loading variables at startup + +Pick whatever fits your deployment — nanobot only reads `os.environ` at startup, so any mechanism that populates the process environment works. + +**systemd** — use `EnvironmentFile=` in the service unit to load variables from a file that only the deploying user can read: ```ini # /etc/systemd/system/nanobot.service (excerpt) @@ -42,6 +87,33 @@ TELEGRAM_TOKEN=your-token-here IMAP_PASSWORD=your-password-here ``` +**Docker** — `--env-file` (one `KEY=VALUE` per line) or `-e KEY=value`: + +```bash +docker run --env-file=./nanobot.env nanobot/nanobot +``` + +**direnv** — drop a `.envrc` in your working directory and run `direnv allow`: + +```bash +# .envrc (auto-loaded by direnv) +export TELEGRAM_TOKEN=your-token-here +export ANTHROPIC_API_KEY=... +``` + +**Secret managers (1Password, Bitwarden, pass)** — wrap the process so secrets only exist as env vars for the lifetime of the run, never on disk: + +```bash +# 1Password — references in .env.tpl look like `op://Vault/Item/field` +op run --env-file=.env.tpl -- nanobot agent + +# pass (passwordstore.org) +ANTHROPIC_API_KEY="$(pass show api/anthropic)" nanobot agent + +# Bitwarden +ANTHROPIC_API_KEY="$(bw get password api/anthropic)" nanobot agent +``` + ## Providers > [!TIP] @@ -917,7 +989,7 @@ By default, web search uses `duckduckgo`, and it works out of the box without an "web": { "search": { "provider": "brave", - "apiKey": "BSA..." + "apiKey": "${BRAVE_API_KEY}" } } } @@ -931,7 +1003,7 @@ By default, web search uses `duckduckgo`, and it works out of the box without an "web": { "search": { "provider": "tavily", - "apiKey": "tvly-..." + "apiKey": "${TAVILY_API_KEY}" } } } @@ -945,7 +1017,7 @@ By default, web search uses `duckduckgo`, and it works out of the box without an "web": { "search": { "provider": "jina", - "apiKey": "jina_..." + "apiKey": "${JINA_API_KEY}" } } } @@ -959,7 +1031,7 @@ By default, web search uses `duckduckgo`, and it works out of the box without an "web": { "search": { "provider": "kagi", - "apiKey": "your-kagi-api-key" + "apiKey": "${KAGI_API_KEY}" } } } @@ -973,7 +1045,7 @@ By default, web search uses `duckduckgo`, and it works out of the box without an "web": { "search": { "provider": "olostep", - "apiKey": "YOUR_OLOSTEP_API_KEY" + "apiKey": "${OLOSTEP_API_KEY}" } } } @@ -1136,6 +1208,8 @@ MCP tools are automatically discovered and registered on startup. The LLM can us > [!TIP] > For production deployments, set `"restrictToWorkspace": true` and `"tools.exec.sandbox": "bwrap"` in your config to sandbox the agent. +For API keys, tokens, and other secrets, see [Environment Variables for Secrets](#environment-variables-for-secrets) — avoid storing them directly in `config.json`. + | Option | Default | Description | |--------|---------|-------------| | `tools.restrictToWorkspace` | `false` | When `true`, restricts **all** agent tools (shell, file read/write/edit, list) to the workspace directory. Prevents path traversal and out-of-scope access. | From f017e209da1ed512ee4f352040e512fbc6e65ddb Mon Sep 17 00:00:00 2001 From: Xubin Ren <52506698+Re-bin@users.noreply.github.com> Date: Mon, 18 May 2026 00:37:01 +0800 Subject: [PATCH 2/8] docs(configuration): align Docker env-file example --- docs/configuration.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index f9e116bd1..b5d74f7ca 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -87,10 +87,12 @@ TELEGRAM_TOKEN=your-token-here IMAP_PASSWORD=your-password-here ``` -**Docker** — `--env-file` (one `KEY=VALUE` per line) or `-e KEY=value`: +**Docker** — pass an env file to the locally built image (one `KEY=VALUE` per line), or use `-e KEY=value`: ```bash -docker run --env-file=./nanobot.env nanobot/nanobot +docker run --rm --env-file=./nanobot.env \ + -v ~/.nanobot:/home/nanobot/.nanobot \ + nanobot agent -m "Hello" ``` **direnv** — drop a `.envrc` in your working directory and run `direnv allow`: From bf8a6e35fdd72a0fe29cd5a30bb020f63dd9919f Mon Sep 17 00:00:00 2001 From: voidborne-d <258577966+voidborne-d@users.noreply.github.com> Date: Sun, 17 May 2026 16:39:54 +0800 Subject: [PATCH 3/8] docs(deployment): match docker run gateway example to docker-compose.yml (refs #3873) The `docker run` example for `gateway` in `docs/deployment.md` had drifted from the canonical configuration in `docker-compose.yml`: - It omitted the security flags that `docker-compose.yml` already declares (`cap_drop: ALL` + `cap_add: SYS_ADMIN` + unconfined apparmor/seccomp). These are required whenever `tools.exec.sandbox: "bwrap"` is enabled, because bwrap needs CAP_SYS_ADMIN for user namespaces; without them bwrap exits with `clone3: Operation not permitted` and exec tools silently fail. - It omitted `-p 8765:8765`, even though both the bundled `docker-compose.yml` and `Dockerfile` (`EXPOSE 18790 8765`) already expose the WebSocket channel / WebUI port; users following the docs would get a reachable gateway health endpoint but an unreachable WebUI. This change keeps the two paths in sync so anyone reading deployment.md and using `docker run` directly gets the same security posture and port surface as the Compose path. Also adds a short `!IMPORTANT` note documenting that `gateway.host` and `channels.websocket.host` default to `127.0.0.1` (set in `nanobot/config/schema.py:GatewayConfig`). Docker `-p` cannot forward to the container's loopback interface, so the user must set both binds to `0.0.0.0` in `config.json` for the published ports to actually be reachable. This is the symptom reported as items 2 + 3 of #3873; items 1 + 4 of that issue are already resolved on `main` (`Dockerfile` line 49 already exposes both ports, and README.md lines 218-220 already reflect that the WebUI ships in the wheel). Docs only, no code changes. Signed-off-by: voidborne-d <258577966+voidborne-d@users.noreply.github.com> --- docs/deployment.md | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/docs/deployment.md b/docs/deployment.md index 746c35218..8a2cd89eb 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -10,6 +10,18 @@ > [!IMPORTANT] > Official Docker usage currently means building from this repository with the included `Dockerfile`. Docker Hub images under third-party namespaces are not maintained or verified by HKUDS/nanobot; do not mount API keys or bot tokens into them unless you trust the publisher. +> [!IMPORTANT] +> The gateway and WebSocket channel default to `host: "127.0.0.1"` in `config.json` (set in `nanobot/config/schema.py`). Docker `-p` port forwarding cannot reach a container's loopback interface, so for the host or LAN to reach the exposed ports you must set both binds to `0.0.0.0` in `~/.nanobot/config.json` before starting the container: +> +> ```json +> { +> "gateway": { "host": "0.0.0.0" }, +> "channels": { "websocket": { "host": "0.0.0.0" } } +> } +> ``` +> +> When `host` is `0.0.0.0`, the gateway refuses to start unless `token` or `tokenIssueSecret` is also configured on the WebSocket channel — see [`webui/README.md`](../webui/README.md) for details. + ### Docker Compose ```bash @@ -36,8 +48,20 @@ docker run -v ~/.nanobot:/home/nanobot/.nanobot --rm nanobot onboard # Edit config on host to add API keys vim ~/.nanobot/config.json -# Run gateway (connects to enabled channels, e.g. Telegram/Discord/Mochat) -docker run -v ~/.nanobot:/home/nanobot/.nanobot -p 18790:18790 nanobot gateway +# Run gateway (connects to enabled channels, e.g. Telegram/Discord/Mochat). +# Mirrors the security caps and port mappings declared in docker-compose.yml: +# - `--cap-drop ALL --cap-add SYS_ADMIN` + unconfined apparmor/seccomp are required +# when `tools.exec.sandbox: "bwrap"` is enabled (bwrap needs CAP_SYS_ADMIN for +# user namespaces). Without them, `bwrap` exits with `clone3: Operation not permitted`. +# - `-p 8765:8765` exposes the WebSocket channel / WebUI alongside the gateway health +# endpoint on 18790. +docker run \ + --cap-drop ALL --cap-add SYS_ADMIN \ + --security-opt apparmor=unconfined \ + --security-opt seccomp=unconfined \ + -v ~/.nanobot:/home/nanobot/.nanobot \ + -p 18790:18790 -p 8765:8765 \ + nanobot gateway # Or run a single command docker run -v ~/.nanobot:/home/nanobot/.nanobot --rm nanobot agent -m "Hello!" From 48d35bd2d9830d0ae65307f69b8d9a8fa6fc8f34 Mon Sep 17 00:00:00 2001 From: chengyongru <2755839590@qq.com> Date: Sun, 17 May 2026 21:02:18 +0800 Subject: [PATCH 4/8] feat(consolidator): add compact_idle_session method with lock-protected truncation Add Consolidator.compact_idle_session(session_key, max_suffix=8) that performs hard-truncation of idle sessions under the per-session consolidation lock. This is the single lock-protected path for AutoCompact to use instead of modifying session state directly, fixing the race condition between AutoCompact and Consolidator. Behavior: - Acquires per-session consolidation lock - Invalidates cache and reloads fresh from disk - Splits unconsolidated tail into archive prefix and retained suffix - Archives prefix via LLM (with raw_archive fallback on failure) - Persists _last_summary in session metadata on success - Returns summary text, None on LLM failure, or '' if nothing to archive Tests: 6 new tests covering prefix archival, empty session timestamp refresh, (nothing) summary exclusion, LLM failure fallback, last_consolidated offset, and lock acquisition verification. --- nanobot/agent/memory.py | 68 +++++++++++++ tests/agent/test_consolidator.py | 162 +++++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+) diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index fd233bfa3..167f02284 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -769,6 +769,74 @@ class Consolidator: # the summary injection strategy with AutoCompact._archive(). self._persist_last_summary(session, last_summary) + async def compact_idle_session( + self, + session_key: str, + max_suffix: int = 8, + ) -> str | None: + """Hard-truncate an idle session under the consolidation lock. + + Used by AutoCompact so all session mutation goes through a single + lock-protected path. Returns the summary text on success, ``None`` + if the LLM failed (raw_archive fallback), or ``""`` if there was + nothing to archive. + """ + lock = self.get_lock(session_key) + async with lock: + self.sessions.invalidate(session_key) + session = self.sessions.get_or_create(session_key) + + tail = list(session.messages[session.last_consolidated:]) + if not tail: + session.updated_at = datetime.now() + self.sessions.save(session) + return "" + + probe = Session( + key=session.key, + messages=tail.copy(), + created_at=session.created_at, + updated_at=session.updated_at, + metadata={}, + last_consolidated=0, + ) + probe.retain_recent_legal_suffix(max_suffix) + kept = probe.messages + cut = len(tail) - len(kept) + archive_msgs = tail[:cut] + + if not archive_msgs and not kept: + session.updated_at = datetime.now() + self.sessions.save(session) + return "" + + last_active = session.updated_at + summary: str | None = "" + if archive_msgs: + summary = await self.archive(archive_msgs) + + if summary and summary != "(nothing)": + session.metadata["_last_summary"] = { + "text": summary, + "last_active": last_active.isoformat(), + } + + session.messages = kept + session.last_consolidated = 0 + session.updated_at = datetime.now() + self.sessions.save(session) + + if archive_msgs: + logger.info( + "Idle-session compact for {}: archived={}, kept={}, summary={}", + session_key, + len(archive_msgs), + len(kept), + bool(summary), + ) + + return summary + # --------------------------------------------------------------------------- # Dream — heavyweight cron-scheduled memory consolidation diff --git a/tests/agent/test_consolidator.py b/tests/agent/test_consolidator.py index 64ef9a886..49888b8a1 100644 --- a/tests/agent/test_consolidator.py +++ b/tests/agent/test_consolidator.py @@ -299,6 +299,168 @@ class TestConsolidatorTokenBudget: assert session.last_consolidated == 61 +class TestCompactIdleSession: + """Tests for Consolidator.compact_idle_session — lock-protected idle truncation.""" + + @pytest.fixture + def real_consolidator(self, store, mock_provider): + """Create a Consolidator with a real SessionManager (not a mock).""" + from nanobot.session.manager import SessionManager + + sessions = SessionManager(store.workspace) + return Consolidator( + store=store, + provider=mock_provider, + model="test-model", + sessions=sessions, + context_window_tokens=1000, + build_messages=MagicMock(return_value=[]), + get_tool_definitions=MagicMock(return_value=[]), + max_completion_tokens=100, + ) + + @pytest.mark.asyncio + async def test_archives_prefix_keeps_suffix(self, real_consolidator, mock_provider): + """20 user/assistant turns → compact with max_suffix=8 → messages ≤ 8, + last_consolidated=0, _last_summary stored.""" + mock_provider.chat_with_retry.return_value = MagicMock( + content="Summary of old conversation.", finish_reason="stop" + ) + sessions = real_consolidator.sessions + session = sessions.get_or_create("cli:test") + for i in range(20): + session.add_message("user", f"user msg {i}") + session.add_message("assistant", f"assistant msg {i}") + sessions.save(session) + + result = await real_consolidator.compact_idle_session("cli:test", max_suffix=8) + assert result == "Summary of old conversation." + + reloaded = sessions.get_or_create("cli:test") + assert len(reloaded.messages) <= 8 + assert reloaded.last_consolidated == 0 + meta = reloaded.metadata.get("_last_summary") + assert meta is not None + assert meta["text"] == "Summary of old conversation." + assert "last_active" in meta + + @pytest.mark.asyncio + async def test_empty_session_refreshes_timestamp(self, real_consolidator): + """Empty session with old updated_at → refreshed after call, returns ''.""" + from datetime import datetime, timedelta + + sessions = real_consolidator.sessions + session = sessions.get_or_create("cli:empty") + old_ts = datetime.now() - timedelta(hours=2) + session.updated_at = old_ts + sessions.save(session) + + result = await real_consolidator.compact_idle_session("cli:empty") + assert result == "" + + reloaded = sessions.get_or_create("cli:empty") + assert reloaded.updated_at > old_ts + + @pytest.mark.asyncio + async def test_nothing_summary_not_stored(self, real_consolidator, mock_provider): + """LLM returns '(nothing)' → _last_summary NOT in metadata.""" + mock_provider.chat_with_retry.return_value = MagicMock( + content="(nothing)", finish_reason="stop" + ) + sessions = real_consolidator.sessions + session = sessions.get_or_create("cli:nothing") + for i in range(10): + session.add_message("user", f"u{i}") + session.add_message("assistant", f"a{i}") + sessions.save(session) + + result = await real_consolidator.compact_idle_session("cli:nothing", max_suffix=4) + assert result == "(nothing)" + + reloaded = sessions.get_or_create("cli:nothing") + assert "_last_summary" not in reloaded.metadata + + @pytest.mark.asyncio + async def test_llm_failure_still_truncates(self, real_consolidator, mock_provider, store): + """LLM raises RuntimeError → raw_archive fires, session still truncated, returns None.""" + mock_provider.chat_with_retry.side_effect = RuntimeError("LLM unavailable") + sessions = real_consolidator.sessions + session = sessions.get_or_create("cli:fail") + for i in range(10): + session.add_message("user", f"u{i}") + session.add_message("assistant", f"a{i}") + sessions.save(session) + + result = await real_consolidator.compact_idle_session("cli:fail", max_suffix=4) + assert result is None + + # raw_archive should have been called (history.jsonl gets an entry) + entries = store.read_unprocessed_history(since_cursor=0) + assert any("[RAW]" in e["content"] for e in entries) + + # Session should still be truncated + reloaded = sessions.get_or_create("cli:fail") + assert len(reloaded.messages) <= 4 + + @pytest.mark.asyncio + async def test_respects_last_consolidated(self, real_consolidator, mock_provider): + """30 turns with last_consolidated=50 → only unconsolidated tail considered.""" + mock_provider.chat_with_retry.return_value = MagicMock( + content="Tail summary.", finish_reason="stop" + ) + sessions = real_consolidator.sessions + session = sessions.get_or_create("cli:offset") + for i in range(30): + session.add_message("user", f"u{i}") + session.add_message("assistant", f"a{i}") + session.last_consolidated = 50 # Only 10 messages unconsolidated + sessions.save(session) + + result = await real_consolidator.compact_idle_session("cli:offset", max_suffix=4) + assert result == "Tail summary." + + # Verify only the unconsolidated tail was processed: + # 10 unconsolidated messages (50-59), keep suffix of 4 → archive 6 + archived_call = mock_provider.chat_with_retry.call_args + user_content = archived_call.kwargs["messages"][1]["content"] + # Should contain only tail messages, not early ones + assert "u0" not in user_content + assert "u25" in user_content or "a25" in user_content + + @pytest.mark.asyncio + async def test_acquires_consolidation_lock(self, real_consolidator, mock_provider): + """Verify lock is held during execution.""" + import asyncio + + # Use a slow LLM response to ensure the lock is held while we check + started = asyncio.Event() + + async def slow_chat(**kwargs): + started.set() + await asyncio.sleep(0.1) + return MagicMock(content="Summary.", finish_reason="stop") + + mock_provider.chat_with_retry = slow_chat + + sessions = real_consolidator.sessions + session = sessions.get_or_create("cli:lock") + for i in range(10): + session.add_message("user", f"u{i}") + session.add_message("assistant", f"a{i}") + sessions.save(session) + + lock = real_consolidator.get_lock("cli:lock") + assert not lock.locked() + + task = asyncio.ensure_future( + real_consolidator.compact_idle_session("cli:lock", max_suffix=4) + ) + await started.wait() + assert lock.locked() + await task + assert not lock.locked() + + class TestRawArchiveTruncation: """raw_archive() must cap entry size to avoid bloating history.jsonl.""" From 888d54790db98dee7c04b357afa567bb6ac485af Mon Sep 17 00:00:00 2001 From: chengyongru <2755839590@qq.com> Date: Sun, 17 May 2026 21:12:26 +0800 Subject: [PATCH 5/8] fix(memory): add session-refresh guard to maybe_consolidate_by_tokens When background consolidation runs with a stale session reference (captured before AutoCompact replaced the session via compact_idle_session), it could operate on outdated data. Now, after acquiring the per-session lock, the method refreshes its session reference from SessionManager.get_or_create(). If the session was replaced, it swaps in the fresh reference before doing any consolidation work. This prevents a race where AutoCompact truncates an idle session while a background maybe_consolidate_by_tokens call is in flight with the old session object. --- nanobot/agent/memory.py | 5 +++ tests/agent/test_consolidator.py | 64 ++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index 167f02284..b7a325a02 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -683,6 +683,11 @@ class Consolidator: lock = self.get_lock(session.key) async with lock: + # Refresh session reference: AutoCompact may have replaced it. + fresh = self.sessions.get_or_create(session.key) + if fresh is not session: + session = fresh + budget = self._input_token_budget target = int(budget * self.consolidation_ratio) last_summary = await self._consolidate_replay_overflow( diff --git a/tests/agent/test_consolidator.py b/tests/agent/test_consolidator.py index 49888b8a1..159ec01d1 100644 --- a/tests/agent/test_consolidator.py +++ b/tests/agent/test_consolidator.py @@ -28,6 +28,12 @@ def mock_provider(): def consolidator(store, mock_provider): sessions = MagicMock() sessions.save = MagicMock() + # When maybe_consolidate_by_tokens refreshes the session reference via + # get_or_create(session.key), it should get back the same object the test + # passed in. Store sessions by key so the lookup is transparent. + _session_cache: dict[str, MagicMock] = {} + sessions.get_or_create = MagicMock(side_effect=lambda key: _session_cache.get(key, MagicMock())) + sessions._session_cache = _session_cache return Consolidator( store=store, provider=mock_provider, @@ -117,6 +123,7 @@ class TestConsolidatorTokenBudget: session.last_consolidated = 0 session.messages = [{"role": "user", "content": "hi"}] session.key = "test:key" + consolidator.sessions._session_cache[session.key] = session consolidator.estimate_session_prompt_tokens = MagicMock(return_value=(100, "tiktoken")) consolidator.archive = AsyncMock(return_value=True) await consolidator.maybe_consolidate_by_tokens(session) @@ -152,6 +159,7 @@ class TestConsolidatorTokenBudget: session.add_message("user", f"u{i}") session.add_message("assistant", f"a{i}") + consolidator.sessions._session_cache[session.key] = session consolidator.estimate_session_prompt_tokens = MagicMock(return_value=(100, "tiktoken")) consolidator.archive = AsyncMock(return_value="old conversation summary") @@ -184,6 +192,7 @@ class TestConsolidatorTokenBudget: session.add_message("tool", "tool result", tool_call_id="call-1", name="x") session.add_message("assistant", "final answer") + consolidator.sessions._session_cache[session.key] = session consolidator.estimate_session_prompt_tokens = MagicMock(return_value=(100, "tiktoken")) consolidator.archive = AsyncMock(return_value="tool turn summary") @@ -210,6 +219,7 @@ class TestConsolidatorTokenBudget: } for i in range(70) ] + consolidator.sessions._session_cache[session.key] = session consolidator.estimate_session_prompt_tokens = MagicMock( side_effect=[(1200, "tiktoken"), (400, "tiktoken")] ) @@ -238,6 +248,7 @@ class TestConsolidatorTokenBudget: for i in range(70) ] session.metadata = {} + consolidator.sessions._session_cache[session.key] = session consolidator.estimate_session_prompt_tokens = MagicMock( side_effect=[(1200, "tiktoken"), (400, "tiktoken")] ) @@ -263,6 +274,7 @@ class TestConsolidatorTokenBudget: for i in range(70) ] session.metadata = {} + consolidator.sessions._session_cache[session.key] = session # Keep estimates high so the loop would otherwise run multiple rounds. consolidator.estimate_session_prompt_tokens = MagicMock( return_value=(1200, "tiktoken") @@ -287,6 +299,7 @@ class TestConsolidatorTokenBudget: } for i in range(70) ] + consolidator.sessions._session_cache[session.key] = session consolidator.estimate_session_prompt_tokens = MagicMock( side_effect=[(1200, "tiktoken"), (400, "tiktoken")] ) @@ -461,6 +474,57 @@ class TestCompactIdleSession: assert not lock.locked() +class TestConsolidatorSessionRefresh: + """Background consolidation must detect stale session references.""" + + @pytest.mark.asyncio + async def test_reloads_stale_session_after_compact(self, tmp_path): + """After compact_idle_session replaces the session, a concurrent + maybe_consolidate_by_tokens with the old reference should use the + fresh session from cache instead of overwriting.""" + from nanobot.agent.memory import Consolidator, MemoryStore + from nanobot.session.manager import SessionManager + + store = MemoryStore(tmp_path) + provider = MagicMock() + provider.chat_with_retry = AsyncMock( + return_value=MagicMock(content="summary", finish_reason="stop") + ) + provider.generation.max_tokens = 4096 + provider.estimate_prompt_tokens = MagicMock(return_value=(10, "test")) + sessions = SessionManager(tmp_path) + consolidator = Consolidator( + store=store, + provider=provider, + model="test-model", + sessions=sessions, + context_window_tokens=128_000, + build_messages=MagicMock(return_value=[]), + get_tool_definitions=MagicMock(return_value=[]), + ) + + # Populate session with many messages + session = sessions.get_or_create("cli:test") + for i in range(20): + session.add_message("user", f"u{i}") + session.add_message("assistant", f"a{i}") + sessions.save(session) + + # Simulate: background consolidation captures old reference + old_ref = session + + # AutoCompact runs first and truncates to 8 + await consolidator.compact_idle_session("cli:test", max_suffix=8) + + # Background consolidation runs with stale reference — + # should detect the session was replaced and not undo the compact. + await consolidator.maybe_consolidate_by_tokens(old_ref) + + session_after = sessions.get_or_create("cli:test") + # Messages should still be truncated (not restored to 40) + assert len(session_after.messages) <= 8 + + class TestRawArchiveTruncation: """raw_archive() must cap entry size to avoid bloating history.jsonl.""" From 5bb94edc99d24796ba069d1dda2b952b7b965e56 Mon Sep 17 00:00:00 2001 From: chengyongru <2755839590@qq.com> Date: Sun, 17 May 2026 21:30:44 +0800 Subject: [PATCH 6/8] refactor(autocompact): delegate _archive to Consolidator.compact_idle_session Replace AutoCompact._archive() direct session mutation with delegation to Consolidator.compact_idle_session(). Remove _split_unconsolidated() method since that logic now lives inside compact_idle_session. All session mutation for idle compaction now goes through the Consolidator's lock, eliminating the race condition between background token consolidation and idle TTL compaction. Changes: - autocompact.py: rewrite _archive() to call compact_idle_session, remove _split_unconsolidated(), clean up unused imports - test_autocompact_unit.py: replace TestArchive/TestSplitUnconsolidated with TestArchiveDelegates that verifies delegation behavior - test_auto_compact.py: convert all consolidator.archive mocks to consolidator.compact_idle_session mocks via _make_fake_compact helper --- nanobot/agent/autocompact.py | 59 +----- tests/agent/test_auto_compact.py | 303 +++++++++++++-------------- tests/agent/test_autocompact_unit.py | 175 +++------------- 3 files changed, 183 insertions(+), 354 deletions(-) diff --git a/nanobot/agent/autocompact.py b/nanobot/agent/autocompact.py index 11e531039..4ad241170 100644 --- a/nanobot/agent/autocompact.py +++ b/nanobot/agent/autocompact.py @@ -4,7 +4,7 @@ from __future__ import annotations from collections.abc import Collection from datetime import datetime -from typing import TYPE_CHECKING, Any, Callable, Coroutine +from typing import TYPE_CHECKING, Callable, Coroutine from loguru import logger @@ -37,27 +37,6 @@ class AutoCompact: def _format_summary(text: str, last_active: datetime) -> str: return f"Previous conversation summary (last active {last_active.isoformat()}):\n{text}" - def _split_unconsolidated( - self, session: Session, - ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: - """Split live session tail into archiveable prefix and retained recent suffix.""" - tail = list(session.messages[session.last_consolidated:]) - if not tail: - return [], [] - - probe = Session( - key=session.key, - messages=tail.copy(), - created_at=session.created_at, - updated_at=session.updated_at, - metadata={}, - last_consolidated=0, - ) - probe.retain_recent_legal_suffix(self._RECENT_SUFFIX_MESSAGES) - kept = probe.messages - cut = len(tail) - len(kept) - return tail[:cut], kept - def check_expired(self, schedule_background: Callable[[Coroutine], None], active_session_keys: Collection[str] = ()) -> None: """Schedule archival for idle sessions, skipping those with in-flight agent tasks.""" @@ -74,33 +53,17 @@ class AutoCompact: async def _archive(self, key: str) -> None: try: - self.sessions.invalidate(key) - session = self.sessions.get_or_create(key) - archive_msgs, kept_msgs = self._split_unconsolidated(session) - if not archive_msgs and not kept_msgs: - session.updated_at = datetime.now() - self.sessions.save(session) - return - - last_active = session.updated_at - summary = "" - if archive_msgs: - summary = await self.consolidator.archive(archive_msgs) or "" + summary = await self.consolidator.compact_idle_session( + key, self._RECENT_SUFFIX_MESSAGES, + ) if summary and summary != "(nothing)": - self._summaries[key] = (summary, last_active) - session.metadata["_last_summary"] = {"text": summary, "last_active": last_active.isoformat()} - session.messages = kept_msgs - session.last_consolidated = 0 - session.updated_at = datetime.now() - self.sessions.save(session) - if archive_msgs: - logger.info( - "Auto-compact: archived {} (archived={}, kept={}, summary={})", - key, - len(archive_msgs), - len(kept_msgs), - bool(summary), - ) + session = self.sessions.get_or_create(key) + meta = session.metadata.get("_last_summary") + if isinstance(meta, dict): + self._summaries[key] = ( + meta["text"], + datetime.fromisoformat(meta["last_active"]), + ) except Exception: logger.exception("Auto-compact: failed for {}", key) finally: diff --git a/tests/agent/test_auto_compact.py b/tests/agent/test_auto_compact.py index 0bc02a694..37fcbfdae 100644 --- a/tests/agent/test_auto_compact.py +++ b/tests/agent/test_auto_compact.py @@ -45,6 +45,73 @@ def _add_turns(session, turns: int, *, prefix: str = "msg") -> None: session.add_message("assistant", f"{prefix} assistant {i}") +def _make_fake_compact( + loop: AgentLoop, + *, + summary: str = "Summary.", + on_archive=None, + track_archived: list | None = None, + track_count: bool = False, +): + """Return a fake compact_idle_session that mirrors the real method's session mutation.""" + from nanobot.session.manager import Session as _Session + + state = {"count": 0} + + async def _fake_compact(key: str, max_suffix: int = 8) -> str: + state["count"] += 1 + session = loop.sessions.get_or_create(key) + + tail = list(session.messages[session.last_consolidated:]) + if not tail: + session.updated_at = datetime.now() + loop.sessions.save(session) + return "" + + probe = _Session( + key=session.key, + messages=tail.copy(), + created_at=session.created_at, + updated_at=session.updated_at, + metadata={}, + last_consolidated=0, + ) + probe.retain_recent_legal_suffix(max_suffix) + kept = probe.messages + cut = len(tail) - len(kept) + archive_msgs = tail[:cut] + + if not archive_msgs and not kept: + session.updated_at = datetime.now() + loop.sessions.save(session) + return "" + + last_active = session.updated_at + s = summary + if archive_msgs: + if on_archive: + result = on_archive(archive_msgs) + s = result if isinstance(result, str) else summary + if track_archived is not None: + track_archived.extend(archive_msgs) + + if s and s != "(nothing)": + session.metadata["_last_summary"] = { + "text": s, + "last_active": last_active.isoformat(), + } + + session.messages = kept + session.last_consolidated = 0 + session.updated_at = datetime.now() + loop.sessions.save(session) + return s + + # Attach state for count access + _fake_compact.state = state # type: ignore[attr-defined] + return _fake_compact + + class TestSessionTTLConfig: """Test session TTL configuration.""" @@ -201,10 +268,7 @@ class TestAutoCompact: s2.add_message("user", "recent") loop.sessions.save(s2) - async def _fake_archive(messages): - return "Summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact(loop) loop.auto_compact.check_expired(loop._schedule_background) await asyncio.sleep(0.1) @@ -222,12 +286,9 @@ class TestAutoCompact: loop.sessions.save(session) archived_messages = [] - - async def _fake_archive(messages): - archived_messages.extend(messages) - return "Summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact( + loop, track_archived=archived_messages, + ) await loop.auto_compact._archive("cli:test") @@ -246,10 +307,9 @@ class TestAutoCompact: _add_turns(session, 6, prefix="hello") loop.sessions.save(session) - async def _fake_archive(messages): - return "User said hello." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact( + loop, summary="User said hello.", + ) await loop.auto_compact._archive("cli:test") @@ -262,23 +322,16 @@ class TestAutoCompact: @pytest.mark.asyncio async def test_auto_compact_empty_session(self, tmp_path): - """_archive on empty session should not archive.""" + """_archive on empty session should not store a summary.""" loop = _make_loop(tmp_path, session_ttl_minutes=15) - archive_called = False - - async def _fake_archive(messages): - nonlocal archive_called - archive_called = True - return "Summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact(loop) await loop.auto_compact._archive("cli:test") - assert not archive_called session_after = loop.sessions.get_or_create("cli:test") assert len(session_after.messages) == 0 + assert "cli:test" not in loop.auto_compact._summaries await loop.close_mcp() @pytest.mark.asyncio @@ -290,18 +343,14 @@ class TestAutoCompact: session.last_consolidated = 18 loop.sessions.save(session) - archived_count = 0 - - async def _fake_archive(messages): - nonlocal archived_count - archived_count = len(messages) - return "Summary." - - loop.consolidator.archive = _fake_archive + archived_messages = [] + loop.consolidator.compact_idle_session = _make_fake_compact( + loop, track_archived=archived_messages, + ) await loop.auto_compact._archive("cli:test") - assert archived_count == 2 + assert len(archived_messages) == 2 await loop.close_mcp() @@ -334,12 +383,9 @@ class TestAutoCompactIdleDetection: loop.sessions.save(session) archived_messages = [] - - async def _fake_archive(messages): - archived_messages.extend(messages) - return "Summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact( + loop, track_archived=archived_messages, + ) # Simulate proactive archive completing before message arrives await loop.auto_compact._archive("cli:test") @@ -402,10 +448,7 @@ class TestAutoCompactIdleDetection: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - async def _fake_archive(messages): - return "Summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact(loop) msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new") response = await loop._process_message(msg) @@ -466,10 +509,7 @@ class TestAutoCompactSystemMessages: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - async def _fake_archive(messages): - return "Summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact(loop) # Simulate proactive archive completing before system message arrives await loop.auto_compact._archive("cli:test") @@ -547,12 +587,9 @@ class TestAutoCompactEdgeCases: loop.sessions.save(session) archived_messages = [] - - async def _fake_archive(messages): - archived_messages.extend(messages) - return "Summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact( + loop, track_archived=archived_messages, + ) # Simulate proactive archive completing before message arrives await loop.auto_compact._archive("cli:test") @@ -644,10 +681,7 @@ class TestAutoCompactIntegration: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - async def _fake_archive(messages): - return "Summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact(loop) # Simulate proactive archive completing before message arrives await loop.auto_compact._archive("cli:test") @@ -704,12 +738,9 @@ class TestProactiveAutoCompact: loop.sessions.save(session) archived_messages = [] - - async def _fake_archive(messages): - archived_messages.extend(messages) - return "User chatted about old things." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact( + loop, summary="User chatted about old things.", track_archived=archived_messages, + ) await self._run_check_expired(loop) @@ -748,14 +779,14 @@ class TestProactiveAutoCompact: started = asyncio.Event() block_forever = asyncio.Event() - async def _slow_archive(messages): + async def _slow_compact(key, max_suffix=8): nonlocal archive_count archive_count += 1 started.set() await block_forever.wait() return "Summary." - loop.consolidator.archive = _slow_archive + loop.consolidator.compact_idle_session = _slow_compact # First call starts archiving via callback loop.auto_compact.check_expired(loop._schedule_background) @@ -781,10 +812,10 @@ class TestProactiveAutoCompact: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - async def _failing_archive(messages): + async def _failing_compact(key, max_suffix=8): raise RuntimeError("LLM down") - loop.consolidator.archive = _failing_archive + loop.consolidator.compact_idle_session = _failing_compact # Should not raise await self._run_check_expired(loop) @@ -795,24 +826,18 @@ class TestProactiveAutoCompact: @pytest.mark.asyncio async def test_proactive_archive_skips_empty_sessions(self, tmp_path): - """Proactive archive should not call LLM for sessions with no un-consolidated messages.""" + """Proactive archive should not produce a summary for sessions with no messages.""" loop = _make_loop(tmp_path, session_ttl_minutes=15) session = loop.sessions.get_or_create("cli:test") session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - archive_called = False - - async def _fake_archive(messages): - nonlocal archive_called - archive_called = True - return "Summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact(loop) await self._run_check_expired(loop) - assert not archive_called + # Empty session should not produce a summary + assert "cli:test" not in loop.auto_compact._summaries await loop.close_mcp() @pytest.mark.asyncio @@ -824,18 +849,12 @@ class TestProactiveAutoCompact: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - archive_count = 0 - - async def _fake_archive(messages): - nonlocal archive_count - archive_count += 1 - return "Summary." - - loop.consolidator.archive = _fake_archive + _fake_compact = _make_fake_compact(loop) + loop.consolidator.compact_idle_session = _fake_compact # Simulate an active agent task for this session await self._run_check_expired(loop, active_session_keys={"cli:test"}) - assert archive_count == 0 + assert _fake_compact.state["count"] == 0 session_after = loop.sessions.get_or_create("cli:test") assert len(session_after.messages) == 12 # All messages preserved @@ -851,22 +870,16 @@ class TestProactiveAutoCompact: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - archive_count = 0 - - async def _fake_archive(messages): - nonlocal archive_count - archive_count += 1 - return "Summary." - - loop.consolidator.archive = _fake_archive + _fake_compact = _make_fake_compact(loop) + loop.consolidator.compact_idle_session = _fake_compact # First tick: active task, skip await self._run_check_expired(loop, active_session_keys={"cli:test"}) - assert archive_count == 0 + assert _fake_compact.state["count"] == 0 # Second tick: task completed, should archive await self._run_check_expired(loop) - assert archive_count == 1 + assert _fake_compact.state["count"] == 1 await loop.close_mcp() @pytest.mark.asyncio @@ -888,18 +901,12 @@ class TestProactiveAutoCompact: s3.add_message("user", "recent") loop.sessions.save(s3) - archive_count = 0 - - async def _fake_archive(messages): - nonlocal archive_count - archive_count += 1 - return "Summary." - - loop.consolidator.archive = _fake_archive + _fake_compact = _make_fake_compact(loop) + loop.consolidator.compact_idle_session = _fake_compact await self._run_check_expired(loop, active_session_keys={"cli:expired_active"}) - assert archive_count == 1 + assert _fake_compact.state["count"] == 1 s1_after = loop.sessions.get_or_create("cli:expired_idle") assert len(s1_after.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES s2_after = loop.sessions.get_or_create("cli:expired_active") @@ -917,22 +924,16 @@ class TestProactiveAutoCompact: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - archive_count = 0 - - async def _fake_archive(messages): - nonlocal archive_count - archive_count += 1 - return "Summary." - - loop.consolidator.archive = _fake_archive + _fake_compact = _make_fake_compact(loop) + loop.consolidator.compact_idle_session = _fake_compact # First tick: archives the session await self._run_check_expired(loop) - assert archive_count == 1 + assert _fake_compact.state["count"] == 1 # Second tick: should NOT re-schedule (updated_at is fresh after clear) await self._run_check_expired(loop) - assert archive_count == 1 # Still 1, not re-scheduled + assert _fake_compact.state["count"] == 1 # Still 1, not re-scheduled await loop.close_mcp() @pytest.mark.asyncio @@ -943,22 +944,15 @@ class TestProactiveAutoCompact: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - archive_count = 0 - - async def _fake_archive(messages): - nonlocal archive_count - archive_count += 1 - return "Summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact(loop) # First tick: skips (no messages), refreshes updated_at await self._run_check_expired(loop) - assert archive_count == 0 + assert "cli:test" not in loop.auto_compact._summaries # Second tick: should NOT re-schedule because updated_at is fresh await self._run_check_expired(loop) - assert archive_count == 0 + assert "cli:test" not in loop.auto_compact._summaries await loop.close_mcp() @pytest.mark.asyncio @@ -970,18 +964,12 @@ class TestProactiveAutoCompact: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - archive_count = 0 - - async def _fake_archive(messages): - nonlocal archive_count - archive_count += 1 - return "Summary." - - loop.consolidator.archive = _fake_archive + _fake_compact = _make_fake_compact(loop) + loop.consolidator.compact_idle_session = _fake_compact # First compact cycle await loop.auto_compact._archive("cli:test") - assert archive_count == 1 + assert _fake_compact.state["count"] == 1 # User returns, sends new messages msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="second topic") @@ -995,7 +983,7 @@ class TestProactiveAutoCompact: # Second compact cycle should succeed await loop.auto_compact._archive("cli:test") - assert archive_count == 2 + assert _fake_compact.state["count"] == 2 await loop.close_mcp() @@ -1011,10 +999,9 @@ class TestSummaryPersistence: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - async def _fake_archive(messages): - return "User said hello." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact( + loop, summary="User said hello.", + ) await loop.auto_compact._archive("cli:test") @@ -1036,10 +1023,9 @@ class TestSummaryPersistence: session.updated_at = last_active loop.sessions.save(session) - async def _fake_archive(messages): - return "User said hello." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact( + loop, summary="User said hello.", + ) # Archive await loop.auto_compact._archive("cli:test") @@ -1069,10 +1055,7 @@ class TestSummaryPersistence: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - async def _fake_archive(messages): - return "Summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact(loop) await loop.auto_compact._archive("cli:test") @@ -1100,10 +1083,7 @@ class TestSummaryPersistence: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - async def _fake_archive(messages): - return "Summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact(loop) await loop.auto_compact._archive("cli:test") @@ -1129,10 +1109,9 @@ class TestSummaryPersistence: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - async def _fake_archive(messages): - return "First summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact( + loop, summary="First summary.", + ) await loop.auto_compact._archive("cli:test") # Consume the first summary via hot path @@ -1148,10 +1127,9 @@ class TestSummaryPersistence: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - async def _fake_archive2(messages): - return "Second summary." - - loop.consolidator.archive = _fake_archive2 + loop.consolidator.compact_idle_session = _make_fake_compact( + loop, summary="Second summary.", + ) await loop.auto_compact._archive("cli:test") # The second archive writes a new summary @@ -1173,10 +1151,9 @@ class TestSummaryPersistence: session.updated_at = datetime.now() - timedelta(minutes=20) loop.sessions.save(session) - async def _fake_archive(messages): - return "Old summary." - - loop.consolidator.archive = _fake_archive + loop.consolidator.compact_idle_session = _make_fake_compact( + loop, summary="Old summary.", + ) await loop.auto_compact._archive("cli:test") # Verify summary exists before /new diff --git a/tests/agent/test_autocompact_unit.py b/tests/agent/test_autocompact_unit.py index d501770dd..1d3277a01 100644 --- a/tests/agent/test_autocompact_unit.py +++ b/tests/agent/test_autocompact_unit.py @@ -38,7 +38,7 @@ def _make_autocompact( sessions = MagicMock(spec=SessionManager) if consolidator is None: consolidator = MagicMock() - consolidator.archive = AsyncMock(return_value="Summary.") + consolidator.compact_idle_session = AsyncMock(return_value="Summary.") return AutoCompact( sessions=sessions, consolidator=consolidator, @@ -178,62 +178,6 @@ class TestFormatSummary: assert result.startswith("Previous conversation summary (last active ") -# --------------------------------------------------------------------------- -# _split_unconsolidated -# --------------------------------------------------------------------------- - - -class TestSplitUnconsolidated: - """Test AutoCompact._split_unconsolidated splitting logic.""" - - def test_empty_session_returns_both_empty(self): - """Empty session should return ([], []).""" - ac = _make_autocompact() - session = _make_session(messages=[]) - archive, kept = ac._split_unconsolidated(session) - assert archive == [] - assert kept == [] - - def test_all_messages_archivable_when_more_than_suffix(self): - """Session with many messages should archive a prefix and keep suffix.""" - ac = _make_autocompact() - msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] - session = _make_session(messages=msgs) - archive, kept = ac._split_unconsolidated(session) - assert len(archive) > 0 - assert len(kept) <= AutoCompact._RECENT_SUFFIX_MESSAGES - - def test_fewer_messages_than_suffix_returns_empty_archive(self): - """Session with fewer messages than suffix should have empty archive.""" - ac = _make_autocompact() - msgs = [{"role": "user", "content": f"u{i}"} for i in range(3)] - session = _make_session(messages=msgs) - archive, kept = ac._split_unconsolidated(session) - assert archive == [] - assert len(kept) == len(msgs) - - def test_respects_last_consolidated_offset(self): - """Only messages after last_consolidated should be considered.""" - ac = _make_autocompact() - msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] - # First 10 are already consolidated - session = _make_session(messages=msgs, last_consolidated=10) - archive, kept = ac._split_unconsolidated(session) - # Only the tail of 10 messages is considered for splitting - assert all(m["content"] in [f"u{i}" for i in range(10, 20)] for m in kept) - assert all(m["content"] in [f"u{i}" for i in range(10, 20)] for m in archive) - - def test_retain_recent_legal_suffix_keeps_last_n(self): - """The kept suffix should be at most _RECENT_SUFFIX_MESSAGES long.""" - ac = _make_autocompact() - # 20 user messages = 20 messages total, all after last_consolidated=0 - msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] - session = _make_session(messages=msgs) - archive, kept = ac._split_unconsolidated(session) - assert len(kept) <= AutoCompact._RECENT_SUFFIX_MESSAGES - assert len(archive) == len(msgs) - len(kept) - - # --------------------------------------------------------------------------- # check_expired # --------------------------------------------------------------------------- @@ -313,126 +257,71 @@ class TestCheckExpired: # --------------------------------------------------------------------------- -class TestArchive: - """Test AutoCompact._archive async method.""" +class TestArchiveDelegates: + """_archive should delegate all session mutation to Consolidator.""" @pytest.mark.asyncio - async def test_empty_session_updates_timestamp_no_archive_call(self): - """Empty session should refresh updated_at and not call consolidator.archive.""" + async def test_calls_compact_idle_session(self): ac = _make_autocompact() mock_sm = MagicMock(spec=SessionManager) - empty_session = _make_session(messages=[]) - mock_sm.get_or_create.return_value = empty_session ac.sessions = mock_sm - ac.consolidator.archive = AsyncMock(return_value="Summary.") + ac.consolidator.compact_idle_session = AsyncMock(return_value="Summary.") await ac._archive("cli:test") - ac.consolidator.archive.assert_not_called() - mock_sm.save.assert_called_once_with(empty_session) - # updated_at was refreshed - assert empty_session.updated_at > datetime.now() - timedelta(seconds=5) + ac.consolidator.compact_idle_session.assert_awaited_once_with( + "cli:test", ac._RECENT_SUFFIX_MESSAGES, + ) @pytest.mark.asyncio - async def test_archive_returns_empty_string_no_summary_stored(self): - """If archive returns empty string, no summary should be stored.""" + async def test_populates_summaries_from_metadata(self): ac = _make_autocompact() mock_sm = MagicMock(spec=SessionManager) - msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] - session = _make_session(messages=msgs) + session = _make_session( + metadata={"_last_summary": {"text": "Hello.", "last_active": "2026-05-13T10:00:00"}} + ) mock_sm.get_or_create.return_value = session ac.sessions = mock_sm - ac.consolidator.archive = AsyncMock(return_value="") + ac.consolidator.compact_idle_session = AsyncMock(return_value="Hello.") await ac._archive("cli:test") - assert "cli:test" not in ac._summaries - - @pytest.mark.asyncio - async def test_archive_returns_nothing_no_summary_stored(self): - """If archive returns '(nothing)', no summary should be stored.""" - ac = _make_autocompact() - mock_sm = MagicMock(spec=SessionManager) - msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] - session = _make_session(messages=msgs) - mock_sm.get_or_create.return_value = session - ac.sessions = mock_sm - ac.consolidator.archive = AsyncMock(return_value="(nothing)") - - await ac._archive("cli:test") - - assert "cli:test" not in ac._summaries - - @pytest.mark.asyncio - async def test_archive_exception_caught_key_removed_from_archiving(self): - """If archive raises, exception is caught and key removed from _archiving.""" - ac = _make_autocompact() - mock_sm = MagicMock(spec=SessionManager) - msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] - session = _make_session(messages=msgs) - mock_sm.get_or_create.return_value = session - ac.sessions = mock_sm - ac.consolidator.archive = AsyncMock(side_effect=RuntimeError("LLM down")) - - # Should not raise - await ac._archive("cli:test") - - assert "cli:test" not in ac._archiving - - @pytest.mark.asyncio - async def test_successful_archive_stores_summary_in_summaries_and_metadata(self): - """Successful archive should store summary in _summaries dict and metadata.""" - ac = _make_autocompact() - mock_sm = MagicMock(spec=SessionManager) - msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] - last_active = datetime(2026, 5, 13, 10, 0, 0) - session = _make_session(messages=msgs, updated_at=last_active) - mock_sm.get_or_create.return_value = session - ac.sessions = mock_sm - ac.consolidator.archive = AsyncMock(return_value="User discussed AI.") - - await ac._archive("cli:test") - - # _summaries entry = ac._summaries.get("cli:test") assert entry is not None - assert entry[0] == "User discussed AI." - assert entry[1] == last_active - # metadata - meta = session.metadata.get("_last_summary") - assert meta is not None - assert meta["text"] == "User discussed AI." - assert "last_active" in meta + assert entry[0] == "Hello." @pytest.mark.asyncio - async def test_finally_block_always_removes_from_archiving(self): - """Finally block should always remove key from _archiving, even on error.""" + async def test_no_summary_when_compact_returns_empty(self): ac = _make_autocompact() mock_sm = MagicMock(spec=SessionManager) - msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] - session = _make_session(messages=msgs) - mock_sm.get_or_create.return_value = session ac.sessions = mock_sm - ac.consolidator.archive = AsyncMock(side_effect=RuntimeError("fail")) + ac.consolidator.compact_idle_session = AsyncMock(return_value="") - # Pre-add key to archiving to verify it gets removed - ac._archiving.add("cli:test") await ac._archive("cli:test") - assert "cli:test" not in ac._archiving + + assert "cli:test" not in ac._summaries @pytest.mark.asyncio - async def test_finally_removes_from_archiving_on_success(self): - """Finally block should remove key from _archiving on success too.""" + async def test_no_summary_when_compact_returns_nothing(self): ac = _make_autocompact() mock_sm = MagicMock(spec=SessionManager) - msgs = [{"role": "user", "content": f"u{i}"} for i in range(20)] - session = _make_session(messages=msgs) - mock_sm.get_or_create.return_value = session ac.sessions = mock_sm - ac.consolidator.archive = AsyncMock(return_value="Summary.") + ac.consolidator.compact_idle_session = AsyncMock(return_value="(nothing)") + + await ac._archive("cli:test") + + assert "cli:test" not in ac._summaries + + @pytest.mark.asyncio + async def test_exception_still_removes_from_archiving(self): + ac = _make_autocompact() + mock_sm = MagicMock(spec=SessionManager) + ac.sessions = mock_sm + ac.consolidator.compact_idle_session = AsyncMock(side_effect=RuntimeError("fail")) ac._archiving.add("cli:test") await ac._archive("cli:test") + assert "cli:test" not in ac._archiving From c58a360b25335e146098cc2ab8bd57ebdbfcafaa Mon Sep 17 00:00:00 2001 From: chengyongru <2755839590@qq.com> Date: Sun, 17 May 2026 21:41:30 +0800 Subject: [PATCH 7/8] fix(test): seed get_or_create mock for session-refresh guard compatibility --- tests/agent/test_unified_session.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/agent/test_unified_session.py b/tests/agent/test_unified_session.py index 839f62f57..f22290ba6 100644 --- a/tests/agent/test_unified_session.py +++ b/tests/agent/test_unified_session.py @@ -387,6 +387,7 @@ class TestConsolidationUnaffectedByUnifiedSession: session = Session(key="unified:default") session.messages = [{"role": "user", "content": "msg"}] + sessions.get_or_create.return_value = session # Simulate over-budget: estimated > budget consolidator.estimate_session_prompt_tokens = MagicMock(return_value=(950, "tiktoken")) From eb0ff3ad1d1250a7529eabc30314a53c2e1c5085 Mon Sep 17 00:00:00 2001 From: Xubin Ren <52506698+Re-bin@users.noreply.github.com> Date: Mon, 18 May 2026 01:01:34 +0800 Subject: [PATCH 8/8] fix(memory): refresh session before empty guard --- nanobot/agent/memory.py | 4 +++- tests/agent/test_consolidator.py | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index b7a325a02..ffc9c5f0e 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -678,7 +678,7 @@ class Consolidator: The budget reserves space for completion tokens and a safety buffer so the LLM request never exceeds the context window. """ - if not session.messages or self.context_window_tokens <= 0: + if self.context_window_tokens <= 0: return lock = self.get_lock(session.key) @@ -687,6 +687,8 @@ class Consolidator: fresh = self.sessions.get_or_create(session.key) if fresh is not session: session = fresh + if not session.messages: + return budget = self._input_token_budget target = int(budget * self.consolidation_ratio) diff --git a/tests/agent/test_consolidator.py b/tests/agent/test_consolidator.py index 159ec01d1..1fa05d3c8 100644 --- a/tests/agent/test_consolidator.py +++ b/tests/agent/test_consolidator.py @@ -477,6 +477,47 @@ class TestCompactIdleSession: class TestConsolidatorSessionRefresh: """Background consolidation must detect stale session references.""" + @pytest.mark.asyncio + async def test_reloads_before_empty_session_guard(self, tmp_path): + """A stale empty reference must not skip a non-empty cached session.""" + from nanobot.agent.memory import Consolidator, MemoryStore + from nanobot.session.manager import Session, SessionManager + + store = MemoryStore(tmp_path) + provider = MagicMock() + provider.chat_with_retry = AsyncMock( + return_value=MagicMock(content="summary", finish_reason="stop") + ) + provider.generation.max_tokens = 4096 + provider.estimate_prompt_tokens = MagicMock(return_value=(10, "test")) + sessions = SessionManager(tmp_path) + consolidator = Consolidator( + store=store, + provider=provider, + model="test-model", + sessions=sessions, + context_window_tokens=128_000, + build_messages=MagicMock(return_value=[]), + get_tool_definitions=MagicMock(return_value=[]), + ) + + fresh = sessions.get_or_create("cli:test") + fresh.add_message("user", "fresh message") + sessions.save(fresh) + stale_empty = Session(key="cli:test") + + seen: dict[str, Session] = {} + + def estimate(session: Session): + seen["session"] = session + return 10, "test" + + consolidator.estimate_session_prompt_tokens = MagicMock(side_effect=estimate) + + await consolidator.maybe_consolidate_by_tokens(stale_empty) + + assert seen["session"] is fresh + @pytest.mark.asyncio async def test_reloads_stale_session_after_compact(self, tmp_path): """After compact_idle_session replaces the session, a concurrent