diff --git a/README.md b/README.md
index 6098c55ca..72fd62aa0 100644
--- a/README.md
+++ b/README.md
@@ -1312,6 +1312,7 @@ If you need to allow trusted private ranges such as Tailscale / CGNAT addresses,
 | `brave` | `apiKey` | `BRAVE_API_KEY` | No |
 | `tavily` | `apiKey` | `TAVILY_API_KEY` | No |
 | `jina` | `apiKey` | `JINA_API_KEY` | Free tier (10M tokens) |
+| `kagi` | `apiKey` | `KAGI_API_KEY` | No |
 | `searxng` | `baseUrl` | `SEARXNG_BASE_URL` | Yes (self-hosted) |
 | `duckduckgo` (default) | — | — | Yes |
 
@@ -1368,6 +1369,20 @@ If you need to allow trusted private ranges such as Tailscale / CGNAT addresses,
 }
 ```
 
+**Kagi:**
+```json
+{
+  "tools": {
+    "web": {
+      "search": {
+        "provider": "kagi",
+        "apiKey": "your-kagi-api-key"
+      }
+    }
+  }
+}
+```
+
 **SearXNG** (self-hosted, no API key needed):
 ```json
 {
@@ -1503,6 +1518,35 @@ MCP tools are automatically discovered and registered on startup. The LLM can us
 **Docker security**: The official Docker image runs as a non-root user (`nanobot`, UID 1000) with bubblewrap pre-installed. When using `docker-compose.yml`, the container drops all Linux capabilities except `SYS_ADMIN` (required for bwrap's namespace isolation).
 
 
+### Auto Compact
+
+When a user is idle for longer than a configured threshold, nanobot **proactively** compresses the older part of the session context into a summary while keeping a recent legal suffix of live messages. This reduces token cost and first-token latency when the user returns — instead of re-processing a long stale context with an expired KV cache, the model receives a compact summary, the most recent live context, and fresh input.
+
+```json
+{
+  "agents": {
+    "defaults": {
+      "idleCompactAfterMinutes": 15
+    }
+  }
+}
+```
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `agents.defaults.idleCompactAfterMinutes` | `0` (disabled) | Minutes of idle time before auto-compaction starts. Set to `0` to disable. Recommended: `15` — close to a typical LLM KV cache expiry window, so stale sessions get compacted before the user returns. |
+
+`sessionTtlMinutes` remains accepted as a legacy alias for backward compatibility, but `idleCompactAfterMinutes` is the preferred config key going forward.
+
+How it works:
+1. **Idle detection**: On each idle tick (~1 s), checks all sessions for expiration.
+2. **Background compaction**: Idle sessions summarize the older live prefix via LLM and keep the most recent legal suffix (currently 8 messages).
+3. **Summary injection**: When the user returns, the summary is injected as runtime context (one-shot, not persisted) alongside the retained recent suffix.
+4. **Restart-safe resume**: The summary is also mirrored into session metadata so it can still be recovered after a process restart.
+
+> [!TIP]
+> Think of auto compact as "summarize older context, keep the freshest live turns." It is not a hard session reset.
+
 ### Timezone
 
 Time is context. Context should be precise.
diff --git a/nanobot/agent/autocompact.py b/nanobot/agent/autocompact.py
new file mode 100644
index 000000000..47c7b5a36
--- /dev/null
+++ b/nanobot/agent/autocompact.py
@@ -0,0 +1,115 @@
+"""Auto compact: proactive compression of idle sessions to reduce token cost and latency."""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import TYPE_CHECKING, Any, Callable, Coroutine
+
+from loguru import logger
+from nanobot.session.manager import Session, SessionManager
+
+if TYPE_CHECKING:
+    from nanobot.agent.memory import Consolidator
+
+
+class AutoCompact:
+    _RECENT_SUFFIX_MESSAGES = 8
+
+    def __init__(self, sessions: SessionManager, consolidator: Consolidator,
+                 session_ttl_minutes: int = 0):
+        self.sessions = sessions
+        self.consolidator = consolidator
+        self._ttl = session_ttl_minutes
+        self._archiving: set[str] = set()
+        self._summaries: dict[str, tuple[str, datetime]] = {}
+
+    def _is_expired(self, ts: datetime | str | None) -> bool:
+        if self._ttl <= 0 or not ts:
+            return False
+        if isinstance(ts, str):
+            ts = datetime.fromisoformat(ts)
+        return (datetime.now() - ts).total_seconds() >= self._ttl * 60
+
+    @staticmethod
+    def _format_summary(text: str, last_active: datetime) -> str:
+        idle_min = int((datetime.now() - last_active).total_seconds() / 60)
+        return f"Inactive for {idle_min} minutes.\nPrevious conversation summary: {text}"
+
+    def _split_unconsolidated(
+        self, session: Session,
+    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+        """Split live session tail into archiveable prefix and retained recent suffix."""
+        tail = list(session.messages[session.last_consolidated:])
+        if not tail:
+            return [], []
+
+        probe = Session(
+            key=session.key,
+            messages=tail.copy(),
+            created_at=session.created_at,
+            updated_at=session.updated_at,
+            metadata={},
+            last_consolidated=0,
+        )
+        probe.retain_recent_legal_suffix(self._RECENT_SUFFIX_MESSAGES)
+        kept = probe.messages
+        cut = len(tail) - len(kept)
+        return tail[:cut], kept
+
+    def check_expired(self, schedule_background: Callable[[Coroutine], None]) -> None:
+        for info in self.sessions.list_sessions():
+            key = info.get("key", "")
+            if key and key not in self._archiving and self._is_expired(info.get("updated_at")):
+                self._archiving.add(key)
+                logger.debug("Auto-compact: scheduling archival for {} (idle > {} min)", key, self._ttl)
+                schedule_background(self._archive(key))
+
+    async def _archive(self, key: str) -> None:
+        try:
+            self.sessions.invalidate(key)
+            session = self.sessions.get_or_create(key)
+            archive_msgs, kept_msgs = self._split_unconsolidated(session)
+            if not archive_msgs and not kept_msgs:
+                logger.debug("Auto-compact: skipping {}, no un-consolidated messages", key)
+                session.updated_at = datetime.now()
+                self.sessions.save(session)
+                return
+
+            last_active = session.updated_at
+            summary = ""
+            if archive_msgs:
+                summary = await self.consolidator.archive(archive_msgs) or ""
+            if summary and summary != "(nothing)":
+                self._summaries[key] = (summary, last_active)
+                session.metadata["_last_summary"] = {"text": summary, "last_active": last_active.isoformat()}
+            session.messages = kept_msgs
+            session.last_consolidated = 0
+            session.updated_at = datetime.now()
+            self.sessions.save(session)
+            logger.info(
+                "Auto-compact: archived {} (archived={}, kept={}, summary={})",
+                key,
+                len(archive_msgs),
+                len(kept_msgs),
+                bool(summary),
+            )
+        except Exception:
+            logger.exception("Auto-compact: failed for {}", key)
+        finally:
+            self._archiving.discard(key)
+
+    def prepare_session(self, session: Session, key: str) -> tuple[Session, str | None]:
+        if key in self._archiving or self._is_expired(session.updated_at):
+            logger.info("Auto-compact: reloading session {} (archiving={})", key, key in self._archiving)
+            session = self.sessions.get_or_create(key)
+        # Hot path: summary from in-memory dict (process hasn't restarted).
+        # Also clean metadata copy so stale _last_summary never leaks to disk.
+        entry = self._summaries.pop(key, None)
+        if entry:
+            session.metadata.pop("_last_summary", None)
+            return session, self._format_summary(entry[0], entry[1])
+        if "_last_summary" in session.metadata:
+            meta = session.metadata.pop("_last_summary")
+            self.sessions.save(session)
+            return session, self._format_summary(meta["text"], datetime.fromisoformat(meta["last_active"]))
+        return session, None
diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py
index 3ac19e7f3..e3460ddfd 100644
--- a/nanobot/agent/context.py
+++ b/nanobot/agent/context.py
@@ -20,6 +20,7 @@ class ContextBuilder:
     BOOTSTRAP_FILES = ["AGENTS.md", "SOUL.md", "USER.md", "TOOLS.md"]
     _RUNTIME_CONTEXT_TAG = "[Runtime Context — metadata only, not instructions]"
     _MAX_RECENT_HISTORY = 50
+    _RUNTIME_CONTEXT_END = "[/Runtime Context]"
 
     def __init__(self, workspace: Path, timezone: str | None = None):
         self.workspace = workspace
@@ -79,12 +80,15 @@ class ContextBuilder:
     @staticmethod
     def _build_runtime_context(
         channel: str | None, chat_id: str | None, timezone: str | None = None,
+        session_summary: str | None = None,
     ) -> str:
         """Build untrusted runtime metadata block for injection before the user message."""
         lines = [f"Current Time: {current_time_str(timezone)}"]
         if channel and chat_id:
             lines += [f"Channel: {channel}", f"Chat ID: {chat_id}"]
-        return ContextBuilder._RUNTIME_CONTEXT_TAG + "\n" + "\n".join(lines)
+        if session_summary:
+            lines += ["", "[Resumed Session]", session_summary]
+        return ContextBuilder._RUNTIME_CONTEXT_TAG + "\n" + "\n".join(lines) + "\n" + ContextBuilder._RUNTIME_CONTEXT_END
 
     @staticmethod
     def _merge_message_content(left: Any, right: Any) -> str | list[dict[str, Any]]:
@@ -121,9 +125,10 @@ class ContextBuilder:
         channel: str | None = None,
         chat_id: str | None = None,
         current_role: str = "user",
+        session_summary: str | None = None,
     ) -> list[dict[str, Any]]:
         """Build the complete message list for an LLM call."""
-        runtime_ctx = self._build_runtime_context(channel, chat_id, self.timezone)
+        runtime_ctx = self._build_runtime_context(channel, chat_id, self.timezone, session_summary=session_summary)
         user_content = self._build_user_content(current_message, media)
 
         # Merge runtime context and user content into a single user message
diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index f7afbe901..56a662add 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -13,6 +13,7 @@ from typing import TYPE_CHECKING, Any, Awaitable, Callable
 
 from loguru import logger
 
+from nanobot.agent.autocompact import AutoCompact
 from nanobot.agent.context import ContextBuilder
 from nanobot.agent.hook import AgentHook, AgentHookContext, CompositeHook
 from nanobot.agent.memory import Consolidator, Dream
@@ -147,6 +148,7 @@ class AgentLoop:
         mcp_servers: dict | None = None,
         channels_config: ChannelsConfig | None = None,
         timezone: str | None = None,
+        session_ttl_minutes: int = 0,
         hooks: list[AgentHook] | None = None,
         unified_session: bool = False,
     ):
@@ -219,6 +221,11 @@ class AgentLoop:
             get_tool_definitions=self.tools.get_definitions,
             max_completion_tokens=provider.generation.max_tokens,
         )
+        self.auto_compact = AutoCompact(
+            sessions=self.sessions,
+            consolidator=self.consolidator,
+            session_ttl_minutes=session_ttl_minutes,
+        )
         self.dream = Dream(
             store=self.context.memory,
             provider=provider,
@@ -275,7 +282,10 @@ class AgentLoop:
 
         try:
             self._mcp_stacks = await connect_mcp_servers(self._mcp_servers, self.tools)
-            self._mcp_connected = True
+            if self._mcp_stacks:
+                self._mcp_connected = True
+            else:
+                logger.warning("No MCP servers connected successfully (will retry next message)")
         except asyncio.CancelledError:
             logger.warning("MCP connection cancelled (will retry next message)")
             self._mcp_stacks.clear()
@@ -381,6 +391,7 @@ class AgentLoop:
             try:
                 msg = await asyncio.wait_for(self.bus.consume_inbound(), timeout=1.0)
             except asyncio.TimeoutError:
+                self.auto_compact.check_expired(self._schedule_background)
                 continue
             except asyncio.CancelledError:
                 # Preserve real task cancellation so shutdown can complete cleanly.
@@ -532,15 +543,18 @@ class AgentLoop:
             session = self.sessions.get_or_create(key)
             if self._restore_runtime_checkpoint(session):
                 self.sessions.save(session)
+
+            session, pending = self.auto_compact.prepare_session(session, key)
+
             await self.consolidator.maybe_consolidate_by_tokens(session)
             self._set_tool_context(channel, chat_id, msg.metadata.get("message_id"))
             history = session.get_history(max_messages=0)
             current_role = "assistant" if msg.sender_id == "subagent" else "user"
+
             messages = self.context.build_messages(
                 history=history,
-                current_message=msg.content,
-                channel=channel,
-                chat_id=chat_id,
+                current_message=msg.content, channel=channel, chat_id=chat_id,
+                session_summary=pending,
                 current_role=current_role,
             )
             final_content, _, all_msgs, _ = await self._run_agent_loop(
@@ -568,6 +582,8 @@ class AgentLoop:
         if self._restore_runtime_checkpoint(session):
             self.sessions.save(session)
 
+        session, pending = self.auto_compact.prepare_session(session, key)
+
         # Slash commands
         raw = msg.content.strip()
         ctx = CommandContext(msg=msg, session=session, key=key, raw=raw, loop=self)
@@ -582,9 +598,11 @@ class AgentLoop:
                 message_tool.start_turn()
 
         history = session.get_history(max_messages=0)
+
         initial_messages = self.context.build_messages(
             history=history,
             current_message=msg.content,
+            session_summary=pending,
             media=msg.media if msg.media else None,
             channel=msg.channel,
             chat_id=msg.chat_id,
@@ -696,15 +714,24 @@ class AgentLoop:
                         continue
                     entry["content"] = filtered
             elif role == "user":
-                if isinstance(content, str) and content.startswith(
-                    ContextBuilder._RUNTIME_CONTEXT_TAG
-                ):
-                    # Strip the runtime-context prefix, keep only the user text.
-                    parts = content.split("\n\n", 1)
-                    if len(parts) > 1 and parts[1].strip():
-                        entry["content"] = parts[1]
+                if isinstance(content, str) and content.startswith(ContextBuilder._RUNTIME_CONTEXT_TAG):
+                    # Strip the entire runtime-context block (including any session summary).
+                    # The block is bounded by _RUNTIME_CONTEXT_TAG and _RUNTIME_CONTEXT_END.
+                    end_marker = ContextBuilder._RUNTIME_CONTEXT_END
+                    end_pos = content.find(end_marker)
+                    if end_pos >= 0:
+                        after = content[end_pos + len(end_marker):].lstrip("\n")
+                        if after:
+                            entry["content"] = after
+                        else:
+                            continue
                     else:
-                        continue
+                        # Fallback: no end marker found, strip the tag prefix
+                        after_tag = content[len(ContextBuilder._RUNTIME_CONTEXT_TAG):].lstrip("\n")
+                        if after_tag.strip():
+                            entry["content"] = after_tag
+                        else:
+                            continue
                 if isinstance(content, list):
                     filtered = self._sanitize_persisted_blocks(content, drop_runtime=True)
                     if not filtered:
diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index 943d91855..04d988ee5 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -290,7 +290,7 @@ class MemoryStore:
                 if not lines:
                     return None
                 return json.loads(lines[-1])
-        except (FileNotFoundError, json.JSONDecodeError):
+        except (FileNotFoundError, json.JSONDecodeError, UnicodeDecodeError):
             return None
 
     def _write_entries(self, entries: list[dict[str, Any]]) -> None:
@@ -433,13 +433,13 @@ class Consolidator:
             self._get_tool_definitions(),
         )
 
-    async def archive(self, messages: list[dict]) -> bool:
+    async def archive(self, messages: list[dict]) -> str | None:
         """Summarize messages via LLM and append to history.jsonl.
 
-        Returns True on success (or degraded success), False if nothing to do.
+        Returns the summary text on success, None if nothing to archive.
         """
         if not messages:
-            return False
+            return None
         try:
             formatted = MemoryStore._format_messages(messages)
             response = await self.provider.chat_with_retry(
@@ -459,11 +459,11 @@ class Consolidator:
             )
             summary = response.content or "[no summary]"
             self.store.append_history(summary)
-            return True
+            return summary
         except Exception:
             logger.warning("Consolidation LLM call failed, raw-dumping to history")
             self.store.raw_archive(messages)
-            return True
+            return None
 
     async def maybe_consolidate_by_tokens(self, session: Session) -> None:
         """Loop: archive old messages until prompt fits within safe budget.
diff --git a/nanobot/agent/tools/web.py b/nanobot/agent/tools/web.py
index 275fcf88c..38fc33d74 100644
--- a/nanobot/agent/tools/web.py
+++ b/nanobot/agent/tools/web.py
@@ -114,6 +114,8 @@ class WebSearchTool(Tool):
             return await self._search_jina(query, n)
         elif provider == "brave":
             return await self._search_brave(query, n)
+        elif provider == "kagi":
+            return await self._search_kagi(query, n)
         else:
             return f"Error: unknown search provider '{provider}'"
 
@@ -204,6 +206,29 @@ class WebSearchTool(Tool):
             logger.warning("Jina search failed ({}), falling back to DuckDuckGo", e)
             return await self._search_duckduckgo(query, n)
 
+    async def _search_kagi(self, query: str, n: int) -> str:
+        api_key = self.config.api_key or os.environ.get("KAGI_API_KEY", "")
+        if not api_key:
+            logger.warning("KAGI_API_KEY not set, falling back to DuckDuckGo")
+            return await self._search_duckduckgo(query, n)
+        try:
+            async with httpx.AsyncClient(proxy=self.proxy) as client:
+                r = await client.get(
+                    "https://kagi.com/api/v0/search",
+                    params={"q": query, "limit": n},
+                    headers={"Authorization": f"Bot {api_key}"},
+                    timeout=10.0,
+                )
+                r.raise_for_status()
+            # t=0 items are search results; other values are related searches, etc.
+            items = [
+                {"title": d.get("title", ""), "url": d.get("url", ""), "content": d.get("snippet", "")}
+                for d in r.json().get("data", []) if d.get("t") == 0
+            ]
+            return _format_results(query, items, n)
+        except Exception as e:
+            return f"Error: {e}"
+
     async def _search_duckduckgo(self, query: str, n: int) -> str:
         try:
             # Note: duckduckgo_search is synchronous and does its own requests
diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 5ce8b7937..9d818a9db 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -591,6 +591,7 @@ def serve(
         channels_config=runtime_config.channels,
         timezone=runtime_config.agents.defaults.timezone,
         unified_session=runtime_config.agents.defaults.unified_session,
+        session_ttl_minutes=runtime_config.agents.defaults.session_ttl_minutes,
     )
 
     model_name = runtime_config.agents.defaults.model
@@ -683,6 +684,7 @@ def gateway(
         channels_config=config.channels,
         timezone=config.agents.defaults.timezone,
         unified_session=config.agents.defaults.unified_session,
+        session_ttl_minutes=config.agents.defaults.session_ttl_minutes,
     )
 
     # Set cron callback (needs agent)
@@ -915,6 +917,7 @@ def agent(
         channels_config=config.channels,
         timezone=config.agents.defaults.timezone,
         unified_session=config.agents.defaults.unified_session,
+        session_ttl_minutes=config.agents.defaults.session_ttl_minutes,
     )
     restart_notice = consume_restart_notice_from_env()
     if restart_notice and should_show_cli_restart_notice(restart_notice, session_id):
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index 2d31c8bf9..a841fe159 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -77,6 +77,12 @@ class AgentDefaults(Base):
     reasoning_effort: str | None = None  # low / medium / high / adaptive - enables LLM thinking mode
     timezone: str = "UTC"  # IANA timezone, e.g. "Asia/Shanghai", "America/New_York"
     unified_session: bool = False  # Share one session across all channels (single-user multi-device)
+    session_ttl_minutes: int = Field(
+        default=0,
+        ge=0,
+        validation_alias=AliasChoices("idleCompactAfterMinutes", "sessionTtlMinutes"),
+        serialization_alias="idleCompactAfterMinutes",
+    )  # Auto-compact idle threshold in minutes (0 = disabled)
     dream: DreamConfig = Field(default_factory=DreamConfig)
 
 
@@ -153,7 +159,7 @@ class GatewayConfig(Base):
 class WebSearchConfig(Base):
     """Web search tool configuration."""
 
-    provider: str = "duckduckgo"  # brave, tavily, duckduckgo, searxng, jina
+    provider: str = "duckduckgo"  # brave, tavily, duckduckgo, searxng, jina, kagi
     api_key: str = ""
     base_url: str = ""  # SearXNG base URL
     max_results: int = 5
diff --git a/nanobot/cron/service.py b/nanobot/cron/service.py
index 267613012..165ce54d7 100644
--- a/nanobot/cron/service.py
+++ b/nanobot/cron/service.py
@@ -80,6 +80,7 @@ class CronService:
         self._store: CronStore | None = None
         self._timer_task: asyncio.Task | None = None
         self._running = False
+        self._timer_active = False
         self.max_sleep_ms = max_sleep_ms
 
     def _load_jobs(self) -> tuple[list[CronJob], int]:
@@ -171,7 +172,11 @@ class CronService:
     def _load_store(self) -> CronStore:
         """Load jobs from disk. Reloads automatically if file was modified externally.
         - Reload every time because it needs to merge operations on the jobs object from other instances.
+        - During _on_timer execution, return the existing store to prevent concurrent
+          _load_store calls (e.g. from list_jobs polling) from replacing it mid-execution.
         """
+        if self._timer_active and self._store:
+            return self._store
         jobs, version = self._load_jobs()
         self._store = CronStore(version=version, jobs=jobs)
         self._merge_action()
@@ -290,18 +295,23 @@ class CronService:
         """Handle timer tick - run due jobs."""
         self._load_store()
         if not self._store:
+            self._arm_timer()
             return
 
-        now = _now_ms()
-        due_jobs = [
-            j for j in self._store.jobs
-            if j.enabled and j.state.next_run_at_ms and now >= j.state.next_run_at_ms
-        ]
+        self._timer_active = True
+        try:
+            now = _now_ms()
+            due_jobs = [
+                j for j in self._store.jobs
+                if j.enabled and j.state.next_run_at_ms and now >= j.state.next_run_at_ms
+            ]
 
-        for job in due_jobs:
-            await self._execute_job(job)
+            for job in due_jobs:
+                await self._execute_job(job)
 
-        self._save_store()
+            self._save_store()
+        finally:
+            self._timer_active = False
         self._arm_timer()
 
     async def _execute_job(self, job: CronJob) -> None:
diff --git a/nanobot/nanobot.py b/nanobot/nanobot.py
index 9166acb27..df0e49842 100644
--- a/nanobot/nanobot.py
+++ b/nanobot/nanobot.py
@@ -82,6 +82,7 @@ class Nanobot:
             mcp_servers=config.tools.mcp_servers,
             timezone=defaults.timezone,
             unified_session=defaults.unified_session,
+            session_ttl_minutes=defaults.session_ttl_minutes,
         )
         return cls(loop)
 
diff --git a/nanobot/session/manager.py b/nanobot/session/manager.py
index 27df31405..2ed0624a2 100644
--- a/nanobot/session/manager.py
+++ b/nanobot/session/manager.py
@@ -155,6 +155,7 @@ class SessionManager:
             messages = []
             metadata = {}
             created_at = None
+            updated_at = None
             last_consolidated = 0
 
             with open(path, encoding="utf-8") as f:
@@ -168,6 +169,7 @@ class SessionManager:
                     if data.get("_type") == "metadata":
                         metadata = data.get("metadata", {})
                         created_at = datetime.fromisoformat(data["created_at"]) if data.get("created_at") else None
+                        updated_at = datetime.fromisoformat(data["updated_at"]) if data.get("updated_at") else None
                         last_consolidated = data.get("last_consolidated", 0)
                     else:
                         messages.append(data)
@@ -176,6 +178,7 @@ class SessionManager:
                 key=key,
                 messages=messages,
                 created_at=created_at or datetime.now(),
+                updated_at=updated_at or datetime.now(),
                 metadata=metadata,
                 last_consolidated=last_consolidated
             )
diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
index 3b4f9f25a..1bfd9f18b 100644
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@@ -17,10 +17,10 @@ from loguru import logger
 def strip_think(text: str) -> str:
     """Remove thinking blocks and any unclosed trailing tag."""
     text = re.sub(r"<think>[\s\S]*?</think>", "", text)
-    text = re.sub(r"<think>[\s\S]*$", "", text)
+    text = re.sub(r"^\s*<think>[\s\S]*$", "", text)
     # Gemma 4 and similar models use <thought>...</thought> blocks
     text = re.sub(r"<thought>[\s\S]*?</thought>", "", text)
-    text = re.sub(r"<thought>[\s\S]*$", "", text)
+    text = re.sub(r"^\s*<thought>[\s\S]*$", "", text)
     return text.strip()
 
 
diff --git a/tests/agent/test_auto_compact.py b/tests/agent/test_auto_compact.py
new file mode 100644
index 000000000..b3462820b
--- /dev/null
+++ b/tests/agent/test_auto_compact.py
@@ -0,0 +1,914 @@
+"""Tests for auto compact (idle TTL) feature."""
+
+import asyncio
+from datetime import datetime, timedelta
+from unittest.mock import AsyncMock, MagicMock
+from pathlib import Path
+
+import pytest
+
+from nanobot.agent.loop import AgentLoop
+from nanobot.bus.events import InboundMessage
+from nanobot.bus.queue import MessageBus
+from nanobot.config.schema import AgentDefaults
+from nanobot.command import CommandContext
+from nanobot.providers.base import LLMResponse
+
+
+def _make_loop(tmp_path: Path, session_ttl_minutes: int = 15) -> AgentLoop:
+    """Create a minimal AgentLoop for testing."""
+    bus = MessageBus()
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    provider.estimate_prompt_tokens.return_value = (10_000, "test")
+    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
+    provider.generation.max_tokens = 4096
+    loop = AgentLoop(
+        bus=bus,
+        provider=provider,
+        workspace=tmp_path,
+        model="test-model",
+        context_window_tokens=128_000,
+        session_ttl_minutes=session_ttl_minutes,
+    )
+    loop.tools.get_definitions = MagicMock(return_value=[])
+    return loop
+
+
+def _add_turns(session, turns: int, *, prefix: str = "msg") -> None:
+    """Append simple user/assistant turns to a session."""
+    for i in range(turns):
+        session.add_message("user", f"{prefix} user {i}")
+        session.add_message("assistant", f"{prefix} assistant {i}")
+
+
+class TestSessionTTLConfig:
+    """Test session TTL configuration."""
+
+    def test_default_ttl_is_zero(self):
+        """Default TTL should be 0 (disabled)."""
+        defaults = AgentDefaults()
+        assert defaults.session_ttl_minutes == 0
+
+    def test_custom_ttl(self):
+        """Custom TTL should be stored correctly."""
+        defaults = AgentDefaults(session_ttl_minutes=30)
+        assert defaults.session_ttl_minutes == 30
+
+    def test_user_friendly_alias_is_supported(self):
+        """Config should accept idleCompactAfterMinutes as the preferred JSON key."""
+        defaults = AgentDefaults.model_validate({"idleCompactAfterMinutes": 30})
+        assert defaults.session_ttl_minutes == 30
+
+    def test_legacy_alias_is_still_supported(self):
+        """Config should still accept the old sessionTtlMinutes key for compatibility."""
+        defaults = AgentDefaults.model_validate({"sessionTtlMinutes": 30})
+        assert defaults.session_ttl_minutes == 30
+
+    def test_serializes_with_user_friendly_alias(self):
+        """Config dumps should use idleCompactAfterMinutes for JSON output."""
+        defaults = AgentDefaults(session_ttl_minutes=30)
+        data = defaults.model_dump(mode="json", by_alias=True)
+        assert data["idleCompactAfterMinutes"] == 30
+        assert "sessionTtlMinutes" not in data
+
+
+class TestAgentLoopTTLParam:
+    """Test that AutoCompact receives and stores session_ttl_minutes."""
+
+    def test_loop_stores_ttl(self, tmp_path):
+        """AutoCompact should store the TTL value."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=25)
+        assert loop.auto_compact._ttl == 25
+
+    def test_loop_default_ttl_zero(self, tmp_path):
+        """AutoCompact default TTL should be 0 (disabled)."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=0)
+        assert loop.auto_compact._ttl == 0
+
+
+class TestAutoCompact:
+    """Test the _archive method."""
+
+    @pytest.mark.asyncio
+    async def test_is_expired_boundary(self, tmp_path):
+        """Exactly at TTL boundary should be expired (>= not >)."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        ts = datetime.now() - timedelta(minutes=15)
+        assert loop.auto_compact._is_expired(ts) is True
+        ts2 = datetime.now() - timedelta(minutes=14, seconds=59)
+        assert loop.auto_compact._is_expired(ts2) is False
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_is_expired_string_timestamp(self, tmp_path):
+        """_is_expired should parse ISO string timestamps."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        ts = (datetime.now() - timedelta(minutes=20)).isoformat()
+        assert loop.auto_compact._is_expired(ts) is True
+        assert loop.auto_compact._is_expired(None) is False
+        assert loop.auto_compact._is_expired("") is False
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_check_expired_only_archives_expired_sessions(self, tmp_path):
+        """With multiple sessions, only the expired one should be archived."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        # Expired session
+        s1 = loop.sessions.get_or_create("cli:expired")
+        s1.add_message("user", "old")
+        s1.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(s1)
+        # Active session
+        s2 = loop.sessions.get_or_create("cli:active")
+        s2.add_message("user", "recent")
+        loop.sessions.save(s2)
+
+        async def _fake_archive(messages):
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+        loop.auto_compact.check_expired(loop._schedule_background)
+        await asyncio.sleep(0.1)
+
+        active_after = loop.sessions.get_or_create("cli:active")
+        assert len(active_after.messages) == 1
+        assert active_after.messages[0]["content"] == "recent"
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_auto_compact_archives_prefix_and_keeps_recent_suffix(self, tmp_path):
+        """_archive should summarize the old prefix and keep a recent legal suffix."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6)
+        loop.sessions.save(session)
+
+        archived_messages = []
+
+        async def _fake_archive(messages):
+            archived_messages.extend(messages)
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        await loop.auto_compact._archive("cli:test")
+
+        assert len(archived_messages) == 4
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert len(session_after.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES
+        assert session_after.messages[0]["content"] == "msg user 2"
+        assert session_after.messages[-1]["content"] == "msg assistant 5"
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_auto_compact_stores_summary(self, tmp_path):
+        """_archive should store the summary in _summaries."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6, prefix="hello")
+        loop.sessions.save(session)
+
+        async def _fake_archive(messages):
+            return "User said hello."
+
+        loop.consolidator.archive = _fake_archive
+
+        await loop.auto_compact._archive("cli:test")
+
+        entry = loop.auto_compact._summaries.get("cli:test")
+        assert entry is not None
+        assert entry[0] == "User said hello."
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert len(session_after.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_auto_compact_empty_session(self, tmp_path):
+        """_archive on empty session should not archive."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+
+        archive_called = False
+
+        async def _fake_archive(messages):
+            nonlocal archive_called
+            archive_called = True
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        await loop.auto_compact._archive("cli:test")
+
+        assert not archive_called
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert len(session_after.messages) == 0
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_auto_compact_respects_last_consolidated(self, tmp_path):
+        """_archive should only archive un-consolidated messages."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 14)
+        session.last_consolidated = 18
+        loop.sessions.save(session)
+
+        archived_count = 0
+
+        async def _fake_archive(messages):
+            nonlocal archived_count
+            archived_count = len(messages)
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        await loop.auto_compact._archive("cli:test")
+
+        assert archived_count == 2
+        await loop.close_mcp()
+
+
+class TestAutoCompactIdleDetection:
+    """Test idle detection triggers auto-new in _process_message."""
+
+    @pytest.mark.asyncio
+    async def test_no_auto_compact_when_ttl_disabled(self, tmp_path):
+        """No auto-new should happen when TTL is 0 (disabled)."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=0)
+        session = loop.sessions.get_or_create("cli:test")
+        session.add_message("user", "old message")
+        session.updated_at = datetime.now() - timedelta(minutes=30)
+        loop.sessions.save(session)
+
+        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="new msg")
+        await loop._process_message(msg)
+
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert any(m["content"] == "old message" for m in session_after.messages)
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_auto_compact_triggers_on_idle(self, tmp_path):
+        """Proactive auto-new archives expired session; _process_message reloads it."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6, prefix="old")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        archived_messages = []
+
+        async def _fake_archive(messages):
+            archived_messages.extend(messages)
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        # Simulate proactive archive completing before message arrives
+        await loop.auto_compact._archive("cli:test")
+
+        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="new msg")
+        await loop._process_message(msg)
+
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert len(archived_messages) == 4
+        assert not any(m["content"] == "old user 0" for m in session_after.messages)
+        assert any(m["content"] == "new msg" for m in session_after.messages)
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_no_auto_compact_when_active(self, tmp_path):
+        """No auto-new should happen when session is recently active."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        session.add_message("user", "recent message")
+        loop.sessions.save(session)
+
+        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="new msg")
+        await loop._process_message(msg)
+
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert any(m["content"] == "recent message" for m in session_after.messages)
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_auto_compact_does_not_affect_priority_commands(self, tmp_path):
+        """Priority commands (/stop, /restart) bypass _process_message entirely via run()."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        session.add_message("user", "old message")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        # Priority commands are dispatched in run() before _process_message is called.
+        # Simulate that path directly via dispatch_priority.
+        raw = "/stop"
+        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content=raw)
+        ctx = CommandContext(msg=msg, session=session, key="cli:test", raw=raw, loop=loop)
+        result = await loop.commands.dispatch_priority(ctx)
+        assert result is not None
+        assert "stopped" in result.content.lower() or "no active task" in result.content.lower()
+
+        # Session should be untouched since priority commands skip _process_message
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert any(m["content"] == "old message" for m in session_after.messages)
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_auto_compact_with_slash_new(self, tmp_path):
+        """Auto-new fires before /new dispatches; session is cleared twice but idempotent."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        for i in range(4):
+            session.add_message("user", f"msg{i}")
+            session.add_message("assistant", f"resp{i}")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        async def _fake_archive(messages):
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
+        response = await loop._process_message(msg)
+
+        assert response is not None
+        assert "new session started" in response.content.lower()
+
+        session_after = loop.sessions.get_or_create("cli:test")
+        # Session is empty (auto-new archived and cleared, /new cleared again)
+        assert len(session_after.messages) == 0
+        await loop.close_mcp()
+
+
+class TestAutoCompactSystemMessages:
+    """Test that auto-new also works for system messages."""
+
+    @pytest.mark.asyncio
+    async def test_auto_compact_triggers_for_system_messages(self, tmp_path):
+        """Proactive auto-new archives expired session; system messages reload it."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6, prefix="old")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        async def _fake_archive(messages):
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        # Simulate proactive archive completing before system message arrives
+        await loop.auto_compact._archive("cli:test")
+
+        msg = InboundMessage(
+            channel="system", sender_id="subagent", chat_id="cli:test",
+            content="subagent result",
+        )
+        await loop._process_message(msg)
+
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert not any(
+            m["content"] == "old user 0"
+            for m in session_after.messages
+        )
+        await loop.close_mcp()
+
+
+class TestAutoCompactEdgeCases:
+    """Edge cases for auto session new."""
+
+    @pytest.mark.asyncio
+    async def test_auto_compact_with_nothing_summary(self, tmp_path):
+        """Auto-new should not inject when archive produces '(nothing)'."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6, prefix="thanks")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        loop.provider.chat_with_retry = AsyncMock(
+            return_value=LLMResponse(content="(nothing)", tool_calls=[])
+        )
+
+        await loop.auto_compact._archive("cli:test")
+
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert len(session_after.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES
+        # "(nothing)" summary should not be stored
+        assert "cli:test" not in loop.auto_compact._summaries
+
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_auto_compact_archive_failure_still_keeps_recent_suffix(self, tmp_path):
+        """Auto-new should keep the recent suffix even if LLM archive falls back to raw dump."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6, prefix="important")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        loop.provider.chat_with_retry = AsyncMock(side_effect=Exception("API down"))
+
+        # Should not raise
+        await loop.auto_compact._archive("cli:test")
+
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert len(session_after.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES
+
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_auto_compact_preserves_runtime_checkpoint_before_check(self, tmp_path):
+        """Short expired sessions keep recent messages; checkpoint restore still works on resume."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        session.metadata[AgentLoop._RUNTIME_CHECKPOINT_KEY] = {
+            "assistant_message": {"role": "assistant", "content": "interrupted response"},
+            "completed_tool_results": [],
+            "pending_tool_calls": [],
+        }
+        session.add_message("user", "previous message")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        archived_messages = []
+
+        async def _fake_archive(messages):
+            archived_messages.extend(messages)
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        # Simulate proactive archive completing before message arrives
+        await loop.auto_compact._archive("cli:test")
+
+        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="continue")
+        await loop._process_message(msg)
+
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert archived_messages == []
+        assert any(m["content"] == "previous message" for m in session_after.messages)
+        assert any(m["content"] == "interrupted response" for m in session_after.messages)
+
+        await loop.close_mcp()
+
+
+class TestAutoCompactIntegration:
+    """End-to-end test of auto session new feature."""
+
+    @pytest.mark.asyncio
+    async def test_full_lifecycle(self, tmp_path):
+        """
+        Full lifecycle: messages -> idle -> auto-new -> archive -> clear -> summary injected as runtime context.
+        """
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+
+        # Phase 1: User has a conversation longer than the retained recent suffix
+        session.add_message("user", "I'm learning English, teach me past tense")
+        session.add_message("assistant", "Past tense is used for actions completed in the past...")
+        session.add_message("user", "Give me an example")
+        session.add_message("assistant", '"I walked to the store yesterday."')
+        session.add_message("user", "Give me another example")
+        session.add_message("assistant", '"She visited Paris last year."')
+        session.add_message("user", "Quiz me")
+        session.add_message("assistant", "What is the past tense of go?")
+        session.add_message("user", "I think it is went")
+        session.add_message("assistant", "Correct.")
+        loop.sessions.save(session)
+
+        # Phase 2: Time passes (simulate idle)
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        # Phase 3: User returns with a new message
+        loop.provider.chat_with_retry = AsyncMock(
+            return_value=LLMResponse(
+                content="User is learning English past tense. Example: 'I walked to the store yesterday.'",
+                tool_calls=[],
+            )
+        )
+
+        msg = InboundMessage(
+            channel="cli", sender_id="user", chat_id="test",
+            content="Let's continue, teach me present perfect",
+        )
+        response = await loop._process_message(msg)
+
+        # Phase 4: Verify
+        session_after = loop.sessions.get_or_create("cli:test")
+
+        # The oldest messages should be trimmed from live session history
+        assert not any(
+            "past tense is used" in str(m.get("content", "")) for m in session_after.messages
+        )
+
+        # Summary should NOT be persisted in session (ephemeral, one-shot)
+        assert not any(
+            "[Resumed Session]" in str(m.get("content", "")) for m in session_after.messages
+        )
+        # Runtime context end marker should NOT be persisted
+        assert not any(
+            "[/Runtime Context]" in str(m.get("content", "")) for m in session_after.messages
+        )
+
+        # Pending summary should be consumed (one-shot)
+        assert "cli:test" not in loop.auto_compact._summaries
+
+        # The new message should be processed (response exists)
+        assert response is not None
+
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_runtime_context_markers_not_persisted_for_multi_paragraph_turn(self, tmp_path):
+        """Auto-compact resume context must not leak runtime markers into persisted session history."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        session.add_message("user", "old message")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        async def _fake_archive(messages):
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        # Simulate proactive archive completing before message arrives
+        await loop.auto_compact._archive("cli:test")
+
+        msg = InboundMessage(
+            channel="cli", sender_id="user", chat_id="test",
+            content="Paragraph one\n\nParagraph two\n\nParagraph three",
+        )
+        await loop._process_message(msg)
+
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert any(m.get("content") == "old message" for m in session_after.messages)
+        for persisted in session_after.messages:
+            content = str(persisted.get("content", ""))
+            assert "[Runtime Context" not in content
+            assert "[/Runtime Context]" not in content
+        await loop.close_mcp()
+
+
+class TestProactiveAutoCompact:
+    """Test proactive auto-new on idle ticks (TimeoutError path in run loop)."""
+
+    @staticmethod
+    async def _run_check_expired(loop):
+        """Helper: run check_expired via callback and wait for background tasks."""
+        loop.auto_compact.check_expired(loop._schedule_background)
+        await asyncio.sleep(0.1)
+
+    @pytest.mark.asyncio
+    async def test_no_check_when_ttl_disabled(self, tmp_path):
+        """check_expired should be a no-op when TTL is 0."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=0)
+        session = loop.sessions.get_or_create("cli:test")
+        session.add_message("user", "old message")
+        session.updated_at = datetime.now() - timedelta(minutes=30)
+        loop.sessions.save(session)
+
+        await self._run_check_expired(loop)
+
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert len(session_after.messages) == 1
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_proactive_archive_on_idle_tick(self, tmp_path):
+        """Expired session should be archived during idle tick."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 5, prefix="old")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        archived_messages = []
+
+        async def _fake_archive(messages):
+            archived_messages.extend(messages)
+            return "User chatted about old things."
+
+        loop.consolidator.archive = _fake_archive
+
+        await self._run_check_expired(loop)
+
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert len(session_after.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES
+        assert len(archived_messages) == 2
+        entry = loop.auto_compact._summaries.get("cli:test")
+        assert entry is not None
+        assert entry[0] == "User chatted about old things."
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_no_proactive_archive_when_active(self, tmp_path):
+        """Recently active session should NOT be archived on idle tick."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        session.add_message("user", "recent message")
+        loop.sessions.save(session)
+
+        await self._run_check_expired(loop)
+
+        session_after = loop.sessions.get_or_create("cli:test")
+        assert len(session_after.messages) == 1
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_no_duplicate_archive(self, tmp_path):
+        """Should not archive the same session twice if already in progress."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6, prefix="old")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        archive_count = 0
+        started = asyncio.Event()
+        block_forever = asyncio.Event()
+
+        async def _slow_archive(messages):
+            nonlocal archive_count
+            archive_count += 1
+            started.set()
+            await block_forever.wait()
+            return "Summary."
+
+        loop.consolidator.archive = _slow_archive
+
+        # First call starts archiving via callback
+        loop.auto_compact.check_expired(loop._schedule_background)
+        await started.wait()
+        assert archive_count == 1
+
+        # Second call should skip (key is in _archiving)
+        loop.auto_compact.check_expired(loop._schedule_background)
+        await asyncio.sleep(0.05)
+        assert archive_count == 1
+
+        # Clean up
+        block_forever.set()
+        await asyncio.sleep(0.1)
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_proactive_archive_error_does_not_block(self, tmp_path):
+        """Proactive archive failure should be caught and not block future ticks."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6, prefix="old")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        async def _failing_archive(messages):
+            raise RuntimeError("LLM down")
+
+        loop.consolidator.archive = _failing_archive
+
+        # Should not raise
+        await self._run_check_expired(loop)
+
+        # Key should be removed from _archiving (finally block)
+        assert "cli:test" not in loop.auto_compact._archiving
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_proactive_archive_skips_empty_sessions(self, tmp_path):
+        """Proactive archive should not call LLM for sessions with no un-consolidated messages."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        archive_called = False
+
+        async def _fake_archive(messages):
+            nonlocal archive_called
+            archive_called = True
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        await self._run_check_expired(loop)
+
+        assert not archive_called
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_no_reschedule_after_successful_archive(self, tmp_path):
+        """Already-archived session should NOT be re-scheduled on subsequent ticks."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 5, prefix="old")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        archive_count = 0
+
+        async def _fake_archive(messages):
+            nonlocal archive_count
+            archive_count += 1
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        # First tick: archives the session
+        await self._run_check_expired(loop)
+        assert archive_count == 1
+
+        # Second tick: should NOT re-schedule (updated_at is fresh after clear)
+        await self._run_check_expired(loop)
+        assert archive_count == 1  # Still 1, not re-scheduled
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_empty_skip_refreshes_updated_at_prevents_reschedule(self, tmp_path):
+        """Empty session skip refreshes updated_at, preventing immediate re-scheduling."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        archive_count = 0
+
+        async def _fake_archive(messages):
+            nonlocal archive_count
+            archive_count += 1
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        # First tick: skips (no messages), refreshes updated_at
+        await self._run_check_expired(loop)
+        assert archive_count == 0
+
+        # Second tick: should NOT re-schedule because updated_at is fresh
+        await self._run_check_expired(loop)
+        assert archive_count == 0
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_session_can_be_compacted_again_after_new_messages(self, tmp_path):
+        """After successful compact + user sends new messages + idle again, should compact again."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 5, prefix="first")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        archive_count = 0
+
+        async def _fake_archive(messages):
+            nonlocal archive_count
+            archive_count += 1
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        # First compact cycle
+        await loop.auto_compact._archive("cli:test")
+        assert archive_count == 1
+
+        # User returns, sends new messages
+        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="second topic")
+        await loop._process_message(msg)
+
+        # Simulate idle again
+        loop.sessions.invalidate("cli:test")
+        session2 = loop.sessions.get_or_create("cli:test")
+        session2.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session2)
+
+        # Second compact cycle should succeed
+        await loop.auto_compact._archive("cli:test")
+        assert archive_count == 2
+        await loop.close_mcp()
+
+
+class TestSummaryPersistence:
+    """Test that summary survives restart via session metadata."""
+
+    @pytest.mark.asyncio
+    async def test_summary_persisted_in_session_metadata(self, tmp_path):
+        """After archive, _last_summary should be in session metadata."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6, prefix="hello")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        async def _fake_archive(messages):
+            return "User said hello."
+
+        loop.consolidator.archive = _fake_archive
+
+        await loop.auto_compact._archive("cli:test")
+
+        # Summary should be persisted in session metadata
+        session_after = loop.sessions.get_or_create("cli:test")
+        meta = session_after.metadata.get("_last_summary")
+        assert meta is not None
+        assert meta["text"] == "User said hello."
+        assert "last_active" in meta
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_summary_recovered_after_restart(self, tmp_path):
+        """Summary should be recovered from metadata when _summaries is empty (simulates restart)."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6, prefix="hello")
+        last_active = datetime.now() - timedelta(minutes=20)
+        session.updated_at = last_active
+        loop.sessions.save(session)
+
+        async def _fake_archive(messages):
+            return "User said hello."
+
+        loop.consolidator.archive = _fake_archive
+
+        # Archive
+        await loop.auto_compact._archive("cli:test")
+
+        # Simulate restart: clear in-memory state
+        loop.auto_compact._summaries.clear()
+        loop.sessions.invalidate("cli:test")
+
+        # prepare_session should recover summary from metadata
+        reloaded = loop.sessions.get_or_create("cli:test")
+        assert len(reloaded.messages) == loop.auto_compact._RECENT_SUFFIX_MESSAGES
+        _, summary = loop.auto_compact.prepare_session(reloaded, "cli:test")
+
+        assert summary is not None
+        assert "User said hello." in summary
+        assert "Inactive for" in summary
+        # Metadata should be cleaned up after consumption
+        assert "_last_summary" not in reloaded.metadata
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_metadata_cleanup_no_leak(self, tmp_path):
+        """_last_summary should be removed from metadata after being consumed."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6, prefix="hello")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        async def _fake_archive(messages):
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        await loop.auto_compact._archive("cli:test")
+
+        # Clear in-memory to force metadata path
+        loop.auto_compact._summaries.clear()
+        loop.sessions.invalidate("cli:test")
+        reloaded = loop.sessions.get_or_create("cli:test")
+
+        # First call: consumes from metadata
+        _, summary = loop.auto_compact.prepare_session(reloaded, "cli:test")
+        assert summary is not None
+
+        # Second call: no summary (already consumed)
+        _, summary2 = loop.auto_compact.prepare_session(reloaded, "cli:test")
+        assert summary2 is None
+        assert "_last_summary" not in reloaded.metadata
+        await loop.close_mcp()
+
+    @pytest.mark.asyncio
+    async def test_metadata_cleanup_on_inmemory_path(self, tmp_path):
+        """In-memory _summaries path should also clean up _last_summary from metadata."""
+        loop = _make_loop(tmp_path, session_ttl_minutes=15)
+        session = loop.sessions.get_or_create("cli:test")
+        _add_turns(session, 6, prefix="hello")
+        session.updated_at = datetime.now() - timedelta(minutes=20)
+        loop.sessions.save(session)
+
+        async def _fake_archive(messages):
+            return "Summary."
+
+        loop.consolidator.archive = _fake_archive
+
+        await loop.auto_compact._archive("cli:test")
+
+        # Both _summaries and metadata have the summary
+        assert "cli:test" in loop.auto_compact._summaries
+        loop.sessions.invalidate("cli:test")
+        reloaded = loop.sessions.get_or_create("cli:test")
+        assert "_last_summary" in reloaded.metadata
+
+        # In-memory path is taken (no restart)
+        _, summary = loop.auto_compact.prepare_session(reloaded, "cli:test")
+        assert summary is not None
+        # Metadata should also be cleaned up
+        assert "_last_summary" not in reloaded.metadata
+        await loop.close_mcp()
diff --git a/tests/agent/test_consolidator.py b/tests/agent/test_consolidator.py
index b7989d9dd..28587e1b4 100644
--- a/tests/agent/test_consolidator.py
+++ b/tests/agent/test_consolidator.py
@@ -46,7 +46,7 @@ class TestConsolidatorSummarize:
             {"role": "assistant", "content": "Done, fixed the race condition."},
         ]
         result = await consolidator.archive(messages)
-        assert result is True
+        assert result == "User fixed a bug in the auth module."
         entries = store.read_unprocessed_history(since_cursor=0)
         assert len(entries) == 1
 
@@ -55,14 +55,14 @@ class TestConsolidatorSummarize:
         mock_provider.chat_with_retry.side_effect = Exception("API error")
         messages = [{"role": "user", "content": "hello"}]
         result = await consolidator.archive(messages)
-        assert result is True  # always succeeds
+        assert result is None  # no summary on raw dump fallback
         entries = store.read_unprocessed_history(since_cursor=0)
         assert len(entries) == 1
         assert "[RAW]" in entries[0]["content"]
 
     async def test_summarize_skips_empty_messages(self, consolidator):
         result = await consolidator.archive([])
-        assert result is False
+        assert result is None
 
 
 class TestConsolidatorTokenBudget:
diff --git a/tests/agent/test_mcp_connection.py b/tests/agent/test_mcp_connection.py
new file mode 100644
index 000000000..e7d0a7854
--- /dev/null
+++ b/tests/agent/test_mcp_connection.py
@@ -0,0 +1,44 @@
+"""Tests for MCP connection lifecycle in AgentLoop."""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from nanobot.agent.loop import AgentLoop
+from nanobot.bus.queue import MessageBus
+
+
+def _make_loop(tmp_path, *, mcp_servers: dict | None = None) -> AgentLoop:
+    bus = MessageBus()
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    provider.generation.max_tokens = 4096
+    return AgentLoop(
+        bus=bus,
+        provider=provider,
+        workspace=tmp_path,
+        model="test-model",
+        mcp_servers=mcp_servers or {"test": object()},
+    )
+
+
+@pytest.mark.asyncio
+async def test_connect_mcp_retries_when_no_servers_connect(tmp_path, monkeypatch: pytest.MonkeyPatch):
+    loop = _make_loop(tmp_path)
+    attempts = 0
+
+    async def _fake_connect(_servers, _registry):
+        nonlocal attempts
+        attempts += 1
+        return {}
+
+    monkeypatch.setattr("nanobot.agent.tools.mcp.connect_mcp_servers", _fake_connect)
+
+    await loop._connect_mcp()
+    await loop._connect_mcp()
+
+    assert attempts == 2
+    assert loop._mcp_connected is False
+    assert loop._mcp_stacks == {}
diff --git a/tests/cron/test_cron_service.py b/tests/cron/test_cron_service.py
index f1956d8d2..747f8ec81 100644
--- a/tests/cron/test_cron_service.py
+++ b/tests/cron/test_cron_service.py
@@ -329,6 +329,45 @@ async def test_external_update_preserves_run_history_records(tmp_path):
     fresh._save_store()
 
 
+# ── timer race regression tests ──
+
+
+@pytest.mark.asyncio
+async def test_timer_execution_is_not_rolled_back_by_list_jobs_reload(tmp_path):
+    """list_jobs() during _on_timer should not replace the active store and re-run the same due job."""
+    store_path = tmp_path / "cron" / "jobs.json"
+    calls: list[str] = []
+
+    async def on_job(job):
+        calls.append(job.id)
+        # Simulate frontend polling list_jobs while the timer callback is mid-execution.
+        service.list_jobs(include_disabled=True)
+        await asyncio.sleep(0)
+
+    service = CronService(store_path, on_job=on_job)
+    service._running = True
+    service._load_store()
+    service._arm_timer = lambda: None
+
+    job = service.add_job(
+        name="race",
+        schedule=CronSchedule(kind="every", every_ms=60_000),
+        message="hello",
+    )
+    job.state.next_run_at_ms = max(1, int(time.time() * 1000) - 1_000)
+    service._save_store()
+
+    await service._on_timer()
+    await service._on_timer()
+
+    assert calls == [job.id]
+    loaded = service.get_job(job.id)
+    assert loaded is not None
+    assert loaded.state.last_run_at_ms is not None
+    assert loaded.state.next_run_at_ms is not None
+    assert loaded.state.next_run_at_ms > loaded.state.last_run_at_ms
+
+
 # ── update_job tests ──
 
 
@@ -479,3 +518,49 @@ def test_update_job_sentinel_channel_and_to(tmp_path) -> None:
     assert isinstance(result, CronJob)
     assert result.payload.channel is None
     assert result.payload.to is None
+
+
+@pytest.mark.asyncio
+async def test_list_jobs_during_on_job_does_not_cause_stale_reload(tmp_path) -> None:
+    """Regression: if the bot calls list_jobs (which reloads from disk) during
+    on_job execution, the in-memory next_run_at_ms update must not be lost.
+    Previously this caused an infinite re-trigger loop."""
+    store_path = tmp_path / "cron" / "jobs.json"
+    execution_count = 0
+
+    async def on_job_that_lists(job):
+        nonlocal execution_count
+        execution_count += 1
+        # Simulate the bot calling cron(action=list) mid-execution
+        service.list_jobs()
+
+    service = CronService(store_path, on_job=on_job_that_lists, max_sleep_ms=100)
+    await service.start()
+
+    # Add two jobs scheduled in the past so they're immediately due
+    now_ms = int(time.time() * 1000)
+    for name in ("job-a", "job-b"):
+        service.add_job(
+            name=name,
+            schedule=CronSchedule(kind="every", every_ms=3_600_000),
+            message="test",
+        )
+    # Force next_run to the past so _on_timer picks them up
+    for job in service._store.jobs:
+        job.state.next_run_at_ms = now_ms - 1000
+    service._save_store()
+    service._arm_timer()
+
+    # Let the timer fire once
+    await asyncio.sleep(0.3)
+    service.stop()
+
+    # Each job should have run exactly once, not looped
+    assert execution_count == 2
+
+    # Verify next_run_at_ms was persisted correctly (in the future)
+    raw = json.loads(store_path.read_text())
+    for j in raw["jobs"]:
+        next_run = j["state"]["nextRunAtMs"]
+        assert next_run is not None
+        assert next_run > now_ms, f"Job '{j['name']}' next_run should be in the future"
diff --git a/tests/tools/test_mcp_tool.py b/tests/tools/test_mcp_tool.py
index adeb78e75..da90c4d0d 100644
--- a/tests/tools/test_mcp_tool.py
+++ b/tests/tools/test_mcp_tool.py
@@ -356,6 +356,46 @@ async def test_connect_mcp_servers_enabled_tools_warns_on_unknown_entries(
     assert "Available wrapped names: mcp_test_demo" in warnings[-1]
 
 
+@pytest.mark.asyncio
+async def test_connect_mcp_servers_one_failure_does_not_block_others(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    sessions = {"good": _make_fake_session(["demo"])}
+
+    class _SelectiveClientSession:
+        def __init__(self, read: object, _write: object) -> None:
+            self._session = sessions[read]
+
+        async def __aenter__(self) -> object:
+            return self._session
+
+        async def __aexit__(self, exc_type, exc, tb) -> bool:
+            return False
+
+    @asynccontextmanager
+    async def _selective_stdio_client(params: object):
+        if params.command == "bad":
+            raise RuntimeError("boom")
+        yield params.command, object()
+
+    monkeypatch.setattr(sys.modules["mcp"], "ClientSession", _SelectiveClientSession)
+    monkeypatch.setattr(sys.modules["mcp.client.stdio"], "stdio_client", _selective_stdio_client)
+
+    registry = ToolRegistry()
+    stacks = await connect_mcp_servers(
+        {
+            "good": MCPServerConfig(command="good"),
+            "bad": MCPServerConfig(command="bad"),
+        },
+        registry,
+    )
+    for stack in stacks.values():
+        await stack.aclose()
+
+    assert registry.tool_names == ["mcp_good_demo"]
+    assert set(stacks) == {"good"}
+
+
 # ---------------------------------------------------------------------------
 # MCPResourceWrapper tests
 # ---------------------------------------------------------------------------
diff --git a/tests/tools/test_web_search_tool.py b/tests/tools/test_web_search_tool.py
index e33dd7e6c..790d8adcd 100644
--- a/tests/tools/test_web_search_tool.py
+++ b/tests/tools/test_web_search_tool.py
@@ -120,6 +120,27 @@ async def test_jina_search(monkeypatch):
     assert "https://jina.ai" in result
 
 
+@pytest.mark.asyncio
+async def test_kagi_search(monkeypatch):
+    async def mock_get(self, url, **kw):
+        assert "kagi.com/api/v0/search" in url
+        assert kw["headers"]["Authorization"] == "Bot kagi-key"
+        assert kw["params"] == {"q": "test", "limit": 2}
+        return _response(json={
+            "data": [
+                {"t": 0, "title": "Kagi Result", "url": "https://kagi.com", "snippet": "Premium search"},
+                {"t": 1, "list": ["ignored related search"]},
+            ]
+        })
+
+    monkeypatch.setattr(httpx.AsyncClient, "get", mock_get)
+    tool = _tool(provider="kagi", api_key="kagi-key")
+    result = await tool.execute(query="test", count=2)
+    assert "Kagi Result" in result
+    assert "https://kagi.com" in result
+    assert "ignored related search" not in result
+
+
 @pytest.mark.asyncio
 async def test_unknown_provider():
     tool = _tool(provider="unknown")
@@ -189,6 +210,23 @@ async def test_jina_422_falls_back_to_duckduckgo(monkeypatch):
     assert "DuckDuckGo fallback" in result
 
 
+@pytest.mark.asyncio
+async def test_kagi_fallback_to_duckduckgo_when_no_key(monkeypatch):
+    class MockDDGS:
+        def __init__(self, **kw):
+            pass
+
+        def text(self, query, max_results=5):
+            return [{"title": "Fallback", "href": "https://ddg.example", "body": "DuckDuckGo fallback"}]
+
+    monkeypatch.setattr("ddgs.DDGS", MockDDGS)
+    monkeypatch.delenv("KAGI_API_KEY", raising=False)
+
+    tool = _tool(provider="kagi", api_key="")
+    result = await tool.execute(query="test")
+    assert "Fallback" in result
+
+
 @pytest.mark.asyncio
 async def test_jina_search_uses_path_encoded_query(monkeypatch):
     calls = {}
diff --git a/tests/utils/test_strip_think.py b/tests/utils/test_strip_think.py
index 6710dfc93..5828c6d1f 100644
--- a/tests/utils/test_strip_think.py
+++ b/tests/utils/test_strip_think.py
@@ -34,3 +34,32 @@ class TestStripThinkTag:
 
     def test_empty_string(self):
         assert strip_think("") == ""
+
+
+class TestStripThinkFalsePositive:
+    """Ensure mid-content <think>/<thought> tags are NOT stripped (#3004)."""
+
+    def test_backtick_think_tag_preserved(self):
+        text = "*Think Stripping:* A new utility to strip `<think>` tags from output."
+        assert strip_think(text) == text
+
+    def test_prose_think_tag_preserved(self):
+        text = "The model emits <think> at the start of its response."
+        assert strip_think(text) == text
+
+    def test_code_block_think_tag_preserved(self):
+        text = "Example:\n```\ntext = re.sub(r\"<think>[\\s\\S]*\", \"\", text)\n```\nDone."
+        assert strip_think(text) == text
+
+    def test_backtick_thought_tag_preserved(self):
+        text = "Gemma 4 uses `<thought>` blocks for reasoning."
+        assert strip_think(text) == text
+
+    def test_prefix_unclosed_think_still_stripped(self):
+        assert strip_think("<think>reasoning without closing") == ""
+
+    def test_prefix_unclosed_think_with_whitespace(self):
+        assert strip_think("  <think>reasoning...") == ""
+
+    def test_prefix_unclosed_thought_still_stripped(self):
+        assert strip_think("<thought>reasoning without closing") == ""