diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index fb630ce13..60b542082 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -221,10 +221,26 @@ class MemoryStore: # -- history.jsonl — append-only, JSONL format --------------------------- def append_history(self, entry: str) -> int: - """Append *entry* to history.jsonl and return its auto-incrementing cursor.""" + """Append *entry* to history.jsonl and return its auto-incrementing cursor. + + Entries are passed through `strip_think` to drop template-level leaks + (e.g. unclosed `` markers) before being + persisted. If the cleaned content is empty but the raw entry wasn't, + the record is persisted with an empty string rather than falling back + to the raw leak — otherwise `strip_think`'s guarantees would be + undone by history replay / consolidation downstream. + """ cursor = self._next_cursor() ts = datetime.now().strftime("%Y-%m-%d %H:%M") - record = {"cursor": cursor, "timestamp": ts, "content": strip_think(entry.rstrip()) or entry.rstrip()} + raw = entry.rstrip() + content = strip_think(raw) + if raw and not content: + logger.debug( + "history entry {} stripped to empty (likely template leak); " + "persisting empty content to avoid re-polluting context", + cursor, + ) + record = {"cursor": cursor, "timestamp": ts, "content": content} with open(self.history_file, "a", encoding="utf-8") as f: f.write(json.dumps(record, ensure_ascii=False) + "\n") self._cursor_file.write_text(str(cursor), encoding="utf-8") diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py index 6c3849ef8..74c80c110 100644 --- a/nanobot/utils/helpers.py +++ b/nanobot/utils/helpers.py @@ -15,12 +15,48 @@ from loguru import logger def strip_think(text: str) -> str: - """Remove thinking blocks and any unclosed trailing tag.""" + """Remove thinking blocks, unclosed trailing tags, and tokenizer-level + template leaks occasionally emitted by some models (notably Gemma 4's + Ollama renderer). + + Covers: + 1. Well-formed `...` and `...` blocks. + 2. Streaming prefixes where the block is never closed. + 3. *Malformed* opening tags missing the `>` — e.g. `` / `<|channel|>` + **at the start of the text** — conservative to avoid eating + explanatory prose that mentions these tokens. + 5. Orphan closing tags `` / `` **at the very start + or end of the text** only, for the same reason. + + Since this is also applied before persisting to history (memory.py), + the edge-only stripping of (4) and (5) is deliberate: stripping those + tokens mid-text would silently rewrite any message where a user or the + assistant discusses the tokens themselves. + """ + # Well-formed blocks first. text = re.sub(r"[\s\S]*?", "", text) text = re.sub(r"^\s*[\s\S]*$", "", text) - # Gemma 4 and similar models use ... blocks text = re.sub(r"[\s\S]*?", "", text) text = re.sub(r"^\s*[\s\S]*$", "", text) + # Malformed opening tags: `` / `/` — we can't use `\w` here because in Python's default + # Unicode regex mode it matches CJK characters too, which would defeat + # the primary fix for `/])", "", text) + text = re.sub(r"/])", "", text) + # Edge-only orphan closing tags (start or end of text). + text = re.sub(r"^\s*\s*", "", text) + text = re.sub(r"\s*\s*$", "", text) + text = re.sub(r"^\s*\s*", "", text) + text = re.sub(r"\s*\s*$", "", text) + # Edge-only channel markers (harmony / Gemma 4 variant leaks). + text = re.sub(r"^\s*<\|?channel\|?>\s*", "", text) return text.strip() @@ -37,7 +73,9 @@ def detect_image_mime(data: bytes) -> str | None: return None -def build_image_content_blocks(raw: bytes, mime: str, path: str, label: str) -> list[dict[str, Any]]: +def build_image_content_blocks( + raw: bytes, mime: str, path: str, label: str +) -> list[dict[str, Any]]: """Build native image blocks plus a short text label.""" b64 = base64.b64encode(raw).decode() return [ @@ -83,6 +121,7 @@ _TOOL_RESULTS_DIR = ".nanobot/tool-results" _TOOL_RESULT_RETENTION_SECS = 7 * 24 * 60 * 60 _TOOL_RESULT_MAX_BUCKETS = 32 + def safe_filename(name: str) -> str: """Replace unsafe path characters with underscores.""" return _UNSAFE_CHARS.sub("_", name).strip() @@ -258,9 +297,9 @@ def split_message(content: str, max_len: int = 2000) -> list[str]: break cut = content[:max_len] # Try to break at newline first, then space, then hard break - pos = cut.rfind('\n') + pos = cut.rfind("\n") if pos <= 0: - pos = cut.rfind(' ') + pos = cut.rfind(" ") if pos <= 0: pos = max_len chunks.append(content[:pos]) @@ -404,7 +443,7 @@ def build_status_content( max_completion_tokens: int = 8192, ) -> str: """Build a human-readable runtime status snapshot. - + Args: search_usage_text: Optional pre-formatted web search usage string (produced by SearchUsageInfo.format()). When provided @@ -423,7 +462,11 @@ def build_status_content( # Budget mirrors Consolidator formula: ctx_window - max_completion - _SAFETY_BUFFER ctx_budget = max(ctx_total - int(max_completion_tokens) - 1024, 1) ctx_pct = min(int((context_tokens_estimate / ctx_budget) * 100), 999) if ctx_budget > 0 else 0 - ctx_used_str = f"{context_tokens_estimate // 1000}k" if context_tokens_estimate >= 1000 else str(context_tokens_estimate) + ctx_used_str = ( + f"{context_tokens_estimate // 1000}k" + if context_tokens_estimate >= 1000 + else str(context_tokens_estimate) + ) ctx_total_str = f"{ctx_total // 1000}k" if ctx_total > 0 else "n/a" token_line = f"\U0001f4ca Tokens: {last_in} in / {last_out} out" if cached and last_in: @@ -439,12 +482,13 @@ def build_status_content( ] if search_usage_text: lines.append(search_usage_text) - return "\n".join(lines) + return "\n".join(lines) def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str]: """Sync bundled templates to workspace. Only creates missing files.""" from importlib.resources import files as pkg_files + try: tpl = pkg_files("nanobot") / "templates" except Exception: @@ -470,15 +514,22 @@ def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str] if added and not silent: from rich.console import Console + for name in added: Console().print(f" [dim]Created {name}[/dim]") # Initialize git for memory version control try: from nanobot.utils.gitstore import GitStore - gs = GitStore(workspace, tracked_files=[ - "SOUL.md", "USER.md", "memory/MEMORY.md", - ]) + + gs = GitStore( + workspace, + tracked_files=[ + "SOUL.md", + "USER.md", + "memory/MEMORY.md", + ], + ) gs.init() except Exception: logger.warning("Failed to initialize git store for {}", workspace) diff --git a/tests/agent/test_memory_store.py b/tests/agent/test_memory_store.py index 7bb23fc69..94adbf376 100644 --- a/tests/agent/test_memory_store.py +++ b/tests/agent/test_memory_store.py @@ -1,8 +1,7 @@ """Tests for the restructured MemoryStore — pure file I/O layer.""" -from datetime import datetime import json -from pathlib import Path +from datetime import datetime import pytest @@ -65,6 +64,34 @@ class TestHistoryWithCursor: cursor = store.append_history("event 3") assert cursor == 3 + def test_append_history_strips_thinking_content(self, store): + """`strip_think` must run before persistence — well-formed thinking + blocks shouldn't land in history.""" + cursor = store.append_history("reasoningfinal answer") + content = store.read_file(store.history_file) + data = json.loads(content) + assert data["cursor"] == cursor + assert data["content"] == "final answer" + + def test_append_history_drops_pure_leak_content(self, store): + """Regression: entries that strip down to empty (pure template-token + leak) must NOT fall back to the raw leak. Persisting the raw text + would re-pollute context via consolidation / replay, undoing the + protection `strip_think` provides.""" + cursor = store.append_history("nothing user-facing") + content = store.read_file(store.history_file) + data = json.loads(content) + assert data["cursor"] == cursor + assert data["content"] == "" + + def test_append_history_drops_malformed_leak_prefix(self, store): + """Channel-marker / malformed opening leaks should not survive.""" + cursor = store.append_history("") + content = store.read_file(store.history_file) + data = json.loads(content) + assert data["cursor"] == cursor + assert data["content"] == "" + def test_read_unprocessed_history(self, store): store.append_history("event 1") store.append_history("event 2") @@ -134,7 +161,8 @@ class TestLegacyHistoryMigration: """JSONL entries with cursor=1 are correctly parsed and returned.""" store.history_file.write_text( '{"cursor": 1, "timestamp": "2026-03-30 14:30", "content": "Old event"}\n', - encoding="utf-8") + encoding="utf-8", + ) entries = store.read_unprocessed_history(since_cursor=0) assert len(entries) == 1 assert entries[0]["cursor"] == 1 @@ -218,8 +246,7 @@ class TestLegacyHistoryMigration: memory_dir.mkdir() legacy_file = memory_dir / "HISTORY.md" legacy_content = ( - "[2026-03-25–2026-04-02] Multi-day summary.\n" - "[2026-03-26/27] Cross-day summary.\n" + "[2026-03-25–2026-04-02] Multi-day summary.\n[2026-03-26/27] Cross-day summary.\n" ) legacy_file.write_text(legacy_content, encoding="utf-8") @@ -277,9 +304,7 @@ class TestLegacyHistoryMigration: memory_dir = tmp_path / "memory" memory_dir.mkdir() legacy_file = memory_dir / "HISTORY.md" - legacy_file.write_bytes( - b"[2026-04-01 10:00] Broken \xff data still needs migration.\n\n" - ) + legacy_file.write_bytes(b"[2026-04-01 10:00] Broken \xff data still needs migration.\n\n") store = MemoryStore(tmp_path) diff --git a/tests/utils/test_strip_think.py b/tests/utils/test_strip_think.py index 5828c6d1f..1eda89eb5 100644 --- a/tests/utils/test_strip_think.py +++ b/tests/utils/test_strip_think.py @@ -1,5 +1,3 @@ -import pytest - from nanobot.utils.helpers import strip_think @@ -48,7 +46,7 @@ class TestStripThinkFalsePositive: assert strip_think(text) == text def test_code_block_think_tag_preserved(self): - text = "Example:\n```\ntext = re.sub(r\"[\\s\\S]*\", \"\", text)\n```\nDone." + text = 'Example:\n```\ntext = re.sub(r"[\\s\\S]*", "", text)\n```\nDone.' assert strip_think(text) == text def test_backtick_thought_tag_preserved(self): @@ -63,3 +61,76 @@ class TestStripThinkFalsePositive: def test_prefix_unclosed_thought_still_stripped(self): assert strip_think("reasoning without closing") == "" + + +class TestStripThinkMalformedLeaks: + """Regression: Gemma 4's Ollama renderer occasionally emits a tag name + with no closing '>', running straight into the user-facing content + (e.g. `' and + let these through.""" + + def test_malformed_think_no_gt_chinese(self): + assert strip_think("` is a valid tag name variant; must not match. + assert strip_think("content") == "content" + + def test_self_closing_preserved(self): + assert strip_think("ok") == "ok" + assert strip_think("ok") == "ok" + + def test_orphan_closing_think_at_end_stripped(self): + # Typical leak: model opens `` without closing; we strip the + # opener from the start, leaving an orphan `` at the end. + assert strip_think("answer") == "answer" + + def test_orphan_closing_think_at_start_stripped(self): + assert strip_think("answer") == "answer" + + def test_channel_marker_at_start_stripped(self): + # Harmony / Gemma 4 channel markers leak at the start of a response. + assert strip_think("喷泉策略:09:00 开启") == ("喷泉策略:09:00 开启") + assert strip_think("<|channel|>answer") == "answer" + + +class TestStripThinkConservativePreserve: + """Regression: the malformed-tag / orphan cleanup must NOT touch + legitimate prose or code that mentions these tokens literally, otherwise + `strip_think` (which runs before history is persisted, memory.py) will + silently rewrite the conversation transcript.""" + + def test_think_dash_variant_preserved(self): + assert strip_think("bar") == "bar" + + def test_think_underscore_variant_preserved(self): + assert strip_think("bar") == "bar" + + def test_think_numeric_variant_preserved(self): + assert strip_think("bar") == "bar" + + def test_think_namespaced_variant_preserved(self): + assert strip_think("bar") == "bar" + + def test_literal_close_think_in_prose_preserved(self): + # Mid-prose references to `` in backticks or plain text must + # not be stripped; edge-only regex protects this. + text = "Use `` to close a thinking block." + assert strip_think(text) == text + + def test_literal_channel_marker_in_prose_preserved(self): + text = "The Harmony spec uses `<|channel|>` and `` markers." + assert strip_think(text) == text + + def test_literal_channel_marker_in_code_block_preserved(self): + text = "Example:\n```\nif line.startswith(''):\n skip()\n```" + assert strip_think(text) == text