Merge remote-tracking branch 'origin/main' into nightly

2026-05-03 08:15:53 +00:00 · 2026-04-20 23:38:08 +08:00 · 2026-04-20 23:38:08 +08:00 · f2848b9b94
commit f2848b9b94
parent e49b56525b 899a9073ce
4 changed files with 187 additions and 24 deletions
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@ -221,10 +221,26 @@ class MemoryStore:
    # -- history.jsonl — append-only, JSONL format ---------------------------
    def append_history(self, entry: str) -> int:
-        """Append *entry* to history.jsonl and return its auto-incrementing cursor."""
+        """Append *entry* to history.jsonl and return its auto-incrementing cursor.
        Entries are passed through `strip_think` to drop template-level leaks
        (e.g. unclosed `<think` prefixes, `<channel|>` markers) before being
        persisted. If the cleaned content is empty but the raw entry wasn't,
        the record is persisted with an empty string rather than falling back
        to the raw leak — otherwise `strip_think`'s guarantees would be
        undone by history replay / consolidation downstream.
        """
        cursor = self._next_cursor()
        ts = datetime.now().strftime("%Y-%m-%d %H:%M")
-        record = {"cursor": cursor, "timestamp": ts, "content": strip_think(entry.rstrip()) or entry.rstrip()}
+        raw = entry.rstrip()
        content = strip_think(raw)
        if raw and not content:
            logger.debug(
                "history entry {} stripped to empty (likely template leak); "
                "persisting empty content to avoid re-polluting context",
                cursor,
            )
        record = {"cursor": cursor, "timestamp": ts, "content": content}
        with open(self.history_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
        self._cursor_file.write_text(str(cursor), encoding="utf-8")
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@ -15,12 +15,48 @@ from loguru import logger
 def strip_think(text: str) -> str:
-    """Remove thinking blocks and any unclosed trailing tag."""
+    """Remove thinking blocks, unclosed trailing tags, and tokenizer-level
    template leaks occasionally emitted by some models (notably Gemma 4's
    Ollama renderer).
    Covers:
      1. Well-formed `<think>...</think>` and `<thought>...</thought>` blocks.
      2. Streaming prefixes where the block is never closed.
      3. *Malformed* opening tags missing the `>` — e.g. `<think广场…`. The
         model sometimes emits the tag name directly followed by user-facing
         content with no delimiter; without this step the literal `<think`
         leaks into the rendered message.
      4. Harmony-style channel markers like `<channel|>` / `<|channel|>`
         **at the start of the text** — conservative to avoid eating
         explanatory prose that mentions these tokens.
      5. Orphan closing tags `</think>` / `</thought>` **at the very start
         or end of the text** only, for the same reason.
    Since this is also applied before persisting to history (memory.py),
    the edge-only stripping of (4) and (5) is deliberate: stripping those
    tokens mid-text would silently rewrite any message where a user or the
    assistant discusses the tokens themselves.
    """
    # Well-formed blocks first.
    text = re.sub(r"<think>[\s\S]*?</think>", "", text)
    text = re.sub(r"^\s*<think>[\s\S]*$", "", text)
    # Gemma 4 and similar models use <thought>...</thought> blocks
    text = re.sub(r"<thought>[\s\S]*?</thought>", "", text)
    text = re.sub(r"^\s*<thought>[\s\S]*$", "", text)
    # Malformed opening tags: `<think` / `<thought` where the next char is
    # NOT one that could continue a valid tag / identifier name. Explicitly
    # listing ASCII tag-name chars (letters, digits, `_`, `-`, `:`) plus
    # `>` / `/` — we can't use `\w` here because in Python's default
    # Unicode regex mode it matches CJK characters too, which would defeat
    # the primary fix for `<think广场…` leaks.
    text = re.sub(r"<think(?![A-Za-z0-9_\-:>/])", "", text)
    text = re.sub(r"<thought(?![A-Za-z0-9_\-:>/])", "", text)
    # Edge-only orphan closing tags (start or end of text).
    text = re.sub(r"^\s*</think>\s*", "", text)
    text = re.sub(r"\s*</think>\s*$", "", text)
    text = re.sub(r"^\s*</thought>\s*", "", text)
    text = re.sub(r"\s*</thought>\s*$", "", text)
    # Edge-only channel markers (harmony / Gemma 4 variant leaks).
    text = re.sub(r"^\s*<\|?channel\|?>\s*", "", text)
    return text.strip()
@ -37,7 +73,9 @@ def detect_image_mime(data: bytes) -> str | None:
    return None
-def build_image_content_blocks(raw: bytes, mime: str, path: str, label: str) -> list[dict[str, Any]]:
+def build_image_content_blocks(
    raw: bytes, mime: str, path: str, label: str
 ) -> list[dict[str, Any]]:
    """Build native image blocks plus a short text label."""
    b64 = base64.b64encode(raw).decode()
    return [
@ -83,6 +121,7 @@ _TOOL_RESULTS_DIR = ".nanobot/tool-results"
 _TOOL_RESULT_RETENTION_SECS = 7 * 24 * 60 * 60
 _TOOL_RESULT_MAX_BUCKETS = 32
 def safe_filename(name: str) -> str:
    """Replace unsafe path characters with underscores."""
    return _UNSAFE_CHARS.sub("_", name).strip()
@ -258,9 +297,9 @@ def split_message(content: str, max_len: int = 2000) -> list[str]:
            break
        cut = content[:max_len]
        # Try to break at newline first, then space, then hard break
-        pos = cut.rfind('\n')
+        pos = cut.rfind("\n")
        if pos <= 0:
-            pos = cut.rfind(' ')
+            pos = cut.rfind(" ")
        if pos <= 0:
            pos = max_len
        chunks.append(content[:pos])
@ -423,7 +462,11 @@ def build_status_content(
    # Budget mirrors Consolidator formula: ctx_window - max_completion - _SAFETY_BUFFER
    ctx_budget = max(ctx_total - int(max_completion_tokens) - 1024, 1)
    ctx_pct = min(int((context_tokens_estimate / ctx_budget) * 100), 999) if ctx_budget > 0 else 0
-    ctx_used_str = f"{context_tokens_estimate // 1000}k" if context_tokens_estimate >= 1000 else str(context_tokens_estimate)
+    ctx_used_str = (
        f"{context_tokens_estimate // 1000}k"
        if context_tokens_estimate >= 1000
        else str(context_tokens_estimate)
    )
    ctx_total_str = f"{ctx_total // 1000}k" if ctx_total > 0 else "n/a"
    token_line = f"\U0001f4ca Tokens: {last_in} in / {last_out} out"
    if cached and last_in:
@ -445,6 +488,7 @@ def build_status_content(
 def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str]:
    """Sync bundled templates to workspace. Only creates missing files."""
    from importlib.resources import files as pkg_files
    try:
        tpl = pkg_files("nanobot") / "templates"
    except Exception:
@ -470,15 +514,22 @@ def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str]
    if added and not silent:
        from rich.console import Console
        for name in added:
            Console().print(f"  [dim]Created {name}[/dim]")
    # Initialize git for memory version control
    try:
        from nanobot.utils.gitstore import GitStore
-        gs = GitStore(workspace, tracked_files=[
+
-            "SOUL.md", "USER.md", "memory/MEMORY.md",
+        gs = GitStore(
-        ])
+            workspace,
            tracked_files=[
                "SOUL.md",
                "USER.md",
                "memory/MEMORY.md",
            ],
        )
        gs.init()
    except Exception:
        logger.warning("Failed to initialize git store for {}", workspace)
--- a/tests/agent/test_memory_store.py
+++ b/tests/agent/test_memory_store.py
@ -1,8 +1,7 @@
 """Tests for the restructured MemoryStore — pure file I/O layer."""
 from datetime import datetime
 import json
-from pathlib import Path
+from datetime import datetime
 import pytest
@ -65,6 +64,34 @@ class TestHistoryWithCursor:
        cursor = store.append_history("event 3")
        assert cursor == 3
    def test_append_history_strips_thinking_content(self, store):
        """`strip_think` must run before persistence — well-formed thinking
        blocks shouldn't land in history."""
        cursor = store.append_history("<think>reasoning</think>final answer")
        content = store.read_file(store.history_file)
        data = json.loads(content)
        assert data["cursor"] == cursor
        assert data["content"] == "final answer"
    def test_append_history_drops_pure_leak_content(self, store):
        """Regression: entries that strip down to empty (pure template-token
        leak) must NOT fall back to the raw leak. Persisting the raw text
        would re-pollute context via consolidation / replay, undoing the
        protection `strip_think` provides."""
        cursor = store.append_history("<think>nothing user-facing</think>")
        content = store.read_file(store.history_file)
        data = json.loads(content)
        assert data["cursor"] == cursor
        assert data["content"] == ""
    def test_append_history_drops_malformed_leak_prefix(self, store):
        """Channel-marker / malformed opening leaks should not survive."""
        cursor = store.append_history("<channel|>")
        content = store.read_file(store.history_file)
        data = json.loads(content)
        assert data["cursor"] == cursor
        assert data["content"] == ""
    def test_read_unprocessed_history(self, store):
        store.append_history("event 1")
        store.append_history("event 2")
@ -134,7 +161,8 @@ class TestLegacyHistoryMigration:
        """JSONL entries with cursor=1 are correctly parsed and returned."""
        store.history_file.write_text(
            '{"cursor": 1, "timestamp": "2026-03-30 14:30", "content": "Old event"}\n',
-            encoding="utf-8")
+            encoding="utf-8",
        )
        entries = store.read_unprocessed_history(since_cursor=0)
        assert len(entries) == 1
        assert entries[0]["cursor"] == 1
@ -218,8 +246,7 @@ class TestLegacyHistoryMigration:
        memory_dir.mkdir()
        legacy_file = memory_dir / "HISTORY.md"
        legacy_content = (
-            "[2026-03-25–2026-04-02] Multi-day summary.\n"
+            "[2026-03-25–2026-04-02] Multi-day summary.\n[2026-03-26/27] Cross-day summary.\n"
            "[2026-03-26/27] Cross-day summary.\n"
        )
        legacy_file.write_text(legacy_content, encoding="utf-8")
@ -277,9 +304,7 @@ class TestLegacyHistoryMigration:
        memory_dir = tmp_path / "memory"
        memory_dir.mkdir()
        legacy_file = memory_dir / "HISTORY.md"
-        legacy_file.write_bytes(
+        legacy_file.write_bytes(b"[2026-04-01 10:00] Broken \xff data still needs migration.\n\n")
            b"[2026-04-01 10:00] Broken \xff data still needs migration.\n\n"
        )
        store = MemoryStore(tmp_path)
--- a/tests/utils/test_strip_think.py
+++ b/tests/utils/test_strip_think.py
@ -1,5 +1,3 @@
 import pytest
 from nanobot.utils.helpers import strip_think
@ -48,7 +46,7 @@ class TestStripThinkFalsePositive:
        assert strip_think(text) == text
    def test_code_block_think_tag_preserved(self):
-        text = "Example:\n```\ntext = re.sub(r\"<think>[\\s\\S]*\", \"\", text)\n```\nDone."
+        text = 'Example:\n```\ntext = re.sub(r"<think>[\\s\\S]*", "", text)\n```\nDone.'
        assert strip_think(text) == text
    def test_backtick_thought_tag_preserved(self):
@ -63,3 +61,76 @@ class TestStripThinkFalsePositive:
    def test_prefix_unclosed_thought_still_stripped(self):
        assert strip_think("<thought>reasoning without closing") == ""
 class TestStripThinkMalformedLeaks:
    """Regression: Gemma 4's Ollama renderer occasionally emits a tag name
    with no closing '>', running straight into the user-facing content
    (e.g. `<think广场照明灯目前…`). The earlier regexes required '>' and
    let these through."""
    def test_malformed_think_no_gt_chinese(self):
        assert strip_think("<think广场照明灯目前绑定在'照明灯'策略下") == (
            "广场照明灯目前绑定在'照明灯'策略下"
        )
    def test_malformed_think_no_gt_english_with_space(self):
        # English leak with a space after the tag name (common streaming form).
        assert strip_think("<think The fountain opens at 09:00") == ("The fountain opens at 09:00")
    def test_malformed_thought_no_gt(self):
        assert strip_think("<thought广场照明灯") == "广场照明灯"
    def test_thinker_word_preserved(self):
        # `<thinker>` is a valid tag name variant; must not match.
        assert strip_think("<thinker>content</thinker>") == "<thinker>content</thinker>"
    def test_self_closing_preserved(self):
        assert strip_think("<think/>ok") == "<think/>ok"
        assert strip_think("<thought/>ok") == "<thought/>ok"
    def test_orphan_closing_think_at_end_stripped(self):
        # Typical leak: model opens `<think>` without closing; we strip the
        # opener from the start, leaving an orphan `</think>` at the end.
        assert strip_think("answer</think>") == "answer"
    def test_orphan_closing_think_at_start_stripped(self):
        assert strip_think("</think>answer") == "answer"
    def test_channel_marker_at_start_stripped(self):
        # Harmony / Gemma 4 channel markers leak at the start of a response.
        assert strip_think("<channel|>喷泉策略：09:00 开启") == ("喷泉策略：09:00 开启")
        assert strip_think("<|channel|>answer") == "answer"
 class TestStripThinkConservativePreserve:
    """Regression: the malformed-tag / orphan cleanup must NOT touch
    legitimate prose or code that mentions these tokens literally, otherwise
    `strip_think` (which runs before history is persisted, memory.py) will
    silently rewrite the conversation transcript."""
    def test_think_dash_variant_preserved(self):
        assert strip_think("<think-foo>bar</think-foo>") == "<think-foo>bar</think-foo>"
    def test_think_underscore_variant_preserved(self):
        assert strip_think("<think_foo>bar</think_foo>") == "<think_foo>bar</think_foo>"
    def test_think_numeric_variant_preserved(self):
        assert strip_think("<think1>bar</think1>") == "<think1>bar</think1>"
    def test_think_namespaced_variant_preserved(self):
        assert strip_think("<think:foo>bar</think:foo>") == "<think:foo>bar</think:foo>"
    def test_literal_close_think_in_prose_preserved(self):
        # Mid-prose references to `</think>` in backticks or plain text must
        # not be stripped; edge-only regex protects this.
        text = "Use `</think>` to close a thinking block."
        assert strip_think(text) == text
    def test_literal_channel_marker_in_prose_preserved(self):
        text = "The Harmony spec uses `<|channel|>` and `<channel|>` markers."
        assert strip_think(text) == text
    def test_literal_channel_marker_in_code_block_preserved(self):
        text = "Example:\n```\nif line.startswith('<channel|>'):\n    skip()\n```"
        assert strip_think(text) == text