From 075bdd5c3cd523012c7a4b63cb080a9ca9d938ba Mon Sep 17 00:00:00 2001
From: Xubin Ren
Date: Tue, 7 Apr 2026 05:56:52 +0000
Subject: [PATCH] refactor: move SafeFileHistory to module level + add regression tests

- Promote _SafeFileHistory to module-level SafeFileHistory for testability
- Sanitize via a UTF-16 round-trip so surrogates outside U+DC80..U+DCFF
  (e.g. split emoji pairs) no longer raise UnicodeEncodeError
- Add 5 regression tests: surrogates, normal text, emoji, mixed CJK, multi-surrogates

Made-with: Cursor
---
 nanobot/cli/commands.py             | 24 +++++++++-------
 tests/cli/test_safe_file_history.py | 44 +++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 10 deletions(-)
 create mode 100644 tests/cli/test_safe_file_history.py

diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 2e18045f4..a1fb7c0e0 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -33,6 +33,19 @@ from rich.table import Table
 from rich.text import Text
 
 from nanobot import __logo__, __version__
+
+
+class SafeFileHistory(FileHistory):
+    """FileHistory subclass that sanitizes surrogate characters on write.
+
+    On Windows, special Unicode input (emoji, mixed-script) can produce
+    surrogate characters that crash prompt_toolkit's file write (#2846).
+    A UTF-16 round-trip rejoins valid pairs; lone ones become U+FFFD.
+    """
+
+    def store_string(self, string: str) -> None:
+        safe = string.encode("utf-16", errors="surrogatepass").decode("utf-16", errors="replace")
+        super().store_string(safe)
 from nanobot.cli.stream import StreamRenderer, ThinkingSpinner
 from nanobot.config.paths import get_workspace_path, is_default_workspace
 from nanobot.config.schema import Config
@@ -118,17 +131,8 @@ def _init_prompt_session() -> None:
     history_file = get_cli_history_path()
     history_file.parent.mkdir(parents=True, exist_ok=True)
 
-    # Wrap FileHistory to sanitize surrogate characters on write.
-    # Without this, special Unicode input (emoji, mixed-script) crashes
-    # prompt_toolkit's history file write on Windows with UnicodeEncodeError.
-    # See issue #2846.
-    class _SafeFileHistory(FileHistory):
-        def store_string(self, string: str) -> None:
-            safe = string.encode("utf-8", errors="surrogateescape").decode("utf-8", errors="replace")
-            super().store_string(safe)
-
     _PROMPT_SESSION = PromptSession(
-        history=_SafeFileHistory(str(history_file)),
+        history=SafeFileHistory(str(history_file)),
         enable_open_in_editor=False,
         multiline=False,  # Enter submits (single line mode)
     )
diff --git a/tests/cli/test_safe_file_history.py b/tests/cli/test_safe_file_history.py
new file mode 100644
index 000000000..78b5e2339
--- /dev/null
+++ b/tests/cli/test_safe_file_history.py
@@ -0,0 +1,44 @@
+"""Regression tests for SafeFileHistory (issue #2846).
+
+Surrogate characters in CLI input must not crash history file writes.
+"""
+
+from nanobot.cli.commands import SafeFileHistory
+
+
+class TestSafeFileHistory:
+    def test_surrogate_replaced(self, tmp_path):
+        """A lone surrogate is replaced with U+FFFD instead of crashing."""
+        hist = SafeFileHistory(str(tmp_path / "history"))
+        hist.store_string("hello \udce9 world")
+        entries = list(hist.load_history_strings())
+        assert len(entries) == 1
+        assert "\udce9" not in entries[0]
+        assert "hello" in entries[0]
+        assert "world" in entries[0]
+
+    def test_normal_text_unchanged(self, tmp_path):
+        hist = SafeFileHistory(str(tmp_path / "history"))
+        hist.store_string("normal ascii text")
+        entries = list(hist.load_history_strings())
+        assert entries[0] == "normal ascii text"
+
+    def test_emoji_preserved(self, tmp_path):
+        hist = SafeFileHistory(str(tmp_path / "history"))
+        hist.store_string("hello 🐈 \ud83d\udc08 nanobot")
+        entries = list(hist.load_history_strings())
+        assert entries[0] == "hello 🐈 🐈 nanobot"
+
+    def test_mixed_unicode_preserved(self, tmp_path):
+        """CJK + emoji + latin should all pass through cleanly."""
+        hist = SafeFileHistory(str(tmp_path / "history"))
+        hist.store_string("你好 hello こんにちは 🎉")
+        entries = list(hist.load_history_strings())
+        assert entries[0] == "你好 hello こんにちは 🎉"
+
+    def test_multiple_surrogates(self, tmp_path):
+        hist = SafeFileHistory(str(tmp_path / "history"))
+        hist.store_string("\udce9\udcf1\ud83d")
+        entries = list(hist.load_history_strings())
+        assert len(entries) == 1
+        assert "\udce9" not in entries[0]