refactor: move SafeFileHistory to module level + add regression tests

- Promote _SafeFileHistory to module-level SafeFileHistory for testability
- Add 5 regression tests: surrogates, normal text, emoji, mixed CJK, multi-surrogates

Made-with: Cursor
This commit is contained in:
Xubin Ren 2026-04-07 05:56:52 +00:00 committed by Xubin Ren
parent 64bd7234b3
commit 075bdd5c3c
2 changed files with 58 additions and 10 deletions

View File

@ -33,6 +33,19 @@ from rich.table import Table
from rich.text import Text
from nanobot import __logo__, __version__
class SafeFileHistory(FileHistory):
    """FileHistory subclass that sanitizes surrogate characters on write.

    On Windows, special Unicode input (emoji, mixed-script) can produce
    surrogate characters that crash prompt_toolkit's file write.
    See issue #2846.

    Every string handed to :meth:`store_string` is rewritten so that it
    contains no surrogate code points before it reaches the parent class.
    """

    def store_string(self, string: str) -> None:
        """Store *string* in the history after removing surrogate code points.

        Args:
            string: The history entry to persist. It may contain lone
                surrogates or un-combined surrogate pairs.
        """
        try:
            # Lone surrogates in U+DC80..U+DCFF are turned back into raw bytes
            # by "surrogateescape"; bytes that are not valid UTF-8 then become
            # U+FFFD when decoded with "replace".
            raw = string.encode("utf-8", errors="surrogateescape")
        except UnicodeEncodeError:
            # "surrogateescape" rejects every surrogate outside U+DC80..U+DCFF
            # (e.g. the high/low pair of an emoji). Round-trip through UTF-16
            # instead: "surrogatepass" emits the surrogates as code units, and
            # decoding recombines valid pairs into the real character while
            # replacing unpaired ones with U+FFFD.
            safe = string.encode("utf-16-le", errors="surrogatepass").decode(
                "utf-16-le", errors="replace"
            )
        else:
            safe = raw.decode("utf-8", errors="replace")
        super().store_string(safe)
from nanobot.cli.stream import StreamRenderer, ThinkingSpinner
from nanobot.config.paths import get_workspace_path, is_default_workspace
from nanobot.config.schema import Config
@ -118,17 +131,8 @@ def _init_prompt_session() -> None:
history_file = get_cli_history_path()
history_file.parent.mkdir(parents=True, exist_ok=True)
# Wrap FileHistory to sanitize surrogate characters on write.
# Without this, special Unicode input (emoji, mixed-script) crashes
# prompt_toolkit's history file write on Windows with UnicodeEncodeError.
# See issue #2846.
class _SafeFileHistory(FileHistory):
def store_string(self, string: str) -> None:
safe = string.encode("utf-8", errors="surrogateescape").decode("utf-8", errors="replace")
super().store_string(safe)
_PROMPT_SESSION = PromptSession(
history=_SafeFileHistory(str(history_file)),
history=SafeFileHistory(str(history_file)),
enable_open_in_editor=False,
multiline=False, # Enter submits (single line mode)
)

View File

@ -0,0 +1,44 @@
"""Regression tests for SafeFileHistory (issue #2846).
Surrogate characters in CLI input must not crash history file writes.
"""
from nanobot.cli.commands import SafeFileHistory
class TestSafeFileHistory:
    """Round-trip tests: store one entry, then read the history file back."""

    @staticmethod
    def _roundtrip(tmp_path, text):
        """Store *text* in a fresh history file and return every loaded entry."""
        hist = SafeFileHistory(str(tmp_path / "history"))
        hist.store_string(text)
        return list(hist.load_history_strings())

    def test_surrogate_replaced(self, tmp_path):
        """A lone surrogate is replaced with U+FFFD instead of crashing."""
        entries = self._roundtrip(tmp_path, "hello \udce9 world")
        assert len(entries) == 1
        assert "\udce9" not in entries[0]
        # Pin the replacement character, not just the absence of the surrogate.
        assert entries[0] == "hello \ufffd world"

    def test_normal_text_unchanged(self, tmp_path):
        """Plain ASCII input is stored verbatim."""
        entries = self._roundtrip(tmp_path, "normal ascii text")
        assert entries == ["normal ascii text"]

    def test_emoji_preserved(self, tmp_path):
        """A well-formed emoji is not mistaken for a surrogate."""
        entries = self._roundtrip(tmp_path, "hello 🐈 nanobot")
        assert entries == ["hello 🐈 nanobot"]

    def test_mixed_unicode_preserved(self, tmp_path):
        """CJK + emoji + latin should all pass through cleanly."""
        entries = self._roundtrip(tmp_path, "你好 hello こんにちは 🎉")
        assert entries == ["你好 hello こんにちは 🎉"]

    def test_multiple_surrogates(self, tmp_path):
        """Several consecutive lone surrogates are all sanitized."""
        entries = self._roundtrip(tmp_path, "\udce9\udcf1\udcff")
        assert len(entries) == 1
        # Check the whole surrogate range, not only the first input character.
        assert not any("\ud800" <= ch <= "\udfff" for ch in entries[0])
        assert "\ufffd" in entries[0]