mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-04-19 17:39:55 +00:00
refactor: move SafeFileHistory to module level + add regression tests
- Promote _SafeFileHistory to module-level SafeFileHistory for testability
- Add 5 regression tests: surrogates, normal text, emoji, mixed CJK, multi-surrogates

Made-with: Cursor
This commit is contained in:
parent
64bd7234b3
commit
075bdd5c3c
@ -33,6 +33,19 @@ from rich.table import Table
|
|||||||
from rich.text import Text
|
from rich.text import Text
|
||||||
|
|
||||||
from nanobot import __logo__, __version__
|
from nanobot import __logo__, __version__
|
||||||
|
|
||||||
|
|
||||||
|
class SafeFileHistory(FileHistory):
    """FileHistory subclass that sanitizes surrogate characters on write.

    On Windows, special Unicode input (emoji, mixed-script) can produce
    surrogate characters that crash prompt_toolkit's file write.
    See issue #2846.
    """

    def store_string(self, string: str) -> None:
        """Store *string* in the history file after removing surrogates.

        The text is round-tripped through UTF-16:

        * a valid high/low surrogate pair (how a Windows console may deliver
          an emoji) is rejoined into the single character it represents;
        * any lone surrogate is replaced with U+FFFD.

        The result never contains a surrogate code point, so it can always
        be encoded as UTF-8 by the parent class.

        Args:
            string: The history entry entered by the user.
        """
        # NOTE: ``errors="surrogateescape"`` is not sufficient here -- on
        # encode it only accepts lone surrogates in U+DC80..U+DCFF and raises
        # UnicodeEncodeError for every other surrogate (e.g. U+D83D).
        # "utf-16-le" (rather than "utf-16") avoids adding/stripping a BOM.
        safe = string.encode("utf-16-le", errors="surrogatepass").decode(
            "utf-16-le", errors="replace"
        )
        super().store_string(safe)
|
||||||
from nanobot.cli.stream import StreamRenderer, ThinkingSpinner
|
from nanobot.cli.stream import StreamRenderer, ThinkingSpinner
|
||||||
from nanobot.config.paths import get_workspace_path, is_default_workspace
|
from nanobot.config.paths import get_workspace_path, is_default_workspace
|
||||||
from nanobot.config.schema import Config
|
from nanobot.config.schema import Config
|
||||||
@ -118,17 +131,8 @@ def _init_prompt_session() -> None:
|
|||||||
history_file = get_cli_history_path()
|
history_file = get_cli_history_path()
|
||||||
history_file.parent.mkdir(parents=True, exist_ok=True)
|
history_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Wrap FileHistory to sanitize surrogate characters on write.
|
|
||||||
# Without this, special Unicode input (emoji, mixed-script) crashes
|
|
||||||
# prompt_toolkit's history file write on Windows with UnicodeEncodeError.
|
|
||||||
# See issue #2846.
|
|
||||||
class _SafeFileHistory(FileHistory):
|
|
||||||
def store_string(self, string: str) -> None:
|
|
||||||
safe = string.encode("utf-8", errors="surrogateescape").decode("utf-8", errors="replace")
|
|
||||||
super().store_string(safe)
|
|
||||||
|
|
||||||
_PROMPT_SESSION = PromptSession(
|
_PROMPT_SESSION = PromptSession(
|
||||||
history=_SafeFileHistory(str(history_file)),
|
history=SafeFileHistory(str(history_file)),
|
||||||
enable_open_in_editor=False,
|
enable_open_in_editor=False,
|
||||||
multiline=False, # Enter submits (single line mode)
|
multiline=False, # Enter submits (single line mode)
|
||||||
)
|
)
|
||||||
|
|||||||
44
tests/cli/test_safe_file_history.py
Normal file
44
tests/cli/test_safe_file_history.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
"""Regression tests for SafeFileHistory (issue #2846).
|
||||||
|
|
||||||
|
Surrogate characters in CLI input must not crash history file writes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from nanobot.cli.commands import SafeFileHistory
|
||||||
|
|
||||||
|
|
||||||
|
class TestSafeFileHistory:
    """Round-trip strings through SafeFileHistory and inspect what is stored."""

    @staticmethod
    def _has_surrogate(text: str) -> bool:
        """Return True if *text* contains any code point in U+D800..U+DFFF."""
        return any(0xD800 <= ord(ch) <= 0xDFFF for ch in text)

    def test_surrogate_replaced(self, tmp_path):
        """A lone surrogate is replaced with U+FFFD instead of crashing."""
        hist = SafeFileHistory(str(tmp_path / "history"))
        hist.store_string("hello \udce9 world")
        entries = list(hist.load_history_strings())
        assert len(entries) == 1
        assert not self._has_surrogate(entries[0])
        # The offending character must be substituted, not silently dropped.
        assert "\ufffd" in entries[0]
        assert entries[0].startswith("hello ")
        assert entries[0].endswith(" world")

    def test_normal_text_unchanged(self, tmp_path):
        """Plain ASCII input is stored verbatim."""
        hist = SafeFileHistory(str(tmp_path / "history"))
        hist.store_string("normal ascii text")
        entries = list(hist.load_history_strings())
        assert entries == ["normal ascii text"]

    def test_emoji_preserved(self, tmp_path):
        """A well-formed (non-surrogate) emoji passes through unchanged."""
        hist = SafeFileHistory(str(tmp_path / "history"))
        hist.store_string("hello 🐈 nanobot")
        entries = list(hist.load_history_strings())
        assert entries == ["hello 🐈 nanobot"]

    def test_mixed_unicode_preserved(self, tmp_path):
        """CJK + emoji + latin should all pass through cleanly."""
        hist = SafeFileHistory(str(tmp_path / "history"))
        hist.store_string("你好 hello こんにちは 🎉")
        entries = list(hist.load_history_strings())
        assert entries == ["你好 hello こんにちは 🎉"]

    def test_multiple_surrogates(self, tmp_path):
        """Every surrogate in a run is sanitized, not just the first one."""
        hist = SafeFileHistory(str(tmp_path / "history"))
        hist.store_string("\udce9\udcf1\udcff")
        entries = list(hist.load_history_strings())
        assert len(entries) == 1
        # Check the whole surrogate range rather than a single code point.
        assert not self._has_surrogate(entries[0])
        assert set(entries[0]) == {"\ufffd"}
|
||||||
Loading…
x
Reference in New Issue
Block a user