mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-30 13:31:12 +00:00
fix: strip partial think tags in streaming output
This commit is contained in:
parent
aea5948b11
commit
2c397ad442
@ -32,6 +32,8 @@ def strip_think(text: str) -> str:
|
||||
explanatory prose that mentions these tokens.
|
||||
5. Orphan closing tags `</think>` / `</thought>` **at the very start
|
||||
or end of the text** only, for the same reason.
|
||||
6. Trailing partial control tags split across stream chunks, such as
|
||||
`<thi`, `<thin`, `<tho`, or a partial channel marker like `<|chan`.
|
||||
|
||||
Since this is also applied before persisting to history (memory.py),
|
||||
the edge-only stripping of (4) and (5) is deliberate: stripping those
|
||||
@ -58,6 +60,14 @@ def strip_think(text: str) -> str:
|
||||
text = re.sub(r"\s*</thought>\s*$", "", text)
|
||||
# Edge-only channel markers (harmony / Gemma 4 variant leaks).
|
||||
text = re.sub(r"^\s*<\|?channel\|?>\s*", "", text)
|
||||
# Stream chunks may end in the middle of a control tag. Strip only known
|
||||
# control-token prefixes at the very end.
|
||||
partial_control_tag = (
|
||||
r"</?(?:t|th|thi|thin|think|tho|thou|thoug|though|thought)"
|
||||
r"|<\|?(?:c|ch|cha|chan|chann|channe|channel|channel\|?)"
|
||||
)
|
||||
text = re.sub(rf"(?:{partial_control_tag})$", "", text)
|
||||
text = re.sub(r"^\s*<\|?$", "", text)
|
||||
return text.strip()
|
||||
|
||||
|
||||
|
||||
@ -972,6 +972,27 @@ async def test_loop_stream_filter_handles_think_only_prefix_without_crashing(tmp
|
||||
assert endings == [False]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_loop_stream_filter_hides_partial_trailing_think_prefix(tmp_path):
    """A `<think>` opener split across two stream chunks stays hidden.

    The fake provider emits ``"Hello <thin"`` followed by the rest of the
    think block. Neither the partial tag nor the hidden reasoning may show
    up in the streamed deltas or in the final content.
    """
    streamed: list[str] = []

    async def record_delta(delta: str) -> None:
        streamed.append(delta)

    async def fake_chat_stream(*, on_content_delta, **kwargs):
        # The first chunk stops in the middle of the tag; the second one
        # finishes the tag, the hidden block, and the visible tail.
        for chunk in ("Hello <thin", "k>hidden</think>World"):
            await on_content_delta(chunk)
        return LLMResponse(
            content="Hello <think>hidden</think>World",
            tool_calls=[],
            usage={},
        )

    agent_loop = _make_loop(tmp_path)
    agent_loop.provider.chat_stream_with_retry = fake_chat_stream

    final_content, _, _, _, _ = await agent_loop._run_agent_loop(
        [], on_stream=record_delta
    )

    assert final_content == "Hello World"
    assert streamed == ["Hello", " World"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_loop_retries_think_only_final_response(tmp_path):
|
||||
loop = _make_loop(tmp_path)
|
||||
|
||||
@ -102,6 +102,14 @@ class TestStripThinkMalformedLeaks:
|
||||
assert strip_think("<channel|>喷泉策略:09:00 开启") == ("喷泉策略:09:00 开启")
|
||||
assert strip_think("<|channel|>answer") == "answer"
|
||||
|
||||
def test_partial_trailing_think_tag_after_visible_text(self):
    """An unfinished think/thought opener at the end of the text is removed."""
    cases = (
        ("喷泉策略说明 <thin", "喷泉策略说明"),
        ("answer <thought", "answer"),
    )
    for raw, expected in cases:
        assert strip_think(raw) == expected
|
||||
|
||||
def test_partial_trailing_channel_marker_after_visible_text(self):
    """An unfinished channel marker at the end of the text is removed."""
    cases = (
        ("喷泉策略说明 <|chan", "喷泉策略说明"),
        ("answer <channel", "answer"),
    )
    for raw, expected in cases:
        assert strip_think(raw) == expected
|
||||
|
||||
|
||||
class TestStripThinkConservativePreserve:
|
||||
"""Regression: the malformed-tag / orphan cleanup must NOT touch
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user