mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-30 13:31:12 +00:00
fix: strip partial think tags in streaming output
This commit is contained in:
parent
aea5948b11
commit
2c397ad442
@ -32,6 +32,8 @@ def strip_think(text: str) -> str:
|
|||||||
explanatory prose that mentions these tokens.
|
explanatory prose that mentions these tokens.
|
||||||
5. Orphan closing tags `</think>` / `</thought>` **at the very start
|
5. Orphan closing tags `</think>` / `</thought>` **at the very start
|
||||||
or end of the text** only, for the same reason.
|
or end of the text** only, for the same reason.
|
||||||
|
6. Trailing partial control tags split across stream chunks, such as
|
||||||
|
`<thi`, `<thin`, or `<tho`.
|
||||||
|
|
||||||
Since this is also applied before persisting to history (memory.py),
|
Since this is also applied before persisting to history (memory.py),
|
||||||
the edge-only stripping of (4) and (5) is deliberate: stripping those
|
the edge-only stripping of (4) and (5) is deliberate: stripping those
|
||||||
@ -58,6 +60,14 @@ def strip_think(text: str) -> str:
|
|||||||
text = re.sub(r"\s*</thought>\s*$", "", text)
|
text = re.sub(r"\s*</thought>\s*$", "", text)
|
||||||
# Edge-only channel markers (harmony / Gemma 4 variant leaks).
|
# Edge-only channel markers (harmony / Gemma 4 variant leaks).
|
||||||
text = re.sub(r"^\s*<\|?channel\|?>\s*", "", text)
|
text = re.sub(r"^\s*<\|?channel\|?>\s*", "", text)
|
||||||
|
# Stream chunks may end in the middle of a control tag. Strip only known
|
||||||
|
# control-token prefixes at the very end.
|
||||||
|
partial_control_tag = (
|
||||||
|
r"</?(?:t|th|thi|thin|think|tho|thou|thoug|though|thought)"
|
||||||
|
r"|<\|?(?:c|ch|cha|chan|chann|channe|channel|channel\|?)"
|
||||||
|
)
|
||||||
|
text = re.sub(rf"(?:{partial_control_tag})$", "", text)
|
||||||
|
text = re.sub(r"^\s*<\|?$", "", text)
|
||||||
return text.strip()
|
return text.strip()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -972,6 +972,27 @@ async def test_loop_stream_filter_handles_think_only_prefix_without_crashing(tmp
|
|||||||
assert endings == [False]
|
assert endings == [False]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_loop_stream_filter_hides_partial_trailing_think_prefix(tmp_path):
    """A ``<think>`` tag split across two stream chunks must not leak.

    The fake provider emits ``"Hello <thin"`` followed by
    ``"k>hidden</think>World"``.  The stream callback is expected to receive
    only the visible text, and the final content must exclude the think block.
    """
    agent_loop = _make_loop(tmp_path)
    streamed: list[str] = []

    async def fake_chat_stream(*, on_content_delta, **kwargs):
        # The first chunk ends in the middle of the opening tag; the second
        # chunk completes the tag, the hidden block, and the visible tail.
        for chunk in ("Hello <thin", "k>hidden</think>World"):
            await on_content_delta(chunk)
        return LLMResponse(content="Hello <think>hidden</think>World", tool_calls=[], usage={})

    async def collect(delta: str) -> None:
        streamed.append(delta)

    agent_loop.provider.chat_stream_with_retry = fake_chat_stream

    final_content, _, _, _, _ = await agent_loop._run_agent_loop([], on_stream=collect)

    assert final_content == "Hello World"
    assert streamed == ["Hello", " World"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_loop_retries_think_only_final_response(tmp_path):
|
async def test_loop_retries_think_only_final_response(tmp_path):
|
||||||
loop = _make_loop(tmp_path)
|
loop = _make_loop(tmp_path)
|
||||||
|
|||||||
@ -102,6 +102,14 @@ class TestStripThinkMalformedLeaks:
|
|||||||
assert strip_think("<channel|>喷泉策略:09:00 开启") == ("喷泉策略:09:00 开启")
|
assert strip_think("<channel|>喷泉策略:09:00 开启") == ("喷泉策略:09:00 开启")
|
||||||
assert strip_think("<|channel|>answer") == "answer"
|
assert strip_think("<|channel|>answer") == "answer"
|
||||||
|
|
||||||
|
def test_partial_trailing_think_tag_after_visible_text(self):
    """Trailing ``<thin`` / ``<thought`` fragments after visible text are removed."""
    expectations = (
        ("喷泉策略说明 <thin", "喷泉策略说明"),
        ("answer <thought", "answer"),
    )
    for raw, cleaned in expectations:
        assert strip_think(raw) == cleaned
|
||||||
|
|
||||||
|
def test_partial_trailing_channel_marker_after_visible_text(self):
    """Trailing ``<|chan`` / ``<channel`` fragments after visible text are removed."""
    expectations = (
        ("喷泉策略说明 <|chan", "喷泉策略说明"),
        ("answer <channel", "answer"),
    )
    for raw, cleaned in expectations:
        assert strip_think(raw) == cleaned
|
||||||
|
|
||||||
|
|
||||||
class TestStripThinkConservativePreserve:
|
class TestStripThinkConservativePreserve:
|
||||||
"""Regression: the malformed-tag / orphan cleanup must NOT touch
|
"""Regression: the malformed-tag / orphan cleanup must NOT touch
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user