mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-06-15 07:14:08 +00:00
fix(utils): make split_message fenced-code-block-aware
When split_message splits a long message, it now checks whether the split point falls inside a fenced code block. If so, it either moves the split to just before the opening fence or closes the fence at the end of the chunk and reopens it at the start of the next one, preventing broken HTML rendering. Addresses #4250.
This commit is contained in:
parent
b8a4ceb30c
commit
131446fa61
@ -368,6 +368,22 @@ def maybe_persist_tool_result(
|
||||
)
|
||||
|
||||
|
||||
def _fence_line(content: str, fence_pos: int) -> str:
    """Return the remainder of the line that begins at *fence_pos*.

    The result runs from *fence_pos* up to, but not including, the next
    newline. When no newline follows, everything from *fence_pos* to the end
    of *content* is returned.
    """
    tail = content[fence_pos:]
    line, _, _ = tail.partition("\n")
    return line
|
||||
|
||||
|
||||
def _split_inside_fenced_code_block(content: str, pos: int) -> tuple[bool, int, str]:
    """Report whether cutting *content* at *pos* would land inside a code fence.

    The text before *pos* is searched for triple-backtick markers. An odd
    number of markers is treated as "a fence is still open at the cut".

    Returns a ``(inside, opening, fence)`` tuple:

    * ``(False, -1, "")`` when the number of markers before *pos* is even.
    * ``(True, opening, fence)`` otherwise, where *opening* is the index of
      the last marker before *pos* and *fence* is the text from that marker
      to the end of its line.
    """
    marker = "```"
    before_cut = content[:pos]

    fence_is_open = before_cut.count(marker) % 2 == 1
    if not fence_is_open:
        return False, -1, ""

    last_marker = before_cut.rfind(marker)
    if last_marker >= 0:
        return True, last_marker, _fence_line(content, last_marker)

    # Defensive fallback for the case where no marker could be located:
    # report an open fence with a bare marker and no known position.
    return True, -1, marker
|
||||
|
||||
|
||||
def split_message(content: str, max_len: int = 2000) -> list[str]:
|
||||
"""
|
||||
Split content into chunks within max_len, preferring line breaks.
|
||||
@ -395,6 +411,36 @@ def split_message(content: str, max_len: int = 2000) -> list[str]:
|
||||
pos = cut.rfind(" ")
|
||||
if pos <= 0:
|
||||
pos = max_len
|
||||
inside_code, opening, fence = _split_inside_fenced_code_block(content, pos)
|
||||
if inside_code:
|
||||
if opening > 0:
|
||||
pos = opening
|
||||
else:
|
||||
closing = "\n```"
|
||||
min_code_pos = len(fence)
|
||||
if content.startswith(fence + "\n"):
|
||||
min_code_pos += 1
|
||||
if pos < min_code_pos and min_code_pos + len(closing) > max_len:
|
||||
chunks.append(content[:max_len])
|
||||
content = content[max_len:].lstrip()
|
||||
continue
|
||||
if pos + len(closing) > max_len:
|
||||
budget = max_len - len(closing)
|
||||
if budget > 0:
|
||||
recut = content[:budget]
|
||||
adjusted = recut.rfind("\n")
|
||||
if adjusted <= 0:
|
||||
adjusted = recut.rfind(" ")
|
||||
pos = adjusted if adjusted > 0 else budget
|
||||
else:
|
||||
closing = "```"
|
||||
pos = max_len - len(closing)
|
||||
chunks.append(content[:pos] + closing)
|
||||
remainder = content[pos:]
|
||||
if remainder.startswith("\n"):
|
||||
remainder = remainder[1:]
|
||||
content = f"{fence}\n{remainder}"
|
||||
continue
|
||||
chunks.append(content[:pos])
|
||||
content = content[pos:].lstrip()
|
||||
return chunks
|
||||
|
||||
60
tests/utils/test_helpers.py
Normal file
60
tests/utils/test_helpers.py
Normal file
@ -0,0 +1,60 @@
|
||||
from nanobot.utils.helpers import split_message
|
||||
|
||||
|
||||
def test_split_message_no_code_blocks_unchanged():
    """Text without any fence is split into the expected word-aligned chunks."""
    expected = ["alpha beta", "gamma delta"]

    pieces = split_message("alpha beta gamma delta", max_len=12)

    assert pieces == expected
|
||||
|
||||
|
||||
def test_split_message_outside_code_block_unchanged():
    """Leading chunks are as expected when the fence appears later in the text."""
    text = "alpha beta gamma delta\n```python\nx = 1\n```\ndone"

    pieces = split_message(text, max_len=12)

    # Only the first two chunks are pinned; later chunks are not inspected.
    assert pieces[0] == "alpha beta"
    assert pieces[1].startswith("gamma")
|
||||
|
||||
|
||||
def test_split_message_inside_code_block_moves_before_fence():
    """The first chunk ends right before the fence and the next one starts with it."""
    text = "Intro paragraph.\n```python\nprint('a')\nprint('b')\n```\nDone"

    pieces = split_message(text, max_len=35)

    assert pieces[0] == "Intro paragraph.\n"
    assert pieces[1].startswith("```python\nprint('a')")
    # Every chunk after the intro must contain an even number of fence markers.
    for piece in pieces[1:]:
        assert piece.count("```") % 2 == 0
|
||||
|
||||
|
||||
def test_split_message_code_block_longer_than_max_len_closes_and_reopens():
    """An oversized code block is closed at the end of a chunk and reopened in the next."""
    text = "```python\n" + ("print('line one')\n" * 6) + "```\nDone"

    pieces = split_message(text, max_len=60)

    assert len(pieces) > 1
    for piece in pieces:
        assert len(piece) <= 60
    for piece in pieces:
        assert piece.count("```") % 2 == 0

    opening_chunk = pieces[0]
    assert opening_chunk.startswith("```python\n")
    assert opening_chunk.endswith("\n```")
    assert pieces[1].startswith("```python\n")
|
||||
|
||||
|
||||
def test_split_message_multiple_code_blocks_moves_second_block_to_next_chunk():
    """With two fenced blocks, the second block starts the second chunk."""
    text = (
        "First\n"
        "```js\n"
        "one();\n"
        "```\n"
        "Middle paragraph here\n"
        "```py\n"
        "two()\n"
        "three()\n"
        "```\n"
        "End"
    )

    pieces = split_message(text, max_len=55)

    assert pieces[0].endswith("Middle paragraph here\n")
    assert pieces[1].startswith("```py\n")
    # No chunk may contain an odd number of fence markers.
    for piece in pieces:
        assert piece.count("```") % 2 == 0
|
||||
Loading…
x
Reference in New Issue
Block a user