def _fence_line(content: str, fence_pos: int) -> str:
    """Return the text from ``fence_pos`` up to (not including) the next newline.

    When no newline follows ``fence_pos`` the rest of ``content`` is returned.
    For a position that points at a code fence this yields the whole fence
    line, e.g. ``` followed by its info string.
    """
    line_end = content.find("\n", fence_pos)
    if line_end < 0:
        return content[fence_pos:]
    return content[fence_pos:line_end]


def _split_inside_fenced_code_block(content: str, pos: int) -> tuple[bool, int, str]:
    """Tell whether cutting ``content`` at ``pos`` lands inside a fenced block.

    A cut is considered "inside" when an odd number of ``` markers occurs in
    ``content[:pos]``.

    Returns:
        ``(inside, opening, fence)``. When ``inside`` is False the other two
        values are ``-1`` and ``""``. Otherwise ``opening`` is the index of the
        last ``` before ``pos`` and ``fence`` is that marker's full line.
    """
    if content[:pos].count("```") % 2 == 0:
        return False, -1, ""
    # An odd count means at least one marker exists, so rfind cannot fail here.
    opening = content.rfind("```", 0, pos)
    return True, opening, _fence_line(content, opening)


def split_message(content: str, max_len: int = 2000) -> list[str]:
    """
    Split content into chunks within max_len, preferring line breaks.

    Break points are chosen at the last newline inside the window, then at the
    last space, then as a hard cut. A cut that would fall inside a fenced code
    block is handled so that chunks keep balanced fences where possible:

    * if the block opens later in the window, the cut moves to just before
      the opening fence;
    * if the block opens at the very start of the remaining text, the chunk is
      closed with a fence and the next chunk re-opens with the same fence line.

    Args:
        content: The text to split.
        max_len: Maximum length of each chunk.

    Returns:
        The list of chunks; empty when ``content`` is empty.

    Raises:
        ValueError: If ``content`` is longer than ``max_len`` and ``max_len``
            is not positive (no chunk could ever make progress).
    """
    # NOTE(review): the statements from here down to the space-break fallback
    # were not visible in the reviewed hunk; they are reconstructed from the
    # hunk's context lines -- confirm against helpers.py before merging.
    if not content:
        return []
    if len(content) <= max_len:
        return [content]
    if max_len <= 0:
        raise ValueError("max_len must be a positive integer")

    closing = "\n```"
    # Largest cut position that still leaves room for the synthetic closing fence.
    budget = max_len - len(closing)
    chunks: list[str] = []
    while content:
        if len(content) <= max_len:
            chunks.append(content)
            break
        cut = content[:max_len]
        # Prefer a newline, then a space, then a hard break.
        pos = cut.rfind("\n")
        if pos <= 0:
            pos = cut.rfind(" ")
        if pos <= 0:
            pos = max_len

        inside_code, opening, fence = _split_inside_fenced_code_block(content, pos)
        if not inside_code:
            chunks.append(content[:pos])
            content = content[pos:].lstrip()
            continue

        if opening > 0:
            # The block starts later in this window: end the chunk right
            # before its opening fence so the block begins the next chunk.
            chunks.append(content[:opening])
            content = content[opening:].lstrip()
            continue

        # The block opens at index 0. ``body_start`` is the first index that
        # belongs to the block's body rather than to its fence line.
        body_start = len(fence)
        if content.startswith(fence + "\n"):
            body_start += 1

        if budget <= body_start:
            # Fence line + one body character + closing fence cannot fit in
            # max_len; give up on balancing and hard-cut so we still progress.
            chunks.append(content[:max_len])
            content = content[max_len:].lstrip()
            continue

        if pos > budget or pos <= body_start:
            # Either the closing fence would overflow max_len, or the break
            # sits at/before the end of the fence line. The latter would emit
            # an empty block and rebuild the same content, looping forever.
            # Re-pick a break strictly inside the body and within the budget.
            pos = content.rfind("\n", body_start + 1, budget)
            if pos < 0:
                pos = content.rfind(" ", body_start + 1, budget)
            if pos < 0:
                pos = budget

        chunks.append(content[:pos] + closing)
        remainder = content[pos:]
        if remainder.startswith("\n"):
            remainder = remainder[1:]

        next_line = _fence_line(remainder, 0)
        if next_line.strip() == "```":
            # The block's real closing fence comes next and the chunk already
            # ends with one: skip it instead of re-opening an empty block.
            content = remainder[len(next_line):].lstrip()
        else:
            content = f"{fence}\n{remainder}"
    return chunks
from nanobot.utils.helpers import split_message


def _has_balanced_fences(chunk: str) -> bool:
    """Return True when ``chunk`` holds an even number of ``` markers."""
    return chunk.count("```") % 2 == 0


def test_split_message_no_code_blocks_unchanged():
    """Plain text without fences is split at the last space in the window."""
    result = split_message("alpha beta gamma delta", max_len=12)

    assert result == ["alpha beta", "gamma delta"]


def test_split_message_outside_code_block_unchanged():
    """Cuts that fall before any fence are not affected by the fence logic."""
    text = "alpha beta gamma delta\n```python\nx = 1\n```\ndone"

    result = split_message(text, max_len=12)

    first, second = result[0], result[1]
    assert first == "alpha beta"
    assert second.startswith("gamma")


def test_split_message_inside_code_block_moves_before_fence():
    """A cut inside a block that opens mid-window moves to before the fence."""
    text = "Intro paragraph.\n```python\nprint('a')\nprint('b')\n```\nDone"

    result = split_message(text, max_len=35)

    assert result[0] == "Intro paragraph.\n"
    assert result[1].startswith("```python\nprint('a')")
    for piece in result[1:]:
        assert _has_balanced_fences(piece)


def test_split_message_code_block_longer_than_max_len_closes_and_reopens():
    """An oversized block is closed at the cut and re-opened in the next chunk."""
    limit = 60
    text = "```python\n" + ("print('line one')\n" * 6) + "```\nDone"

    result = split_message(text, max_len=limit)

    assert len(result) > 1
    for piece in result:
        assert len(piece) <= limit
    for piece in result:
        assert _has_balanced_fences(piece)
    head, follow_up = result[0], result[1]
    assert head.startswith("```python\n")
    assert head.endswith("\n```")
    assert follow_up.startswith("```python\n")


def test_split_message_multiple_code_blocks_moves_second_block_to_next_chunk():
    """With two blocks, the second one is pushed whole into the next chunk."""
    text = (
        "First\n"
        "```js\n"
        "one();\n"
        "```\n"
        "Middle paragraph here\n"
        "```py\n"
        "two()\n"
        "three()\n"
        "```\n"
        "End"
    )

    result = split_message(text, max_len=55)

    assert result[0].endswith("Middle paragraph here\n")
    assert result[1].startswith("```py\n")
    for piece in result:
        assert _has_balanced_fences(piece)