diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py index 9d3eafed1..6acf595fc 100644 --- a/nanobot/channels/telegram.py +++ b/nanobot/channels/telegram.py @@ -36,9 +36,9 @@ from nanobot.utils.helpers import split_message TELEGRAM_MAX_MESSAGE_LEN = 4000 # Telegram message character limit # Telegram's actual API limit is 4096; we split raw markdown at 4000 as a -# safety margin for mid-stream edits (plain text). For _stream_end, we -# convert to HTML first and then split at the true 4096-char boundary so -# the final rendered message never overflows. +# safety margin for mid-stream edits (plain text). For _stream_end, we split +# raw markdown into chunks whose rendered HTML fits Telegram's true 4096-char +# boundary so the final rendered message never overflows. TELEGRAM_HTML_MAX_LEN = 4096 TELEGRAM_REPLY_CONTEXT_MAX_LEN = TELEGRAM_MAX_MESSAGE_LEN # Max length for reply context in user message @@ -285,6 +285,32 @@ def _markdown_to_telegram_html(text: str) -> str: return text +def _split_telegram_markdown_html(content: str, max_html_len: int) -> list[str]: + """Split raw Telegram Markdown and return HTML chunks within Telegram's limit.""" + chunks: list[str] = [] + pending = _split_telegram_markdown(content, TELEGRAM_MAX_MESSAGE_LEN) + while pending: + chunk = pending.pop(0) + html = _markdown_to_telegram_html(chunk) + if len(html) <= max_html_len: + chunks.append(html) + continue + + # Markdown can expand when rendered as HTML (tags/entities). Re-split + # the raw markdown with a smaller budget instead of slicing HTML tags. + next_limit = max(1, int(len(chunk) * max_html_len / len(html)) - 8) + next_limit = min(next_limit, len(chunk) - 1) + if next_limit <= 0: + chunks.extend(split_message(html, max_html_len)) + continue + parts = _split_telegram_markdown(chunk, next_limit) + if len(parts) == 1 and parts[0] == chunk: + chunks.extend(split_message(html, max_html_len)) + continue + pending = parts + pending + return chunks + + _SEND_MAX_RETRIES = 3 _SEND_RETRY_BASE_DELAY = 0.5 # seconds, doubled each retry _STREAM_EDIT_INTERVAL_DEFAULT = 0.6 # min seconds between edit_message_text calls @@ -800,14 +826,9 @@ class TelegramChannel(BaseChannel): if message_thread_id := meta.get("message_thread_id"): thread_kwargs["message_thread_id"] = message_thread_id raw_text = buf.text - html = _markdown_to_telegram_html(raw_text) - if len(html) <= TELEGRAM_HTML_MAX_LEN: - primary_html = html - extra_html_chunks = [] - else: - html_chunks = split_message(html, TELEGRAM_HTML_MAX_LEN) - primary_html = html_chunks[0] - extra_html_chunks = html_chunks[1:] + html_chunks = _split_telegram_markdown_html(raw_text, TELEGRAM_HTML_MAX_LEN) + primary_html = html_chunks[0] + extra_html_chunks = html_chunks[1:] try: await self._call_with_retry( self._app.bot.edit_message_text, diff --git a/tests/channels/test_telegram_channel.py b/tests/channels/test_telegram_channel.py index 5115791d9..da3341474 100644 --- a/tests/channels/test_telegram_channel.py +++ b/tests/channels/test_telegram_channel.py @@ -719,6 +719,36 @@ async def test_send_delta_stream_end_html_expansion_does_not_overflow() -> None: assert "123" not in channel._stream_bufs +@pytest.mark.asyncio +async def test_send_delta_stream_end_splits_long_code_block_before_html_rendering() -> None: + """Final streamed replies must not split Telegram HTML inside
."""
+ channel = TelegramChannel(
+ TelegramConfig(enabled=True, token="123:abc", allow_from=["*"]),
+ MessageBus(),
+ )
+ channel._app = _FakeApp(lambda: None)
+ channel._app.bot.edit_message_text = AsyncMock()
+ channel._app.bot.send_message = AsyncMock(return_value=SimpleNamespace(message_id=99))
+
+ raw_text = "```python\n" + ("print(\"line\")\n" * 450) + "```\nDone"
+ channel._stream_bufs["123"] = _StreamBuf(text=raw_text, message_id=7, last_edit=0.0)
+
+ await channel.send_delta("123", "", {"_stream_end": True})
+
+ html_chunks = [
+ channel._app.bot.edit_message_text.call_args.kwargs.get("text", ""),
+ *[
+ call.kwargs.get("text", "")
+ for call in channel._app.bot.send_message.call_args_list
+ ],
+ ]
+ assert len(html_chunks) > 1
+ for html in html_chunks:
+ assert len(html) <= 4096
+ assert html.count("") == html.count("
")
+ assert "123" not in channel._stream_bufs
+
+
@pytest.mark.asyncio
async def test_send_delta_new_stream_id_replaces_stale_buffer() -> None:
channel = TelegramChannel(