mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-06-15 07:14:08 +00:00
fix: keep Telegram streamed code blocks balanced
Maintainer edit: split final streamed Telegram markdown before rendering to HTML so long fenced code blocks do not produce unbalanced <pre><code> chunks while still respecting Telegram's rendered HTML limit.
This commit is contained in:
parent
a5a816abaf
commit
ffae1dca6d
@ -36,9 +36,9 @@ from nanobot.utils.helpers import split_message
|
|||||||
|
|
||||||
TELEGRAM_MAX_MESSAGE_LEN = 4000 # Telegram message character limit
|
TELEGRAM_MAX_MESSAGE_LEN = 4000 # Telegram message character limit
|
||||||
# Telegram's actual API limit is 4096; we split raw markdown at 4000 as a
|
# Telegram's actual API limit is 4096; we split raw markdown at 4000 as a
|
||||||
# safety margin for mid-stream edits (plain text). For _stream_end, we
|
# safety margin for mid-stream edits (plain text). For _stream_end, we split
|
||||||
# convert to HTML first and then split at the true 4096-char boundary so
|
# raw markdown into chunks whose rendered HTML fits Telegram's true 4096-char
|
||||||
# the final rendered message never overflows.
|
# boundary so the final rendered message never overflows.
|
||||||
TELEGRAM_HTML_MAX_LEN = 4096
|
TELEGRAM_HTML_MAX_LEN = 4096
|
||||||
TELEGRAM_REPLY_CONTEXT_MAX_LEN = TELEGRAM_MAX_MESSAGE_LEN # Max length for reply context in user message
|
TELEGRAM_REPLY_CONTEXT_MAX_LEN = TELEGRAM_MAX_MESSAGE_LEN # Max length for reply context in user message
|
||||||
|
|
||||||
@ -285,6 +285,32 @@ def _markdown_to_telegram_html(text: str) -> str:
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
def _split_telegram_markdown_html(content: str, max_html_len: int) -> list[str]:
    """Split raw Telegram Markdown and return HTML chunks within Telegram's limit.

    The markdown is first split at ``TELEGRAM_MAX_MESSAGE_LEN`` and every piece
    is rendered with ``_markdown_to_telegram_html``. A piece whose rendered
    HTML is longer than *max_html_len* is re-split as raw markdown with a
    proportionally smaller budget, so the split happens before rendering
    rather than inside HTML tags. Slicing the rendered HTML with
    ``split_message`` is only the last resort, used when the markdown cannot
    be broken down any further.

    Args:
        content: Raw markdown text of the message.
        max_html_len: Maximum allowed length of each rendered HTML chunk.

    Returns:
        The rendered HTML chunks, in message order.
    """
    chunks: list[str] = []
    # Kept as a stack (next piece on top) so that taking a piece and pushing
    # re-split parts back are O(1) instead of shifting the whole list.
    pending = list(reversed(_split_telegram_markdown(content, TELEGRAM_MAX_MESSAGE_LEN)))
    while pending:
        chunk = pending.pop()
        html = _markdown_to_telegram_html(chunk)
        if len(html) <= max_html_len:
            chunks.append(html)
            continue

        # Markdown can expand when rendered as HTML (tags/entities). Re-split
        # the raw markdown with a smaller budget instead of slicing HTML tags.
        # The budget scales with the observed expansion ratio, minus a small
        # margin, and is always strictly below the current chunk length.
        next_limit = max(1, len(chunk) * max_html_len // len(html) - 8)
        next_limit = min(next_limit, len(chunk) - 1)
        parts = _split_telegram_markdown(chunk, next_limit) if next_limit > 0 else []

        # Re-queue only when the split made real progress: an empty result
        # would silently drop the chunk, and a part that is not strictly
        # shorter than the chunk could be re-split forever.
        if not parts or any(len(part) >= len(chunk) for part in parts):
            chunks.extend(split_message(html, max_html_len))
            continue
        pending.extend(reversed(parts))
    return chunks
|
||||||
|
|
||||||
|
|
||||||
_SEND_MAX_RETRIES = 3
|
_SEND_MAX_RETRIES = 3
|
||||||
_SEND_RETRY_BASE_DELAY = 0.5 # seconds, doubled each retry
|
_SEND_RETRY_BASE_DELAY = 0.5 # seconds, doubled each retry
|
||||||
_STREAM_EDIT_INTERVAL_DEFAULT = 0.6 # min seconds between edit_message_text calls
|
_STREAM_EDIT_INTERVAL_DEFAULT = 0.6 # min seconds between edit_message_text calls
|
||||||
@ -800,14 +826,9 @@ class TelegramChannel(BaseChannel):
|
|||||||
if message_thread_id := meta.get("message_thread_id"):
|
if message_thread_id := meta.get("message_thread_id"):
|
||||||
thread_kwargs["message_thread_id"] = message_thread_id
|
thread_kwargs["message_thread_id"] = message_thread_id
|
||||||
raw_text = buf.text
|
raw_text = buf.text
|
||||||
html = _markdown_to_telegram_html(raw_text)
|
html_chunks = _split_telegram_markdown_html(raw_text, TELEGRAM_HTML_MAX_LEN)
|
||||||
if len(html) <= TELEGRAM_HTML_MAX_LEN:
|
primary_html = html_chunks[0]
|
||||||
primary_html = html
|
extra_html_chunks = html_chunks[1:]
|
||||||
extra_html_chunks = []
|
|
||||||
else:
|
|
||||||
html_chunks = split_message(html, TELEGRAM_HTML_MAX_LEN)
|
|
||||||
primary_html = html_chunks[0]
|
|
||||||
extra_html_chunks = html_chunks[1:]
|
|
||||||
try:
|
try:
|
||||||
await self._call_with_retry(
|
await self._call_with_retry(
|
||||||
self._app.bot.edit_message_text,
|
self._app.bot.edit_message_text,
|
||||||
|
|||||||
@ -719,6 +719,36 @@ async def test_send_delta_stream_end_html_expansion_does_not_overflow() -> None:
|
|||||||
assert "123" not in channel._stream_bufs
|
assert "123" not in channel._stream_bufs
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_send_delta_stream_end_splits_long_code_block_before_html_rendering() -> None:
    """A long fenced code block must end up as balanced <pre><code> chunks.

    Ends a stream whose buffer holds one large code block and checks every
    HTML text passed to the bot: each stays within 4096 characters and has as
    many opening ``<pre><code>`` as closing ``</code></pre>`` tags.
    """
    config = TelegramConfig(enabled=True, token="123:abc", allow_from=["*"])
    channel = TelegramChannel(config, MessageBus())
    channel._app = _FakeApp(lambda: None)
    bot = channel._app.bot
    bot.edit_message_text = AsyncMock()
    bot.send_message = AsyncMock(return_value=SimpleNamespace(message_id=99))

    # One fenced block with 450 lines, followed by a short trailing paragraph.
    code_lines = "print(\"line\")\n" * 450
    raw_text = "".join(["```python\n", code_lines, "```\nDone"])
    channel._stream_bufs["123"] = _StreamBuf(text=raw_text, message_id=7, last_edit=0.0)

    await channel.send_delta("123", "", {"_stream_end": True})

    # The first chunk goes out through the edit call, the rest as new messages.
    sent_texts = [bot.edit_message_text.call_args.kwargs.get("text", "")]
    for sent in bot.send_message.call_args_list:
        sent_texts.append(sent.kwargs.get("text", ""))

    assert len(sent_texts) > 1
    for sent_html in sent_texts:
        assert len(sent_html) <= 4096
        opened = sent_html.count("<pre><code>")
        closed = sent_html.count("</code></pre>")
        assert opened == closed
    assert "123" not in channel._stream_bufs
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_send_delta_new_stream_id_replaces_stale_buffer() -> None:
|
async def test_send_delta_new_stream_id_replaces_stale_buffer() -> None:
|
||||||
channel = TelegramChannel(
|
channel = TelegramChannel(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user