diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py index ca0639bc1..6925658de 100644 --- a/nanobot/channels/telegram.py +++ b/nanobot/channels/telegram.py @@ -53,6 +53,34 @@ def _strip_md(s: str) -> str: return s.strip() +def _strip_md_block(text: str) -> str: + """Strip block-level and inline markdown for readable plain-text preview. + + Used during streaming mid-edits so users see clean text instead of raw + markdown syntax while the response is still being generated. + """ + # Code blocks -> just the code + text = re.sub(r'```[\w]*\n?([\s\S]*?)```', r'\1', text) + # Headers -> plain text + text = re.sub(r'^#{1,6}\s+(.+)$', r'\1', text, flags=re.MULTILINE) + # Blockquotes + text = re.sub(r'^>\s*(.*)$', r'\1', text, flags=re.MULTILINE) + # Bold / italic / strikethrough + text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) + text = re.sub(r'__(.+?)__', r'\1', text) + text = re.sub(r'(? text + text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) + # Bullet lists + text = re.sub(r'^[-*]\s+', '• ', text, flags=re.MULTILINE) + # Numbered lists (normalize spacing) + text = re.sub(r'^(\d+)\.\s+', r'\1. ', text, flags=re.MULTILINE) + return text + + def _render_table_box(table_lines: list[str]) -> str: """Convert markdown pipe-table to compact aligned text for
 display."""
 
@@ -129,8 +157,8 @@ def _markdown_to_telegram_html(text: str) -> str:
 
     text = re.sub(r'`([^`]+)`', save_inline_code, text)
 
-    # 3. Headers # Title -> just the title text
-    text = re.sub(r'^#{1,6}\s+(.+)$', r'\1', text, flags=re.MULTILINE)
+    # 3. Headers # Title -> bold Title, wrapped in ⟪B⟫…⟪/B⟫ placeholders for now (preserve visual hierarchy)
+    text = re.sub(r'^#{1,6}\s+(.+)$', r'⟪B⟫\1⟪/B⟫', text, flags=re.MULTILINE)
 
     # 4. Blockquotes > text -> just the text (before HTML escaping)
     text = re.sub(r'^>\s*(.*)$', r'\1', text, flags=re.MULTILINE)
@@ -154,6 +182,9 @@ def _markdown_to_telegram_html(text: str) -> str:
     # 10. Bullet lists - item -> • item
     text = re.sub(r'^[-*]\s+', '• ', text, flags=re.MULTILINE)
 
+    # 10.5. Numbered lists  1.   item -> 1. item (keep number, collapse whitespace after the dot)
+    text = re.sub(r'^(\d+)\.\s+', r'\1. ', text, flags=re.MULTILINE)
+
     # 11. Restore inline code with HTML tags
     for i, code in enumerate(inline_codes):
         # Escape HTML in code content
@@ -166,6 +197,9 @@ def _markdown_to_telegram_html(text: str) -> str:
         escaped = _escape_telegram_html(code)
         text = text.replace(f"\x00CB{i}\x00", f"
{escaped}
") + # 13. Restore header bold markers (inserted in step 3, after HTML escaping) + text = text.replace('⟪B⟫', '').replace('⟪/B⟫', '') + return text @@ -637,10 +671,11 @@ class TelegramChannel(BaseChannel): if message_thread_id := meta.get("message_thread_id"): thread_kwargs["message_thread_id"] = message_thread_id if buf.message_id is None: + preview = _strip_md_block(buf.text) try: sent = await self._call_with_retry( self._app.bot.send_message, - chat_id=int_chat_id, text=buf.text, + chat_id=int_chat_id, text=preview, **thread_kwargs, ) buf.message_id = sent.message_id @@ -653,11 +688,12 @@ class TelegramChannel(BaseChannel): await self._flush_stream_overflow(int_chat_id, buf, thread_kwargs) buf.last_edit = now return + preview = _strip_md_block(buf.text) try: await self._call_with_retry( self._app.bot.edit_message_text, chat_id=int_chat_id, message_id=buf.message_id, - text=buf.text, + text=preview, ) buf.last_edit = now except Exception as e: diff --git a/tests/channels/test_telegram_channel.py b/tests/channels/test_telegram_channel.py index e02ca5318..4a69d31a9 100644 --- a/tests/channels/test_telegram_channel.py +++ b/tests/channels/test_telegram_channel.py @@ -1471,3 +1471,123 @@ async def test_send_text_bad_request_plain_fallback_exhausted() -> None: # so HTML fails after 1 attempt → fallback to plain also fails after 1 attempt. # Before the fix: 2 total. After the fix: still 2 (BadRequest SHOULD fallback). 
assert call_count == 2, f"Expected 2 calls (1 HTML + 1 plain), got {call_count}" + + +# --------------------------------------------------------------------------- +# _markdown_to_telegram_html formatting tests +# --------------------------------------------------------------------------- + +def test_markdown_to_html_headers_become_bold() -> None: + from nanobot.channels.telegram import _markdown_to_telegram_html + + assert _markdown_to_telegram_html("# Title") == "Title" + assert _markdown_to_telegram_html("## Subtitle") == "Subtitle" + assert _markdown_to_telegram_html("### Deep") == "Deep" + + +def test_markdown_to_html_numbered_lists_preserved() -> None: + from nanobot.channels.telegram import _markdown_to_telegram_html + + text = "1. First\n2. Second\n3. Third" + result = _markdown_to_telegram_html(text) + assert "1. First" in result + assert "2. Second" in result + assert "3. Third" in result + + +def test_markdown_to_html_numbered_list_normalizes_whitespace() -> None: + from nanobot.channels.telegram import _markdown_to_telegram_html + + # Extra spaces after dot should be normalized + text = "1. Lots of space\n2. Two spaces" + result = _markdown_to_telegram_html(text) + assert "1. Lots of space" in result + assert "2. Two spaces" in result + + +def test_markdown_to_html_headers_survive_html_escaping() -> None: + """Headers containing special HTML chars should still render as bold.""" + from nanobot.channels.telegram import _markdown_to_telegram_html + + result = _markdown_to_telegram_html("# A < B & C > D") + assert "A < B & C > D" == result + + +def test_markdown_to_html_mixed_formatting() -> None: + """Headers, bullets, numbered lists, and bold coexist correctly.""" + from nanobot.channels.telegram import _markdown_to_telegram_html + + text = "# Overview\n\n- bullet one\n- bullet two\n\n1. step one\n2. step two\n\n**bold text**" + result = _markdown_to_telegram_html(text) + assert "Overview" in result + assert "\u2022 bullet one" in result + assert "1. 
step one" in result + assert "bold text" in result + + +# --------------------------------------------------------------------------- +# _strip_md_block tests +# --------------------------------------------------------------------------- + +def test_strip_md_block_removes_inline_formatting() -> None: + from nanobot.channels.telegram import _strip_md_block + + text = "**bold** and _italic_ and ~~struck~~" + result = _strip_md_block(text) + assert result == "bold and italic and struck" + + +def test_strip_md_block_strips_headers() -> None: + from nanobot.channels.telegram import _strip_md_block + + assert _strip_md_block("## Title\nBody") == "Title\nBody" + + +def test_strip_md_block_converts_bullets_and_numbers() -> None: + from nanobot.channels.telegram import _strip_md_block + + text = "- item a\n1. item b\n2. item c" + result = _strip_md_block(text) + assert "\u2022 item a" in result + assert "1. item b" in result + assert "2. item c" in result + + +def test_strip_md_block_strips_links() -> None: + from nanobot.channels.telegram import _strip_md_block + + assert _strip_md_block("[click here](https://example.com)") == "click here" + + +# --------------------------------------------------------------------------- +# Streaming mid-edit uses _strip_md_block +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_send_delta_mid_stream_strips_markdown() -> None: + """Mid-stream edits should strip markdown so users see clean text.""" + channel = TelegramChannel( + TelegramConfig(enabled=True, token="123:abc", allow_from=["*"]), + MessageBus(), + ) + channel._app = _FakeApp(lambda: None) + channel._app.bot.send_message = AsyncMock(return_value=SimpleNamespace(message_id=42)) + channel._app.bot.edit_message_text = AsyncMock() + + # Initial send with markdown + await channel.send_delta("999", "**hello** world") + sent_text = channel._app.bot.send_message.call_args.kwargs.get("text", "") + # Should NOT contain 
raw markdown asterisks + assert "**" not in sent_text + assert "hello world" in sent_text + + # Mid-stream edit + import time + buf = channel._stream_bufs["999"] + buf.last_edit = time.monotonic() - 10 # force edit interval + await channel.send_delta("999", "\n### Title\n1. step") + edited_text = channel._app.bot.edit_message_text.call_args.kwargs.get("text", "") + assert "###" not in edited_text + assert "**" not in edited_text + assert "Title" in edited_text + assert "1. step" in edited_text