diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py
index ca0639bc1..6925658de 100644
--- a/nanobot/channels/telegram.py
+++ b/nanobot/channels/telegram.py
@@ -53,6 +53,34 @@ def _strip_md(s: str) -> str:
     return s.strip()
 
 
+def _strip_md_block(text: str) -> str:
+    """Strip block-level and inline markdown for readable plain-text preview.
+
+    Used during streaming mid-edits so users see clean text instead of raw
+    markdown syntax while the response is still being generated.
+    """
+    # Code blocks -> just the code
+    text = re.sub(r'```[\w]*\n?([\s\S]*?)```', r'\1', text)
+    # Headers -> plain text
+    text = re.sub(r'^#{1,6}\s+(.+)$', r'\1', text, flags=re.MULTILINE)
+    # Blockquotes
+    text = re.sub(r'^>\s*(.*)$', r'\1', text, flags=re.MULTILINE)
+    # Bold / italic / strikethrough
+    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
+    text = re.sub(r'__(.+?)__', r'\1', text)
+    text = re.sub(r'(?<![a-zA-Z0-9])_([^_]+)_(?![a-zA-Z0-9])', r'\1', text)
+    text = re.sub(r'~~(.+?)~~', r'\1', text)
+    # Inline code
+    text = re.sub(r'`([^`]+)`', r'\1', text)
+    # Links [text](url) -> text
+    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
+    # Bullet lists
+    text = re.sub(r'^[-*]\s+', '• ', text, flags=re.MULTILINE)
+    # Numbered lists (normalize spacing)
+    text = re.sub(r'^(\d+)\.\s+', r'\1. ', text, flags=re.MULTILINE)
+    return text
+
+
 def _render_table_box(table_lines: list[str]) -> str:
     """Convert markdown pipe-table to compact aligned text for <pre> display."""
@@ -129,8 +157,8 @@ def _markdown_to_telegram_html(text: str) -> str:
text = re.sub(r'`([^`]+)`', save_inline_code, text)
- # 3. Headers # Title -> just the title text
- text = re.sub(r'^#{1,6}\s+(.+)$', r'\1', text, flags=re.MULTILINE)
+ # 3. Headers # Title -> <b>Title</b> (preserve visual hierarchy)
+ text = re.sub(r'^#{1,6}\s+(.+)$', r'⟪B⟫\1⟪/B⟫', text, flags=re.MULTILINE)
# 4. Blockquotes > text -> just the text (before HTML escaping)
text = re.sub(r'^>\s*(.*)$', r'\1', text, flags=re.MULTILINE)
@@ -154,6 +182,9 @@ def _markdown_to_telegram_html(text: str) -> str:
# 10. Bullet lists - item -> • item
text = re.sub(r'^[-*]\s+', '• ', text, flags=re.MULTILINE)
+ # 10.5. Numbered lists 1. item -> 1. item (keep number, normalize indent)
+ text = re.sub(r'^(\d+)\.\s+', r'\1. ', text, flags=re.MULTILINE)
+
# 11. Restore inline code with HTML tags
for i, code in enumerate(inline_codes):
# Escape HTML in code content
@@ -166,6 +197,9 @@ def _markdown_to_telegram_html(text: str) -> str:
escaped = _escape_telegram_html(code)
text = text.replace(f"\x00CB{i}\x00", f"<pre><code>{escaped}</code></pre>")
+ # 13. Restore header bold markers (inserted in step 3, after HTML escaping)
+ text = text.replace('⟪B⟫', '<b>').replace('⟪/B⟫', '</b>')
+
return text
@@ -637,10 +671,11 @@ class TelegramChannel(BaseChannel):
if message_thread_id := meta.get("message_thread_id"):
thread_kwargs["message_thread_id"] = message_thread_id
if buf.message_id is None:
+ preview = _strip_md_block(buf.text)
try:
sent = await self._call_with_retry(
self._app.bot.send_message,
- chat_id=int_chat_id, text=buf.text,
+ chat_id=int_chat_id, text=preview,
**thread_kwargs,
)
buf.message_id = sent.message_id
@@ -653,11 +688,12 @@ class TelegramChannel(BaseChannel):
await self._flush_stream_overflow(int_chat_id, buf, thread_kwargs)
buf.last_edit = now
return
+ preview = _strip_md_block(buf.text)
try:
await self._call_with_retry(
self._app.bot.edit_message_text,
chat_id=int_chat_id, message_id=buf.message_id,
- text=buf.text,
+ text=preview,
)
buf.last_edit = now
except Exception as e:
diff --git a/tests/channels/test_telegram_channel.py b/tests/channels/test_telegram_channel.py
index e02ca5318..4a69d31a9 100644
--- a/tests/channels/test_telegram_channel.py
+++ b/tests/channels/test_telegram_channel.py
@@ -1471,3 +1471,123 @@ async def test_send_text_bad_request_plain_fallback_exhausted() -> None:
# so HTML fails after 1 attempt → fallback to plain also fails after 1 attempt.
# Before the fix: 2 total. After the fix: still 2 (BadRequest SHOULD fallback).
assert call_count == 2, f"Expected 2 calls (1 HTML + 1 plain), got {call_count}"
+
+
+# ---------------------------------------------------------------------------
+# _markdown_to_telegram_html formatting tests
+# ---------------------------------------------------------------------------
+
+def test_markdown_to_html_headers_become_bold() -> None:
+    """Every ATX header level is rendered as a Telegram <b> element."""
+    from nanobot.channels.telegram import _markdown_to_telegram_html
+    assert _markdown_to_telegram_html("# Title") == "<b>Title</b>"
+    assert _markdown_to_telegram_html("## Subtitle") == "<b>Subtitle</b>"
+    assert _markdown_to_telegram_html("### Deep") == "<b>Deep</b>"
+
+
+def test_markdown_to_html_numbered_lists_preserved() -> None:
+    """Ordered-list items come out of the HTML conversion with their numbers."""
+    from nanobot.channels.telegram import _markdown_to_telegram_html
+
+    items = ("1. First", "2. Second", "3. Third")
+    rendered = _markdown_to_telegram_html("\n".join(items))
+    for item in items:
+        assert item in rendered
+
+
+def test_markdown_to_html_numbered_list_normalizes_whitespace() -> None:
+    """Runs of whitespace after the list number collapse to a single space."""
+    from nanobot.channels.telegram import _markdown_to_telegram_html
+
+    text = "1.   Lots of space\n2.  Two spaces"  # deliberately over-spaced input
+    result = _markdown_to_telegram_html(text)
+    assert "1. Lots of space" in result
+    assert "2. Two spaces" in result
+
+
+def test_markdown_to_html_headers_survive_html_escaping() -> None:
+    """Headers containing special HTML chars should still render as bold."""
+    from nanobot.channels.telegram import _markdown_to_telegram_html
+
+    result = _markdown_to_telegram_html("# A < B & C > D")
+    assert result == "<b>A &lt; B &amp; C &gt; D</b>"
+
+
+def test_markdown_to_html_mixed_formatting() -> None:
+    """Headers, bullets, numbered lists, and bold coexist correctly."""
+    from nanobot.channels.telegram import _markdown_to_telegram_html
+
+    text = "# Overview\n\n- bullet one\n- bullet two\n\n1. step one\n2. step two\n\n**bold text**"
+    result = _markdown_to_telegram_html(text)
+    assert "<b>Overview</b>" in result
+    assert "\u2022 bullet one" in result
+    assert "1. step one" in result
+    assert "<b>bold text</b>" in result
+
+
+# ---------------------------------------------------------------------------
+# _strip_md_block tests
+# ---------------------------------------------------------------------------
+
+def test_strip_md_block_removes_inline_formatting() -> None:
+    """Bold, italic and strikethrough markers vanish; the words remain."""
+    from nanobot.channels.telegram import _strip_md_block
+
+    plain = _strip_md_block("**bold** and _italic_ and ~~struck~~")
+    assert plain == "bold and italic and struck"
+
+
+def test_strip_md_block_strips_headers() -> None:
+    """The leading '#' run of a header line is dropped; body text is kept."""
+    from nanobot.channels.telegram import _strip_md_block as strip
+    assert strip("## Title\nBody") == "Title\nBody"
+
+
+def test_strip_md_block_converts_bullets_and_numbers() -> None:
+    """Dash bullets become a bullet glyph; numbered items keep their numbers."""
+    from nanobot.channels.telegram import _strip_md_block
+
+    result = _strip_md_block("- item a\n1. item b\n2. item c")
+    expected_lines = ("\u2022 item a", "1. item b", "2. item c")
+    for line in expected_lines:
+        assert line in result
+
+
+def test_strip_md_block_strips_links() -> None:
+    """A markdown link collapses to its label; the URL is discarded."""
+    from nanobot.channels.telegram import _strip_md_block as strip
+    assert strip("[click here](https://example.com)") == "click here"
+
+
+# ---------------------------------------------------------------------------
+# Streaming mid-edit uses _strip_md_block
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_send_delta_mid_stream_strips_markdown() -> None:
+    """Streaming previews (first send and later edits) carry no raw markdown."""
+    import time
+
+    send_mock = AsyncMock(return_value=SimpleNamespace(message_id=42))
+    edit_mock = AsyncMock()
+    config = TelegramConfig(enabled=True, token="123:abc", allow_from=["*"])
+    channel = TelegramChannel(config, MessageBus())
+    channel._app = _FakeApp(lambda: None)
+    channel._app.bot.send_message = send_mock
+    channel._app.bot.edit_message_text = edit_mock
+
+    # First delta creates the message; the bold markers must already be gone.
+    await channel.send_delta("999", "**hello** world")
+    first_text = send_mock.call_args.kwargs.get("text", "")
+    assert "**" not in first_text
+    assert "hello world" in first_text
+
+    # Backdate the last edit so the edit interval has elapsed for the next delta.
+    stream = channel._stream_bufs["999"]
+    stream.last_edit = time.monotonic() - 10
+    await channel.send_delta("999", "\n### Title\n1. step")
+    second_text = edit_mock.call_args.kwargs.get("text", "")
+    for marker in ("###", "**"):
+        assert marker not in second_text
+    for fragment in ("Title", "1. step"):
+        assert fragment in second_text