From fd3d7ea752dedaad2e3535a689a8539e9630d8e2 Mon Sep 17 00:00:00 2001 From: T3chC0wb0y Date: Fri, 24 Apr 2026 17:56:07 -0500 Subject: [PATCH] fix(msteams): normalize nbsp in inbound text --- nanobot/channels/msteams.py | 10 +++++++++- tests/test_msteams.py | 11 +++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/nanobot/channels/msteams.py b/nanobot/channels/msteams.py index d2addacca..f1c0ac1bc 100644 --- a/nanobot/channels/msteams.py +++ b/nanobot/channels/msteams.py @@ -312,10 +312,12 @@ class MSTeamsChannel(BaseChannel): """Extract the user-authored text from a Teams activity.""" text = str(activity.get("text") or "") text = self._strip_possible_bot_mention(text) + text = self._normalize_html_whitespace(text) channel_data = activity.get("channelData") or {} reply_to_id = str(activity.get("replyToId") or "").strip() normalized_preview = html.unescape(text).replace("&rsquo", "’").strip() + normalized_preview = normalized_preview.replace("\xa0", " ") normalized_preview = normalized_preview.replace("\r\n", "\n").replace("\r", "\n") preview_lines = [line.strip() for line in normalized_preview.split("\n")] while preview_lines and not preview_lines[0]: @@ -335,9 +337,15 @@ class MSTeamsChannel(BaseChannel): cleaned = re.sub(r"(?:\r?\n){3,}", "\n\n", cleaned) return cleaned.strip() + def _normalize_html_whitespace(self, text: str) -> str: + """Normalize common HTML whitespace/entities from Teams into plain text spacing.""" + normalized = html.unescape(text).replace("&rsquo", "’") + normalized = normalized.replace("\xa0", " ") + return normalized + def _normalize_teams_reply_quote(self, text: str) -> str: """Normalize Teams quoted replies into a compact structured form.""" - cleaned = html.unescape(text).replace("&rsquo", "’").strip() + cleaned = self._normalize_html_whitespace(text).strip() if not cleaned: return "" diff --git a/tests/test_msteams.py b/tests/test_msteams.py index 3dbfdfb2f..b4dcf34f2 100644 --- a/tests/test_msteams.py +++ 
b/tests/test_msteams.py @@ -261,6 +261,17 @@ def test_sanitize_inbound_text_keeps_normal_inline_message(make_channel): assert ch._sanitize_inbound_text(activity) == "normal inline message" +def test_sanitize_inbound_text_normalizes_nbsp_entities(make_channel): + ch = make_channel() + + activity = { + "text": "Hello&nbsp;from&nbsp;Teams", + "channelData": {}, + } + + assert ch._sanitize_inbound_text(activity) == "Hello from Teams" + + def test_sanitize_inbound_text_normalizes_reply_wrapper_without_reply_metadata(make_channel): ch = make_channel()