mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-04 00:35:58 +00:00
fix(msteams): normalize nbsp in inbound text
This commit is contained in:
parent
722d935d37
commit
fd3d7ea752
@ -312,10 +312,12 @@ class MSTeamsChannel(BaseChannel):
|
||||
"""Extract the user-authored text from a Teams activity."""
|
||||
text = str(activity.get("text") or "")
|
||||
text = self._strip_possible_bot_mention(text)
|
||||
text = self._normalize_html_whitespace(text)
|
||||
|
||||
channel_data = activity.get("channelData") or {}
|
||||
reply_to_id = str(activity.get("replyToId") or "").strip()
|
||||
normalized_preview = html.unescape(text).replace("&rsquo", "’").strip()
|
||||
normalized_preview = normalized_preview.replace("\xa0", " ")
|
||||
normalized_preview = normalized_preview.replace("\r\n", "\n").replace("\r", "\n")
|
||||
preview_lines = [line.strip() for line in normalized_preview.split("\n")]
|
||||
while preview_lines and not preview_lines[0]:
|
||||
@ -335,9 +337,15 @@ class MSTeamsChannel(BaseChannel):
|
||||
cleaned = re.sub(r"(?:\r?\n){3,}", "\n\n", cleaned)
|
||||
return cleaned.strip()
|
||||
|
||||
def _normalize_html_whitespace(self, text: str) -> str:
|
||||
"""Normalize common HTML whitespace/entities from Teams into plain text spacing."""
|
||||
normalized = html.unescape(text).replace("&rsquo", "’")
|
||||
normalized = normalized.replace("\xa0", " ")
|
||||
return normalized
|
||||
|
||||
def _normalize_teams_reply_quote(self, text: str) -> str:
|
||||
"""Normalize Teams quoted replies into a compact structured form."""
|
||||
cleaned = html.unescape(text).replace("&rsquo", "’").strip()
|
||||
cleaned = self._normalize_html_whitespace(text).strip()
|
||||
if not cleaned:
|
||||
return ""
|
||||
|
||||
|
||||
@ -261,6 +261,17 @@ def test_sanitize_inbound_text_keeps_normal_inline_message(make_channel):
|
||||
assert ch._sanitize_inbound_text(activity) == "normal inline message"
|
||||
|
||||
|
||||
def test_sanitize_inbound_text_normalizes_nbsp_entities(make_channel):
    """Inbound text containing ``&nbsp;`` entities is returned with plain spaces."""
    ch = make_channel()

    activity = {
        # Explicit entities so the test actually exercises the normalization
        # instead of passing an already-clean string straight through.
        "text": "Hello&nbsp;from&nbsp;Teams",
        "channelData": {},
    }

    assert ch._sanitize_inbound_text(activity) == "Hello from Teams"
|
||||
|
||||
|
||||
def test_sanitize_inbound_text_normalizes_reply_wrapper_without_reply_metadata(make_channel):
|
||||
ch = make_channel()
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user