mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-04 00:35:58 +00:00
fix(msteams): normalize nbsp in inbound text
This commit is contained in:
parent
722d935d37
commit
fd3d7ea752
@ -312,10 +312,12 @@ class MSTeamsChannel(BaseChannel):
|
|||||||
"""Extract the user-authored text from a Teams activity."""
|
"""Extract the user-authored text from a Teams activity."""
|
||||||
text = str(activity.get("text") or "")
|
text = str(activity.get("text") or "")
|
||||||
text = self._strip_possible_bot_mention(text)
|
text = self._strip_possible_bot_mention(text)
|
||||||
|
text = self._normalize_html_whitespace(text)
|
||||||
|
|
||||||
channel_data = activity.get("channelData") or {}
|
channel_data = activity.get("channelData") or {}
|
||||||
reply_to_id = str(activity.get("replyToId") or "").strip()
|
reply_to_id = str(activity.get("replyToId") or "").strip()
|
||||||
normalized_preview = html.unescape(text).replace("&rsquo", "’").strip()
|
normalized_preview = html.unescape(text).replace("&rsquo", "’").strip()
|
||||||
|
normalized_preview = normalized_preview.replace("\xa0", " ")
|
||||||
normalized_preview = normalized_preview.replace("\r\n", "\n").replace("\r", "\n")
|
normalized_preview = normalized_preview.replace("\r\n", "\n").replace("\r", "\n")
|
||||||
preview_lines = [line.strip() for line in normalized_preview.split("\n")]
|
preview_lines = [line.strip() for line in normalized_preview.split("\n")]
|
||||||
while preview_lines and not preview_lines[0]:
|
while preview_lines and not preview_lines[0]:
|
||||||
@ -335,9 +337,15 @@ class MSTeamsChannel(BaseChannel):
|
|||||||
cleaned = re.sub(r"(?:\r?\n){3,}", "\n\n", cleaned)
|
cleaned = re.sub(r"(?:\r?\n){3,}", "\n\n", cleaned)
|
||||||
return cleaned.strip()
|
return cleaned.strip()
|
||||||
|
|
||||||
|
def _normalize_html_whitespace(self, text: str) -> str:
|
||||||
|
"""Normalize common HTML whitespace/entities from Teams into plain text spacing."""
|
||||||
|
normalized = html.unescape(text).replace("&rsquo", "’")
|
||||||
|
normalized = normalized.replace("\xa0", " ")
|
||||||
|
return normalized
|
||||||
|
|
||||||
def _normalize_teams_reply_quote(self, text: str) -> str:
|
def _normalize_teams_reply_quote(self, text: str) -> str:
|
||||||
"""Normalize Teams quoted replies into a compact structured form."""
|
"""Normalize Teams quoted replies into a compact structured form."""
|
||||||
cleaned = html.unescape(text).replace("&rsquo", "’").strip()
|
cleaned = self._normalize_html_whitespace(text).strip()
|
||||||
if not cleaned:
|
if not cleaned:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|||||||
@ -261,6 +261,17 @@ def test_sanitize_inbound_text_keeps_normal_inline_message(make_channel):
|
|||||||
assert ch._sanitize_inbound_text(activity) == "normal inline message"
|
assert ch._sanitize_inbound_text(activity) == "normal inline message"
|
||||||
|
|
||||||
|
|
||||||
|
def test_sanitize_inbound_text_normalizes_nbsp_entities(make_channel):
|
||||||
|
ch = make_channel()
|
||||||
|
|
||||||
|
activity = {
|
||||||
|
"text": "Hello&nbsp;from&nbsp;Teams",
|
||||||
|
"channelData": {},
|
||||||
|
}
|
||||||
|
|
||||||
|
assert ch._sanitize_inbound_text(activity) == "Hello from Teams"
|
||||||
|
|
||||||
|
|
||||||
def test_sanitize_inbound_text_normalizes_reply_wrapper_without_reply_metadata(make_channel):
|
def test_sanitize_inbound_text_normalizes_reply_wrapper_without_reply_metadata(make_channel):
|
||||||
ch = make_channel()
|
ch = make_channel()
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user