diff --git a/src/backend/html_markdown.py b/src/backend/html_markdown.py index 8682fef0..30afda0a 100644 --- a/src/backend/html_markdown.py +++ b/src/backend/html_markdown.py @@ -3,8 +3,7 @@ """HTML and Markdown processing tools.""" import re -from typing import Dict, Optional -from urllib.parse import quote +from typing import DefaultDict, Dict import html_sanitizer.sanitizer as sanitizer import mistune @@ -111,7 +110,18 @@ class HTMLProcessor: "table", "thead", "tbody", "tr", "th", "td", "pre", } - link_regexes = [re.compile(r, re.IGNORECASE) for r in [ + user_id_regex = re.compile( + r"(?=^|\W)(?P
@.+?:(?Pinto account sanitizer.normalize_overall_whitespace = lambda html, *args, **kw: html sanitizer.normalize_whitespace_in_text_or_tail = \ @@ -175,39 +174,32 @@ class HTMLProcessor: def from_markdown( self, - text: str, - inline: bool = False, - outgoing: bool = False, - mentionable_users: Optional[Dict[str, str]] = None, # {id: name} + text: str, + inline: bool = False, + outgoing: bool = False, + room_id: str = "", ) -> str: """Return filtered HTML from Markdown text.""" - text = self.markdown_linkify_users_rooms(text, mentionable_users) - return self.filter(self._markdown_to_html(text), inline, outgoing) - - - def markdown_linkify_users_rooms( - self, text: str, usernames: Optional[Dict[str, str]] = None, - ) -> str: - """Turn usernames, user ID, room alias, room ID into matrix.to URL.""" - - for user_id, username in (usernames or {}).items(): - text = re.sub( - rf"(? str: """Filter and return HTML.""" - html = self._sanitizers[inline, outgoing].sanitize(html).rstrip("\n") + settings = self.sanitize_settings(inline, outgoing, room_id) + html = Sanitizer(settings).sanitize(html).rstrip("\n") if outgoing: return html @@ -226,7 +218,7 @@ class HTMLProcessor: def sanitize_settings( - self, inline: bool = False, outgoing: bool = False, + self, inline: bool = False, outgoing: bool = False, room_id: str = "", ) -> dict: """Return an html_sanitizer configuration.""" @@ -247,6 +239,11 @@ class HTMLProcessor: "span": {"data-mx-color"}, }} + username_link_regexes = [re.compile(r, re.IGNORECASE) for r in [ + rf"(?{re.escape(username)})(?!\w)(?P)" + for username in self.rooms_user_id_names[room_id].values() + ]] + return { "tags": inline_tags if inline else all_tags, "attributes": inlines_attributes if inline else attributes, @@ -258,7 +255,8 @@ class HTMLProcessor: "keep_typographic_whitespace": True, "add_nofollow": False, "autolink": { - "link_regexes": self.link_regexes, + "link_regexes": + self.link_regexes + username_link_regexes, # type: ignore "avoid_hosts": [], }, "sanitize_href": lambda href: href, @@ -280,7 +278,7 @@ class HTMLProcessor: self._remove_extra_newlines, self._newlines_to_return_symbol if inline else lambda el: el, - self._matrix_toify_user_room_links, + lambda el: self._matrix_toify(el, room_id), ], "element_postprocessors": [ self._font_color_to_span if outgoing else lambda el: el, @@ -374,12 +372,31 @@ class HTMLProcessor: return el - @staticmethod - def _matrix_toify_user_room_links(el: HtmlElement) -> HtmlElement: + def _matrix_toify(self, el: HtmlElement, room_id: str = "") -> HtmlElement: + """Turn userID, usernames, roomID, room aliases into matrix.to URL.""" + if el.tag != "a" or not el.attrib.get("href"): + # print("ret 1", el.tag, el.attrib, el.text, el.tail, sep="||") return el - el.attrib["href"] = "https://matrix.to/#/%s" % el.attrib["href"] + id_regexes = ( + self.user_id_regex, self.room_id_regex, self.room_alias_regex, + ) + + for regex in id_regexes: + if regex.match(el.attrib["href"]): + el.attrib["href"] = f"https://matrix.to/#/{el.attrib['href']}" + + if room_id not in self.rooms_user_id_names: + # print("ret 2", el.tag, el.attrib, el.text, el.tail, sep="||") + return el + + for user_id, username in self.rooms_user_id_names[room_id].items(): + # print(el.attrib["href"], username, user_id) + if el.attrib["href"] == username: + el.attrib["href"] = f"https://matrix.to/#/{user_id}" + + # print("ret 3", el.tag, el.attrib, el.text, el.tail, sep="||") return el diff --git a/src/backend/matrix_client.py b/src/backend/matrix_client.py index a1836ac5..09dcd024 100644 --- a/src/backend/matrix_client.py +++ b/src/backend/matrix_client.py @@ -308,14 +308,7 @@ class MatrixClient(nio.AsyncClient): async def send_text(self, room_id: str, text: str) -> None: """Send a markdown `m.text` or `m.notice` (with `/me`) message .""" - from_md = partial( - HTML.from_markdown, - mentionable_users={ - user_id: member.display_name or user_id - for user_id, member in - self.models[self.user_id, room_id, "members"].items() - }, - ) + from_md = partial(HTML.from_markdown, room_id=room_id) escape = False if text.startswith("//") or text.startswith(r"\/"): @@ -626,7 +619,9 @@ class MatrixClient(nio.AsyncClient): content = event_fields.get("content", "").strip() if content and "inline_content" not in event_fields: - event_fields["inline_content"] = HTML.filter(content, inline=True) + event_fields["inline_content"] = HTML.filter( + content, inline=True, room_id=room_id, + ) event = Event( id = f"echo-{transaction_id}", @@ -1088,7 +1083,9 @@ class MatrixClient(nio.AsyncClient): display_name = room.display_name or "", avatar_url = room.gen_avatar_url or "", plain_topic = room.topic or "", - topic = HTML.filter(room.topic or "", inline=True), + topic = HTML.filter( + room.topic or "", inline=True, room_id=room.room_id, + ), inviter_id = inviter, inviter_name = room.user_name(inviter) if inviter else "", inviter_avatar = @@ -1123,6 +1120,7 @@ class MatrixClient(nio.AsyncClient): for user_id in left_the_room: del self.models[self.user_id, room.room_id, "members"][user_id] + HTML.rooms_user_id_names[room.room_id].pop(user_id, None) # Add the room members to the added room new_dict = { @@ -1138,6 +1136,11 @@ class MatrixClient(nio.AsyncClient): } self.models[self.user_id, room.room_id, "members"].update(new_dict) + for user_id, member in room.users.items(): + if member.display_name: + HTML.rooms_user_id_names[room.room_id][user_id] = \ + member.display_name + async def get_member_name_avatar( self, room_id: str, user_id: str, @@ -1182,7 +1185,9 @@ class MatrixClient(nio.AsyncClient): content = fields.get("content", "").strip() if content and "inline_content" not in fields: - fields["inline_content"] = HTML.filter(content, inline=True) + fields["inline_content"] = HTML.filter( + content, inline=True, room_id=room.room_id, + ) # Create Event ModelItem diff --git a/src/backend/nio_callbacks.py b/src/backend/nio_callbacks.py index b49312ea..ade38076 100644 --- a/src/backend/nio_callbacks.py +++ b/src/backend/nio_callbacks.py @@ -97,6 +97,8 @@ class NioCallbacks: ev.formatted_body if ev.format == "org.matrix.custom.html" else utils.plain2html(ev.body), + + room_id = room.room_id, ) await self.client.register_nio_event(room, ev, content=co) @@ -337,8 +339,10 @@ class NioCallbacks: async def onRoomTopicEvent(self, room, ev) -> None: if ev.topic: - topic = HTML_PROCESSOR.filter(ev.topic, inline=True) - co = f"%1 changed the room's topic to \"{topic}\"" + topic = HTML_PROCESSOR.filter( + ev.topic, inline=True, room_id=room.room_id, + ) + co = f"%1 changed the room's topic to \"{topic}\"" else: co = "%1 removed the room's topic"