From 8fd7ce4e141de1c579d07b5bd8243de4f0b42b0e Mon Sep 17 00:00:00 2001 From: miruka Date: Sun, 22 Mar 2020 19:39:58 -0400 Subject: [PATCH] Safer linkifying for user ID, room ID, room alias Don't replace anything in already existing links or . --- src/backend/html_markdown.py | 38 ++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/backend/html_markdown.py b/src/backend/html_markdown.py index 18cbf301..8682fef0 100644 --- a/src/backend/html_markdown.py +++ b/src/backend/html_markdown.py @@ -112,11 +112,21 @@ class HTMLProcessor: } link_regexes = [re.compile(r, re.IGNORECASE) for r in [ + # Normal :// URLs (r"(?P[a-zA-Z\d]+://(?P[a-z\d._-]+(?:\:\d+)?)" r"(?:/[/\-_.,a-z\d#%&?;=~]*)?(?:\([/\-_.,a-z\d#%&?;=~]*\))?)"), + + # mailto: and tel: r"mailto:(?P[a-z0-9._-]+@(?P[a-z0-9_.-]+[a-z](?:\:\d+)?))", r"tel:(?P[0-9+-]+)(?P)", + + # magnet: r"(?Pmagnet:\?xt=urn:[a-z0-9]+:.+)(?P)", + + # User ID, room ID, room alias + r"(?=^|\W)(?P@.+?:(?P[a-zA-Z\d.-:]*[a-zA-Z\d]))", + r"(?=^|\W)(?P#.+?:(?P[a-zA-Z\d.-:]*[a-zA-Z\d]))", + r"(?=^|\W)(?P!.+?:(?P[a-zA-Z\d.-:]*[a-zA-Z\d]))", ]] inline_quote_regex = re.compile(r"(^|⏎)(\s*>[^⏎\n]*)", re.MULTILINE) @@ -130,16 +140,6 @@ class HTMLProcessor: extra_newlines_regex = re.compile(r"\n(\n*)") - user_id_mention_regex = re.compile( - r"(?=^|\W)@.+?:[a-zA-Z\d.-:]*[a-zA-Z\d]", - ) - room_id_mention_regex = re.compile( - r"(?=^|\W)!.+?:[a-zA-Z\d.-:]*[a-zA-Z\d]", - ) - room_alias_mention_regex = re.compile( - r"(?=^|\W)#.+?:[a-zA-Z\d.-:]*[a-zA-Z\d]", - ) - def __init__(self) -> None: self._sanitizers = { @@ -191,13 +191,6 @@ class HTMLProcessor: ) -> str: """Turn usernames, user ID, room alias, room ID into matrix.to URL.""" - def repl_func(m) -> str: - return rf"[{m.group(0)}](https://matrix.to/#/{quote(m.group(0))})" - - text = self.user_id_mention_regex.sub(repl_func, text) - text = self.room_id_mention_regex.sub(repl_func, text) - text = self.room_alias_mention_regex.sub(repl_func, text) - for user_id, username in (usernames or {}).items(): text = re.sub( rf"(? HtmlElement: + if el.tag != "a" or not el.attrib.get("href"): + return el + + el.attrib["href"] = "https://matrix.to/#/%s" % el.attrib["href"] + return el + + HTML_PROCESSOR = HTMLProcessor()