diff --git a/src/backend/html_markdown.py b/src/backend/html_markdown.py
index 18cbf301..8682fef0 100644
--- a/src/backend/html_markdown.py
+++ b/src/backend/html_markdown.py
@@ -112,11 +112,21 @@ class HTMLProcessor:
}
link_regexes = [re.compile(r, re.IGNORECASE) for r in [
+ # Normal :// URLs
(r"(?P
[a-zA-Z\d]+://(?P[a-z\d._-]+(?:\:\d+)?)"
r"(?:/[/\-_.,a-z\d#%&?;=~]*)?(?:\([/\-_.,a-z\d#%&?;=~]*\))?)"),
+
+ # mailto: and tel:
r"mailto:(?P[a-z0-9._-]+@(?P[a-z0-9_.-]+[a-z](?:\:\d+)?))",
r"tel:(?P[0-9+-]+)(?P)",
+
+ # magnet:
r"(?Pmagnet:\?xt=urn:[a-z0-9]+:.+)(?P)",
+
+ # User ID, room ID, room alias ("-" is kept last in the host class so it is a literal hyphen, not a ".-:" range)
+ r"(?=^|\W)(?P@.+?:(?P[a-zA-Z\d.:-]*[a-zA-Z\d]))",
+ r"(?=^|\W)(?P#.+?:(?P[a-zA-Z\d.:-]*[a-zA-Z\d]))",
+ r"(?=^|\W)(?P!.+?:(?P[a-zA-Z\d.:-]*[a-zA-Z\d]))",
]]
inline_quote_regex = re.compile(r"(^|⏎)(\s*>[^⏎\n]*)", re.MULTILINE)
@@ -130,16 +140,6 @@ class HTMLProcessor:
extra_newlines_regex = re.compile(r"\n(\n*)")
- user_id_mention_regex = re.compile(
- r"(?=^|\W)@.+?:[a-zA-Z\d.-:]*[a-zA-Z\d]",
- )
- room_id_mention_regex = re.compile(
- r"(?=^|\W)!.+?:[a-zA-Z\d.-:]*[a-zA-Z\d]",
- )
- room_alias_mention_regex = re.compile(
- r"(?=^|\W)#.+?:[a-zA-Z\d.-:]*[a-zA-Z\d]",
- )
-
def __init__(self) -> None:
self._sanitizers = {
@@ -191,13 +191,6 @@ class HTMLProcessor:
) -> str:
"""Turn usernames, user ID, room alias, room ID into matrix.to URL."""
- def repl_func(m) -> str:
- return rf"[{m.group(0)}](https://matrix.to/#/{quote(m.group(0))})"
-
- text = self.user_id_mention_regex.sub(repl_func, text)
- text = self.room_id_mention_regex.sub(repl_func, text)
- text = self.room_alias_mention_regex.sub(repl_func, text)
-
for user_id, username in (usernames or {}).items():
text = re.sub(
rf"(? HtmlElement:
+ if el.tag != "a" or not el.attrib.get("href"):
+ return el
+
+ el.attrib["href"] = "https://matrix.to/#/%s" % el.attrib["href"]
+ return el
+
+
HTML_PROCESSOR = HTMLProcessor()