From c8bf607fc249459d7443ffb534fcd0e2b2e9fa6b Mon Sep 17 00:00:00 2001
From: miruka
Date: Tue, 30 Mar 2021 18:18:02 -0400
Subject: [PATCH] Fix parsing links with some special characters

Add these characters to the regex allowed list for URL /paths:
non-latin word characters, : ! $ * + ^ @

This also fixes the parsing of matrix.to URLs in messages.
---
 src/backend/html_markdown.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/backend/html_markdown.py b/src/backend/html_markdown.py
index 93836bb1..ef077446 100644
--- a/src/backend/html_markdown.py
+++ b/src/backend/html_markdown.py
@@ -149,8 +149,8 @@ class HTMLProcessor:
     link_regexes = [re.compile(r, re.IGNORECASE)
                     if isinstance(r, str) else r for r in [
         # Normal :// URLs
-        (r"(?P<body>[a-zA-Z\d]+://(?P<host>[a-z\d._-]+(?:\:\d+)?)"
-         r"(?:/[/\-_.,a-z\d#%&?;=~]*)?(?:\([/\-_.,a-z\d#%&?;=~]*\))?)"),
+        (r"(?P<body>[a-z\d]+://(?P<host>[a-z\d._-]+(?:\:\d+)?)"
+         r"(?:/[/\-.,\w#%&?:;=~!$*+^@]*)?(?:\([/\-_.,a-z\d#%&?;=~]*\))?)"),
 
         # mailto: and tel:
         r"mailto:(?P<body>[a-z0-9._-]+@(?P<host>[a-z0-9.:-]*[a-z\d]))",