Fix parsing links with some special characters
Add the following to the characters the link regex allows in URL paths (the part after the host): non-Latin word characters, and the symbols `:` `!` `$` `*` `+` `^` `@`. This also fixes the parsing of matrix.to URLs in messages.
This commit is contained in:
parent
9fa2816d4b
commit
c8bf607fc2
|
@ -149,8 +149,8 @@ class HTMLProcessor:
|
|||
link_regexes = [re.compile(r, re.IGNORECASE)
|
||||
if isinstance(r, str) else r for r in [
|
||||
# Normal :// URLs
|
||||
(r"(?P<body>[a-zA-Z\d]+://(?P<host>[a-z\d._-]+(?:\:\d+)?)"
|
||||
r"(?:/[/\-_.,a-z\d#%&?;=~]*)?(?:\([/\-_.,a-z\d#%&?;=~]*\))?)"),
|
||||
(r"(?P<body>[a-z\d]+://(?P<host>[a-z\d._-]+(?:\:\d+)?)"
|
||||
r"(?:/[/\-.,\w#%&?:;=~!$*+^@]*)?(?:\([/\-_.,a-z\d#%&?;=~]*\))?)"),
|
||||
|
||||
# mailto: and tel:
|
||||
r"mailto:(?P<body>[a-z0-9._-]+@(?P<host>[a-z0-9.:-]*[a-z\d]))",
|
||||
|
|
Loading…
Reference in New Issue
Block a user