diff --git a/src/backend/html_markdown.py b/src/backend/html_markdown.py
index 30afda0a..cbcbea2e 100644
--- a/src/backend/html_markdown.py
+++ b/src/backend/html_markdown.py
@@ -4,11 +4,12 @@
import re
from typing import DefaultDict, Dict
+from urllib.parse import unquote
import html_sanitizer.sanitizer as sanitizer
import mistune
from html_sanitizer.sanitizer import Sanitizer
-from lxml.html import HtmlElement # nosec
+from lxml.html import HtmlElement, etree # nosec
from .svg_colors import SVG_COLORS
@@ -198,8 +199,21 @@ class HTMLProcessor:
) -> str:
"""Filter and return HTML."""
- settings = self.sanitize_settings(inline, outgoing, room_id)
- html = Sanitizer(settings).sanitize(html).rstrip("\n")
+ sanit = Sanitizer(self.sanitize_settings(inline, outgoing, room_id))
+ html = sanit.sanitize(html).rstrip("\n")
+
+ if not html.strip():
+ return html
+
+ tree = etree.fromstring(
+ html, parser=etree.HTMLParser(encoding="utf-8"),
+ )
+
+ for a_tag in tree.iterdescendants("a"):
+ self._matrix_toify(a_tag, room_id)
+
+ html = etree.tostring(tree, encoding="utf-8", method="html").decode()
+ html = sanit.sanitize(html).rstrip("\n")
if outgoing:
return html
@@ -277,8 +291,6 @@ class HTMLProcessor:
self._img_to_a,
self._remove_extra_newlines,
self._newlines_to_return_symbol if inline else lambda el: el,
-
- lambda el: self._matrix_toify(el, room_id),
],
"element_postprocessors": [
self._font_color_to_span if outgoing else lambda el: el,
@@ -376,7 +388,6 @@ class HTMLProcessor:
"""Turn userID, usernames, roomID, room aliases into matrix.to URL."""
if el.tag != "a" or not el.attrib.get("href"):
- # print("ret 1", el.tag, el.attrib, el.text, el.tail, sep="||")
return el
id_regexes = (
@@ -388,12 +399,10 @@ class HTMLProcessor:
el.attrib["href"] = f"https://matrix.to/#/{el.attrib['href']}"
if room_id not in self.rooms_user_id_names:
- # print("ret 2", el.tag, el.attrib, el.text, el.tail, sep="||")
return el
for user_id, username in self.rooms_user_id_names[room_id].items():
- # print(el.attrib["href"], username, user_id)
- if el.attrib["href"] == username:
+ if unquote(el.attrib["href"]) == username:
el.attrib["href"] = f"https://matrix.to/#/{user_id}"
# print("ret 3", el.tag, el.attrib, el.text, el.tail, sep="||")