Fix mentions being incorrectly linked in outgoing HTML

This commit is contained in:
miruka 2020-03-22 23:00:30 -04:00
parent b1144572aa
commit 367fff8f4a

View File

@ -4,11 +4,12 @@
import re import re
from typing import DefaultDict, Dict from typing import DefaultDict, Dict
from urllib.parse import unquote
import html_sanitizer.sanitizer as sanitizer import html_sanitizer.sanitizer as sanitizer
import mistune import mistune
from html_sanitizer.sanitizer import Sanitizer from html_sanitizer.sanitizer import Sanitizer
from lxml.html import HtmlElement # nosec from lxml.html import HtmlElement, etree # nosec
from .svg_colors import SVG_COLORS from .svg_colors import SVG_COLORS
@ -198,8 +199,21 @@ class HTMLProcessor:
) -> str: ) -> str:
"""Filter and return HTML.""" """Filter and return HTML."""
settings = self.sanitize_settings(inline, outgoing, room_id) sanit = Sanitizer(self.sanitize_settings(inline, outgoing, room_id))
html = Sanitizer(settings).sanitize(html).rstrip("\n") html = sanit.sanitize(html).rstrip("\n")
if not html.strip():
return html
tree = etree.fromstring(
html, parser=etree.HTMLParser(encoding="utf-8"),
)
for a_tag in tree.iterdescendants("a"):
self._matrix_toify(a_tag, room_id)
html = etree.tostring(tree, encoding="utf-8", method="html").decode()
html = sanit.sanitize(html).rstrip("\n")
if outgoing: if outgoing:
return html return html
@ -277,8 +291,6 @@ class HTMLProcessor:
self._img_to_a, self._img_to_a,
self._remove_extra_newlines, self._remove_extra_newlines,
self._newlines_to_return_symbol if inline else lambda el: el, self._newlines_to_return_symbol if inline else lambda el: el,
lambda el: self._matrix_toify(el, room_id),
], ],
"element_postprocessors": [ "element_postprocessors": [
self._font_color_to_span if outgoing else lambda el: el, self._font_color_to_span if outgoing else lambda el: el,
@ -376,7 +388,6 @@ class HTMLProcessor:
"""Turn userID, usernames, roomID, room aliases into matrix.to URL.""" """Turn userID, usernames, roomID, room aliases into matrix.to URL."""
if el.tag != "a" or not el.attrib.get("href"): if el.tag != "a" or not el.attrib.get("href"):
# print("ret 1", el.tag, el.attrib, el.text, el.tail, sep="||")
return el return el
id_regexes = ( id_regexes = (
@ -388,12 +399,10 @@ class HTMLProcessor:
el.attrib["href"] = f"https://matrix.to/#/{el.attrib['href']}" el.attrib["href"] = f"https://matrix.to/#/{el.attrib['href']}"
if room_id not in self.rooms_user_id_names: if room_id not in self.rooms_user_id_names:
# print("ret 2", el.tag, el.attrib, el.text, el.tail, sep="||")
return el return el
for user_id, username in self.rooms_user_id_names[room_id].items(): for user_id, username in self.rooms_user_id_names[room_id].items():
# print(el.attrib["href"], username, user_id) if unquote(el.attrib["href"]) == username:
if el.attrib["href"] == username:
el.attrib["href"] = f"https://matrix.to/#/{user_id}" el.attrib["href"] = f"https://matrix.to/#/{user_id}"
# print("ret 3", el.tag, el.attrib, el.text, el.tail, sep="||") # print("ret 3", el.tag, el.attrib, el.text, el.tail, sep="||")