Sanitize HTML displayed as message content

This commit is contained in:
miruka 2019-04-17 10:49:54 -04:00
parent 8a3189df15
commit a7bf1fca44
4 changed files with 96 additions and 6 deletions

View File

@ -22,3 +22,7 @@
- Migrate more JS functions to their own files - Migrate more JS functions to their own files
- Accept room\_id arg for getUser - Accept room\_id arg for getUser
- Set Qt parents for all QObject
- `<pre>` scrollbar on overflow

View File

@ -9,6 +9,7 @@ from PyQt5.QtCore import QObject, pyqtProperty, pyqtSlot
from .client_manager import ClientManager from .client_manager import ClientManager
from .model.items import User from .model.items import User
from .model.qml_models import QMLModels from .model.qml_models import QMLModels
from .html_filter import HtmlFilter
class Backend(QObject): class Backend(QObject):
@ -16,14 +17,12 @@ class Backend(QObject):
super().__init__() super().__init__()
self._client_manager: ClientManager = ClientManager() self._client_manager: ClientManager = ClientManager()
self._models: QMLModels = QMLModels() self._models: QMLModels = QMLModels()
self._html_filter: HtmlFilter = HtmlFilter()
# a = self._client_manager; m = self._models
from .signal_manager import SignalManager from .signal_manager import SignalManager
self._signal_manager: SignalManager = SignalManager(self) self._signal_manager: SignalManager = SignalManager(self)
# a = self._client_manager; m = self._models
# from PyQt5.QtCore import pyqtRemoveInputHook as PRI
# import pdb; PRI(); pdb.set_trace()
self.clientManager.configLoad() self.clientManager.configLoad()
@ -31,11 +30,14 @@ class Backend(QObject):
def clientManager(self): def clientManager(self):
return self._client_manager return self._client_manager
@pyqtProperty("QVariant", constant=True) @pyqtProperty("QVariant", constant=True)
def models(self): def models(self):
return self._models return self._models
@pyqtProperty("QVariant", constant=True)
def htmlFilter(self):
return self._html_filter
@pyqtSlot(str, result="QVariantMap") @pyqtSlot(str, result="QVariantMap")
def getUser(self, user_id: str) -> Dict[str, str]: def getUser(self, user_id: str) -> Dict[str, str]:

View File

@ -0,0 +1,82 @@
# Copyright 2019 miruka
# This file is part of harmonyqml, licensed under GPLv3.
import html_sanitizer.sanitizer as sanitizer
from lxml.html import HtmlElement
from PyQt5.QtCore import QObject, pyqtProperty, pyqtSlot
class HtmlFilter(QObject):
    """Sanitize HTML message content before it is displayed.

    Wraps an ``html_sanitizer`` ``Sanitizer`` configured by
    ``sanitizer_settings`` and exposes it through the ``sanitize`` slot.
    """

    def __init__(self) -> None:
        super().__init__()

        # The property builds a brand new dict on every access, so read it
        # once here and reuse the result for everything below.
        settings = self.sanitizer_settings

        # Kept on the instance so that _remove_empty_font (registered as an
        # element preprocessor in the settings) does not have to rebuild
        # the whole settings dict each time it is called.
        self._font_attrs = frozenset(settings["attributes"]["font"])

        self._sanitizer = sanitizer.Sanitizer(settings)

        # NOTE: the assignments below replace attributes of the
        # html_sanitizer module and of lxml's Cleaner class themselves,
        # so they are not limited to this instance.

        # The whitespace remover doesn't take <pre> into account
        sanitizer.normalize_overall_whitespace = lambda html: html
        sanitizer.normalize_whitespace_in_text_or_tail = lambda el: el

        # Prevent custom attributes from being removed
        sanitizer.lxml.html.clean.Cleaner.safe_attrs |= self._font_attrs

    @pyqtSlot(str, result=str)
    def sanitize(self, html: str) -> str:
        """Return ``html`` after running it through the sanitizer."""
        return self._sanitizer.sanitize(html)

    @pyqtProperty("QVariant")
    def sanitizer_settings(self) -> dict:
        """Return a newly built settings dict for ``sanitizer.Sanitizer``.

        A new dict (with new preprocessor callables) is created on every
        access; callers that need it more than once should keep a
        reference instead of reading the property repeatedly.
        """
        # https://matrix.org/docs/spec/client_server/latest.html#m-room-message-msgtypes
        return {
            "tags": {
                # TODO: mx-reply, audio, video
                "font", "h1", "h2", "h3", "h4", "h5", "h6",
                "blockquote", "p", "a", "ul", "ol", "sup", "sub", "li",
                "b", "i", "s", "u", "code", "hr", "br",
                "table", "thead", "tbody", "tr", "th", "td",
                "pre", "img",
            },
            "attributes": {
                # TODO: translate font attrs to qt html subset
                "font": {"data-mx-bg-color", "data-mx-color"},
                "a": {"href"},
                "img": {"width", "height", "alt", "title", "src"},
                "ol": {"start"},
                "code": {"class"},
            },
            "empty": {"hr", "br", "img"},
            "separate": {
                "a", "p", "li", "table", "tr", "th", "td", "br", "hr"
            },
            "whitespace": {},
            "add_nofollow": False,
            "autolink": True,
            "sanitize_href": sanitizer.sanitize_href,
            # Order matters: "span" is renamed to "font" before
            # _remove_empty_font looks at the element's tag.
            "element_preprocessors": [
                sanitizer.bold_span_to_strong,
                sanitizer.italic_span_to_em,
                sanitizer.tag_replacer("strong", "b"),
                sanitizer.tag_replacer("em", "i"),
                sanitizer.tag_replacer("strike", "s"),
                sanitizer.tag_replacer("del", "s"),
                sanitizer.tag_replacer("span", "font"),
                self._remove_empty_font,
                sanitizer.tag_replacer("form", "p"),
                sanitizer.tag_replacer("div", "p"),
                sanitizer.tag_replacer("caption", "p"),
                sanitizer.target_blank_noopener,
            ],
            "element_postprocessors": [],
            "is_mergeable": lambda e1, e2: e1.attrib == e2.attrib,
        }

    def _remove_empty_font(self, el: HtmlElement) -> HtmlElement:
        """Clear ``<font>`` elements that carry no allowed font attribute.

        Elements with any other tag are returned untouched. The same
        element object is always returned.
        """
        if el.tag != "font":
            return el

        if self._font_attrs.isdisjoint(el.keys()):
            # NOTE(review): lxml's clear() resets the whole element, which
            # looks like it also discards its text, children and tail, not
            # only its attributes - confirm that dropping the content of
            # attribute-less <font>/<span> elements is intended.
            el.clear()

        return el

View File

@ -38,7 +38,9 @@ Row {
//"</font>" + //"</font>" +
// (isOwn ? "&nbsp;&nbsp;" + content : "") // (isOwn ? "&nbsp;&nbsp;" + content : "")
text: (dict.formatted_body || dict.body) + text: (dict.formatted_body ?
Backend.htmlFilter.sanitize(dict.formatted_body) :
dict.body) +
"&nbsp;&nbsp;<font size=" + smallSize + "px color=gray>" + "&nbsp;&nbsp;<font size=" + smallSize + "px color=gray>" +
Qt.formatDateTime(date_time, "hh:mm:ss") + Qt.formatDateTime(date_time, "hh:mm:ss") +
"</font>" "</font>"