Sanitize HTML displayed as message content
This commit is contained in:
parent
8a3189df15
commit
a7bf1fca44
4
TODO.md
4
TODO.md
@ -22,3 +22,7 @@
|
||||
- Migrate more JS functions to their own files
|
||||
|
||||
- Accept room\_id arg for getUser
|
||||
|
||||
- Set Qt parents for all QObject
|
||||
|
||||
- `<pre>` scrollbar on overflow
|
||||
|
@ -9,6 +9,7 @@ from PyQt5.QtCore import QObject, pyqtProperty, pyqtSlot
|
||||
from .client_manager import ClientManager
|
||||
from .model.items import User
|
||||
from .model.qml_models import QMLModels
|
||||
from .html_filter import HtmlFilter
|
||||
|
||||
|
||||
class Backend(QObject):
|
||||
@ -16,14 +17,12 @@ class Backend(QObject):
|
||||
super().__init__()
|
||||
self._client_manager: ClientManager = ClientManager()
|
||||
self._models: QMLModels = QMLModels()
|
||||
self._html_filter: HtmlFilter = HtmlFilter()
|
||||
# a = self._client_manager; m = self._models
|
||||
|
||||
from .signal_manager import SignalManager
|
||||
self._signal_manager: SignalManager = SignalManager(self)
|
||||
|
||||
# a = self._client_manager; m = self._models
|
||||
# from PyQt5.QtCore import pyqtRemoveInputHook as PRI
|
||||
# import pdb; PRI(); pdb.set_trace()
|
||||
|
||||
self.clientManager.configLoad()
|
||||
|
||||
|
||||
@ -31,11 +30,14 @@ class Backend(QObject):
|
||||
def clientManager(self):
|
||||
return self._client_manager
|
||||
|
||||
|
||||
@pyqtProperty("QVariant", constant=True)
|
||||
def models(self):
|
||||
return self._models
|
||||
|
||||
@pyqtProperty("QVariant", constant=True)
|
||||
def htmlFilter(self):
|
||||
return self._html_filter
|
||||
|
||||
|
||||
@pyqtSlot(str, result="QVariantMap")
|
||||
def getUser(self, user_id: str) -> Dict[str, str]:
|
||||
|
82
harmonyqml/backend/html_filter.py
Normal file
82
harmonyqml/backend/html_filter.py
Normal file
@ -0,0 +1,82 @@
|
||||
# Copyright 2019 miruka
|
||||
# This file is part of harmonyqml, licensed under GPLv3.
|
||||
|
||||
import html_sanitizer.sanitizer as sanitizer
|
||||
from lxml.html import HtmlElement
|
||||
from PyQt5.QtCore import QObject, pyqtProperty, pyqtSlot
|
||||
|
||||
|
||||
class HtmlFilter(QObject):
    """Sanitize HTML message content before it is displayed.

    Wraps an ``html_sanitizer`` ``Sanitizer`` configured by
    ``sanitizer_settings`` and exposes it to QML through the ``sanitize``
    slot.
    """

    # Custom attributes allowed on <font>. Kept as a class-level constant so
    # the per-element check in _remove_empty_font does not have to rebuild
    # the whole settings dict for every element it inspects.
    _FONT_ATTRIBUTES = frozenset({"data-mx-bg-color", "data-mx-color"})


    def __init__(self) -> None:
        super().__init__()
        self._sanitizer = sanitizer.Sanitizer(self.sanitizer_settings)

        # The whitespace remover doesn't take <pre> into account.
        # These replace the module-level helpers with no-ops, which affects
        # the html_sanitizer module as a whole, not only this instance.
        # Extra positional/keyword arguments are accepted and ignored so the
        # stubs keep working if the library passes additional options.
        sanitizer.normalize_overall_whitespace = \
            lambda html, *args, **kwargs: html
        sanitizer.normalize_whitespace_in_text_or_tail = \
            lambda el, *args, **kwargs: el

        # Prevent custom attributes from being removed
        sanitizer.lxml.html.clean.Cleaner.safe_attrs |= self._FONT_ATTRIBUTES


    @pyqtSlot(str, result=str)
    def sanitize(self, html: str) -> str:
        """Return ``html`` after passing it through the configured sanitizer."""
        return self._sanitizer.sanitize(html)


    @pyqtProperty("QVariant")
    def sanitizer_settings(self) -> dict:
        """Return a freshly built settings dict for ``sanitizer.Sanitizer``.

        The allowed tags and attributes follow the Matrix specification
        linked below.
        """
        # https://matrix.org/docs/spec/client_server/latest.html#m-room-message-msgtypes
        return {
            "tags": {
                # TODO: mx-reply, audio, video
                "font", "h1", "h2", "h3", "h4", "h5", "h6",
                "blockquote", "p", "a", "ul", "ol", "sup", "sub", "li",
                "b", "i", "s", "u", "code", "hr", "br",
                "table", "thead", "tbody", "tr", "th", "td",
                "pre", "img",
            },
            "attributes": {
                # TODO: translate font attrs to qt html subset
                "font": set(self._FONT_ATTRIBUTES),
                "a": {"href"},
                "img": {"width", "height", "alt", "title", "src"},
                "ol": {"start"},
                "code": {"class"},
            },
            "empty": {"hr", "br", "img"},
            "separate": {
                "a", "p", "li", "table", "tr", "th", "td", "br", "hr"
            },
            "whitespace": {},
            "add_nofollow": False,
            "autolink": True,
            "sanitize_href": sanitizer.sanitize_href,
            "element_preprocessors": [
                sanitizer.bold_span_to_strong,
                sanitizer.italic_span_to_em,
                sanitizer.tag_replacer("strong", "b"),
                sanitizer.tag_replacer("em", "i"),
                sanitizer.tag_replacer("strike", "s"),
                sanitizer.tag_replacer("del", "s"),
                sanitizer.tag_replacer("span", "font"),
                # Must run after the span -> font replacement above so that
                # former <span> elements are checked as well.
                self._remove_empty_font,
                sanitizer.tag_replacer("form", "p"),
                sanitizer.tag_replacer("div", "p"),
                sanitizer.tag_replacer("caption", "p"),
                sanitizer.target_blank_noopener,
            ],
            "element_postprocessors": [],
            "is_mergeable": lambda e1, e2: e1.attrib == e2.attrib,
        }


    def _remove_empty_font(self, el: HtmlElement) -> HtmlElement:
        """Clear ``<font>`` elements that carry none of the allowed attributes.

        Elements with any other tag are returned untouched. The element is
        always returned so the function can be used as a sanitizer element
        preprocessor.
        """
        if el.tag != "font":
            return el

        if self._FONT_ATTRIBUTES.isdisjoint(el.keys()):
            # NOTE(review): clear() resets the whole element (children,
            # attributes, text and tail), not just the tag - confirm that
            # dropping the enclosed text is intended.
            el.clear()

        return el
|
@ -38,7 +38,9 @@ Row {
|
||||
//"</font>" +
|
||||
// (isOwn ? " " + content : "")
|
||||
|
||||
text: (dict.formatted_body || dict.body) +
|
||||
text: (dict.formatted_body ?
|
||||
Backend.htmlFilter.sanitize(dict.formatted_body) :
|
||||
dict.body) +
|
||||
" <font size=" + smallSize + "px color=gray>" +
|
||||
Qt.formatDateTime(date_time, "hh:mm:ss") +
|
||||
"</font>"
|
||||
|
Loading…
Reference in New Issue
Block a user