Sanitize HTML displayed as message content
This commit is contained in:
parent
8a3189df15
commit
a7bf1fca44
4
TODO.md
4
TODO.md
|
@ -22,3 +22,7 @@
|
||||||
- Migrate more JS functions to their own files
|
- Migrate more JS functions to their own files
|
||||||
|
|
||||||
- Accept room\_id arg for getUser
|
- Accept room\_id arg for getUser
|
||||||
|
|
||||||
|
- Set Qt parents for all QObject
|
||||||
|
|
||||||
|
- `<pre>` scrollbar on overflow
|
||||||
|
|
|
@ -9,6 +9,7 @@ from PyQt5.QtCore import QObject, pyqtProperty, pyqtSlot
|
||||||
from .client_manager import ClientManager
|
from .client_manager import ClientManager
|
||||||
from .model.items import User
|
from .model.items import User
|
||||||
from .model.qml_models import QMLModels
|
from .model.qml_models import QMLModels
|
||||||
|
from .html_filter import HtmlFilter
|
||||||
|
|
||||||
|
|
||||||
class Backend(QObject):
|
class Backend(QObject):
|
||||||
|
@ -16,14 +17,12 @@ class Backend(QObject):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self._client_manager: ClientManager = ClientManager()
|
self._client_manager: ClientManager = ClientManager()
|
||||||
self._models: QMLModels = QMLModels()
|
self._models: QMLModels = QMLModels()
|
||||||
|
self._html_filter: HtmlFilter = HtmlFilter()
|
||||||
|
# a = self._client_manager; m = self._models
|
||||||
|
|
||||||
from .signal_manager import SignalManager
|
from .signal_manager import SignalManager
|
||||||
self._signal_manager: SignalManager = SignalManager(self)
|
self._signal_manager: SignalManager = SignalManager(self)
|
||||||
|
|
||||||
# a = self._client_manager; m = self._models
|
|
||||||
# from PyQt5.QtCore import pyqtRemoveInputHook as PRI
|
|
||||||
# import pdb; PRI(); pdb.set_trace()
|
|
||||||
|
|
||||||
self.clientManager.configLoad()
|
self.clientManager.configLoad()
|
||||||
|
|
||||||
|
|
||||||
|
@ -31,11 +30,14 @@ class Backend(QObject):
|
||||||
def clientManager(self):
|
def clientManager(self):
|
||||||
return self._client_manager
|
return self._client_manager
|
||||||
|
|
||||||
|
|
||||||
@pyqtProperty("QVariant", constant=True)
|
@pyqtProperty("QVariant", constant=True)
|
||||||
def models(self):
|
def models(self):
|
||||||
return self._models
|
return self._models
|
||||||
|
|
||||||
|
@pyqtProperty("QVariant", constant=True)
|
||||||
|
def htmlFilter(self):
|
||||||
|
return self._html_filter
|
||||||
|
|
||||||
|
|
||||||
@pyqtSlot(str, result="QVariantMap")
|
@pyqtSlot(str, result="QVariantMap")
|
||||||
def getUser(self, user_id: str) -> Dict[str, str]:
|
def getUser(self, user_id: str) -> Dict[str, str]:
|
||||||
|
|
82
harmonyqml/backend/html_filter.py
Normal file
82
harmonyqml/backend/html_filter.py
Normal file
|
@ -0,0 +1,82 @@
|
||||||
|
# Copyright 2019 miruka
|
||||||
|
# This file is part of harmonyqml, licensed under GPLv3.
|
||||||
|
|
||||||
|
import html_sanitizer.sanitizer as sanitizer
|
||||||
|
from lxml.html import HtmlElement
|
||||||
|
from PyQt5.QtCore import QObject, pyqtProperty, pyqtSlot
|
||||||
|
|
||||||
|
|
||||||
|
class HtmlFilter(QObject):
    """Sanitize HTML message content before it is displayed from QML.

    Wraps an ``html_sanitizer`` ``Sanitizer`` configured with the tag and
    attribute whitelist returned by ``sanitizer_settings``, and exposes
    ``sanitize`` as a Qt slot.
    """

    def __init__(self) -> None:
        super().__init__()

        # Build the settings a single time. ``_remove_empty_font`` consults
        # them for every element it is handed, so rebuilding the dict (and
        # all the ``tag_replacer`` closures) on each access is wasted work.
        self._settings: dict = self._build_settings()
        self._sanitizer = sanitizer.Sanitizer(self._settings)

        # The whitespace remover doesn't take <pre> into account.
        # NOTE: these assignments replace attributes of the html_sanitizer
        # module itself, not of this instance.
        sanitizer.normalize_overall_whitespace = lambda html: html
        sanitizer.normalize_whitespace_in_text_or_tail = lambda el: el

        # Prevent custom attributes from being removed.
        # NOTE: this rebinds a class attribute of lxml's Cleaner.
        sanitizer.lxml.html.clean.Cleaner.safe_attrs |= \
            self._settings["attributes"]["font"]

    @pyqtSlot(str, result=str)
    def sanitize(self, html: str) -> str:
        """Return ``html`` as processed by the configured sanitizer."""
        return self._sanitizer.sanitize(html)

    @pyqtProperty("QVariant", constant=True)
    def sanitizer_settings(self) -> dict:
        """Return the settings dict the sanitizer was constructed with.

        The same dict object is returned on every access; treat it as
        read-only.
        """
        return self._settings

    def _build_settings(self) -> dict:
        """Create the html_sanitizer settings used by this filter."""
        # https://matrix.org/docs/spec/client_server/latest.html#m-room-message-msgtypes
        return {
            "tags": {
                # TODO: mx-reply, audio, video
                "font", "h1", "h2", "h3", "h4", "h5", "h6",
                "blockquote", "p", "a", "ul", "ol", "sup", "sub", "li",
                "b", "i", "s", "u", "code", "hr", "br",
                "table", "thead", "tbody", "tr", "th", "td",
                "pre", "img",
            },
            "attributes": {
                # TODO: translate font attrs to qt html subset
                "font": {"data-mx-bg-color", "data-mx-color"},
                "a": {"href"},
                "img": {"width", "height", "alt", "title", "src"},
                "ol": {"start"},
                "code": {"class"},
            },
            "empty": {"hr", "br", "img"},
            "separate": {
                "a", "p", "li", "table", "tr", "th", "td", "br", "hr"
            },
            "whitespace": {},
            "add_nofollow": False,
            "autolink": True,
            "sanitize_href": sanitizer.sanitize_href,
            "element_preprocessors": [
                sanitizer.bold_span_to_strong,
                sanitizer.italic_span_to_em,
                sanitizer.tag_replacer("strong", "b"),
                sanitizer.tag_replacer("em", "i"),
                sanitizer.tag_replacer("strike", "s"),
                sanitizer.tag_replacer("del", "s"),
                sanitizer.tag_replacer("span", "font"),
                self._remove_empty_font,
                sanitizer.tag_replacer("form", "p"),
                sanitizer.tag_replacer("div", "p"),
                sanitizer.tag_replacer("caption", "p"),
                sanitizer.target_blank_noopener,
            ],
            "element_postprocessors": [],
            "is_mergeable": lambda e1, e2: e1.attrib == e2.attrib,
        }

    def _remove_empty_font(self, el: HtmlElement) -> HtmlElement:
        """Clear ``<font>`` elements carrying none of the allowed attributes.

        Elements with any other tag are returned untouched.
        """
        if el.tag != "font":
            return el

        allowed_attrs = self._settings["attributes"]["font"]

        if not allowed_attrs & set(el.keys()):
            # NOTE(review): lxml's clear() also drops the element's text,
            # children and tail, not only its attributes - confirm that
            # discarding the content is intended here.
            el.clear()

        return el
|
|
@ -38,7 +38,9 @@ Row {
|
||||||
//"</font>" +
|
//"</font>" +
|
||||||
// (isOwn ? " " + content : "")
|
// (isOwn ? " " + content : "")
|
||||||
|
|
||||||
text: (dict.formatted_body || dict.body) +
|
text: (dict.formatted_body ?
|
||||||
|
Backend.htmlFilter.sanitize(dict.formatted_body) :
|
||||||
|
dict.body) +
|
||||||
" <font size=" + smallSize + "px color=gray>" +
|
" <font size=" + smallSize + "px color=gray>" +
|
||||||
Qt.formatDateTime(date_time, "hh:mm:ss") +
|
Qt.formatDateTime(date_time, "hh:mm:ss") +
|
||||||
"</font>"
|
"</font>"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user