Sanitize HTML displayed as message content
This commit is contained in:
		
							
								
								
									
										4
									
								
								TODO.md
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								TODO.md
									
									
									
									
									
								
							@@ -22,3 +22,7 @@
 | 
				
			|||||||
- Migrate more JS functions to their own files
 | 
					- Migrate more JS functions to their own files
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- Accept room\_id arg for getUser
 | 
					- Accept room\_id arg for getUser
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Set Qt parents for all QObject
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- `<pre>` scrollbar on overflow
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -9,6 +9,7 @@ from PyQt5.QtCore import QObject, pyqtProperty, pyqtSlot
 | 
				
			|||||||
from .client_manager import ClientManager
 | 
					from .client_manager import ClientManager
 | 
				
			||||||
from .model.items import User
 | 
					from .model.items import User
 | 
				
			||||||
from .model.qml_models import QMLModels
 | 
					from .model.qml_models import QMLModels
 | 
				
			||||||
 | 
					from .html_filter import HtmlFilter
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Backend(QObject):
 | 
					class Backend(QObject):
 | 
				
			||||||
@@ -16,14 +17,12 @@ class Backend(QObject):
 | 
				
			|||||||
        super().__init__()
 | 
					        super().__init__()
 | 
				
			||||||
        self._client_manager: ClientManager = ClientManager()
 | 
					        self._client_manager: ClientManager = ClientManager()
 | 
				
			||||||
        self._models:         QMLModels     = QMLModels()
 | 
					        self._models:         QMLModels     = QMLModels()
 | 
				
			||||||
 | 
					        self._html_filter:    HtmlFilter    = HtmlFilter()
 | 
				
			||||||
 | 
					        # a = self._client_manager; m = self._models
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        from .signal_manager import SignalManager
 | 
					        from .signal_manager import SignalManager
 | 
				
			||||||
        self._signal_manager: SignalManager = SignalManager(self)
 | 
					        self._signal_manager: SignalManager = SignalManager(self)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # a = self._client_manager; m = self._models
 | 
					 | 
				
			||||||
        # from PyQt5.QtCore import pyqtRemoveInputHook as PRI
 | 
					 | 
				
			||||||
        # import pdb; PRI(); pdb.set_trace()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        self.clientManager.configLoad()
 | 
					        self.clientManager.configLoad()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -31,11 +30,14 @@ class Backend(QObject):
 | 
				
			|||||||
    def clientManager(self):
 | 
					    def clientManager(self):
 | 
				
			||||||
        return self._client_manager
 | 
					        return self._client_manager
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
    @pyqtProperty("QVariant", constant=True)
 | 
					    @pyqtProperty("QVariant", constant=True)
 | 
				
			||||||
    def models(self):
 | 
					    def models(self):
 | 
				
			||||||
        return self._models
 | 
					        return self._models
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @pyqtProperty("QVariant", constant=True)
 | 
				
			||||||
 | 
					    def htmlFilter(self):
 | 
				
			||||||
 | 
					        return self._html_filter
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @pyqtSlot(str, result="QVariantMap")
 | 
					    @pyqtSlot(str, result="QVariantMap")
 | 
				
			||||||
    def getUser(self, user_id: str) -> Dict[str, str]:
 | 
					    def getUser(self, user_id: str) -> Dict[str, str]:
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										82
									
								
								harmonyqml/backend/html_filter.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								harmonyqml/backend/html_filter.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,82 @@
 | 
				
			|||||||
 | 
					# Copyright 2019 miruka
 | 
				
			||||||
 | 
					# This file is part of harmonyqml, licensed under GPLv3.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import html_sanitizer.sanitizer as sanitizer
 | 
				
			||||||
 | 
					from lxml.html import HtmlElement
 | 
				
			||||||
 | 
					from PyQt5.QtCore import QObject, pyqtProperty, pyqtSlot
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class HtmlFilter(QObject):
					    """Sanitize HTML message content before it is displayed.

					    Wraps an ``html_sanitizer`` ``Sanitizer`` configured by
					    ``sanitizer_settings`` and exposes it through the ``sanitize`` slot.

					    NOTE: creating an instance patches ``html_sanitizer.sanitizer`` module
					    attributes and ``lxml.html.clean.Cleaner.safe_attrs``; those changes
					    are process-wide, not limited to this instance.
					    """

					    # Custom attributes kept on <font>; also used to decide whether a
					    # <font> element carries any formatting worth keeping.
					    _FONT_ATTRIBUTES = frozenset({"data-mx-bg-color", "data-mx-color"})


					    def __init__(self) -> None:
					        super().__init__()
					        self._sanitizer = sanitizer.Sanitizer(self.sanitizer_settings)

					        # The whitespace remover doesn't take <pre> into account
					        sanitizer.normalize_overall_whitespace         = lambda html: html
					        sanitizer.normalize_whitespace_in_text_or_tail = lambda el: el

					        # Prevent custom attributes from being removed
					        sanitizer.lxml.html.clean.Cleaner.safe_attrs |= \
					            self._FONT_ATTRIBUTES


					    @pyqtSlot(str, result=str)
					    def sanitize(self, html: str) -> str:
					        """Return ``html`` filtered through the configured sanitizer."""
					        return self._sanitizer.sanitize(html)


					    @pyqtProperty("QVariant")
					    def sanitizer_settings(self) -> dict:
					        """Return a freshly built settings dict for the ``Sanitizer``.

					        A new dict (with new sets and lists) is created on every access,
					        so callers may modify the result without affecting this object.
					        """
					        # https://matrix.org/docs/spec/client_server/latest.html#m-room-message-msgtypes
					        return {
					            "tags": {
					                # TODO: mx-reply, audio, video
					                "font", "h1", "h2", "h3", "h4", "h5", "h6",
					                "blockquote", "p", "a", "ul", "ol", "sup", "sub", "li",
					                "b", "i", "s", "u", "code", "hr", "br",
					                "table", "thead", "tbody", "tr", "th", "td",
					                "pre", "img",
					            },
					            "attributes": {
					                # TODO: translate font attrs to qt html subset
					                "font": set(self._FONT_ATTRIBUTES),
					                "a":    {"href"},
					                "img":  {"width", "height", "alt", "title", "src"},
					                "ol":   {"start"},
					                "code": {"class"},
					            },
					            "empty": {"hr", "br", "img"},
					            "separate": {
					                "a", "p", "li", "table", "tr", "th", "td", "br", "hr"
					            },
					            # An empty *set*: a bare {} literal would be a dict.
					            "whitespace": set(),
					            "add_nofollow": False,
					            "autolink": True,
					            "sanitize_href": sanitizer.sanitize_href,
					            "element_preprocessors": [
					                sanitizer.bold_span_to_strong,
					                sanitizer.italic_span_to_em,
					                sanitizer.tag_replacer("strong", "b"),
					                sanitizer.tag_replacer("em", "i"),
					                sanitizer.tag_replacer("strike", "s"),
					                sanitizer.tag_replacer("del", "s"),
					                sanitizer.tag_replacer("span", "font"),
					                # Must stay after the span -> font replacement above so
					                # that former <span> elements are checked as well.
					                self._remove_empty_font,
					                sanitizer.tag_replacer("form", "p"),
					                sanitizer.tag_replacer("div", "p"),
					                sanitizer.tag_replacer("caption", "p"),
					                sanitizer.target_blank_noopener,
					            ],
					            "element_postprocessors": [],
					            "is_mergeable": lambda e1, e2: e1.attrib == e2.attrib,
					        }


					    def _remove_empty_font(self, el: HtmlElement) -> HtmlElement:
					        """Empty a <font> element that has none of the kept attributes.

					        Elements with another tag, or with at least one attribute from
					        ``_FONT_ATTRIBUTES``, are returned untouched. Otherwise the
					        element's attributes, text and children are cleared.

					        NOTE(review): the element's own text and children are still
					        discarded, as before; confirm whether unwrapping the tag while
					        keeping its content was the real intent.
					        """
					        if el.tag != "font":
					            return el

					        if self._FONT_ATTRIBUTES.isdisjoint(el.keys()):
					            # lxml's clear() also resets .tail, i.e. the text that
					            # FOLLOWS this element in its parent. That text is not part
					            # of the <font>, so put it back after clearing.
					            tail = el.tail
					            el.clear()
					            el.tail = tail

					        return el
 | 
				
			||||||
@@ -38,7 +38,9 @@ Row {
 | 
				
			|||||||
                  //"</font>" +
 | 
					                  //"</font>" +
 | 
				
			||||||
            //      (isOwn ? "  " + content : "")
 | 
					            //      (isOwn ? "  " + content : "")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            text: (dict.formatted_body || dict.body) +
 | 
					            text: (dict.formatted_body ?
 | 
				
			||||||
 | 
					                   Backend.htmlFilter.sanitize(dict.formatted_body) :
 | 
				
			||||||
 | 
					                   dict.body) +
 | 
				
			||||||
                  "  <font size=" + smallSize + "px color=gray>" +
 | 
					                  "  <font size=" + smallSize + "px color=gray>" +
 | 
				
			||||||
                  Qt.formatDateTime(date_time, "hh:mm:ss") +
 | 
					                  Qt.formatDateTime(date_time, "hh:mm:ss") +
 | 
				
			||||||
                  "</font>"
 | 
					                  "</font>"
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user