Improve HTML autolinking
Stop excluding hosts such as example.com and 127.0.0.1 from autolinking (empty avoid_hosts); also handle tel: and magnet: links
This commit is contained in:
parent
aa105e0c2c
commit
a59c0d1572
2
TODO.md
2
TODO.md
@ -26,3 +26,5 @@
|
|||||||
- Set Qt parents for all QObject
|
- Set Qt parents for all QObject
|
||||||
|
|
||||||
- `<pre>` scrollbar on overflow
|
- `<pre>` scrollbar on overflow
|
||||||
|
|
||||||
|
- Make links in room subtitle clickable, formatting?
|
||||||
|
@ -1,12 +1,23 @@
|
|||||||
# Copyright 2019 miruka
|
# Copyright 2019 miruka
|
||||||
# This file is part of harmonyqml, licensed under GPLv3.
|
# This file is part of harmonyqml, licensed under GPLv3.
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
import html_sanitizer.sanitizer as sanitizer
|
import html_sanitizer.sanitizer as sanitizer
|
||||||
from lxml.html import HtmlElement
|
from lxml.html import HtmlElement
|
||||||
from PyQt5.QtCore import QObject, pyqtProperty, pyqtSlot
|
from PyQt5.QtCore import QObject, pyqtProperty, pyqtSlot
|
||||||
|
|
||||||
|
|
||||||
class HtmlFilter(QObject):
|
class HtmlFilter(QObject):
|
||||||
|
link_regexes = [re.compile(r, re.IGNORECASE) for r in [
|
||||||
|
(r"(?P<body>.+://(?P<host>[a-z0-9._-]+)(?:/[/\-_.,a-z0-9%&?;=~]*)?"
|
||||||
|
r"(?:\([/\-_.,a-z0-9%&?;=~]*\))?)"),
|
||||||
|
r"mailto:(?P<body>[a-z0-9._-]+@(?P<host>[a-z0-9_.-]+[a-z]))",
|
||||||
|
r"tel:(?P<body>[0-9+-]+)(?P<host>)",
|
||||||
|
r"(?P<body>magnet:\?xt=urn:[a-z0-9]+:.+)(?P<host>)",
|
||||||
|
]]
|
||||||
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self._sanitizer = sanitizer.Sanitizer(self.sanitizer_settings)
|
self._sanitizer = sanitizer.Sanitizer(self.sanitizer_settings)
|
||||||
@ -15,6 +26,13 @@ class HtmlFilter(QObject):
|
|||||||
sanitizer.normalize_overall_whitespace = lambda html: html
|
sanitizer.normalize_overall_whitespace = lambda html: html
|
||||||
sanitizer.normalize_whitespace_in_text_or_tail = lambda el: el
|
sanitizer.normalize_whitespace_in_text_or_tail = lambda el: el
|
||||||
|
|
||||||
|
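# Wrap lxml's autolink so it is always called with our "autolink" settings, ignoring any passed kwargs (see FIXME note in sanitizer_settings)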
|
||||||
|
autolink_func = sanitizer.lxml.html.clean.autolink
|
||||||
|
sanitizer.lxml.html.clean.autolink = \
|
||||||
|
lambda el, **kw: autolink_func(
|
||||||
|
el, **self.sanitizer_settings["autolink"]
|
||||||
|
)
|
||||||
|
|
||||||
# Prevent custom attributes from being removed
|
# Prevent custom attributes from being removed
|
||||||
sanitizer.lxml.html.clean.Cleaner.safe_attrs |= \
|
sanitizer.lxml.html.clean.Cleaner.safe_attrs |= \
|
||||||
self.sanitizer_settings["attributes"]["font"]
|
self.sanitizer_settings["attributes"]["font"]
|
||||||
@ -51,7 +69,10 @@ class HtmlFilter(QObject):
|
|||||||
},
|
},
|
||||||
"whitespace": {},
|
"whitespace": {},
|
||||||
"add_nofollow": False,
|
"add_nofollow": False,
|
||||||
"autolink": True,
|
"autolink": { # FIXME: arg dict not working
|
||||||
|
"link_regexes": self.link_regexes,
|
||||||
|
"avoid_hosts": [],
|
||||||
|
},
|
||||||
"sanitize_href": sanitizer.sanitize_href,
|
"sanitize_href": sanitizer.sanitize_href,
|
||||||
"element_preprocessors": [
|
"element_preprocessors": [
|
||||||
sanitizer.bold_span_to_strong,
|
sanitizer.bold_span_to_strong,
|
||||||
|
Loading…
Reference in New Issue
Block a user