2019-12-19 00:33:22 +11:00
|
|
|
"""HTML and Markdown processing tools."""
|
|
|
|
|
2019-07-03 03:59:52 +10:00
|
|
|
import re
|
|
|
|
|
|
|
|
import html_sanitizer.sanitizer as sanitizer
|
2019-10-24 22:27:13 +11:00
|
|
|
import mistune
|
2019-07-04 14:24:21 +10:00
|
|
|
from html_sanitizer.sanitizer import Sanitizer
|
2019-10-24 22:27:13 +11:00
|
|
|
from lxml.html import HtmlElement # nosec
|
2019-07-03 03:59:52 +10:00
|
|
|
|
|
|
|
|
2019-09-12 07:19:24 +10:00
|
|
|
class MarkdownInlineGrammar(mistune.InlineGrammar):
    """Inline Markdown grammar for Mistune with asterisk-only emphasis.

    Differences from the stock grammar:

    - Underscores no longer mark bold/italics (e.g. `__bold__`),
      only asterisks are recognized by the patterns below.
    """

    # `**bold**`: the text between the double asterisks is capture group 1.
    double_emphasis = re.compile(r"^\*{2}([\s\S]+?)\*{2}(?!\*)")

    # `*italic*`: the text between the single asterisks is capture group 1.
    emphasis = re.compile(r"^\*((?:\*\*|[^\*])+?)\*(?!\*)")
|
|
|
|
|
|
|
|
|
|
|
|
class MarkdownInlineLexer(mistune.InlineLexer):
    """Mistune inline lexer that uses `MarkdownInlineGrammar`.

    The emphasis patterns of that grammar have a single capture group,
    so the output methods always read the emphasized text from group 1.
    """

    grammar_class = MarkdownInlineGrammar

    def output_emphasis(self, m):
        """Render the inner text of an `*italic*` match as emphasis."""

        inner = self.output(m.group(1))
        return self.renderer.emphasis(inner)

    def output_double_emphasis(self, m):
        """Render the inner text of a `**bold**` match as double emphasis."""

        inner = self.output(m.group(1))
        return self.renderer.double_emphasis(inner)
|
2019-07-22 06:08:40 +10:00
|
|
|
|
|
|
|
|
2019-12-19 00:33:22 +11:00
|
|
|
class HTMLProcessor:
    """Sanitize HTML and convert Markdown to sanitized HTML.

    The filtering restricts HTML to the subset supported by Qt for
    usage in QML: https://doc.qt.io/qt-5/richtext-html-subset.html

    Several methods accept an `outgoing` argument, telling whether the HTML
    will be sent to matrix servers or used locally in our application.

    When the HTML is used locally, extra transformations are applied:

    - Text lines starting with a `>` are wrapped in a `<span>` with
      a `quote` class, so that QML can style them appropriately.

    Several methods have an `inline` counterpart, returning text suitable
    for UI elements limited to a single line, e.g. the room last message
    subtitles in QML or notifications.
    In inline filtered HTML, block tags are stripped or substituted and
    newlines are turned into ⏎ symbols (U+23CE).
    """

    # Tags allowed by both the normal and the inline sanitizer.
    inline_tags = {"font", "a", "sup", "sub", "b", "i", "s", "u", "code"}

    # Tags additionally allowed by the normal (non-inline) sanitizer.
    block_tags = {
        "p", "br", "hr", "pre", "blockquote",
        "h1", "h2", "h3", "h4", "h5", "h6",
        "ul", "ol", "li",
        "table", "thead", "tbody", "tr", "th", "td",
    }

    # Case-insensitive patterns passed to the sanitizer's autolink settings;
    # each one defines a `body` and a `host` named group.
    link_regexes = [
        re.compile(pattern, re.IGNORECASE) for pattern in (
            (r"(?P<body>[a-zA-Z\d]+://(?P<host>[a-z\d._-]+(?:\:\d+)?)"
             r"(?:/[/\-_.,a-z\d#%&?;=~]*)?(?:\([/\-_.,a-z\d#%&?;=~]*\))?)"),
            r"mailto:(?P<body>[a-z0-9._-]+@(?P<host>[a-z0-9_.-]+[a-z](?:\:\d+)?))",
            r"tel:(?P<body>[0-9+-]+)(?P<host>)",
            r"(?P<body>magnet:\?xt=urn:[a-z0-9]+:.+)(?P<host>)",
        )
    ]

    # Quoted line in inline HTML: starts the text or follows a ⏎ symbol.
    inline_quote_regex = re.compile(r"(^|⏎)(\s*>[^⏎\n]*)", re.MULTILINE)

    # Quoted line in normal HTML, delimited by line boundaries or by
    # <p>, <br> and heading tags.
    quote_regex = re.compile(
        r"(^|<p/?>|<br/?>|<h\d/?>)(\s*>.*?)(</?p>|<br/?>|</?h\d>|$)",
        re.MULTILINE,
    )

    # A run of newlines; group 1 holds every newline but the first one.
    extra_newlines_regex = re.compile(r"\n(\n*)")


    def __init__(self) -> None:
        """Create the normal and inline sanitizers and the Markdown parser."""

        self._sanitizer = Sanitizer(self.sanitize_settings())
        self._inline_sanitizer = Sanitizer(self.sanitize_settings(inline=True))

        # The whitespace remover doesn't take <pre> into account,
        # its two module-level functions are replaced by no-ops.
        def keep_html(html, *args, **kw):
            return html

        def keep_element(el, *args, **kw):
            return el

        sanitizer.normalize_overall_whitespace = keep_html
        sanitizer.normalize_whitespace_in_text_or_tail = keep_element

        # hard_wrap: convert all \n to <br> without required two spaces
        # escape: escape HTML characters in the input string, e.g. tags
        markdown = mistune.Markdown(
            inline=MarkdownInlineLexer, hard_wrap=True, escape=True,
        )

        # Drop the `block_quote` rule from the block lexer; presumably so
        # that `>` lines are left for the quote regexes of this class.
        markdown.block.default_rules = [
            name for name in markdown.block.default_rules
            if name != "block_quote"
        ]

        self._markdown_to_html = markdown


    def from_markdown(self, text: str, outgoing: bool = False) -> str:
        """Convert Markdown text to HTML and return it filtered."""

        converted = self._markdown_to_html(text)
        return self.filter(converted, outgoing)


    def from_markdown_inline(self, text: str, outgoing: bool = False) -> str:
        """Convert Markdown text to HTML and return it inline-filtered."""

        converted = self._markdown_to_html(text)
        return self.filter_inline(converted, outgoing)


    def filter_inline(self, html: str, outgoing: bool = False) -> str:
        """Return HTML sanitized with the inline sanitizer.

        Unless `outgoing` is true, quoted lines are also wrapped
        in a `<span>` with a `quote` class.
        """

        sanitized = self._inline_sanitizer.sanitize(html)

        if not outgoing:
            # Client-side modifications
            return self.inline_quote_regex.sub(
                r'\1<span class="quote">\2</span>', sanitized,
            )

        return sanitized


    def filter(self, html: str, outgoing: bool = False) -> str:
        """Return HTML sanitized and stripped of its trailing newlines.

        Unless `outgoing` is true, quoted lines are also wrapped
        in a `<span>` with a `quote` class.
        """

        sanitized = self._sanitizer.sanitize(html).rstrip("\n")

        if not outgoing:
            # Client-side modifications
            return self.quote_regex.sub(
                r'\1<span class="quote">\2</span>\3', sanitized,
            )

        return sanitized


    def sanitize_settings(self, inline: bool = False) -> dict:
        """Build and return a settings dict for html_sanitizer.

        With `inline` set, only `inline_tags` and their attributes are
        allowed, and newlines are replaced by return symbols.
        """

        # https://matrix.org/docs/spec/client_server/latest#m-room-message-msgtypes
        # TODO: mx-reply and the new hidden thing

        inline_attributes = {
            "font": {"color"},
            "a": {"href"},
            "code": {"class"},
        }

        # Applied to elements in this order by the sanitizer settings.
        preprocessors = [
            sanitizer.bold_span_to_strong,
            sanitizer.italic_span_to_em,
            sanitizer.tag_replacer("strong", "b"),
            sanitizer.tag_replacer("em", "i"),
            sanitizer.tag_replacer("strike", "s"),
            sanitizer.tag_replacer("del", "s"),
            sanitizer.tag_replacer("form", "p"),
            sanitizer.tag_replacer("div", "p"),
            sanitizer.tag_replacer("caption", "p"),
            sanitizer.target_blank_noopener,
            self._process_span_font,
            self._img_to_a,
            self._remove_extra_newlines,
        ]

        if inline:
            allowed_tags = self.inline_tags
            allowed_attributes = inline_attributes
            empty_tags = {}
            separated_tags = {"a"}
            preprocessors.append(self._newlines_to_return_symbol)
        else:
            allowed_tags = self.inline_tags | self.block_tags
            allowed_attributes = dict(
                inline_attributes, ol={"start"}, hr={"width"},
            )
            empty_tags = {"hr", "br"}
            separated_tags = {
                "a", "p", "li", "table", "tr", "th", "td", "br", "hr",
            }
            # No newline substitution for non-inline HTML
            preprocessors.append(lambda el: el)

        return {
            "tags": allowed_tags,
            "attributes": allowed_attributes,
            "empty": empty_tags,
            "separate": separated_tags,
            "whitespace": {},
            "keep_typographic_whitespace": True,
            "add_nofollow": False,
            "autolink": {
                "link_regexes": self.link_regexes,
                "avoid_hosts": [],
            },
            # Links are passed through unchanged
            "sanitize_href": lambda href: href,
            "element_preprocessors": preprocessors,
            "element_postprocessors": [],
            "is_mergeable": lambda e1, e2: e1.attrib == e2.attrib,
        }


    @staticmethod
    def _process_span_font(el: HtmlElement) -> HtmlElement:
        """Turn `<span>`/`<font>` with `data-mx-color` into `<font color>`.

        The `data-mx-color` attribute is always removed from these tags;
        the element is only changed further if the attribute had a value.
        """

        if el.tag in ("span", "font"):
            mx_color = el.attrib.pop("data-mx-color", None)

            if mx_color:
                el.tag = "font"
                el.attrib["color"] = mx_color

        return el


    @staticmethod
    def _img_to_a(el: HtmlElement) -> HtmlElement:
        """Turn `<img>` elements into `<a>` links to the image source.

        The link text is the image's `alt` if it has one, else its URL.
        """

        if el.tag != "img":
            return el

        source = el.attrib.pop("src", "")
        el.tag = "a"
        el.attrib["href"] = source
        el.text = el.attrib.pop("alt", None) or source
        return el


    def _remove_extra_newlines(self, el: HtmlElement) -> HtmlElement:
        """Strip one newline per newline run outside of `<pre>` elements.

        This avoids additional blank lines when the CSS directive
        `white-space: pre` is used.
        """

        if el.tag == "pre":
            return el

        if any(ancestor.tag == "pre" for ancestor in el.iterancestors()):
            return el

        for part in ("text", "tail"):
            content = getattr(el, part)

            if content:
                cleaned = self.extra_newlines_regex.sub(r"\1", content)
                setattr(el, part, cleaned)

        return el


    def _newlines_to_return_symbol(self, el: HtmlElement) -> HtmlElement:
        """Replace newlines by unicode return symbols (⏎, U+23CE).

        A symbol is put after `<br>` tags and after block elements that are
        followed by a sibling (e.g. a `<p>` followed by another `<p>`).
        The `<br>` themselves will be removed by the inline sanitizer.
        """

        needs_symbol = el.tag == "br"

        if not needs_symbol and el.tag in self.block_tags:
            needs_symbol = next(el.itersiblings(), None) is not None

        if needs_symbol:
            current_tail = el.tail or ""
            el.tail = f" ⏎ {current_tail}"

        # Replace left \n in text/tail of <pre> content by the return symbol.
        for part in ("text", "tail"):
            content = getattr(el, part)

            if content:
                setattr(el, part, re.sub(r"\n", r" ⏎ ", content))

        return el
|
|
|
|
|
|
|
|
|
2019-12-19 00:33:22 +11:00
|
|
|
# Module-level `HTMLProcessor` instance, created when this module is imported.
HTML_PROCESSOR = HTMLProcessor()
|