Rename and document html filtering stuff
This commit is contained in:
parent
cfcc190473
commit
d7045644f1
4
TODO.md
4
TODO.md
|
@ -90,6 +90,7 @@
|
|||
- Way to open context menus without a right mouse button
|
||||
- `smartVerticalFlick()` gradual acceleration
|
||||
- Make banner buttons look better
|
||||
- Way to color HTML from the composer
|
||||
|
||||
- Choose a better default easing type for animations
|
||||
- Make HListView scrollbars visible
|
||||
|
@ -206,8 +207,9 @@
|
|||
- Turn all the Error and Response classes into exceptions and normal returns
|
||||
once `HttpClient` is deprecated
|
||||
|
||||
## Distribution
|
||||
## Distribution & dependencies
|
||||
|
||||
- Mistune v2.0
|
||||
- Include python dependencies in binary with rcc?
|
||||
- Improve the README.md
|
||||
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
"""HTML and Markdown processing tools."""
|
||||
|
||||
import re
|
||||
|
||||
import html_sanitizer.sanitizer as sanitizer
|
||||
|
@ -7,12 +9,20 @@ from lxml.html import HtmlElement # nosec
|
|||
|
||||
|
||||
class MarkdownInlineGrammar(mistune.InlineGrammar):
|
||||
# Enable *word* but not _word_ syntaxes (TODO: config option for that)
|
||||
"""Markdown inline elements syntax modifications for the Mistune parser.
|
||||
|
||||
Modifications:
|
||||
|
||||
- Disable underscores for bold/italics (e.g. `__bold__`)
|
||||
"""
|
||||
|
||||
emphasis = re.compile(r"^\*((?:\*\*|[^\*])+?)\*(?!\*)")
|
||||
double_emphasis = re.compile(r"^\*{2}([\s\S]+?)\*{2}(?!\*)")
|
||||
|
||||
|
||||
class MarkdownInlineLexer(mistune.InlineLexer):
|
||||
"""Apply the changes from `MarkdownInlineGrammar` for Mistune."""
|
||||
|
||||
grammar_class = MarkdownInlineGrammar
|
||||
|
||||
|
||||
|
@ -24,7 +34,27 @@ class MarkdownInlineLexer(mistune.InlineLexer):
|
|||
return self.renderer.emphasis(self.output(m.group(1)))
|
||||
|
||||
|
||||
class HtmlFilter:
|
||||
class HTMLProcessor:
|
||||
"""Provide HTML filtering and conversion from Markdown.
|
||||
|
||||
Filtering sanitizes HTML and ensures it complies with the supported Qt
|
||||
subset for usage in QML: https://doc.qt.io/qt-5/richtext-html-subset.html
|
||||
|
||||
Some methods take an `outgoing` argument, specifying if the HTML is
|
||||
intended to be sent to matrix servers or used locally in our application.
|
||||
|
||||
For local usage, extra transformations are applied:
|
||||
|
||||
- Wrap text lines starting with a `>` in `<span>` with a `quote` class.
|
||||
This allows them to be styled appropriately from QML.
|
||||
|
||||
Some methods have `inline` counterparts, which return text appropriate
|
||||
for UI elements restricted to display a single line, e.g. the room
|
||||
last message subtitles in QML or notifications.
|
||||
In inline filtered HTML, block tags are stripped or substituted and
|
||||
newlines are turned into ⏎ symbols (U+23CE).
|
||||
"""
|
||||
|
||||
inline_tags = {"font", "a", "sup", "sub", "b", "i", "s", "u", "code"}
|
||||
|
||||
block_tags = {
|
||||
|
@ -73,14 +103,20 @@ class HtmlFilter:
|
|||
|
||||
|
||||
def from_markdown(self, text: str, outgoing: bool = False) -> str:
|
||||
"""Return filtered HTML from Markdown text."""
|
||||
|
||||
return self.filter(self._markdown_to_html(text), outgoing)
|
||||
|
||||
|
||||
def from_markdown_inline(self, text: str, outgoing: bool = False) -> str:
|
||||
"""Return single-line filtered HTML from Markdown text."""
|
||||
|
||||
return self.filter_inline(self._markdown_to_html(text), outgoing)
|
||||
|
||||
|
||||
def filter_inline(self, html: str, outgoing: bool = False) -> str:
|
||||
"""Filter and return HTML with block tags stripped or substituted."""
|
||||
|
||||
html = self._inline_sanitizer.sanitize(html)
|
||||
|
||||
if outgoing:
|
||||
|
@ -93,6 +129,8 @@ class HtmlFilter:
|
|||
|
||||
|
||||
def filter(self, html: str, outgoing: bool = False) -> str:
|
||||
"""Filter and return HTML."""
|
||||
|
||||
html = self._sanitizer.sanitize(html).rstrip("\n")
|
||||
|
||||
if outgoing:
|
||||
|
@ -102,6 +140,8 @@ class HtmlFilter:
|
|||
|
||||
|
||||
def sanitize_settings(self, inline: bool = False) -> dict:
|
||||
"""Return an html_sanitizer configuration."""
|
||||
|
||||
# https://matrix.org/docs/spec/client_server/latest#m-room-message-msgtypes
|
||||
# TODO: mx-reply and the new hidden thing
|
||||
|
||||
|
@ -156,6 +196,8 @@ class HtmlFilter:
|
|||
|
||||
@staticmethod
|
||||
def _process_span_font(el: HtmlElement) -> HtmlElement:
|
||||
"""Convert HTML `<span data-mx-color=...` to `<font color=...>`."""
|
||||
|
||||
if el.tag not in ("span", "font"):
|
||||
return el
|
||||
|
||||
|
@ -169,6 +211,8 @@ class HtmlFilter:
|
|||
|
||||
@staticmethod
|
||||
def _img_to_a(el: HtmlElement) -> HtmlElement:
|
||||
"""Linkify images by wrapping `<img>` tags in `<a>`."""
|
||||
|
||||
if el.tag == "img":
|
||||
el.tag = "a"
|
||||
el.attrib["href"] = el.attrib.pop("src", "")
|
||||
|
@ -178,8 +222,11 @@ class HtmlFilter:
|
|||
|
||||
|
||||
def _remove_extra_newlines(self, el: HtmlElement) -> HtmlElement:
|
||||
# Remove excess \n characters to avoid additional blank lines with
|
||||
# HTML/CSS using `white-space: pre`, except in <pre> content.
|
||||
"""Remove excess `\\n` characters from non-`<pre>` HTML elements.
|
||||
|
||||
This is done to avoid additional blank lines when the CSS directive
|
||||
`white-space: pre` is used.
|
||||
"""
|
||||
|
||||
pre_parent = any(parent.tag == "pre" for parent in el.iterancestors())
|
||||
|
||||
|
@ -193,9 +240,12 @@ class HtmlFilter:
|
|||
|
||||
|
||||
def _newlines_to_return_symbol(self, el: HtmlElement) -> HtmlElement:
|
||||
# Add a return unicode symbol (U+23CE) to blocks with siblings
|
||||
# (e.g. a <p> followed by another <p>) or <br>.
|
||||
# The <br> themselves will be removed by the inline sanitizer.
|
||||
"""Turn newlines into unicode return symbols (⏎, U+23CE).
|
||||
|
||||
The symbol is added to blocks with siblings (e.g. a `<p>` followed by
|
||||
another `<p>`) and `<br>` tags.
|
||||
The `<br>` themselves will be removed by the inline sanitizer.
|
||||
"""
|
||||
|
||||
is_block_with_siblings = (el.tag in self.block_tags and
|
||||
next(el.itersiblings(), None) is not None)
|
||||
|
@ -214,4 +264,4 @@ class HtmlFilter:
|
|||
return el
|
||||
|
||||
|
||||
HTML_FILTER = HtmlFilter()
|
||||
HTML_PROCESSOR = HTMLProcessor()
|
|
@ -30,7 +30,7 @@ from .errors import (
|
|||
BadMimeType, InvalidUserId, InvalidUserInContext, MatrixError,
|
||||
UneededThumbnail, UserNotFound,
|
||||
)
|
||||
from .html_filter import HTML_FILTER
|
||||
from .html_markdown import HTML_PROCESSOR as HTML
|
||||
from .models.items import (
|
||||
Account, Event, Member, Room, TypeSpecifier, Upload, UploadStatus,
|
||||
)
|
||||
|
@ -205,13 +205,13 @@ class MatrixClient(nio.AsyncClient):
|
|||
event_type = nio.RoomMessageEmote
|
||||
text = text[len("/me "): ]
|
||||
content = {"body": text, "msgtype": "m.emote"}
|
||||
to_html = HTML_FILTER.from_markdown_inline(text, outgoing=True)
|
||||
echo_body = HTML_FILTER.from_markdown_inline(text)
|
||||
to_html = HTML.from_markdown_inline(text, outgoing=True)
|
||||
echo_body = HTML.from_markdown_inline(text)
|
||||
else:
|
||||
event_type = nio.RoomMessageText
|
||||
content = {"body": text, "msgtype": "m.text"}
|
||||
to_html = HTML_FILTER.from_markdown(text, outgoing=True)
|
||||
echo_body = HTML_FILTER.from_markdown(text)
|
||||
to_html = HTML.from_markdown(text, outgoing=True)
|
||||
echo_body = HTML.from_markdown(text)
|
||||
|
||||
if to_html not in (html.escape(text), f"<p>{html.escape(text)}</p>"):
|
||||
content["format"] = "org.matrix.custom.html"
|
||||
|
@ -832,7 +832,7 @@ class MatrixClient(nio.AsyncClient):
|
|||
display_name = room.display_name or "",
|
||||
avatar_url = room.gen_avatar_url or "",
|
||||
plain_topic = room.topic or "",
|
||||
topic = HTML_FILTER.filter_inline(room.topic or ""),
|
||||
topic = HTML.filter_inline(room.topic or ""),
|
||||
inviter_id = inviter,
|
||||
inviter_name = room.user_name(inviter) if inviter else "",
|
||||
inviter_avatar =
|
||||
|
|
|
@ -10,7 +10,7 @@ import lxml # nosec
|
|||
|
||||
import nio
|
||||
|
||||
from ..html_filter import HTML_FILTER
|
||||
from ..html_markdown import HTML_PROCESSOR
|
||||
from ..utils import AutoStrEnum, auto
|
||||
from .model_item import ModelItem
|
||||
|
||||
|
@ -200,7 +200,7 @@ class Event(ModelItem):
|
|||
|
||||
def __post_init__(self) -> None:
|
||||
if not self.inline_content:
|
||||
self.inline_content = HTML_FILTER.filter_inline(self.content)
|
||||
self.inline_content = HTML_PROCESSOR.filter_inline(self.content)
|
||||
|
||||
|
||||
def __lt__(self, other: "Event") -> bool:
|
||||
|
|
|
@ -10,7 +10,7 @@ from urllib.parse import quote
|
|||
import nio
|
||||
|
||||
from . import utils
|
||||
from .html_filter import HTML_FILTER
|
||||
from .html_markdown import HTML_PROCESSOR
|
||||
from .matrix_client import MatrixClient
|
||||
from .models.items import Account, Room, TypeSpecifier
|
||||
|
||||
|
@ -80,7 +80,7 @@ class NioCallbacks:
|
|||
# Content: %1 is the sender, %2 the target (ev.state_key).
|
||||
|
||||
async def onRoomMessageText(self, room, ev) -> None:
|
||||
co = HTML_FILTER.filter(
|
||||
co = HTML_PROCESSOR.filter(
|
||||
ev.formatted_body
|
||||
if ev.format == "org.matrix.custom.html" else
|
||||
utils.plain2html(ev.body),
|
||||
|
@ -315,7 +315,7 @@ class NioCallbacks:
|
|||
|
||||
async def onRoomTopicEvent(self, room, ev) -> None:
|
||||
if ev.topic:
|
||||
topic = HTML_FILTER.filter_inline(ev.topic)
|
||||
topic = HTML_PROCESSOR.filter_inline(ev.topic)
|
||||
co = f"%1 changed the room's topic to \"{topic}\""
|
||||
else:
|
||||
co = "%1 removed the room's topic"
|
||||
|
|
Loading…
Reference in New Issue
Block a user