moment/src/backend/utils.py

355 lines
10 KiB
Python
Raw Normal View History

# Copyright Mirage authors & contributors <https://github.com/mirukana/mirage>
2019-12-19 22:46:16 +11:00
# SPDX-License-Identifier: LGPL-3.0-or-later
2019-12-19 00:42:56 +11:00
"""Various utilities that are used throughout the package."""
2019-11-12 00:26:35 +11:00
import asyncio
import collections
import html
import inspect
import io
import json
2020-03-15 08:32:05 +11:00
import sys
import xml.etree.cElementTree as xml_etree
from concurrent.futures import ProcessPoolExecutor
from contextlib import suppress
from datetime import date, datetime, time, timedelta
from enum import Enum
Big performance refactoring & various improvements Instead of passing all sorts of events for the JS to handle and manually add to different data models, we now handle everything we can in Python. For any change, the python models send a sync event with their contents (no more than 4 times per second) to JS, and the QSyncable library's JsonListModel takes care of converting it to a QML ListModel and sending the appropriate signals. The SortFilterProxyModel library is not used anymore, the only case where we need to filter/sort something now is when the user interacts with the "Filter rooms" or "Filter members" fields. These cases are handled by a simple JS function. We now keep separated room and timeline models for different accounts, the previous approach of sharing all the data we could between accounts created a lot of complications (local echoes, decrypted messages replacing others, etc). The users's own account profile changes are now hidden in the timeline. On startup, if all events for a room were only own profile changes, more events will be loaded. Any kind of image format supported by Qt is now handled by the pyotherside image provider, instead of just PNG/JPG. SVGs which previously caused errors are supported as well. The typing members bar paddings/margins are fixed. The behavior of the avatar/"upload a profile picture" overlay is fixed. Config files read from disk are now cached (TODO: make them reloadable again). Pylint is not used anymore because of all its annoying false warnings and lack of understanding for dataclasses, it is replaced by flake8 with a custom config and various plugins. Debug mode is now considered on if the program was compiled with the right option, instead of taking an argument from CLI. When on, C++ will set a flag in the Window QML component. The loading screen is now unloaded after the UI is ready, where previously it just stayed in the background invisible and wasted CPU. The overall refactoring and improvements make us now able to handle rooms with thousand of members and no lazy-loading, where previously everything would freeze and simply scrolling up to load past events in any room would block the UI for a few seconds.
2019-08-11 22:01:22 +10:00
from enum import auto as autostr
from pathlib import Path
2020-03-13 19:35:51 +11:00
from tempfile import NamedTemporaryFile
from types import ModuleType
2020-03-13 19:35:51 +11:00
from typing import (
Any, AsyncIterator, Callable, Dict, Iterable, Mapping, Optional, Sequence,
Tuple, Type, Union,
2020-03-13 19:35:51 +11:00
)
from uuid import UUID
2020-03-13 19:35:51 +11:00
import aiofiles
Big performance refactoring & various improvements Instead of passing all sorts of events for the JS to handle and manually add to different data models, we now handle everything we can in Python. For any change, the python models send a sync event with their contents (no more than 4 times per second) to JS, and the QSyncable library's JsonListModel takes care of converting it to a QML ListModel and sending the appropriate signals. The SortFilterProxyModel library is not used anymore, the only case where we need to filter/sort something now is when the user interacts with the "Filter rooms" or "Filter members" fields. These cases are handled by a simple JS function. We now keep separated room and timeline models for different accounts, the previous approach of sharing all the data we could between accounts created a lot of complications (local echoes, decrypted messages replacing others, etc). The users's own account profile changes are now hidden in the timeline. On startup, if all events for a room were only own profile changes, more events will be loaded. Any kind of image format supported by Qt is now handled by the pyotherside image provider, instead of just PNG/JPG. SVGs which previously caused errors are supported as well. The typing members bar paddings/margins are fixed. The behavior of the avatar/"upload a profile picture" overlay is fixed. Config files read from disk are now cached (TODO: make them reloadable again). Pylint is not used anymore because of all its annoying false warnings and lack of understanding for dataclasses, it is replaced by flake8 with a custom config and various plugins. Debug mode is now considered on if the program was compiled with the right option, instead of taking an argument from CLI. When on, C++ will set a flag in the Window QML component. The loading screen is now unloaded after the UI is ready, where previously it just stayed in the background invisible and wasted CPU. The overall refactoring and improvements make us now able to handle rooms with thousand of members and no lazy-loading, where previously everything would freeze and simply scrolling up to load past events in any room would block the UI for a few seconds.
2019-08-11 22:01:22 +10:00
import filetype
from aiofiles.threadpool.binary import AsyncBufferedIOBase
from nio.crypto import AsyncDataT as File
from nio.crypto import async_generator_from_data
from PIL import Image as PILImage
Big performance refactoring & various improvements Instead of passing all sorts of events for the JS to handle and manually add to different data models, we now handle everything we can in Python. For any change, the python models send a sync event with their contents (no more than 4 times per second) to JS, and the QSyncable library's JsonListModel takes care of converting it to a QML ListModel and sending the appropriate signals. The SortFilterProxyModel library is not used anymore, the only case where we need to filter/sort something now is when the user interacts with the "Filter rooms" or "Filter members" fields. These cases are handled by a simple JS function. We now keep separated room and timeline models for different accounts, the previous approach of sharing all the data we could between accounts created a lot of complications (local echoes, decrypted messages replacing others, etc). The users's own account profile changes are now hidden in the timeline. On startup, if all events for a room were only own profile changes, more events will be loaded. Any kind of image format supported by Qt is now handled by the pyotherside image provider, instead of just PNG/JPG. SVGs which previously caused errors are supported as well. The typing members bar paddings/margins are fixed. The behavior of the avatar/"upload a profile picture" overlay is fixed. Config files read from disk are now cached (TODO: make them reloadable again). Pylint is not used anymore because of all its annoying false warnings and lack of understanding for dataclasses, it is replaced by flake8 with a custom config and various plugins. Debug mode is now considered on if the program was compiled with the right option, instead of taking an argument from CLI. When on, C++ will set a flag in the Window QML component. The loading screen is now unloaded after the UI is ready, where previously it just stayed in the background invisible and wasted CPU. The overall refactoring and improvements make us now able to handle rooms with thousand of members and no lazy-loading, where previously everything would freeze and simply scrolling up to load past events in any room would block the UI for a few seconds.
2019-08-11 22:01:22 +10:00
from .color import Color
2020-03-15 08:32:05 +11:00
if sys.version_info >= (3, 7):
from contextlib import asynccontextmanager
else:
from async_generator import asynccontextmanager
Size = Tuple[int, int]
BytesOrPIL = Union[bytes, PILImage.Image]
auto = autostr
COMPRESSION_POOL = ProcessPoolExecutor()
class AutoStrEnum(Enum):
2020-03-13 05:41:00 +11:00
"""An Enum where auto() assigns the member's name instead of an integer.
2019-11-12 00:26:35 +11:00
Example:
>>> class Fruits(AutoStrEnum): apple = auto()
>>> Fruits.apple.value
"apple"
"""
2019-12-19 00:42:56 +11:00
@staticmethod
def _generate_next_value_(name, *_):
return name
2019-11-12 00:26:35 +11:00
def dict_update_recursive(dict1: dict, dict2: dict) -> None:
2019-12-19 00:42:56 +11:00
"""Deep-merge `dict1` and `dict2`, recursive version of `dict.update()`."""
# https://gist.github.com/angstwad/bf22d1822c38a92ec0a9
2019-11-12 00:26:35 +11:00
for k in dict2:
if (k in dict1 and isinstance(dict1[k], dict) and
isinstance(dict2[k], collections.Mapping)):
dict_update_recursive(dict1[k], dict2[k])
else:
dict1[k] = dict2[k]
Big performance refactoring & various improvements Instead of passing all sorts of events for the JS to handle and manually add to different data models, we now handle everything we can in Python. For any change, the python models send a sync event with their contents (no more than 4 times per second) to JS, and the QSyncable library's JsonListModel takes care of converting it to a QML ListModel and sending the appropriate signals. The SortFilterProxyModel library is not used anymore, the only case where we need to filter/sort something now is when the user interacts with the "Filter rooms" or "Filter members" fields. These cases are handled by a simple JS function. We now keep separated room and timeline models for different accounts, the previous approach of sharing all the data we could between accounts created a lot of complications (local echoes, decrypted messages replacing others, etc). The users's own account profile changes are now hidden in the timeline. On startup, if all events for a room were only own profile changes, more events will be loaded. Any kind of image format supported by Qt is now handled by the pyotherside image provider, instead of just PNG/JPG. SVGs which previously caused errors are supported as well. The typing members bar paddings/margins are fixed. The behavior of the avatar/"upload a profile picture" overlay is fixed. Config files read from disk are now cached (TODO: make them reloadable again). Pylint is not used anymore because of all its annoying false warnings and lack of understanding for dataclasses, it is replaced by flake8 with a custom config and various plugins. Debug mode is now considered on if the program was compiled with the right option, instead of taking an argument from CLI. When on, C++ will set a flag in the Window QML component. The loading screen is now unloaded after the UI is ready, where previously it just stayed in the background invisible and wasted CPU. The overall refactoring and improvements make us now able to handle rooms with thousand of members and no lazy-loading, where previously everything would freeze and simply scrolling up to load past events in any room would block the UI for a few seconds.
2019-08-11 22:01:22 +10:00
def flatten_dict_keys(
source: Optional[Dict[str, Any]] = None,
separator: str = ".",
last_level: bool = True,
_flat: Optional[Dict[str, Any]] = None,
_prefix: str = "",
) -> Dict[str, Any]:
"""Return a flattened version of the ``source`` dict.
Example:
>>> dct
{"content": {"body": "foo"}, "m.test": {"key": {"bar": 1}}}
>>> flatten_dict_keys(dct)
{"content.body": "foo", "m.test.key.bar": 1}
>>> flatten_dict_keys(dct, last_level=False)
{"content": {"body": "foo"}, "m.test.key": {bar": 1}}
"""
flat = {} if _flat is None else _flat
for key, value in (source or {}).items():
if isinstance(value, dict):
prefix = f"{_prefix}{key}{separator}"
flatten_dict_keys(value, separator, last_level, flat, prefix)
elif last_level:
flat[f"{_prefix}{key}"] = value
else:
prefix = _prefix[:-len(separator)] # remove trailing separator
flat.setdefault(prefix, {})[key] = value
return flat
async def is_svg(file: File) -> bool:
2019-12-19 00:42:56 +11:00
"""Return whether the file is a SVG (`lxml` is used for detection)."""
2019-11-12 00:26:35 +11:00
chunks = [c async for c in async_generator_from_data(file)]
with io.BytesIO(b"".join(chunks)) as file:
try:
_, element = next(xml_etree.iterparse(file, ("start",)))
return element.tag == "{http://www.w3.org/2000/svg}svg"
except (StopIteration, xml_etree.ParseError):
return False
Big performance refactoring & various improvements Instead of passing all sorts of events for the JS to handle and manually add to different data models, we now handle everything we can in Python. For any change, the python models send a sync event with their contents (no more than 4 times per second) to JS, and the QSyncable library's JsonListModel takes care of converting it to a QML ListModel and sending the appropriate signals. The SortFilterProxyModel library is not used anymore, the only case where we need to filter/sort something now is when the user interacts with the "Filter rooms" or "Filter members" fields. These cases are handled by a simple JS function. We now keep separated room and timeline models for different accounts, the previous approach of sharing all the data we could between accounts created a lot of complications (local echoes, decrypted messages replacing others, etc). The users's own account profile changes are now hidden in the timeline. On startup, if all events for a room were only own profile changes, more events will be loaded. Any kind of image format supported by Qt is now handled by the pyotherside image provider, instead of just PNG/JPG. SVGs which previously caused errors are supported as well. The typing members bar paddings/margins are fixed. The behavior of the avatar/"upload a profile picture" overlay is fixed. Config files read from disk are now cached (TODO: make them reloadable again). Pylint is not used anymore because of all its annoying false warnings and lack of understanding for dataclasses, it is replaced by flake8 with a custom config and various plugins. Debug mode is now considered on if the program was compiled with the right option, instead of taking an argument from CLI. When on, C++ will set a flag in the Window QML component. The loading screen is now unloaded after the UI is ready, where previously it just stayed in the background invisible and wasted CPU. The overall refactoring and improvements make us now able to handle rooms with thousand of members and no lazy-loading, where previously everything would freeze and simply scrolling up to load past events in any room would block the UI for a few seconds.
2019-08-11 22:01:22 +10:00
async def svg_dimensions(file: File) -> Size:
2019-12-19 00:42:56 +11:00
"""Return the width and height, or viewBox width and height for a SVG.
2019-11-12 00:26:35 +11:00
If these properties are missing (broken file), ``(256, 256)`` is returned.
"""
chunks = [c async for c in async_generator_from_data(file)]
with io.BytesIO(b"".join(chunks)) as file:
attrs = xml_etree.parse(file).getroot().attrib
try:
width = round(float(attrs.get("width", attrs["viewBox"].split()[3])))
except (KeyError, IndexError, ValueError, TypeError):
width = 256
try:
height = round(float(attrs.get("height", attrs["viewBox"].split()[4])))
except (KeyError, IndexError, ValueError, TypeError):
height = 256
return (width, height)
async def guess_mime(file: File) -> str:
2019-12-19 00:42:56 +11:00
"""Return the file's mimetype, or `application/octet-stream` if unknown."""
2019-11-12 00:26:35 +11:00
if isinstance(file, io.IOBase):
file.seek(0, 0)
elif isinstance(file, AsyncBufferedIOBase):
await file.seek(0, 0) # type: ignore
try:
first_chunk: bytes
async for first_chunk in async_generator_from_data(file):
break
else:
return "inode/x-empty" # empty file
# TODO: plaintext
mime = filetype.guess_mime(first_chunk)
return mime or (
"image/svg+xml" if await is_svg(file) else
"application/octet-stream"
)
finally:
if isinstance(file, io.IOBase):
file.seek(0, 0)
elif isinstance(file, AsyncBufferedIOBase):
await file.seek(0, 0) # type: ignore
def plain2html(text: str) -> str:
2019-12-19 00:42:56 +11:00
"""Convert `\\n` into `<br>` tags and `\\t` into four spaces."""
2019-11-12 00:26:35 +11:00
return html.escape(text)\
.replace("\n", "<br>")\
.replace("\t", "&nbsp;" * 4)
def serialize_value_for_qml(
value: Any, json_list_dicts: bool = False, reject_unknown: bool = False,
) -> Any:
2019-12-19 00:42:56 +11:00
"""Convert a value to make it easier to use from QML.
Returns:
2019-11-12 00:26:35 +11:00
- For `bool`, `int`, `float`, `bytes`, `str`, `datetime`, `date`, `time`:
the unchanged value (PyOtherSide handles these)
2020-03-13 05:41:00 +11:00
- For `Iterable` objects (includes `list` and `dict`):
2020-03-13 05:41:00 +11:00
a JSON dump if `json_list_dicts` is `True`, else the unchanged value
- If the value is an instancied object and has a `serialized` attribute or
property, return that
2020-03-13 05:41:00 +11:00
- For `Enum` members, the actual value of the member
- For `Path` objects, a `file://<path...>` string
- For `UUID` object: the UUID in string form
- For `timedelta` objects: the delta as a number of milliseconds `int`
- For `Color` objects: the color's hexadecimal value
2020-03-13 05:41:00 +11:00
- For class types: the class `__name__`
- For anything else: raise a `TypeError` if `reject_unknown` is `True`,
else return the unchanged value.
2019-11-12 00:26:35 +11:00
"""
if isinstance(value, (bool, int, float, bytes, str, datetime, date, time)):
return value
if json_list_dicts and isinstance(value, (Sequence, Mapping)):
return json.dumps(value)
if not inspect.isclass(value) and hasattr(value, "serialized"):
return value.serialized
if isinstance(value, Iterable):
return value
if hasattr(value, "__class__") and issubclass(value.__class__, Enum):
return value.value
if isinstance(value, Path):
return f"file://{value!s}"
if isinstance(value, UUID):
return str(value)
2019-12-06 01:00:23 +11:00
if isinstance(value, timedelta):
return value.total_seconds() * 1000
if isinstance(value, Color):
return value.hex
if inspect.isclass(value):
return value.__name__
if reject_unknown:
raise TypeError("Unknown type reject")
return value
def deep_serialize_for_qml(obj: Iterable) -> Union[list, dict]:
"""Recursively serialize lists and dict values for QML."""
if isinstance(obj, Mapping):
dct = {}
for key, value in obj.items():
if isinstance(value, Iterable) and not isinstance(value, str):
# PyOtherSide only accept dicts with string keys
dct[str(key)] = deep_serialize_for_qml(value)
continue
with suppress(TypeError):
dct[str(key)] = \
serialize_value_for_qml(value, reject_unknown=True)
return dct
lst = []
for value in obj:
if isinstance(value, Iterable) and not isinstance(value, str):
lst.append(deep_serialize_for_qml(value))
continue
with suppress(TypeError):
lst.append(serialize_value_for_qml(value, reject_unknown=True))
return lst
def classes_defined_in(module: ModuleType) -> Dict[str, Type]:
2019-12-19 00:42:56 +11:00
"""Return a `{name: class}` dict of all the classes a module defines."""
2019-11-12 00:26:35 +11:00
return {
m[0]: m[1] for m in inspect.getmembers(module, inspect.isclass)
if not m[0].startswith("_") and
m[1].__module__.startswith(module.__name__)
}
2020-03-13 19:35:51 +11:00
@asynccontextmanager
async def aiopen(*args, **kwargs) -> AsyncIterator[Any]:
"""Wrapper for `aiofiles.open()` that doesn't break mypy"""
async with aiofiles.open(*args, **kwargs) as file:
yield file
2020-03-13 19:35:51 +11:00
@asynccontextmanager
async def atomic_write(
path: Union[Path, str], binary: bool = False, **kwargs,
) -> AsyncIterator[Tuple[Any, Callable[[], None]]]:
"""Write a file asynchronously (using aiofiles) and atomically.
Yields a `(open_temporary_file, done_function)` tuple.
The done function should be called after writing to the given file.
When the context manager exits, the temporary file will either replace
`path` if the function was called, or be deleted.
Example:
>>> async with atomic_write("foo.txt") as (file, done):
>>> await file.write("Sample text")
>>> done()
"""
2020-03-13 19:35:51 +11:00
mode = "wb" if binary else "w"
path = Path(path)
temp = NamedTemporaryFile(dir=path.parent, delete=False)
temp_path = Path(temp.name)
can_replace = False
def done() -> None:
nonlocal can_replace
can_replace = True
2020-03-13 19:35:51 +11:00
try:
async with aiopen(temp_path, mode, **kwargs) as out:
yield (out, done)
2020-03-13 19:35:51 +11:00
finally:
if can_replace:
temp_path.replace(path)
else:
temp_path.unlink()
def _compress(image: BytesOrPIL, fmt: str, optimize: bool) -> bytes:
if isinstance(image, bytes):
pil_image = PILImage.open(io.BytesIO(image))
else:
pil_image = image
with io.BytesIO() as buffer:
pil_image.save(buffer, fmt, optimize=optimize)
return buffer.getvalue()
async def compress_image(
image: BytesOrPIL, fmt: str = "PNG", optimize: bool = True,
) -> bytes:
"""Compress image in a separate process, without blocking event loop."""
return await asyncio.get_event_loop().run_in_executor(
COMPRESSION_POOL, _compress, image, fmt, optimize,
)