2019-12-19 22:46:16 +11:00
|
|
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
|
|
|
|
2019-12-19 07:24:43 +11:00
|
|
|
"""Matrix media downloading, caching and retrieval."""
|
2019-12-19 04:43:31 +11:00
|
|
|
|
2019-11-04 04:48:12 +11:00
|
|
|
import asyncio
|
2019-11-05 05:37:25 +11:00
|
|
|
import functools
|
2019-11-04 04:48:12 +11:00
|
|
|
import io
|
|
|
|
import re
|
2019-11-18 04:31:00 +11:00
|
|
|
import shutil
|
|
|
|
import sys
|
2019-11-04 04:48:12 +11:00
|
|
|
from dataclasses import dataclass, field
|
|
|
|
from pathlib import Path
|
2020-02-12 07:22:05 +11:00
|
|
|
from typing import TYPE_CHECKING, Any, DefaultDict, Dict, Optional
|
2019-11-04 04:48:12 +11:00
|
|
|
from urllib.parse import urlparse
|
2020-07-19 12:26:23 +10:00
|
|
|
from .errors import MatrixError
|
2019-11-04 04:48:12 +11:00
|
|
|
|
2020-02-12 07:22:05 +11:00
|
|
|
from PIL import Image as PILImage
|
2019-11-05 05:37:25 +11:00
|
|
|
|
2020-03-13 19:35:51 +11:00
|
|
|
import nio
|
|
|
|
|
|
|
|
from .utils import Size, atomic_write
|
2019-11-04 04:48:12 +11:00
|
|
|
|
2020-02-12 07:22:05 +11:00
|
|
|
if TYPE_CHECKING:
|
|
|
|
from .backend import Backend
|
|
|
|
|
2019-11-18 04:31:00 +11:00
|
|
|
if sys.version_info < (3, 8):
|
|
|
|
import pyfastcopy # noqa
|
|
|
|
|
2019-11-05 05:37:25 +11:00
|
|
|
# Decryption info for an encrypted attachment: the `["key"]["k"]`,
# `["hashes"]["sha256"]` and `["iv"]` entries are read when decrypting.
# `None` or an empty dict means the media is treated as unencrypted.
CryptDict = Optional[Dict[str, Any]]
|
2019-11-04 04:48:12 +11:00
|
|
|
|
|
|
|
# Allows at most 8 remote media fetches to run at the same time; it is held
# around each fetch attempt in `Media.create()`.
CONCURRENT_DOWNLOADS_LIMIT = asyncio.BoundedSemaphore(8)
|
|
|
|
# One lock per mxc URI, created on first access. `Media.get()` holds it while
# looking up the cached file and downloading it if it is missing.
ACCESS_LOCKS: DefaultDict[str, asyncio.Lock] = DefaultDict(asyncio.Lock)
|
|
|
|
|
|
|
|
|
2019-12-19 04:43:31 +11:00
|
|
|
@dataclass
class MediaCache:
    """On-disk cache for media downloaded from matrix homeservers.

    Full files are stored under `<base_dir>/downloads` and thumbnails under
    `<base_dir>/thumbnails`. Both folders are created on instantiation.
    """

    backend: "Backend"
    base_dir: Path

    def __post_init__(self) -> None:
        """Compute the cache folders' paths and create them if missing."""

        self.thumbs_dir = self.base_dir / "thumbnails"
        self.downloads_dir = self.base_dir / "downloads"

        for directory in (self.thumbs_dir, self.downloads_dir):
            directory.mkdir(parents=True, exist_ok=True)

    async def get_media(
        self,
        mxc: str,
        title: str,
        crypt_dict: CryptDict = None,
    ) -> Path:
        """Return the local path of a media file, downloading it if needed.

        Shortcut for `Media(...).get()`, intended for QML convenience.
        """

        media = Media(self, mxc, title, crypt_dict)
        return await media.get()

    async def get_thumbnail(
        self,
        mxc: str,
        title: str,
        width: int,
        height: int,
        crypt_dict: CryptDict = None,
    ) -> Path:
        """Return the local path of a thumbnail, downloading it if needed.

        Shortcut for `Thumbnail(...).get()`, intended for QML convenience.
        """

        # QML sometimes passes float sizes, which the matrix API doesn't like.
        wanted_size = (round(width), round(height))

        thumbnail = Thumbnail(self, mxc, title, crypt_dict, wanted_size)
        return await thumbnail.get()
|
|
|
|
|
|
|
|
|
2019-11-04 04:48:12 +11:00
|
|
|
@dataclass
class Media:
    """A media file hosted on a matrix homeserver and cached on local disk.

    `mxc` is the file's `mxc://<homeserver domain>/<mxc id>` URI, `title` is
    the file name used to build the cached file's name, and `crypt_dict`
    holds the decryption info for encrypted files (falsy if unencrypted).
    """

    cache: "MediaCache"
    mxc: str
    title: str
    crypt_dict: CryptDict = field(repr=False)

    def __post_init__(self) -> None:
        """Strip a trailing `#auto` from the URI, then validate its form.

        Raises `ValueError` if the result doesn't look like an mxc URI.
        """

        self.mxc = re.sub(r"#auto$", "", self.mxc)

        if re.match(r"^mxc://.+/.+", self.mxc) is None:
            raise ValueError(f"Invalid mxc URI: {self.mxc}")

    @property
    def local_path(self) -> Path:
        """The path where the file either exists or should be downloaded.

        The returned paths are in this form:
        ```
        <base download folder>/<homeserver domain>/
        <file title>_<mxc id>.<file extension>
        ```
        e.g. `~/.cache/mirage/downloads/matrix.org/foo_Hm24ar11i768b0el.png`.
        """

        uri = urlparse(self.mxc)
        media_id = uri.path.lstrip("/")
        name = Path(self.title)

        homeserver_dir = self.cache.downloads_dir / uri.netloc
        return homeserver_dir / f"{name.stem}_{media_id}{name.suffix}"

    async def get(self) -> Path:
        """Return the cached file's path, downloading it first if needed."""

        # The lock for this mxc is held across both the lookup and the
        # download.
        async with ACCESS_LOCKS[self.mxc]:
            try:
                path = await self._get_local_existing_file()
            except FileNotFoundError:
                path = await self.create()

            return path

    async def _get_local_existing_file(self) -> Path:
        """Return the cached file's path, raise `FileNotFoundError` if none."""

        path = self.local_path

        if path.exists():
            return path

        raise FileNotFoundError()

    async def create(self) -> Path:
        """Download and cache the media file to disk.

        When the server answers with a 404 or a 5xx error, the download is
        retried without limit, waiting longer between each attempt (up to
        30 seconds). Any other `MatrixError` is re-raised.
        """

        attempt = 0

        while True:
            try:
                async with CONCURRENT_DOWNLOADS_LIMIT:
                    content = await self._get_remote_data()
                break
            except MatrixError as err:
                should_retry = err.http_code == 404 or err.http_code >= 500

                if not should_retry:
                    raise

            exponent = min(1000, attempt) - 1
            await asyncio.sleep(min(30, 0.2 * (2 ** exponent)))
            attempt += 1

        destination = self.local_path
        destination.parent.mkdir(parents=True, exist_ok=True)

        async with atomic_write(destination, binary=True) as (file, done):
            await file.write(content)
            done()

        return destination

    async def _get_remote_data(self) -> bytes:
        """Return the file's data from the matrix server, decrypt if needed."""

        uri = urlparse(self.mxc)

        response = await self.cache.backend.download(
            server_name=uri.netloc,
            media_id=uri.path.lstrip("/"),
        )

        return await self._decrypt(response.body)

    async def _decrypt(self, data: bytes) -> bytes:
        """Return `data` decrypted using `crypt_dict`, or as-is if not set."""

        info = self.crypt_dict

        if not info:
            return data

        # Run the decryption in the loop's default executor.
        loop = asyncio.get_event_loop()

        return await loop.run_in_executor(
            None,
            nio.crypto.attachments.decrypt_attachment,
            data,
            info["key"]["k"],
            info["hashes"]["sha256"],
            info["iv"],
        )

    @classmethod
    async def from_existing_file(
        cls,
        cache: "MediaCache",
        mxc: str,
        existing: Path,
        overwrite: bool = False,
        **kwargs,
    ) -> "Media":
        """Copy an existing file to cache and return a `Media` for it.

        The file is only copied if no cached file exists yet, or if
        `overwrite` is true. Extra `kwargs` are passed to the constructor.
        """

        media = cls(cache, mxc, existing.name, {}, **kwargs)  # type: ignore

        target = media.local_path
        target.parent.mkdir(parents=True, exist_ok=True)

        if overwrite or not target.exists():
            loop = asyncio.get_event_loop()
            await loop.run_in_executor(None, shutil.copy, existing, target)

        return media

    @classmethod
    async def from_bytes(
        cls,
        cache: "MediaCache",
        mxc: str,
        filename: str,
        data: bytes,
        overwrite: bool = False,
        **kwargs,
    ) -> "Media":
        """Create a cached file from bytes data and return a `Media` for it.

        The file is only written if no cached file exists yet, or if
        `overwrite` is true. Extra `kwargs` are passed to the constructor.
        """

        media = cls(cache, mxc, filename, {}, **kwargs)  # type: ignore

        target = media.local_path
        target.parent.mkdir(parents=True, exist_ok=True)

        if overwrite or not target.exists():
            async with atomic_write(target, binary=True) as (file, done):
                await file.write(data)
                done()

        return media
|
|
|
|
|
|
|
|
|
2019-11-04 04:48:12 +11:00
|
|
|
@dataclass
class Thumbnail(Media):
    """The thumbnail of a matrix media, which is a media itself.

    `wanted_size` is the `(width, height)` requested by the caller.
    `server_size` is the size of the image actually received; it is only
    set once thumbnail data has been fetched by `_get_remote_data()`.
    """

    cache: "MediaCache" = field()
    mxc: str = field()
    title: str = field()
    crypt_dict: CryptDict = field(repr=False)
    wanted_size: Size = field()

    server_size: Optional[Size] = field(init=False, repr=False, default=None)

    @staticmethod
    def normalize_size(size: Size) -> Size:
        """Return standard `(width, height)` matrix thumbnail dimensions.

        The Matrix specification defines a few standard thumbnail dimensions
        for homeservers to store and return: 32x32, 96x96, 320x240, 640x480,
        and 800x600.

        This method returns the best matching size for a `size` without
        upscaling, e.g. passing `(641, 480)` will return `(800, 600)`.
        """

        if size[0] > 640 or size[1] > 480:
            return (800, 600)

        if size[0] > 320 or size[1] > 240:
            return (640, 480)

        if size[0] > 96 or size[1] > 96:
            return (320, 240)

        if size[0] > 32 or size[1] > 32:
            return (96, 96)

        return (32, 32)

    @property
    def local_path(self) -> Path:
        """The path where the thumbnail either exists or should be downloaded.

        The returned paths are in this form:
        ```
        <base thumbnail folder>/<homeserver domain>/<standard size>/
        <file title>_<mxc id>.<file extension>
        ```
        e.g.
        `~/.cache/mirage/thumbnails/matrix.org/32x32/foo_Hm24ar11i768b0el.png`.

        The standard size is derived from `server_size` when it is known,
        else from `wanted_size`.
        """

        size = self.normalize_size(self.server_size or self.wanted_size)
        size_dir = f"{size[0]}x{size[1]}"

        parsed = urlparse(self.mxc)
        mxc_id = parsed.path.lstrip("/")
        title = Path(self.title)
        filename = f"{title.stem}_{mxc_id}{title.suffix}"

        return self.cache.thumbs_dir / parsed.netloc / size_dir / filename

    async def _get_local_existing_file(self) -> Path:
        """Return an existing thumbnail path or raise `FileNotFoundError`.

        If we have a bigger size thumbnail downloaded than the `wanted_size`
        for the media, return it instead of asking the server for a
        smaller thumbnail.
        """

        exact_path = self.local_path

        if exact_path.exists():
            return exact_path

        try_sizes = ((32, 32), (96, 96), (320, 240), (640, 480), (800, 600))
        size = self.normalize_size(self.server_size or self.wanted_size)

        # Folder holding one sub-folder per standard size for this homeserver.
        sizes_root = exact_path.parent.parent

        for width, height in try_sizes:
            if width < size[0] or height < size[1]:
                continue

            # Compose with pathlib rather than joining the parts with "/":
            # for an absolute path, the latter yields a "//"-prefixed path
            # on POSIX, which pathlib keeps as-is.
            path = sizes_root / f"{width}x{height}" / exact_path.name

            if path.exists():
                return path

        raise FileNotFoundError()

    async def _get_remote_data(self) -> bytes:
        """Return the (decrypted) media file's content from the server.

        As a side effect, `server_size` is set to the dimensions of the
        received image.
        """

        parsed = urlparse(self.mxc)

        if self.crypt_dict:
            # Matrix makes encrypted thumbs only available through the download
            # end-point, not the thumbnail one
            resp = await self.cache.backend.download(
                server_name=parsed.netloc,
                media_id=parsed.path.lstrip("/"),
            )
        else:
            resp = await self.cache.backend.thumbnail(
                server_name=parsed.netloc,
                media_id=parsed.path.lstrip("/"),
                width=self.wanted_size[0],
                height=self.wanted_size[1],
            )

        decrypted = await self._decrypt(resp.body)

        with io.BytesIO(decrypted) as img:
            # The server may return a thumbnail bigger than what we asked for
            self.server_size = PILImage.open(img).size

        return decrypted
|