Rewrite media caching (old image provider)

- Doesn't use pyotherside's image provider feature, for more flexibility
  and simplicity
- Suitable for supporting matrix media events and more later
- Avoid a lot of duplicate files that the old cache created due to
  server not returning what we expect, mistakes in Python/QML code, etc
- Changed file structure
  (e.g. thumbnails/32x32/<mxc id> instead of
   thumbnails/<mxc id>.32.32.crop)

- Backend.wait_until_client_exists: start issuing warnings if the
  function runs for more than 10s, which in most cases means a bad user
  ID was passed

- New HMxcImage QML component, used in H(User/Room)Avatar
This commit is contained in:
miruka
2019-11-03 13:48:12 -04:00
parent 55d4035f60
commit 2f19ff493b
20 changed files with 291 additions and 261 deletions

View File

@@ -38,12 +38,7 @@ class App:
self.backend = Backend(app=self)
self.debug = False
from .image_provider import ImageProvider
self.image_provider = ImageProvider(self)
pyotherside.set_image_provider(self.image_provider.get)
self.loop = asyncio.get_event_loop()
self.loop = asyncio.get_event_loop()
self.loop_thread = Thread(target=self._loop_starter)
self.loop_thread.start()

View File

@@ -109,6 +109,7 @@ class Backend:
async def wait_until_client_exists(self, user_id: str = "") -> None:
loops = 0
while True:
if user_id and user_id in self.clients:
return
@@ -116,7 +117,12 @@ class Backend:
if not user_id and self.clients:
return
if loops and loops % 100 == 0: # every 10s except first time
log.warning("Waiting for account %s to exist, %ds passed",
user_id, loops // 10)
await asyncio.sleep(0.1)
loops += 1
# General functions

View File

@@ -1,183 +0,0 @@
import asyncio
import logging as log
import random
import re
from dataclasses import dataclass, field
from io import BytesIO
from pathlib import Path
from typing import Optional, Tuple
from urllib.parse import urlparse
import aiofiles
from PIL import Image as PILImage
import nio
import pyotherside
from nio.api import ResizingMethod
from . import utils
# Type aliases used throughout this module.
POSFormat = int                          # a pyotherside format enum value
Size = Tuple[int, int]                   # (width, height)
ImageData = Tuple[bytearray, Size, int]  # last int: pyotherside format enum

# Semaphore held by Thumbnail.download() while a thumbnail request is running.
CONCURRENT_DOWNLOADS_LIMIT = asyncio.BoundedSemaphore(8)

# Fallback (data, format) pair handed out whenever a thumbnail can't be
# downloaded or processed: a fully transparent 1x1 PNG, rendered once here.
with BytesIO() as img_out:
    _blank_pixel = PILImage.new(mode="RGBA", size=(1, 1), color=(0, 0, 0, 0))
    _blank_pixel.save(img_out, format="PNG")
    # getvalue() must be called before the buffer gets closed by the `with`.
    TRANSPARENT_1X1_PNG = (img_out.getvalue(), pyotherside.format_data)
@dataclass
class Thumbnail:
    """A thumbnail of a matrix media file, cached on disk for the provider.

    The thumbnail is looked up in the provider's cache directory first and
    only requested from the server when no usable local file is found.

    Raises:
        ValueError: on creation, if `mxc` is not a `mxc://<server>/<id>` URI.
    """

    provider: "ImageProvider" = field()
    mxc:      str             = field()
    width:    int             = field()  # width requested by the caller
    height:   int             = field()  # height requested by the caller


    def __post_init__(self) -> None:
        # A trailing "#auto" marker is not part of the media URI itself.
        self.mxc = re.sub(r"#auto$", "", self.mxc)

        if not re.match(r"^mxc://.+/.+", self.mxc):
            raise ValueError(f"Invalid mxc URI: {self.mxc}")


    @property
    def server_size(self) -> Tuple[int, int]:
        """Smallest standard thumbnail size that covers the requested one."""
        # https://matrix.org/docs/spec/client_server/latest#thumbnails

        if self.width > 640 or self.height > 480:
            return (800, 600)

        if self.width > 320 or self.height > 240:
            return (640, 480)

        if self.width > 96 or self.height > 96:
            return (320, 240)

        if self.width > 32 or self.height > 32:
            return (96, 96)

        return (32, 32)


    @property
    def resize_method(self) -> ResizingMethod:
        """Scale thumbnails bigger than 96x96, crop the smaller ones."""
        return ResizingMethod.scale \
            if self.width > 96 or self.height > 96 else ResizingMethod.crop


    @property
    def http(self) -> str:
        """HTTP URL equivalent of the mxc URI, as computed by nio."""
        return nio.Api.mxc_to_http(self.mxc)


    @property
    def local_path(self) -> Path:
        """Cache file path: `<cache>/<server>/<id>.<width>.<height>.<method>`."""
        parsed = urlparse(self.mxc)
        name   = "%s.%03d.%03d.%s" % (
            parsed.path.lstrip("/"),
            self.server_size[0],
            self.server_size[1],
            self.resize_method.value,
        )
        return self.provider.cache / parsed.netloc / name


    async def read_data(
        self, data: bytes, mime: Optional[str],
    ) -> Tuple[bytes, POSFormat]:
        """Return `data` in a form pyotherside can load, with its format enum.

        SVG, JPEG and PNG data is passed through untouched. Anything else is
        decoded with PIL and re-encoded as PNG. If PIL raises an `OSError`
        while doing so, a warning is logged and the transparent 1x1
        placeholder is returned instead.
        """
        if mime == "image/svg+xml":
            return (data, pyotherside.format_svg_data)

        if mime in ("image/jpeg", "image/png"):
            return (data, pyotherside.format_data)

        try:
            # `data` holds an encoded image file, never a raw pixel buffer, so
            # it must not be labeled as format_rgb888/format_argb32 whatever
            # the decoded image's mode is: always hand out an encoded PNG.
            with BytesIO(data) as img_in, BytesIO() as img_out:
                PILImage.open(img_in).save(img_out, "PNG")
                return (img_out.getvalue(), pyotherside.format_data)

        except OSError as err:
            log.warning("Unable to process image: %s - %r", self.http, err)
            return TRANSPARENT_1X1_PNG


    async def download(self) -> Tuple[bytes, POSFormat]:
        """Request the thumbnail from the server and save it to the cache.

        The request is made through a randomly picked client of the backend.
        A failed request is logged, is not cached, and results in the
        transparent 1x1 placeholder being returned.
        """
        client = random.choice(
            tuple(self.provider.app.backend.clients.values()),
        )
        parsed = urlparse(self.mxc)

        async with CONCURRENT_DOWNLOADS_LIMIT:
            resp = await client.thumbnail(
                server_name = parsed.netloc,
                media_id    = parsed.path.lstrip("/"),
                width       = self.server_size[0],
                height      = self.server_size[1],
                method      = self.resize_method,
            )

        if isinstance(resp, nio.ThumbnailError):
            log.warning("Downloading thumbnail failed - %s", resp)
            return TRANSPARENT_1X1_PNG

        body, pos_format = await self.read_data(resp.body, resp.content_type)

        self.local_path.parent.mkdir(parents=True, exist_ok=True)

        async with aiofiles.open(self.local_path, "wb") as file:
            # body might have been converted, always save the original image.
            await file.write(resp.body)

        return (body, pos_format)


    async def local_read(self) -> Tuple[bytes, POSFormat]:
        """Load the thumbnail from the cache file.

        Raises:
            OSError: if the cache file is missing or can't be read.
        """
        data = self.local_path.read_bytes()

        with BytesIO(data) as data_io:
            return await self.read_data(data, utils.guess_mime(data_io))


    async def get_data(self) -> ImageData:
        """Return `(data, real size, format)`, from cache or from the server."""
        try:
            data, pos_format = await self.local_read()
        except OSError:  # covers FileNotFoundError; IOError is an alias
            data, pos_format = await self.download()

        # NOTE(review): SVG data is passed through by read_data() but PIL
        # presumably can't open it, so this lookup may raise - confirm.
        with BytesIO(data) as img_in:
            real_size = PILImage.open(img_in).size

        return (bytearray(data), real_size, pos_format)
class ImageProvider:
    """Synchronous entry point handing thumbnail data to pyotherside."""

    def __init__(self, app) -> None:
        self.app = app

        # All thumbnails live in a "thumbnails" folder of the app's cache dir.
        cache_root = Path(self.app.appdirs.user_cache_dir)
        self.cache = cache_root / "thumbnails"
        self.cache.mkdir(parents=True, exist_ok=True)


    def get(self, image_id: str, requested_size: Size) -> ImageData:
        """Return `(data, size, format)` for the thumbnail of `image_id`.

        `image_id` is expected to be a mxc URI; if it isn't one, a warning
        is logged and a transparent 1x1 PNG is returned.

        Raises:
            ValueError: if the requested width or height is smaller than 1.
        """
        if requested_size[0] < 1 or requested_size[1] < 1:
            raise ValueError(f"width or height < 1: {requested_size!r}")

        try:
            thumb = Thumbnail(self, image_id, *requested_size)
        except ValueError as err:
            log.warning(err)
            placeholder, pos_format = TRANSPARENT_1X1_PNG
            return (bytearray(placeholder), (1, 1), pos_format)

        # Run the coroutine on the app's asyncio loop and block this thread
        # until its result is available.
        future = asyncio.run_coroutine_threadsafe(
            thumb.get_data(), self.app.loop,
        )
        return future.result()

View File

@@ -93,6 +93,10 @@ class MatrixClient(nio.AsyncClient):
self.skipped_events: DefaultDict[str, int] = DefaultDict(lambda: 0)
from .media_cache import MediaCache
cache_dir = Path(self.backend.app.appdirs.user_cache_dir)
self.media_cache = MediaCache(self, cache_dir)
self.connect_callbacks()

182
src/python/media_cache.py Normal file
View File

@@ -0,0 +1,182 @@
import asyncio
import io
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import DefaultDict, Optional, Tuple
from urllib.parse import urlparse
import aiofiles
import nio
from PIL import Image as PILImage
from .matrix_client import MatrixClient
# One lock per mxc URI, created on first access; Media.get() holds it while
# looking up or downloading that media.
ACCESS_LOCKS: DefaultDict[str, asyncio.Lock] = DefaultDict(asyncio.Lock)

# Semaphore held by Media._download() while remote data is being fetched.
CONCURRENT_DOWNLOADS_LIMIT = asyncio.BoundedSemaphore(8)

# (width, height)
Size = Tuple[int, int]
@dataclass
class DownloadFailed(Exception):
    """Raised when the server answers a media request with an error."""

    message: str    # error message taken from the server's response
    http_code: int  # status code taken from the server's response
@dataclass
class Media:
    """A matrix media file, identified by its mxc URI and cached on disk.

    This base class doesn't know how to fetch anything: subclasses have to
    implement `_get_remote_data()`.

    Raises:
        ValueError: on creation, if `mxc` is not a `mxc://<server>/<id>` URI.
    """

    cache: "MediaCache" = field()
    mxc: str = field()


    def __post_init__(self) -> None:
        # A trailing "#auto" marker is not part of the media URI itself.
        self.mxc = re.sub(r"#auto$", "", self.mxc)

        if re.match(r"^mxc://.+/.+", self.mxc) is None:
            raise ValueError(f"Invalid mxc URI: {self.mxc}")


    @property
    def http(self) -> str:
        """HTTP URL equivalent of the mxc URI, as computed by nio."""
        return nio.Api.mxc_to_http(self.mxc)


    @property
    def local_path(self) -> Path:
        """Cache file path: `<downloads dir>/<server name>/<media id>`."""
        uri = urlparse(self.mxc)
        media_id = uri.path.lstrip("/")
        return self.cache.downloads_dir / uri.netloc / media_id


    async def get(self) -> Path:
        """Return the path of the cached file, downloading it first if needed.

        The lookup and the download both happen while holding the lock of
        this mxc URI.
        """
        async with ACCESS_LOCKS[self.mxc]:
            try:
                cached = await self._get_local_existing_file()
            except FileNotFoundError:
                return await self._download()

            return cached


    async def _get_local_existing_file(self) -> Path:
        """Return `local_path` if it exists, else raise `FileNotFoundError`."""
        path = self.local_path

        if path.exists():
            return path

        raise FileNotFoundError()


    async def _download(self) -> Path:
        """Fetch the remote data, write it to `local_path`, return that path."""
        async with CONCURRENT_DOWNLOADS_LIMIT:
            body = await self._get_remote_data()

        # Only read local_path once the data is fetched: the Thumbnail
        # subclass derives its path from the size the server answered with.
        destination = self.local_path
        destination.parent.mkdir(parents=True, exist_ok=True)

        async with aiofiles.open(destination, "wb") as file:
            await file.write(body)

        return destination


    async def _get_remote_data(self) -> bytes:
        """Return the media's content from the server (subclass hook)."""
        raise NotImplementedError()
@dataclass
class Thumbnail(Media):
    """A thumbnail of a matrix media file, cached per standard size.

    Files are stored as `<thumbs dir>/<server name>/<width>x<height>/<id>`,
    where the size is one of the standard sizes `normalize_size()` returns.
    """

    cache: "MediaCache" = field()
    mxc: str = field()
    # Size requested by the caller, sent as is to the server.
    wanted_size: Size = field()
    # Real size of the image the server answered with, None until a download.
    server_size: Optional[Size] = field(init=False, repr=False, default=None)


    @staticmethod
    def normalize_size(size: Size) -> Size:
        """Return the smallest standard thumbnail size that covers `size`."""
        # https://matrix.org/docs/spec/client_server/latest#thumbnails

        if size[0] > 640 or size[1] > 480:
            return (800, 600)

        if size[0] > 320 or size[1] > 240:
            return (640, 480)

        if size[0] > 96 or size[1] > 96:
            return (320, 240)

        if size[0] > 32 or size[1] > 32:
            return (96, 96)

        return (32, 32)


    @property
    def local_path(self) -> Path:
        """Cache file path, based on `server_size` once it is known."""
        # example: thumbnails/matrix.org/32x32/<mxc id>

        parsed = urlparse(self.mxc)
        size   = self.normalize_size(self.server_size or self.wanted_size)
        name   = "%dx%d/%s" % (size[0], size[1], parsed.path.lstrip("/"))

        return self.cache.thumbs_dir / parsed.netloc / name


    async def _get_local_existing_file(self) -> Path:
        """Return a cached thumbnail at least as big as the one wanted.

        Raises:
            FileNotFoundError: if no cached file of a suitable size exists.
        """
        exact = self.local_path

        if exact.exists():
            return exact

        # If we have a bigger size thumbnail than the wanted_size for this pic,
        # return it instead of asking the server for a smaller thumbnail.

        try_sizes = ((32, 32), (96, 96), (320, 240), (640, 480), (800, 600))
        size      = self.normalize_size(self.server_size or self.wanted_size)

        for width, height in try_sizes:
            if width < size[0] or height < size[1]:
                continue

            # Swap only the "<width>x<height>" folder. Joining the parts of
            # an absolute path with "/" would produce a "//..." path.
            path = exact.parent.with_name(f"{width}x{height}") / exact.name

            if path.exists():
                return path

        raise FileNotFoundError()


    async def _get_remote_data(self) -> bytes:
        """Request the thumbnail from the server and return the image data.

        As a side effect, `server_size` is set to the real size of the
        received image, which determines where it will be cached.

        Raises:
            DownloadFailed: if the server answered with an error response.
        """
        parsed = urlparse(self.mxc)

        resp = await self.cache.client.thumbnail(
            server_name = parsed.netloc,
            media_id    = parsed.path.lstrip("/"),
            width       = self.wanted_size[0],
            height      = self.wanted_size[1],
        )

        # This must be checked before touching resp.body: an error response
        # is not expected to carry an image, and trying to read one would
        # hide the real failure behind an unrelated exception.
        if isinstance(resp, nio.ErrorResponse):
            raise DownloadFailed(resp.message, resp.status_code)

        with io.BytesIO(resp.body) as img:
            # The server may return a thumbnail bigger than what we asked for
            self.server_size = PILImage.open(img).size

        return resp.body
@dataclass
class MediaCache:
    """On-disk cache of the media files fetched through a matrix client."""

    client: MatrixClient  # client used to request media from the server
    base_dir: Path        # folder holding the cache's subfolders


    def __post_init__(self) -> None:
        self.thumbs_dir = self.base_dir / "thumbnails"
        self.downloads_dir = self.base_dir / "downloads"

        # Make sure both subfolders exist as soon as the cache is created.
        for directory in (self.thumbs_dir, self.downloads_dir):
            directory.mkdir(parents=True, exist_ok=True)


    async def thumbnail(self, mxc: str, width: int, height: int) -> str:
        """Return the local file path of a thumbnail for the `mxc` media."""
        thumb = Thumbnail(self, mxc, (width, height))
        path = await thumb.get()
        return str(path)