Remove file caching

This commit is contained in:
Lonami Exo 2019-06-07 20:57:05 +02:00
parent f6f7345a3a
commit 78971fd2e5
4 changed files with 14 additions and 169 deletions

View File

@ -23,18 +23,6 @@ if typing.TYPE_CHECKING:
from .telegramclient import TelegramClient from .telegramclient import TelegramClient
class _CacheType:
"""Like functools.partial but pretends to be the wrapped class."""
def __init__(self, cls):
self._cls = cls
def __call__(self, *args, **kwargs):
return self._cls(*args, file_reference=b'', **kwargs)
def __eq__(self, other):
return self._cls == other
def _resize_photo_if_needed( def _resize_photo_if_needed(
file, is_image, width=1280, height=1280, background=(255, 255, 255)): file, is_image, width=1280, height=1280, background=(255, 255, 255)):
@ -98,7 +86,6 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
reply_to: 'hints.MessageIDLike' = None, reply_to: 'hints.MessageIDLike' = None,
attributes: 'typing.Sequence[types.TypeDocumentAttribute]' = None, attributes: 'typing.Sequence[types.TypeDocumentAttribute]' = None,
thumb: 'hints.FileLike' = None, thumb: 'hints.FileLike' = None,
allow_cache: bool = True,
parse_mode: str = (), parse_mode: str = (),
voice_note: bool = False, voice_note: bool = False,
video_note: bool = False, video_note: bool = False,
@ -187,12 +174,6 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
Successful thumbnails were files below 20kb and 200x200px. Successful thumbnails were files below 20kb and 200x200px.
Width/height and dimensions/size ratios may be important. Width/height and dimensions/size ratios may be important.
allow_cache (`bool`, optional):
Whether to allow using the cached version stored in the
database or not. Defaults to ``True`` to avoid re-uploads.
Must be ``False`` if you wish to use different attributes
or thumb than those that were used when the file was cached.
parse_mode (`object`, optional): parse_mode (`object`, optional):
See the `TelegramClient.parse_mode See the `TelegramClient.parse_mode
<telethon.client.messageparse.MessageParseMethods.parse_mode>` <telethon.client.messageparse.MessageParseMethods.parse_mode>`
@ -202,16 +183,10 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
voice_note (`bool`, optional): voice_note (`bool`, optional):
If ``True`` the audio will be sent as a voice note. If ``True`` the audio will be sent as a voice note.
Set `allow_cache` to ``False`` if you sent the same file
without this setting before for it to work.
video_note (`bool`, optional): video_note (`bool`, optional):
If ``True`` the video will be sent as a video note, If ``True`` the video will be sent as a video note,
also known as a round video message. also known as a round video message.
Set `allow_cache` to ``False`` if you sent the same file
without this setting before for it to work.
buttons (`list`, `custom.Button <telethon.tl.custom.button.Button>`, :tl:`KeyboardButton`): buttons (`list`, `custom.Button <telethon.tl.custom.button.Button>`, :tl:`KeyboardButton`):
The matrix (list of lists), row list or button to be shown The matrix (list of lists), row list or button to be shown
after sending the message. This parameter will only work if after sending the message. This parameter will only work if
@ -292,7 +267,7 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
for x in documents: for x in documents:
result.append(await self.send_file( result.append(await self.send_file(
entity, x, allow_cache=allow_cache, entity, x,
caption=caption, force_document=force_document, caption=caption, force_document=force_document,
progress_callback=progress_callback, reply_to=reply_to, progress_callback=progress_callback, reply_to=reply_to,
attributes=attributes, thumb=thumb, voice_note=voice_note, attributes=attributes, thumb=thumb, voice_note=voice_note,
@ -317,7 +292,7 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
file_handle, media, image = await self._file_to_media( file_handle, media, image = await self._file_to_media(
file, force_document=force_document, file, force_document=force_document,
progress_callback=progress_callback, progress_callback=progress_callback,
attributes=attributes, allow_cache=allow_cache, thumb=thumb, attributes=attributes, thumb=thumb,
voice_note=voice_note, video_note=video_note, voice_note=voice_note, video_note=video_note,
supports_streaming=supports_streaming supports_streaming=supports_streaming
) )
@ -332,7 +307,6 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
entities=msg_entities, reply_markup=markup, silent=silent entities=msg_entities, reply_markup=markup, silent=silent
) )
msg = self._get_response_message(request, await self(request), entity) msg = self._get_response_message(request, await self(request), entity)
await self._cache_media(msg, file, file_handle, image=image)
return msg return msg
@ -340,15 +314,6 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
progress_callback=None, reply_to=None, progress_callback=None, reply_to=None,
parse_mode=(), silent=None): parse_mode=(), silent=None):
"""Specialized version of .send_file for albums""" """Specialized version of .send_file for albums"""
# We don't care if the user wants to avoid cache, we will use it
# anyway. Why? The cached version will be exactly the same thing
# we need to produce right now to send albums (uploadMedia), and
# cache only makes a difference for documents where the user may
# want the attributes used on them to change.
#
# In theory documents can be sent inside the albums but they appear
# as different messages (not inside the album), and the logic to set
# the attributes/avoid cache is already written in .send_file().
entity = await self.get_input_entity(entity) entity = await self.get_input_entity(entity)
if not utils.is_list_like(caption): if not utils.is_list_like(caption):
caption = (caption,) caption = (caption,)
@ -359,7 +324,6 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
reply_to = utils.get_message_id(reply_to) reply_to = utils.get_message_id(reply_to)
# Need to upload the media first, but only if they're not cached yet
media = [] media = []
for file in files: for file in files:
# Albums want :tl:`InputMedia` which, in theory, includes # Albums want :tl:`InputMedia` which, in theory, includes
@ -371,9 +335,6 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
r = await self(functions.messages.UploadMediaRequest( r = await self(functions.messages.UploadMediaRequest(
entity, media=fm entity, media=fm
)) ))
self.session.cache_file(
fh.md5, fh.size, utils.get_input_photo(r.photo))
fm = utils.get_input_media(r.photo) fm = utils.get_input_media(r.photo)
if captions: if captions:
@ -403,13 +364,14 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
# Sent photo IDs -> messages # Sent photo IDs -> messages
return [messages[m.media.id.id] for m in media] return [messages[m.media.id.id] for m in media]
# TODO Offer a way to easily save media for later use, to replace old caching system
async def upload_file( async def upload_file(
self: 'TelegramClient', self: 'TelegramClient',
file: 'hints.FileLike', file: 'hints.FileLike',
*, *,
part_size_kb: float = None, part_size_kb: float = None,
file_name: str = None, file_name: str = None,
use_cache: type = None,
progress_callback: 'hints.ProgressCallback' = None) -> 'types.TypeInputFile': progress_callback: 'hints.ProgressCallback' = None) -> 'types.TypeInputFile':
""" """
Uploads a file to Telegram's servers, without sending it. Uploads a file to Telegram's servers, without sending it.
@ -438,13 +400,6 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
If not specified, the name will be taken from the ``file`` If not specified, the name will be taken from the ``file``
and if this is not a ``str``, it will be ``"unnamed"``. and if this is not a ``str``, it will be ``"unnamed"``.
use_cache (`type`, optional):
The type of cache to use (currently either :tl:`InputDocument`
or :tl:`InputPhoto`). If present and the file is small enough
to need the MD5, it will be checked against the database,
and if a match is found, the upload won't be made. Instead,
an instance of type ``use_cache`` will be returned.
progress_callback (`callable`, optional): progress_callback (`callable`, optional):
A callback function accepting two parameters: A callback function accepting two parameters:
``(sent bytes, total)``. ``(sent bytes, total)``.
@ -526,19 +481,11 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
hash_md5 = hashlib.md5() hash_md5 = hashlib.md5()
if not is_large: if not is_large:
# Calculate the MD5 hash before anything else. # Calculate the MD5 hash before anything else.
# As this needs to be done always for small files, # This needs to be done always for small files.
# might as well do it before anything else and
# check the cache.
if isinstance(file, str): if isinstance(file, str):
with open(file, 'rb') as stream: with open(file, 'rb') as stream:
file = stream.read() file = stream.read()
hash_md5.update(file) hash_md5.update(file)
if use_cache:
cached = self.session.get_file(
hash_md5.digest(), file_size, cls=_CacheType(use_cache)
)
if cached:
return cached
part_count = (file_size + part_size - 1) // part_size part_count = (file_size + part_size - 1) // part_size
self._log[__name__].info('Uploading file of %d bytes in %d chunks of %d', self._log[__name__].info('Uploading file of %d bytes in %d chunks of %d',
@ -581,7 +528,7 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
async def _file_to_media( async def _file_to_media(
self, file, force_document=False, self, file, force_document=False,
progress_callback=None, attributes=None, thumb=None, progress_callback=None, attributes=None, thumb=None,
allow_cache=True, voice_note=False, video_note=False, voice_note=False, video_note=False,
supports_streaming=False, mime_type=None, as_image=None): supports_streaming=False, mime_type=None, as_image=None):
if not file: if not file:
return None, None, None return None, None, None
@ -615,12 +562,10 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
media = None media = None
file_handle = None file_handle = None
use_cache = types.InputPhoto if as_image else types.InputDocument
if not isinstance(file, str) or os.path.isfile(file): if not isinstance(file, str) or os.path.isfile(file):
file_handle = await self.upload_file( file_handle = await self.upload_file(
_resize_photo_if_needed(file, as_image), _resize_photo_if_needed(file, as_image),
progress_callback=progress_callback, progress_callback=progress_callback
use_cache=use_cache if allow_cache else None
) )
elif re.match('https?://', file): elif re.match('https?://', file):
if as_image: if as_image:
@ -641,12 +586,6 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
'Failed to convert {} to media. Not an existing file, ' 'Failed to convert {} to media. Not an existing file, '
'an HTTP URL or a valid bot-API-like file ID'.format(file) 'an HTTP URL or a valid bot-API-like file ID'.format(file)
) )
elif isinstance(file_handle, use_cache):
# File was cached, so an instance of use_cache was returned
if as_image:
media = types.InputMediaPhoto(file_handle)
else:
media = types.InputMediaDocument(file_handle)
elif as_image: elif as_image:
media = types.InputMediaUploadedPhoto(file_handle) media = types.InputMediaUploadedPhoto(file_handle)
else: else:
@ -674,16 +613,4 @@ class UploadMethods(ButtonMethods, MessageParseMethods, UserMethods):
) )
return file_handle, media, as_image return file_handle, media, as_image
async def _cache_media(self: 'TelegramClient', msg, file, file_handle, image):
    """
    Store the media of a just-sent message in the session's file cache.

    Nothing happens unless ``file`` and ``msg`` are truthy and
    ``file_handle`` is a ``custom.InputSizedFile``. Otherwise the photo
    (when ``image`` is truthy) or document of ``msg.media`` is converted
    to its input form and passed to ``self.session.cache_file`` together
    with the handle's ``md5`` and ``size``.
    """
    if not (file and msg):
        return
    if not isinstance(file_handle, custom.InputSizedFile):
        return

    # There was a response message and the handle is a fresh upload
    # (not a cached instance), so remember what was just sent.
    digest = file_handle.md5
    length = file_handle.size
    to_cache = (
        utils.get_input_photo(msg.media.photo) if image
        else utils.get_input_document(msg.media.document)
    )
    self.session.cache_file(digest, length, to_cache)
# endregion # endregion

View File

@ -144,24 +144,3 @@ class Session(ABC):
to use a cached username to avoid extra RPC). to use a cached username to avoid extra RPC).
""" """
raise NotImplementedError raise NotImplementedError
@abstractmethod
def cache_file(self, md5_digest, file_size, instance):
    """
    Persistently remember an uploaded file so that using the same
    file again does not require uploading it a second time.

    ``instance`` is either an ``InputPhoto`` or an ``InputDocument``;
    both expose ``.id`` and ``.access_hash`` attributes.
    """
    raise NotImplementedError
@abstractmethod
def get_file(self, md5_digest, file_size, cls):
    """
    Look up a saved record matching ``md5_digest`` and ``file_size``
    and return it as an instance of ``cls``.

    ``cls`` is either ``InputPhoto`` or ``InputDocument``; both take
    the two parameters ``id`` and ``access_hash``, in that order.
    """
    raise NotImplementedError

View File

@ -1,29 +1,12 @@
from enum import Enum
from .abstract import Session from .abstract import Session
from .. import utils from .. import utils
from ..tl import TLObject from ..tl import TLObject
from ..tl.types import ( from ..tl.types import (
PeerUser, PeerChat, PeerChannel, PeerUser, PeerChat, PeerChannel,
InputPeerUser, InputPeerChat, InputPeerChannel, InputPeerUser, InputPeerChat, InputPeerChannel
InputPhoto, InputDocument
) )
class _SentFileType(Enum):
DOCUMENT = 0
PHOTO = 1
@staticmethod
def from_type(cls):
if cls == InputDocument:
return _SentFileType.DOCUMENT
elif cls == InputPhoto:
return _SentFileType.PHOTO
else:
raise ValueError('The cls must be either InputDocument/InputPhoto')
class MemorySession(Session): class MemorySession(Session):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
@ -34,7 +17,6 @@ class MemorySession(Session):
self._auth_key = None self._auth_key = None
self._takeout_id = None self._takeout_id = None
self._files = {}
self._entities = set() self._entities = set()
self._update_states = {} self._update_states = {}
@ -228,17 +210,3 @@ class MemorySession(Session):
return InputPeerChannel(entity_id, entity_hash) return InputPeerChannel(entity_id, entity_hash)
else: else:
raise ValueError('Could not find input entity with key ', key) raise ValueError('Could not find input entity with key ', key)
def cache_file(self, md5_digest, file_size, instance):
    """
    Remember ``instance`` in the in-memory ``_files`` mapping.

    The entry is keyed by ``(md5_digest, file_size, file type)`` and
    stores the ``(id, access_hash)`` pair of ``instance``. Raises
    ``TypeError`` unless ``instance`` is an ``InputDocument`` or
    ``InputPhoto``.
    """
    if not isinstance(instance, (InputDocument, InputPhoto)):
        raise TypeError('Cannot cache %s instance' % type(instance))

    file_type = _SentFileType.from_type(type(instance))
    self._files[(md5_digest, file_size, file_type)] = (
        instance.id, instance.access_hash
    )
def get_file(self, md5_digest, file_size, cls):
    """
    Return a ``cls`` instance built from the cached ``(id, access_hash)``
    pair for this file, or ``None`` when nothing is cached.

    The lookup key is ``(md5_digest, file_size, file type)``, where the
    file type comes from ``_SentFileType.from_type(cls)`` (which raises
    ``ValueError`` for unsupported classes).
    """
    key = (md5_digest, file_size, _SentFileType.from_type(cls))
    # Only a missing key means "not cached". Constructing ``cls`` is kept
    # outside any KeyError handling so an error raised by the constructor
    # is not mistaken for a cache miss.
    stored = self._files.get(key)
    if stored is None:
        return None
    return cls(*stored)

View File

@ -2,11 +2,11 @@ import datetime
import os import os
from telethon.tl import types from telethon.tl import types
from .memory import MemorySession, _SentFileType from .memory import MemorySession
from .. import utils from .. import utils
from ..crypto import AuthKey from ..crypto import AuthKey
from ..tl.types import ( from ..tl.types import (
InputPhoto, InputDocument, PeerUser, PeerChat, PeerChannel PeerUser, PeerChat, PeerChannel
) )
try: try:
@ -17,7 +17,7 @@ except ImportError as e:
sqlite3_err = type(e) sqlite3_err = type(e)
EXTENSION = '.session' EXTENSION = '.session'
CURRENT_VERSION = 5 # database version CURRENT_VERSION = 6 # database version
class SQLiteSession(MemorySession): class SQLiteSession(MemorySession):
@ -87,15 +87,6 @@ class SQLiteSession(MemorySession):
name text name text
)""" )"""
, ,
"""sent_files (
md5_digest blob,
file_size integer,
type integer,
id integer,
hash integer,
primary key(md5_digest, file_size, type)
)"""
,
"""update_state ( """update_state (
id integer primary key, id integer primary key,
pts integer, pts integer,
@ -143,6 +134,9 @@ class SQLiteSession(MemorySession):
if old == 4: if old == 4:
old += 1 old += 1
c.execute("alter table sessions add column takeout_id integer") c.execute("alter table sessions add column takeout_id integer")
if old == 5:
old += 1
c.execute('drop table sent_files')
c.close() c.close()
@staticmethod @staticmethod
@ -300,26 +294,3 @@ class SQLiteSession(MemorySession):
utils.get_peer_id(PeerChat(id)), utils.get_peer_id(PeerChat(id)),
utils.get_peer_id(PeerChannel(id)) utils.get_peer_id(PeerChannel(id))
) )
# File processing
def get_file(self, md5_digest, file_size, cls):
    """
    Query the ``sent_files`` table for a row matching the digest, size
    and file type, and return it as ``cls(id, hash)``.

    Returns ``None`` when the query yields no row.
    """
    query = (
        'select id, hash from sent_files '
        'where md5_digest = ? and file_size = ? and type = ?'
    )
    row = self._execute(
        query, md5_digest, file_size, _SentFileType.from_type(cls).value
    )
    if not row:
        return None

    # Both allowed classes take (id, access_hash) as their parameters
    file_id, access_hash = row[0], row[1]
    return cls(file_id, access_hash)
def cache_file(self, md5_digest, file_size, instance):
    """
    Insert or replace the ``sent_files`` row for this file.

    The row holds the digest, size, numeric file type, and the ``id``
    and ``access_hash`` of ``instance``. Raises ``TypeError`` unless
    ``instance`` is an ``InputDocument`` or ``InputPhoto``.
    """
    if not isinstance(instance, (InputDocument, InputPhoto)):
        raise TypeError('Cannot cache %s instance' % type(instance))

    type_id = _SentFileType.from_type(type(instance)).value
    self._execute(
        'insert or replace into sent_files values (?,?,?,?,?)',
        md5_digest, file_size, type_id,
        instance.id, instance.access_hash
    )