# Mirror of https://github.com/LonamiWebs/Telethon.git
# Synced 2024-11-22 17:36:34 +03:00 (b6b4ea669d).
# Since it was easy to cause MRO inconsistencies, and it's not really needed
# now that self is type hinted as the client.
# 863 lines, 32 KiB, Python.
import datetime
import inspect
import io
import os
import pathlib
import typing

from .. import utils, helpers, errors, hints
from ..requestiter import RequestIter
from ..tl import TLObject, types, functions

try:
    import aiohttp
except ImportError:
    aiohttp = None

if typing.TYPE_CHECKING:
    from .telegramclient import TelegramClient

# Chunk sizes for upload.getFile must be multiples of the smallest size,
# so the largest allowed size is expressed in terms of the smallest one.
MIN_CHUNK_SIZE = 4 * 1024
MAX_CHUNK_SIZE = 128 * MIN_CHUNK_SIZE


class _DirectDownloadIter(RequestIter):
    """
    Download iterator that buffers every server response unchanged.

    Each step sends the stored ``upload.GetFileRequest`` once, appends the
    returned bytes to the buffer and advances the request offset by the
    configured stride.
    """

    async def _init(
            self, file, dc_id, offset, stride, chunk_size, request_size, file_size
    ):
        """
        Build the request and choose the sender used for the download.

        Arguments
            file:
                Passed as-is to ``functions.upload.GetFileRequest``.

            dc_id:
                Data center holding the file. When it is truthy and differs
                from the session's ``dc_id`` an exported sender is borrowed.

            offset:
                Initial ``offset`` of the request.

            stride:
                How much the offset advances after each request.

            chunk_size:
                Stored as ``_chunk_size`` (used by subclasses).

            request_size:
                The ``limit`` of every request.

            file_size:
                Stored as ``total``.
        """
        self.request = functions.upload.GetFileRequest(
            file, offset=offset, limit=request_size)

        self.total = file_size
        self._stride = stride
        self._chunk_size = chunk_size
        self._last_part = None

        self._exported = dc_id and self.client.session.dc_id != dc_id
        if not self._exported:
            # The used sender will also change if ``FileMigrateError`` occurs
            self._sender = self.client._sender
        else:
            try:
                self._sender = await self.client._borrow_exported_sender(dc_id)
            except errors.DcIdInvalidError:
                # Can't export a sender for the ID we are currently in.
                # Look for the DC option matching the address we are
                # connected to and store it in the session.
                config = await self.client(functions.help.GetConfigRequest())
                for option in config.dc_options:
                    if option.ip_address == self.client.session.server_address:
                        self.client.session.set_dc(
                            option.id, option.ip_address, option.port)
                        self.client.session.save()
                        break

                # TODO Figure out why the session may have the wrong DC ID
                self._sender = self.client._sender
                self._exported = False

    async def _load_next_chunk(self):
        """
        Fetch one response and buffer it.

        A response shorter than the requested limit marks the end of the
        file: ``left`` is set to the buffered amount and the iterator is
        closed. Otherwise the offset advances by the stride.
        """
        cur = await self._request()
        self.buffer.append(cur)
        if len(cur) < self.request.limit:
            self.left = len(self.buffer)
            await self.close()
        else:
            self.request.offset += self._stride

    async def _request(self):
        """
        Send the current request and return the bytes of the result.

        Raises
            ``NotImplementedError`` when the server answers with a
            :tl:`upload.FileCdnRedirect`.
        """
        try:
            result = await self._sender.send(self.request)
            if isinstance(result, types.upload.FileCdnRedirect):
                raise NotImplementedError  # TODO Implement
            else:
                return result.bytes

        except errors.FileMigrateError as e:
            self.client._log[__name__].info('File lives in another DC')
            # NOTE(review): if the current sender was already a borrowed one
            # it is replaced here without being returned first - confirm
            # whether that can happen in practice.
            self._sender = await self.client._borrow_exported_sender(e.new_dc)
            self._exported = True
            return await self._request()

    async def close(self):
        """
        Release the sender in use, if any.

        Borrowed (exported) senders are returned to the client, and a sender
        that is not the client's own one is disconnected. The sender
        reference is always cleared, so calling this again does nothing.
        """
        if not self._sender:
            return

        try:
            if self._exported:
                await self.client._return_exported_sender(self._sender)
            elif self._sender != self.client._sender:
                await self._sender.disconnect()
        finally:
            self._sender = None

    async def __aenter__(self):
        # Return the iterator so ``async with ... as it`` binds something
        # usable (this used to return ``None``).
        return self

    async def __aexit__(self, *args):
        await self.close()

    # Synchronous counterparts provided by the helpers module.
    __enter__ = helpers._sync_enter
    __exit__ = helpers._sync_exit


class _GenericDownloadIter(_DirectDownloadIter):
    """
    Download iterator for offsets, strides or chunk sizes which cannot be
    requested directly: it requests from an aligned offset and slices the
    fetched data into the chunks that were asked for.
    """

    async def _load_next_chunk(self, mask=MIN_CHUNK_SIZE - 1):
        """
        Fetch enough data for at least one chunk and buffer the chunks.

        ``mask`` selects the bits of the offset that make it misaligned.
        """
        # Remember where we really want to read from, and how far past the
        # previous aligned position that is.
        wanted_offset = self.request.offset
        misalign = wanted_offset & mask

        # Requests must start at an aligned offset.
        self.request.offset = wanted_offset - misalign

        fetched = b''
        exhausted = False
        while not (exhausted or len(fetched) - misalign >= self._chunk_size):
            part = await self._request()
            self.request.offset += self.request.limit

            fetched += part
            # A short response means there is nothing else to read.
            exhausted = len(part) < self.request.limit

        # Go back to the offset we wanted before aligning it.
        self.request.offset = wanted_offset

        # Slicing ``bytes`` copies, so hand out ``memoryview`` slices instead.
        view = memoryview(fetched)

        # The first chunk begins ``misalign`` bytes into the data and the
        # following ones are ``stride`` bytes apart from each other.
        for start in range(misalign, len(fetched), self._stride):
            self.buffer.append(view[start:start + self._chunk_size])

            # This offset will be yielded, so move on to the next one.
            self.request.offset += self._stride

        if exhausted:
            # Last data of the file: a trailing partial chunk is returned too.
            self.left = len(self.buffer)
            await self.close()
        elif len(self.buffer[-1]) != self._chunk_size:
            # Not done yet, so an incomplete chunk can't be returned.
            self._last_part = self.buffer.pop().tobytes()

            # Be careful with the offsets. Re-fetching a bit of data
            # is fine, since it greatly simplifies things.
            # TODO Try to not re-fetch data
            self.request.offset -= self._stride


class DownloadMethods:
    """
    Methods to download profile photos, message media and raw files.

    The methods annotate ``self`` as ``'TelegramClient'``, the client they
    are meant to be called on.
    """

    # region Public methods

    async def download_profile_photo(
            self: 'TelegramClient',
            entity: 'hints.EntityLike',
            file: 'hints.FileLike' = None,
            *,
            download_big: bool = True) -> typing.Optional[str]:
        """
        Downloads the profile photo from the given user, chat or channel.

        Arguments
            entity (`entity`):
                From who the photo will be downloaded.

                .. note::

                    This method expects the full entity (which has the data
                    to download the photo), not an input variant.

                    It's possible that sometimes you can't fetch the entity
                    from its input (since you can get errors like
                    ``ChannelPrivateError``) but you already have it through
                    another call, like getting a forwarded message from it.

            file (`str` | `file`, optional):
                The output file path, directory, or stream-like object.
                If the path exists and is a file, it will be overwritten.
                If file is the type `bytes`, it will be downloaded in-memory
                as a bytestring (e.g. ``file=bytes``).

            download_big (`bool`, optional):
                Whether to use the big version of the available photos.

        Returns
            ``None`` if no photo was provided, or if it was Empty. On success
            the file path is returned since it may differ from the one given.

        Example
            .. code-block:: python

                # Download your own profile photo
                path = client.download_profile_photo('me')
                print(path)
        """
        # hex(crc32(x.encode('ascii'))) for x in
        # ('User', 'Chat', 'UserFull', 'ChatFull')
        ENTITIES = (0x2da17977, 0xc5af5d94, 0x1f4661b9, 0xd49a2697)
        # ('InputPeer', 'InputUser', 'InputChannel')
        INPUTS = (0xc91c90b6, 0xe669bf46, 0x40f202fd)
        if not isinstance(entity, TLObject) or entity.SUBCLASS_OF_ID in INPUTS:
            entity = await self.get_entity(entity)

        # Index into the photo sizes: last one for big, first one for small.
        thumb = -1 if download_big else 0

        possible_names = []
        if entity.SUBCLASS_OF_ID not in ENTITIES:
            photo = entity
        else:
            if not hasattr(entity, 'photo'):
                # Special case: may be a ChatFull with photo:Photo
                # This is different from a normal UserProfilePhoto and Chat
                if not hasattr(entity, 'chat_photo'):
                    return None

                return await self._download_photo(
                    entity.chat_photo, file, date=None,
                    thumb=thumb, progress_callback=None
                )

            for attr in ('username', 'first_name', 'title'):
                possible_names.append(getattr(entity, attr, None))

            photo = entity.photo

        if isinstance(photo, (types.UserProfilePhoto, types.ChatPhoto)):
            dc_id = photo.dc_id
            which = photo.photo_big if download_big else photo.photo_small
            loc = types.InputPeerPhotoFileLocation(
                peer=await self.get_input_entity(entity),
                local_id=which.local_id,
                volume_id=which.volume_id,
                big=download_big
            )
        else:
            # It doesn't make any sense to check if `photo` can be used
            # as input location, because then this method would be able
            # to "download the profile photo of a message", i.e. its
            # media which should be done with `download_media` instead.
            return None

        file = self._get_proper_filename(
            file, 'profile_photo', '.jpg',
            possible_names=possible_names
        )

        try:
            result = await self.download_file(loc, file, dc_id=dc_id)
            return result if file is bytes else file
        except errors.LocationInvalidError:
            # See issue #500, Android app fails as of v4.6.0 (1155).
            # The fix seems to be using the full channel chat photo.
            ie = await self.get_input_entity(entity)
            if isinstance(ie, types.InputPeerChannel):
                full = await self(functions.channels.GetFullChannelRequest(ie))
                return await self._download_photo(
                    full.full_chat.chat_photo, file,
                    date=None, progress_callback=None,
                    thumb=thumb
                )
            else:
                # Until there's a report for chats, no need to.
                return None

    async def download_media(
            self: 'TelegramClient',
            message: 'hints.MessageLike',
            file: 'hints.FileLike' = None,
            *,
            thumb: 'hints.FileLike' = None,
            progress_callback: 'hints.ProgressCallback' = None) -> typing.Optional[typing.Union[str, bytes]]:
        """
        Downloads the given media from a message object.

        Note that if the download is too slow, you should consider installing
        ``cryptg`` (through ``pip install cryptg``) so that decrypting the
        received data is done in C instead of Python (much faster).

        See also `Message.download_media() <telethon.tl.custom.message.Message.download_media>`.

        Arguments
            message (`Message <telethon.tl.custom.message.Message>` | :tl:`Media`):
                The media or message containing the media that will be downloaded.

            file (`str` | `file`, optional):
                The output file path, directory, or stream-like object.
                If the path exists and is a file, it will be overwritten.
                If file is the type `bytes`, it will be downloaded in-memory
                as a bytestring (e.g. ``file=bytes``).

            progress_callback (`callable`, optional):
                A callback function accepting two parameters:
                ``(received bytes, total)``.

            thumb (`int` | :tl:`PhotoSize`, optional):
                Which thumbnail size from the document or photo to download,
                instead of downloading the document or photo itself.

                If it's specified but the file does not have a thumbnail,
                this method will return ``None``.

                The parameter should be an integer index between ``0`` and
                ``len(sizes)``. ``0`` will download the smallest thumbnail,
                and ``len(sizes) - 1`` will download the largest thumbnail.
                You can also use negative indices.

                You can also pass the :tl:`PhotoSize` instance to use.

                In short, use ``thumb=0`` if you want the smallest thumbnail
                and ``thumb=-1`` if you want the largest thumbnail.

        Returns
            ``None`` if no media was provided, or if it was Empty. On success
            the file path is returned since it may differ from the one given.

        Example
            .. code-block:: python

                path = client.download_media(message)
                client.download_media(message, filename)
                # or
                path = message.download_media()
                message.download_media(filename)
        """
        # TODO This won't work for messageService
        if isinstance(message, types.Message):
            date = message.date
            media = message.media
        else:
            # No message to take the date from, so use the current time.
            date = datetime.datetime.now()
            media = message

        if isinstance(media, str):
            media = utils.resolve_bot_file_id(media)

        if isinstance(media, types.MessageMediaWebPage):
            if isinstance(media.webpage, types.WebPage):
                media = media.webpage.document or media.webpage.photo

        if isinstance(media, (types.MessageMediaPhoto, types.Photo)):
            return await self._download_photo(
                media, file, date, thumb, progress_callback
            )
        elif isinstance(media, (types.MessageMediaDocument, types.Document)):
            return await self._download_document(
                media, file, date, thumb, progress_callback
            )
        elif isinstance(media, types.MessageMediaContact) and thumb is None:
            return self._download_contact(
                media, file
            )
        elif isinstance(media, (types.WebDocument, types.WebDocumentNoProxy)) and thumb is None:
            return await self._download_web_document(
                media, file, progress_callback
            )

    async def download_file(
            self: 'TelegramClient',
            input_location: 'hints.FileLike',
            file: 'hints.OutFileLike' = None,
            *,
            part_size_kb: float = None,
            file_size: int = None,
            progress_callback: 'hints.ProgressCallback' = None,
            dc_id: int = None) -> typing.Optional[bytes]:
        """
        Low-level method to download files from their input location.

        Arguments
            input_location (:tl:`InputFileLocation`):
                The file location from which the file will be downloaded.
                See `telethon.utils.get_input_location` source for a complete
                list of supported types.

            file (`str` | `pathlib.Path` | `file`, optional):
                The output file path, directory, or stream-like object.
                If the path exists and is a file, it will be overwritten.

                If the file path is ``None`` or ``bytes``, then the result
                will be saved in memory and returned as `bytes`.

            part_size_kb (`int`, optional):
                Chunk size when downloading files. The larger, the less
                requests will be made (up to 512KB maximum).

            file_size (`int`, optional):
                The file size that is about to be downloaded, if known.
                Only used if ``progress_callback`` is specified.

            progress_callback (`callable`, optional):
                A callback function accepting two parameters:
                ``(downloaded bytes, total)``. Note that the
                ``total`` is the provided ``file_size``. If the callback
                returns an awaitable, it will be awaited.

            dc_id (`int`, optional):
                The data center the library should connect to in order
                to download the file. You shouldn't worry about this.

        Returns
            The downloaded `bytes` when downloading in memory,
            ``None`` otherwise.

        Raises
            ``ValueError`` if the part size in bytes is not a multiple
            of 4096.

        Example
            .. code-block:: python

                # Download a file and print its header
                data = client.download_file(input_file, bytes)
                print(data[:16])
        """
        if not part_size_kb:
            if not file_size:
                part_size_kb = 64  # Reasonable default
            else:
                part_size_kb = utils.get_appropriated_part_size(file_size)

        part_size = int(part_size_kb * 1024)
        if part_size % MIN_CHUNK_SIZE != 0:
            raise ValueError(
                'The part size must be evenly divisible by 4096.')

        # Paths given as ``pathlib.Path`` are handled like ``str`` paths;
        # otherwise they would be mistaken for a stream-like object.
        if isinstance(file, pathlib.Path):
            file = str(file.absolute())

        in_memory = file is None or file is bytes
        if in_memory:
            f = io.BytesIO()
        elif isinstance(file, str):
            # Ensure that we'll be able to download the media
            helpers.ensure_parent_dir_exists(file)
            f = open(file, 'wb')
        else:
            f = file

        try:
            async for chunk in self.iter_download(
                    input_location, request_size=part_size, dc_id=dc_id):
                f.write(chunk)
                if progress_callback:
                    r = progress_callback(f.tell(), file_size)
                    # Support ``async def`` callbacks too.
                    if inspect.isawaitable(r):
                        await r

            f.flush()
            if in_memory:
                return f.getvalue()
        finally:
            # Only close the stream if we opened it
            if isinstance(file, str) or in_memory:
                f.close()

    def iter_download(
            self: 'TelegramClient',
            file: 'hints.FileLike',
            *,
            offset: int = 0,
            stride: int = None,
            limit: int = None,
            chunk_size: int = None,
            request_size: int = MAX_CHUNK_SIZE,
            file_size: int = None,
            dc_id: int = None
    ):
        """
        Iterates over a file download, yielding chunks of the file.

        This method can be used to stream files in a more convenient
        way, since it offers more control (pausing, resuming, etc.)

        .. note::

            Using a value for `offset` or `stride` which is not a multiple
            of the minimum allowed `request_size`, or if `chunk_size` is
            different from `request_size`, the library will need to do a
            bit more work to fetch the data in the way you intend it to.

            You normally shouldn't worry about this.

        Arguments
            file (`hints.FileLike`):
                The file of which contents you want to iterate over.

            offset (`int`, optional):
                The offset in bytes into the file from where the
                download should start. For example, if a file is
                1024KB long and you just want the last 512KB, you
                would use ``offset=512 * 1024``.

            stride (`int`, optional):
                The stride of each chunk (how much the offset should
                advance between reading each chunk). This parameter
                should only be used for more advanced use cases.

                It must be bigger than or equal to the `chunk_size`.

            limit (`int`, optional):
                The limit for how many *chunks* will be yielded at most.

            chunk_size (`int`, optional):
                The maximum size of the chunks that will be yielded.
                Note that the last chunk may be less than this value.
                By default, it equals to `request_size`.

            request_size (`int`, optional):
                How many bytes will be requested to Telegram when more
                data is required. By default, as many bytes as possible
                are requested. If you would like to request data in
                smaller sizes, adjust this parameter.

                Note that values outside the valid range will be clamped,
                and the final value will also be a multiple of the minimum
                allowed size.

            file_size (`int`, optional):
                If the file size is known beforehand, you should set
                this parameter to said value. Depending on the type of
                the input file passed, this may be set automatically.

            dc_id (`int`, optional):
                The data center the library should connect to in order
                to download the file. You shouldn't worry about this.

        Yields

            ``bytes`` objects representing the chunks of the file if the
            right conditions are met, or ``memoryview`` objects instead.

        Raises
            ``ValueError`` if `stride` is smaller than `chunk_size`.

        Example
            .. code-block:: python

                # Streaming `media` to an output file
                # After the iteration ends, the sender is cleaned up
                with open('photo.jpg', 'wb') as fd:
                    for chunk in client.iter_download(media):
                        fd.write(chunk)

                # Fetching only the header of a file (32 bytes)
                # You should manually close the iterator in this case.
                stream = client.iter_download(media, request_size=32)
                header = next(stream)
                stream.close()
                assert len(header) == 32

                # Fetching only the header, inside of an ``async def``
                async def main():
                    stream = client.iter_download(media, request_size=32)
                    header = await stream.__anext__()
                    await stream.close()
                    assert len(header) == 32
        """
        if chunk_size is None:
            chunk_size = request_size

        if limit is None and file_size is not None:
            # Amount of chunks needed for the whole file, rounded up.
            limit = (file_size + chunk_size - 1) // chunk_size

        if stride is None:
            stride = chunk_size
        elif stride < chunk_size:
            raise ValueError('stride must be >= chunk_size')

        # Round down to a multiple of the minimum and clamp to valid sizes.
        request_size -= request_size % MIN_CHUNK_SIZE
        if request_size < MIN_CHUNK_SIZE:
            request_size = MIN_CHUNK_SIZE
        elif request_size > MAX_CHUNK_SIZE:
            request_size = MAX_CHUNK_SIZE

        # Prefer the DC obtained from the file, fall back to the given one.
        old_dc = dc_id
        dc_id, file = utils.get_input_location(file)
        if dc_id is None:
            dc_id = old_dc

        if chunk_size == request_size \
                and offset % MIN_CHUNK_SIZE == 0 \
                and stride % MIN_CHUNK_SIZE == 0:
            cls = _DirectDownloadIter
            self._log[__name__].info('Starting direct file download in chunks of '
                                     '%d at %d, stride %d', request_size, offset, stride)
        else:
            cls = _GenericDownloadIter
            self._log[__name__].info('Starting indirect file download in chunks of '
                                     '%d at %d, stride %d', request_size, offset, stride)

        return cls(
            self,
            limit,
            file=file,
            dc_id=dc_id,
            offset=offset,
            stride=stride,
            chunk_size=chunk_size,
            request_size=request_size,
            file_size=file_size
        )

    # endregion

    # region Private methods

    @staticmethod
    def _get_thumb(thumbs, thumb):
        """
        Picks the thumbnail to use out of ``thumbs``.

        Arguments
            thumbs:
                The sequence of available sizes. May be ``None`` or empty.

            thumb:
                ``None`` for the last size, an `int` index into ``thumbs``,
                or a photo size instance which is returned as-is.

        Returns
            The selected size, or ``None`` if there are no sizes to pick
            from or ``thumb`` is of an unsupported type.

        Raises
            ``IndexError`` if ``thumb`` is an index out of range for a
            non-empty ``thumbs``.
        """
        if thumb is None or isinstance(thumb, int):
            # Nothing to index into (missing or empty list of sizes).
            if not thumbs:
                return None
            return thumbs[-1 if thumb is None else thumb]
        elif isinstance(thumb, (types.PhotoSize, types.PhotoCachedSize,
                                types.PhotoStrippedSize)):
            return thumb
        else:
            return None

    def _download_cached_photo_size(self: 'TelegramClient', size, file):
        """
        Writes the bytes embedded in a cached or stripped photo size.

        Returns the bytes themselves when ``file`` is the type `bytes`,
        or ``file`` (the path or stream written to) otherwise.
        """
        # No need to download anything, simply write the bytes
        if isinstance(size, types.PhotoStrippedSize):
            data = utils.stripped_photo_to_jpg(size.bytes)
        else:
            data = size.bytes

        if file is bytes:
            return data
        elif isinstance(file, str):
            helpers.ensure_parent_dir_exists(file)
            f = open(file, 'wb')
        else:
            f = file

        try:
            f.write(data)
        finally:
            # Only close the stream if we opened it
            if isinstance(file, str):
                f.close()
        return file

    async def _download_photo(self: 'TelegramClient', photo, file, date, thumb, progress_callback):
        """
        Specialized version of .download_media() for photos.

        Returns ``None`` if ``photo`` is not a :tl:`Photo` or no usable
        size is found; the downloaded `bytes` if ``file`` is the type
        `bytes`; the output file otherwise.
        """
        # Determine the photo and its largest size
        if isinstance(photo, types.MessageMediaPhoto):
            photo = photo.photo
        if not isinstance(photo, types.Photo):
            return

        size = self._get_thumb(photo.sizes, thumb)
        if not size or isinstance(size, types.PhotoSizeEmpty):
            return

        file = self._get_proper_filename(file, 'photo', '.jpg', date=date)
        if isinstance(size, (types.PhotoCachedSize, types.PhotoStrippedSize)):
            # These sizes carry their own bytes, nothing to request.
            return self._download_cached_photo_size(size, file)

        result = await self.download_file(
            types.InputPhotoFileLocation(
                id=photo.id,
                access_hash=photo.access_hash,
                file_reference=photo.file_reference,
                thumb_size=size.type
            ),
            file,
            file_size=size.size,
            progress_callback=progress_callback
        )
        return result if file is bytes else file

    @staticmethod
    def _get_kind_and_names(attributes):
        """
        Gets kind and possible names for :tl:`DocumentAttribute`.

        Returns a ``(kind, possible_names)`` tuple. The kind is
        ``'document'`` unless an audio attribute is found. File names are
        put first in the list of names, audio-derived names are put last.
        """
        kind = 'document'
        possible_names = []
        for attr in attributes:
            if isinstance(attr, types.DocumentAttributeFilename):
                possible_names.insert(0, attr.file_name)

            elif isinstance(attr, types.DocumentAttributeAudio):
                kind = 'audio'
                if attr.performer and attr.title:
                    possible_names.append('{} - {}'.format(
                        attr.performer, attr.title
                    ))
                elif attr.performer:
                    possible_names.append(attr.performer)
                elif attr.title:
                    possible_names.append(attr.title)
                elif attr.voice:
                    kind = 'voice'

        return kind, possible_names

    async def _download_document(
            self, document, file, date, thumb, progress_callback):
        """
        Specialized version of .download_media() for documents.

        Returns ``None`` if ``document`` is not a :tl:`Document`, or if a
        thumbnail was requested but the document has no usable one; the
        downloaded `bytes` if ``file`` is the type `bytes`; the output
        file otherwise.
        """
        if isinstance(document, types.MessageMediaDocument):
            document = document.document
        if not isinstance(document, types.Document):
            return

        kind, possible_names = self._get_kind_and_names(document.attributes)
        file = self._get_proper_filename(
            file, kind, utils.get_extension(document),
            date=date, possible_names=possible_names
        )

        if thumb is None:
            size = None
        else:
            size = self._get_thumb(document.thumbs, thumb)
            if not size or isinstance(size, types.PhotoSizeEmpty):
                # A thumbnail was asked for but there is none to download.
                # Falling through would download the document itself.
                return None
            if isinstance(size, (types.PhotoCachedSize, types.PhotoStrippedSize)):
                return self._download_cached_photo_size(size, file)

        result = await self.download_file(
            types.InputDocumentFileLocation(
                id=document.id,
                access_hash=document.access_hash,
                file_reference=document.file_reference,
                thumb_size=size.type if size else ''
            ),
            file,
            file_size=size.size if size else document.size,
            progress_callback=progress_callback
        )

        return result if file is bytes else file

    @classmethod
    def _download_contact(cls, mm_contact, file):
        """
        Specialized version of .download_media() for contacts.
        Will make use of the vCard 4.0 format.

        Returns the encoded vCard if ``file`` is the type `bytes`, or the
        path or stream it was written to otherwise. When ``file`` is
        ``None`` a file name is derived from the contact.
        """
        first_name = mm_contact.first_name
        last_name = mm_contact.last_name
        phone_number = mm_contact.phone_number

        # Remove these pesky characters
        first_name = first_name.replace(';', '')
        last_name = (last_name or '').replace(';', '')
        result = (
            'BEGIN:VCARD\n'
            'VERSION:4.0\n'
            'N:{f};{l};;;\n'
            'FN:{f} {l}\n'
            'TEL;TYPE=cell;VALUE=uri:tel:+{p}\n'
            'END:VCARD\n'
        ).format(f=first_name, l=last_name, p=phone_number).encode('utf-8')

        if file is bytes:
            return result

        if file is None or isinstance(file, (str, pathlib.Path)):
            # ``None`` (the default of ``download_media``) used to be
            # treated as a stream, failing on ``None.write``.
            file = cls._get_proper_filename(
                file, 'contact', '.vcard',
                possible_names=[first_name, phone_number, last_name]
            )
            f = open(file, 'wb')
        else:
            f = file

        try:
            f.write(result)
        finally:
            # Only close the stream if we opened it
            if isinstance(file, str):
                f.close()

        return file

    @classmethod
    async def _download_web_document(cls, web, file, progress_callback):
        """
        Specialized version of .download_media() for web documents.

        Returns the downloaded `bytes` if ``file`` is the type `bytes`, or
        the path or stream written to otherwise. When ``file`` is ``None``
        a file name is derived from the web document.

        Raises
            ``ValueError`` if ``aiohttp`` is not installed.
        """
        if not aiohttp:
            raise ValueError(
                'Cannot download web documents without the aiohttp '
                'dependency install it (pip install aiohttp)'
            )

        # TODO Better way to get opened handles of files and auto-close
        in_memory = file is bytes
        if in_memory:
            f = io.BytesIO()
        elif file is None or isinstance(file, (str, pathlib.Path)):
            kind, possible_names = cls._get_kind_and_names(web.attributes)
            file = cls._get_proper_filename(
                file, kind, utils.get_extension(web),
                possible_names=possible_names
            )
            f = open(file, 'wb')
        else:
            f = file

        try:
            # The client session has to be used with ``async with``.
            async with aiohttp.ClientSession() as session:
                # TODO Use progress_callback; get content length from response
                # https://github.com/telegramdesktop/tdesktop/blob/c7e773dd9aeba94e2be48c032edc9a78bb50234e/Telegram/SourceFiles/ui/images.cpp#L1318-L1319
                async with session.get(web.url) as response:
                    while True:
                        chunk = await response.content.read(128 * 1024)
                        if not chunk:
                            break
                        f.write(chunk)

            # Read the value before ``finally`` closes the buffer, because
            # ``getvalue()`` fails on a closed ``BytesIO``.
            return f.getvalue() if in_memory else file
        finally:
            # Only close the stream if we opened it
            if isinstance(file, str) or in_memory:
                f.close()

    @staticmethod
    def _get_proper_filename(file, kind, extension,
                             date=None, possible_names=None):
        """Gets a proper filename for 'file', if this is a path.

           'kind' should be the kind of the output file (photo, document...)
           'extension' should be the extension to be added to the file if
                       the filename doesn't have any yet
           'date' should be when this file was originally sent, if known
           'possible_names' should be an ordered list of possible names

           If no modification is made to the path, any existing file
           will be overwritten.
           If any modification is made to the path, this method will
           ensure that no existing file will be overwritten.

           Anything which is not None, a str or a pathlib.Path (such as
           a stream or the bytes type) is returned untouched.
        """
        if isinstance(file, pathlib.Path):
            file = str(file.absolute())

        if file is not None and not isinstance(file, str):
            # Probably a stream-like object, we cannot set a filename here
            return file

        if file is None:
            file = ''
        elif os.path.isfile(file):
            # Make no modifications to valid existing paths
            return file

        if os.path.isdir(file) or not file:
            try:
                name = None if possible_names is None else next(
                    x for x in possible_names if x
                )
            except StopIteration:
                name = None

            if name:
                # Callers in this class take these names from entities and
                # document attributes. A path separator inside them would
                # place the file in (or outside of) another directory.
                for sep in (os.sep, os.altsep):
                    if sep:
                        name = name.replace(sep, '_')
            else:
                if not date:
                    date = datetime.datetime.now()
                name = '{}_{}-{:02}-{:02}_{:02}-{:02}-{:02}'.format(
                    kind,
                    date.year, date.month, date.day,
                    date.hour, date.minute, date.second,
                )
            file = os.path.join(file, name)

        directory, name = os.path.split(file)
        name, ext = os.path.splitext(name)
        if not ext:
            ext = extension

        result = os.path.join(directory, name + ext)
        if not os.path.isfile(result):
            return result

        # The name is taken: try "name (1)", "name (2)"... until one is free.
        i = 1
        while True:
            result = os.path.join(directory, '{} ({}){}'.format(name, i, ext))
            if not os.path.isfile(result):
                return result
            i += 1

    # endregion