2018-06-10 13:04:23 +03:00
|
|
|
import datetime
|
|
|
|
import io
|
|
|
|
import os
|
2018-06-16 18:01:20 +03:00
|
|
|
import pathlib
|
2019-05-03 22:37:27 +03:00
|
|
|
import typing
|
2019-07-04 16:34:51 +03:00
|
|
|
import inspect
|
2021-01-24 03:38:44 +03:00
|
|
|
import asyncio
|
2018-06-10 13:04:23 +03:00
|
|
|
|
2021-09-12 14:27:13 +03:00
|
|
|
from .._crypto import AES
|
2021-09-12 17:58:06 +03:00
|
|
|
from .._misc import utils, helpers, requestiter, tlobject
|
2021-09-12 14:27:13 +03:00
|
|
|
from .. import errors, hints, _tl
|
2018-06-10 13:04:23 +03:00
|
|
|
|
2018-08-01 01:37:25 +03:00
|
|
|
try:
|
|
|
|
import aiohttp
|
|
|
|
except ImportError:
|
|
|
|
aiohttp = None
|
|
|
|
|
2019-05-03 22:37:27 +03:00
|
|
|
if typing.TYPE_CHECKING:
|
|
|
|
from .telegramclient import TelegramClient
|
|
|
|
|
2019-05-21 17:16:16 +03:00
|
|
|
# Chunk sizes for upload.getFile must be multiples of the smallest size
MIN_CHUNK_SIZE = 4096
MAX_CHUNK_SIZE = 512 * 1024

# 2021-01-15, users reported that `errors.TimeoutError` can occur while downloading files.
# Seconds slept before retrying a timed-out chunk request (used by _DirectDownloadIter).
TIMED_OUT_SLEEP = 1
|
2019-05-21 17:16:16 +03:00
|
|
|
|
2021-09-12 14:27:13 +03:00
|
|
|
class _DirectDownloadIter(requestiter.RequestIter):
    """
    Iterator over a file's contents where every yielded chunk maps 1:1 to
    an `upload.GetFile` request (offset advances by a fixed stride).

    Handles borrowing a sender for another DC, one timeout retry, DC
    migration, and refreshing an expired file reference for documents.
    """

    async def _init(
            self, file, dc_id, offset, stride, chunk_size, request_size, file_size, msg_data
    ):
        # The same request object is reused for every chunk; only its
        # `offset` is advanced between calls.
        self.request = _tl.fn.upload.GetFile(
            file, offset=offset, limit=request_size)

        self.total = file_size
        self._stride = stride
        self._chunk_size = chunk_size
        self._last_part = None
        # (input chat, message id) used to re-fetch the message if the
        # file reference expires mid-download; may be None.
        self._msg_data = msg_data
        # True while the previous request timed out (only one retry allowed).
        self._timed_out = False

        # Use an exported sender when the file lives in a different DC.
        self._exported = dc_id and self.client.session.dc_id != dc_id
        if not self._exported:
            # The used sender will also change if ``FileMigrateError`` occurs
            self._sender = self.client._sender
        else:
            try:
                self._sender = await self.client._borrow_exported_sender(dc_id)
            except errors.DcIdInvalidError:
                # Can't export a sender for the ID we are currently in
                config = await self.client(_tl.fn.help.GetConfig())
                for option in config.dc_options:
                    if option.ip_address == self.client.session.server_address:
                        # Fix the session's DC ID to match the server we are on.
                        self.client.session.set_dc(
                            option.id, option.ip_address, option.port)
                        self.client.session.save()
                        break

                # TODO Figure out why the session may have the wrong DC ID
                self._sender = self.client._sender
                self._exported = False

    async def _load_next_chunk(self):
        # One request per chunk; a short read means the file is exhausted.
        cur = await self._request()
        self.buffer.append(cur)
        if len(cur) < self.request.limit:
            self.left = len(self.buffer)
            await self.close()
        else:
            self.request.offset += self._stride

    async def _request(self):
        """Perform a single `GetFile` call, retrying through known errors."""
        try:
            result = await self.client._call(self._sender, self.request)
            self._timed_out = False
            if isinstance(result, _tl.upload.FileCdnRedirect):
                raise NotImplementedError  # TODO Implement
            else:
                return result.bytes

        except errors.TimeoutError as e:
            # Retry exactly once; a second consecutive timeout propagates.
            if self._timed_out:
                self.client._log[__name__].warning('Got two timeouts in a row while downloading file')
                raise

            self._timed_out = True
            self.client._log[__name__].info('Got timeout while downloading file, retrying once')
            await asyncio.sleep(TIMED_OUT_SLEEP)
            return await self._request()

        except errors.FileMigrateError as e:
            # Switch to a sender for the DC the server told us about.
            self.client._log[__name__].info('File lives in another DC')
            self._sender = await self.client._borrow_exported_sender(e.new_dc)
            self._exported = True
            return await self._request()

        except errors.FilerefUpgradeNeededError as e:
            # Only implemented for documents which are the ones that may take that long to download
            if not self._msg_data \
                    or not isinstance(self.request.location, _tl.InputDocumentFileLocation) \
                    or self.request.location.thumb_size != '':
                raise

            self.client._log[__name__].info('File ref expired during download; refetching message')
            chat, msg_id = self._msg_data
            msg = await self.client.get_messages(chat, ids=msg_id)

            if not isinstance(msg.media, _tl.MessageMediaDocument):
                raise

            document = msg.media.document

            # Message media may have been edited for something else
            if document.id != self.request.location.id:
                raise

            # Patch the fresh reference into the reused request and retry.
            self.request.location.file_reference = document.file_reference
            return await self._request()

    async def close(self):
        """Return or disconnect the borrowed sender; safe to call twice."""
        if not self._sender:
            return

        try:
            if self._exported:
                await self.client._return_exported_sender(self._sender)
            elif self._sender != self.client._sender:
                await self._sender.disconnect()
        finally:
            self._sender = None

    async def __aenter__(self):
        return self

    async def __aexit__(self, *args):
        await self.close()
|
|
|
|
|
|
|
|
|
|
|
|
class _GenericDownloadIter(_DirectDownloadIter):
    """
    Download iterator for the general case where the requested offset,
    stride or chunk size don't line up with valid server request sizes.

    It over-fetches aligned data and slices the caller's chunks out of it.
    """

    async def _load_next_chunk(self):
        # 1. Fetch enough for one chunk
        data = b''

        # 1.1. ``bad`` is how much into the data we have we need to offset
        bad = self.request.offset % self.request.limit
        before = self.request.offset

        # 1.2. We have to fetch from a valid offset, so remove that bad part
        self.request.offset -= bad

        done = False
        while not done and len(data) - bad < self._chunk_size:
            cur = await self._request()
            self.request.offset += self.request.limit

            data += cur
            done = len(cur) < self.request.limit

        # 1.3 Restore our last desired offset
        self.request.offset = before

        # 2. Fill the buffer with the data we have
        # 2.1. Slicing `bytes` is expensive, yield `memoryview` instead
        mem = memoryview(data)

        # 2.2. The current chunk starts at ``bad`` offset into the data,
        # and each new chunk is ``stride`` bytes apart of the other
        for i in range(bad, len(data), self._stride):
            self.buffer.append(mem[i:i + self._chunk_size])

        # 2.3. We will yield this offset, so move to the next one
        self.request.offset += self._stride

        # 2.4. If we are in the last chunk, we will return the last partial data
        if done:
            self.left = len(self.buffer)
            await self.close()
            return

        # 2.5. If we are not done, we can't return incomplete chunks.
        if len(self.buffer[-1]) != self._chunk_size:
            self._last_part = self.buffer.pop().tobytes()

        # 3. Be careful with the offsets. Re-fetching a bit of data
        # is fine, since it greatly simplifies things.
        # TODO Try to not re-fetch data
        self.request.offset -= self._stride
|
|
|
|
|
|
|
|
|
2021-09-11 14:33:27 +03:00
|
|
|
async def download_profile_photo(
        self: 'TelegramClient',
        entity: 'hints.EntityLike',
        file: 'hints.FileLike' = None,
        *,
        download_big: bool = True) -> typing.Optional[str]:
    """
    Download the profile photo of the given user, chat or channel.

    Arguments
        entity: the entity (or something resolvable to one) whose photo
            to download. Input-like entities are resolved first.
        file: output path, directory, stream, or `bytes` for in-memory.
        download_big: whether to fetch the big version of the photo.

    Returns the output filename, the downloaded `bytes` when ``file is
    bytes``, or `None` when there is no photo to download.
    """
    # hex(crc32(x.encode('ascii'))) for x in
    # ('User', 'Chat', 'UserFull', 'ChatFull')
    ENTITIES = (0x2da17977, 0xc5af5d94, 0x1f4661b9, 0xd49a2697)
    # ('InputPeer', 'InputUser', 'InputChannel')
    INPUTS = (0xc91c90b6, 0xe669bf46, 0x40f202fd)
    # Input-only objects don't carry a photo; resolve to the full entity.
    if not isinstance(entity, tlobject.TLObject) or entity.SUBCLASS_OF_ID in INPUTS:
        entity = await self.get_entity(entity)

    thumb = -1 if download_big else 0

    possible_names = []
    if entity.SUBCLASS_OF_ID not in ENTITIES:
        # Not a user/chat object: treat the argument as the photo itself.
        photo = entity
    else:
        if not hasattr(entity, 'photo'):
            # Special case: may be a ChatFull with photo:Photo
            # This is different from a normal UserProfilePhoto and Chat
            if not hasattr(entity, 'chat_photo'):
                return None

            return await _download_photo(
                self, entity.chat_photo, file, date=None,
                thumb=thumb, progress_callback=None
            )

        # Collect candidate names for the output filename.
        for attr in ('username', 'first_name', 'title'):
            possible_names.append(getattr(entity, attr, None))

        photo = entity.photo

    if isinstance(photo, (_tl.UserProfilePhoto, _tl.ChatPhoto)):
        dc_id = photo.dc_id
        loc = _tl.InputPeerPhotoFileLocation(
            peer=await self.get_input_entity(entity),
            photo_id=photo.photo_id,
            big=download_big
        )
    else:
        # It doesn't make any sense to check if `photo` can be used
        # as input location, because then this method would be able
        # to "download the profile photo of a message", i.e. its
        # media which should be done with `download_media` instead.
        return None

    file = _get_proper_filename(
        file, 'profile_photo', '.jpg',
        possible_names=possible_names
    )

    try:
        result = await _download_file(
            self=self,
            input_location=loc,
            file=file,
            dc_id=dc_id
        )
        return result if file is bytes else file
    except errors.LocationInvalidError:
        # See issue #500, Android app fails as of v4.6.0 (1155).
        # The fix seems to be using the full channel chat photo.
        ie = await self.get_input_entity(entity)
        ty = helpers._entity_type(ie)
        if ty == helpers._EntityType.CHANNEL:
            full = await self(_tl.fn.channels.GetFullChannel(ie))
            return await _download_photo(
                self, full.full_chat.chat_photo, file,
                date=None, progress_callback=None,
                thumb=thumb
            )
        else:
            # Until there's a report for chats, no need to.
            return None
|
|
|
|
|
2021-09-11 14:33:27 +03:00
|
|
|
async def download_media(
        self: 'TelegramClient',
        message: 'hints.MessageLike',
        file: 'hints.FileLike' = None,
        *,
        thumb: 'typing.Union[int, _tl.TypePhotoSize]' = None,
        progress_callback: 'hints.ProgressCallback' = None) -> typing.Optional[typing.Union[str, bytes]]:
    """
    Download the media attached to *message* (or the media object itself).

    Dispatches to the specialized photo / document / contact / web-document
    downloaders depending on the media type. Returns the output filename,
    the raw `bytes` when ``file is bytes``, or `None` if the media type is
    not downloadable.
    """
    # Downloading large documents may be slow enough to require a new file reference
    # to be obtained mid-download. Store (input chat, message id) so that the message
    # can be re-fetched.
    msg_data = None

    # TODO This won't work for messageService
    if isinstance(message, _tl.Message):
        date = message.date
        media = message.media
        msg_data = (message.input_chat, message.id) if message.input_chat else None
    else:
        # A bare media object was passed; fall back to "now" for filenames.
        date = datetime.datetime.now()
        media = message

    if isinstance(media, _tl.MessageService):
        if isinstance(message.action,
                      _tl.MessageActionChatEditPhoto):
            # NOTE(review): `media` is the service message itself here and
            # the edited photo normally lives on the action — confirm that
            # `media.photo` (rather than `media.action.photo`) is intended.
            media = media.photo

    if isinstance(media, _tl.MessageMediaWebPage):
        if isinstance(media.webpage, _tl.WebPage):
            # Prefer the embedded document, falling back to the photo.
            media = media.webpage.document or media.webpage.photo

    if isinstance(media, (_tl.MessageMediaPhoto, _tl.Photo)):
        return await _download_photo(
            self, media, file, date, thumb, progress_callback
        )
    elif isinstance(media, (_tl.MessageMediaDocument, _tl.Document)):
        return await _download_document(
            self, media, file, date, thumb, progress_callback, msg_data
        )
    elif isinstance(media, _tl.MessageMediaContact) and thumb is None:
        return _download_contact(
            self, media, file
        )
    elif isinstance(media, (_tl.WebDocument, _tl.WebDocumentNoProxy)) and thumb is None:
        return await _download_web_document(
            self, media, file, progress_callback
        )
|
2019-05-09 19:56:54 +03:00
|
|
|
|
2021-09-11 14:33:27 +03:00
|
|
|
async def _download_file(
        self: 'TelegramClient',
        input_location: 'hints.FileLike',
        file: 'hints.OutFileLike' = None,
        *,
        part_size_kb: float = None,
        file_size: int = None,
        progress_callback: 'hints.ProgressCallback' = None,
        dc_id: int = None,
        key: bytes = None,
        iv: bytes = None,
        msg_data: tuple = None) -> typing.Optional[bytes]:
    """
    Low-level method to download files from their input location.

    Arguments
        input_location (:tl:`InputFileLocation`):
            The file location from which the file will be downloaded.
            See `telethon.utils.get_input_location` source for a complete
            list of supported _tl.

        file (`str` | `file`, optional):
            The output file path, directory, or stream-like object.
            If the path exists and is a file, it will be overwritten.

            If the file path is `None` or `bytes`, then the result
            will be saved in memory and returned as `bytes`.

        part_size_kb (`int`, optional):
            Chunk size when downloading files. The larger, the less
            requests will be made (up to 512KB maximum).

        file_size (`int`, optional):
            The file size that is about to be downloaded, if known.
            Only used if ``progress_callback`` is specified.

        progress_callback (`callable`, optional):
            A callback function accepting two parameters:
            ``(downloaded bytes, total)``. Note that the
            ``total`` is the provided ``file_size``.

        dc_id (`int`, optional):
            The data center the library should connect to in order
            to download the file. You shouldn't worry about this.

        key ('bytes', optional):
            In case of an encrypted upload (secret chats) a key is supplied

        iv ('bytes', optional):
            In case of an encrypted upload (secret chats) an iv is supplied

        msg_data (`tuple`, optional):
            (input chat, message id) forwarded to the iterator so an
            expired file reference can be refreshed mid-download.
    """
    if not part_size_kb:
        if not file_size:
            part_size_kb = 64  # Reasonable default
        else:
            part_size_kb = utils.get_appropriated_part_size(file_size)

    part_size = int(part_size_kb * 1024)
    # Server requirement; MIN_CHUNK_SIZE is 4096.
    if part_size % MIN_CHUNK_SIZE != 0:
        raise ValueError(
            'The part size must be evenly divisible by 4096.')

    if isinstance(file, pathlib.Path):
        file = str(file.absolute())

    in_memory = file is None or file is bytes
    if in_memory:
        f = io.BytesIO()
    elif isinstance(file, str):
        # Ensure that we'll be able to download the media
        helpers.ensure_parent_dir_exists(file)
        f = open(file, 'wb')
    else:
        # Caller-provided stream; it will not be closed by us.
        f = file

    try:
        async for chunk in _iter_download(
                self, input_location, request_size=part_size, dc_id=dc_id, msg_data=msg_data):
            if iv and key:
                # Secret-chat download: decrypt each chunk in place.
                chunk = AES.decrypt_ige(chunk, key, iv)
            r = f.write(chunk)
            # Support both sync and async file-like objects.
            if inspect.isawaitable(r):
                await r

            if progress_callback:
                r = progress_callback(f.tell(), file_size)
                if inspect.isawaitable(r):
                    await r

        # Not all IO objects have flush (see #1227)
        if callable(getattr(f, 'flush', None)):
            f.flush()

        if in_memory:
            return f.getvalue()
    finally:
        # Only close handles we opened ourselves (paths and in-memory buffers).
        if isinstance(file, str) or in_memory:
            f.close()
|
|
|
|
|
|
|
|
def iter_download(
        self: 'TelegramClient',
        file: 'hints.FileLike',
        *,
        offset: int = 0,
        stride: int = None,
        limit: int = None,
        chunk_size: int = None,
        request_size: int = MAX_CHUNK_SIZE,
        file_size: int = None,
        dc_id: int = None
):
    """
    Public entry point for iterating over a file's contents in chunks.

    This is a thin wrapper that forwards every option to `_iter_download`
    (without exposing the internal ``msg_data`` parameter).
    """
    options = dict(
        offset=offset,
        stride=stride,
        limit=limit,
        chunk_size=chunk_size,
        request_size=request_size,
        file_size=file_size,
        dc_id=dc_id,
    )
    return _iter_download(self, file, **options)
|
|
|
|
|
|
|
|
def _iter_download(
        self: 'TelegramClient',
        file: 'hints.FileLike',
        *,
        offset: int = 0,
        stride: int = None,
        limit: int = None,
        chunk_size: int = None,
        request_size: int = MAX_CHUNK_SIZE,
        file_size: int = None,
        dc_id: int = None,
        msg_data: tuple = None
):
    """
    Implementation behind `iter_download`: resolves the file's location,
    normalizes chunk/stride/request sizes and picks the cheapest iterator
    class able to satisfy them.
    """
    info = utils._get_file_info(file)
    # The location's own DC takes precedence over the caller's hint.
    if info.dc_id is not None:
        dc_id = info.dc_id

    if file_size is None:
        file_size = info.size

    file = info.location

    if chunk_size is None:
        chunk_size = request_size

    # Number of chunks needed to cover the whole file (ceiling division).
    if limit is None and file_size is not None:
        limit = (file_size + chunk_size - 1) // chunk_size

    if stride is None:
        stride = chunk_size
    elif stride < chunk_size:
        raise ValueError('stride must be >= chunk_size')

    # Clamp the request size to a server-acceptable multiple of MIN_CHUNK_SIZE.
    request_size -= request_size % MIN_CHUNK_SIZE
    if request_size < MIN_CHUNK_SIZE:
        request_size = MIN_CHUNK_SIZE
    elif request_size > MAX_CHUNK_SIZE:
        request_size = MAX_CHUNK_SIZE

    # The direct iterator only works when every yielded chunk maps exactly
    # onto one aligned server request; otherwise use the generic one.
    # NOTE(review): `offset % limit` compares a byte offset against a chunk
    # count, which looks suspicious — confirm the intended condition.
    if chunk_size == request_size \
            and offset % MIN_CHUNK_SIZE == 0 \
            and stride % MIN_CHUNK_SIZE == 0 \
            and (limit is None or offset % limit == 0):
        cls = _DirectDownloadIter
        self._log[__name__].info('Starting direct file download in chunks of '
                                 '%d at %d, stride %d', request_size, offset, stride)
    else:
        cls = _GenericDownloadIter
        self._log[__name__].info('Starting indirect file download in chunks of '
                                 '%d at %d, stride %d', request_size, offset, stride)

    return cls(
        self,
        limit,
        file=file,
        dc_id=dc_id,
        offset=offset,
        stride=stride,
        chunk_size=chunk_size,
        request_size=request_size,
        file_size=file_size,
        msg_data=msg_data,
    )
|
|
|
|
|
|
|
|
|
|
|
|
def _get_thumb(thumbs, thumb):
    """
    Select one thumbnail from *thumbs* according to the *thumb* selector.

    ``thumb`` may be `None` (largest), an `int` index into the size-sorted
    list, a `str` type letter, or an already-chosen size object.
    """
    # Seems Telegram has changed the order and put `PhotoStrippedSize`
    # last while this is the smallest (layer 116). Ensure we have the
    # sizes sorted correctly with a custom function.
    def _size_key(t):
        # Photos sort in group 1 by byte size, videos in group 2,
        # anything unknown or empty in group 0 (first).
        if isinstance(t, (_tl.PhotoStrippedSize, _tl.PhotoCachedSize)):
            return 1, len(t.bytes)
        if isinstance(t, _tl.PhotoSize):
            return 1, t.size
        if isinstance(t, _tl.PhotoSizeProgressive):
            return 1, max(t.sizes)
        if isinstance(t, _tl.VideoSize):
            return 2, t.size
        return 0, 0

    ordered = sorted(thumbs, key=_size_key)

    # :tl:`PhotoPathSize` is used for animated stickers preview, and the thumb is actually
    # a SVG path of the outline. Users expect thumbnails to be JPEG files, so pretend this
    # thumb size doesn't actually exist (#1655).
    ordered = [t for t in ordered if not isinstance(t, _tl.PhotoPathSize)]

    if thumb is None:
        return ordered[-1]
    if isinstance(thumb, int):
        return ordered[thumb]
    if isinstance(thumb, str):
        return next((t for t in ordered if t.type == thumb), None)
    if isinstance(thumb, (_tl.PhotoSize, _tl.PhotoCachedSize,
                          _tl.PhotoStrippedSize, _tl.VideoSize)):
        return thumb
    return None
|
|
|
|
|
|
|
|
def _download_cached_photo_size(self: 'TelegramClient', size, file):
    """
    Write an inline (cached/stripped) photo size to *file* without making
    any network request; the bytes are already embedded in the size object.
    """
    # Stripped sizes lack the common JPEG header; reconstruct it first.
    data = (utils.stripped_photo_to_jpg(size.bytes)
            if isinstance(size, _tl.PhotoStrippedSize)
            else size.bytes)

    if file is bytes:
        return data

    if isinstance(file, str):
        helpers.ensure_parent_dir_exists(file)
        handle = open(file, 'wb')
        opened_here = True
    else:
        # Caller-supplied stream; leave it open for them.
        handle = file
        opened_here = False

    try:
        handle.write(data)
    finally:
        if opened_here:
            handle.close()
    return file
|
|
|
|
|
|
|
|
async def _download_photo(self: 'TelegramClient', photo, file, date, thumb, progress_callback):
    """Specialized version of .download_media() for photos"""
    # Determine the photo and its largest size
    if isinstance(photo, _tl.MessageMediaPhoto):
        photo = photo.photo
    if not isinstance(photo, _tl.Photo):
        return

    # Include video sizes here (but they may be None so provide an empty list)
    size = _get_thumb(photo.sizes + (photo.video_sizes or []), thumb)
    if not size or isinstance(size, _tl.PhotoSizeEmpty):
        return

    # Video sizes (animated profile photos) are saved as .mp4 instead.
    if isinstance(size, _tl.VideoSize):
        file = _get_proper_filename(file, 'video', '.mp4', date=date)
    else:
        file = _get_proper_filename(file, 'photo', '.jpg', date=date)

    # Cached/stripped sizes already carry their bytes; no download needed.
    if isinstance(size, (_tl.PhotoCachedSize, _tl.PhotoStrippedSize)):
        return _download_cached_photo_size(self, size, file)

    if isinstance(size, _tl.PhotoSizeProgressive):
        file_size = max(size.sizes)
    else:
        file_size = size.size

    result = await _download_file(
        self=self,
        input_location=_tl.InputPhotoFileLocation(
            id=photo.id,
            access_hash=photo.access_hash,
            file_reference=photo.file_reference,
            thumb_size=size.type
        ),
        file=file,
        file_size=file_size,
        progress_callback=progress_callback
    )
    # When downloading in memory, return the bytes; otherwise the path.
    return result if file is bytes else file
|
|
|
|
|
|
|
|
def _get_kind_and_names(attributes):
    """Gets kind and possible names for :tl:`DocumentAttribute`."""
    kind = 'document'
    possible_names = []
    for attribute in attributes:
        if isinstance(attribute, _tl.DocumentAttributeFilename):
            # Explicit filenames take priority over derived ones.
            possible_names.insert(0, attribute.file_name)
        elif isinstance(attribute, _tl.DocumentAttributeAudio):
            kind = 'audio'
            performer = attribute.performer
            title = attribute.title
            if performer and title:
                possible_names.append('{} - {}'.format(performer, title))
            elif performer:
                possible_names.append(performer)
            elif title:
                possible_names.append(title)
            elif attribute.voice:
                kind = 'voice'

    return kind, possible_names
|
|
|
|
|
|
|
|
async def _download_document(
        self, document, file, date, thumb, progress_callback, msg_data):
    """Specialized version of .download_media() for documents."""
    if isinstance(document, _tl.MessageMediaDocument):
        document = document.document
    if not isinstance(document, _tl.Document):
        return

    if thumb is None:
        # Full document: derive filename from its attributes.
        kind, possible_names = _get_kind_and_names(document.attributes)
        file = _get_proper_filename(
            file, kind, utils.get_extension(document),
            date=date, possible_names=possible_names
        )
        size = None
    else:
        # Thumbnail requested: always saved as a JPEG photo.
        file = _get_proper_filename(file, 'photo', '.jpg', date=date)
        size = _get_thumb(document.thumbs, thumb)
        if isinstance(size, (_tl.PhotoCachedSize, _tl.PhotoStrippedSize)):
            return _download_cached_photo_size(self, size, file)

    result = await _download_file(
        self=self,
        input_location=_tl.InputDocumentFileLocation(
            id=document.id,
            access_hash=document.access_hash,
            file_reference=document.file_reference,
            # Empty thumb_size means the document itself, not a thumbnail.
            thumb_size=size.type if size else ''
        ),
        file=file,
        file_size=size.size if size else document.size,
        progress_callback=progress_callback,
        msg_data=msg_data,
    )

    return result if file is bytes else file
|
|
|
|
|
|
|
|
def _download_contact(cls, mm_contact, file):
|
|
|
|
"""
|
|
|
|
Specialized version of .download_media() for contacts.
|
|
|
|
Will make use of the vCard 4.0 format.
|
|
|
|
"""
|
|
|
|
first_name = mm_contact.first_name
|
|
|
|
last_name = mm_contact.last_name
|
|
|
|
phone_number = mm_contact.phone_number
|
|
|
|
|
|
|
|
# Remove these pesky characters
|
|
|
|
first_name = first_name.replace(';', '')
|
|
|
|
last_name = (last_name or '').replace(';', '')
|
|
|
|
result = (
|
|
|
|
'BEGIN:VCARD\n'
|
|
|
|
'VERSION:4.0\n'
|
|
|
|
'N:{f};{l};;;\n'
|
|
|
|
'FN:{f} {l}\n'
|
|
|
|
'TEL;TYPE=cell;VALUE=uri:tel:+{p}\n'
|
|
|
|
'END:VCARD\n'
|
|
|
|
).format(f=first_name, l=last_name, p=phone_number).encode('utf-8')
|
|
|
|
|
|
|
|
if file is bytes:
|
|
|
|
return result
|
|
|
|
elif isinstance(file, str):
|
|
|
|
file = cls._get_proper_filename(
|
|
|
|
file, 'contact', '.vcard',
|
|
|
|
possible_names=[first_name, phone_number, last_name]
|
2019-04-24 13:38:03 +03:00
|
|
|
)
|
2021-09-11 14:33:27 +03:00
|
|
|
f = open(file, 'wb')
|
|
|
|
else:
|
|
|
|
f = file
|
|
|
|
|
|
|
|
try:
|
|
|
|
f.write(result)
|
|
|
|
finally:
|
|
|
|
# Only close the stream if we opened it
|
|
|
|
if isinstance(file, str):
|
|
|
|
f.close()
|
|
|
|
|
|
|
|
return file
|
|
|
|
|
|
|
|
async def _download_web_document(cls, web, file, progress_callback):
    """
    Specialized version of .download_media() for web documents.

    Fetches ``web.url`` over plain HTTP(S) via the optional ``aiohttp``
    dependency and writes the body to *file* (path, stream, or `bytes`
    for an in-memory result).
    """
    if not aiohttp:
        raise ValueError(
            'Cannot download web documents without the aiohttp '
            'dependency install it (pip install aiohttp)'
        )

    # TODO Better way to get opened handles of files and auto-close
    in_memory = file is bytes
    if in_memory:
        f = io.BytesIO()
    elif isinstance(file, str):
        # Fixed: these helpers are module-level functions in this file;
        # the previous `cls._get_kind_and_names` / `cls._get_proper_filename`
        # calls would raise AttributeError on the client object.
        kind, possible_names = _get_kind_and_names(web.attributes)
        file = _get_proper_filename(
            file, kind, utils.get_extension(web),
            possible_names=possible_names
        )
        f = open(file, 'wb')
    else:
        f = file

    try:
        # Fixed: `aiohttp.ClientSession` is an *async* context manager;
        # a plain `with` raises TypeError on current aiohttp versions.
        async with aiohttp.ClientSession() as session:
            # TODO Use progress_callback; get content length from response
            # https://github.com/telegramdesktop/tdesktop/blob/c7e773dd9aeba94e2be48c032edc9a78bb50234e/Telegram/SourceFiles/ui/images.cpp#L1318-L1319
            async with session.get(web.url) as response:
                while True:
                    chunk = await response.content.read(128 * 1024)
                    if not chunk:
                        break
                    f.write(chunk)
    finally:
        # Only close handles we opened (paths and the in-memory buffer).
        if isinstance(file, str) or file is bytes:
            f.close()

    return f.getvalue() if in_memory else file
|
|
|
|
|
|
|
|
def _get_proper_filename(file, kind, extension,
|
|
|
|
date=None, possible_names=None):
|
|
|
|
"""Gets a proper filename for 'file', if this is a path.
|
|
|
|
|
|
|
|
'kind' should be the kind of the output file (photo, document...)
|
|
|
|
'extension' should be the extension to be added to the file if
|
|
|
|
the filename doesn't have any yet
|
|
|
|
'date' should be when this file was originally sent, if known
|
|
|
|
'possible_names' should be an ordered list of possible names
|
|
|
|
|
|
|
|
If no modification is made to the path, any existing file
|
|
|
|
will be overwritten.
|
|
|
|
If any modification is made to the path, this method will
|
|
|
|
ensure that no existing file will be overwritten.
|
|
|
|
"""
|
|
|
|
if isinstance(file, pathlib.Path):
|
|
|
|
file = str(file.absolute())
|
|
|
|
|
|
|
|
if file is not None and not isinstance(file, str):
|
|
|
|
# Probably a stream-like object, we cannot set a filename here
|
|
|
|
return file
|
2018-06-10 13:04:23 +03:00
|
|
|
|
2021-09-11 14:33:27 +03:00
|
|
|
if file is None:
|
|
|
|
file = ''
|
|
|
|
elif os.path.isfile(file):
|
|
|
|
# Make no modifications to valid existing paths
|
2018-06-10 13:04:23 +03:00
|
|
|
return file
|
|
|
|
|
2021-09-11 14:33:27 +03:00
|
|
|
if os.path.isdir(file) or not file:
|
|
|
|
try:
|
|
|
|
name = None if possible_names is None else next(
|
|
|
|
x for x in possible_names if x
|
2018-08-01 01:37:25 +03:00
|
|
|
)
|
2021-09-11 14:33:27 +03:00
|
|
|
except StopIteration:
|
|
|
|
name = None
|
|
|
|
|
|
|
|
if not name:
|
|
|
|
if not date:
|
|
|
|
date = datetime.datetime.now()
|
|
|
|
name = '{}_{}-{:02}-{:02}_{:02}-{:02}-{:02}'.format(
|
|
|
|
kind,
|
|
|
|
date.year, date.month, date.day,
|
|
|
|
date.hour, date.minute, date.second,
|
2018-08-01 01:37:25 +03:00
|
|
|
)
|
2021-09-11 14:33:27 +03:00
|
|
|
file = os.path.join(file, name)
|
2018-08-01 01:37:25 +03:00
|
|
|
|
2021-09-11 14:33:27 +03:00
|
|
|
directory, name = os.path.split(file)
|
|
|
|
name, ext = os.path.splitext(name)
|
|
|
|
if not ext:
|
|
|
|
ext = extension
|
2018-06-10 13:04:23 +03:00
|
|
|
|
2021-09-11 14:33:27 +03:00
|
|
|
result = os.path.join(directory, name + ext)
|
|
|
|
if not os.path.isfile(result):
|
|
|
|
return result
|
2018-06-10 13:04:23 +03:00
|
|
|
|
2021-09-11 14:33:27 +03:00
|
|
|
i = 1
|
|
|
|
while True:
|
|
|
|
result = os.path.join(directory, '{} ({}){}'.format(name, i, ext))
|
|
|
|
if not os.path.isfile(result):
|
|
|
|
return result
|
|
|
|
i += 1
|