import datetime
import io
import os
import pathlib
import typing
import inspect
import asyncio
import dataclasses

from .._crypto import AES
from .._misc import utils, helpers, requestiter, tlobject, hints, enums
from .. import errors, _tl

try:
    import aiohttp
except ImportError:
    aiohttp = None

if typing.TYPE_CHECKING:
    from .telegramclient import TelegramClient


# Chunk sizes for upload.getFile must be multiples of the smallest size
MIN_CHUNK_SIZE = 4096
MAX_CHUNK_SIZE = 512 * 1024
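# For example (illustrative numbers, not from the source): a requested part
# size of 100_000 bytes is not a multiple of MIN_CHUNK_SIZE, so the
# normalisation in _iter_download below would round it down to
# 98_304 == 24 * 4096 before talking to upload.getFile.
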
# 2021-01-15, users reported that `errors.TimeoutError` can occur while downloading files.
TIMED_OUT_SLEEP = 1


class _DirectDownloadIter(requestiter.RequestIter):
    async def _init(
            self, file, dc_id, offset, stride, chunk_size, request_size, file_size, msg_data
    ):
        self.request = _tl.fn.upload.GetFile(
            file, offset=offset, limit=request_size)

        self.total = file_size
        self._stride = stride
        self._chunk_size = chunk_size
        self._last_part = None
        self._msg_data = msg_data
        self._timed_out = False

        self._exported = dc_id and self.client._session_state.dc_id != dc_id
        if not self._exported:
            # The used sender will also change if ``FileMigrateError`` occurs
            self._sender = self.client._sender
        else:
            # If this raises DcIdInvalidError, it means we tried exporting the same DC we're in.
            # This should not happen, but if it does, it's a bug.
            self._sender = await self.client._borrow_exported_sender(dc_id)

    async def _load_next_chunk(self):
        cur = await self._request()
        self.buffer.append(cur)
        if len(cur) < self.request.limit:
            self.left = len(self.buffer)
            await self.close()
        else:
            self.request = dataclasses.replace(self.request, offset=self.request.offset + self._stride)

    async def _request(self):
        try:
            result = await self.client._call(self._sender, self.request)
            self._timed_out = False
            if isinstance(result, _tl.upload.FileCdnRedirect):
                raise NotImplementedError  # TODO Implement
            else:
                return result.bytes

        except errors.TimeoutError:
            if self._timed_out:
                self.client._log[__name__].warning('Got two timeouts in a row while downloading file')
                raise

            self._timed_out = True
            self.client._log[__name__].info('Got timeout while downloading file, retrying once')
            await asyncio.sleep(TIMED_OUT_SLEEP)
            return await self._request()

        except errors.FileMigrateError as e:
            self.client._log[__name__].info('File lives in another DC')
            self._sender = await self.client._borrow_exported_sender(e.new_dc)
            self._exported = True
            return await self._request()

        except errors.FilerefUpgradeNeededError:
            # Only implemented for documents, which are the ones that may take
            # long enough to download for the file reference to expire
            if not self._msg_data \
                    or not isinstance(self.request.location, _tl.InputDocumentFileLocation) \
                    or self.request.location.thumb_size != '':
                raise

            self.client._log[__name__].info('File ref expired during download; refetching message')
            chat, msg_id = self._msg_data
            msg = await self.client.get_messages(chat, ids=msg_id)

            if not isinstance(msg.media, _tl.MessageMediaDocument):
                raise

            document = msg.media.document

            # Message media may have been edited for something else
            if document.id != self.request.location.id:
                raise

            self.request.location = dataclasses.replace(self.request.location, file_reference=document.file_reference)
            return await self._request()

    async def close(self):
        if not self._sender:
            return

        try:
            if self._exported:
                await self.client._return_exported_sender(self._sender)
            elif self._sender != self.client._sender:
                await self._sender.disconnect()
        finally:
            self._sender = None

    async def __aenter__(self):
        return self

    async def __aexit__(self, *args):
        await self.close()


class _GenericDownloadIter(_DirectDownloadIter):
    async def _load_next_chunk(self):
        # 1. Fetch enough for one chunk
        data = b''

        # 1.1. ``bad`` is how far into the fetched data the desired offset lies
        bad = self.request.offset % self.request.limit
        before = self.request.offset

        # 1.2. We have to fetch from a valid offset, so remove that bad part
        self.request = dataclasses.replace(self.request, offset=self.request.offset - bad)

        done = False
        while not done and len(data) - bad < self._chunk_size:
            cur = await self._request()
            self.request = dataclasses.replace(self.request, offset=self.request.offset + self.request.limit)

            data += cur
            done = len(cur) < self.request.limit

        # 1.3. Restore our last desired offset
        self.request = dataclasses.replace(self.request, offset=before)

        # 2. Fill the buffer with the data we have
        # 2.1. Slicing `bytes` is expensive, yield `memoryview` instead
        mem = memoryview(data)

        # 2.2. The current chunk starts at ``bad`` offset into the data,
        # and each new chunk is ``stride`` bytes apart from the other
        for i in range(bad, len(data), self._stride):
            self.buffer.append(mem[i:i + self._chunk_size])

            # 2.3. We will yield this offset, so move to the next one
            self.request = dataclasses.replace(self.request, offset=self.request.offset + self._stride)

        # 2.4. If we are in the last chunk, we will return the last partial data
        if done:
            self.left = len(self.buffer)
            await self.close()
            return

        # 2.5. If we are not done, we can't return incomplete chunks.
        if len(self.buffer[-1]) != self._chunk_size:
            self._last_part = self.buffer.pop().tobytes()

            # 3. Be careful with the offsets. Re-fetching a bit of data
            # is fine, since it greatly simplifies things.
            # TODO Try to not re-fetch data
            self.request = dataclasses.replace(self.request, offset=self.request.offset - self._stride)


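# Worked example for the offset arithmetic above (illustrative numbers, not
# from the source): with offset=5000 and limit == chunk_size == stride ==
# 16384, ``bad`` is 5000, so data is fetched from the aligned offset 0.
# Two requests yield 32768 bytes; the full chunk at [5000:21384) is buffered,
# while the partial tail at [21384:32768) is popped into ``_last_part`` and
# the offset is stepped back to 21384, where the next call resumes.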
async def download_profile_photo(
        self: 'TelegramClient',
        entity: 'hints.EntityLike',
        file: 'hints.FileLike' = None,
        *,
        thumb,
        progress_callback) -> typing.Optional[str]:
    # hex(crc32(x.encode('ascii'))) for x in
    # ('User', 'Chat', 'UserFull', 'ChatFull')
    ENTITIES = (0x2da17977, 0xc5af5d94, 0x1f4661b9, 0xd49a2697)
    # ('InputPeer', 'InputUser', 'InputChannel')
    INPUTS = (0xc91c90b6, 0xe669bf46, 0x40f202fd)
    if not isinstance(entity, tlobject.TLObject) or entity.SUBCLASS_OF_ID in INPUTS:
        entity = await self.get_entity(entity)

    possible_names = []
    if entity.SUBCLASS_OF_ID not in ENTITIES:
        photo = entity
    else:
        if not hasattr(entity, 'photo'):
            # Special case: may be a ChatFull with photo:Photo
            # This is different from a normal UserProfilePhoto and Chat
            if not hasattr(entity, 'chat_photo'):
                return None

            return await _download_photo(
                self, entity.chat_photo, file, date=None,
                thumb=thumb, progress_callback=progress_callback
            )

        for attr in ('username', 'first_name', 'title'):
            possible_names.append(getattr(entity, attr, None))

        photo = entity.photo

    if isinstance(photo, (_tl.UserProfilePhoto, _tl.ChatPhoto)):
        thumb = enums.Size.ORIGINAL if thumb == () else enums.Size(thumb)

        dc_id = photo.dc_id
        loc = _tl.InputPeerPhotoFileLocation(
            peer=await self.get_input_entity(entity),
            photo_id=photo.photo_id,
            big=thumb >= enums.Size.LARGE
        )
    else:
        # It doesn't make any sense to check if `photo` can be used
        # as input location, because then this method would be able
        # to "download the profile photo of a message", i.e. its
        # media, which should be done with `download_media` instead.
        return None

    file = _get_proper_filename(
        file, 'profile_photo', '.jpg',
        possible_names=possible_names
    )

    try:
        result = await _download_file(
            self=self,
            input_location=loc,
            file=file,
            dc_id=dc_id
        )
        return result if file is bytes else file
    except errors.LocationInvalidError:
        # See issue #500, Android app fails as of v4.6.0 (1155).
        # The fix seems to be using the full channel chat photo.
        ie = await self.get_input_entity(entity)
        ty = helpers._entity_type(ie)
        if ty == helpers._EntityType.CHANNEL:
            full = await self(_tl.fn.channels.GetFullChannel(ie))
            return await _download_photo(
                self, full.full_chat.chat_photo, file,
                date=None, progress_callback=progress_callback,
                thumb=thumb
            )
        else:
            # Until there's a report for chats, no need to.
            return None


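# Usage sketch (hedged): this module-level function backs the friendly
# ``client.download_profile_photo()``, so typical use looks like:
#
#     path = await client.download_profile_photo('me')
#
# which saves the photo with a generated name and returns its path, or
# None if the entity has no photo.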
async def download_media(
        self: 'TelegramClient',
        message: 'hints.MessageLike',
        file: 'hints.FileLike' = None,
        *,
        thumb=(),
        progress_callback: 'hints.ProgressCallback' = None) -> typing.Optional[typing.Union[str, bytes]]:
    # Downloading large documents may be slow enough to require a new file reference
    # to be obtained mid-download. Store (input chat, message id) so that the message
    # can be re-fetched.
    msg_data = None

    # TODO This won't work for messageService
    if isinstance(message, _tl.Message):
        date = message.date
        media = message.media
        msg_data = (message.input_chat, message.id) if message.input_chat else None
    else:
        date = datetime.datetime.now()
        media = message

    if isinstance(media, _tl.MessageService):
        if isinstance(message.action,
                      _tl.MessageActionChatEditPhoto):
            media = media.photo

    if isinstance(media, _tl.MessageMediaWebPage):
        if isinstance(media.webpage, _tl.WebPage):
            media = media.webpage.document or media.webpage.photo

    if isinstance(media, (_tl.MessageMediaPhoto, _tl.Photo)):
        return await _download_photo(
            self, media, file, date, thumb, progress_callback
        )
    elif isinstance(media, (_tl.MessageMediaDocument, _tl.Document)):
        return await _download_document(
            self, media, file, date, thumb, progress_callback, msg_data
        )
    elif isinstance(media, _tl.MessageMediaContact):
        return _download_contact(
            self, media, file
        )
    elif isinstance(media, (_tl.WebDocument, _tl.WebDocumentNoProxy)):
        return await _download_web_document(
            self, media, file, progress_callback
        )


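# Usage sketch (hedged): this backs the friendly ``client.download_media()``.
# Given a ``msg`` carrying media, either of these is typical:
#
#     path = await client.download_media(msg)         # save to disk
#     data = await client.download_media(msg, bytes)  # keep in memory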
async def _download_file(
        self: 'TelegramClient',
        input_location: 'hints.FileLike',
        file: 'hints.OutFileLike' = None,
        *,
        part_size_kb: float = None,
        file_size: int = None,
        progress_callback: 'hints.ProgressCallback' = None,
        dc_id: int = None,
        key: bytes = None,
        iv: bytes = None,
        msg_data: tuple = None) -> typing.Optional[bytes]:
    """
    Low-level method to download files from their input location.

    Arguments
        input_location (:tl:`InputFileLocation`):
            The file location from which the file will be downloaded.
            See `telethon.utils.get_input_location` source for a complete
            list of supported types.

        file (`str` | `file`, optional):
            The output file path, directory, or stream-like object.
            If the path exists and is a file, it will be overwritten.

            If the file path is `None` or `bytes`, then the result
            will be saved in memory and returned as `bytes`.

        part_size_kb (`int`, optional):
            Chunk size when downloading files. The larger, the fewer
            requests will be made (up to 512KB maximum).

        file_size (`int`, optional):
            The file size that is about to be downloaded, if known.
            Only used if ``progress_callback`` is specified.

        progress_callback (`callable`, optional):
            A callback function accepting two parameters:
            ``(downloaded bytes, total)``. Note that the
            ``total`` is the provided ``file_size``.

        dc_id (`int`, optional):
            The data center the library should connect to in order
            to download the file. You shouldn't worry about this.

        key (`bytes`, optional):
            In case of an encrypted upload (secret chats), a key is supplied.

        iv (`bytes`, optional):
            In case of an encrypted upload (secret chats), an iv is supplied.
    """
    if not part_size_kb:
        if not file_size:
            part_size_kb = 64  # Reasonable default
        else:
            part_size_kb = utils.get_appropriated_part_size(file_size)

    part_size = int(part_size_kb * 1024)
    if part_size % MIN_CHUNK_SIZE != 0:
        raise ValueError(
            'The part size must be evenly divisible by 4096.')

    if isinstance(file, pathlib.Path):
        file = str(file.absolute())

    in_memory = file is None or file is bytes
    if in_memory:
        f = io.BytesIO()
    elif isinstance(file, str):
        # Ensure that we'll be able to download the media
        helpers.ensure_parent_dir_exists(file)
        f = open(file, 'wb')
    else:
        f = file

    try:
        async for chunk in _iter_download(
                self, input_location, request_size=part_size, dc_id=dc_id, msg_data=msg_data):
            if iv and key:
                chunk = AES.decrypt_ige(chunk, key, iv)
            r = f.write(chunk)
            if inspect.isawaitable(r):
                await r

            if progress_callback:
                r = progress_callback(f.tell(), file_size)
                if inspect.isawaitable(r):
                    await r

        # Not all IO objects have flush (see #1227)
        if callable(getattr(f, 'flush', None)):
            f.flush()

        if in_memory:
            return f.getvalue()
    finally:
        if isinstance(file, str) or in_memory:
            f.close()


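# Usage sketch (hedged; internal API, ``client`` and ``input_location`` are
# assumed to come from the caller): download straight into memory by passing
# ``bytes`` as the output file.
#
#     data = await _download_file(client, input_location, bytes)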
def iter_download(
        self: 'TelegramClient',
        file: 'hints.FileLike',
        *,
        offset: int = 0,
        stride: int = None,
        limit: int = None,
        chunk_size: int = None,
        request_size: int = MAX_CHUNK_SIZE,
        file_size: int = None,
        dc_id: int = None
):
    return _iter_download(
        self,
        file,
        offset=offset,
        stride=stride,
        limit=limit,
        chunk_size=chunk_size,
        request_size=request_size,
        file_size=file_size,
        dc_id=dc_id,
    )


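# Usage sketch (hedged): ``iter_download`` backs the friendly
# ``client.iter_download()``; a typical stream-to-buffer loop looks like:
#
#     buf = io.BytesIO()
#     async for chunk in client.iter_download(doc):
#         buf.write(chunk)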
def _iter_download(
        self: 'TelegramClient',
        file: 'hints.FileLike',
        *,
        offset: int = 0,
        stride: int = None,
        limit: int = None,
        chunk_size: int = None,
        request_size: int = MAX_CHUNK_SIZE,
        file_size: int = None,
        dc_id: int = None,
        msg_data: tuple = None
):
    info = utils._get_file_info(file)
    if info.dc_id is not None:
        dc_id = info.dc_id

    if file_size is None:
        file_size = info.size

    file = info.location

    if chunk_size is None:
        chunk_size = request_size

    if limit is None and file_size is not None:
        limit = (file_size + chunk_size - 1) // chunk_size

    if stride is None:
        stride = chunk_size
    elif stride < chunk_size:
        raise ValueError('stride must be >= chunk_size')

    request_size -= request_size % MIN_CHUNK_SIZE
    if request_size < MIN_CHUNK_SIZE:
        request_size = MIN_CHUNK_SIZE
    elif request_size > MAX_CHUNK_SIZE:
        request_size = MAX_CHUNK_SIZE

    if chunk_size == request_size \
            and offset % MIN_CHUNK_SIZE == 0 \
            and stride % MIN_CHUNK_SIZE == 0 \
            and (limit is None or offset % limit == 0):
        cls = _DirectDownloadIter
        self._log[__name__].info('Starting direct file download in chunks of '
                                 '%d at %d, stride %d', request_size, offset, stride)
    else:
        cls = _GenericDownloadIter
        self._log[__name__].info('Starting indirect file download in chunks of '
                                 '%d at %d, stride %d', request_size, offset, stride)

    return cls(
        self,
        limit,
        file=file,
        dc_id=dc_id,
        offset=offset,
        stride=stride,
        chunk_size=chunk_size,
        request_size=request_size,
        file_size=file_size,
        msg_data=msg_data,
    )


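# For example (illustrative): _iter_download(client, doc) with the defaults
# (offset=0, chunk_size == request_size) passes every alignment check and
# picks _DirectDownloadIter, while an unaligned offset such as 5000 (not a
# multiple of MIN_CHUNK_SIZE) falls back to _GenericDownloadIter, which
# over-fetches from an aligned offset and slices the result.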
def _get_thumb(thumbs, thumb):
    if isinstance(thumb, tlobject.TLObject):
        return thumb

    thumb = enums.Size(thumb)
    return min(
        thumbs,
        default=None,
        key=lambda t: abs(thumb - enums.Size(t.type))
    )

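# For example (hedged): given a photo with several PhotoSize entries,
# _get_thumb(photo.sizes, enums.Size.MEDIUM) returns the entry whose type
# char maps closest to MEDIUM, while passing a PhotoSize TLObject through
# returns it unchanged.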
def _download_cached_photo_size(self: 'TelegramClient', size, file):
    # No need to download anything, simply write the bytes
    if isinstance(size, _tl.PhotoStrippedSize):
        data = utils.stripped_photo_to_jpg(size.bytes)
    else:
        data = size.bytes

    if file is bytes:
        return data
    elif isinstance(file, str):
        helpers.ensure_parent_dir_exists(file)
        f = open(file, 'wb')
    else:
        f = file

    try:
        f.write(data)
    finally:
        if isinstance(file, str):
            f.close()
    return file


async def _download_photo(self: 'TelegramClient', photo, file, date, thumb, progress_callback):
    """Specialized version of .download_media() for photos"""
    # Determine the photo and its largest size
    if isinstance(photo, _tl.MessageMediaPhoto):
        photo = photo.photo
    if not isinstance(photo, _tl.Photo):
        return

    # Include video sizes here (but they may be None so provide an empty list)
    size = _get_thumb(photo.sizes + (photo.video_sizes or []), thumb)
    if not size or isinstance(size, _tl.PhotoSizeEmpty):
        return

    if isinstance(size, _tl.VideoSize):
        file = _get_proper_filename(file, 'video', '.mp4', date=date)
    else:
        file = _get_proper_filename(file, 'photo', '.jpg', date=date)

    if isinstance(size, (_tl.PhotoCachedSize, _tl.PhotoStrippedSize)):
        return _download_cached_photo_size(self, size, file)

    if isinstance(size, _tl.PhotoSizeProgressive):
        file_size = max(size.sizes)
    else:
        file_size = size.size

    result = await _download_file(
        self=self,
        input_location=_tl.InputPhotoFileLocation(
            id=photo.id,
            access_hash=photo.access_hash,
            file_reference=photo.file_reference,
            thumb_size=size.type
        ),
        file=file,
        file_size=file_size,
        progress_callback=progress_callback
    )
    return result if file is bytes else file


def _get_kind_and_names(attributes):
    """Gets kind and possible names for :tl:`DocumentAttribute`."""
    kind = 'document'
    possible_names = []
    for attr in attributes:
        if isinstance(attr, _tl.DocumentAttributeFilename):
            possible_names.insert(0, attr.file_name)

        elif isinstance(attr, _tl.DocumentAttributeAudio):
            kind = 'audio'
            if attr.performer and attr.title:
                possible_names.append('{} - {}'.format(
                    attr.performer, attr.title
                ))
            elif attr.performer:
                possible_names.append(attr.performer)
            elif attr.title:
                possible_names.append(attr.title)
            elif attr.voice:
                kind = 'voice'

    return kind, possible_names

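# For example (hedged): a document with DocumentAttributeAudio(performer='A',
# title='B') and DocumentAttributeFilename(file_name='song.mp3') yields
# ('audio', ['song.mp3', 'A - B']); the filename wins because it is inserted
# at the front of the list.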
async def _download_document(
        self, document, file, date, thumb, progress_callback, msg_data):
    """Specialized version of .download_media() for documents."""
    if isinstance(document, _tl.MessageMediaDocument):
        document = document.document
    if not isinstance(document, _tl.Document):
        return

    if thumb == ():
        kind, possible_names = _get_kind_and_names(document.attributes)
        file = _get_proper_filename(
            file, kind, utils.get_extension(document),
            date=date, possible_names=possible_names
        )
        size = None
    else:
        file = _get_proper_filename(file, 'photo', '.jpg', date=date)
        size = _get_thumb(document.thumbs, thumb)
        if isinstance(size, (_tl.PhotoCachedSize, _tl.PhotoStrippedSize)):
            return _download_cached_photo_size(self, size, file)

    result = await _download_file(
        self=self,
        input_location=_tl.InputDocumentFileLocation(
            id=document.id,
            access_hash=document.access_hash,
            file_reference=document.file_reference,
            thumb_size=size.type if size else ''
        ),
        file=file,
        file_size=size.size if size else document.size,
        progress_callback=progress_callback,
        msg_data=msg_data,
    )

    return result if file is bytes else file


def _download_contact(self: 'TelegramClient', mm_contact, file):
    """
    Specialized version of .download_media() for contacts.
    Will make use of the vCard 4.0 format.
    """
    first_name = mm_contact.first_name
    last_name = mm_contact.last_name
    phone_number = mm_contact.phone_number

    # Remove these pesky characters
    first_name = first_name.replace(';', '')
    last_name = (last_name or '').replace(';', '')
    result = (
        'BEGIN:VCARD\n'
        'VERSION:4.0\n'
        'N:{f};{l};;;\n'
        'FN:{f} {l}\n'
        'TEL;TYPE=cell;VALUE=uri:tel:+{p}\n'
        'END:VCARD\n'
    ).format(f=first_name, l=last_name, p=phone_number).encode('utf-8')

    if file is bytes:
        return result
    elif isinstance(file, str):
        file = _get_proper_filename(
            file, 'contact', '.vcard',
            possible_names=[first_name, phone_number, last_name]
        )
        f = open(file, 'wb')
    else:
        f = file

    try:
        f.write(result)
    finally:
        # Only close the stream if we opened it
        if isinstance(file, str):
            f.close()

    return file

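# For example (hedged): a contact with first_name='John', last_name='Doe' and
# phone_number='123456789' serialises to:
#
#     BEGIN:VCARD
#     VERSION:4.0
#     N:John;Doe;;;
#     FN:John Doe
#     TEL;TYPE=cell;VALUE=uri:tel:+123456789
#     END:VCARD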
async def _download_web_document(self: 'TelegramClient', web, file, progress_callback):
    """
    Specialized version of .download_media() for web documents.
    """
    if not aiohttp:
        raise ValueError(
            'Cannot download web documents without the aiohttp '
            'dependency; install it with "pip install aiohttp"'
        )

    # TODO Better way to get opened handles of files and auto-close
    in_memory = file is bytes
    if in_memory:
        f = io.BytesIO()
    elif isinstance(file, str):
        kind, possible_names = _get_kind_and_names(web.attributes)
        file = _get_proper_filename(
            file, kind, utils.get_extension(web),
            possible_names=possible_names
        )
        f = open(file, 'wb')
    else:
        f = file

    try:
        async with aiohttp.ClientSession() as session:
            # TODO Use progress_callback; get content length from response
            # https://github.com/telegramdesktop/tdesktop/blob/c7e773dd9aeba94e2be48c032edc9a78bb50234e/Telegram/SourceFiles/ui/images.cpp#L1318-L1319
            async with session.get(web.url) as response:
                while True:
                    chunk = await response.content.read(128 * 1024)
                    if not chunk:
                        break
                    f.write(chunk)
    finally:
        if isinstance(file, str) or file is bytes:
            f.close()

    return f.getvalue() if in_memory else file


def _get_proper_filename(file, kind, extension,
                         date=None, possible_names=None):
    """Gets a proper filename for 'file', if this is a path.

    'kind' should be the kind of the output file (photo, document...)
    'extension' should be the extension to be added to the file if
    the filename doesn't have any yet
    'date' should be when this file was originally sent, if known
    'possible_names' should be an ordered list of possible names

    If no modification is made to the path, any existing file
    will be overwritten.
    If any modification is made to the path, this method will
    ensure that no existing file will be overwritten.
    """
    if isinstance(file, pathlib.Path):
        file = str(file.absolute())

    if file is not None and not isinstance(file, str):
        # Probably a stream-like object, we cannot set a filename here
        return file

    if file is None:
        file = ''
    elif os.path.isfile(file):
        # Make no modifications to valid existing paths
        return file

    if os.path.isdir(file) or not file:
        try:
            name = None if possible_names is None else next(
                x for x in possible_names if x
            )
        except StopIteration:
            name = None

        if not name:
            if not date:
                date = datetime.datetime.now()
            name = '{}_{}-{:02}-{:02}_{:02}-{:02}-{:02}'.format(
                kind,
                date.year, date.month, date.day,
                date.hour, date.minute, date.second,
            )
        file = os.path.join(file, name)

    directory, name = os.path.split(file)
    name, ext = os.path.splitext(name)
    if not ext:
        ext = extension

    result = os.path.join(directory, name + ext)
    if not os.path.isfile(result):
        return result

    i = 1
    while True:
        result = os.path.join(directory, '{} ({}){}'.format(name, i, ext))
        if not os.path.isfile(result):
            return result
        i += 1

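
# For example (hedged, assuming a 'downloads/' directory exists):
# _get_proper_filename('downloads/', 'photo', '.jpg',
# date=datetime.datetime(2022, 1, 1)) would produce
# 'downloads/photo_2022-01-01_00-00-00.jpg', and if that file already
# exists, 'downloads/photo_2022-01-01_00-00-00 (1).jpg' and so on.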