Greatly improve how a valid path is chosen when downloading files

This commit is contained in:
Lonami Exo 2017-08-24 17:44:38 +02:00
parent 8cfc4d0fda
commit d5323c5506
2 changed files with 125 additions and 91 deletions

View File

@ -563,22 +563,24 @@ class TelegramClient(TelegramBareClient):
# region Downloading media requests # region Downloading media requests
def download_profile_photo(self, def download_profile_photo(self, entity, file=None, download_big=True):
entity,
file_path='',
add_extension=True,
download_big=True):
"""Downloads the profile photo for an user or a chat (channels too). """Downloads the profile photo for an user or a chat (channels too).
Returns None if no photo was provided, or if it was Empty. Returns None if no photo was provided, or if it was Empty.
If an entity itself (an user, chat or channel) is given, the photo If an entity itself (an user, chat or channel) is given, the photo
to be downloaded will be downloaded automatically. to be downloaded will be downloaded automatically.
On success, the file path is returned since it may differ. On success, the file path is returned since it may differ from
the one provided.
The specified output file can either be a file path, a directory,
or a stream-like object. If the path exists and is a file, it will
be overwritten.
The entity may be a phone or an username at the expense of The entity may be a phone or an username at the expense of
some performance loss. some performance loss.
""" """
possible_names = []
if not isinstance(entity, TLObject) or type(entity).subclass_of_id in ( if not isinstance(entity, TLObject) or type(entity).subclass_of_id in (
0x2da17977, 0xc5af5d94, 0x1f4661b9, 0xd49a2697 0x2da17977, 0xc5af5d94, 0x1f4661b9, 0xd49a2697
): ):
@ -593,26 +595,15 @@ class TelegramClient(TelegramBareClient):
# This is different from a normal UserProfilePhoto and Chat # This is different from a normal UserProfilePhoto and Chat
if hasattr(entity, 'chat_photo'): if hasattr(entity, 'chat_photo'):
return self._download_photo( return self._download_photo(
entity.chat_photo, file_path, entity.chat_photo, file,
add_extension=add_extension date=None, progress_callback=None
) )
else: else:
# Give up # Give up
return None return None
was_dir = None
if os.path.isdir(file_path):
was_dir = file_path
file_path = '' # File path needs to be empty to infer it
if not file_path:
for attr in ('username', 'first_name', 'title'): for attr in ('username', 'first_name', 'title'):
file_path = getattr(entity, attr, '') possible_names.append(getattr(entity, attr, None))
if file_path:
break
if was_dir:
file_path = os.path.join(was_dir, file_path)
entity = entity.photo entity = entity.photo
@ -620,17 +611,16 @@ class TelegramClient(TelegramBareClient):
not isinstance(entity, ChatPhoto): not isinstance(entity, ChatPhoto):
return None return None
if os.path.isdir(file_path) or not file_path:
file_path = 'profile_photo_{}'.format(datetime.now())
if add_extension:
file_path += get_extension(entity)
if download_big: if download_big:
photo_location = entity.photo_big photo_location = entity.photo_big
else: else:
photo_location = entity.photo_small photo_location = entity.photo_small
file = self._get_proper_filename(
file, 'profile_photo', '.jpg',
possible_names=possible_names
)
# Download the media with the largest size input file location # Download the media with the largest size input file location
self.download_file( self.download_file(
InputFileLocation( InputFileLocation(
@ -638,20 +628,18 @@ class TelegramClient(TelegramBareClient):
local_id=photo_location.local_id, local_id=photo_location.local_id,
secret=photo_location.secret secret=photo_location.secret
), ),
file_path file
) )
return file_path return file
def download_media(self, def download_media(self, message, file=None, progress_callback=None):
message,
file,
add_extension=True,
progress_callback=None):
"""Downloads the media from a specified Message (it can also be """Downloads the media from a specified Message (it can also be
the message.media) into the desired file (a stream or str), the message.media) into the desired file (a stream or str),
optionally finding its extension automatically. optionally finding its extension automatically.
The file may be an existing directory, or a full filename. The specified output file can either be a file path, a directory,
or a stream-like object. If the path exists and is a file, it will
be overwritten.
If the operation succeeds, the path will be returned (since If the operation succeeds, the path will be returned (since
the extension may have been added automatically). Otherwise, the extension may have been added automatically). Otherwise,
@ -664,44 +652,34 @@ class TelegramClient(TelegramBareClient):
# TODO This won't work for messageService # TODO This won't work for messageService
if isinstance(message, Message): if isinstance(message, Message):
date = message.date date = message.date
message = message.media media = message.media
else: else:
date = datetime.now() date = datetime.now()
media = message
if isinstance(message, MessageMediaPhoto): if isinstance(media, MessageMediaPhoto):
if isinstance(file, str) and (os.path.isdir(file) or not file):
file = os.path.join(file, 'photo_{}'.format(date))
return self._download_photo( return self._download_photo(
message, file, add_extension, progress_callback media, file, date, progress_callback
) )
elif isinstance(media, MessageMediaDocument):
elif isinstance(message, MessageMediaDocument):
# Pass the date if a better filename cannot be inferred
return self._download_document( return self._download_document(
message, file, add_extension, date, progress_callback media, file, date, progress_callback
) )
elif isinstance(media, MessageMediaContact):
elif isinstance(message, MessageMediaContact):
return self._download_contact( return self._download_contact(
message, file, add_extension media, file
) )
def _download_photo(self, def _download_photo(self, mm_photo, file, date, progress_callback):
message_media_photo,
file,
add_extension=False,
progress_callback=None):
"""Specialized version of .download_media() for photos""" """Specialized version of .download_media() for photos"""
# Determine the photo and its largest size # Determine the photo and its largest size
photo = message_media_photo.photo photo = mm_photo.photo
largest_size = photo.sizes[-1] largest_size = photo.sizes[-1]
file_size = largest_size.size file_size = largest_size.size
largest_size = largest_size.location largest_size = largest_size.location
if isinstance(file, str) and add_extension: file = self._get_proper_filename(file, 'photo', '.jpg', date=date)
file += get_extension(message_media_photo)
# Download the media with the largest size input file location # Download the media with the largest size input file location
self.download_file( self.download_file(
@ -716,28 +694,26 @@ class TelegramClient(TelegramBareClient):
) )
return file return file
def _download_document(self, message_media_document, file, def _download_document(self, mm_doc, file, date, progress_callback):
add_extension, date, progress_callback):
"""Specialized version of .download_media() for documents""" """Specialized version of .download_media() for documents"""
document = message_media_document.document document = mm_doc.document
file_size = document.size file_size = document.size
if os.path.isdir(file) or not file: possible_names = []
for attr in document.attributes: for attr in document.attributes:
if isinstance(attr, DocumentAttributeFilename): if isinstance(attr, DocumentAttributeFilename):
file = os.path.join(file, attr.file_name) possible_names.insert(0, attr.file_name)
break # This attribute has higher preference
elif isinstance(attr, DocumentAttributeAudio): elif isinstance(attr, DocumentAttributeAudio):
file = os.path.join( possible_names.append('{} - {}'.format(
file, '{} - {}'.format(attr.performer, attr.title) attr.performer, attr.title
))
file = self._get_proper_filename(
file, 'document', get_extension(mm_doc),
date=date, possible_names=possible_names
) )
file = os.path.join(file, 'document_{}'.format(date))
if isinstance(file, str) and add_extension:
file += get_extension(message_media_document)
self.download_file( self.download_file(
InputDocumentFileLocation( InputDocumentFileLocation(
id=document.id, id=document.id,
@ -751,25 +727,19 @@ class TelegramClient(TelegramBareClient):
return file return file
@staticmethod @staticmethod
def _download_contact(message_media_contact, file, add_extension=True): def _download_contact(mm_contact, file):
"""Specialized version of .download_media() for contacts. """Specialized version of .download_media() for contacts.
Will make use of the vCard 4.0 format Will make use of the vCard 4.0 format
""" """
first_name = message_media_contact.first_name first_name = mm_contact.first_name
last_name = message_media_contact.last_name last_name = mm_contact.last_name
phone_number = message_media_contact.phone_number phone_number = mm_contact.phone_number
if isinstance(file, str): if isinstance(file, str):
if not file: file = TelegramClient._get_proper_filename(
file = phone_number file, 'contact', '.vcard',
possible_names=[first_name, phone_number, last_name]
# The only way we can save a contact in an understandable )
# way by phones is by using the .vCard format
if add_extension:
file += '.vcard'
# Ensure that we'll be able to download the contact
utils.ensure_parent_dir_exists(file)
f = open(file, 'w', encoding='utf-8') f = open(file, 'w', encoding='utf-8')
else: else:
f = file f = file
@ -791,6 +761,64 @@ class TelegramClient(TelegramBareClient):
return file return file
@staticmethod
def _get_proper_filename(file, kind, extension,
                         date=None, possible_names=None):
    """Gets a proper filename for 'file', if this is a path.

    'file' may be None, a path (to a file or to a directory), or
    any non-str object, which is returned untouched since a name
    cannot be set on it (probably a stream-like object).

    'kind' should be the kind of the output file (photo, document...)
    and is used as the prefix of the generated fallback name.

    'extension' should be the extension to be added to the file if
    the filename doesn't have any yet.

    'date' should be when this file was originally sent, if known.
    If it is None, the current local time is used for the fallback
    name instead.

    'possible_names' should be an ordered list of possible names;
    the first non-empty one is used. Path separators found in it
    are replaced by '_' so the name cannot leave the directory.

    If no modification is made to the path, any existing file
    will be overwritten.

    If any modification is made to the path, this method will
    ensure that no existing file will be overwritten.

    Returns the resulting path (or 'file' itself if it wasn't a path).
    """
    # Local import: keeps this helper independent from how the
    # enclosing module happens to import 'datetime'.
    from datetime import datetime

    if file is not None and not isinstance(file, str):
        # Probably a stream-like object, we cannot set a filename here
        return file

    if file is None:
        file = ''
    elif os.path.isfile(file):
        # Make no modifications to valid existing paths
        return file

    if os.path.isdir(file) or not file:
        # No filename was given, so one has to be inferred
        name = next((x for x in possible_names or () if x), None)
        if name:
            # Candidate names come from remote data; without this a
            # separator (or an absolute path) would let os.path.join
            # place the file outside of the requested directory.
            for separator in (os.sep, os.altsep):
                if separator:
                    name = name.replace(separator, '_')
        else:
            if date is None:
                # Unknown original date, fall back to the current time
                date = datetime.now()

            name = '{}_{}-{:02}-{:02}_{:02}-{:02}-{:02}'.format(
                kind,
                date.year, date.month, date.day,
                date.hour, date.minute, date.second,
            )
        file = os.path.join(file, name)

    directory, name = os.path.split(file)
    name, ext = os.path.splitext(name)
    if not ext:
        ext = extension

    result = os.path.join(directory, name + ext)
    if not os.path.isfile(result):
        return result

    # The path was modified and is already taken: append " (i)" with
    # the first free index so that nothing gets overwritten.
    i = 1
    while True:
        result = os.path.join(directory, '{} ({}){}'.format(name, i, ext))
        if not os.path.isfile(result):
            return result
        i += 1
# endregion # endregion
# endregion # endregion

View File

@ -9,7 +9,8 @@ from .tl.types import (
ChatPhoto, InputPeerChannel, InputPeerChat, InputPeerUser, InputPeerEmpty, ChatPhoto, InputPeerChannel, InputPeerChat, InputPeerUser, InputPeerEmpty,
MessageMediaDocument, MessageMediaPhoto, PeerChannel, InputChannel, MessageMediaDocument, MessageMediaPhoto, PeerChannel, InputChannel,
UserEmpty, InputUser, InputUserEmpty, InputUserSelf, InputPeerSelf, UserEmpty, InputUser, InputUserEmpty, InputUserSelf, InputPeerSelf,
PeerChat, PeerUser, User, UserFull, UserProfilePhoto) PeerChat, PeerUser, User, UserFull, UserProfilePhoto, Document
)
def get_display_name(entity): def get_display_name(entity):
@ -42,12 +43,17 @@ def get_extension(media):
isinstance(media, MessageMediaPhoto)): isinstance(media, MessageMediaPhoto)):
return '.jpg' return '.jpg'
# Documents will come with a mime type, from which we can guess their mime type # Documents will come with a mime type
if isinstance(media, MessageMediaDocument): if isinstance(media, MessageMediaDocument):
if isinstance(media.document, Document):
if media.document.mime_type == 'application/octet-stream':
# Octet stream are just bytes, which have no default extension
return ''
else:
extension = guess_extension(media.document.mime_type) extension = guess_extension(media.document.mime_type)
return extension if extension else '' return extension if extension else ''
return None return ''
def get_input_peer(entity): def get_input_peer(entity):