Handle hachoir metadata more gracefully, bump 1.6.2

Since bf11bbd, _get_extension supports plenty more things
which hachoir cannot deal with. Add some extra safety checks.
This commit is contained in:
Lonami Exo 2019-03-04 08:50:33 +01:00
parent 6799295115
commit 0f69455dc7
2 changed files with 30 additions and 15 deletions

View File

@ -8,6 +8,7 @@ import imghdr
import inspect import inspect
import io import io
import itertools import itertools
import logging
import math import math
import mimetypes import mimetypes
import os import os
@ -66,6 +67,8 @@ VALID_USERNAME_RE = re.compile(
re.IGNORECASE re.IGNORECASE
) )
_log = logging.getLogger(__name__)
def chunks(iterable, size=100): def chunks(iterable, size=100):
""" """
@ -490,6 +493,19 @@ def get_message_id(message):
raise TypeError('Invalid message type: {}'.format(type(message))) raise TypeError('Invalid message type: {}'.format(type(message)))
def _get_metadata(file):
    """
    Best-effort extraction of `hachoir` metadata for ``file``.

    The extraction is only attempted when `hachoir` is available and
    ``file`` is a `str` path pointing to an existing regular file;
    `hachoir` only deals with paths to on-disk files, whereas
    `_get_extension` accepts a few other kinds of input.

    Returns whatever ``hachoir.metadata.extractMetadata`` produces, or
    ``None`` when the extraction was not attempted or when it failed.
    """
    if not hachoir:
        return None

    if not isinstance(file, str) or not os.path.isfile(file):
        return None

    # The parser may fail for any reason, and a metadata failure should
    # never crash the caller, so every error is logged and swallowed.
    try:
        with hachoir.parser.createParser(file) as parser:
            return hachoir.metadata.extractMetadata(parser)
    except Exception as err:
        _log.warning('Failed to analyze %s: %s %s', file, err.__class__, err)

    return None
def get_attributes(file, *, attributes=None, mime_type=None, def get_attributes(file, *, attributes=None, mime_type=None,
force_document=False, voice_note=False, video_note=False, force_document=False, voice_note=False, video_note=False,
supports_streaming=False): supports_streaming=False):
@ -505,9 +521,9 @@ def get_attributes(file, *, attributes=None, mime_type=None,
attr_dict = {types.DocumentAttributeFilename: attr_dict = {types.DocumentAttributeFilename:
types.DocumentAttributeFilename(os.path.basename(name))} types.DocumentAttributeFilename(os.path.basename(name))}
if is_audio(file) and hachoir is not None: if is_audio(file):
with hachoir.parser.createParser(file) as parser: m = _get_metadata(file)
m = hachoir.metadata.extractMetadata(parser) if m:
attr_dict[types.DocumentAttributeAudio] = \ attr_dict[types.DocumentAttributeAudio] = \
types.DocumentAttributeAudio( types.DocumentAttributeAudio(
voice=voice_note, voice=voice_note,
@ -518,9 +534,8 @@ def get_attributes(file, *, attributes=None, mime_type=None,
) )
if not force_document and is_video(file): if not force_document and is_video(file):
if hachoir: m = _get_metadata(file)
with hachoir.parser.createParser(file) as parser: if m:
m = hachoir.metadata.extractMetadata(parser)
doc = types.DocumentAttributeVideo( doc = types.DocumentAttributeVideo(
round_message=video_note, round_message=video_note,
w=m.get('width') if m.has('width') else 0, w=m.get('width') if m.has('width') else 0,

View File

@ -1,3 +1,3 @@
# Versions should comply with PEP440. # Versions should comply with PEP440.
# This line is parsed in setup.py: # This line is parsed in setup.py:
__version__ = '1.6.1' __version__ = '1.6.2'