Use hachoir to determine audio and video metadata if possible

Closes #611
2025-12-06 17:53:54 +03:00 · 2018-02-17 13:00:58 +01:00 · 2018-02-17 13:00:58 +01:00 · 33fd6895d3
commit 33fd6895d3
parent 2bfe86cda1
3 changed files with 46 additions and 6 deletions
--- a/optional-requirements.txt
+++ b/optional-requirements.txt
@ -1 +1,2 @@
 cryptg
 hachoir3
--- a/telethon/telegram_client.py
+++ b/telethon/telegram_client.py
@ -23,6 +23,13 @@ try:
 except ImportError:
    socks = None
 try:
    import hachoir
    import hachoir.metadata
    import hachoir.parser
 except ImportError:
    hachoir = None
 from . import TelegramBareClient
 from . import helpers, utils
 from .errors import (
@ -1021,6 +1028,10 @@ class TelegramClient(TelegramBareClient):
           If "is_voice_note" is present in kwargs, regardless of its value,
           and the file is sent as a document, it will be sent as a voice note.
        Notes:
            If the ``hachoir3`` package (``hachoir`` module) is installed,
            it will be used to determine metadata from audio and video files.
        Returns:
            The message (or messages) containing the sent file.
        """
@ -1084,12 +1095,32 @@ class TelegramClient(TelegramBareClient):
                attr_dict = {
                    DocumentAttributeFilename:
                        DocumentAttributeFilename(os.path.basename(file))
                    # TODO If the input file is an audio, find out:
                    # Performer and song title and add DocumentAttributeAudio
                }
                if utils.is_audio(file) and hachoir:
                    m = hachoir.metadata.extractMetadata(
                        hachoir.parser.createParser(file)
                    )
                    attr_dict[DocumentAttributeAudio] = DocumentAttributeAudio(
                        title=m.get('title') if m.has('title') else None,
                        performer=m.get('author') if m.has('author') else None,
                        duration=int(m.get('duration').seconds
                                     if m.has('duration') else 0)
                    )
                if not force_document and utils.is_video(file):
-                    attr_dict[DocumentAttributeVideo] = \
+                    if hachoir:
-                        DocumentAttributeVideo(0, 0, 0)
+                        m = hachoir.metadata.extractMetadata(
                            hachoir.parser.createParser(file)
                        )
                        doc = DocumentAttributeVideo(
                            w=m.get('width') if m.has('width') else 0,
                            h=m.get('height') if m.has('height') else 0,
                            duration=int(m.get('duration').seconds
                                         if m.has('duration') else 0)
                        )
                    else:
                        doc = DocumentAttributeVideo(0, 0, 0)
                    attr_dict[DocumentAttributeVideo] = doc
            else:
                attr_dict = {
                    DocumentAttributeFilename:
@ -1097,8 +1128,11 @@ class TelegramClient(TelegramBareClient):
                }
            if 'is_voice_note' in kwargs:
-                attr_dict[DocumentAttributeAudio] = \
+                if DocumentAttributeAudio in attr_dict:
-                    DocumentAttributeAudio(0, voice=True)
+                    attr_dict[DocumentAttributeAudio].voice = True
                else:
                    attr_dict[DocumentAttributeAudio] = \
                        DocumentAttributeAudio(0, voice=True)
            # Now override the attributes if any. As we have a dict of
            # {cls: instance}, we can override any class with the list
--- a/telethon/utils.py
+++ b/telethon/utils.py
@ -326,6 +326,11 @@ def is_image(file):
    return (mimetypes.guess_type(file)[0] or '').startswith('image/')
 def is_audio(file):
    """Tell whether the MIME type guessed from ``file`` is an audio type.

    The guess is made by ``mimetypes.guess_type``; when no type can be
    guessed the result is False.
    """
    guessed_type, _ = mimetypes.guess_type(file)
    # An unknown type is reported as None; fall back to an empty string.
    mime = guessed_type or ''
    return mime.startswith('audio/')
 def is_video(file):
    """Tell whether the MIME type guessed from ``file`` is a video type.

    The guess is made by ``mimetypes.guess_type``; when no type can be
    guessed the result is False.
    """
    guessed_type, _ = mimetypes.guess_type(file)
    # An unknown type is reported as None; fall back to an empty string.
    mime = guessed_type or ''
    return mime.startswith('video/')