diff --git a/README-long.rst b/README-long.rst
index 86c965bc..8393dade 100755
--- a/README-long.rst
+++ b/README-long.rst
@@ -147,6 +147,7 @@ This class is further specialized into further errors:
 * ``ForbiddenError`` (403), privacy violation error.
 * ``NotFoundError`` (404), make sure you're invoking ``Request``'s!
 * ``FloodError`` (420), the same request was repeated many times. Must wait ``.seconds``.
+* ``CdnFileTamperedError``, if the media you were trying to download has been altered.
 
 Further specialization is also available, for instance, the ``SessionPasswordNeededError``
 when signing in means that a password must be provided to continue.
diff --git a/telethon/crypto/__init__.py b/telethon/crypto/__init__.py
index a3513063..6dbcdc36 100644
--- a/telethon/crypto/__init__.py
+++ b/telethon/crypto/__init__.py
@@ -1,3 +1,4 @@
 from .aes import AES
 from .auth_key import AuthKey
 from .factorization import Factorization
+from .hash_checker import HashChecker
diff --git a/telethon/crypto/hash_checker.py b/telethon/crypto/hash_checker.py
new file mode 100644
index 00000000..5cdcf73f
--- /dev/null
+++ b/telethon/crypto/hash_checker.py
@@ -0,0 +1,73 @@
+from hashlib import sha256
+from ..errors import CdnFileTamperedError
+
+
+class HashChecker:
+    """Verifies downloaded CDN file chunks against their SHA-256 hashes."""
+
+    def __init__(self, cdn_file_hashes):
+        """
+        :param cdn_file_hashes: objects exposing ``.offset``, ``.limit``
+                                and ``.hash``, one per hashed region.
+        """
+        # Copy so the caller's list is never mutated by this checker
+        self.cdn_file_hashes = list(cdn_file_hashes)
+        self.shaes = [sha256() for _ in self.cdn_file_hashes]
+
+    def check(self, offset, data):
+        """
+        Feeds ``data`` (found at ``offset`` in the file) to every region
+        it overlaps, and verifies the regions that lie before ``offset``.
+
+        :raises CdnFileTamperedError: if a verified region doesn't match.
+        """
+        end = offset + len(data)
+        pending_hashes, pending_shaes = [], []
+        for cdn_hash, sha in zip(self.cdn_file_hashes, self.shaes):
+            inter = self.intersect(
+                cdn_hash.offset, cdn_hash.offset + cdn_hash.limit,
+                offset, end
+            )
+            if inter:
+                x1, x2 = inter[0] - offset, inter[1] - offset
+                sha.update(data[x1:x2])
+            elif offset > cdn_hash.offset:
+                # No overlap and the chunk starts after this region
+                if cdn_hash.hash != sha.digest():
+                    raise CdnFileTamperedError()
+                continue  # Verified, so stop tracking it
+
+            pending_hashes.append(cdn_hash)
+            pending_shaes.append(sha)
+
+        # Rebuild the lists instead of calling .remove() inside the loop:
+        # removing while zip() iterates skips the entry right after the
+        # removed one, so its share of ``data`` would never get hashed
+        self.cdn_file_hashes = pending_hashes
+        self.shaes = pending_shaes
+
+    def finish_check(self):
+        """
+        Verifies every region still pending; call it once the download
+        is over.
+
+        :raises CdnFileTamperedError: if any pending region doesn't match.
+        """
+        for cdn_hash, sha in zip(self.cdn_file_hashes, self.shaes):
+            if cdn_hash.hash != sha.digest():
+                raise CdnFileTamperedError()
+
+        self.cdn_file_hashes.clear()
+        self.shaes.clear()
+
+    @staticmethod
+    def intersect(x1, x2, z1, z2):
+        """
+        Returns the overlap of the ranges ``x1..x2`` and ``z1..z2`` as a
+        ``(start, end)`` tuple, or ``None`` if there is none. Ranges that
+        only touch (``x1 == z2``) yield an empty ``(z2, z2)`` tuple.
+        """
+        if x1 > z1:
+            return None if x1 > z2 else (x1, min(x2, z2))
+        else:
+            return (z1, min(x2, z2)) if x2 > z1 else None
diff --git a/telethon/errors/__init__.py b/telethon/errors/__init__.py
index 8e98f6c8..ca07f6c0 100644
--- a/telethon/errors/__init__.py
+++ b/telethon/errors/__init__.py
@@ -3,7 +3,7 @@ import re
 
 from .common import (
     ReadCancelledError, InvalidParameterError, TypeNotFoundError,
-    InvalidChecksumError
+    InvalidChecksumError, CdnFileTamperedError
 )
 
 from .rpc_errors import (
diff --git a/telethon/errors/common.py b/telethon/errors/common.py
index 4ce0a365..748c4309 100644
--- a/telethon/errors/common.py
+++ b/telethon/errors/common.py
@@ -35,3 +35,14 @@ class InvalidChecksumError(Exception):
 
         self.checksum = checksum
         self.valid_checksum = valid_checksum
+
+
+class CdnFileTamperedError(Exception):
+    """Raised when a file downloaded from a CDN fails its hash check."""
+
+    def __init__(self):
+        # Only the message is passed on: also passing ``self`` would make
+        # it part of ``args`` and garble ``str(error)``.
+        super().__init__(
+            'The CDN file has been altered and its download cancelled.'
+        )
diff --git a/telethon/telegram_bare_client.py b/telethon/telegram_bare_client.py
index 665f6667..24b60f6f 100644
--- a/telethon/telegram_bare_client.py
+++ b/telethon/telegram_bare_client.py
@@ -12,8 +12,7 @@ from .errors import (
 )
 from .network import authenticator, MtProtoSender, TcpTransport
 from .utils import get_appropriated_part_size
-from .crypto import AES
-from .crypto import rsa
+from .crypto import rsa, HashChecker
 
 # For sending and receiving requests
 from .tl import TLObject, JsonSession
@@ -487,6 +486,7 @@ class TelegramBareClient:
         try:
             offset_index = 0
             cdn_file_token = None
+            hash_checker = None
 
             def encrypt_method(x):
                 return x  # Defaults to no-op
@@ -505,7 +505,11 @@ class TelegramBareClient:
                         ))
 
                     if isinstance(result, FileCdnRedirect):
-                        client, cdn_file_token, encrypt_method, result = \
+                        cdn_file_token = result.file_token
+                        hash_checker = HashChecker(
+                            result.cdn_file_hashes
+                        )
+                        client, encrypt_method, result = \
                             self._prepare_cdn_redirect(
                                 result, offset, part_size
                             )
@@ -524,9 +528,16 @@ class TelegramBareClient:
                 # So there is nothing left to download and write
                 if not result.bytes:
                     # Return some extra information, unless it's a cdn file
+                    if hash_checker is not None:
+                        hash_checker.finish_check()
                     return getattr(result, 'type', '')
 
-                f.write(encrypt_method(result.bytes))
+                result.bytes = encrypt_method(result.bytes)
+                # hash_checker stays None unless a CDN redirect happened
+                if hash_checker is not None:
+                    hash_checker.check(offset, result.bytes)
+
+                f.write(result.bytes)
                 if progress_callback:
                     progress_callback(f.tell(), file_size)
         finally:
@@ -534,7 +545,7 @@ class TelegramBareClient:
                 f.close()
 
     def _prepare_cdn_redirect(self, cdn_redirect, offset, part_size):
-        """Returns (client, cdn_file_token, encrypt_method, result)"""
+        """Returns (client, encrypt_method, result)"""
         # https://core.telegram.org/cdn
         # TODO Use libssl if available
         cdn_aes = pyaes.AESModeOfOperationCTR(
@@ -559,9 +570,9 @@ class TelegramBareClient:
                 file_token=cdn_redirect.file_token,
                 request_token=cdn_file.request_token
             ))
-            return client, cdn_redirect.file_token, cdn_aes.encrypt, None
+            return client, cdn_aes.encrypt, None
         else:
             # We have the first bytes for the file
-            return client, cdn_redirect.file_token, cdn_aes.encrypt, cdn_file
+            return client, cdn_aes.encrypt, cdn_file
 
     # endregion