2018-06-09 14:48:27 +03:00
|
|
|
import logging
|
2018-06-09 12:34:01 +03:00
|
|
|
import os
|
|
|
|
import struct
|
|
|
|
import time
|
|
|
|
from hashlib import sha256
|
|
|
|
|
|
|
|
from ..crypto import AES
|
|
|
|
from ..errors import SecurityError, BrokenAuthKeyError
|
|
|
|
from ..extensions import BinaryReader
|
2018-06-09 14:11:49 +03:00
|
|
|
from ..tl.core import TLMessage
|
2018-09-29 11:58:45 +03:00
|
|
|
from ..tl.functions import InvokeAfterMsgRequest
|
|
|
|
from ..tl.core.gzippacked import GzipPacked
|
2018-06-09 12:34:01 +03:00
|
|
|
|
2018-06-09 14:48:27 +03:00
|
|
|
__log__ = logging.getLogger(__name__)
|
|
|
|
|
2018-06-09 12:34:01 +03:00
|
|
|
|
|
|
|
class MTProtoState:
|
|
|
|
"""
|
|
|
|
`telethon.network.mtprotosender.MTProtoSender` needs to hold a state
|
|
|
|
in order to be able to encrypt and decrypt incoming/outgoing messages,
|
|
|
|
as well as generating the message IDs. Instances of this class hold
|
|
|
|
together all the required information.
|
|
|
|
|
|
|
|
It doesn't make sense to use `telethon.sessions.abstract.Session` for
|
|
|
|
the sender because the sender should *not* be concerned about storing
|
|
|
|
this information to disk, as one may create as many senders as they
|
|
|
|
desire to any other data center, or some CDN. Using the same session
|
|
|
|
for all these is not a good idea as each need their own authkey, and
|
|
|
|
the concept of "copying" sessions with the unnecessary entities or
|
|
|
|
updates state for these connections doesn't make sense.
|
2018-10-01 10:58:53 +03:00
|
|
|
|
|
|
|
While it would be possible to have a `MTProtoPlainState` that does no
|
|
|
|
encryption so that it was usable through the `MTProtoLayer` and thus
|
|
|
|
avoid the need for a `MTProtoPlainSender`, the `MTProtoLayer` is more
|
|
|
|
focused to efficiency and this state is also more advanced (since it
|
|
|
|
supports gzipping and invoking after other message IDs). There are too
|
|
|
|
many methods that would be needed to make it convenient to use for the
|
|
|
|
authentication process, at which point the `MTProtoPlainSender` is better.
|
2018-06-09 12:34:01 +03:00
|
|
|
"""
|
|
|
|
def __init__(self, auth_key):
|
|
|
|
# Session IDs can be random on every connection
|
|
|
|
self.id = struct.unpack('q', os.urandom(8))[0]
|
|
|
|
self.auth_key = auth_key
|
|
|
|
self.time_offset = 0
|
|
|
|
self.salt = 0
|
|
|
|
self._sequence = 0
|
|
|
|
self._last_msg_id = 0
|
|
|
|
|
2018-06-27 20:04:33 +03:00
|
|
|
def update_message_id(self, message):
|
|
|
|
"""
|
|
|
|
Updates the message ID to a new one,
|
|
|
|
used when the time offset changed.
|
|
|
|
"""
|
|
|
|
message.msg_id = self._get_new_msg_id()
|
|
|
|
|
2018-06-09 12:34:01 +03:00
|
|
|
@staticmethod
|
|
|
|
def _calc_key(auth_key, msg_key, client):
|
|
|
|
"""
|
|
|
|
Calculate the key based on Telegram guidelines for MTProto 2,
|
|
|
|
specifying whether it's the client or not. See
|
|
|
|
https://core.telegram.org/mtproto/description#defining-aes-key-and-initialization-vector
|
|
|
|
"""
|
|
|
|
x = 0 if client else 8
|
|
|
|
sha256a = sha256(msg_key + auth_key[x: x + 36]).digest()
|
|
|
|
sha256b = sha256(auth_key[x + 40:x + 76] + msg_key).digest()
|
|
|
|
|
|
|
|
aes_key = sha256a[:8] + sha256b[8:24] + sha256a[24:32]
|
|
|
|
aes_iv = sha256b[:8] + sha256a[8:24] + sha256b[24:32]
|
|
|
|
|
|
|
|
return aes_key, aes_iv
|
|
|
|
|
2018-10-01 15:02:23 +03:00
|
|
|
def write_data_as_message(self, buffer, data, content_related,
|
|
|
|
*, after_id=None):
|
2018-06-09 12:34:01 +03:00
|
|
|
"""
|
2018-09-29 11:58:45 +03:00
|
|
|
Writes a message containing the given data into buffer.
|
2018-06-09 12:34:01 +03:00
|
|
|
|
2018-09-29 11:58:45 +03:00
|
|
|
Returns the message id.
|
|
|
|
"""
|
|
|
|
msg_id = self._get_new_msg_id()
|
2018-10-01 15:02:23 +03:00
|
|
|
seq_no = self._get_seq_no(content_related)
|
2018-09-29 11:58:45 +03:00
|
|
|
if after_id is None:
|
2018-10-04 17:15:51 +03:00
|
|
|
body = GzipPacked.gzip_if_smaller(content_related, data)
|
2018-09-29 11:58:45 +03:00
|
|
|
else:
|
2018-10-04 17:15:51 +03:00
|
|
|
body = GzipPacked.gzip_if_smaller(content_related,
|
2018-09-29 11:58:45 +03:00
|
|
|
bytes(InvokeAfterMsgRequest(after_id, data)))
|
|
|
|
|
|
|
|
buffer.write(struct.pack('<qii', msg_id, seq_no, len(body)))
|
|
|
|
buffer.write(body)
|
|
|
|
return msg_id
|
|
|
|
|
|
|
|
def encrypt_message_data(self, data):
|
|
|
|
"""
|
|
|
|
Encrypts the given message data using the current authorization key
|
|
|
|
following MTProto 2.0 guidelines core.telegram.org/mtproto/description.
|
2018-06-09 12:34:01 +03:00
|
|
|
"""
|
2018-09-29 11:58:45 +03:00
|
|
|
data = struct.pack('<qq', self.salt, self.id) + data
|
2018-06-09 12:34:01 +03:00
|
|
|
padding = os.urandom(-(len(data) + 12) % 16 + 12)
|
|
|
|
|
|
|
|
# Being substr(what, offset, length); x = 0 for client
|
|
|
|
# "msg_key_large = SHA256(substr(auth_key, 88+x, 32) + pt + padding)"
|
|
|
|
msg_key_large = sha256(
|
|
|
|
self.auth_key.key[88:88 + 32] + data + padding).digest()
|
|
|
|
|
|
|
|
# "msg_key = substr (msg_key_large, 8, 16)"
|
|
|
|
msg_key = msg_key_large[8:24]
|
|
|
|
aes_key, aes_iv = self._calc_key(self.auth_key.key, msg_key, True)
|
|
|
|
|
|
|
|
key_id = struct.pack('<Q', self.auth_key.key_id)
|
|
|
|
return (key_id + msg_key +
|
|
|
|
AES.encrypt_ige(data + padding, aes_key, aes_iv))
|
|
|
|
|
2018-09-29 11:58:45 +03:00
|
|
|
def decrypt_message_data(self, body):
|
2018-06-09 12:34:01 +03:00
|
|
|
"""
|
2018-09-29 11:58:45 +03:00
|
|
|
Inverse of `encrypt_message_data` for incoming server messages.
|
2018-06-09 12:34:01 +03:00
|
|
|
"""
|
|
|
|
if len(body) < 8:
|
2018-09-29 11:58:45 +03:00
|
|
|
# TODO If len == 4, raise HTTPErrorCode(-little endian int)
|
2018-06-09 12:34:01 +03:00
|
|
|
if body == b'l\xfe\xff\xff':
|
|
|
|
raise BrokenAuthKeyError()
|
|
|
|
else:
|
|
|
|
raise BufferError("Can't decode packet ({})".format(body))
|
|
|
|
|
2018-10-02 09:55:46 +03:00
|
|
|
# TODO Check salt, session_id and sequence_number
|
2018-06-09 12:34:01 +03:00
|
|
|
key_id = struct.unpack('<Q', body[:8])[0]
|
|
|
|
if key_id != self.auth_key.key_id:
|
|
|
|
raise SecurityError('Server replied with an invalid auth key')
|
|
|
|
|
|
|
|
msg_key = body[8:24]
|
|
|
|
aes_key, aes_iv = self._calc_key(self.auth_key.key, msg_key, False)
|
2018-06-09 14:48:27 +03:00
|
|
|
body = AES.decrypt_ige(body[24:], aes_key, aes_iv)
|
2018-06-09 12:34:01 +03:00
|
|
|
|
|
|
|
# https://core.telegram.org/mtproto/security_guidelines
|
|
|
|
# Sections "checking sha256 hash" and "message length"
|
2018-06-09 14:48:27 +03:00
|
|
|
our_key = sha256(self.auth_key.key[96:96 + 32] + body)
|
2018-06-09 12:34:01 +03:00
|
|
|
if msg_key != our_key.digest()[8:24]:
|
|
|
|
raise SecurityError(
|
|
|
|
"Received msg_key doesn't match with expected one")
|
|
|
|
|
2018-06-09 14:48:27 +03:00
|
|
|
reader = BinaryReader(body)
|
|
|
|
reader.read_long() # remote_salt
|
|
|
|
if reader.read_long() != self.id:
|
|
|
|
raise SecurityError('Server replied with a wrong session ID')
|
|
|
|
|
|
|
|
remote_msg_id = reader.read_long()
|
|
|
|
remote_sequence = reader.read_int()
|
2018-06-09 15:23:42 +03:00
|
|
|
reader.read_int() # msg_len for the inner object, padding ignored
|
2018-06-25 13:54:33 +03:00
|
|
|
|
|
|
|
# We could read msg_len bytes and use those in a new reader to read
|
|
|
|
# the next TLObject without including the padding, but since the
|
|
|
|
# reader isn't used for anything else after this, it's unnecessary.
|
2018-06-09 14:48:27 +03:00
|
|
|
obj = reader.tgread_object()
|
|
|
|
|
2018-09-29 11:58:45 +03:00
|
|
|
return TLMessage(remote_msg_id, remote_sequence, obj)
|
2018-06-09 12:34:01 +03:00
|
|
|
|
|
|
|
def _get_new_msg_id(self):
|
|
|
|
"""
|
|
|
|
Generates a new unique message ID based on the current
|
|
|
|
time (in ms) since epoch, applying a known time offset.
|
|
|
|
"""
|
|
|
|
now = time.time() + self.time_offset
|
|
|
|
nanoseconds = int((now - int(now)) * 1e+9)
|
|
|
|
new_msg_id = (int(now) << 32) | (nanoseconds << 2)
|
|
|
|
|
|
|
|
if self._last_msg_id >= new_msg_id:
|
|
|
|
new_msg_id = self._last_msg_id + 4
|
|
|
|
|
|
|
|
self._last_msg_id = new_msg_id
|
|
|
|
return new_msg_id
|
|
|
|
|
|
|
|
def update_time_offset(self, correct_msg_id):
|
|
|
|
"""
|
|
|
|
Updates the time offset to the correct
|
|
|
|
one given a known valid message ID.
|
|
|
|
"""
|
2018-07-25 13:33:12 +03:00
|
|
|
bad = self._get_new_msg_id()
|
|
|
|
old = self.time_offset
|
|
|
|
|
2018-06-09 12:34:01 +03:00
|
|
|
now = int(time.time())
|
|
|
|
correct = correct_msg_id >> 32
|
|
|
|
self.time_offset = correct - now
|
2018-07-25 13:33:12 +03:00
|
|
|
|
|
|
|
if self.time_offset != old:
|
|
|
|
self._last_msg_id = 0
|
|
|
|
__log__.debug(
|
|
|
|
'Updated time offset (old offset %d, bad %d, good %d, new %d)',
|
|
|
|
old, bad, correct_msg_id, self.time_offset
|
|
|
|
)
|
|
|
|
|
2018-06-27 20:04:33 +03:00
|
|
|
return self.time_offset
|
2018-06-09 12:34:01 +03:00
|
|
|
|
|
|
|
def _get_seq_no(self, content_related):
|
|
|
|
"""
|
|
|
|
Generates the next sequence number depending on whether
|
|
|
|
it should be for a content-related query or not.
|
|
|
|
"""
|
|
|
|
if content_related:
|
|
|
|
result = self._sequence * 2 + 1
|
|
|
|
self._sequence += 1
|
|
|
|
return result
|
|
|
|
else:
|
|
|
|
return self._sequence * 2
|