diff --git a/README.md b/README.md index d2e87b25..bd6ba817 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Telethon -**Telethon** is Telegram client implementation in Python. This project is **completely based** on [TLSharp](https://github.com/sochix/TLSharp), so please, also have a look to the original project! +**Telethon** is a Telegram client implementation in Python. This project's _core_ is **completely based** on [TLSharp](https://github.com/sochix/TLSharp), so please also have a look at the original project! + +Other parts, such as the requests themselves, the .tl tokenizer and code generator, or some ported C# utilities such as `BinaryWriter`, `BinaryReader`, `TCPClient` and so on, are no longer part of TLSharp itself. ### Requirements This project requires the following Python modules, which can be installed by issuing `sudo -H pip install ` on a Linux terminal: @@ -7,3 +9,20 @@ This project requires the following Python modules, which can be installed by is ### We need your help! As of now, the project is fully **untested** and with many pending things to do. If you know both Python and C#, please don't think it twice and help us (me)! + +### Code generator limitations +The current code generator is not complete, yet adding the missing features would only over-complicate already hard-to-read code. +Some parts of the .tl file _should_ be omitted, because they're "built-in" in the generated code (such as writing booleans, etc.).
+ +In order to make sure that all the generated files will work, please make sure to **always** comment out these lines in `scheme.tl` +(the latest version can always be found [here](https://github.com/telegramdesktop/tdesktop/blob/master/Telegram/SourceFiles/mtproto/scheme.tl)): + +```tl +// boolFalse#bc799737 = Bool; +// boolTrue#997275b5 = Bool; +// true#3fedd339 = True; +// vector#1cb5c415 {t:Type} # [ t ] = Vector t; +``` + + + \ No newline at end of file diff --git a/main.py b/main.py index d06de64f..77f65b2f 100644 --- a/main.py +++ b/main.py @@ -1,8 +1,5 @@ -from parser.tl_parser import TLParser +import tlobjects_generator if __name__ == '__main__': - - parser = TLParser() - for tlobject in parser.parse_file('parser/scheme.tl'): - print(tlobject) + tlobjects_generator.generate_tlobjecs() diff --git a/network/mtproto_sender.py b/network/mtproto_sender.py index 345fa2c4..b43add5c 100644 --- a/network/mtproto_sender.py +++ b/network/mtproto_sender.py @@ -50,7 +50,7 @@ class MtProtoSender: # TODO Is there any difference with unsigned long and long? 
writer.write_long(self._session.salt, signed=False) writer.write_long(self._session.id, signed=False) - writer.write_long(request.message_id) + writer.write_long(request.msg_id) writer.write_int(self.generate_sequence(request.confirmed)) writer.write_int(len(packet)) writer.write(packet) diff --git a/parser/source_builder.py b/parser/source_builder.py index 132e3a15..9011d2f5 100644 --- a/parser/source_builder.py +++ b/parser/source_builder.py @@ -1,13 +1,18 @@ +from io import StringIO + class SourceBuilder: """This class should be used to build .py source files""" - def __init__(self, indent_size=4): + def __init__(self, out_stream=None, indent_size=4): self.current_indent = 0 self.on_new_line = False self.indent_size = indent_size - self.buffer = [] + if out_stream is None: + self.out_stream = StringIO() + else: + self.out_stream = out_stream def indent(self): self.write(' ' * (self.current_indent * self.indent_size)) @@ -15,9 +20,10 @@ class SourceBuilder: def write(self, string): if self.on_new_line: self.on_new_line = False # We're not on a new line anymore - self.indent() + if string.strip(): # If the string was not empty, indent; Else it probably was a new line + self.indent() - self.buffer += list(string) + self.out_stream.write(string) def writeln(self, string=''): self.write(string + '\n') @@ -32,7 +38,12 @@ class SourceBuilder: self.writeln() def __str__(self): - if self.buffer: - return ''.join(self.buffer) - else: - return '' + self.out_stream.seek(0) # Rewind so that read() returns everything written so far + return self.out_stream.read() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.out_stream.flush() # The stream is flushed and then closed on exit, even when the caller supplied it + self.out_stream.close() diff --git a/parser/scheme.tl b/scheme.tl similarity index 99% rename from parser/scheme.tl rename to scheme.tl index cc0d08a9..16f943ba 100644 --- a/parser/scheme.tl +++ b/scheme.tl @@ -1,5 +1,9 @@ // Core types (no need to gen) +// We handle some types in a special way +//boolFalse#bc799737 = Bool; +//boolTrue#997275b5 = Bool;
+//true#3fedd339 = True; //vector#1cb5c415 {t:Type} # [ t ] = Vector t; /////////////////////////////// @@ -119,11 +123,6 @@ contest.saveDeveloperInfo#9a5f6e95 vk_id:int name:string phone_number:string age ---types--- -boolFalse#bc799737 = Bool; -boolTrue#997275b5 = Bool; - -true#3fedd339 = True; - error#c4b9f9bb code:int text:string = Error; null#56730bcc = Null; diff --git a/tl/tlobject.py b/tl/tlobject.py index f8c4a1a2..9b5232a0 100644 --- a/tl/tlobject.py +++ b/tl/tlobject.py @@ -96,7 +96,8 @@ class TLArg: """ self.name = name - # Default flag values + # Default values + self.is_vector = False self.is_flag = False self.flag_index = -1 @@ -119,15 +120,27 @@ class TLArg: self.flag_index = int(flag_match.group(1)) self.type = flag_match.group(2) # Update the type to match the exact type, not the "flagged" one + # Then check if the type is a Vector (the inner type may be namespaced, e.g. Vector<ns.Type>, hence the dot) + vector_match = re.match(r'vector<([\w.]+)>', self.type, re.IGNORECASE) + if vector_match: + self.is_vector = True + self.type = vector_match.group(1) # Update the type to match the one inside the vector + self.generic_definition = generic_definition def __str__(self): - type = ('!{}'.format(self.type) if self.is_generic - else - ('flags.{}?{}'.format(self.flag_index, self.type) if self.is_flag - else self.type)) + # Find the real type representation by updating it as required + real_type = self.type + if self.is_vector: + real_type = 'Vector<{}>'.format(real_type) + + if self.is_generic: + real_type = '!{}'.format(real_type) + + if self.is_flag: + real_type = 'flags.{}?{}'.format(self.flag_index, real_type) if self.generic_definition: - return '{{{}:{}}}'.format(self.name, type) + return '{{{}:{}}}'.format(self.name, real_type) else: - return '{}:{}'.format(self.name, type) + return '{}:{}'.format(self.name, real_type) diff --git a/tlobjects_generator.py b/tlobjects_generator.py new file mode 100644 index 00000000..6f42fd41 --- /dev/null +++ b/tlobjects_generator.py @@ -0,0 +1,217 @@ +import os +import re +from
parser.tl_parser import TLParser +from parser.source_builder import SourceBuilder + + +def generate_tlobjecs(): + """Generates all the TLObjects from scheme.tl to tl/functions and tl/types""" + + # First ensure that the required parent directories exist + os.makedirs('tl/functions', exist_ok=True) + os.makedirs('tl/types', exist_ok=True) + for tlobject in TLParser.parse_file('scheme.tl'): + + # Determine the output directory and create it + out_dir = os.path.join('tl', + 'functions' if tlobject.is_function + else 'types') + + if tlobject.namespace is not None: + out_dir = os.path.join(out_dir, tlobject.namespace) + + os.makedirs(out_dir, exist_ok=True) + + init_py = os.path.join(out_dir, '__init__.py') + # Also create __init__.py + if not os.path.isfile(init_py): + open(init_py, 'a').close() + + # Create the file + filename = os.path.join(out_dir, get_file_name(tlobject)) + with open(filename, 'w', encoding='utf-8') as file: + + # Let's build the source code! + with SourceBuilder(file) as builder: + builder.writeln('from requests.mtproto_request import MTProtoRequest') + builder.writeln() + builder.writeln() + builder.writeln('class {}(MTProtoRequest):'.format(get_class_name(tlobject))) + + # Write the original .tl definition, along with a "generated automatically" message + builder.writeln('"""Class generated by TLObjects\' generator. ' + 'All changes will be ERASED. Original .tl definition below.') + builder.writeln('{}"""'.format(tlobject)) + builder.writeln() + + # First sort the arguments so that those not being a flag come first + args = sorted([arg for arg in tlobject.args if not arg.flag_indicator], + key=lambda x: x.is_flag) + + # Then convert the args to string parameters, the flags having =None + args = [(arg.name if not arg.is_flag + else '{}=None'.format(arg.name)) for arg in args + if not arg.flag_indicator and not arg.generic_definition] + + # Write the __init__ function + if args: + builder.writeln('def __init__(self, {}):'.format(', '.join(args))) + else: + builder.writeln('def __init__(self):') + + # Now update args to have the TLObject arguments, _except_ + # those which are generated automatically: flag indicator and generic definitions. + # We don't need the generic definitions in Python because arguments can be any type + args = [arg for arg in tlobject.args + if not arg.flag_indicator and not arg.generic_definition] + + if args: + # Write the docstring, so we know the type of the arguments + builder.writeln('"""') + for arg in args: + if not arg.flag_indicator: + builder.write(':param {}: Telegram type: «{}».'.format(arg.name, arg.type)) + if arg.is_vector: + builder.write(' Must be a list.') + if arg.is_generic: + builder.write(' This should be another MTProtoRequest.') + builder.writeln() + builder.writeln('"""') + + builder.writeln('super().__init__()') + # Leave an empty line if there are any args + if args: + builder.writeln() + + for arg in args: + builder.writeln('self.{0} = {0}'.format(arg.name)) + builder.end_block() + + # Write the on_send(self, writer) function. Constructor IDs are unsigned 32-bit values, so they need signed=False (the signed default rejects IDs >= 0x80000000) + builder.writeln('def on_send(self, writer):') + builder.writeln("writer.write_int({}, signed=False) # {}'s constructor ID" + .format(hex(tlobject.id), tlobject.name)) + + for arg in tlobject.args: + write_onsend_code(builder, arg, tlobject.args) + builder.end_block() + + +def get_class_name(tlobject): + # Courtesy of
http://stackoverflow.com/a/31531797/4759433 + # Also, '_' could be replaced for ' ', then use .title(), and then remove ' ' + result = re.sub(r'_([a-z])', lambda m: m.group(1).upper(), tlobject.name) + return result[:1].upper() + result[1:].replace('_', '') # Replace again to fully ensure! + + +def get_file_name(tlobject): + # Courtesy of http://stackoverflow.com/a/1176023/4759433 + s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', tlobject.name) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() + '.py' + + +foundEver = set() +def write_onsend_code(builder, arg, args, name=None): + """ + Writes the write code for the given argument + + :param builder: The source code builder + :param arg: The argument to write + :param args: All the other arguments in TLObject same on_send. This is required to determine the flags value + :param name: The name of the argument. Defaults to «self.argname» + This argument is an option because it's required when writing Vectors<> + """ + + if arg.generic_definition: + return # Do nothing, this only specifies a later type + + if name is None: + name = 'self.{}'.format(arg.name) + + # The argument may be a flag, only write if it's not None! + if arg.is_flag: + builder.writeln('if {} is not None:'.format(name)) + + if arg.is_vector: + builder.writeln("writer.write_int(0x1cb5c415) # Vector's constructor ID") + builder.writeln('writer.write_int(len({}))'.format(name)) + builder.writeln('for {}_item in {}:'.format(arg.name, name)) + # Temporary disable .is_vector, not to enter this if again + arg.is_vector = False + write_onsend_code(builder, arg, args, name='{}_item'.format(arg.name)) + arg.is_vector = True + + elif arg.flag_indicator: + # Calculate the flags with those items which are not None + builder.writeln('# Calculate the flags. This equals to those flag arguments which are NOT None') + builder.writeln('flags = 0') + for flag in args: + if flag.is_flag: + builder.writeln('flags |= (1 << {}) if {} is not None else 0' + .format(flag.flag_index, 'self.{}'.format(flag.name))) + + builder.writeln('writer.write_int(flags)') + builder.writeln() + + elif 'int' == arg.type: + builder.writeln('writer.write_int({})'.format(name)) + + elif 'long' == arg.type: + builder.writeln('writer.write_long({})'.format(name)) + + elif 'int128' == arg.type: + builder.writeln('writer.write_large_int({}, bits=128)'.format(name)) + + elif 'int256' == arg.type: + builder.writeln('writer.write_large_int({}, bits=256)'.format(name)) + + elif 'double' == arg.type: + builder.writeln('writer.write_double({})'.format(name)) + + elif 'string' == arg.type: + builder.writeln('writer.tgwrite_string({})'.format(name)) + + elif 'Bool' == arg.type: + builder.writeln('writer.tgwrite_bool({})'.format(name)) + + elif 'true' == arg.type: # Awkwardly enough, Telegram has both bool and "true", used in flags + builder.writeln('pass # "true" has an empty serialization: the bit set in flags is the whole value') + + elif 'bytes' == arg.type: + builder.writeln('writer.tgwrite_bytes({})'.format(name)) + + else: + # Else it may be a custom type + builder.writeln('{}.write(writer)'.format(name)) + if arg.type not in foundEver: + foundEver.add(arg.type) + print('{}: {}'.format(arg.type, arg)) + + # End vector and flag blocks if required (if we opened them before) + if arg.is_vector: + builder.end_block() + + if arg.is_flag: + builder.end_block() + + +''' SourceBuilder generated file example: + +class Example(MTProtoRequest): + def __init__(self, some, parameter): + """ + .tl definition: Example#12345678 some:int parameter:int = Exmpl + :param some: [type=Vector] Cannot be NONE + :param parameter: [type=int] Cannot be NONE + """ + + def on_send(self, writer): + writer.write_int(0x62d6b459) # example's constructor ID + writer.write_int(0x1cb5c415) # vector code + writer.write_int(len(self.some)) +
for some_item in self.some: + writer.write_int(some_item) + + def on_response(self, reader): + pass +''' diff --git a/utils/binary_writer.py b/utils/binary_writer.py index 5b86e552..0ddba440 100644 --- a/utils/binary_writer.py +++ b/utils/binary_writer.py @@ -17,18 +17,31 @@ class BinaryWriter: # region Writing - def write_byte(self, byte): - self.writer.write(pack('B', byte)) + def write_byte(self, value): + self.writer.write(pack('B', value)) - def write_int(self, integer, signed=True): - if not signed: - integer &= 0xFFFFFFFF # Ensure it's unsigned (see http://stackoverflow.com/a/30092291/4759433) - self.writer.write(pack('I', integer)) + def write_int(self, value, signed=True): + if signed: + self.writer.write(pack('i', value)) + else: + value &= 0xFFFFFFFF # Ensure it's unsigned (see http://stackoverflow.com/a/30092291/4759433) + self.writer.write(pack('I', value)) - def write_long(self, long, signed=True): - if not signed: - long &= 0xFFFFFFFFFFFFFFFF - self.writer.write(pack('Q', long)) + def write_long(self, value, signed=True): + if signed: + self.writer.write(pack('q', value)) + else: + value &= 0xFFFFFFFFFFFFFFFF + self.writer.write(pack('Q', value)) + + def write_float(self, value): + self.writer.write(pack('f', value)) + + def write_double(self, value): + self.writer.write(pack('d', value)) + + def write_large_int(self, value, bits, signed=True): + self.writer.write(value.to_bytes(bits // 8, byteorder='little', signed=signed)) # struct has no 128/256-bit integer format ('16B' would expect 16 separate values), so encode the int directly as little-endian bytes def write(self, data): self.writer.write(data) @@ -71,6 +84,10 @@ class BinaryWriter: def tgwrite_string(self, string): return self.tgwrite_bytes(string.encode('utf-8')) + def tgwrite_bool(self, bool): + # boolTrue boolFalse + return self.write_int(0x997275b5 if bool else 0xbc799737, signed=False) + # endregion def flush(self):