Implemented init and write code on TLObjects Generator

The code generated by the generator now classifies the output files
in their corresponding categories, also writing their __init__(...)
with documented arguments, and the on_send(...) method
This commit is contained in:
Lonami 2016-08-27 11:59:23 +02:00
parent 1974569927
commit 06832f8108
8 changed files with 310 additions and 37 deletions

View File

@ -1,5 +1,7 @@
# Telethon
**Telethon** is Telegram client implementation in Python. This project is **completely based** on [TLSharp](https://github.com/sochix/TLSharp), so please, also have a look to the original project!
**Telethon** is a Telegram client implementation in Python. This project's _core_ is **completely based** on [TLSharp](https://github.com/sochix/TLSharp), so please also have a look at the original project!
Other parts, such as the requests themselves, the .tl tokenizer and code generator, or some ported C# utilities such as `BinaryWriter`, `BinaryReader`, `TCPClient` and so on, are no longer part of TLSharp itself.
### Requirements
This project requires the following Python modules, which can be installed by issuing `sudo -H pip install <module>` on a Linux terminal:
@ -7,3 +9,20 @@ This project requires the following Python modules, which can be installed by is
### We need your help!
As of now, the project is fully **untested** and has many pending things to do. If you know both Python and C#, please don't think twice and help us (me)!
### Code generator limitations
The current code generator is not complete, yet adding the missing features would only over-complicate already hard-to-read code.
Some parts of the .tl file _should_ be omitted, because they're "built-in" in the generated code (such as writing booleans, etc.).
In order to make sure that all the generated files will work, please make sure to **always** comment out these lines in `scheme.tl`
(the latest version can always be found [here](https://github.com/telegramdesktop/tdesktop/blob/master/Telegram/SourceFiles/mtproto/scheme.tl)):
```tl
// boolFalse#bc799737 = Bool;
// boolTrue#997275b5 = Bool;
// true#3fedd339 = True;
// vector#1cb5c415 {t:Type} # [ t ] = Vector t;
```

View File

@ -1,8 +1,5 @@
from parser.tl_parser import TLParser
import tlobjects_generator
if __name__ == '__main__':
parser = TLParser()
for tlobject in parser.parse_file('parser/scheme.tl'):
print(tlobject)
tlobjects_generator.generate_tlobjecs()

View File

@ -50,7 +50,7 @@ class MtProtoSender:
# TODO Is there any difference with unsigned long and long?
writer.write_long(self._session.salt, signed=False)
writer.write_long(self._session.id, signed=False)
writer.write_long(request.message_id)
writer.write_long(request.msg_id)
writer.write_int(self.generate_sequence(request.confirmed))
writer.write_int(len(packet))
writer.write(packet)

View File

@ -1,13 +1,18 @@
from io import StringIO
class SourceBuilder:
"""This class should be used to build .py source files"""
def __init__(self, indent_size=4):
def __init__(self, out_stream=None, indent_size=4):
self.current_indent = 0
self.on_new_line = False
self.indent_size = indent_size
self.buffer = []
if out_stream is None:
self.out_stream = StringIO()
else:
self.out_stream = out_stream
def indent(self):
self.write(' ' * (self.current_indent * self.indent_size))
@ -15,9 +20,10 @@ class SourceBuilder:
def write(self, string):
if self.on_new_line:
self.on_new_line = False # We're not on a new line anymore
self.indent()
if string.strip(): # If the string was not empty, indent; Else it probably was a new line
self.indent()
self.buffer += list(string)
self.out_stream.write(string)
def writeln(self, string=''):
self.write(string + '\n')
@ -32,7 +38,12 @@ class SourceBuilder:
self.writeln()
def __str__(self):
if self.buffer:
return ''.join(self.buffer)
else:
return ''
self.out_stream.seek(0)
return self.out_stream.read()
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.out_stream.flush()
self.out_stream.close()

View File

@ -1,5 +1,9 @@
// Core types (no need to gen)
// We handle some types in a special way
//boolFalse#bc799737 = Bool;
//boolTrue#997275b5 = Bool;
//true#3fedd339 = True;
//vector#1cb5c415 {t:Type} # [ t ] = Vector t;
///////////////////////////////
@ -119,11 +123,6 @@ contest.saveDeveloperInfo#9a5f6e95 vk_id:int name:string phone_number:string age
---types---
boolFalse#bc799737 = Bool;
boolTrue#997275b5 = Bool;
true#3fedd339 = True;
error#c4b9f9bb code:int text:string = Error;
null#56730bcc = Null;

View File

@ -96,7 +96,8 @@ class TLArg:
"""
self.name = name
# Default flag values
# Default values
self.is_vector = False
self.is_flag = False
self.flag_index = -1
@ -119,15 +120,27 @@ class TLArg:
self.flag_index = int(flag_match.group(1))
self.type = flag_match.group(2) # Update the type to match the exact type, not the "flagged" one
# Then check if the type is a Vector<REAL_TYPE>
vector_match = re.match(r'vector<(\w+)>', self.type, re.IGNORECASE)
if vector_match:
self.is_vector = True
self.type = vector_match.group(1) # Update the type to match the one inside the vector
self.generic_definition = generic_definition
def __str__(self):
type = ('!{}'.format(self.type) if self.is_generic
else
('flags.{}?{}'.format(self.flag_index, self.type) if self.is_flag
else self.type))
# Find the real type representation by updating it as required
real_type = self.type
if self.is_vector:
real_type = 'Vector<{}>'.format(real_type)
if self.is_generic:
real_type = '!{}'.format(real_type)
if self.is_flag:
real_type = 'flags.{}?{}'.format(self.flag_index, real_type)
if self.generic_definition:
return '{{{}:{}}}'.format(self.name, type)
return '{{{}:{}}}'.format(self.name, real_type)
else:
return '{}:{}'.format(self.name, type)
return '{}:{}'.format(self.name, real_type)

217
tlobjects_generator.py Normal file
View File

@ -0,0 +1,217 @@
import os
import re
from parser.tl_parser import TLParser
from parser.source_builder import SourceBuilder
def generate_tlobjecs():
    """Generate one Python source file per TLObject parsed from ``scheme.tl``.

    Functions are written under ``tl/functions`` and every other object
    under ``tl/types``; objects that have a namespace go into a
    sub-directory named after it. Each output directory that receives a
    generated file also gets an (empty) ``__init__.py`` if it has none yet.
    Existing generated files are overwritten.
    """
    # First ensure that the required parent directories exist
    os.makedirs('tl/functions', exist_ok=True)
    os.makedirs('tl/types', exist_ok=True)

    for tlobject in TLParser.parse_file('scheme.tl'):
        # Determine the output directory and create it
        out_dir = os.path.join(
            'tl', 'functions' if tlobject.is_function else 'types')
        if tlobject.namespace is not None:
            out_dir = os.path.join(out_dir, tlobject.namespace)
        os.makedirs(out_dir, exist_ok=True)

        # Also create __init__.py, without touching an existing one
        init_py = os.path.join(out_dir, '__init__.py')
        if not os.path.isfile(init_py):
            with open(init_py, 'a'):
                pass

        # Create the file and build the source code into it
        filename = os.path.join(out_dir, get_file_name(tlobject))
        with open(filename, 'w', encoding='utf-8') as file:
            with SourceBuilder(file) as builder:
                _write_class_header(builder, tlobject)
                _write_init(builder, tlobject)
                _write_on_send(builder, tlobject)


def _write_class_header(builder, tlobject):
    """Write the import, the class statement and the class docstring."""
    builder.writeln('from requests.mtproto_request import MTProtoRequest')
    builder.writeln()
    builder.writeln()
    builder.writeln('class {}(MTProtoRequest):'.format(get_class_name(tlobject)))

    # Write the original .tl definition, along with a "generated automatically" message
    builder.writeln('"""Class generated by TLObjects\' generator. '
                    'All changes will be ERASED. Original .tl definition below.')
    builder.writeln('{}"""'.format(tlobject))
    builder.writeln()


def _write_init(builder, tlobject):
    """Write ``__init__`` with its documented parameters and assignments."""
    # Skip the arguments which are generated automatically: the flag
    # indicator and the generic definitions. Generic definitions are not
    # needed in Python because arguments can be of any type.
    real_args = [arg for arg in tlobject.args
                 if not arg.flag_indicator and not arg.generic_definition]

    # Flags must come last in the signature because they default to None.
    # sorted() is stable, so the relative order inside each group is kept.
    params = [arg.name if not arg.is_flag else '{}=None'.format(arg.name)
              for arg in sorted(real_args, key=lambda a: a.is_flag)]
    if params:
        builder.writeln('def __init__(self, {}):'.format(', '.join(params)))
    else:
        builder.writeln('def __init__(self):')

    if real_args:
        # Write the docstring, so we know the type of the arguments
        builder.writeln('"""')
        for arg in real_args:
            builder.write(':param {}: Telegram type: «{}».'.format(arg.name, arg.type))
            if arg.is_vector:
                builder.write(' Must be a list.')
            if arg.is_generic:
                builder.write(' This should be another MTProtoRequest.')
            builder.writeln()
        builder.writeln('"""')

    builder.writeln('super().__init__()')
    # Leave an empty line if there are any args
    if real_args:
        builder.writeln()
    for arg in real_args:
        builder.writeln('self.{0} = {0}'.format(arg.name))
    builder.end_block()


def _write_on_send(builder, tlobject):
    """Write ``on_send(self, writer)``: constructor ID, then every argument."""
    builder.writeln('def on_send(self, writer):')
    builder.writeln("writer.write_int({}) # {}'s constructor ID"
                    .format(hex(tlobject.id), tlobject.name))
    for arg in tlobject.args:
        write_onsend_code(builder, arg, tlobject.args)
    builder.end_block()
def get_class_name(tlobject):
    """Return the class name for ``tlobject``, derived from ``tlobject.name``.

    Every ``_x`` pair (underscore followed by a lowercase ASCII letter) is
    collapsed into the uppercase letter, the first character is uppercased
    and any underscore left after the first character is dropped.
    """
    # Courtesy of http://stackoverflow.com/a/31531797/4759433
    # Also, '_' could be replaced for ' ', then use .title(), and then remove ' '
    def _upper_letter(match):
        return match.group(1).upper()

    camel = re.sub(r'_([a-z])', _upper_letter, tlobject.name)
    head, tail = camel[:1], camel[1:]
    # Strip the remaining underscores (e.g. those followed by a digit)
    return head.upper() + tail.replace('_', '')
def get_file_name(tlobject):
    """Return the snake_case ``.py`` file name for ``tlobject.name``.

    An underscore is inserted at each camelCase word boundary, the result
    is lowercased and the ``.py`` extension is appended.
    """
    # Courtesy of http://stackoverflow.com/a/1176023/4759433
    # Pass 1 separates a capitalised word from whatever precedes it;
    # pass 2 separates a lowercase letter or digit from a following capital.
    word_split = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', tlobject.name)
    snake = re.sub('([a-z0-9])([A-Z])', r'\1_\2', word_split)
    return '{}.py'.format(snake.lower())
# Custom (non built-in) types already reported on stdout, so each one is
# only printed the first time the generator comes across it
foundEver = set()

# Built-in TL types and the writer call that serializes a value of that type.
# 'bytes' is serialized like 'string' (length-prefixed), hence tgwrite_bytes
# rather than the raw writer.write
_BUILTIN_WRITE_TEMPLATES = {
    'int': 'writer.write_int({})',
    'long': 'writer.write_long({})',
    'int128': 'writer.write_large_int({}, bits=128)',
    'int256': 'writer.write_large_int({}, bits=256)',
    'double': 'writer.write_double({})',
    'string': 'writer.tgwrite_string({})',
    'Bool': 'writer.tgwrite_bool({})',
    'bytes': 'writer.tgwrite_bytes({})',
}


def write_onsend_code(builder, arg, args, name=None):
    """
    Writes the write code for the given argument

    :param builder: The source code builder
    :param arg: The argument to write
    :param args: All the arguments of the same TLObject (the same on_send).
                 This is required to determine the flags value
    :param name: The name of the argument. Defaults to «self.argname»
                 This argument is optional because it's only required when
                 the value does not live in «self», e.g. the items of a Vector<>
    """
    if arg.generic_definition:
        return  # Do nothing, this only specifies a later type

    # Awkwardly enough, Telegram has both Bool and "true". The latter is a
    # bare type without fields: its serialization is empty, and its presence
    # is signalled only through the corresponding bit of the flags
    if 'true' == arg.type:
        return

    if name is None:
        name = 'self.{}'.format(arg.name)

    # The argument may be a flag, only write if it's not None!
    if arg.is_flag:
        builder.writeln('if {} is not None:'.format(name))

    if arg.is_vector:
        item_name = '{}_item'.format(arg.name)
        builder.writeln("writer.write_int(0x1cb5c415) # Vector's constructor ID")
        builder.writeln('writer.write_int(len({}))'.format(name))
        builder.writeln('for {} in {}:'.format(item_name, name))
        # Every item is written unconditionally: the length has already been
        # sent, so skipping an item would corrupt the serialized vector
        _write_value_code(builder, arg, item_name)
        builder.end_block()

    elif arg.flag_indicator:
        # Calculate the flags with those items which are not None
        builder.writeln('# Calculate the flags. This equals to those flag arguments which are NOT None')
        builder.writeln('flags = 0')
        for flag in args:
            if flag.is_flag:
                builder.writeln('flags |= (1 << {}) if {} is not None else 0'
                                .format(flag.flag_index, 'self.{}'.format(flag.name)))
        builder.writeln('writer.write_int(flags)')
        builder.writeln()

    else:
        _write_value_code(builder, arg, name)

    # End the flag block if required (if we opened it before)
    if arg.is_flag:
        builder.end_block()


def _write_value_code(builder, arg, name):
    """Write the statement serializing the single value called ``name``."""
    template = _BUILTIN_WRITE_TEMPLATES.get(arg.type)
    if template is not None:
        builder.writeln(template.format(name))
        return

    # Else it may be a custom type, which knows how to write itself
    builder.writeln('{}.write(writer)'.format(name))
    if arg.type not in foundEver:
        foundEver.add(arg.type)
        print('{}: {}'.format(arg.type, arg))
''' SourceBuilder generated file example:
class Example(MTProtoRequest):
def __init__(self, some, parameter):
"""
.tl definition: Example#12345678 some:int parameter:int = Exmpl
:param some: [type=Vector<int>] Cannot be NONE
:param parameter: [type=int] Cannot be NONE
"""
def on_send(self, writer):
writer.write_int(0x62d6b459) # example's constructor ID
writer.write_int(0x1cb5c415) # vector code
writer.write_int(len(self.some))
for some_item in self.some:
writer.write_int(some_item)
def on_response(self, reader):
pass
'''

View File

@ -17,18 +17,31 @@ class BinaryWriter:
# region Writing
def write_byte(self, byte):
self.writer.write(pack('B', byte))
def write_byte(self, value):
self.writer.write(pack('B', value))
def write_int(self, integer, signed=True):
if not signed:
integer &= 0xFFFFFFFF # Ensure it's unsigned (see http://stackoverflow.com/a/30092291/4759433)
self.writer.write(pack('I', integer))
def write_int(self, value, signed=True):
if signed:
self.writer.write(pack('i', value))
else:
value &= 0xFFFFFFFF # Ensure it's unsigned (see http://stackoverflow.com/a/30092291/4759433)
self.writer.write(pack('I', value))
def write_long(self, long, signed=True):
if not signed:
long &= 0xFFFFFFFFFFFFFFFF
self.writer.write(pack('Q', long))
def write_long(self, value, signed=True):
if signed:
self.writer.write(pack('q', value))
else:
value &= 0xFFFFFFFFFFFFFFFF
self.writer.write(pack('Q', value))
def write_float(self, value):
self.writer.write(pack('f', value))
def write_double(self, value):
self.writer.write(pack('d', value))
def write_large_int(self, value, bits):
    """Writes an integer of the given size in bits (e.g. 128 or 256).

    :param value: The integer to write. Negative values are written in
                  two's complement; bits which do not fit are discarded,
                  the same way the unsigned paths of write_int/write_long
                  mask their value.
    :param bits: The size of the integer in bits; ``bits // 8`` bytes
                 are written.
    """
    size = bits // 8
    # pack('{n}B', value) expects n separate byte arguments and fails for
    # a single integer, so the bytes are produced with int.to_bytes.
    # NOTE(review): little-endian is assumed to be the intended byte order
    # (it is what the native-order pack() calls yield on little-endian
    # hosts) - confirm against the protocol
    value &= (1 << (size * 8)) - 1
    self.writer.write(value.to_bytes(size, byteorder='little'))
def write(self, data):
self.writer.write(data)
@ -71,6 +84,10 @@ class BinaryWriter:
def tgwrite_string(self, string):
return self.tgwrite_bytes(string.encode('utf-8'))
def tgwrite_bool(self, bool):
# boolTrue boolFalse
return self.write_int(0x997275b5 if bool else 0xbc799737, signed=False)
# endregion
def flush(self):