Telethon/telethon_generator/parsers/tlobject/parser.py
Lonami Exo 8224e5aabf Make use of pathlib nearly everywhere (breaks docs gen)
Python 3.6 introduced support for the os.PathLike interface,
which means Python 3.5 did not have it yet and attempting to
use it in os functions would fail. Instead we can use pathlib
for everything, but not all work is done yet.
2018-12-21 13:24:16 +01:00

145 lines
4.4 KiB
Python

import collections
import re
from .tlarg import TLArg
from .tlobject import TLObject
from ..methods import Usability
# Constructor IDs of the core definitions. `parse_tl` uses this set as the
# default value of its `ignored_ids` parameter, so objects carrying one of
# these IDs are skipped unless the caller passes a different collection.
CORE_TYPES = {
    0xbc799737,  # boolFalse#bc799737 = Bool;
    0x997275b5,  # boolTrue#997275b5 = Bool;
    0x3fedd339,  # true#3fedd339 = True;
    0xc4b9f9bb,  # error#c4b9f9bb code:int text:string = Error;
    0x56730bcc,  # null#56730bcc = Null;
}
# Neither Telegram Desktop (C++) nor the .tl files themselves distinguish
# between `string` and `bytes`. Python does, and during the authorization
# key process the values must be handled as raw bytes, since they will not
# be valid UTF-8 strings.
#
# Any object whose ID is listed here gets the `string` type of its
# arguments replaced with `bytes`.
AUTH_KEY_TYPES = {
    0x05162463,  # resPQ
    0x83c95aec,  # p_q_inner_data
    0xa9f55f95,  # p_q_inner_data_dc
    0x3c6a84d4,  # p_q_inner_data_temp
    0x56fddf88,  # p_q_inner_data_temp_dc
    0xd0e8075c,  # server_DH_params_ok
    0xb5890dba,  # server_DH_inner_data
    0x6643b654,  # client_DH_inner_data
    0xd712e4be,  # req_DH_params
    0xf5045f1f,  # set_client_DH_params
    0x3072cfa1,  # gzip_packed
}
def _from_line(line, is_function, method_info, layer):
match = re.match(
r'^([\w.]+)' # 'name'
r'(?:#([0-9a-fA-F]+))?' # '#optionalcode'
r'(?:\s{?\w+:[\w\d<>#.?!]+}?)*' # '{args:.0?type}'
r'\s=\s' # ' = '
r'([\w\d<>#.?]+);$', # '<result.type>;'
line
)
if match is None:
# Probably "vector#1cb5c415 {t:Type} # [ t ] = Vector t;"
raise ValueError('Cannot parse TLObject {}'.format(line))
args_match = re.findall(
r'({)?'
r'(\w+)'
r':'
r'([\w\d<>#.?!]+)'
r'}?',
line
)
name = match.group(1)
if name in method_info:
usability = method_info[name].usability
else:
usability = Usability.UNKNOWN
return TLObject(
fullname=name,
object_id=match.group(2),
result=match.group(3),
is_function=is_function,
layer=layer,
usability=usability,
args=[TLArg(name, arg_type, brace != '')
for brace, name, arg_type in args_match]
)
def parse_tl(file_path, layer, methods=None, ignored_ids=CORE_TYPES):
    """
    This method yields TLObjects from a given .tl file.

    Note that the file is parsed completely before the function yields
    because references to other objects may appear later in the file.

    `file_path` must offer an ``open()`` method (such as `pathlib.Path`);
    the file is read as UTF-8. `layer` is forwarded to every parsed
    object. `methods` is an optional iterable of objects with a ``name``
    attribute, used to look up per-method information by full name.
    Objects whose ``id`` is in `ignored_ids` are not yielded.

    Raises `ValueError` if a definition line cannot be parsed, with the
    exception of the ``vector#1cb5c415`` definition, which is skipped.
    """
    method_info = {m.name: m for m in (methods or [])}
    obj_all = []
    obj_by_name = {}
    obj_by_type = collections.defaultdict(list)

    # Raw string: '\w' inside a normal string literal is an invalid
    # escape sequence and triggers a warning on modern Python versions.
    section_regex = re.compile(r'---(\w+)---')

    # Explicit encoding so the result doesn't depend on the platform's
    # default; `find_layer` reads scheme files as UTF-8 as well.
    with file_path.open(encoding='utf-8') as file:
        is_function = False
        for line in file:
            # Drop everything from the first '//' on (line comments).
            line = line.partition('//')[0].strip()
            if not line:
                continue

            # '---functions---' / '---types---' switch the current section.
            match = section_regex.match(line)
            if match:
                is_function = match.group(1) == 'functions'
                continue

            try:
                result = _from_line(
                    line, is_function, method_info, layer=layer)
            except ValueError as e:
                # The generic vector definition is known not to parse;
                # any other failure is a real error.
                if 'vector#1cb5c415' not in str(e):
                    raise
                continue

            if result.id in ignored_ids:
                continue

            obj_all.append(result)
            if not result.is_function:
                obj_by_name[result.fullname] = result
                obj_by_type[result.result].append(result)

    # Once all objects have been parsed, replace the
    # string type from the arguments with references
    for obj in obj_all:
        if obj.id in AUTH_KEY_TYPES:
            # These objects carry raw bytes where the .tl says 'string'.
            for arg in obj.args:
                if arg.type == 'string':
                    arg.type = 'bytes'

        for arg in obj.args:
            # Prefer the constructors producing this type; otherwise fall
            # back to the single constructor with that exact name, if any.
            arg.cls = obj_by_type.get(arg.type) or (
                [obj_by_name[arg.type]] if arg.type in obj_by_name else []
            )

    yield from obj_all
def find_layer(file_path):
    """
    Returns the layer number declared in the given scheme.tl file.

    The file is read as UTF-8 and scanned line by line for the first
    line of the form ``// LAYER <digits>``. The digits are returned as
    an `int`, or `None` is returned when no line has that form.
    """
    pattern = re.compile(r'^//\s*LAYER\s*(\d+)$')
    with open(file_path, 'r', encoding='utf-8') as scheme:
        # Lazily match every line and stop at the first successful one.
        hits = filter(None, map(pattern.match, scheme))
        first_hit = next(hits, None)

    if first_hit is None:
        return None
    return int(first_hit.group(1))