mirror of
https://github.com/LonamiWebs/Telethon.git
synced 2024-11-25 02:43:45 +03:00
Update code generator to parse CSV files
This commit is contained in:
parent
f646863149
commit
f05109f186
21
setup.py
21
setup.py
|
@ -15,7 +15,6 @@ import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
from codecs import open
|
|
||||||
from sys import argv
|
from sys import argv
|
||||||
|
|
||||||
from setuptools import find_packages, setup
|
from setuptools import find_packages, setup
|
||||||
|
@ -40,11 +39,10 @@ class TempWorkDir:
|
||||||
GENERATOR_DIR = 'telethon_generator'
|
GENERATOR_DIR = 'telethon_generator'
|
||||||
LIBRARY_DIR = 'telethon'
|
LIBRARY_DIR = 'telethon'
|
||||||
|
|
||||||
ERRORS_IN_JSON = os.path.join(GENERATOR_DIR, 'data', 'errors.json')
|
ERRORS_IN = os.path.join(GENERATOR_DIR, 'data', 'errors.csv')
|
||||||
ERRORS_IN_DESC = os.path.join(GENERATOR_DIR, 'data', 'error_descriptions')
|
|
||||||
ERRORS_OUT = os.path.join(LIBRARY_DIR, 'errors', 'rpcerrorlist.py')
|
ERRORS_OUT = os.path.join(LIBRARY_DIR, 'errors', 'rpcerrorlist.py')
|
||||||
|
|
||||||
INVALID_BM_IN = os.path.join(GENERATOR_DIR, 'data', 'invalid_bot_methods.json')
|
METHODS_IN = os.path.join(GENERATOR_DIR, 'data', 'methods.csv')
|
||||||
|
|
||||||
TLOBJECT_IN_CORE_TL = os.path.join(GENERATOR_DIR, 'data', 'mtproto_api.tl')
|
TLOBJECT_IN_CORE_TL = os.path.join(GENERATOR_DIR, 'data', 'mtproto_api.tl')
|
||||||
TLOBJECT_IN_TL = os.path.join(GENERATOR_DIR, 'data', 'telegram_api.tl')
|
TLOBJECT_IN_TL = os.path.join(GENERATOR_DIR, 'data', 'telegram_api.tl')
|
||||||
|
@ -56,16 +54,19 @@ DOCS_OUT = 'docs'
|
||||||
|
|
||||||
|
|
||||||
def generate(which):
|
def generate(which):
|
||||||
from telethon_generator.parsers import parse_errors, parse_tl, find_layer
|
# TODO make docs generator use the new CSV too
|
||||||
|
from telethon_generator.parsers import\
|
||||||
|
parse_errors, parse_methods, parse_tl, find_layer
|
||||||
|
|
||||||
from telethon_generator.generators import\
|
from telethon_generator.generators import\
|
||||||
generate_errors, generate_tlobjects, generate_docs, clean_tlobjects
|
generate_errors, generate_tlobjects, generate_docs, clean_tlobjects
|
||||||
|
|
||||||
# Older Python versions open the file as bytes instead (3.4.2)
|
|
||||||
with open(INVALID_BM_IN, 'r') as f:
|
|
||||||
invalid_bot_methods = set(json.load(f))
|
|
||||||
|
|
||||||
layer = find_layer(TLOBJECT_IN_TL)
|
layer = find_layer(TLOBJECT_IN_TL)
|
||||||
errors = list(parse_errors(ERRORS_IN_JSON, ERRORS_IN_DESC))
|
errors = list(parse_errors(ERRORS_IN))
|
||||||
|
methods = list(parse_methods(METHODS_IN, {e.str_code: e for e in errors}))
|
||||||
|
invalid_bot_methods = {m.name for m in methods
|
||||||
|
if not m.usability.startswith('bot')}
|
||||||
|
|
||||||
tlobjects = list(itertools.chain(
|
tlobjects = list(itertools.chain(
|
||||||
parse_tl(TLOBJECT_IN_CORE_TL, layer, invalid_bot_methods),
|
parse_tl(TLOBJECT_IN_CORE_TL, layer, invalid_bot_methods),
|
||||||
parse_tl(TLOBJECT_IN_TL, layer, invalid_bot_methods)))
|
parse_tl(TLOBJECT_IN_TL, layer, invalid_bot_methods)))
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
import csv
|
||||||
import functools
|
import functools
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -9,7 +10,6 @@ from ..docswriter import DocsWriter
|
||||||
from ..parsers import TLObject
|
from ..parsers import TLObject
|
||||||
from ..utils import snake_to_camel_case
|
from ..utils import snake_to_camel_case
|
||||||
|
|
||||||
|
|
||||||
CORE_TYPES = {
|
CORE_TYPES = {
|
||||||
'int', 'long', 'int128', 'int256', 'double',
|
'int', 'long', 'int128', 'int256', 'double',
|
||||||
'vector', 'string', 'bool', 'true', 'bytes', 'date'
|
'vector', 'string', 'bool', 'true', 'bytes', 'date'
|
||||||
|
|
|
@ -1,2 +1,3 @@
|
||||||
from .errors import Error, parse_errors
|
from .errors import Error, parse_errors
|
||||||
|
from .methods import MethodInfo, parse_methods
|
||||||
from .tlobject import TLObject, parse_tl, find_layer
|
from .tlobject import TLObject, parse_tl, find_layer
|
||||||
|
|
|
@ -1,6 +1,4 @@
|
||||||
import json
|
import csv
|
||||||
import re
|
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
from ..utils import snake_to_camel_case
|
from ..utils import snake_to_camel_case
|
||||||
|
|
||||||
|
@ -16,15 +14,8 @@ KNOWN_BASE_CLASSES = {
|
||||||
500: 'ServerError',
|
500: 'ServerError',
|
||||||
}
|
}
|
||||||
|
|
||||||
# The API doesn't return the code for some (vital) errors. They are
|
|
||||||
# all assumed to be 400, except these well-known ones that aren't.
|
|
||||||
KNOWN_CODES = {
|
|
||||||
'ACTIVE_USER_REQUIRED': 401,
|
|
||||||
'AUTH_KEY_UNREGISTERED': 401,
|
|
||||||
'USER_DEACTIVATED': 401
|
|
||||||
}
|
|
||||||
|
|
||||||
# Give better semantic names to some captures
|
# Give better semantic names to some captures
|
||||||
|
# TODO Move this to the CSV?
|
||||||
CAPTURE_NAMES = {
|
CAPTURE_NAMES = {
|
||||||
'FloodWaitError': 'seconds',
|
'FloodWaitError': 'seconds',
|
||||||
'FloodTestPhoneWaitError': 'seconds',
|
'FloodTestPhoneWaitError': 'seconds',
|
||||||
|
@ -52,96 +43,40 @@ def _get_class_name(error_code):
|
||||||
|
|
||||||
|
|
||||||
class Error:
|
class Error:
|
||||||
def __init__(self, int_code, str_code, description, caused_by):
|
def __init__(self, codes, name, description):
|
||||||
# TODO Some errors have the same str_code but different int_code
|
# TODO Some errors have the same name but different integer codes
|
||||||
# Should these be split into different files or doesn't really matter?
|
# Should these be split into different files or doesn't really matter?
|
||||||
# Telegram isn't exactly consistent with returned errors anyway.
|
# Telegram isn't exactly consistent with returned errors anyway.
|
||||||
self.int_code = int_code
|
self.int_code = codes[0]
|
||||||
self.str_code = str_code
|
self.str_code = name
|
||||||
self.subclass = _get_class_name(int_code)
|
self.subclass = _get_class_name(codes[0])
|
||||||
self.subclass_exists = int_code in KNOWN_BASE_CLASSES
|
self.subclass_exists = codes[0] in KNOWN_BASE_CLASSES
|
||||||
self.description = description
|
self.description = description
|
||||||
self.caused_by = list(sorted(caused_by))
|
|
||||||
|
|
||||||
self.has_captures = '_X' in str_code
|
self.has_captures = '_X' in name
|
||||||
if self.has_captures:
|
if self.has_captures:
|
||||||
self.name = _get_class_name(str_code.replace('_X', ''))
|
self.name = _get_class_name(name.replace('_X', ''))
|
||||||
self.pattern = str_code.replace('_X', r'_(\d+)')
|
self.pattern = name.replace('_X', r'_(\d+)')
|
||||||
self.capture_name = CAPTURE_NAMES.get(self.name, 'x')
|
self.capture_name = CAPTURE_NAMES.get(self.name, 'x')
|
||||||
else:
|
else:
|
||||||
self.name = _get_class_name(str_code)
|
self.name = _get_class_name(name)
|
||||||
self.pattern = str_code
|
self.pattern = name
|
||||||
self.capture_name = None
|
self.capture_name = None
|
||||||
|
|
||||||
|
|
||||||
def parse_errors(json_file, descriptions_file):
|
def parse_errors(csv_file):
|
||||||
"""
|
"""
|
||||||
Parses the given JSON file in the following format:
|
Parses the input CSV file with columns (name, error codes, description)
|
||||||
{
|
and yields `Error` instances as a result.
|
||||||
"ok": true,
|
|
||||||
"human_result": {"int_code": ["descriptions"]},
|
|
||||||
"result": {"int_code": {"full_method_name": ["str_error"]}}
|
|
||||||
}
|
|
||||||
|
|
||||||
The descriptions file, which has precedence over the JSON's human_result,
|
|
||||||
should have the following format:
|
|
||||||
# comment
|
|
||||||
str_error=Description
|
|
||||||
|
|
||||||
The method yields `Error` instances as a result.
|
|
||||||
"""
|
"""
|
||||||
with open(json_file, 'r', encoding='utf-8') as f:
|
with open(csv_file, newline='') as f:
|
||||||
data = json.load(f)
|
f = csv.reader(f)
|
||||||
|
next(f, None) # header
|
||||||
|
for line, (name, codes, description) in enumerate(f, start=2):
|
||||||
|
try:
|
||||||
|
codes = [int(x) for x in codes.split()] or [400]
|
||||||
|
except ValueError:
|
||||||
|
raise ValueError('Not all codes are integers '
|
||||||
|
'(line {})'.format(line)) from None
|
||||||
|
|
||||||
errors = defaultdict(set)
|
yield Error([int(x) for x in codes], name, description)
|
||||||
error_to_method = defaultdict(set)
|
|
||||||
# PWRTelegram's API doesn't return all errors, which we do need here.
|
|
||||||
# Add some special known-cases manually first.
|
|
||||||
errors[420].update((
|
|
||||||
'FLOOD_WAIT_X', 'FLOOD_TEST_PHONE_WAIT_X', 'TAKEOUT_INIT_DELAY_X'
|
|
||||||
))
|
|
||||||
errors[401].update((
|
|
||||||
'AUTH_KEY_INVALID', 'SESSION_EXPIRED', 'SESSION_REVOKED'
|
|
||||||
))
|
|
||||||
errors[303].update((
|
|
||||||
'FILE_MIGRATE_X', 'PHONE_MIGRATE_X',
|
|
||||||
'NETWORK_MIGRATE_X', 'USER_MIGRATE_X'
|
|
||||||
))
|
|
||||||
for int_code, method_errors in data['result'].items():
|
|
||||||
for method, error_list in method_errors.items():
|
|
||||||
for error in error_list:
|
|
||||||
error = re.sub('_\d+', '_X', error).upper()
|
|
||||||
errors[int(int_code)].add(error)
|
|
||||||
error_to_method[error].add(method)
|
|
||||||
|
|
||||||
# Some errors are in the human result, but not with a code. Assume 400
|
|
||||||
for error in data['human_result']:
|
|
||||||
if error[0] != '-' and not error.isdigit():
|
|
||||||
error = re.sub('_\d+', '_X', error).upper()
|
|
||||||
if not any(error in es for es in errors.values()):
|
|
||||||
errors[KNOWN_CODES.get(error, 400)].add(error)
|
|
||||||
|
|
||||||
# Prefer the descriptions that are related with Telethon way of coding
|
|
||||||
# to those that PWRTelegram's API provides.
|
|
||||||
telethon_descriptions = {}
|
|
||||||
with open(descriptions_file, 'r', encoding='utf-8') as f:
|
|
||||||
for line in f:
|
|
||||||
line = line.strip()
|
|
||||||
if line and not line.startswith('#'):
|
|
||||||
equal = line.index('=')
|
|
||||||
message, description = line[:equal], line[equal + 1:]
|
|
||||||
telethon_descriptions[message.rstrip()] = description.lstrip()
|
|
||||||
|
|
||||||
for int_code, error_set in errors.items():
|
|
||||||
for str_code in sorted(error_set):
|
|
||||||
description = telethon_descriptions.get(
|
|
||||||
str_code, '\n'.join(data['human_result'].get(
|
|
||||||
str_code, ['No description known']
|
|
||||||
))
|
|
||||||
)
|
|
||||||
yield Error(
|
|
||||||
int_code=int_code,
|
|
||||||
str_code=str_code,
|
|
||||||
description=description,
|
|
||||||
caused_by=error_to_method[str_code]
|
|
||||||
)
|
|
||||||
|
|
29
telethon_generator/parsers/methods.py
Normal file
29
telethon_generator/parsers/methods.py
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
import csv
|
||||||
|
|
||||||
|
|
||||||
|
class MethodInfo:
    """
    Plain record describing one API method.

    It only stores the three values handed to the constructor:
    the method name, its usability, and the errors associated with it.
    """
    def __init__(self, name, usability, errors):
        # Name of the method this record describes.
        self.name = name
        # Who can use the method; kept exactly as given, no validation here.
        self.usability = usability
        # The errors associated with the method; stored by reference.
        self.errors = errors

    def __repr__(self):
        # Show all three fields so parsed rows are readable when debugging
        # instead of printing as an opaque object address.
        return '{}(name={!r}, usability={!r}, errors={!r})'.format(
            type(self).__name__, self.name, self.usability, self.errors)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_methods(csv_file, errors_dict):
    """
    Parses the input CSV file with columns (method, usability, errors)
    and yields `MethodInfo` instances as a result.

    The first row is treated as a header and skipped, and completely
    blank lines are ignored. The ``errors`` column is a whitespace
    separated list of names, each of which is looked up in `errors_dict`.

    Raises `ValueError` (mentioning the offending line number) if a row
    does not have exactly three columns, if the usability is not one of
    ``user``, ``bot``, ``both`` or ``unknown``, or if the row references
    an error name missing from `errors_dict`.
    """
    valid_usability = ('user', 'bot', 'both', 'unknown')

    # ``newline=''`` is what the csv module asks for; the explicit encoding
    # keeps the result independent of the platform's default locale.
    with open(csv_file, newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        next(reader, None)  # header
        for row in reader:
            if not row:
                # csv.reader yields an empty list for blank lines
                continue

            # Physical line in the file, also right after blank lines
            line = reader.line_num
            if len(row) != 3:
                raise ValueError(
                    'Expected 3 columns (method, usability, errors) '
                    'but found {} (line {})'.format(len(row), line))

            method, usability, errors = row
            if usability not in valid_usability:
                raise ValueError('Usability must be either user, bot, '
                                 'both or unknown, not {} (line {})'
                                 .format(usability, line))

            names = errors.split()
            try:
                resolved = [errors_dict[x] for x in names]
            except KeyError:
                # Report only the names that could not be resolved
                unknown = [x for x in names if x not in errors_dict]
                raise ValueError('Method {} references unknown errors {} '
                                 '(line {})'.format(
                                     method, ' '.join(unknown), line)
                                 ) from None

            yield MethodInfo(method, usability, resolved)
|
Loading…
Reference in New Issue
Block a user