From 463fcedf2791b995fe81e1015bf8ea17199ceb51 Mon Sep 17 00:00:00 2001
From: Lonami Exo
Date: Sat, 14 Apr 2018 13:56:39 +0200
Subject: [PATCH] Split error_generator code into parsers/generators packages

---
Review notes (free-form text placed between the "---" marker and the
diffstat; it documents the patch and is not meant to be part of the commit):

* parsers/errors.py: the two re.sub() patterns that match "_<digits>" are
  written as raw strings (r'_\d+'). The regular expression is unchanged; this
  only avoids the invalid "\d" escape inside a normal string literal. The
  removed error_generator.py is left exactly as it was.
* parsers/errors.py: the parse_errors() docstring now lists the
  "human_result" keys as "str_error or int_code", because descriptions are
  looked up by str_code and the fallback loop skips keys that are numeric.
* generators/errors.py iterates `errors` twice (once to collect the base
  classes, once to write the classes), so it needs a re-iterable sequence;
  generator.py therefore wraps the parse_errors() generator in list().
* The generated import line no longer names BadMessageError, which the
  removed error_generator.py always wrote. NOTE(review): confirm nothing
  relies on rpc_error_list re-exporting it.
* rpc_errors_all is now written with every capture (regex) pattern first,
  followed by the exact-match patterns.
* The generated classes still call super(Exception, self).__init__(...),
  exactly as the removed code did.
* NOTE(review): the blob id on the parsers/errors.py index line was not
  recomputed after the two edits above.

 telethon_generator/error_generator.py     | 151 ----------------------
 telethon_generator/generator.py           |  15 +++
 telethon_generator/generators/__init__.py |   1 +
 telethon_generator/generators/errors.py   |  52 ++++++++
 telethon_generator/parsers/__init__.py    |   1 +
 telethon_generator/parsers/errors.py      | 145 +++++++++++++++++++++
 6 files changed, 214 insertions(+), 151 deletions(-)
 delete mode 100644 telethon_generator/error_generator.py
 create mode 100644 telethon_generator/generator.py
 create mode 100644 telethon_generator/generators/__init__.py
 create mode 100644 telethon_generator/generators/errors.py
 create mode 100644 telethon_generator/parsers/__init__.py
 create mode 100644 telethon_generator/parsers/errors.py

diff --git a/telethon_generator/error_generator.py b/telethon_generator/error_generator.py
deleted file mode 100644
index 676ef669..00000000
--- a/telethon_generator/error_generator.py
+++ /dev/null
@@ -1,151 +0,0 @@
-import json
-import re
-from collections import defaultdict
-
-known_base_classes = {
-    303: 'InvalidDCError',
-    400: 'BadRequestError',
-    401: 'UnauthorizedError',
-    403: 'ForbiddenError',
-    404: 'NotFoundError',
-    406: 'AuthKeyError',
-    420: 'FloodError',
-    500: 'ServerError',
-}
-
-# The API doesn't return the code for some (vital) errors. They are
-# all assumed to be 400, except these well-known ones that aren't.
-known_codes = {
-    'ACTIVE_USER_REQUIRED': 401,
-    'AUTH_KEY_UNREGISTERED': 401,
-    'USER_DEACTIVATED': 401
-}
-
-
-def get_class_name(error_code):
-    if isinstance(error_code, int):
-        return known_base_classes.get(
-            error_code, 'RPCError' + str(error_code).replace('-', 'Neg')
-        )
-
-    if 'FIRSTNAME' in error_code:
-        error_code = error_code.replace('FIRSTNAME', 'FIRST_NAME')
-
-    result = re.sub(
-        r'_([a-z])', lambda m: m.group(1).upper(), error_code.lower()
-    )
-    return result[:1].upper() + result[1:].replace('_', '') + 'Error'
-
-
-def write_error(f, code, name, desc, capture_name):
-    f.write(
-        '\n\nclass {}({}):\n    def __init__(self, **kwargs):\n        '
-        ''.format(name, get_class_name(code))
-    )
-    if capture_name:
-        f.write(
-            "self.{} = int(kwargs.get('capture', 0))\n        ".format(capture_name)
-        )
-    f.write('super(Exception, self).__init__({}'.format(repr(desc)))
-    if capture_name:
-        f.write('.format(self.{})'.format(capture_name))
-    f.write(')\n')
-
-
-def generate_code(output, json_file, errors_desc):
-    with open(json_file, encoding='utf-8') as f:
-        data = json.load(f)
-
-    errors = defaultdict(set)
-    # PWRTelegram's API doesn't return all errors, which we do need here.
-    # Add some special known-cases manually first.
-    errors[420].update((
-        'FLOOD_WAIT_X', 'FLOOD_TEST_PHONE_WAIT_X'
-    ))
-    errors[401].update((
-        'AUTH_KEY_INVALID', 'SESSION_EXPIRED', 'SESSION_REVOKED'
-    ))
-    errors[303].update((
-        'FILE_MIGRATE_X', 'PHONE_MIGRATE_X',
-        'NETWORK_MIGRATE_X', 'USER_MIGRATE_X'
-    ))
-    for error_code, method_errors in data['result'].items():
-        for error_list in method_errors.values():
-            for error in error_list:
-                errors[int(error_code)].add(re.sub('_\d+', '_X', error).upper())
-
-    # Some errors are in the human result, but not with a code. Assume code 400
-    for error in data['human_result']:
-        if error[0] != '-' and not error.isdigit():
-            error = re.sub('_\d+', '_X', error).upper()
-            if not any(error in es for es in errors.values()):
-                errors[known_codes.get(error, 400)].add(error)
-
-    # Some error codes are not known, so create custom base classes if needed
-    needed_base_classes = [
-        (e, get_class_name(e)) for e in errors if e not in known_base_classes
-    ]
-
-    # Prefer the descriptions that are related with Telethon way of coding to
-    # those that PWRTelegram's API provides.
-    telethon_descriptions = {}
-    with open(errors_desc, encoding='utf-8') as f:
-        for line in f:
-            line = line.strip()
-            if line and not line.startswith('#'):
-                equal = line.index('=')
-                message, description = line[:equal], line[equal + 1:]
-                telethon_descriptions[message.rstrip()] = description.lstrip()
-
-    # Names for the captures, or 'x' if unknown
-    capture_names = {
-        'FloodWaitError': 'seconds',
-        'FloodTestPhoneWaitError': 'seconds',
-        'FileMigrateError': 'new_dc',
-        'NetworkMigrateError': 'new_dc',
-        'PhoneMigrateError': 'new_dc',
-        'UserMigrateError': 'new_dc',
-        'FilePartMissingError': 'which'
-    }
-
-    # Everything ready, generate the code
-    with open(output, 'w', encoding='utf-8') as f:
-        f.write(
-            'from .rpc_base_errors import RPCError, BadMessageError, {}\n'.format(
-                ", ".join(known_base_classes.values()))
-        )
-        for code, cls in needed_base_classes:
-            f.write(
-                '\n\nclass {}(RPCError):\n    code = {}\n'.format(cls, code)
-            )
-
-        patterns = []  # Save this dictionary later in the generated code
-        for error_code, error_set in errors.items():
-            for error in sorted(error_set):
-                description = telethon_descriptions.get(
-                    error, '\n'.join(data['human_result'].get(
-                        error, ['No description known.']
-                    ))
-                )
-                has_captures = '_X' in error
-                if has_captures:
-                    name = get_class_name(error.replace('_X', ''))
-                    pattern = error.replace('_X', r'_(\d+)')
-                else:
-                    name, pattern = get_class_name(error), error
-
-                patterns.append((pattern, name))
-                capture = capture_names.get(name, 'x') if has_captures else None
-                # TODO Some errors have the same name but different code,
-                # split this across different files?
-                write_error(f, error_code, name, description, capture)
-
-        f.write('\n\nrpc_errors_all = {\n')
-        for pattern, name in patterns:
-            f.write('    {}: {},\n'.format(repr(pattern), name))
-        f.write('}\n')
-
-
-if __name__ == '__main__':
-    generate_code('../telethon/errors/rpc_error_list.py',
-                  'errors.json', 'error_descriptions')
diff --git a/telethon_generator/generator.py b/telethon_generator/generator.py
new file mode 100644
index 00000000..e9c3659b
--- /dev/null
+++ b/telethon_generator/generator.py
@@ -0,0 +1,15 @@
+from telethon_generator.parsers import parse_errors
+from telethon_generator.generators import generate_errors
+
+
+INPUT_JSON = 'errors.json'
+INPUT_DESCRIPTIONS = 'error_descriptions'
+OUTPUT = '../telethon/errors/rpc_error_list.py'
+
+
+if __name__ == '__main__':
+    with open(OUTPUT, 'w', encoding='utf-8') as file:
+        generate_errors(
+            errors=list(parse_errors(INPUT_JSON, INPUT_DESCRIPTIONS)),
+            f=file
+        )
diff --git a/telethon_generator/generators/__init__.py b/telethon_generator/generators/__init__.py
new file mode 100644
index 00000000..b998617b
--- /dev/null
+++ b/telethon_generator/generators/__init__.py
@@ -0,0 +1 @@
+from .errors import generate_errors
diff --git a/telethon_generator/generators/errors.py b/telethon_generator/generators/errors.py
new file mode 100644
index 00000000..136809a0
--- /dev/null
+++ b/telethon_generator/generators/errors.py
@@ -0,0 +1,52 @@
+import itertools
+
+
+def generate_errors(errors, f):
+    # Exact/regex match to create {CODE: ErrorClassName}
+    exact_match = []
+    regex_match = []
+
+    # Find out what subclasses to import and which to create
+    import_base, create_base = set(), {}
+    for error in errors:
+        if error.subclass_exists:
+            import_base.add(error.subclass)
+        else:
+            create_base[error.subclass] = error.int_code
+
+        if error.has_captures:
+            regex_match.append(error)
+        else:
+            exact_match.append(error)
+
+    # Imports and new subclass creation
+    f.write('from .rpc_base_errors import RPCError, {}\n'
+            .format(", ".join(sorted(import_base))))
+
+    for cls, int_code in sorted(create_base.items(), key=lambda t: t[1]):
+        f.write('\n\nclass {}(RPCError):\n    code = {}\n'
+                .format(cls, int_code))
+
+    # Error classes generation
+    for error in errors:
+        f.write('\n\nclass {}({}):\n    def __init__(self, **kwargs):\n'
+                '        '.format(error.name, error.subclass))
+
+        if error.has_captures:
+            f.write("self.{} = int(kwargs.get('capture', 0))\n        "
+                    .format(error.capture_name))
+
+        f.write('super(Exception, self).__init__({}'
+                .format(repr(error.description)))
+
+        if error.has_captures:
+            f.write('.format(self.{})'.format(error.capture_name))
+
+        f.write(')\n')
+
+    # Create the actual {CODE: ErrorClassName} dict once classes are defined
+    # TODO Actually make a difference between regex/exact
+    f.write('\n\nrpc_errors_all = {\n')
+    for error in itertools.chain(regex_match, exact_match):
+        f.write('    {}: {},\n'.format(repr(error.pattern), error.name))
+    f.write('}\n')
diff --git a/telethon_generator/parsers/__init__.py b/telethon_generator/parsers/__init__.py
new file mode 100644
index 00000000..d8550c73
--- /dev/null
+++ b/telethon_generator/parsers/__init__.py
@@ -0,0 +1 @@
+from .errors import Error, parse_errors
diff --git a/telethon_generator/parsers/errors.py b/telethon_generator/parsers/errors.py
new file mode 100644
index 00000000..1b0e1552
--- /dev/null
+++ b/telethon_generator/parsers/errors.py
@@ -0,0 +1,145 @@
+import json
+from collections import defaultdict
+
+import re
+
+# Core base classes depending on the integer error code
+KNOWN_BASE_CLASSES = {
+    303: 'InvalidDCError',
+    400: 'BadRequestError',
+    401: 'UnauthorizedError',
+    403: 'ForbiddenError',
+    404: 'NotFoundError',
+    406: 'AuthKeyError',
+    420: 'FloodError',
+    500: 'ServerError',
+}
+
+# The API doesn't return the code for some (vital) errors. They are
+# all assumed to be 400, except these well-known ones that aren't.
+KNOWN_CODES = {
+    'ACTIVE_USER_REQUIRED': 401,
+    'AUTH_KEY_UNREGISTERED': 401,
+    'USER_DEACTIVATED': 401
+}
+
+# Give better semantic names to some captures
+CAPTURE_NAMES = {
+    'FloodWaitError': 'seconds',
+    'FloodTestPhoneWaitError': 'seconds',
+    'FileMigrateError': 'new_dc',
+    'NetworkMigrateError': 'new_dc',
+    'PhoneMigrateError': 'new_dc',
+    'UserMigrateError': 'new_dc',
+    'FilePartMissingError': 'which'
+}
+
+
+def _get_class_name(error_code):
+    """
+    Gets the corresponding class name for the given error code,
+    this either being an integer (thus base error name) or str.
+    """
+    if isinstance(error_code, int):
+        return KNOWN_BASE_CLASSES.get(
+            error_code, 'RPCError' + str(error_code).replace('-', 'Neg')
+        )
+
+    if 'FIRSTNAME' in error_code:
+        error_code = error_code.replace('FIRSTNAME', 'FIRST_NAME')
+
+    result = re.sub(
+        r'_([a-z])', lambda m: m.group(1).upper(), error_code.lower()
+    )
+    return result[:1].upper() + result[1:].replace('_', '') + 'Error'
+
+
+class Error:
+    def __init__(self, int_code, str_code, description):
+        # TODO Some errors have the same str_code but different int_code
+        # Should these be split into different files or doesn't really matter?
+        # Telegram isn't exactly consistent with returned errors anyway.
+        self.int_code = int_code
+        self.str_code = str_code
+        self.subclass = _get_class_name(int_code)
+        self.subclass_exists = int_code in KNOWN_BASE_CLASSES
+        self.description = description
+
+        self.has_captures = '_X' in str_code
+        if self.has_captures:
+            self.name = _get_class_name(str_code.replace('_X', ''))
+            self.pattern = str_code.replace('_X', r'_(\d+)')
+            self.capture_name = CAPTURE_NAMES.get(self.name, 'x')
+        else:
+            self.name = _get_class_name(str_code)
+            self.pattern = str_code
+            self.capture_name = None
+
+
+def parse_errors(json_file, descriptions_file):
+    """
+    Parses the given JSON file in the following format:
+        {
+            "ok": true,
+            "human_result": {"str_error or int_code": ["descriptions"]},
+            "result": {"int_code": {"full_method_name": ["str_error"]}}
+        }
+
+    The descriptions file, which has precedence over the JSON's human_result,
+    should have the following format:
+        # comment
+        str_error=Description
+
+    The method yields `Error` instances as a result.
+    """
+    with open(json_file, encoding='utf-8') as f:
+        data = json.load(f)
+
+    errors = defaultdict(set)
+    # PWRTelegram's API doesn't return all errors, which we do need here.
+    # Add some special known-cases manually first.
+    errors[420].update((
+        'FLOOD_WAIT_X', 'FLOOD_TEST_PHONE_WAIT_X'
+    ))
+    errors[401].update((
+        'AUTH_KEY_INVALID', 'SESSION_EXPIRED', 'SESSION_REVOKED'
+    ))
+    errors[303].update((
+        'FILE_MIGRATE_X', 'PHONE_MIGRATE_X',
+        'NETWORK_MIGRATE_X', 'USER_MIGRATE_X'
+    ))
+    for int_code, method_errors in data['result'].items():
+        for error_list in method_errors.values():
+            for error in error_list:
+                errors[int(int_code)].add(re.sub(r'_\d+', '_X', error).upper())
+
+    # Some errors are in the human result, but not with a code. Assume 400
+    for error in data['human_result']:
+        if error[0] != '-' and not error.isdigit():
+            error = re.sub(r'_\d+', '_X', error).upper()
+            if not any(error in es for es in errors.values()):
+                errors[KNOWN_CODES.get(error, 400)].add(error)
+
+    # Prefer the descriptions that are related with Telethon way of coding
+    # to those that PWRTelegram's API provides.
+    telethon_descriptions = {}
+    with open(descriptions_file, encoding='utf-8') as f:
+        for line in f:
+            line = line.strip()
+            if line and not line.startswith('#'):
+                equal = line.index('=')
+                message, description = line[:equal], line[equal + 1:]
+                telethon_descriptions[message.rstrip()] = description.lstrip()
+
+    for int_code, error_set in errors.items():
+        for str_code in sorted(error_set):
+            description = telethon_descriptions.get(
+                str_code, '\n'.join(data['human_result'].get(
+                    str_code, ['No description known.']
+                ))
+            )
+            yield Error(
+                int_code=int_code,
+                str_code=str_code,
+                description=description,
+            )