Update code generator to parse CSV files

This commit is contained in:
Lonami Exo 2018-11-30 21:39:13 +01:00
parent f646863149
commit f05109f186
5 changed files with 68 additions and 102 deletions

View File

@ -15,7 +15,6 @@ import json
import os import os
import re import re
import shutil import shutil
from codecs import open
from sys import argv from sys import argv
from setuptools import find_packages, setup from setuptools import find_packages, setup
@ -40,11 +39,10 @@ class TempWorkDir:
GENERATOR_DIR = 'telethon_generator' GENERATOR_DIR = 'telethon_generator'
LIBRARY_DIR = 'telethon' LIBRARY_DIR = 'telethon'
ERRORS_IN_JSON = os.path.join(GENERATOR_DIR, 'data', 'errors.json') ERRORS_IN = os.path.join(GENERATOR_DIR, 'data', 'errors.csv')
ERRORS_IN_DESC = os.path.join(GENERATOR_DIR, 'data', 'error_descriptions')
ERRORS_OUT = os.path.join(LIBRARY_DIR, 'errors', 'rpcerrorlist.py') ERRORS_OUT = os.path.join(LIBRARY_DIR, 'errors', 'rpcerrorlist.py')
INVALID_BM_IN = os.path.join(GENERATOR_DIR, 'data', 'invalid_bot_methods.json') METHODS_IN = os.path.join(GENERATOR_DIR, 'data', 'methods.csv')
TLOBJECT_IN_CORE_TL = os.path.join(GENERATOR_DIR, 'data', 'mtproto_api.tl') TLOBJECT_IN_CORE_TL = os.path.join(GENERATOR_DIR, 'data', 'mtproto_api.tl')
TLOBJECT_IN_TL = os.path.join(GENERATOR_DIR, 'data', 'telegram_api.tl') TLOBJECT_IN_TL = os.path.join(GENERATOR_DIR, 'data', 'telegram_api.tl')
@ -56,16 +54,19 @@ DOCS_OUT = 'docs'
def generate(which): def generate(which):
from telethon_generator.parsers import parse_errors, parse_tl, find_layer # TODO make docs generator use the new CSV too
from telethon_generator.parsers import\
parse_errors, parse_methods, parse_tl, find_layer
from telethon_generator.generators import\ from telethon_generator.generators import\
generate_errors, generate_tlobjects, generate_docs, clean_tlobjects generate_errors, generate_tlobjects, generate_docs, clean_tlobjects
# Older Python versions open the file as bytes instead (3.4.2)
with open(INVALID_BM_IN, 'r') as f:
invalid_bot_methods = set(json.load(f))
layer = find_layer(TLOBJECT_IN_TL) layer = find_layer(TLOBJECT_IN_TL)
errors = list(parse_errors(ERRORS_IN_JSON, ERRORS_IN_DESC)) errors = list(parse_errors(ERRORS_IN))
methods = list(parse_methods(METHODS_IN, {e.str_code: e for e in errors}))
invalid_bot_methods = {m.name for m in methods
if not m.usability.startswith('bot')}
tlobjects = list(itertools.chain( tlobjects = list(itertools.chain(
parse_tl(TLOBJECT_IN_CORE_TL, layer, invalid_bot_methods), parse_tl(TLOBJECT_IN_CORE_TL, layer, invalid_bot_methods),
parse_tl(TLOBJECT_IN_TL, layer, invalid_bot_methods))) parse_tl(TLOBJECT_IN_TL, layer, invalid_bot_methods)))

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import csv
import functools import functools
import os import os
import re import re
@ -9,7 +10,6 @@ from ..docswriter import DocsWriter
from ..parsers import TLObject from ..parsers import TLObject
from ..utils import snake_to_camel_case from ..utils import snake_to_camel_case
CORE_TYPES = { CORE_TYPES = {
'int', 'long', 'int128', 'int256', 'double', 'int', 'long', 'int128', 'int256', 'double',
'vector', 'string', 'bool', 'true', 'bytes', 'date' 'vector', 'string', 'bool', 'true', 'bytes', 'date'

View File

@ -1,2 +1,3 @@
from .errors import Error, parse_errors from .errors import Error, parse_errors
from .methods import MethodInfo, parse_methods
from .tlobject import TLObject, parse_tl, find_layer from .tlobject import TLObject, parse_tl, find_layer

View File

@ -1,6 +1,4 @@
import json import csv
import re
from collections import defaultdict
from ..utils import snake_to_camel_case from ..utils import snake_to_camel_case
@ -16,15 +14,8 @@ KNOWN_BASE_CLASSES = {
500: 'ServerError', 500: 'ServerError',
} }
# The API doesn't return the code for some (vital) errors. They are
# all assumed to be 400, except these well-known ones that aren't.
KNOWN_CODES = {
'ACTIVE_USER_REQUIRED': 401,
'AUTH_KEY_UNREGISTERED': 401,
'USER_DEACTIVATED': 401
}
# Give better semantic names to some captures # Give better semantic names to some captures
# TODO Move this to the CSV?
CAPTURE_NAMES = { CAPTURE_NAMES = {
'FloodWaitError': 'seconds', 'FloodWaitError': 'seconds',
'FloodTestPhoneWaitError': 'seconds', 'FloodTestPhoneWaitError': 'seconds',
@ -52,96 +43,40 @@ def _get_class_name(error_code):
class Error:
    """A single RPC error parsed from the errors CSV."""
    def __init__(self, codes, name, description):
        """
        :param codes: non-empty list of integer HTTP-like error codes;
                      the first one is treated as the primary code.
        :param name: the error's string code, e.g. ``FLOOD_WAIT_X``.
        :param description: human-readable description of the error.
        """
        # TODO Some errors have the same name but different integer codes
        # Should these be split into different files or doesn't really matter?
        # Telegram isn't exactly consistent with returned errors anyway.
        self.int_code = codes[0]
        self.str_code = name
        self.subclass = _get_class_name(codes[0])
        self.subclass_exists = codes[0] in KNOWN_BASE_CLASSES
        self.description = description

        # Errors with a numeric argument use a '_X' placeholder in their
        # string code (e.g. FLOOD_WAIT_X); turn it into a capturing regex.
        self.has_captures = '_X' in name
        if self.has_captures:
            self.name = _get_class_name(name.replace('_X', ''))
            self.pattern = name.replace('_X', r'_(\d+)')
            self.capture_name = CAPTURE_NAMES.get(self.name, 'x')
        else:
            self.name = _get_class_name(name)
            self.pattern = name
            self.capture_name = None
def parse_errors(csv_file):
    """
    Parses the input CSV file with columns (name, error codes, description)
    and yields `Error` instances as a result.

    :param csv_file: path to the CSV file to read.
    :raises ValueError: if the error codes column contains a non-integer.
    """
    with open(csv_file, newline='') as f:
        f = csv.reader(f)
        next(f, None)  # header
        for line, (name, codes, description) in enumerate(f, start=2):
            try:
                # An empty codes column defaults to 400 (bad request).
                codes = [int(x) for x in codes.split()] or [400]
            except ValueError:
                raise ValueError('Not all codes are integers '
                                 '(line {})'.format(line)) from None

            # `codes` already holds integers here; no need to convert again.
            yield Error(codes, name, description)

View File

@ -0,0 +1,29 @@
import csv


class MethodInfo:
    """Information about a single RPC method from the methods CSV."""
    def __init__(self, name, usability, errors):
        # name: fully-qualified method name, e.g. 'messages.sendMessage'
        self.name = name
        # usability: one of 'user', 'bot', 'both' or 'unknown'
        self.usability = usability
        # errors: list of `Error` instances this method may raise
        self.errors = errors


def parse_methods(csv_file, errors_dict):
    """
    Parses the input CSV file with columns (method, usability, errors)
    and yields `MethodInfo` instances as a result.

    :param csv_file: path to the CSV file to read.
    :param errors_dict: mapping from error string codes to `Error` objects.
    :raises ValueError: on an unknown usability value, or on an error
                        reference missing from ``errors_dict``; the message
                        includes the offending CSV line number.
    """
    with open(csv_file, newline='') as f:
        f = csv.reader(f)
        next(f, None)  # header
        for line, (method, usability, errors) in enumerate(f, start=2):
            if usability not in ('user', 'bot', 'both', 'unknown'):
                raise ValueError(
                    'Usability must be either user, bot, '
                    'both or unknown, not {} (line {})'
                    .format(usability, line))
            try:
                errors = [errors_dict[x] for x in errors.split()]
            except KeyError as e:
                # Report the specific missing error name, not the raw
                # whitespace-separated field, so the bad entry is obvious.
                raise ValueError('Method {} references unknown error {} '
                                 '(line {})'.format(method, e.args[0], line)
                                 ) from None

            yield MethodInfo(method, usability, errors)