psycopg2/scripts/make_errorcodes.py

#!/usr/bin/env python
"""Generate the errorcodes module starting from PostgreSQL documentation.

The script can be run at a new PostgreSQL release to refresh the module.
"""

# Copyright (C) 2010 Daniele Varrazzo  <daniele.varrazzo@gmail.com>
#
# psycopg2 is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# psycopg2 is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
# License for more details.

import re
import sys
import urllib2
from collections import defaultdict

from BeautifulSoup import BeautifulSoup as BS


def main():
    """Regenerate the errorcodes module named on the command line.

    The file is first read up to (and including) its ``# autogenerated``
    marker line, then the error codes are fetched for every listed
    PostgreSQL version, and finally the file is rewritten as the preserved
    head followed by the freshly generated definitions.

    Return 2 if the script is not invoked with exactly one argument;
    return None on success.
    """
    if len(sys.argv) != 2:
        sys.stderr.write(
            "usage: %s /path/to/errorcodes.py\n" % sys.argv[0])
        return 2

    filename = sys.argv[1]

    # Read the head and download the data *before* reopening the file for
    # writing: a failure in either step leaves the existing file untouched.
    file_start = read_base_file(filename)
    # If you add a version to the list fix the docs (in errorcodes.rst)
    classes, errors = fetch_errors(
        ['8.1', '8.2', '8.3', '8.4', '9.0', '9.1', '9.2', '9.3', '9.4', '9.5',
         '9.6', '10', '11'])

    # The context manager guarantees the output is flushed and closed even
    # if generating a line raises.
    with open(filename, "w") as f:
        for line in file_start:
            f.write(line + "\n")
        for line in generate_module_data(classes, errors):
            f.write(line + "\n")


def read_base_file(filename):
    """Return the hand-written head of the module stored in *filename*.

    The result is the list of lines (without the trailing newline) from the
    start of the file up to and including the first line that starts with
    ``# autogenerated``.

    Raise ValueError if no such marker line is found.
    """
    rv = []
    # Use a context manager so the handle is released on every exit path
    # (early return as well as the error below).
    with open(filename) as f:
        for line in f:
            rv.append(line.rstrip("\n"))
            if line.startswith("# autogenerated"):
                return rv

    raise ValueError("can't find the separator. Is this the right file?")


def parse_errors_txt(url):
    """Parse the error codes from the ``errcodes.txt`` file found at *url*.

    Anything following a ``#`` is a comment and is dropped, as are blank
    lines. Every remaining line must be either a section header
    (``Section: Class XX - description``) or an error definition made of a
    five-character code, one of the letters ``E``, ``W`` or ``S``, an
    ``ERRCODE_`` macro name and an optional spec name.

    Return a ``(classes, errors)`` pair: *classes* maps each two-character
    class code to its ``Class XX - description`` label; *errors* maps each
    class code to a ``{errcode: LABEL}`` dict, where the label is the
    upper-cased spec name.

    Raise ValueError on a line of any other shape, or if an error
    definition appears before the first section header.
    """
    # Compile the patterns once instead of looking them up for every line.
    section_rex = re.compile(r"Section: (Class (..) - .+)")
    error_rex = re.compile(
        r"(.....)\s+(?:E|W|S)\s+ERRCODE_(\S+)(?:\s+(\S+))?$")

    classes = {}
    errors = defaultdict(dict)
    # Code of the section being parsed; None until a header has been seen.
    class_ = None

    page = urllib2.urlopen(url)
    try:
        for line in page:
            # Strip comments and skip blanks
            line = line.split('#')[0].strip()
            if not line:
                continue

            # Parse a section
            m = section_rex.match(line)
            if m:
                label, class_ = m.groups()
                classes[class_] = label
                continue

            # Parse an error
            m = error_rex.match(line)
            if m:
                errcode, _macro, spec = m.groups()
                # skip errcodes without specs as they are not publicly
                # visible
                if not spec:
                    continue
                if class_ is None:
                    raise ValueError(
                        "error code found before any section:\n%s" % line)
                errors[class_][errcode] = spec.upper()
                continue

            # We don't expect anything else
            raise ValueError("unexpected line:\n%s" % line)
    finally:
        # Release the connection whether parsing succeeded or not.
        page.close()

    return classes, errors


def parse_errors_sgml(url):
    """Parse the error codes from the documentation page found at *url*.

    The page is parsed with BeautifulSoup and the rows of the first
    ``tbody`` of its second ``table`` are scanned: a row whose first cell
    has a ``colspan`` attribute is a class header, any other row is an
    error definition belonging to the most recent class header.

    Return a ``(classes, errors)`` pair: *classes* maps each two-character
    class code to its label (the row text, starting with ``Class``);
    *errors* maps each class code to a ``{errcode: LABEL}`` dict.

    The expected page structure is checked with ``assert`` statements, so
    an unexpected layout results in an AssertionError.
    """
    page = BS(urllib2.urlopen(url))
    # Calling a BeautifulSoup node is shorthand for a find-all by tag name.
    table = page('table')[1]('tbody')[0]

    classes = {}
    errors = defaultdict(dict)

    for tr in table('tr'):
        if tr.td.get('colspan'):    # it's a class
            # Join all the text nodes of the row, collapse the whitespace
            # and turn the em dash into a plain hyphen so that the label
            # can be encoded as ASCII.
            label = ' '.join(' '.join(tr(text=True)).split()) \
                .replace(u'\u2014', '-').encode('ascii')
            assert label.startswith('Class')
            # The class code is the second word of the label.
            class_ = label.split()[1]
            assert len(class_) == 2
            classes[class_] = label

        else:   # it's an error
            errcode = tr.tt.string.encode("ascii")
            assert len(errcode) == 5

            tds = tr('td')
            if len(tds) == 3:
                # Three columns: the label is the second cell with its
                # words joined by underscores.
                errlabel = '_'.join(tds[1].string.split()).encode('ascii')

                # double check the columns are equal
                cond_name = tds[2].string.strip().upper().encode("ascii")
                assert errlabel == cond_name, tr

            elif len(tds) == 2:
                # found in PG 9.1 docs
                errlabel = tds[1].tt.string.upper().encode("ascii")

            else:
                assert False, tr

            # class_ is the code set by the last class header row seen.
            errors[class_][errcode] = errlabel

    return classes, errors

# Template of the documentation page listing the error codes; the
# placeholder receives the PostgreSQL version.
errors_sgml_url = (
    "http://www.postgresql.org/docs/%s/static/errcodes-appendix.html")

# Template of the raw errcodes.txt in the PostgreSQL git repository; the
# placeholder receives the branch name to read the file from.
errors_txt_url = (
    "http://git.postgresql.org/gitweb/?p=postgresql.git;a=blob_plain;"
    "f=src/backend/utils/errcodes.txt;hb=%s")


def fetch_errors(versions):
    """Collect the error classes and codes of several PostgreSQL versions.

    *versions* is a sequence of version strings such as ``'9.0'`` or
    ``'11'``; each one is echoed to stderr as it is processed. Versions
    before 9.1 are parsed from the documentation page, later ones from the
    ``errcodes.txt`` file of the matching ``*_STABLE`` branch.

    Return a ``(classes, errors)`` pair with the same layout produced by
    the single-version parsers. Versions are merged in the order given, so
    a later version overrides an earlier one defining the same code.
    """
    classes = {}
    errors = defaultdict(dict)

    for version in versions:
        sys.stderr.write("%s\n" % version)
        # Only the first whitespace-separated word of the version is used
        # for the numeric comparison.
        tver = tuple(map(int, version.split()[0].split('.')))
        if tver < (9, 1):
            c1, e1 = parse_errors_sgml(errors_sgml_url % version)
        else:
            # Branch names are REL9_6_STABLE up to 9.x, REL_10_STABLE after.
            prefix = 'REL_' if tver[0] >= 10 else 'REL'
            tag = '%s%s_STABLE' % (prefix, version.replace('.', '_'))
            c1, e1 = parse_errors_txt(errors_txt_url % tag)
        classes.update(c1)

        # TODO: this error was added in PG 10 beta 1 but dropped in the
        # final release. It doesn't harm leaving it in the file. Check if it
        # will be added back in PG 12.
        # https://github.com/postgres/postgres/commit/28e0727076
        # It is set before merging so that parsed data can still override it.
        errors['55']['55P04'] = 'UNSAFE_NEW_ENUM_VALUE_USAGE'

        for c, cerrs in e1.items():
            errors[c].update(cerrs)

    return classes, errors


def generate_module_data(classes, errors):
    """Yield, one line at a time, the generated part of the module.

    *classes* maps class codes to ``Class XX - description`` labels and
    *errors* maps class codes to ``{errcode: label}`` dicts.

    The output is a blank line and an ``# Error classes`` heading followed
    by one ``CLASS_<NAME> = '<code>'`` line per class, then for each class
    a blank line, a comment with its label and one
    ``<LABEL> = '<errcode>'`` line per error. Classes and errors are
    emitted sorted by code.
    """
    yield ""
    yield "# Error classes"
    for code in sorted(classes):
        # The constant name is the description that follows " - ", without
        # any parenthesised note, with spaces and slashes as underscores.
        description = classes[code].split(" - ")[1]
        name = description.split("(")[0].strip()
        for separator in (" ", "/"):
            name = name.replace(separator, "_")
        yield "CLASS_%s = %r" % (name.upper(), code)

    for code in sorted(classes):
        yield ""
        yield "# %s" % classes[code]

        class_errors = errors[code]
        for errcode in sorted(class_errors):
            yield "%s = %r" % (class_errors[errcode], errcode)


# When run as a script, use the return value of main() as the exit status
# (None, returned on success, makes sys.exit() exit with status 0).
if __name__ == '__main__':
    sys.exit(main())
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`#!/usr/bin/env python`
			`"""Generate the errorcodes module starting from PostgreSQL documentation.`

			`The script can be run at a new PostgreSQL release to refresh the module.`
			`"""`

			`# Copyright (C) 2010 Daniele Varrazzo <daniele.varrazzo@gmail.com>`
			`#`
cleanup remaining GPL license text 2014-05-20 20:50:53 +04:00			`# psycopg2 is free software: you can redistribute it and/or modify it`
			`# under the terms of the GNU Lesser General Public License as published`
			`# by the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`#`
cleanup remaining GPL license text 2014-05-20 20:50:53 +04:00			`# psycopg2 is distributed in the hope that it will be useful, but WITHOUT`
			`# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or`
			`# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public`
			`# License for more details.`
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00
Parse the error codes from the text file if available The text file was added in PG 9.1. It contains a few errors not available in the SGML. 2014-07-31 16:04:53 +04:00			`import re`
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`import sys`
			`import urllib2`
			`from collections import defaultdict`

			`from BeautifulSoup import BeautifulSoup as BS`

Python source cleanup using flake8 2016-10-11 02:10:53 +03:00
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`def main():`
			`if len(sys.argv) != 2:`
			`print >>sys.stderr, "usage: %s /path/to/errorcodes.py" % sys.argv[0]`
			`return 2`

			`filename = sys.argv[1]`

			`file_start = read_base_file(filename)`
Parse PG 10 error codes from final version 2017-10-19 04:16:27 +03:00			`# If you add a version to the list fix the docs (in errorcodes.rst)`
errorcodes map updated to PostgreSQL 9.2 2012-09-21 04:59:02 +04:00			`classes, errors = fetch_errors(`
errcodes updated to PG 9.6 2017-06-05 14:18:21 +03:00			`['8.1', '8.2', '8.3', '8.4', '9.0', '9.1', '9.2', '9.3', '9.4', '9.5',`
errorcodes map update to PostgreSQL 11 2018-10-15 00:57:01 +03:00			`'9.6', '10', '11'])`
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00
			`f = open(filename, "w")`
			`for line in file_start:`
			`print >>f, line`
			`for line in generate_module_data(classes, errors):`
			`print >>f, line`

Python source cleanup using flake8 2016-10-11 02:10:53 +03:00
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`def read_base_file(filename):`
			`rv = []`
			`for line in open(filename):`
			`rv.append(line.rstrip("\n"))`
			`if line.startswith("# autogenerated"):`
			`return rv`

			`raise ValueError("can't find the separator. Is this the right file?")`

Python source cleanup using flake8 2016-10-11 02:10:53 +03:00
Parse the error codes from the text file if available The text file was added in PG 9.1. It contains a few errors not available in the SGML. 2014-07-31 16:04:53 +04:00			`def parse_errors_txt(url):`
			`classes = {}`
			`errors = defaultdict(dict)`

			`page = urllib2.urlopen(url)`
			`for line in page:`
			`# Strip comments and skip blanks`
			`line = line.split('#')[0].strip()`
			`if not line:`
			`continue`

			`# Parse a section`
			`m = re.match(r"Section: (Class (..) - .+)", line)`
			`if m:`
			`label, class_ = m.groups()`
			`classes[class_] = label`
			`continue`

			`# Parse an error`
			`m = re.match(r"(.....)\s+(?:E\|W\|S)\s+ERRCODE_(\S+)(?:\s+(\S+))?$", line)`
			`if m:`
			`errcode, macro, spec = m.groups()`
Dropped creation of errcodes with missing spec field On further inspection these names are just aliases for values already defined: we don't need the duplication. 2014-08-28 05:05:54 +04:00			`# skip errcodes without specs as they are not publically visible`
Parse the error codes from the text file if available The text file was added in PG 9.1. It contains a few errors not available in the SGML. 2014-07-31 16:04:53 +04:00			`if not spec:`
Dropped creation of errcodes with missing spec field On further inspection these names are just aliases for values already defined: we don't need the duplication. 2014-08-28 05:05:54 +04:00			`continue`
			`errlabel = spec.upper()`
Parse the error codes from the text file if available The text file was added in PG 9.1. It contains a few errors not available in the SGML. 2014-07-31 16:04:53 +04:00			`errors[class_][errcode] = errlabel`
			`continue`

			`# We don't expect anything else`
			`raise ValueError("unexpected line:\n%s" % line)`

			`return classes, errors`

Python source cleanup using flake8 2016-10-11 02:10:53 +03:00
Parse the error codes from the text file if available The text file was added in PG 9.1. It contains a few errors not available in the SGML. 2014-07-31 16:04:53 +04:00			`def parse_errors_sgml(url):`
make_errorcodes updated to the current page style 2011-12-16 18:47:09 +04:00			`page = BS(urllib2.urlopen(url))`
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`table = page('table')[1]('tbody')[0]`

			`classes = {}`
			`errors = defaultdict(dict)`

			`for tr in table('tr'):`
Python source cleanup using flake8 2016-10-11 02:10:53 +03:00			`if tr.td.get('colspan'): # it's a class`
make_errorcodes updated to the current page style 2011-12-16 18:47:09 +04:00			`label = ' '.join(' '.join(tr(text=True)).split()) \`
			`.replace(u'\u2014', '-').encode('ascii')`
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`assert label.startswith('Class')`
			`class_ = label.split()[1]`
			`assert len(class_) == 2`
			`classes[class_] = label`

Python source cleanup using flake8 2016-10-11 02:10:53 +03:00			`else: # it's an error`
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`errcode = tr.tt.string.encode("ascii")`
			`assert len(errcode) == 5`
Completed documentation for errorcodes module. 2010-02-16 00:59:49 +03:00
errorcodes map updated to PostgreSQL 9.1 2011-08-22 20:20:56 +04:00			`tds = tr('td')`
			`if len(tds) == 3:`
make_errorcodes updated to the current page style 2011-12-16 18:47:09 +04:00			`errlabel = '_'.join(tds[1].string.split()).encode('ascii')`
errorcodes map updated to PostgreSQL 9.1 2011-08-22 20:20:56 +04:00
			`# double check the columns are equal`
make_errorcodes updated to the current page style 2011-12-16 18:47:09 +04:00			`cond_name = tds[2].string.strip().upper().encode("ascii")`
errorcodes map updated to PostgreSQL 9.1 2011-08-22 20:20:56 +04:00			`assert errlabel == cond_name, tr`

			`elif len(tds) == 2:`
make_errorcodes updated to the current page style 2011-12-16 18:47:09 +04:00			`# found in PG 9.1 docs`
errorcodes map updated to PostgreSQL 9.1 2011-08-22 20:20:56 +04:00			`errlabel = tds[1].tt.string.upper().encode("ascii")`

			`else:`
			`assert False, tr`
Completed documentation for errorcodes module. 2010-02-16 00:59:49 +03:00
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`errors[class_][errcode] = errlabel`

			`return classes, errors`

Parse the error codes from the text file if available The text file was added in PG 9.1. It contains a few errors not available in the SGML. 2014-07-31 16:04:53 +04:00			`errors_sgml_url = \`
Python source cleanup using flake8 2016-10-11 02:10:53 +03:00			`"http://www.postgresql.org/docs/%s/static/errcodes-appendix.html"`
Parse the error codes from the text file if available The text file was added in PG 9.1. It contains a few errors not available in the SGML. 2014-07-31 16:04:53 +04:00
			`errors_txt_url = \`
Python source cleanup using flake8 2016-10-11 02:10:53 +03:00			`"http://git.postgresql.org/gitweb/?p=postgresql.git;a=blob_plain;" \`
Error codes updated to PG 10 beta 1 2017-06-05 14:26:29 +03:00			`"f=src/backend/utils/errcodes.txt;hb=%s"`
Python source cleanup using flake8 2016-10-11 02:10:53 +03:00
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00
			`def fetch_errors(versions):`
			`classes = {}`
			`errors = defaultdict(dict)`

			`for version in versions:`
Parse the error codes from the text file if available The text file was added in PG 9.1. It contains a few errors not available in the SGML. 2014-07-31 16:04:53 +04:00			`print >> sys.stderr, version`
Error codes updated to PG 10 beta 1 2017-06-05 14:26:29 +03:00			`tver = tuple(map(int, version.split()[0].split('.')))`
Parse the error codes from the text file if available The text file was added in PG 9.1. It contains a few errors not available in the SGML. 2014-07-31 16:04:53 +04:00			`if tver < (9, 1):`
			`c1, e1 = parse_errors_sgml(errors_sgml_url % version)`
			`else:`
Parse PG 10 error codes from final version 2017-10-19 04:16:27 +03:00			`tag = '%s%s_STABLE' % (`
			`(tver[0] >= 10 and 'REL_' or 'REL'),`
			`version.replace('.', '_'))`
Error codes updated to PG 10 beta 1 2017-06-05 14:26:29 +03:00			`c1, e1 = parse_errors_txt(errors_txt_url % tag)`
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`classes.update(c1)`
Parse PG 10 error codes from final version 2017-10-19 04:16:27 +03:00
			`# TODO: this error was added in PG 10 beta 1 but dropped in the`
			`# final release. It doesn't harm leaving it in the file. Check if it`
errorcodes map update to PostgreSQL 11 2018-10-15 00:57:01 +03:00			`# will be added back in PG 12.`
Parse PG 10 error codes from final version 2017-10-19 04:16:27 +03:00			`# https://github.com/postgres/postgres/commit/28e0727076`
			`errors['55']['55P04'] = 'UNSAFE_NEW_ENUM_VALUE_USAGE'`

Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`for c, cerrs in e1.iteritems():`
			`errors[c].update(cerrs)`

			`return classes, errors`

Python source cleanup using flake8 2016-10-11 02:10:53 +03:00
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`def generate_module_data(classes, errors):`
			`yield ""`
			`yield "# Error classes"`
			`for clscode, clslabel in sorted(classes.items()):`
			`err = clslabel.split(" - ")[1].split("(")[0] \`
Python source cleanup using flake8 2016-10-11 02:10:53 +03:00			`.strip().replace(" ", "_").replace('/', "_").upper()`
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`yield "CLASS_%s = %r" % (err, clscode)`
Python source cleanup using flake8 2016-10-11 02:10:53 +03:00
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`for clscode, clslabel in sorted(classes.items()):`
			`yield ""`
			`yield "# %s" % clslabel`

			`for errcode, errlabel in sorted(errors[clscode].items()):`
			`yield "%s = %r" % (errlabel, errcode)`

Python source cleanup using flake8 2016-10-11 02:10:53 +03:00
Added script to update the errorcodes module. 2010-02-15 04:12:13 +03:00			`if __name__ == '__main__':`
			`sys.exit(main())`