make_errorcodes updated to the current page style

This commit is contained in:
Daniele Varrazzo 2011-12-16 14:47:09 +00:00
parent 3094371621
commit 2af563227a

View File

@@ -48,11 +48,7 @@ def read_base_file(filename):
raise ValueError("can't find the separator. Is this the right file?") raise ValueError("can't find the separator. Is this the right file?")
def parse_errors(url): def parse_errors(url):
page = urllib2.urlopen(url).read() page = BS(urllib2.urlopen(url))
page = page.replace( # make things easier
'<SPAN CLASS="PRODUCTNAME">PostgreSQL</SPAN>',
'PostgreSQL')
page = BS(page)
table = page('table')[1]('tbody')[0] table = page('table')[1]('tbody')[0]
classes = {} classes = {}
@@ -60,9 +56,9 @@ def parse_errors(url):
for tr in table('tr'): for tr in table('tr'):
if tr.td.get('colspan'): # it's a class if tr.td.get('colspan'): # it's a class
label = tr.b.string.encode("ascii") label = ' '.join(' '.join(tr(text=True)).split()) \
.replace(u'\u2014', '-').encode('ascii')
assert label.startswith('Class') assert label.startswith('Class')
label = label.replace("&mdash;", "-")
class_ = label.split()[1] class_ = label.split()[1]
assert len(class_) == 2 assert len(class_) == 2
classes[class_] = label classes[class_] = label
@@ -73,14 +69,14 @@ def parse_errors(url):
tds = tr('td') tds = tr('td')
if len(tds) == 3: if len(tds) == 3:
errlabel = tds[1].string.replace(" ", "_").encode("ascii") errlabel = '_'.join(tds[1].string.split()).encode('ascii')
# double check the columns are equal # double check the columns are equal
cond_name = tds[2].string.upper().encode("ascii") cond_name = tds[2].string.strip().upper().encode("ascii")
assert errlabel == cond_name, tr assert errlabel == cond_name, tr
elif len(tds) == 2: elif len(tds) == 2:
# found in PG 9.1 beta3 docs # found in PG 9.1 docs
errlabel = tds[1].tt.string.upper().encode("ascii") errlabel = tds[1].tt.string.upper().encode("ascii")
else: else: