Minor patch

This commit is contained in:
Miroslav Stampar 2013-03-26 20:06:50 +01:00
parent 7accba4cf9
commit c19a283434

View File

@ -254,7 +254,14 @@ def decodePage(page, contentEncoding, contentType):
# e.g. &#8217;&#8230;&#8482; (decimal numeric character references, i.e. ’…™)
if "&#" in page:
page = re.sub(r"&#(\d+);", lambda _: unichr(int(_.group(1))), page)
def _(match):
    """
    Replacement callback for decimal numeric character references
    (e.g. "&#8217;"): returns the character for the captured number, or
    the matched text unchanged when the number can't be converted
    """
    retVal = match.group(0)
    try:
        retVal = unichr(int(match.group(1)))
    except (ValueError, OverflowError):
        # ValueError: number is outside the range accepted by unichr()
        # OverflowError: digit run is too large to be converted to a C integer
        # In both cases the reference is left in the page as-is
        pass
    return retVal
page = re.sub(r"&#(\d+);", _, page)
# e.g. &zeta; (named HTML entity, i.e. ζ)
page = re.sub(r"&([^;]+);", lambda _: unichr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 0) > 255 else _.group(0), page)