fix for "unknown charset 'windows-874'" reported by Phat R.

This commit is contained in:
Miroslav Stampar 2010-07-15 08:44:42 +00:00
parent 82bce81e28
commit 48a67d6d51

View File

@@ -91,9 +91,19 @@ def parseResponse(page, headers):
kb.absFilePaths.add(absFilePath) kb.absFilePaths.add(absFilePath)
def checkCharEncoding(encoding): def checkCharEncoding(encoding):
if encoding:
encoding = encoding.lower()
else:
return encoding
#http://www.destructor.de/charsets/index.htm
translate = { 'windows-874':'iso-8859-11' }
#http://philip.html5.org/data/charsets-2.html #http://philip.html5.org/data/charsets-2.html
if encoding and encoding.startswith('cp-'): if encoding and encoding.startswith('cp-'):
encoding = 'cp%s' % encoding[3:] encoding = 'cp%s' % encoding[3:]
elif encoding in translate:
encoding = translate[encoding]
try: try:
codecs.lookup(encoding) codecs.lookup(encoding)
except LookupError: except LookupError: