From 48a67d6d5150ffabc09996ba13b50238835a5c2f Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Thu, 15 Jul 2010 08:44:42 +0000 Subject: [PATCH] fix for "unknown charset 'windows-874'" reported by Phat R. --- lib/request/basic.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/request/basic.py b/lib/request/basic.py index 685ec79a4..28e208e7e 100644 --- a/lib/request/basic.py +++ b/lib/request/basic.py @@ -91,9 +91,19 @@ def parseResponse(page, headers): kb.absFilePaths.add(absFilePath) def checkCharEncoding(encoding): + if encoding: + encoding = encoding.lower() + else: + return encoding + + #http://www.destructor.de/charsets/index.htm + translate = { 'windows-874':'iso-8859-11' } + #http://philip.html5.org/data/charsets-2.html if encoding and encoding.startswith('cp-'): encoding = 'cp%s' % encoding[3:] + elif encoding in translate: + encoding = translate[encoding] try: codecs.lookup(encoding) except LookupError: