Fix for Unicode-handling bug reported by Marek Sarvas

This commit is contained in:
Miroslav Stampar 2010-09-09 14:03:45 +00:00
parent 1b3d287a09
commit 53289c6a42

View File

@ -30,6 +30,7 @@ import StringIO
import zlib import zlib
from lib.core.common import getCompiledRegex from lib.core.common import getCompiledRegex
from lib.core.common import getUnicode
from lib.core.common import isWindowsDriveLetterPath from lib.core.common import isWindowsDriveLetterPath
from lib.core.common import posixToNtSlashes from lib.core.common import posixToNtSlashes
from lib.core.common import urlEncodeCookieValues from lib.core.common import urlEncodeCookieValues
@ -99,13 +100,15 @@ def checkCharEncoding(encoding):
#http://www.destructor.de/charsets/index.htm #http://www.destructor.de/charsets/index.htm
translate = { 'windows-874':'iso-8859-11' } translate = { 'windows-874':'iso-8859-11' }
#http://philip.html5.org/data/charsets-2.html if ';' in encoding:
if encoding.startswith('cp-'):
encoding = 'cp%s' % encoding[3:]
elif ';' in encoding:
encoding = encoding[:encoding.find(';')] encoding = encoding[:encoding.find(';')]
elif encoding in translate: #http://philip.html5.org/data/charsets-2.html
if encoding in translate:
encoding = translate[encoding] encoding = translate[encoding]
elif encoding.startswith('cp-'):
encoding = 'cp%s' % encoding[3:]
elif encoding.startswith('windows') and not encoding.startswith('windows-'):
encoding = 'windows-%s' % encoding[7:]
try: try:
codecs.lookup(encoding) codecs.lookup(encoding)
except LookupError: except LookupError:
@ -134,6 +137,6 @@ def decodePage(page, contentEncoding, contentType):
if contentType and (contentType.find('charset=') != -1): if contentType and (contentType.find('charset=') != -1):
charset = checkCharEncoding(contentType.split('charset=')[-1]) charset = checkCharEncoding(contentType.split('charset=')[-1])
if charset: if charset:
page = unicode(page, charset) #don't use getUnicode here. it needs to stay as is. page = getUnicode(page, charset)
return page return page