diff --git a/lib/request/basic.py b/lib/request/basic.py index 87908c343..07dd6f075 100644 --- a/lib/request/basic.py +++ b/lib/request/basic.py @@ -89,13 +89,13 @@ def parseResponse(page, headers): kb.absFilePaths.add(absFilePath) -def decodePage(page, encoding): +def decodePage(page, contentEncoding, contentType): """ - Decode gzip/deflate HTTP response + Decode compressed/charset HTTP response """ - if isinstance(encoding, basestring) and encoding.lower() in ('gzip', 'x-gzip', 'deflate'): - if encoding == 'deflate': + if isinstance(contentEncoding, basestring) and contentEncoding.lower() in ('gzip', 'x-gzip', 'deflate'): + if contentEncoding == 'deflate': # http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations data = StringIO.StringIO(zlib.decompress(page, -15)) else: @@ -103,4 +103,8 @@ def decodePage(page, encoding): page = data.read() + #http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode + if contentType and (contentType.find('charset=') != -1): + page = unicode(page, contentType.split('charset=')[-1]) + return page diff --git a/lib/request/connect.py b/lib/request/connect.py index 3e71dc547..227ad6dc7 100644 --- a/lib/request/connect.py +++ b/lib/request/connect.py @@ -88,33 +88,32 @@ class Connect: try: if silent: socket.setdefaulttimeout(3) - + if direct: if "?" in url: url, params = url.split("?") params = urlencode(params) url = "%s?%s" % (url, params) requestMsg += "?%s" % params - + elif multipart: # Needed in this form because of potential circle dependency # problem (option -> update -> connect -> option) from lib.core.option import proxyHandler - + multipartOpener = urllib2.build_opener(proxyHandler, multipartpost.MultipartPostHandler) conn = multipartOpener.open(url, multipart) - page = conn.read() + page = conn.read() responseHeaders = conn.info() - - encoding = responseHeaders.get("Content-Encoding") - page = decodePage(page, encoding) - + + page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type")) + return page - + else: if conf.parameters.has_key("GET") and not get: get = conf.parameters["GET"] - + if get: get = urlencode(get) url = "%s?%s" % (url, get) @@ -190,8 +189,7 @@ class Connect: status = conn.msg responseHeaders = conn.info() - encoding = responseHeaders.get("Content-Encoding") - page = decodePage(page, encoding) + page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type")) except urllib2.HTTPError, e: if e.code == 401: