some charset fix up

2025-12-18 07:34:11 +03:00 · 2010-06-30 12:09:33 +00:00 · 2010-06-30 12:09:33 +00:00 · 0d08903bc3
commit 0d08903bc3
parent 24428c1a1b
1 changed files with 20 additions and 2 deletions
--- a/lib/request/basic.py
+++ b/lib/request/basic.py
@ -22,6 +22,7 @@ with sqlmap; if not, write to the Free Software Foundation, Inc., 51
 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 """
 import codecs
 import gzip
 import os
 import re
@ -34,6 +35,7 @@ from lib.core.common import posixToNtSlashes
 from lib.core.common import urlEncodeCookieValues
 from lib.core.data import conf
 from lib.core.data import kb
 from lib.core.data import logger
 from lib.parse.headers import headersParser
 from lib.parse.html import htmlParser
@ -88,6 +90,20 @@ def parseResponse(page, headers):
                if absFilePath not in kb.absFilePaths:
                    kb.absFilePaths.add(absFilePath)
 def checkCharEncoding(encoding):
    """
    Normalize a charset name (as found in a Content-Type header) and
    verify that Python has a codec for it

    Returns the normalized charset name when a codec exists. A missing
    or empty charset is returned as is, so the caller can skip decoding.
    An unknown charset is logged and replaced by conf.dataEncoding.
    """

    # Reference: http://philip.html5.org/data/charsets-2.html

    if encoding:
        # Drop trailing header parameters (e.g. "utf-8; format=flowed")
        # and the optional quoting around the value (e.g. '"utf-8"')
        encoding = encoding.split(';')[0].strip().strip('"\'')

    # Nothing to validate: codecs.lookup(None) would raise TypeError and
    # an empty name would only produce a misleading "unknown charset" warning
    if not encoding:
        return encoding

    # Python names the Windows code pages "cpNNNN", not "cp-NNNN"
    if encoding.lower().startswith('cp-'):
        encoding = 'cp%s' % encoding[3:]

    try:
        codecs.lookup(encoding)
    except LookupError:
        warnMsg  = "unknown charset '%s'. " % encoding
        warnMsg += "please report by e-mail to sqlmap-users@lists.sourceforge.net."
        logger.warn(warnMsg)
        encoding = conf.dataEncoding

    return encoding
 def decodePage(page, contentEncoding, contentType):
    """
    Decode compressed/charset HTTP response
@ -104,6 +120,8 @@ def decodePage(page, contentEncoding, contentType):
    #http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
    if contentType and (contentType.find('charset=') != -1):
-        page = unicode(page, contentType.split('charset=')[-1])     #don't use getUnicode here. it needs to stay as is.
+        charset = checkCharEncoding(contentType.split('charset=')[-1])
        if charset:
            page = unicode(page, charset)     #don't use getUnicode here. it needs to stay as is.
    return page