mirror of
https://github.com/sqlmapproject/sqlmap.git
synced 2025-05-07 01:03:43 +03:00
minor update
This commit is contained in:
parent
e94f86a1ad
commit
af71e3c563
|
@ -108,46 +108,46 @@ def checkCharEncoding(encoding):
|
||||||
return encoding
|
return encoding
|
||||||
|
|
||||||
# http://www.destructor.de/charsets/index.htm
|
# http://www.destructor.de/charsets/index.htm
|
||||||
translate = { 'windows-874': 'iso-8859-11', 'en_us': 'utf8', 'macintosh': 'iso-8859-1', 'euc_tw': 'big5_tw', 'th': 'tis-620' }
|
translate = { "windows-874": "iso-8859-11", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8" }
|
||||||
|
|
||||||
for delimiter in (';', ',', '('):
|
for delimiter in (';', ',', '('):
|
||||||
if delimiter in encoding:
|
if delimiter in encoding:
|
||||||
encoding = encoding[:encoding.find(delimiter)].strip()
|
encoding = encoding[:encoding.find(delimiter)].strip()
|
||||||
|
|
||||||
# popular typos/errors
|
# popular typos/errors
|
||||||
if '8858' in encoding:
|
if "8858" in encoding:
|
||||||
encoding = encoding.replace('8858', '8859') # iso-8858 -> iso-8859
|
encoding = encoding.replace("8858", "8859") # iso-8858 -> iso-8859
|
||||||
elif '8559' in encoding:
|
elif "8559" in encoding:
|
||||||
encoding = encoding.replace('8559', '8859') # iso-8559 -> iso-8859
|
encoding = encoding.replace("8559", "8859") # iso-8559 -> iso-8859
|
||||||
elif '5889' in encoding:
|
elif "5889" in encoding:
|
||||||
encoding = encoding.replace('5889', '8859') # iso-5889 -> iso-8859
|
encoding = encoding.replace("5889", "8859") # iso-5889 -> iso-8859
|
||||||
elif '2313' in encoding:
|
elif "2313" in encoding:
|
||||||
encoding = encoding.replace('2313', '2312') # gb2313 -> gb2312
|
encoding = encoding.replace("2313", "2312") # gb2313 -> gb2312
|
||||||
elif 'x-euc' in encoding:
|
elif "x-euc" in encoding:
|
||||||
encoding = encoding.replace('x-euc', 'euc') # x-euc-kr -> euc-kr
|
encoding = encoding.replace("x-euc", "euc") # x-euc-kr -> euc-kr
|
||||||
|
|
||||||
# name adjustment for compatibility
|
# name adjustment for compatibility
|
||||||
if encoding.startswith('8859'):
|
if encoding.startswith("8859"):
|
||||||
encoding = 'iso-%s' % encoding
|
encoding = "iso-%s" % encoding
|
||||||
elif encoding.startswith('cp-'):
|
elif encoding.startswith("cp-"):
|
||||||
encoding = 'cp%s' % encoding[3:]
|
encoding = "cp%s" % encoding[3:]
|
||||||
elif encoding.startswith('euc-'):
|
elif encoding.startswith("euc-"):
|
||||||
encoding = 'euc_%s' % encoding[4:]
|
encoding = "euc_%s" % encoding[4:]
|
||||||
elif encoding.startswith('windows') and not encoding.startswith('windows-'):
|
elif encoding.startswith("windows") and not encoding.startswith("windows-"):
|
||||||
encoding = 'windows-%s' % encoding[7:]
|
encoding = "windows-%s" % encoding[7:]
|
||||||
elif encoding.find('iso-88') > 0:
|
elif encoding.find("iso-88") > 0:
|
||||||
encoding = encoding[encoding.find('iso-88'):]
|
encoding = encoding[encoding.find("iso-88"):]
|
||||||
elif encoding.startswith('is0-'):
|
elif encoding.startswith("is0-"):
|
||||||
encoding = 'iso%s' % encoding[4:]
|
encoding = "iso%s" % encoding[4:]
|
||||||
elif encoding.find('ascii') > 0:
|
elif encoding.find("ascii") > 0:
|
||||||
encoding = 'ascii'
|
encoding = "ascii"
|
||||||
elif encoding.find('utf8') > 0:
|
elif encoding.find("utf8") > 0:
|
||||||
encoding = 'utf8'
|
encoding = "utf8"
|
||||||
|
|
||||||
# http://philip.html5.org/data/charsets-2.html
|
# http://philip.html5.org/data/charsets-2.html
|
||||||
if encoding in translate:
|
if encoding in translate:
|
||||||
encoding = translate[encoding]
|
encoding = translate[encoding]
|
||||||
elif encoding in ('null', '{charset}', '*'):
|
elif encoding in ("null", "{charset}", "*"):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# http://www.iana.org/assignments/character-sets
|
# http://www.iana.org/assignments/character-sets
|
||||||
|
@ -167,7 +167,7 @@ def getHeuristicCharEncoding(page):
|
||||||
Returns page encoding charset detected by usage of heuristics
|
Returns page encoding charset detected by usage of heuristics
|
||||||
Reference: http://chardet.feedparser.org/docs/
|
Reference: http://chardet.feedparser.org/docs/
|
||||||
"""
|
"""
|
||||||
retVal = detect(page)['encoding']
|
retVal = detect(page)["encoding"]
|
||||||
|
|
||||||
infoMsg = "heuristics detected web page charset '%s'" % retVal
|
infoMsg = "heuristics detected web page charset '%s'" % retVal
|
||||||
singleTimeLogMessage(infoMsg, logging.INFO, retVal)
|
singleTimeLogMessage(infoMsg, logging.INFO, retVal)
|
||||||
|
@ -182,8 +182,8 @@ def decodePage(page, contentEncoding, contentType):
|
||||||
if not page or (conf.nullConnection and len(page) < 2):
|
if not page or (conf.nullConnection and len(page) < 2):
|
||||||
return getUnicode(page)
|
return getUnicode(page)
|
||||||
|
|
||||||
if isinstance(contentEncoding, basestring) and contentEncoding.lower() in ('gzip', 'x-gzip', 'deflate'):
|
if isinstance(contentEncoding, basestring) and contentEncoding.lower() in ("gzip", "x-gzip", "deflate"):
|
||||||
if contentEncoding == 'deflate':
|
if contentEncoding == "deflate":
|
||||||
# http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
|
# http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
|
||||||
data = StringIO.StringIO(zlib.decompress(page, -15))
|
data = StringIO.StringIO(zlib.decompress(page, -15))
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user