2008-10-15 19:38:22 +04:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
"""
|
2012-07-12 21:38:03 +04:00
|
|
|
Copyright (c) 2006-2012 sqlmap developers (http://sqlmap.org/)
|
2010-10-15 03:18:29 +04:00
|
|
|
See the file 'doc/COPYING' for copying permission
|
2008-10-15 19:38:22 +04:00
|
|
|
"""
|
|
|
|
|
2010-06-30 16:09:33 +04:00
|
|
|
import codecs
|
2010-01-02 05:02:12 +03:00
|
|
|
import gzip
|
2011-04-20 02:54:13 +04:00
|
|
|
import logging
|
2008-10-15 19:38:22 +04:00
|
|
|
import re
|
2010-01-02 05:02:12 +03:00
|
|
|
import StringIO
|
|
|
|
import zlib
|
2008-10-15 19:38:22 +04:00
|
|
|
|
2010-12-25 13:16:20 +03:00
|
|
|
from lib.core.common import extractErrorMessage
|
2011-01-04 18:49:20 +03:00
|
|
|
from lib.core.common import extractRegexResult
|
2010-09-09 18:03:45 +04:00
|
|
|
from lib.core.common import getUnicode
|
2012-01-11 18:28:08 +04:00
|
|
|
from lib.core.common import readInput
|
2012-03-08 14:19:34 +04:00
|
|
|
from lib.core.common import resetCookieJar
|
2011-06-08 18:42:48 +04:00
|
|
|
from lib.core.common import singleTimeLogMessage
|
2008-10-15 19:38:22 +04:00
|
|
|
from lib.core.data import conf
|
|
|
|
from lib.core.data import kb
|
2010-06-30 16:09:33 +04:00
|
|
|
from lib.core.data import logger
|
2011-11-29 23:17:07 +04:00
|
|
|
from lib.core.enums import HTTPHEADER
|
2012-01-11 18:28:08 +04:00
|
|
|
from lib.core.enums import PLACE
|
|
|
|
from lib.core.settings import DEFAULT_COOKIE_DELIMITER
|
2011-04-01 20:40:28 +04:00
|
|
|
from lib.core.settings import ML
|
2011-01-04 18:49:20 +03:00
|
|
|
from lib.core.settings import META_CHARSET_REGEX
|
2011-11-22 16:18:24 +04:00
|
|
|
from lib.core.settings import PARSE_HEADERS_LIMIT
|
2008-11-17 03:00:54 +03:00
|
|
|
from lib.parse.headers import headersParser
|
2008-10-15 19:38:22 +04:00
|
|
|
from lib.parse.html import htmlParser
|
2012-07-14 19:01:04 +04:00
|
|
|
from thirdparty.chardet import detect
|
2008-10-15 19:38:22 +04:00
|
|
|
|
2012-01-11 18:28:08 +04:00
|
|
|
def _mergeCookieValue(header, name, value):
    """
    Returns the given cookie string with every 'name=...' pair rewritten to
    carry the provided value (the cookie name is matched case insensitively)
    """

    # the cookie name is escaped because it is arbitrary text coming from the
    # cookie jar and must not be interpreted as a regular expression
    pattern = "(?i)%s=[^%s]+" % (re.escape(name), re.escape(DEFAULT_COOKIE_DELIMITER))

    # a callable replacement is used so that backslashes and group references
    # contained in the cookie value are inserted literally by re.sub()
    return re.sub(pattern, lambda match: "%s=%s" % (name, value), header)

def forgeHeaders(items=None):
    """
    Prepare the HTTP headers to use when performing the HTTP requests

    The result starts as a copy of conf.httpHeaders, gets overridden by the
    provided items and is then completed with the cookies found in the cookie
    jar (conf.cj) and with the kb.redirectSetCookie value

    @param items: optional dictionary of header name/value pairs; entries
    having None for a value are ignored. The dictionary itself is left
    untouched
    @return: dictionary of HTTP headers

    Side effects: can prompt the user (once) to set kb.mergeCookies, can
    rewrite conf.parameters[PLACE.COOKIE] and conf.httpHeaders when cookies
    are merged and calls resetCookieJar(conf.cj) while in kb.testMode
    """

    # work on a filtered copy so the caller's dictionary is neither modified
    # nor resized while it is being iterated
    items = dict((key, value) for key, value in (items or {}).items() if value is not None)

    headers = dict(conf.httpHeaders)
    headers.update(items)

    if conf.cj:
        if HTTPHEADER.COOKIE in headers:
            for cookie in conf.cj:
                if ("%s=" % cookie.name) in headers[HTTPHEADER.COOKIE]:
                    # same cookie name is present in both the user provided
                    # header and the cookie jar - ask (only once) what to do
                    if kb.mergeCookies is None:
                        message = "you provided a HTTP %s header value. " % HTTPHEADER.COOKIE
                        message += "The target url provided its own cookies within "
                        message += "the HTTP %s header which intersect with yours. " % HTTPHEADER.SET_COOKIE
                        message += "Do you want to merge them in futher requests? [Y/n] "
                        choice = readInput(message, default="Y")
                        kb.mergeCookies = not choice or choice[0] in ("y", "Y")

                    if kb.mergeCookies:
                        headers[HTTPHEADER.COOKIE] = _mergeCookieValue(headers[HTTPHEADER.COOKIE], cookie.name, cookie.value)

                        if PLACE.COOKIE in conf.parameters:
                            conf.parameters[PLACE.COOKIE] = _mergeCookieValue(conf.parameters[PLACE.COOKIE], cookie.name, cookie.value)

                        conf.httpHeaders = [(item[0], item[1] if item[0] != HTTPHEADER.COOKIE else _mergeCookieValue(item[1], cookie.name, cookie.value)) for item in conf.httpHeaders]

                elif not kb.testMode:
                    # cookie known only to the cookie jar gets appended
                    headers[HTTPHEADER.COOKIE] += "%s %s=%s" % (DEFAULT_COOKIE_DELIMITER, cookie.name, cookie.value)

        if kb.testMode:
            resetCookieJar(conf.cj)

    if kb.redirectSetCookie and not conf.dropSetCookie:
        if HTTPHEADER.COOKIE in headers:
            headers[HTTPHEADER.COOKIE] += "%s %s" % (DEFAULT_COOKIE_DELIMITER, kb.redirectSetCookie)
        else:
            headers[HTTPHEADER.COOKIE] = kb.redirectSetCookie

    return headers
|
|
|
|
|
2008-11-17 03:00:54 +03:00
|
|
|
def parseResponse(page, headers):
    """
    Feeds the response parsers with the parts of the HTTP response that
    are available

    @param page: page content handed over to htmlParser() (skipped when
    empty or None)
    @param headers: response headers handed over to headersParser()
    (skipped when empty or None)
    """

    # headers are processed before the page content
    if headers:
        headersParser(headers)

    if page:
        htmlParser(page)
|
2008-10-15 19:38:22 +04:00
|
|
|
|
2010-06-30 16:09:33 +04:00
|
|
|
def checkCharEncoding(encoding):
    """
    Normalizes a (possibly malformed) charset name and checks that it is
    known to Python's codecs registry

    @param encoding: charset name as declared by the web page/server
    @return: normalized charset name, None when the name is a placeholder or
    not recognized (a single time warning is logged in the latter case), or
    the provided value itself when it is empty/None
    """

    if not encoding:
        return encoding

    encoding = encoding.lower()

    # http://www.destructor.de/charsets/index.htm
    translate = { "windows-874": "iso-8859-11", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8"}

    # keep only the part preceding any trailing parameter/alternative
    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    # drop surrounding whitespace and quotes (e.g. charset="utf-8") so that
    # the returned name can be compared with names coming from other sources
    encoding = encoding.strip().strip("\"'").strip()

    # nothing left to check (e.g. value made only of delimiters); there is
    # no point in warning about an unknown empty charset name
    if not encoding:
        return None

    # popular typos/errors (only the first matching one gets fixed)
    typos = (
        ("8858", "8859"),  # iso-8858 -> iso-8859
        ("8559", "8859"),  # iso-8559 -> iso-8859
        ("5889", "8859"),  # iso-5889 -> iso-8859
        ("5589", "8859"),  # iso-5589 -> iso-8859
        ("2313", "2312"),  # gb2313 -> gb2312
        ("x-euc", "euc"),  # x-euc-kr -> euc-kr
    )

    for typo, fix in typos:
        if typo in encoding:
            encoding = encoding.replace(typo, fix)
            break

    # name adjustment for compatibility
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        # junk in front of the charset name
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"

    # http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "*"):
        return None

    # http://www.iana.org/assignments/character-sets
    # http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except (LookupError, ValueError):
        # ValueError covers names that can not be handed to the codecs
        # registry at all (e.g. non-ASCII or containing null characters)
        warnMsg = "unknown web page charset '%s'. " % encoding
        warnMsg += "Please report by e-mail to %s." % ML
        singleTimeLogMessage(warnMsg, logging.WARN, encoding)
        encoding = None

    return encoding
|
|
|
|
|
2011-04-18 17:38:46 +04:00
|
|
|
def getHeuristicCharEncoding(page):
    """
    Returns page encoding charset detected by usage of heuristics
    Reference: http://chardet.feedparser.org/docs/

    @param page: page content handed over to the charset detector
    @return: value of the "encoding" entry of the detector's result
    """

    retVal = detect(page)["encoding"]

    # report only an actual finding; without this check a failed detection
    # would be logged as charset 'None'
    if retVal:
        infoMsg = "heuristics detected web page charset '%s'" % retVal
        singleTimeLogMessage(infoMsg, logging.INFO, retVal)

    return retVal
|
|
|
|
|
2010-06-09 18:40:36 +04:00
|
|
|
def decodePage(page, contentEncoding, contentType):
    """
    Decode compressed/charset HTTP response

    @param page: raw response body
    @param contentEncoding: declared content encoding; "gzip", "x-gzip" and
    "deflate" (in any letter case) cause the body to be decompressed
    @param contentType: declared content type; it is searched for a
    "charset=" part and used for deciding whether the body is textual
    @return: decoded page (converted with getUnicode() for textual content
    types, otherwise the decompressed body as is)

    Side effect: kb.pageEncoding is set to the charset chosen for the page
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    # normalized once so that all the following checks are case insensitive
    compression = contentEncoding.lower() if isinstance(contentEncoding, basestring) else None

    if compression in ("gzip", "x-gzip", "deflate"):
        # whole decompression is guarded as invalid data can be rejected
        # both while the stream is being set up and while it is being read
        try:
            if compression == "deflate":
                # http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
                data = StringIO.StringIO(zlib.decompress(page, -15))
            else:
                data = gzip.GzipFile("", "rb", 9, StringIO.StringIO(page))

            page = data.read()
        except Exception as msg:
            # best effort: the page is left as received
            errMsg = "detected invalid data for declared content "
            errMsg += "encoding '%s' ('%s')" % (contentEncoding, msg)
            singleTimeLogMessage(errMsg, logging.ERROR)

    if not conf.charset:
        httpCharset, metaCharset = None, None

        # http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
        if contentType and (contentType.find("charset=") != -1):
            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE))

        if httpCharset and metaCharset and httpCharset != metaCharset:
            # conflicting declarations can't be trusted
            kb.pageEncoding = None
        else:
            # either the only declared charset or the one both sources agree on
            kb.pageEncoding = (httpCharset or metaCharset) or None
    else:
        kb.pageEncoding = conf.charset

    textTypes = ("text/txt", "text/raw", "text/html", "text/xml")

    # can't do for all responses because we need to support binary files too
    if contentType and not isinstance(page, unicode) and any(_ in contentType.lower() for _ in textTypes):
        # e.g. &#195;&#235;&#224;&#226;&#224;
        if "&#" in page:
            # single byte values are put back into the (still undecoded) page
            page = re.sub(r"&#(\d{1,3});", lambda _: chr(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)

        kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
        page = getUnicode(page, kb.pageEncoding)

        # e.g. &#8217;&#8230;&#8482;
        if "&#" in page:
            def _decodeEntity(match):
                # references that are not valid code points are left as found
                try:
                    return unichr(int(match.group(1)))
                except (ValueError, OverflowError):
                    return match.group(0)

            page = re.sub(r"&#(\d+);", _decodeEntity, page)

    return page
|
2010-12-25 13:16:20 +03:00
|
|
|
|
|
|
|
def processResponse(page, responseHeaders):
    """
    Runs the response post-processing: increments the counter of processed
    responses, passes the response to parseResponse() (unless
    kb.dumpTable is set) and, if conf.parseErrors is set, logs the error
    message extracted from the page

    @param page: page content of the response
    @param responseHeaders: headers of the response; handed over for
    parsing only while the counter is below PARSE_HEADERS_LIMIT
    """

    kb.processResponseCounter += 1

    if not kb.dumpTable:
        if kb.processResponseCounter < PARSE_HEADERS_LIMIT:
            headers = responseHeaders
        else:
            headers = None

        parseResponse(page, headers)

    if not conf.parseErrors:
        return

    msg = extractErrorMessage(page)

    if msg:
        logger.info("parsed error message: '%s'" % msg)
|