sqlmap/lib/request/basic.py

300 lines
12 KiB
Python
Raw Normal View History

#!/usr/bin/env python
2008-10-15 19:38:22 +04:00
"""
2013-01-18 18:07:51 +04:00
Copyright (c) 2006-2013 sqlmap developers (http://sqlmap.org/)
2010-10-15 03:18:29 +04:00
See the file 'doc/COPYING' for copying permission
2008-10-15 19:38:22 +04:00
"""
2010-06-30 16:09:33 +04:00
import codecs
import gzip
2011-04-20 02:54:13 +04:00
import logging
2008-10-15 19:38:22 +04:00
import re
import StringIO
2012-09-12 13:50:38 +04:00
import struct
import zlib
2008-10-15 19:38:22 +04:00
from lib.core.common import extractErrorMessage
from lib.core.common import extractRegexResult
from lib.core.common import getUnicode
from lib.core.common import readInput
2012-03-08 14:19:34 +04:00
from lib.core.common import resetCookieJar
2011-06-08 18:42:48 +04:00
from lib.core.common import singleTimeLogMessage
2012-09-11 16:58:52 +04:00
from lib.core.common import singleTimeWarnMessage
2008-10-15 19:38:22 +04:00
from lib.core.data import conf
from lib.core.data import kb
2010-06-30 16:09:33 +04:00
from lib.core.data import logger
from lib.core.enums import HTTP_HEADER
from lib.core.enums import PLACE
from lib.core.exception import SqlmapCompressionException
from lib.core.htmlentities import htmlEntities
from lib.core.settings import DEFAULT_COOKIE_DELIMITER
2012-10-19 13:02:14 +04:00
from lib.core.settings import EVENTVALIDATION_REGEX
2012-09-12 13:50:38 +04:00
from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
2011-04-01 20:40:28 +04:00
from lib.core.settings import ML
from lib.core.settings import META_CHARSET_REGEX
2011-11-22 16:18:24 +04:00
from lib.core.settings import PARSE_HEADERS_LIMIT
2012-10-19 13:02:14 +04:00
from lib.core.settings import VIEWSTATE_REGEX
from lib.parse.headers import headersParser
2008-10-15 19:38:22 +04:00
from lib.parse.html import htmlParser
2012-07-14 19:01:04 +04:00
from thirdparty.chardet import detect
2008-10-15 19:38:22 +04:00
def forgeHeaders(items=None):
    """
    Prepare HTTP Cookie, HTTP User-Agent and HTTP Referer headers to use when performing
    the HTTP requests

    @param items: optional dictionary of header name/value pairs that
    override the ones found in conf.httpHeaders. Entries having the value
    None are ignored. The passed dictionary itself is never modified.

    @return: dictionary of headers with names normalized to the
    'Capitalized-Dashed' form (e.g. 'user-agent' -> 'User-Agent').

    Side effects: when cookies stored in the cookie jar (conf.cj) intersect
    with the HTTP Cookie header, the user can be asked whether to merge
    them (answer is remembered in kb.mergeCookies) and conf.httpHeaders
    along with conf.parameters can be updated. In test mode (kb.testMode)
    the cookie jar is reset.
    """

    # Work on a filtered copy so that the caller's dictionary stays intact
    items = dict((key, value) for (key, value) in (items or {}).items() if value is not None)

    headers = dict(conf.httpHeaders)
    headers.update(items)

    # Normalize header names (each dash separated part gets capitalized)
    headers = dict(("-".join(_.capitalize() for _ in key.split('-')), value) for (key, value) in headers.items())

    if conf.cj:
        if HTTP_HEADER.COOKIE in headers:
            for cookie in conf.cj:
                # Skip cookies explicitly bound to a domain not matching the target
                if cookie.domain_specified and not conf.hostname.endswith(cookie.domain):
                    continue

                if ("%s=" % cookie.name) in headers[HTTP_HEADER.COOKIE]:
                    if conf.loadCookies:
                        # Drop the HTTP Cookie header from the stored headers
                        conf.httpHeaders = [item for item in conf.httpHeaders if item[0] != HTTP_HEADER.COOKIE]
                    elif kb.mergeCookies is None:
                        message = "you provided a HTTP %s header value. " % HTTP_HEADER.COOKIE
                        message += "The target URL provided its own cookies within "
                        message += "the HTTP %s header which intersect with yours. " % HTTP_HEADER.SET_COOKIE
                        message += "Do you want to merge them in further requests? [Y/n] "
                        choice = readInput(message, default="Y")
                        kb.mergeCookies = not choice or choice[0] in ("y", "Y")

                    if kb.mergeCookies:
                        # Cookie name and value come from the server, hence the name
                        # has to be escaped before being used inside of the pattern
                        # and the replacement has to be taken literally (a callable
                        # is used so that backslashes are not treated as escapes)
                        pattern = "(?i)%s=[^%s]+" % (re.escape(cookie.name), DEFAULT_COOKIE_DELIMITER)
                        replacement = "%s=%s" % (cookie.name, cookie.value)
                        merge = lambda x, pattern=pattern, replacement=replacement: re.sub(pattern, lambda match: replacement, x)

                        headers[HTTP_HEADER.COOKIE] = merge(headers[HTTP_HEADER.COOKIE])

                        if PLACE.COOKIE in conf.parameters:
                            conf.parameters[PLACE.COOKIE] = merge(conf.parameters[PLACE.COOKIE])

                        conf.httpHeaders = [(item[0], item[1] if item[0] != HTTP_HEADER.COOKIE else merge(item[1])) for item in conf.httpHeaders]

                elif not kb.testMode:
                    # Cookie not provided by the user - append it to the header
                    headers[HTTP_HEADER.COOKIE] += "%s %s=%s" % (DEFAULT_COOKIE_DELIMITER, cookie.name, cookie.value)

        if kb.testMode:
            resetCookieJar(conf.cj)

    return headers
def parseResponse(page, headers):
    """
    Passes the parts of an HTTP response to the dedicated parsers

    @param page: response body handed over to htmlParser (skipped when
    empty or None)
    @param headers: response headers handed over to headersParser
    (skipped when empty or None)
    """

    # Nothing to parse at all
    if not (headers or page):
        return

    # Headers are always processed before the page content
    if headers:
        headersParser(headers)

    if page:
        htmlParser(page)
2008-10-15 19:38:22 +04:00
2012-09-25 12:17:25 +04:00
def checkCharEncoding(encoding, warn=True):
    """
    Checks encoding name, repairs common misspellings and adjusts to
    proper namings used in codecs module

    @param encoding: charset name as declared by the web page/server. Extra
    parameters (after ';', ',' or '('), surrounding whitespace and quotes
    are dropped before the check.
    @param warn: if True, a (single time) warning is logged for charset
    names unknown to the codecs module

    @return: adjusted charset name, None if it is unusable/unknown or the
    original value if it was empty (e.g. '' or None)

    >>> checkCharEncoding('iso-8858', False)
    'iso8859-1'
    >>> checkCharEncoding('en_us', False)
    'utf8'
    >>> checkCharEncoding('"utf-8"', False)
    'utf-8'
    """

    if encoding:
        encoding = encoding.lower()
    else:
        return encoding

    # Reference: http://www.destructor.de/charsets/index.htm
    translate = {"windows-874": "iso-8859-11", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1"}

    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    # charset value can be padded and/or quoted (e.g. charset="utf-8")
    encoding = encoding.strip().strip("\"'").strip()

    # nothing usable left (e.g. value consisting only of delimiters/quotes)
    if not encoding:
        return None

    # popular typos/errors (only the first matching one is repaired)
    for typo, repair in (
        ("8858", "8859"),  # iso-8858 -> iso-8859
        ("8559", "8859"),  # iso-8559 -> iso-8859
        ("5889", "8859"),  # iso-5889 -> iso-8859
        ("5589", "8859"),  # iso-5589 -> iso-8859
        ("2313", "2312"),  # gb2313 -> gb2312
        ("x-euc", "euc"),  # x-euc-kr -> euc-kr
        ("windows-cp", "windows"),  # windows-cp-1254 -> windows-1254
    ):
        if typo in encoding:
            encoding = encoding.replace(typo, repair)
            break

    # name adjustment for compatibility
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        # drop anything preceding the proper name
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"

    # Reference: http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "*"):
        return None

    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except LookupError:
        if warn:
            warnMsg = "unknown web page charset '%s'. " % encoding
            warnMsg += "Please report by e-mail to %s." % ML
            singleTimeLogMessage(warnMsg, logging.WARN, encoding)
        encoding = None

    return encoding
def getHeuristicCharEncoding(page):
    """
    Guesses the charset of a given page by usage of heuristics (chardet)

    Reference: http://chardet.feedparser.org/docs/

    @param page: page content to run the detection on
    @return: detected charset name as reported by the detector (can be
    empty/None when nothing has been detected)
    """

    detected = detect(page)["encoding"]

    # Nothing detected - nothing to report
    if not detected:
        return detected

    singleTimeLogMessage("heuristics detected web page charset '%s'" % detected, logging.INFO, detected)

    return detected
def decodePage(page, contentEncoding, contentType):
    """
    Decode compressed/charset HTTP response

    @param page: raw response body
    @param contentEncoding: value of the HTTP Content-Encoding header
    ("gzip", "x-gzip" and "deflate" are handled)
    @param contentType: value of the HTTP Content-Type header

    @return: decoded page. For textual content ("text/" inside of the
    content type) HTML entities and URL encoded characters are decoded and
    result is converted to unicode; other content is returned as is (after
    decompression). None is returned for compressed content when page
    compression has been turned off (kb.pageCompress).

    @raise SqlmapCompressionException: when declared compression can't be
    processed (page compression gets turned off along the way)

    Side effect: kb.pageEncoding is set to the charset used for decoding
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    if isinstance(contentEncoding, basestring) and contentEncoding.lower() in ("gzip", "x-gzip", "deflate"):
        if not kb.pageCompress:
            return None

        try:
            if contentEncoding.lower() == "deflate":
                data = StringIO.StringIO(zlib.decompress(page, -15))  # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
            else:
                data = gzip.GzipFile("", "rb", 9, StringIO.StringIO(page))

                # Last four bytes of gzip stream hold the size of uncompressed
                # data as an unsigned 32-bit value (reading it as signed would
                # let sizes of 2GB and more slip through as negative numbers)
                size = struct.unpack("<L", page[-4:])[0]  # Reference: http://pydoc.org/get.cgi/usr/local/lib/python2.5/gzip.py
                if size > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")

            page = data.read()
        except Exception as msg:
            errMsg = "detected invalid data for declared content "
            errMsg += "encoding '%s' ('%s')" % (contentEncoding, msg)
            singleTimeLogMessage(errMsg, logging.ERROR)

            warnMsg = "turning off page compression"
            singleTimeWarnMessage(warnMsg)

            kb.pageCompress = False
            raise SqlmapCompressionException

    if not conf.charset:
        httpCharset, metaCharset = None, None

        # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
        if contentType and (contentType.find("charset=") != -1):
            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))

        # Declared charset is trusted only if exactly one of the sources
        # (HTTP header, HTML meta tag) provides it or if both of them agree
        if (any((httpCharset, metaCharset)) and not all((httpCharset, metaCharset)))\
            or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
            kb.pageEncoding = httpCharset or metaCharset
            debugMsg = "declared web page charset '%s'" % kb.pageEncoding
            singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg)
        else:
            kb.pageEncoding = None
    else:
        kb.pageEncoding = conf.charset

    # can't do for all responses because we need to support binary files too
    if contentType and not isinstance(page, unicode) and "text/" in contentType.lower():
        # e.g. &#195;&#235;&#224;&#226;&#224;
        if "&#" in page:
            page = re.sub(r"&#(\d{1,3});", lambda _: chr(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)

        # e.g. %20%28%29
        if "%" in page:
            page = re.sub(r"%([0-9a-fA-F]{2})", lambda _: _.group(1).decode("hex"), page)

        # e.g. &amp;
        page = re.sub(r"&([^;]+);", lambda _: chr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 256) < 256 else _.group(0), page)

        # Fall back to heuristics if no (trusted) charset has been declared
        kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
        page = getUnicode(page, kb.pageEncoding)

        # e.g. &#8217;&#8230;&#8482;
        if "&#" in page:
            def _(match):
                retVal = match.group(0)
                try:
                    retVal = unichr(int(match.group(1)))
                except (ValueError, OverflowError):
                    # code point out of range (OverflowError is raised for
                    # overly long numbers) - entity is left untouched
                    pass
                return retVal
            page = re.sub(r"&#(\d+);", _, page)

        # e.g. &zeta;
        page = re.sub(r"&([^;]+);", lambda _: unichr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 0) > 255 else _.group(0), page)

    return page
def processResponse(page, responseHeaders):
    """
    Processes a received HTTP response

    Response gets parsed (headers only for the first PARSE_HEADERS_LIMIT
    responses), DBMS error message is reported (if conf.parseErrors is set)
    and, while the original page is still unknown (kb.originalPage is
    None), values of POST parameters matched by EVENTVALIDATION_REGEX and
    VIEWSTATE_REGEX are refreshed with the ones found inside of the page.

    @param page: (decoded) response body; None is treated as an empty page
    @param responseHeaders: response headers
    """

    kb.processResponseCounter += 1

    # Page can be None (e.g. decodePage() returns None for compressed
    # content when page compression is turned off) while the regular
    # expression searches below require a string
    page = page or ""

    parseResponse(page, responseHeaders if kb.processResponseCounter < PARSE_HEADERS_LIMIT else None)

    if conf.parseErrors:
        msg = extractErrorMessage(page)

        if msg:
            logger.warning("parsed error message: '%s'" % msg)

    if kb.originalPage is None:
        for regex in (EVENTVALIDATION_REGEX, VIEWSTATE_REGEX):
            match = re.search(regex, page)
            if match and PLACE.POST in conf.parameters:
                name, value = match.groups()
                if PLACE.POST in conf.paramDict and name in conf.paramDict[PLACE.POST]:
                    # Currently used value is still present inside of the page
                    if conf.paramDict[PLACE.POST][name] in page:
                        continue
                    conf.paramDict[PLACE.POST][name] = value

                # Name and value are taken from the response, hence the name is
                # escaped for usage inside of the pattern and the value is put
                # literally (callable prevents backslash escapes processing)
                conf.parameters[PLACE.POST] = re.sub("(?i)(%s=)[^&]+" % re.escape(name), lambda _, value=value: "%s%s" % (_.group(1), value), conf.parameters[PLACE.POST])