sqlmap/lib/request/basic.py

354 lines
14 KiB
Python
Raw Normal View History

#!/usr/bin/env python
2008-10-15 19:38:22 +04:00
"""
2016-01-06 02:06:12 +03:00
Copyright (c) 2006-2016 sqlmap developers (http://sqlmap.org/)
2010-10-15 03:18:29 +04:00
See the file 'doc/COPYING' for copying permission
2008-10-15 19:38:22 +04:00
"""
2010-06-30 16:09:33 +04:00
import codecs
import gzip
2011-04-20 02:54:13 +04:00
import logging
2008-10-15 19:38:22 +04:00
import re
import StringIO
2012-09-12 13:50:38 +04:00
import struct
import zlib
2008-10-15 19:38:22 +04:00
from lib.core.common import extractErrorMessage
from lib.core.common import extractRegexResult
2014-01-02 15:09:58 +04:00
from lib.core.common import getPublicTypeMembers
from lib.core.common import getUnicode
2015-10-29 18:52:17 +03:00
from lib.core.common import randomStr
from lib.core.common import readInput
2012-03-08 14:19:34 +04:00
from lib.core.common import resetCookieJar
2011-06-08 18:42:48 +04:00
from lib.core.common import singleTimeLogMessage
2012-09-11 16:58:52 +04:00
from lib.core.common import singleTimeWarnMessage
2008-10-15 19:38:22 +04:00
from lib.core.data import conf
from lib.core.data import kb
2010-06-30 16:09:33 +04:00
from lib.core.data import logger
from lib.core.enums import HTTP_HEADER
from lib.core.enums import PLACE
from lib.core.exception import SqlmapCompressionException
from lib.core.settings import BLOCKED_IP_REGEX
from lib.core.settings import DEFAULT_COOKIE_DELIMITER
2012-10-19 13:02:14 +04:00
from lib.core.settings import EVENTVALIDATION_REGEX
2012-09-12 13:50:38 +04:00
from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
from lib.core.settings import META_CHARSET_REGEX
2011-11-22 16:18:24 +04:00
from lib.core.settings import PARSE_HEADERS_LIMIT
2015-10-19 12:08:58 +03:00
from lib.core.settings import UNICODE_ENCODING
2012-10-19 13:02:14 +04:00
from lib.core.settings import VIEWSTATE_REGEX
from lib.parse.headers import headersParser
2008-10-15 19:38:22 +04:00
from lib.parse.html import htmlParser
2013-06-19 12:59:26 +04:00
from lib.utils.htmlentities import htmlEntities
2012-07-14 19:01:04 +04:00
from thirdparty.chardet import detect
2014-10-22 15:32:49 +04:00
from thirdparty.odict.odict import OrderedDict
2008-10-15 19:38:22 +04:00
def forgeHeaders(items=None):
    """
    Prepare HTTP Cookie, HTTP User-Agent and HTTP Referer headers to use when performing
    the HTTP requests

    @param items: optional mapping (header name -> value) applied on top of
    conf.httpHeaders; entries having the value None are ignored. The
    mapping itself is never modified.
    @return: OrderedDict (header name -> value) to be used for the request

    Side effects: when conf.cj holds cookies intersecting with the Cookie
    header, conf.httpHeaders, conf.parameters[PLACE.COOKIE] and
    kb.mergeCookies can be updated (the user is asked once whether the
    cookies should be merged). In test mode (without CSRF token/safe URL
    in use) the cookie jar is reset through resetCookieJar().
    """

    # Filter into a fresh list of pairs (order preserved) instead of deleting
    # from the caller's mapping
    overrides = [(name, value) for name, value in (items or {}).items() if value is not None]

    merged = OrderedDict(conf.httpHeaders)
    merged.update(overrides)

    class _str(str):
        # String whose capitalize()/title() are no-ops, so that the original
        # letter case of the header name survives
        def capitalize(self):
            return _str(self)

        def title(self):
            return _str(self)

    # Loop invariant: upper-cased public members of HTTP_HEADER
    knownHeaders = tuple(_.upper() for _ in getPublicTypeMembers(HTTP_HEADER, True))

    headers = OrderedDict()

    for key, value in merged.items():
        stored = False

        # Header names are compared case-insensitively - the latest entry wins
        for existing in headers:
            if existing.upper() == key.upper():
                del headers[existing]
                break

        if key.upper() not in knownHeaders:
            try:
                headers[_str(key)] = value  # dirty hack for http://bugs.python.org/issue12455
            except UnicodeEncodeError:      # don't do the hack on non-ASCII header names (they have to be properly encoded later on)
                pass
            else:
                stored = True

        if not stored:
            key = '-'.join(part.capitalize() for part in key.split('-'))
            headers[key] = value

    if conf.cj:
        if HTTP_HEADER.COOKIE in headers:
            delimiter = conf.cookieDel or DEFAULT_COOKIE_DELIMITER

            for cookie in conf.cj:
                # Skip cookies explicitly bound to a domain not being a suffix of the target hostname
                if cookie.domain_specified and not conf.hostname.endswith(cookie.domain):
                    continue

                if ("%s=" % cookie.name) in headers[HTTP_HEADER.COOKIE]:
                    if conf.loadCookies:
                        conf.httpHeaders = [item for item in conf.httpHeaders if item[0] != HTTP_HEADER.COOKIE]
                    elif kb.mergeCookies is None:
                        message = "you provided a HTTP %s header value. " % HTTP_HEADER.COOKIE
                        message += "The target URL provided its own cookies within "
                        message += "the HTTP %s header which intersect with yours. " % HTTP_HEADER.SET_COOKIE
                        message += "Do you want to merge them in futher requests? [Y/n] "
                        choice = readInput(message, default="Y")
                        kb.mergeCookies = not choice or choice[0] in ("y", "Y")

                    if kb.mergeCookies and kb.injection.place != PLACE.COOKIE:
                        # Delimiter is escaped too, so that it can't break the character class (e.g. a backslash)
                        pattern = r"(?i)\b%s=[^%s]+" % (re.escape(cookie.name), re.escape(delimiter))

                        # Backslashes are doubled as the value is used as a re.sub() replacement template
                        replacement = ("%s=%s" % (cookie.name, getUnicode(cookie.value))).replace('\\', r'\\')

                        def _merge(content):
                            return re.sub(pattern, replacement, content)

                        headers[HTTP_HEADER.COOKIE] = _merge(headers[HTTP_HEADER.COOKIE])

                        if PLACE.COOKIE in conf.parameters:
                            conf.parameters[PLACE.COOKIE] = _merge(conf.parameters[PLACE.COOKIE])

                        conf.httpHeaders = [(item[0], item[1] if item[0] != HTTP_HEADER.COOKIE else _merge(item[1])) for item in conf.httpHeaders]

                elif not kb.testMode:
                    headers[HTTP_HEADER.COOKIE] += "%s %s=%s" % (delimiter, cookie.name, getUnicode(cookie.value))

        if kb.testMode and not any((conf.csrfToken, conf.safeUrl)):
            resetCookieJar(conf.cj)

    return headers
def parseResponse(page, headers):
    """
    Hands over the (non-empty) response headers to headersParser() and
    the (non-empty) response body to htmlParser()

    @param page: the page to parse to feed the knowledge base htmlFp
    (back-end DBMS fingerprint based upon DBMS error messages return
    through the web application) list and absFilePaths (absolute file
    paths) set.
    @param headers: response headers to parse (skipped when empty/None)
    """

    if headers:
        headersParser(headers)

    if not page:
        return

    htmlParser(page)
2008-10-15 19:38:22 +04:00
2012-09-25 12:17:25 +04:00
def checkCharEncoding(encoding, warn=True):
    """
    Checks encoding name, repairs common misspellings and adjusts to
    proper namings used in codecs module

    @param encoding: charset name as declared by the target
    @param warn: if True, a (single time) warning is logged for a charset
    that is unknown to the codecs module or unusable for decoding
    @return: adjusted charset name, None when it is a placeholder, unknown
    or unusable; empty/None input is returned as is

    >>> checkCharEncoding('iso-8858', False)
    'iso8859-1'
    >>> checkCharEncoding('en_us', False)
    'utf8'
    """

    if not encoding:
        return encoding

    encoding = encoding.lower()

    # Reference: http://www.destructor.de/charsets/index.htm
    translate = {"windows-874": "iso-8859-11", "utf-8859-1": "utf8", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1", "ansi": "ascii", "gbk2312": "gbk", "windows-31j": "cp932", "en": "us"}

    # Keep only the part in front of any trailing attributes
    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    encoding = encoding.replace("&quot", "")

    # popular typos/errors (only the first matching rule is applied)
    typos = (
        ("8858", "8859"),  # iso-8858 -> iso-8859
        ("8559", "8859"),  # iso-8559 -> iso-8859
        ("5889", "8859"),  # iso-5889 -> iso-8859
        ("5589", "8859"),  # iso-5589 -> iso-8859
        ("2313", "2312"),  # gb2313 -> gb2312
    )

    for wrong, right in typos:
        if wrong in encoding:
            encoding = encoding.replace(wrong, right)
            break
    else:
        if encoding.startswith("x-"):
            encoding = encoding[len("x-"):]  # x-euc-kr -> euc-kr  /  x-mac-turkish -> mac-turkish
        elif "windows-cp" in encoding:
            encoding = encoding.replace("windows-cp", "windows")  # windows-cp-1254 -> windows-1254

    # name adjustment for compatibility
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"
    elif encoding.find("utf-8") > 0:
        encoding = "utf-8"

    # Reference: http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "*") or not re.search(r"\w", encoding):
        return None

    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding.encode(UNICODE_ENCODING) if isinstance(encoding, unicode) else encoding)
    except (LookupError, ValueError):
        if warn:
            warnMsg = "unknown web page charset '%s'. " % encoding
            warnMsg += "Please report by e-mail to 'dev@sqlmap.org'"
            singleTimeLogMessage(warnMsg, logging.WARN, encoding)
        encoding = None

    if encoding:
        # Trial decoding, as a codec known to the codecs module can still be unusable
        try:
            unicode(randomStr(), encoding)
        except Exception:  # deliberately not a bare 'except' (KeyboardInterrupt/SystemExit have to propagate)
            if warn:
                warnMsg = "invalid web page charset '%s'" % encoding
                singleTimeLogMessage(warnMsg, logging.WARN, encoding)
            encoding = None

    return encoding
def getHeuristicCharEncoding(page):
    """
    Returns the "encoding" entry of the result given by detect() for
    the provided page, logging it (single time) when it is not empty

    Reference: http://chardet.feedparser.org/docs/
    """

    detected = detect(page)["encoding"]

    if not detected:
        return detected

    singleTimeLogMessage("heuristics detected web page charset '%s'" % detected, logging.INFO, detected)

    return detected
def decodePage(page, contentEncoding, contentType):
    """
    Decode compressed/charset HTTP response

    @param page: raw response body
    @param contentEncoding: value of the HTTP Content-Encoding header (if any)
    @param contentType: value of the HTTP Content-Type header (if any)
    @return: processed page; None when the body is declared as compressed
    while page compression is turned off (kb.pageCompress)
    @raise SqlmapCompressionException: when the declared compression can't
    be undone and the body doesn't contain "<html" (page compression is
    turned off before raising)

    Side effect: kb.pageEncoding is set to the charset found (or to None)
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    if isinstance(contentEncoding, basestring) and contentEncoding.lower() in ("gzip", "x-gzip", "deflate"):
        if not kb.pageCompress:
            return None

        try:
            if contentEncoding.lower() == "deflate":
                data = StringIO.StringIO(zlib.decompress(page, -15))  # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
            else:
                data = gzip.GzipFile("", "rb", 9, StringIO.StringIO(page))

                # ISIZE trailer field is an unsigned 32-bit value; reading it as signed
                # would let sizes of 2GiB and more turn negative and slip under the limit
                size = struct.unpack("<I", page[-4:])[0]  # Reference: http://pydoc.org/get.cgi/usr/local/lib/python2.5/gzip.py
                if size > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")

            page = data.read()
        except Exception as ex:
            if "<html" not in page:  # in some cases, invalid "Content-Encoding" appears for plain HTML (should be ignored)
                errMsg = "detected invalid data for declared content "
                errMsg += "encoding '%s' ('%s')" % (contentEncoding, ex)
                singleTimeLogMessage(errMsg, logging.ERROR)

                warnMsg = "turning off page compression"
                singleTimeWarnMessage(warnMsg)

                kb.pageCompress = False
                raise SqlmapCompressionException

    if not conf.charset:
        httpCharset, metaCharset = None, None

        # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
        if contentType and (contentType.find("charset=") != -1):
            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))

        # Declared charset is trusted only if given at a single place or if both places agree
        if (any((httpCharset, metaCharset)) and not all((httpCharset, metaCharset)))\
            or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
            kb.pageEncoding = httpCharset or metaCharset  # Reference: http://bytes.com/topic/html-css/answers/154758-http-equiv-vs-true-header-has-precedence
            debugMsg = "declared web page charset '%s'" % kb.pageEncoding
            singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg)
        else:
            kb.pageEncoding = None
    else:
        kb.pageEncoding = conf.charset

    # can't do for all responses because we need to support binary files too
    if contentType and not isinstance(page, unicode) and "text/" in contentType.lower():
        if kb.heuristicMode:
            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
            page = getUnicode(page, kb.pageEncoding)
        else:
            # e.g. &#195;&#235;&#224;&#226;&#224;
            if "&#" in page:
                page = re.sub(r"&#(\d{1,3});", lambda _: chr(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)

            # e.g. %20%28%29
            if "%" in page:
                page = re.sub(r"%([0-9a-fA-F]{2})", lambda _: _.group(1).decode("hex"), page)

            # e.g. &amp;
            page = re.sub(r"&([^;]+);", lambda _: chr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 256) < 256 else _.group(0), page)

            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
            page = getUnicode(page, kb.pageEncoding)

            # e.g. &#8217;&#8230;&#8482;
            if "&#" in page:
                def _(match):
                    retVal = match.group(0)
                    try:
                        retVal = unichr(int(match.group(1)))
                    except (ValueError, OverflowError):  # out of range/overly large code point - entity is left as is
                        pass
                    return retVal
                page = re.sub(r"&#(\d+);", _, page)

            # e.g. &zeta;
            page = re.sub(r"&([^;]+);", lambda _: unichr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 0) > 255 else _.group(0), page)

    return page
def processResponse(page, responseHeaders):
    """
    Post-processes a received HTTP response

    Parses the response through parseResponse() (headers are passed only
    while kb.processResponseCounter is below PARSE_HEADERS_LIMIT), reports
    the parsed DBMS error message (if conf.parseErrors is set), refreshes
    the POST values for fields matched by EVENTVALIDATION_REGEX and
    VIEWSTATE_REGEX (while kb.originalPage is not yet set) and logs an
    error when the page matches BLOCKED_IP_REGEX

    @param page: response body (None is treated as an empty string)
    @param responseHeaders: response headers
    """

    kb.processResponseCounter += 1

    page = page or ""

    parseResponse(page, responseHeaders if kb.processResponseCounter < PARSE_HEADERS_LIMIT else None)

    if conf.parseErrors:
        msg = extractErrorMessage(page)

        if msg:
            logger.warning("parsed DBMS error message: '%s'" % msg.rstrip('.'))

    if kb.originalPage is None:
        for regex in (EVENTVALIDATION_REGEX, VIEWSTATE_REGEX):
            match = re.search(regex, page)

            if match and PLACE.POST in conf.parameters:
                name, value = match.groups()

                if PLACE.POST in conf.paramDict and name in conf.paramDict[PLACE.POST]:
                    # Value currently in use is still present in the page - nothing to refresh
                    if conf.paramDict[PLACE.POST][name] in page:
                        continue

                    conf.paramDict[PLACE.POST][name] = value

                # Both name and value originate from the response, hence the name is escaped for
                # usage inside of the pattern and backslashes inside of the value are doubled for
                # usage inside of the replacement template
                conf.parameters[PLACE.POST] = re.sub(r"(?i)(%s=)[^&]+" % re.escape(name), r"\g<1>%s" % value.replace('\\', r'\\'), conf.parameters[PLACE.POST])

    if re.search(BLOCKED_IP_REGEX, page):
        errMsg = "it appears that you have been blocked by the target server"
        singleTimeLogMessage(errMsg, logging.ERROR)