sqlmap/lib/request/basic.py

450 lines
18 KiB
Python
Raw Normal View History

2019-05-08 13:47:52 +03:00
#!/usr/bin/env python
2008-10-15 19:38:22 +04:00
"""
2020-01-01 15:25:15 +03:00
Copyright (c) 2006-2020 sqlmap developers (http://sqlmap.org/)
2017-10-11 15:50:46 +03:00
See the file 'LICENSE' for copying permission
2008-10-15 19:38:22 +04:00
"""
2010-06-30 16:09:33 +04:00
import codecs
import gzip
import io
2011-04-20 02:54:13 +04:00
import logging
2008-10-15 19:38:22 +04:00
import re
2012-09-12 13:50:38 +04:00
import struct
import zlib
2008-10-15 19:38:22 +04:00
from lib.core.common import Backend
from lib.core.common import extractErrorMessage
from lib.core.common import extractRegexResult
2019-03-29 04:28:16 +03:00
from lib.core.common import filterNone
2014-01-02 15:09:58 +04:00
from lib.core.common import getPublicTypeMembers
2019-01-22 03:20:27 +03:00
from lib.core.common import getSafeExString
2018-01-02 02:42:20 +03:00
from lib.core.common import isListLike
2015-10-29 18:52:17 +03:00
from lib.core.common import randomStr
from lib.core.common import readInput
2012-03-08 14:19:34 +04:00
from lib.core.common import resetCookieJar
2011-06-08 18:42:48 +04:00
from lib.core.common import singleTimeLogMessage
2012-09-11 16:58:52 +04:00
from lib.core.common import singleTimeWarnMessage
2018-01-02 02:42:20 +03:00
from lib.core.common import unArrayizeValue
2019-05-03 14:20:15 +03:00
from lib.core.convert import decodeHex
from lib.core.convert import getBytes
2019-05-27 14:09:13 +03:00
from lib.core.convert import getText
2019-05-06 01:54:21 +03:00
from lib.core.convert import getUnicode
2008-10-15 19:38:22 +04:00
from lib.core.data import conf
from lib.core.data import kb
2010-06-30 16:09:33 +04:00
from lib.core.data import logger
2016-09-09 12:06:38 +03:00
from lib.core.decorators import cachedmethod
2019-10-31 22:59:14 +03:00
from lib.core.decorators import lockedmethod
2019-07-03 11:56:05 +03:00
from lib.core.dicts import HTML_ENTITIES
from lib.core.enums import DBMS
from lib.core.enums import HTTP_HEADER
from lib.core.enums import PLACE
from lib.core.exception import SqlmapCompressionException
from lib.core.settings import BLOCKED_IP_REGEX
from lib.core.settings import DEFAULT_COOKIE_DELIMITER
2012-10-19 13:02:14 +04:00
from lib.core.settings import EVENTVALIDATION_REGEX
2019-05-28 15:12:35 +03:00
from lib.core.settings import IDENTYWAF_PARSE_LIMIT
2012-09-12 13:50:38 +04:00
from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
from lib.core.settings import META_CHARSET_REGEX
2011-11-22 16:18:24 +04:00
from lib.core.settings import PARSE_HEADERS_LIMIT
from lib.core.settings import SELECT_FROM_TABLE_REGEX
2019-11-11 14:24:42 +03:00
from lib.core.settings import UNICODE_ENCODING
2012-10-19 13:02:14 +04:00
from lib.core.settings import VIEWSTATE_REGEX
from lib.parse.headers import headersParser
2008-10-15 19:38:22 +04:00
from lib.parse.html import htmlParser
2019-04-19 12:24:34 +03:00
from thirdparty import six
2012-07-14 19:01:04 +04:00
from thirdparty.chardet import detect
2019-05-24 14:54:10 +03:00
from thirdparty.identywaf import identYwaf
from thirdparty.odict import OrderedDict
2019-05-15 11:57:22 +03:00
from thirdparty.six import unichr as _unichr
2019-05-24 14:54:10 +03:00
from thirdparty.six.moves import http_client as _http_client
2008-10-15 19:38:22 +04:00
2019-10-31 22:59:14 +03:00
@lockedmethod
def forgeHeaders(items=None, base=None):
    """
    Builds the (ordered) dictionary of HTTP headers to be used in a request

    Starting point is 'base' (or conf.httpHeaders when 'base' is None),
    overlaid with 'items'. Entries of 'items' having value None are removed
    from the passed dictionary itself before the overlay is done. When a
    cookie jar (conf.cj) is in use, its cookies are merged into (or appended
    to) the HTTP Cookie header value.

    @param items: auxiliary headers (name -> value) overriding the base ones
    @param base: headers to start from (conf.httpHeaders if None)
    @return: OrderedDict holding the resulting headers
    """

    items = items or {}

    for name in [name for name in items if items[name] is None]:
        del items[name]

    merged = OrderedDict(conf.httpHeaders if base is None else base)
    merged.update(items.items())

    class _str(str):
        # string type refusing to have its letter case changed
        def capitalize(self):
            return _str(self)

        def title(self):
            return _str(self)

    headers = OrderedDict()

    for key, value in merged.items():
        # an already stored header differing only by letter case is replaced by the current one
        for existing in headers:
            if existing.upper() == key.upper():
                del headers[existing]
                break

        stored = False

        if key.upper() not in (known.upper() for known in getPublicTypeMembers(HTTP_HEADER, True)):
            try:
                headers[_str(key)] = value  # dirty hack for http://bugs.python.org/issue12455
                stored = True
            except UnicodeEncodeError:      # don't do the hack on non-ASCII header names (they have to be properly encoded later on)
                pass

        if not stored:
            key = '-'.join(part.capitalize() for part in key.split('-'))
            headers[key] = value

    if not conf.cj:
        return headers

    if HTTP_HEADER.COOKIE in headers:
        for cookie in conf.cj:
            if cookie.domain_specified and not (conf.hostname or "").endswith(cookie.domain):
                continue

            if ("%s=" % getUnicode(cookie.name)) not in getUnicode(headers[HTTP_HEADER.COOKIE]):
                # cookie not (yet) present in the header value - append it (except while in testing mode)
                if not kb.testMode:
                    headers[HTTP_HEADER.COOKIE] += "%s %s=%s" % (conf.cookieDel or DEFAULT_COOKIE_DELIMITER, getUnicode(cookie.name), getUnicode(cookie.value))
                continue

            if conf.loadCookies:
                conf.httpHeaders = filterNone((item if item[0] != HTTP_HEADER.COOKIE else None) for item in conf.httpHeaders)
            elif kb.mergeCookies is None:
                message = ("you provided a HTTP %s header value, while " % HTTP_HEADER.COOKIE
                           + "target URL provides its own cookies within "
                           + "HTTP %s header which intersect with yours. " % HTTP_HEADER.SET_COOKIE
                           + "Do you want to merge them in further requests? [Y/n] ")

                kb.mergeCookies = readInput(message, default='Y', boolean=True)

            if kb.mergeCookies and kb.injection.place != PLACE.COOKIE:
                def _replaceCookie(value):
                    # substitutes the value of the current cookie inside of the given cookie string
                    return re.sub(r"(?i)\b%s=[^%s]+" % (re.escape(getUnicode(cookie.name)), conf.cookieDel or DEFAULT_COOKIE_DELIMITER), ("%s=%s" % (getUnicode(cookie.name), getUnicode(cookie.value))).replace('\\', r'\\'), value)

                headers[HTTP_HEADER.COOKIE] = _replaceCookie(headers[HTTP_HEADER.COOKIE])

                if PLACE.COOKIE in conf.parameters:
                    conf.parameters[PLACE.COOKIE] = _replaceCookie(conf.parameters[PLACE.COOKIE])

                conf.httpHeaders = [(item[0], item[1] if item[0] != HTTP_HEADER.COOKIE else _replaceCookie(item[1])) for item in conf.httpHeaders]

    if kb.testMode and not any((conf.csrfToken, conf.safeUrl)):
        resetCookieJar(conf.cj)

    return headers
2017-05-17 01:22:18 +03:00
def parseResponse(page, headers, status=None):
    """
    Passes the response headers to headersParser() and the response body
    (prefixed with the status, if one is given) to htmlParser()

    @param page: the page to parse to feed the knowledge base htmlFp
    (back-end DBMS fingerprint based upon DBMS error messages return
    through the web application) list and absFilePaths (absolute file
    paths) set.
    @param headers: response headers (skipped if empty/None)
    @param status: optional status put (followed by a blank line) in front
    of the page before it is parsed
    """

    if headers:
        headersParser(headers)

    if not page:
        return

    content = "%s\n\n%s" % (status, page) if status else page
    htmlParser(content)
2008-10-15 19:38:22 +04:00
2016-09-09 12:06:38 +03:00
@cachedmethod
def checkCharEncoding(encoding, warn=True):
    """
    Checks encoding name, repairs common misspellings and adjusts to
    proper namings used in codecs module

    @param encoding: charset name as found in the response (text, bytes or
    list-like holding it)
    @param warn: if True, a (single time) warning is logged for a charset
    that is known to the codecs module but fails a trial decoding
    @return: usable charset name, None if it can't be used (an empty/None
    input is returned as is)

    >>> checkCharEncoding('iso-8858', False)
    'iso8859-1'
    >>> checkCharEncoding('en_us', False)
    'utf8'
    """

    if isinstance(encoding, six.binary_type):
        encoding = getUnicode(encoding)

    if isListLike(encoding):
        encoding = unArrayizeValue(encoding)

    if encoding:
        encoding = encoding.lower()
    else:
        return encoding

    # Reference: http://www.destructor.de/charsets/index.htm
    translate = {"windows-874": "iso-8859-11", "utf-8859-1": "utf8", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1", "iso-8859-0": "iso8859-1", "ansi": "ascii", "gbk2312": "gbk", "windows-31j": "cp932", "en": "us"}

    # everything from the first occurrence of a delimiter is thrown away
    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    encoding = encoding.replace("&quot", "")

    # popular typos/errors
    if "8858" in encoding:
        encoding = encoding.replace("8858", "8859")  # iso-8858 -> iso-8859
    elif "8559" in encoding:
        encoding = encoding.replace("8559", "8859")  # iso-8559 -> iso-8859
    elif "8895" in encoding:
        encoding = encoding.replace("8895", "8859")  # iso-8895 -> iso-8859
    elif "5889" in encoding:
        encoding = encoding.replace("5889", "8859")  # iso-5889 -> iso-8859
    elif "5589" in encoding:
        encoding = encoding.replace("5589", "8859")  # iso-5589 -> iso-8859
    elif "2313" in encoding:
        encoding = encoding.replace("2313", "2312")  # gb2313 -> gb2312
    elif encoding.startswith("x-"):
        encoding = encoding[len("x-"):]              # x-euc-kr -> euc-kr  /  x-mac-turkish -> mac-turkish
    elif "windows-cp" in encoding:
        encoding = encoding.replace("windows-cp", "windows")  # windows-cp-1254 -> windows-1254

    # name adjustment for compatibility
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"
    elif encoding.find("utf-8") > 0:
        encoding = "utf-8"

    # Reference: http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "charset", "*") or not re.search(r"\w", encoding):
        return None

    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except Exception:   # Note: not a bare 'except' so that KeyboardInterrupt/SystemExit are not swallowed
        encoding = None

    if encoding:
        # trial decoding of random content (codec could be known while not being usable for text decoding)
        try:
            six.text_type(getBytes(randomStr()), encoding)
        except Exception:
            if warn:
                warnMsg = "invalid web page charset '%s'" % encoding
                singleTimeLogMessage(warnMsg, logging.WARN, encoding)
            encoding = None

    return encoding
def getHeuristicCharEncoding(page):
    """
    Returns page encoding charset detected by usage of heuristics

    Result of the detection is stored in kb.cache.encoding under the hash
    of the page content, so that the same content is not examined twice

    Reference: https://chardet.readthedocs.io/en/latest/usage.html

    @param page: raw page content
    @return: detected charset name (None if nothing has been detected)

    >>> getHeuristicCharEncoding(b"<html></html>")
    'ascii'
    """

    key = hash(page)
    missing = object()

    # Note: sentinel is used so that a cached negative (None) result counts as a cache hit too
    # (otherwise the detector would be run again and again for content it can't classify)
    retVal = kb.cache.encoding.get(key, missing)

    if retVal is missing:
        retVal = detect(page)["encoding"]
        kb.cache.encoding[key] = retVal

    if retVal and retVal.lower().replace('-', "") == UNICODE_ENCODING.lower().replace('-', ""):
        infoMsg = "heuristics detected web page charset '%s'" % retVal
        singleTimeLogMessage(infoMsg, logging.INFO, retVal)

    return retVal
2019-11-13 00:51:11 +03:00
def decodePage(page, contentEncoding, contentType, percentDecode=True):
    """
    Decode compressed/charset HTTP response

    @param page: raw response body
    @param contentEncoding: value of the Content-Encoding response header
    @param contentType: value of the Content-Type response header
    @param percentDecode: if True, percent-encoded (%XX) sequences found in
    textual content are decoded too
    @return: decoded page (None if the content is compressed while page
    compression has been turned off)
    @raise SqlmapCompressionException: if the content can't be decompressed
    (and does not look like plain HTML); page compression is turned off
    (kb.pageCompress) before raising

    Note: kb.pageEncoding is set as a side effect

    >>> getText(decodePage(b"<html>foo&amp;bar</html>", None, "text/html; charset=utf-8"))
    '<html>foo&bar</html>'
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    if hasattr(contentEncoding, "lower"):
        contentEncoding = contentEncoding.lower()
    else:
        contentEncoding = ""

    if hasattr(contentType, "lower"):
        contentType = contentType.lower()
    else:
        contentType = ""

    if contentEncoding in ("gzip", "x-gzip", "deflate"):
        if not kb.pageCompress:
            return None

        try:
            if contentEncoding == "deflate":
                data = io.BytesIO(zlib.decompress(page, -15))  # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
            else:
                data = gzip.GzipFile("", "rb", 9, io.BytesIO(page))

                # Note: trailing 4 bytes of a gzip member (ISIZE) hold the uncompressed size as an unsigned 32-bit
                # little-endian integer (RFC 1952) - reading it as signed would turn huge sizes into negative
                # values slipping through the limit check
                size = struct.unpack("<I", page[-4:])[0]  # Reference: http://pydoc.org/get.cgi/usr/local/lib/python2.5/gzip.py
                if size > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")

            page = data.read()
        except Exception as ex:
            # Note: marker has to be of the same type as the page itself (str in bytes raises TypeError in Python 3)
            htmlMarker = b"<html" if isinstance(page, six.binary_type) else "<html"

            if htmlMarker not in page:  # in some cases, invalid "Content-Encoding" appears for plain HTML (should be ignored)
                errMsg = "detected invalid data for declared content "
                errMsg += "encoding '%s' ('%s')" % (contentEncoding, getSafeExString(ex))
                singleTimeLogMessage(errMsg, logging.ERROR)

                warnMsg = "turning off page compression"
                singleTimeWarnMessage(warnMsg)

                kb.pageCompress = False
                raise SqlmapCompressionException

    if not conf.encoding:
        httpCharset, metaCharset = None, None

        # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
        if contentType.find("charset=") != -1:
            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))

        # declared charset is accepted if only one of the sources provides it or if both of them agree
        if (any((httpCharset, metaCharset)) and not all((httpCharset, metaCharset))) or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
            kb.pageEncoding = httpCharset or metaCharset  # Reference: http://bytes.com/topic/html-css/answers/154758-http-equiv-vs-true-header-has-precedence
            debugMsg = "declared web page charset '%s'" % kb.pageEncoding
            singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg)
        else:
            kb.pageEncoding = None
    else:
        kb.pageEncoding = conf.encoding

    # can't do for all responses because we need to support binary files too
    if isinstance(page, six.binary_type) and "text/" in contentType:
        if not kb.disableHtmlDecoding:
            # e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
            if b"&#" in page:
                # Note: single hex digit is left-padded with zero while staying of the same (binary) type
                page = re.sub(b"&#x([0-9a-f]{1,2});", lambda _: decodeHex(_.group(1).zfill(2)), page)
                page = re.sub(b"&#(\\d{1,3});", lambda _: six.int2byte(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)

            # e.g. %20%28%29
            if percentDecode:
                if b"%" in page:
                    page = re.sub(b"%([0-9a-f]{2})", lambda _: decodeHex(_.group(1)), page)
                    page = re.sub(b"%([0-9A-F]{2})", lambda _: decodeHex(_.group(1)), page)  # Note: %DeepSee_SQL in CACHE

            # e.g. &amp;
            page = re.sub(b"&([^;]+);", lambda _: six.int2byte(HTML_ENTITIES[getText(_.group(1))]) if HTML_ENTITIES.get(getText(_.group(1)), 256) < 256 else _.group(0), page)

            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))

            if (kb.pageEncoding or "").lower() == "utf-8-sig":
                kb.pageEncoding = "utf-8"
                if page and page.startswith(b"\xef\xbb\xbf"):  # Reference: https://docs.python.org/2/library/codecs.html (Note: noticed problems when "utf-8-sig" is left to Python for handling)
                    page = page[3:]

            page = getUnicode(page, kb.pageEncoding)

            # e.g. &#8217;&#8230;&#8482;
            if "&#" in page:
                def _(match):
                    retVal = match.group(0)
                    try:
                        retVal = _unichr(int(match.group(1)))
                    except (ValueError, OverflowError):
                        pass
                    return retVal
                page = re.sub(r"&#(\d+);", _, page)

            # e.g. &zeta;
            page = re.sub(r"&([^;]+);", lambda _: _unichr(HTML_ENTITIES[_.group(1)]) if HTML_ENTITIES.get(_.group(1), 0) > 255 else _.group(0), page)
        else:
            # NOTE(review): nesting reconstructed - presumably this 'else' pairs with the 'kb.disableHtmlDecoding'
            # check (plain charset decoding when HTML decoding is disabled), leaving non-textual content
            # untouched - TODO confirm against the upstream file
            page = getUnicode(page, kb.pageEncoding)

    return page
2019-05-24 14:54:10 +03:00
def processResponse(page, responseHeaders, code=None, status=None):
    """
    Inspects a received response and updates the knowledge base accordingly

    Response is passed to parseResponse() (headers only while the number of
    processed responses is below PARSE_HEADERS_LIMIT), checked for DBMS
    error messages (if conf.parseErrors is set), for WAF/IPS signatures
    (while below IDENTYWAF_PARSE_LIMIT), for changed values of event
    validation/view state POST fields (until kb.originalPage is set), for
    browser verification and CAPTCHA mechanisms and for signs of the
    client being blocked.

    @param page: response body (None is treated as an empty string)
    @param responseHeaders: response headers object
    @param code: HTTP response code
    @param status: HTTP response status message
    """

    kb.processResponseCounter += 1

    page = page or ""

    headersToParse = responseHeaders if kb.processResponseCounter < PARSE_HEADERS_LIMIT else None
    parseResponse(page, headersToParse, status)

    if not kb.tableFrom and Backend.getIdentifiedDbms() in (DBMS.ACCESS,):
        kb.tableFrom = extractRegexResult(SELECT_FROM_TABLE_REGEX, page)
    else:
        kb.tableFrom = None

    if conf.parseErrors:
        msg = extractErrorMessage(page)

        if msg:
            logger.warning("parsed DBMS error message: '%s'" % msg.rstrip('.'))

    if kb.processResponseCounter < IDENTYWAF_PARSE_LIMIT:
        headerLines = responseHeaders.headers if responseHeaders else []
        rawResponse = "%s %s %s\n%s\n%s" % (_http_client.HTTPConnection._http_vsn_str, code or "", status or "", getUnicode("".join(headerLines)), page)

        identYwaf.non_blind.clear()

        if identYwaf.non_blind_check(rawResponse, silent=True):
            for waf in identYwaf.non_blind:
                if waf in kb.identifiedWafs:
                    continue

                kb.identifiedWafs.add(waf)
                errMsg = "WAF/IPS identified as '%s'" % identYwaf.format_name(waf)
                singleTimeLogMessage(errMsg, logging.CRITICAL)

    if kb.originalPage is None:
        for regex in (EVENTVALIDATION_REGEX, VIEWSTATE_REGEX):
            match = re.search(regex, page)

            if not (match and PLACE.POST in conf.parameters):
                continue

            name, value = match.groups()

            if PLACE.POST in conf.paramDict and name in conf.paramDict[PLACE.POST]:
                # nothing to adjust if the used value is still found inside of the page
                if conf.paramDict[PLACE.POST][name] in page:
                    continue

                msg = "do you want to automatically adjust the value of '%s'? [y/N]" % name

                if not readInput(msg, default='N', boolean=True):
                    continue

                conf.paramDict[PLACE.POST][name] = value

            conf.parameters[PLACE.POST] = re.sub(r"(?i)(%s=)[^&]+" % re.escape(name), r"\g<1>%s" % value.replace('\\', r'\\'), conf.parameters[PLACE.POST])

    if not kb.browserVerification and re.search(r"(?i)browser.?verification", page or ""):
        kb.browserVerification = True

        warnMsg = "potential browser verification protection mechanism detected"
        if re.search(r"(?i)CloudFlare", page):
            warnMsg += " (CloudFlare)"

        singleTimeWarnMessage(warnMsg)

    if not kb.captchaDetected and re.search(r"(?i)captcha", page or ""):
        # word itself is not enough - it has to be found inside of a form or a meta refresh tag
        if any(re.search(r"(?i)captcha", form.group(0)) for form in re.finditer(r"(?si)<form.+?</form>", page)):
            kb.captchaDetected = True

        if re.search(r"<meta[^>]+\brefresh\b[^>]+\bcaptcha\b", page):
            kb.captchaDetected = True

        if kb.captchaDetected:
            warnMsg = "potential CAPTCHA protection mechanism detected"
            if re.search(r"(?i)<title>[^<]*CloudFlare", page):
                warnMsg += " (CloudFlare)"

            singleTimeWarnMessage(warnMsg)

    if re.search(BLOCKED_IP_REGEX, page):
        warnMsg = "it appears that you have been blocked by the target server"
        singleTimeWarnMessage(warnMsg)