From 0977f6df61b437b54541b7607813780c369e1b93 Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Mon, 18 Nov 2019 11:40:07 +0100 Subject: [PATCH] Bug fix (disable HTML decoding in XSS checks) --- lib/controller/checks.py | 2 ++ lib/core/option.py | 1 + lib/core/settings.py | 2 +- lib/request/basic.py | 59 +++++++++++++++++++++------------------- 4 files changed, 35 insertions(+), 29 deletions(-) diff --git a/lib/controller/checks.py b/lib/controller/checks.py index 52614e83a..aa7e12542 100644 --- a/lib/controller/checks.py +++ b/lib/controller/checks.py @@ -1104,6 +1104,7 @@ def heuristicCheckSqlInjection(place, parameter): logger.warn(infoMsg) kb.heuristicMode = True + kb.disableHtmlDecoding = True randStr1, randStr2 = randomStr(NON_SQLI_CHECK_PREFIX_SUFFIX_LENGTH), randomStr(NON_SQLI_CHECK_PREFIX_SUFFIX_LENGTH) value = "%s%s%s" % (randStr1, DUMMY_NON_SQLI_CHECK_APPENDIX, randStr2) @@ -1123,6 +1124,7 @@ def heuristicCheckSqlInjection(place, parameter): logger.info(infoMsg) break + kb.disableHtmlDecoding = False kb.heuristicMode = False return kb.heuristicTest diff --git a/lib/core/option.py b/lib/core/option.py index 94d274f74..3704f2733 100644 --- a/lib/core/option.py +++ b/lib/core/option.py @@ -1872,6 +1872,7 @@ def _setKnowledgeBaseAttributes(flushAll=True): kb.delayCandidates = TIME_DELAY_CANDIDATES * [0] kb.dep = None + kb.disableHtmlDecoding = False kb.dnsMode = False kb.dnsTest = None kb.docRoot = None diff --git a/lib/core/settings.py b/lib/core/settings.py index 97237d6b4..db37bd449 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -18,7 +18,7 @@ from lib.core.enums import OS from thirdparty.six import unichr as _unichr # sqlmap version (...) -VERSION = "1.3.11.75" +VERSION = "1.3.11.76" TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable" TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34} VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE) diff --git a/lib/request/basic.py b/lib/request/basic.py index 6062366c6..d4d78fc66 100644 --- a/lib/request/basic.py +++ b/lib/request/basic.py @@ -334,41 +334,44 @@ def decodePage(page, contentEncoding, contentType, percentDecode=True): # can't do for all responses because we need to support binary files too if isinstance(page, six.binary_type) and "text/" in contentType: - # e.g. Ãëàâà - if b"&#" in page: - page = re.sub(b"&#x([0-9a-f]{1,2});", lambda _: decodeHex(_.group(1) if len(_.group(1)) == 2 else "0%s" % _.group(1)), page) - page = re.sub(b"&#(\\d{1,3});", lambda _: six.int2byte(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page) + if not kb.disableHtmlDecoding: + # e.g. Ãëàâà + if b"&#" in page: + page = re.sub(b"&#x([0-9a-f]{1,2});", lambda _: decodeHex(_.group(1) if len(_.group(1)) == 2 else "0%s" % _.group(1)), page) + page = re.sub(b"&#(\\d{1,3});", lambda _: six.int2byte(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page) - # e.g. %20%28%29 - if percentDecode: - if b"%" in page: - page = re.sub(b"%([0-9a-fA-F]{2})", lambda _: decodeHex(_.group(1)), page) + # e.g. %20%28%29 + if percentDecode: + if b"%" in page: + page = re.sub(b"%([0-9a-fA-F]{2})", lambda _: decodeHex(_.group(1)), page) - # e.g. & - page = re.sub(b"&([^;]+);", lambda _: six.int2byte(HTML_ENTITIES[getText(_.group(1))]) if HTML_ENTITIES.get(getText(_.group(1)), 256) < 256 else _.group(0), page) + # e.g. & + page = re.sub(b"&([^;]+);", lambda _: six.int2byte(HTML_ENTITIES[getText(_.group(1))]) if HTML_ENTITIES.get(getText(_.group(1)), 256) < 256 else _.group(0), page) - kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page)) + kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page)) - if (kb.pageEncoding or "").lower() == "utf-8-sig": - kb.pageEncoding = "utf-8" - if page and page.startswith("\xef\xbb\xbf"): # Reference: https://docs.python.org/2/library/codecs.html (Note: noticed problems when "utf-8-sig" is left to Python for handling) - page = page[3:] + if (kb.pageEncoding or "").lower() == "utf-8-sig": + kb.pageEncoding = "utf-8" + if page and page.startswith("\xef\xbb\xbf"): # Reference: https://docs.python.org/2/library/codecs.html (Note: noticed problems when "utf-8-sig" is left to Python for handling) + page = page[3:] - page = getUnicode(page, kb.pageEncoding) + page = getUnicode(page, kb.pageEncoding) - # e.g. ’…™ - if "&#" in page: - def _(match): - retVal = match.group(0) - try: - retVal = _unichr(int(match.group(1))) - except (ValueError, OverflowError): - pass - return retVal - page = re.sub(r"&#(\d+);", _, page) + # e.g. ’…™ + if "&#" in page: + def _(match): + retVal = match.group(0) + try: + retVal = _unichr(int(match.group(1))) + except (ValueError, OverflowError): + pass + return retVal + page = re.sub(r"&#(\d+);", _, page) - # e.g. ζ - page = re.sub(r"&([^;]+);", lambda _: _unichr(HTML_ENTITIES[_.group(1)]) if HTML_ENTITIES.get(_.group(1), 0) > 255 else _.group(0), page) + # e.g. ζ + page = re.sub(r"&([^;]+);", lambda _: _unichr(HTML_ENTITIES[_.group(1)]) if HTML_ENTITIES.get(_.group(1), 0) > 255 else _.group(0), page) + else: + page = getUnicode(page, kb.pageEncoding) return page