From bdf6452af6e47805e6b72789ec3ae82e776b8a50 Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Wed, 17 Apr 2019 17:19:22 +0200 Subject: [PATCH] 'Safe' unicode decoding replacements --- lib/core/common.py | 33 +++++++++++++++++++++++++++++++-- lib/core/settings.py | 14 +++++++++++++- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/lib/core/common.py b/lib/core/common.py index 3270723d0..2ce0b4026 100644 --- a/lib/core/common.py +++ b/lib/core/common.py @@ -125,6 +125,7 @@ from lib.core.settings import HTTP_CHUNKED_SPLIT_KEYWORDS from lib.core.settings import IGNORE_SAVE_OPTIONS from lib.core.settings import INFERENCE_UNKNOWN_CHAR from lib.core.settings import INVALID_UNICODE_CHAR_FORMAT +from lib.core.settings import INVALID_UNICODE_PRIVATE_AREA from lib.core.settings import IP_ADDRESS_REGEX from lib.core.settings import ISSUES_PAGE from lib.core.settings import IS_WIN @@ -153,6 +154,7 @@ from lib.core.settings import REFLECTED_REPLACEMENT_REGEX from lib.core.settings import REFLECTED_REPLACEMENT_TIMEOUT from lib.core.settings import REFLECTED_VALUE_MARKER from lib.core.settings import REFLECTIVE_MISS_THRESHOLD +from lib.core.settings import SAFE_HEX_MARKER from lib.core.settings import SENSITIVE_DATA_REGEX from lib.core.settings import SENSITIVE_OPTIONS from lib.core.settings import STDIN_PIPE_DASH @@ -2424,7 +2426,10 @@ def getUnicode(value, encoding=None, noneToNull=False): try: return six.text_type(value, UNICODE_ENCODING) except: - value = value[:ex.start] + "".join(INVALID_UNICODE_CHAR_FORMAT % ord(_) for _ in value[ex.start:ex.end]) + value[ex.end:] + if INVALID_UNICODE_PRIVATE_AREA: + value = value[:ex.start] + "".join(unichr(int('000f00%2x' % ord(_), 16)).encode(UNICODE_ENCODING) for _ in value[ex.start:ex.end]) + value[ex.end:] + else: + value = value[:ex.start] + "".join(INVALID_UNICODE_CHAR_FORMAT % ord(_) for _ in value[ex.start:ex.end]) + value[ex.end:] elif isListLike(value): value = list(getUnicode(_, encoding, noneToNull) for _ in value) return value @@ -2434,6 +2439,30 @@ def getUnicode(value, encoding=None, noneToNull=False): except UnicodeDecodeError: return six.text_type(str(value), errors="ignore") # encoding ignored for non-basestring instances +def getASCII(value): + """ + Returns ASCII representation of provided Unicode value + + >>> getASCII(getUnicode("foo\x01\x83\xffbar")) == "foo\x01\x83\xffbar" + True + """ + + retVal = value + + if isinstance(value, six.text_type): + if INVALID_UNICODE_PRIVATE_AREA: + for char in xrange(0xF0000, 0xF00FF + 1): + value = value.replace(unichr(char), "%s%02x" % (SAFE_HEX_MARKER, char - 0xF0000)) + + retVal = value.encode(UNICODE_ENCODING) + + retVal = re.sub(r"%s([0-9a-f]{2})" % SAFE_HEX_MARKER, lambda _: _.group(1).decode("hex"), retVal) + else: + retVal = value.encode(UNICODE_ENCODING) + retVal = re.sub(r"\\x([0-9a-f]{2})", lambda _: _.group(1).decode("hex"), retVal) + + return retVal + def longestCommonPrefix(*sequences): """ Returns longest common prefix occuring in given sequences @@ -3339,7 +3368,7 @@ def showHttpErrorCodes(): msg += "could mean that some kind of protection is involved (e.g. WAF)" logger.debug(msg) -def openFile(filename, mode='r', encoding=UNICODE_ENCODING, errors="replace", buffering=1): # "buffering=1" means line buffered (Reference: http://stackoverflow.com/a/3168436) +def openFile(filename, mode='r', encoding=UNICODE_ENCODING, errors="reversible", buffering=1): # "buffering=1" means line buffered (Reference: http://stackoverflow.com/a/3168436) """ Returns file handle of a given filename """ diff --git a/lib/core/settings.py b/lib/core/settings.py index 653ef30d9..53427d219 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -17,7 +17,7 @@ from lib.core.enums import DBMS_DIRECTORY_NAME from lib.core.enums import OS # sqlmap version (...) -VERSION = "1.3.4.14" +VERSION = "1.3.4.15" TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable" TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34} VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE) @@ -65,6 +65,7 @@ ASTERISK_MARKER = "__ASTERISK_MARK__" REPLACEMENT_MARKER = "__REPLACEMENT_MARK__" BOUNDED_INJECTION_MARKER = "__BOUNDED_INJECTION_MARK__" SAFE_VARIABLE_MARKER = "__SAFE__" +SAFE_HEX_MARKER = "__SAFE_HEX__" RANDOM_INTEGER_MARKER = "[RANDINT]" RANDOM_STRING_MARKER = "[RANDSTR]" @@ -714,6 +715,9 @@ RESTAPI_DEFAULT_ADDRESS = "127.0.0.1" # Default REST-JSON API server listen port RESTAPI_DEFAULT_PORT = 8775 +# Use "Supplementary Private Use Area-A" +INVALID_UNICODE_PRIVATE_AREA = False + # Format used for representing invalid unicode characters INVALID_UNICODE_CHAR_FORMAT = r"\x%02x" @@ -827,3 +831,11 @@ for key, value in os.environ.items(): _ = key[len(SQLMAP_ENVIRONMENT_PREFIX) + 1:].upper() if _ in globals(): globals()[_] = value + +# Installing "reversible" unicode (decoding) error handler + +def reversible(ex): + if isinstance(ex, UnicodeDecodeError): + return ("".join(INVALID_UNICODE_CHAR_FORMAT % ord(_) for _ in ex.object[ex.start:ex.end]).decode(UNICODE_ENCODING), ex.end) + +codecs.register_error("reversible", reversible)