diff --git a/extra/safe2bin/README.txt b/extra/safe2bin/README.txt index 49d0fc700..06400d6ea 100755 --- a/extra/safe2bin/README.txt +++ b/extra/safe2bin/README.txt @@ -7,7 +7,7 @@ $ python ./safe2bin.py -i output.txt -o output.txt.bin This will create an binary decoded file output.txt.bin. For example, if the content of output.txt is: "\ttest\t\x32\x33\x34\nnewline" it will -be decoded to: " test 234 +be decoded to: " test 234 newline" If you skip the output file name, general rule is that the binary diff --git a/extra/safe2bin/safe2bin.py b/extra/safe2bin/safe2bin.py index 129cc4a61..3b1d81a8a 100755 --- a/extra/safe2bin/safe2bin.py +++ b/extra/safe2bin/safe2bin.py @@ -11,6 +11,7 @@ See the file 'doc/COPYING' for copying permission import binascii import re +import string import os import sys @@ -23,9 +24,33 @@ HEX_ENCODED_CHAR_REGEX = r"(?P\\x[0-9A-Fa-f]{2})" # Raw chars that will be safe encoded to their slash (\) representations (e.g. newline to \n) SAFE_ENCODE_SLASH_REPLACEMENTS = "\\\t\n\r\x0b\x0c" +def safecharencode(value): + """ + Returns safe representation of a given basestring value + + >>> safecharencode(u'test123') + u'test123' + >>> safecharencode(u'test\x01\x02\xff') + u'test\\01\\02\\03\\ff' + """ + + retVal = value + + if isinstance(value, basestring): + for char in SAFE_ENCODE_SLASH_REPLACEMENTS: + retVal = retVal.replace(char, repr(char).strip('\'')) + + retVal = reduce(lambda x, y: x + (y if (y in string.printable or ord(y) > 255) else '\\x%02x' % ord(y)), retVal, unicode()) + + elif isinstance(value, list): + for i in xrange(len(value)): + retVal[i] = safecharencode(value[i]) + + return retVal + def safechardecode(value): """ - Decode safe(hex) encoded values + Reverse function to safecharencode """ retVal = value diff --git a/lib/core/convert.py b/lib/core/convert.py index 373be9b93..b4873d23b 100644 --- a/lib/core/convert.py +++ b/lib/core/convert.py @@ -21,10 +21,10 @@ import string import struct import urllib +from extra.safe2bin.safe2bin import safecharencode +from extra.safe2bin.safe2bin import safechardecode from lib.core.data import conf from lib.core.data import logger -from lib.core.settings import HEX_ENCODED_CHAR_REGEX -from lib.core.settings import SAFE_ENCODE_SLASH_REPLACEMENTS from lib.core.settings import UNICODE_ENCODING from lib.core.settings import URLENCODE_CHAR_LIMIT from lib.core.settings import URLENCODE_FAILSAFE_CHARS @@ -136,52 +136,3 @@ def htmlunescape(value): retVal = value.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"').replace(''', "'").replace(' ', ' ') retVal = re.sub('&#(\d+);', lambda x: unichr(int(x.group(1))), retVal) return retVal - -def safecharencode(value): - """ - Returns safe representation of a given basestring value - - >>> safecharencode(u'test123') - u'test123' - >>> safecharencode(u'test\x01\x02\xff') - u'test\\01\\02\\03\\ff' - """ - - retVal = value - - if isinstance(value, basestring): - for char in SAFE_ENCODE_SLASH_REPLACEMENTS: - retVal = retVal.replace(char, repr(char).strip('\'')) - - retVal = reduce(lambda x, y: x + (y if (y in string.printable or ord(y) > 255) else '\\x%02x' % ord(y)), retVal, unicode()) - - elif isinstance(value, list): - for i in xrange(len(value)): - retVal[i] = safecharencode(value[i]) - - return retVal - -def safechardecode(value): - """ - Reverse function to safecharencode - """ - - retVal = value - if isinstance(value, basestring): - regex = re.compile(HEX_ENCODED_CHAR_REGEX) - - while True: - match = regex.search(retVal) - if match: - retVal = retVal.replace(match.group("result"), binascii.unhexlify(match.group("result").lstrip('\\x'))) - else: - break - - for char in SAFE_ENCODE_SLASH_REPLACEMENTS[::-1]: - retVal = retVal.replace(repr(char).strip('\''), char) - - elif isinstance(value, (list, tuple)): - for i in xrange(len(value)): - retVal[i] = safechardecode(value[i]) - - return retVal diff --git a/lib/core/settings.py b/lib/core/settings.py index 6df7258fd..45b328280 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -304,9 +304,3 @@ MAX_INT = sys.maxint # Parameters to be ignored in detection phase (upper case) IGNORE_PARAMETERS = ("__VIEWSTATE", "__EVENTARGUMENT", "__EVENTTARGET", "__EVENTVALIDATION", "ASPSESSIONID", "ASP.NET_SESSIONID", "JSESSIONID", "CFID", "CFTOKEN") - -# Regex used for recognition of hex encoded characters -HEX_ENCODED_CHAR_REGEX = r"(?P\\x[0-9A-Fa-f]{2})" - -# Raw chars that will be safe encoded to their slash (\) representations (e.g. newline to \n) -SAFE_ENCODE_SLASH_REPLACEMENTS = "\\\t\n\r\x0b\x0c"