From ff5ec48abd4ecdec4dd4cad40f1f35198cdbe11d Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Wed, 16 Jan 2013 14:16:22 +0100 Subject: [PATCH 1/2] Minor update for an Issue #8 --- extra/safe2bin/safe2bin.py | 19 ++++++++++++++++--- lib/core/dump.py | 2 +- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/extra/safe2bin/safe2bin.py b/extra/safe2bin/safe2bin.py index 7bdcf3346..4304f0fcc 100644 --- a/extra/safe2bin/safe2bin.py +++ b/extra/safe2bin/safe2bin.py @@ -19,6 +19,9 @@ from optparse import OptionParser # Regex used for recognition of hex encoded characters HEX_ENCODED_CHAR_REGEX = r"(?P<result>\\x[0-9A-Fa-f]{2})" +# Regex used for recognition of representation for hex encoded invalid unicode characters +INVALID_UNICODE_CHAR_REGEX = r"(?P<result>\\\?[0-9A-Fa-f]{2})" + # Raw chars that will be safe encoded to their slash (\) representations (e.g. newline to \n) SAFE_ENCODE_SLASH_REPLACEMENTS = "\t\n\r\x0b\x0c" @@ -49,14 +52,14 @@ def safecharencode(value): retVal = reduce(lambda x, y: x + (y if (y in string.printable or ord(y) > 255) else '\\x%02x' % ord(y)), retVal, (unicode if isinstance(value, unicode) else str)()) - retVal = retVal.replace(SLASH_MARKER, '\\\\') + retVal = retVal.replace(SLASH_MARKER, "\\\\") elif isinstance(value, list): for i in xrange(len(value)): retVal[i] = safecharencode(value[i]) return retVal -def safechardecode(value): +def safechardecode(value, binary=False): """ Reverse function to safecharencode """ @@ -68,7 +71,7 @@ def safechardecode(value): while True: match = re.search(HEX_ENCODED_CHAR_REGEX, retVal) if match: - retVal = retVal.replace(match.group("result"), (unichr if isinstance(value, unicode) else chr)(ord(binascii.unhexlify(match.group("result").lstrip('\\x'))))) + retVal = retVal.replace(match.group("result"), (unichr if isinstance(value, unicode) else chr)(ord(binascii.unhexlify(match.group("result").lstrip("\\x"))))) else: break @@ -77,6 +80,16 @@ def safechardecode(value): retVal = retVal.replace(SLASH_MARKER, 
'\\') + if binary: + if isinstance(retVal, unicode): + retVal = retVal.encode("utf8") + while True: + match = re.search(INVALID_UNICODE_CHAR_REGEX, retVal) + if match: + retVal = retVal.replace(match.group("result"), chr(ord(binascii.unhexlify(match.group("result").lstrip("\\?"))))) + else: + break + elif isinstance(value, (list, tuple)): for i in xrange(len(value)): retVal[i] = safechardecode(value[i]) diff --git a/lib/core/dump.py b/lib/core/dump.py index 8ce184906..3ca145a3b 100644 --- a/lib/core/dump.py +++ b/lib/core/dump.py @@ -483,7 +483,7 @@ class Dump(object): # mimetype = magic.from_buffer(value, mime=True) # if mimetype.startswith("application") or mimetype.startswith("image"): # with codecs.open("%s%s%s" % (dumpDbPath, os.sep, "%s-%d.bin" % (column, randomInt(8))), "wb", UNICODE_ENCODING) as f: - # _ = safechardecode(value) + # _ = safechardecode(value, True) # f.write(_) if conf.dumpFormat == DUMP_FORMAT.CSV: From c0a6e1c3a795abed98f7fb1111f5680cab43408e Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Wed, 16 Jan 2013 14:54:37 +0100 Subject: [PATCH 2/2] Finishing first usable prototype for an Issue #8 --- lib/core/dump.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/core/dump.py b/lib/core/dump.py index 3ca145a3b..aedb3e9f4 100644 --- a/lib/core/dump.py +++ b/lib/core/dump.py @@ -479,12 +479,16 @@ class Dump(object): blank = " " * (maxlength - len(value)) self._write("| %s%s" % (value, blank), newline=False, console=console) - #if len(value) > 10 and r'\x' in value: - # mimetype = magic.from_buffer(value, mime=True) - # if mimetype.startswith("application") or mimetype.startswith("image"): - # with codecs.open("%s%s%s" % (dumpDbPath, os.sep, "%s-%d.bin" % (column, randomInt(8))), "wb", UNICODE_ENCODING) as f: - # _ = safechardecode(value, True) - # f.write(_) + if len(value) > 10 and r'\x' in value: + mimetype = magic.from_buffer(value, mime=True) + if any(mimetype.startswith(_) for _ in ("application", 
"image")): + filepath = os.path.join(dumpDbPath, "%s-%d.bin" % (column, randomInt(8))) + warnMsg = "writing binary ('%s') content to file '%s' " % (mimetype, filepath) + logger.warn(warnMsg) + + with open(filepath, "wb") as f: + _ = safechardecode(value, True) + f.write(_) if conf.dumpFormat == DUMP_FORMAT.CSV: if field == fields: