Minor update for an Issue #8

This commit is contained in:
Miroslav Stampar 2013-01-16 14:16:22 +01:00
parent 3464a70ac2
commit ff5ec48abd
2 changed files with 17 additions and 4 deletions

View File

@ -19,6 +19,9 @@ from optparse import OptionParser
# Regex used for recognition of hex encoded characters
HEX_ENCODED_CHAR_REGEX = r"(?P<result>\\x[0-9A-Fa-f]{2})"
# Regex used for recognition of representation for hex encoded invalid unicode characters
INVALID_UNICODE_CHAR_REGEX = r"(?P<result>\\\?[0-9A-Fa-f]{2})"
# Raw chars that will be safe encoded to their slash (\) representations (e.g. newline to \n)
SAFE_ENCODE_SLASH_REPLACEMENTS = "\t\n\r\x0b\x0c"
@ -49,14 +52,14 @@ def safecharencode(value):
retVal = reduce(lambda x, y: x + (y if (y in string.printable or ord(y) > 255) else '\\x%02x' % ord(y)), retVal, (unicode if isinstance(value, unicode) else str)())
retVal = retVal.replace(SLASH_MARKER, '\\\\')
retVal = retVal.replace(SLASH_MARKER, "\\\\")
elif isinstance(value, list):
for i in xrange(len(value)):
retVal[i] = safecharencode(value[i])
return retVal
def safechardecode(value):
def safechardecode(value, binary=False):
"""
Reverse function to safecharencode
"""
@ -68,7 +71,7 @@ def safechardecode(value):
while True:
match = re.search(HEX_ENCODED_CHAR_REGEX, retVal)
if match:
retVal = retVal.replace(match.group("result"), (unichr if isinstance(value, unicode) else chr)(ord(binascii.unhexlify(match.group("result").lstrip('\\x')))))
retVal = retVal.replace(match.group("result"), (unichr if isinstance(value, unicode) else chr)(ord(binascii.unhexlify(match.group("result").lstrip("\\x")))))
else:
break
@ -77,6 +80,16 @@ def safechardecode(value):
retVal = retVal.replace(SLASH_MARKER, '\\')
if binary:
if isinstance(retVal, unicode):
retVal = retVal.encode("utf8")
while True:
match = re.search(INVALID_UNICODE_CHAR_REGEX, retVal)
if match:
retVal = retVal.replace(match.group("result"), chr(ord(binascii.unhexlify(match.group("result").lstrip("\\?")))))
else:
break
elif isinstance(value, (list, tuple)):
for i in xrange(len(value)):
retVal[i] = safechardecode(value[i])

View File

@ -483,7 +483,7 @@ class Dump(object):
# mimetype = magic.from_buffer(value, mime=True)
# if mimetype.startswith("application") or mimetype.startswith("image"):
# with codecs.open("%s%s%s" % (dumpDbPath, os.sep, "%s-%d.bin" % (column, randomInt(8))), "wb", UNICODE_ENCODING) as f:
# _ = safechardecode(value)
# _ = safechardecode(value, True)
# f.write(_)
if conf.dumpFormat == DUMP_FORMAT.CSV: