minor fixes and refactoring regarding safecharencoding

This commit is contained in:
Miroslav Stampar 2011-04-14 15:54:00 +00:00
parent 866cdb4cf7
commit ded28442fb
2 changed files with 12 additions and 6 deletions

View File

@ -13,6 +13,7 @@ except:
import md5
import sha
import binascii
import pickle
import re
import sys
@ -23,6 +24,7 @@ import urllib
from lib.core.data import conf
from lib.core.data import logger
from lib.core.settings import HEX_ENCODED_CHAR_REGEX
from lib.core.settings import SAFE_ENCODE_SLASH_REPLACEMENTS
from lib.core.settings import UNICODE_ENCODING
from lib.core.settings import URLENCODE_CHAR_LIMIT
from lib.core.settings import URLENCODE_FAILSAFE_CHARS
@ -148,10 +150,11 @@ def safecharencode(value):
retVal = value
if isinstance(value, basestring):
retVal = reduce(lambda x, y: x + (y if (y in string.printable or ord(y) > 255) else '\%02x' % ord(y)), value, unicode())
for char in "\t\n\r\x0b\x0c":
for char in SAFE_ENCODE_SLASH_REPLACEMENTS:
retVal = retVal.replace(char, repr(char).strip('\''))
retVal = reduce(lambda x, y: x + (y if (y in string.printable or ord(y) > 255) else '\%02x' % ord(y)), retVal, unicode())
elif isinstance(value, list):
for i in xrange(len(value)):
retVal[i] = safecharencode(value[i])
@ -165,18 +168,18 @@ def safechardecode(value):
retVal = value
if isinstance(value, basestring):
for char in "\t\n\r\x0b\x0c":
retVal = retVal.replace(repr(char).strip('\''), char)
regex = re.compile(HEX_ENCODED_CHAR_REGEX)
while True:
match = regex.search(retVal)
if match:
retVal = retVal.replace(match.group("result"), unhexlify(value.lstrip('\\')))
retVal = retVal.replace(match.group("result"), binascii.unhexlify(match.group("result").lstrip('\\')))
else:
break
for char in SAFE_ENCODE_SLASH_REPLACEMENTS[::-1]:
retVal = retVal.replace(repr(char).strip('\''), char)
elif isinstance(value, (list, tuple)):
for i in xrange(len(value)):
retVal[i] = safechardecode(value[i])

View File

@ -307,3 +307,6 @@ IGNORE_PARAMETERS = ("__VIEWSTATE", "__EVENTARGUMENT", "__EVENTTARGET", "__EVENT
# Regex used for recognition of hex encoded characters: a backslash followed
# by exactly two hexadecimal digits (e.g. \0a). The whole match, leading
# backslash included, is exposed through the named group "result"
HEX_ENCODED_CHAR_REGEX = r"(?P<result>\\[0-9A-Fa-f]{2})"
# Raw chars that will be safe encoded to their backslash (\) representations
# (e.g. newline to \n). In order: backslash, tab, newline, carriage return,
# vertical tab (\x0b) and form feed (\x0c).
# NOTE: order matters to code that applies these replacements one after
# another - with the backslash first, literal backslashes are escaped before
# the remaining replacements introduce backslashes of their own
SAFE_ENCODE_SLASH_REPLACEMENTS = "\\\t\n\r\x0b\x0c"