Minor fixes and refactoring of safe char encoding (safecharencode/safechardecode)

This commit is contained in:
Miroslav Stampar 2011-04-14 15:54:00 +00:00
parent 866cdb4cf7
commit ded28442fb
2 changed files with 12 additions and 6 deletions

View File

@ -13,6 +13,7 @@ except:
import md5 import md5
import sha import sha
import binascii
import pickle import pickle
import re import re
import sys import sys
@ -23,6 +24,7 @@ import urllib
from lib.core.data import conf from lib.core.data import conf
from lib.core.data import logger from lib.core.data import logger
from lib.core.settings import HEX_ENCODED_CHAR_REGEX from lib.core.settings import HEX_ENCODED_CHAR_REGEX
from lib.core.settings import SAFE_ENCODE_SLASH_REPLACEMENTS
from lib.core.settings import UNICODE_ENCODING from lib.core.settings import UNICODE_ENCODING
from lib.core.settings import URLENCODE_CHAR_LIMIT from lib.core.settings import URLENCODE_CHAR_LIMIT
from lib.core.settings import URLENCODE_FAILSAFE_CHARS from lib.core.settings import URLENCODE_FAILSAFE_CHARS
@ -148,10 +150,11 @@ def safecharencode(value):
retVal = value retVal = value
if isinstance(value, basestring): if isinstance(value, basestring):
retVal = reduce(lambda x, y: x + (y if (y in string.printable or ord(y) > 255) else '\%02x' % ord(y)), value, unicode()) for char in SAFE_ENCODE_SLASH_REPLACEMENTS:
for char in "\t\n\r\x0b\x0c":
retVal = retVal.replace(char, repr(char).strip('\'')) retVal = retVal.replace(char, repr(char).strip('\''))
retVal = reduce(lambda x, y: x + (y if (y in string.printable or ord(y) > 255) else '\%02x' % ord(y)), retVal, unicode())
elif isinstance(value, list): elif isinstance(value, list):
for i in xrange(len(value)): for i in xrange(len(value)):
retVal[i] = safecharencode(value[i]) retVal[i] = safecharencode(value[i])
@ -165,18 +168,18 @@ def safechardecode(value):
retVal = value retVal = value
if isinstance(value, basestring): if isinstance(value, basestring):
for char in "\t\n\r\x0b\x0c":
retVal = retVal.replace(repr(char).strip('\''), char)
regex = re.compile(HEX_ENCODED_CHAR_REGEX) regex = re.compile(HEX_ENCODED_CHAR_REGEX)
while True: while True:
match = regex.search(retVal) match = regex.search(retVal)
if match: if match:
retVal = retVal.replace(match.group("result"), unhexlify(value.lstrip('\\'))) retVal = retVal.replace(match.group("result"), binascii.unhexlify(match.group("result").lstrip('\\')))
else: else:
break break
for char in SAFE_ENCODE_SLASH_REPLACEMENTS[::-1]:
retVal = retVal.replace(repr(char).strip('\''), char)
elif isinstance(value, (list, tuple)): elif isinstance(value, (list, tuple)):
for i in xrange(len(value)): for i in xrange(len(value)):
retVal[i] = safechardecode(value[i]) retVal[i] = safechardecode(value[i])

View File

@ -307,3 +307,6 @@ IGNORE_PARAMETERS = ("__VIEWSTATE", "__EVENTARGUMENT", "__EVENTTARGET", "__EVENT
# Regex used for recognition of hex encoded characters # Regex used for recognition of hex encoded characters
HEX_ENCODED_CHAR_REGEX = r"(?P<result>\\[0-9A-Fa-f]{2})" HEX_ENCODED_CHAR_REGEX = r"(?P<result>\\[0-9A-Fa-f]{2})"
# Raw chars that will be safe-encoded to their backslash (\) escaped representations (e.g. newline to \n)
SAFE_ENCODE_SLASH_REPLACEMENTS = "\\\t\n\r\x0b\x0c"