Update for Issue #352

This commit is contained in:
Miroslav Stampar 2013-03-12 20:10:32 +01:00
parent db0a1e58b9
commit 65306f1ac1

View File

@ -713,7 +713,14 @@ def getDirs():
return list(directories) return list(directories)
def filePathToString(filePath): def filePathToSafeString(filePath):
"""
Returns a string representation of a given filepath that is safe to use as a single filename
>>> filePathToSafeString('C:/Windows/system32')
'C__Windows_system32'
"""
retVal = filePath.replace("/", "_").replace("\\", "_") retVal = filePath.replace("/", "_").replace("\\", "_")
retVal = retVal.replace(" ", "_").replace(":", "_") retVal = retVal.replace(" ", "_").replace(":", "_")
@ -885,6 +892,10 @@ def readInput(message, default=None, checkBatch=True):
def randomRange(start=0, stop=1000): def randomRange(start=0, stop=1000):
""" """
Returns random integer value in given range Returns random integer value in given range
>>> random.seed(0)
>>> randomRange(1, 500)
423
""" """
return int(random.randint(start, stop)) return int(random.randint(start, stop))
@ -892,6 +903,10 @@ def randomRange(start=0, stop=1000):
def randomInt(length=4): def randomInt(length=4):
""" """
Returns random integer value with provided number of digits Returns random integer value with provided number of digits
>>> random.seed(0)
>>> randomInt(6)
874254
""" """
return int("".join(random.choice(string.digits if _ != 0 else string.digits.replace('0', '')) for _ in xrange(0, length))) return int("".join(random.choice(string.digits if _ != 0 else string.digits.replace('0', '')) for _ in xrange(0, length)))
@ -899,6 +914,10 @@ def randomInt(length=4):
def randomStr(length=4, lowercase=False, alphabet=None): def randomStr(length=4, lowercase=False, alphabet=None):
""" """
Returns random string value with provided number of characters Returns random string value with provided number of characters
>>> random.seed(0)
>>> randomStr(6)
'RNvnAv'
""" """
if alphabet: if alphabet:
@ -913,6 +932,9 @@ def randomStr(length=4, lowercase=False, alphabet=None):
def sanitizeStr(value): def sanitizeStr(value):
""" """
Sanitizes string value with respect to newline and carriage-return characters Sanitizes string value with respect to newline and carriage-return characters
>>> sanitizeStr('foo\\n\\rbar')
u'foo bar'
""" """
return getUnicode(value).replace("\n", " ").replace("\r", "") return getUnicode(value).replace("\n", " ").replace("\r", "")
@ -1214,6 +1236,9 @@ def expandAsteriskForColumns(expression):
def getLimitRange(count, dump=False, plusOne=False): def getLimitRange(count, dump=False, plusOne=False):
""" """
Returns range of values used in limit/offset constructs Returns range of values used in limit/offset constructs
>>> [_ for _ in getLimitRange(10)]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
""" """
retVal = None retVal = None
@ -1321,6 +1346,14 @@ def getFileType(filePath):
return "text" if "ASCII" in _ or "text" in _ else "binary" return "text" if "ASCII" in _ or "text" in _ else "binary"
def getCharset(charsetType=None): def getCharset(charsetType=None):
"""
Returns list with integers representing characters of a given
charset type appropriate for inference techniques
>>> getCharset(CHARSET_TYPE.BINARY)
[0, 1, 47, 48, 49]
"""
asciiTbl = [] asciiTbl = []
if charsetType is None: if charsetType is None:
@ -1363,6 +1396,9 @@ def getCharset(charsetType=None):
def directoryPath(filepath): def directoryPath(filepath):
""" """
Returns directory path for a given filepath Returns directory path for a given filepath
>>> directoryPath('/var/log/apache.log')
'/var/log'
""" """
retVal = filepath retVal = filepath
@ -1375,6 +1411,9 @@ def directoryPath(filepath):
def normalizePath(filepath): def normalizePath(filepath):
""" """
Returns normalized string representation of a given filepath Returns normalized string representation of a given filepath
>>> normalizePath('//var///log/apache.log')
'//var/log/apache.log'
""" """
retVal = filepath retVal = filepath
@ -1388,6 +1427,9 @@ def normalizePath(filepath):
def safeStringFormat(format_, params): def safeStringFormat(format_, params):
""" """
Avoids problems with inappropriate string format strings Avoids problems with inappropriate string format strings
>>> safeStringFormat('foobar%d%s', ('1', 2))
u'foobar12'
""" """
retVal = format_.replace("%d", "%s") retVal = format_.replace("%d", "%s")
@ -1413,6 +1455,9 @@ def getFilteredPageContent(page, onlyText=True):
""" """
Returns filtered page content without script, style and/or comments Returns filtered page content without script, style and/or comments
or all HTML tags or all HTML tags
>>> getFilteredPageContent(u'<html><title>foobar</title><body>test</body></html>')
u'foobar test'
""" """
retVal = page retVal = page
@ -1422,13 +1467,16 @@ def getFilteredPageContent(page, onlyText=True):
retVal = re.sub(r"(?si)<script.+?</script>|<!--.+?-->|<style.+?</style>%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else ""), " ", page) retVal = re.sub(r"(?si)<script.+?</script>|<!--.+?-->|<style.+?</style>%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else ""), " ", page)
while retVal.find(" ") != -1: while retVal.find(" ") != -1:
retVal = retVal.replace(" ", " ") retVal = retVal.replace(" ", " ")
retVal = htmlunescape(retVal) retVal = htmlunescape(retVal.strip())
return retVal return retVal
def getPageWordSet(page): def getPageWordSet(page):
""" """
Returns word set used in page content Returns word set used in page content
>>> sorted(getPageWordSet(u'<html><title>foobar</title><body>test</body></html>'))
[u'foobar', u'test']
""" """
retVal = set() retVal = set()
@ -1473,6 +1521,11 @@ def showStaticWords(firstPage, secondPage):
def isWindowsDriveLetterPath(filepath): def isWindowsDriveLetterPath(filepath):
""" """
Returns True if given filepath starts with a Windows drive letter Returns True if given filepath starts with a Windows drive letter
>>> isWindowsDriveLetterPath('C:\\boot.ini')
True
>>> isWindowsDriveLetterPath('/var/log/apache.log')
False
""" """
return re.search("\A[\w]\:", filepath) is not None return re.search("\A[\w]\:", filepath) is not None
@ -1634,6 +1687,9 @@ def stdev(values):
""" """
Computes standard deviation of a list of numbers. Computes standard deviation of a list of numbers.
Reference: http://www.goldb.org/corestats.html Reference: http://www.goldb.org/corestats.html
>>> stdev([0.9, 0.9, 0.9, 1.0, 0.8, 0.9])
0.06324555320336757
""" """
if not values or len(values) < 2: if not values or len(values) < 2:
@ -1654,6 +1710,9 @@ def stdev(values):
def average(values): def average(values):
""" """
Computes the arithmetic mean of a list of numbers. Computes the arithmetic mean of a list of numbers.
>>> average([0.9, 0.9, 0.9, 1.0, 0.8, 0.9])
0.9
""" """
return (sum(values) / len(values)) if values else None return (sum(values) / len(values)) if values else None
@ -1872,6 +1931,9 @@ def longestCommonPrefix(*sequences):
""" """
Returns longest common prefix occurring in given sequences Returns longest common prefix occurring in given sequences
Reference: http://boredzo.org/blog/archives/2007-01-06/longest-common-prefix-in-python-2 Reference: http://boredzo.org/blog/archives/2007-01-06/longest-common-prefix-in-python-2
>>> longestCommonPrefix('foobar', 'fobar')
'fo'
""" """
if len(sequences) == 1: if len(sequences) == 1:
@ -1904,6 +1966,10 @@ def pushValue(value):
def popValue(): def popValue():
""" """
Pop value from the stack (thread dependent) Pop value from the stack (thread dependent)
>>> pushValue('foobar')
>>> popValue()
'foobar'
""" """
return getCurrentThreadData().valueStack.pop() return getCurrentThreadData().valueStack.pop()
@ -2028,6 +2094,13 @@ def findMultipartPostBoundary(post):
return retVal return retVal
def urldecode(value, encoding=None, unsafe="%%&=;+%s" % CUSTOM_INJECTION_MARK_CHAR, convall=False, plusspace=True): def urldecode(value, encoding=None, unsafe="%%&=;+%s" % CUSTOM_INJECTION_MARK_CHAR, convall=False, plusspace=True):
"""
URL decodes given value
>>> urldecode('AND%201%3E%282%2B3%29%23', convall=True)
u'AND 1>(2+3)#'
"""
result = value result = value
if value: if value:
@ -2044,10 +2117,10 @@ def urldecode(value, encoding=None, unsafe="%%&=;+%s" % CUSTOM_INJECTION_MARK_CH
charset = reduce(lambda x, y: x.replace(y, ""), unsafe, string.printable) charset = reduce(lambda x, y: x.replace(y, ""), unsafe, string.printable)
char = chr(ord(match.group(1).decode("hex"))) char = chr(ord(match.group(1).decode("hex")))
return char if char in charset else match.group(0) return char if char in charset else match.group(0)
result = re.sub("%([0-9a-fA-F]{2})", _, value) result = value
if plusspace: if plusspace:
result = result.replace("+", " ") # plus sign has a special meaning in url encoded data (hence the usage of urllib.unquote_plus in convall case) result = result.replace("+", " ") # plus sign has a special meaning in url encoded data (hence the usage of urllib.unquote_plus in convall case)
result = re.sub("%([0-9a-fA-F]{2})", _, result)
if isinstance(result, str): if isinstance(result, str):
result = unicode(result, encoding or UNICODE_ENCODING, "replace") result = unicode(result, encoding or UNICODE_ENCODING, "replace")