fix for Bug #165

This commit is contained in:
Miroslav Stampar 2010-09-13 13:31:01 +00:00
parent 61120b0bac
commit 19fb2e3dcf
4 changed files with 96 additions and 32 deletions

View File

@ -28,8 +28,10 @@ import time
from lib.core.agent import agent
from lib.core.common import getUnicode
from lib.core.common import preparePageForLineComparison
from lib.core.common import randomInt
from lib.core.common import randomStr
from lib.core.common import DynamicContentItem
from lib.core.convert import md5hash
from lib.core.data import conf
from lib.core.data import kb
@ -278,6 +280,31 @@ def checkDynParam(place, parameter, value):
return condition
def checkDynamicContent(firstPage, secondPage):
    """
    Compares two versions of the same page line by line and records
    every line that differs between them inside kb.dynamicContent.

    A single differing line is stored as a DynamicContentItem holding
    its line number, the total number of lines and the content of the
    neighbouring lines (None at the page boundaries). A run of
    consecutive differing lines is stored as one item whose lineNumber
    is a list of all line numbers in the run.

    Nothing is recorded when the pages can not be split into lines or
    when they split into a different number of lines.
    """

    infoMsg = "testing for dynamic content lines"
    logger.info(infoMsg)

    linesFirst = preparePageForLineComparison(firstPage)
    linesSecond = preparePageForLineComparison(secondPage)

    # preparePageForLineComparison() hands back non-string input
    # untouched, so make sure there really are two lists to compare
    comparable = isinstance(linesFirst, list) and isinstance(linesSecond, list)

    if comparable and len(linesFirst) == len(linesSecond):
        lastLineNumber = None
        pageLinesNumber = len(linesFirst)

        for i in range(pageLinesNumber):
            if linesFirst[i] == linesSecond[i]:
                continue

            lineAfter = linesFirst[i + 1] if i < pageLinesNumber - 1 else None

            if lastLineNumber == i - 1:
                # Extend the run started by the previous differing line
                item = kb.dynamicContent[-1]

                if isinstance(item.lineNumber, int):
                    item.lineNumber = [item.lineNumber]

                item.lineNumber.append(i)

                # The neighbour stored so far is the line just appended
                # (dynamic itself); keep the trailing neighbour pointing
                # at the line that follows the last line of the run
                item.lineContentAfter = lineAfter
            else:
                lineBefore = linesFirst[i - 1] if i > 0 else None
                kb.dynamicContent.append(DynamicContentItem(i, pageLinesNumber, lineBefore, lineAfter))

            lastLineNumber = i

    if kb.dynamicContent:
        infoMsg = "found probably removable dynamic lines"
        logger.info(infoMsg)
def checkStability():
"""
This function checks if the URL content is stable requesting the
@ -318,6 +345,8 @@ def checkStability():
warnMsg += "string or regular expression to match on"
logger.warn(warnMsg)
checkDynamicContent(firstPage, secondPage)
return condition
def checkString():

View File

@ -1068,6 +1068,12 @@ def sanitizeAsciiString(subject):
else:
return None
def preparePageForLineComparison(page):
    """
    Splits page content into a list of lines. Before splitting, a line
    break is put between directly adjacent tags ("><") and in place of
    every "<br>". Non-string input is returned unchanged.
    """

    if not isinstance(page, basestring):
        return page

    text = page.replace("><", ">\n<")
    text = text.replace("<br>", "\n")

    return text.splitlines()
def decloakToNamedTemporaryFile(filepath, name=None):
retVal = NamedTemporaryFile()
@ -1410,32 +1416,6 @@ def getBruteUnicode(string):
retVal += unichr(ord(char))
return retVal
class UnicodeRawConfigParser(RawConfigParser):
    """
    RawConfigParser whose write() passes option values through
    getUnicode() before writing them out
    """

    def write(self, fp):
        """
        Write an .ini-format representation of the configuration state.
        """

        def _formatOption(name, content):
            # Line breaks inside a value are followed by a tab so the
            # value is written as indented continuation lines
            return "%s = %s\n" % (name, getUnicode(content).replace('\n', '\n\t'))

        if self._defaults:
            fp.write("[%s]\n" % DEFAULTSECT)

            for name, content in self._defaults.items():
                fp.write(_formatOption(name, content))

            fp.write("\n")

        for section in self._sections:
            fp.write("[%s]\n" % section)

            for name, content in self._sections[section].items():
                if name == "__name__":
                    continue

                if content is None:
                    # Option without a value is written as a bare key
                    fp.write("%s\n" % (name))
                else:
                    fp.write(_formatOption(name, content))

            fp.write("\n")
# http://boredzo.org/blog/archives/2007-01-06/longest-common-prefix-in-python-2
def longestCommonPrefix(*sequences):
if len(sequences) == 1:
@ -1489,3 +1469,40 @@ def smokeTest():
infoMsg += "FAILED"
logger.error(infoMsg)
return retVal
class UnicodeRawConfigParser(RawConfigParser):
    """
    RawConfigParser whose write() passes option values through
    getUnicode() before writing them out
    """

    def write(self, fp):
        """
        Write an .ini-format representation of the configuration state.
        """

        def _formatOption(name, content):
            # Line breaks inside a value are followed by a tab so the
            # value is written as indented continuation lines
            return "%s = %s\n" % (name, getUnicode(content).replace('\n', '\n\t'))

        if self._defaults:
            fp.write("[%s]\n" % DEFAULTSECT)

            for name, content in self._defaults.items():
                fp.write(_formatOption(name, content))

            fp.write("\n")

        for section in self._sections:
            fp.write("[%s]\n" % section)

            for name, content in self._sections[section].items():
                if name == "__name__":
                    continue

                if content is None:
                    # Option without a value is written as a bare key
                    fp.write("%s\n" % (name))
                else:
                    fp.write(_formatOption(name, content))

            fp.write("\n")
class DynamicContentItem:
    """
    Holds a line of the content page found to be dynamic (candidate
    for removal prior to the detection phase), together with the
    total number of page lines and the content of the lines
    surrounding it
    """

    def __init__(self, lineNumber, pageTotal, lineContentBefore, lineContentAfter):
        # Position of the dynamic content and size of the page it was seen in
        self.lineNumber, self.pageTotal = lineNumber, pageTotal

        # Content of the neighbouring lines
        self.lineContentBefore, self.lineContentAfter = lineContentBefore, lineContentAfter

View File

@ -1001,6 +1001,7 @@ def __setKnowledgeBaseAttributes():
kb.dep = None
kb.docRoot = None
kb.dynamicContent = []
kb.headersCount = 0
kb.headersFp = {}
kb.htmlFp = []

View File

@ -24,7 +24,9 @@ Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
import re
from lib.core.common import preparePageForLineComparison
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.session import setMatchRatio
@ -59,6 +61,21 @@ def comparison(page, headers=None, getSeqMatcher=False):
if conf.regexp:
return re.search(conf.regexp, page, re.I | re.M) is not None
# Dynamic content lines to be excluded before calculating page hash
if kb.dynamicContent:
lines = preparePageForLineComparison(page)
for item in kb.dynamicContent:
if len(lines) == item.pageTotal:
before = item.lineNumber - 1 if isinstance(item.lineNumber, int) else item.lineNumber[0] - 1
after = item.lineNumber + 1 if isinstance(item.lineNumber, int) else item.lineNumber[-1] + 1
if (item.lineContentBefore and lines[before] != item.lineContentBefore) or (item.lineContentAfter and lines[after] != item.lineContentAfter):
continue
if isinstance(item.lineNumber, int):
page = page.replace(lines[item.lineNumber], '')
else:
for i in item.lineNumber:
page = page.replace(lines[i], '')
if conf.seqLock:
conf.seqLock.acquire()