Added powerful switch --longest-common for dealing with heavily dynamic page content

This commit is contained in:
Miroslav Stampar 2010-11-07 08:52:09 +00:00
parent 16f52ab7ba
commit 00dfd55830
3 changed files with 19 additions and 2 deletions

View File

@ -164,6 +164,12 @@ def checkDynamicContent(firstPage, secondPage):
logger.debug(debugMsg)
return
if conf.longestCommon:
debugMsg = "dynamic content checking skipped "
debugMsg += "because longest common comparison used"
logger.debug(debugMsg)
return
infoMsg = "searching for dynamic content"
logger.info(infoMsg)

View File

@ -197,6 +197,10 @@ def cmdLineParser():
action="store_true", default=False,
help="Compare pages based only on their textual content")
injection.add_option("--longest-common", dest="longestCommon",
action="store_true", default=False,
help="Compare pages based on their longest common match")
injection.add_option("--tamper", dest="tamper",
help="Use given script(s) for tampering injection data")

View File

@ -9,6 +9,8 @@ See the file 'doc/COPYING' for copying permission
import re
from difflib import SequenceMatcher
from lib.core.common import wasLastRequestError
from lib.core.data import conf
from lib.core.data import kb
@ -50,7 +52,7 @@ def comparison(page, headers=None, getSeqMatcher=False, pageLength=None):
return re.search(conf.regexp, page, re.I | re.M) is not None
# Dynamic content lines to be excluded before comparison
if not kb.nullConnection:
if not kb.nullConnection and not conf.longestCommon:
for item in kb.dynamicMarkings:
prefix, postfix = item
if prefix is None:
@ -66,7 +68,12 @@ def comparison(page, headers=None, getSeqMatcher=False, pageLength=None):
if kb.locks.seqLock:
kb.locks.seqLock.acquire()
if not conf.eRegexp and not conf.eString and kb.nullConnection and pageLength:
if conf.longestCommon:
(firstPage, secondPage) = (conf.seqMatcher.a, page)
match = SequenceMatcher(None, firstPage, secondPage).find_longest_match(0, len(firstPage), 0, len(secondPage))
ratio = round(SequenceMatcher(None, firstPage[match[0]:match[0]+match[2]], secondPage[match[1]:match[1]+match[2]]).ratio(), 3)
elif not conf.eRegexp and not conf.eString and kb.nullConnection and pageLength:
ratio = 1. * pageLength / len(conf.seqMatcher.a)
if ratio > 1.:
ratio = 1. / ratio