Added powerful switch --longest-common for dealing with heavily dynamic page content

This commit is contained in:
Miroslav Stampar 2010-11-07 08:52:09 +00:00
parent 16f52ab7ba
commit 00dfd55830
3 changed files with 19 additions and 2 deletions

View File

@ -164,6 +164,12 @@ def checkDynamicContent(firstPage, secondPage):
logger.debug(debugMsg) logger.debug(debugMsg)
return return
if conf.longestCommon:
debugMsg = "dynamic content checking skipped "
debugMsg += "because longest common comparison used"
logger.debug(debugMsg)
return
infoMsg = "searching for dynamic content" infoMsg = "searching for dynamic content"
logger.info(infoMsg) logger.info(infoMsg)

View File

@ -197,6 +197,10 @@ def cmdLineParser():
action="store_true", default=False, action="store_true", default=False,
help="Compare pages based only on their textual content") help="Compare pages based only on their textual content")
injection.add_option("--longest-common", dest="longestCommon",
action="store_true", default=False,
help="Compare pages based on their longest common match")
injection.add_option("--tamper", dest="tamper", injection.add_option("--tamper", dest="tamper",
help="Use given script(s) for tampering injection data") help="Use given script(s) for tampering injection data")

View File

@ -9,6 +9,8 @@ See the file 'doc/COPYING' for copying permission
import re import re
from difflib import SequenceMatcher
from lib.core.common import wasLastRequestError from lib.core.common import wasLastRequestError
from lib.core.data import conf from lib.core.data import conf
from lib.core.data import kb from lib.core.data import kb
@ -50,7 +52,7 @@ def comparison(page, headers=None, getSeqMatcher=False, pageLength=None):
return re.search(conf.regexp, page, re.I | re.M) is not None return re.search(conf.regexp, page, re.I | re.M) is not None
# Dynamic content lines to be excluded before comparison # Dynamic content lines to be excluded before comparison
if not kb.nullConnection: if not kb.nullConnection and not conf.longestCommon:
for item in kb.dynamicMarkings: for item in kb.dynamicMarkings:
prefix, postfix = item prefix, postfix = item
if prefix is None: if prefix is None:
@ -66,7 +68,12 @@ def comparison(page, headers=None, getSeqMatcher=False, pageLength=None):
if kb.locks.seqLock: if kb.locks.seqLock:
kb.locks.seqLock.acquire() kb.locks.seqLock.acquire()
if not conf.eRegexp and not conf.eString and kb.nullConnection and pageLength: if conf.longestCommon:
(firstPage, secondPage) = (conf.seqMatcher.a, page)
match = SequenceMatcher(None, firstPage, secondPage).find_longest_match(0, len(firstPage), 0, len(secondPage))
ratio = round(SequenceMatcher(None, firstPage[match[0]:match[0]+match[2]], secondPage[match[1]:match[1]+match[2]]).ratio(), 3)
elif not conf.eRegexp and not conf.eString and kb.nullConnection and pageLength:
ratio = 1. * pageLength / len(conf.seqMatcher.a) ratio = 1. * pageLength / len(conf.seqMatcher.a)
if ratio > 1.: if ratio > 1.:
ratio = 1. / ratio ratio = 1. / ratio