From 00dfd55830a8f0cd1dbb42c299009bfc1a5168b8 Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Sun, 7 Nov 2010 08:52:09 +0000 Subject: [PATCH] added powerful switch --longest-common for dealing with heavy dynamicity --- lib/controller/checks.py | 6 ++++++ lib/parse/cmdline.py | 4 ++++ lib/request/comparison.py | 11 +++++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/lib/controller/checks.py b/lib/controller/checks.py index 4b2400fa1..79090fb7d 100644 --- a/lib/controller/checks.py +++ b/lib/controller/checks.py @@ -164,6 +164,12 @@ def checkDynamicContent(firstPage, secondPage): logger.debug(debugMsg) return + if conf.longestCommon: + debugMsg = "dynamic content checking skipped " + debugMsg += "because longest common comparison used" + logger.debug(debugMsg) + return + infoMsg = "searching for dynamic content" logger.info(infoMsg) diff --git a/lib/parse/cmdline.py b/lib/parse/cmdline.py index 8697621be..da2d6da80 100644 --- a/lib/parse/cmdline.py +++ b/lib/parse/cmdline.py @@ -197,6 +197,10 @@ def cmdLineParser(): action="store_true", default=False, help="Compare pages based only on their textual content") + injection.add_option("--longest-common", dest="longestCommon", + action="store_true", default=False, + help="Compare pages based on their longest common match") + injection.add_option("--tamper", dest="tamper", help="Use given script(s) for tampering injection data") diff --git a/lib/request/comparison.py b/lib/request/comparison.py index 089ff782b..162a40797 100644 --- a/lib/request/comparison.py +++ b/lib/request/comparison.py @@ -9,6 +9,8 @@ See the file 'doc/COPYING' for copying permission import re +from difflib import SequenceMatcher + from lib.core.common import wasLastRequestError from lib.core.data import conf from lib.core.data import kb @@ -50,7 +52,7 @@ def comparison(page, headers=None, getSeqMatcher=False, pageLength=None): return re.search(conf.regexp, page, re.I | re.M) is not None # Dynamic content lines to be excluded before comparison - if not kb.nullConnection: + if not kb.nullConnection and not conf.longestCommon: for item in kb.dynamicMarkings: prefix, postfix = item if prefix is None: @@ -66,7 +68,12 @@ def comparison(page, headers=None, getSeqMatcher=False, pageLength=None): if kb.locks.seqLock: kb.locks.seqLock.acquire() - if not conf.eRegexp and not conf.eString and kb.nullConnection and pageLength: + if conf.longestCommon: + (firstPage, secondPage) = (conf.seqMatcher.a, page) + match = SequenceMatcher(None, firstPage, secondPage).find_longest_match(0, len(firstPage), 0, len(secondPage)) + ratio = round(SequenceMatcher(None, firstPage[match[0]:match[0]+match[2]], secondPage[match[1]:match[1]+match[2]]).ratio(), 3) + + elif not conf.eRegexp and not conf.eString and kb.nullConnection and pageLength: ratio = 1. * pageLength / len(conf.seqMatcher.a) if ratio > 1.: ratio = 1. / ratio