From 1f795622b32e9c7dce663efef6921e5d909d0d7f Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Sat, 4 Dec 2010 13:39:35 +0000 Subject: [PATCH] some fine tuning of dynamicity removing engine --- lib/controller/checks.py | 6 ++++-- lib/core/option.py | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/controller/checks.py b/lib/controller/checks.py index 48a30949a..ca9be0f1f 100644 --- a/lib/controller/checks.py +++ b/lib/controller/checks.py @@ -490,17 +490,19 @@ def checkDynamicContent(firstPage, secondPage): blocks = SequenceMatcher(None, firstPage, secondPage).get_matching_blocks() kb.dynamicMarkings = [] + # Removing too small matching blocks i = 0 while i < len(blocks): block = blocks[i] (_, _, length) = block - if length <= conf.minMatchBlock: + if length <= conf.dynMarkLength: blocks.remove(block) else: i += 1 + # Making of dynamic markings based on prefix/suffix principle if len(blocks) > 0: blocks.insert(0, None) blocks.append(None) @@ -518,7 +520,7 @@ def checkDynamicContent(firstPage, secondPage): prefix = trimAlphaNum(prefix) suffix = trimAlphaNum(suffix) - kb.dynamicMarkings.append((re.escape(prefix[-conf.dynMarkLength:]) if prefix else None, re.escape(suffix[:conf.dynMarkLength]) if suffix else None)) + kb.dynamicMarkings.append((re.escape(prefix[-conf.dynMarkLength/2:]) if prefix else None, re.escape(suffix[:conf.dynMarkLength/2]) if suffix else None)) if len(kb.dynamicMarkings) > 0: infoMsg = "dynamic content marked for removal (%d region%s)" % (len(kb.dynamicMarkings), 's' if len(kb.dynamicMarkings) > 1 else '') diff --git a/lib/core/option.py b/lib/core/option.py index 0389c9d03..0e03667a1 100644 --- a/lib/core/option.py +++ b/lib/core/option.py @@ -1076,7 +1076,6 @@ def __setConfAttributes(): conf.dbmsConnector = None conf.dbmsHandler = None conf.dumpPath = None - conf.minMatchBlock = 8 conf.dynMarkLength = 32 conf.httpHeaders = [] conf.hostname = None