Fix for a fundamentally bad presumption (that the ratio should be > 0.6 in stable pages), especially today when we have stuff like where=2; also, just imagine HTTP 500 responses which could just say something like FALSE, while on the ratio level that would be far below 0.6

This commit is contained in:
Miroslav Stampar 2010-12-24 09:49:19 +00:00
parent cb17e61f35
commit d5eebb1cbf
2 changed files with 7 additions and 2 deletions

View File

@ -67,6 +67,10 @@ INFERENCE_BLANK_BREAK = 10
# string used for representation of unknown dbms version # string used for representation of unknown dbms version
UNKNOWN_DBMS_VERSION = "Unknown" UNKNOWN_DBMS_VERSION = "Unknown"
# lower and upper values for match ratio in case of stable page
LOWER_RATIO_BOUND = 0.02
UPPER_RATIO_BOUND = 0.98
# System variables # System variables
IS_WIN = subprocess.mswindows IS_WIN = subprocess.mswindows
# The name of the operating system dependent module imported. The following # The name of the operating system dependent module imported. The following

View File

@ -18,6 +18,7 @@ from lib.core.data import kb
from lib.core.data import logger from lib.core.data import logger
from lib.core.settings import CONSTANT_RATIO from lib.core.settings import CONSTANT_RATIO
from lib.core.settings import DIFF_TOLERANCE from lib.core.settings import DIFF_TOLERANCE
from lib.core.settings import LOWER_RATIO_BOUND, UPPER_RATIO_BOUND
def comparison(page, headers=None, getSeqMatcher=False, pageLength=None): def comparison(page, headers=None, getSeqMatcher=False, pageLength=None):
if page is None and pageLength is None: if page is None and pageLength is None:
@ -93,11 +94,11 @@ def comparison(page, headers=None, getSeqMatcher=False, pageLength=None):
if conf.thold: if conf.thold:
kb.matchRatio = conf.thold kb.matchRatio = conf.thold
elif kb.pageStable and ratio > 0.6 and ratio < 0.99: elif kb.pageStable and ratio >= LOWER_RATIO_BOUND and ratio <= UPPER_RATIO_BOUND:
kb.matchRatio = ratio kb.matchRatio = ratio
logger.debug("setting match ratio for current parameter to %.3f" % kb.matchRatio) logger.debug("setting match ratio for current parameter to %.3f" % kb.matchRatio)
elif not kb.pageStable or ( kb.pageStable and ratio < 0.6 ): elif not kb.pageStable:
kb.matchRatio = CONSTANT_RATIO kb.matchRatio = CONSTANT_RATIO
logger.debug("setting match ratio for current parameter to default value 0.900") logger.debug("setting match ratio for current parameter to default value 0.900")