Use compiled regexes for dynamic markings, plus other refactoring

This commit is contained in:
Miroslav Stampar 2010-12-04 13:23:28 +00:00
parent 0fc7a8f9e8
commit eeb199375b
3 changed files with 4 additions and 5 deletions

View File

@ -524,8 +524,6 @@ def checkDynamicContent(firstPage, secondPage):
infoMsg = "dynamic content marked for removal (%d region%s)" % (len(kb.dynamicMarkings), 's' if len(kb.dynamicMarkings) > 1 else '')
logger.info(infoMsg)
kb.pageTemplate = removeDynamicContent(kb.pageTemplate)
def checkStability():
"""
This function checks if the URL content is stable requesting the

View File

@ -1642,10 +1642,10 @@ def removeDynamicContent(page):
for item in kb.dynamicMarkings:
prefix, suffix = item
if prefix is None:
page = re.sub('(?s)^.+%s' % suffix, suffix, page)
getCompiledRegex('(?s)^.+%s' % suffix).sub(suffix, page)
elif suffix is None:
page = re.sub('(?s)%s.+$' % prefix, prefix, page)
getCompiledRegex('(?s)%s.+$' % prefix).sub(prefix, page)
else:
page = re.sub('(?s)%s.+%s' % (prefix, suffix), '%s%s' % (prefix, suffix), page)
getCompiledRegex('(?s)%s.+%s' % (prefix, suffix)).sub('%s%s' % (prefix, suffix), page)
return page

View File

@ -62,6 +62,7 @@ def comparison(page, headers=None, getSeqMatcher=False, pageLength=None):
# Dynamic content lines to be excluded before comparison
if not kb.nullConnection and not conf.longestCommon:
page = removeDynamicContent(page)
conf.seqMatcher.set_seq1(removeDynamicContent(kb.pageTemplate))
if not pageLength:
pageLength = len(page)