Use compiled regexes for dynamic markings, plus other refactoring

This commit is contained in:
Miroslav Stampar 2010-12-04 13:23:28 +00:00
parent 0fc7a8f9e8
commit eeb199375b
3 changed files with 4 additions and 5 deletions

View File

@ -524,8 +524,6 @@ def checkDynamicContent(firstPage, secondPage):
infoMsg = "dynamic content marked for removal (%d region%s)" % (len(kb.dynamicMarkings), 's' if len(kb.dynamicMarkings) > 1 else '') infoMsg = "dynamic content marked for removal (%d region%s)" % (len(kb.dynamicMarkings), 's' if len(kb.dynamicMarkings) > 1 else '')
logger.info(infoMsg) logger.info(infoMsg)
kb.pageTemplate = removeDynamicContent(kb.pageTemplate)
def checkStability(): def checkStability():
""" """
This function checks if the URL content is stable requesting the This function checks if the URL content is stable requesting the

View File

@ -1642,10 +1642,10 @@ def removeDynamicContent(page):
for item in kb.dynamicMarkings: for item in kb.dynamicMarkings:
prefix, suffix = item prefix, suffix = item
if prefix is None: if prefix is None:
page = re.sub('(?s)^.+%s' % suffix, suffix, page) getCompiledRegex('(?s)^.+%s' % suffix).sub(suffix, page)
elif suffix is None: elif suffix is None:
page = re.sub('(?s)%s.+$' % prefix, prefix, page) getCompiledRegex('(?s)%s.+$' % prefix).sub(prefix, page)
else: else:
page = re.sub('(?s)%s.+%s' % (prefix, suffix), '%s%s' % (prefix, suffix), page) getCompiledRegex('(?s)%s.+%s' % (prefix, suffix)).sub('%s%s' % (prefix, suffix), page)
return page return page

View File

@ -62,6 +62,7 @@ def comparison(page, headers=None, getSeqMatcher=False, pageLength=None):
# Dynamic content lines to be excluded before comparison # Dynamic content lines to be excluded before comparison
if not kb.nullConnection and not conf.longestCommon: if not kb.nullConnection and not conf.longestCommon:
page = removeDynamicContent(page) page = removeDynamicContent(page)
conf.seqMatcher.set_seq1(removeDynamicContent(kb.pageTemplate))
if not pageLength: if not pageLength:
pageLength = len(page) pageLength = len(page)