minor update

This commit is contained in:
Miroslav Stampar 2010-10-12 20:01:59 +00:00
parent d2ec132469
commit 9a08f7feb8

View File

@@ -1128,7 +1128,9 @@ def preparePageForLineComparison(page):
def getFilteredPageContent(page):
    """
    Return the text content of an HTML page with markup stripped out.

    Every <script>...</script> block, <style>...</style> block, remaining
    tag, tab, newline and carriage return is replaced by a single space,
    and any resulting run of consecutive spaces is collapsed into one.

    Values that are not strings (e.g. None) are returned unchanged.
    """

    retVal = page

    if isinstance(page, basestring):
        # Substitute a space (not an empty string) so that words which were
        # separated only by markup do not get glued together
        retVal = re.sub(r"(?s)<script.+?</script>|<style.+?</style>|<[^>]+>|\t|\n|\r", " ", page)

        # Collapse runs of spaces in a single pass; a find/replace loop keyed
        # on a single space would never terminate
        retVal = re.sub(r" {2,}", " ", retVal)

    return retVal
def getPageTextWordsSet(page):