diff --git a/lib/core/common.py b/lib/core/common.py index 8ce51f745..94e561a0d 100644 --- a/lib/core/common.py +++ b/lib/core/common.py @@ -1111,11 +1111,11 @@ def sanitizeAsciiString(subject): else: return None -def getFilteredPageContent(page): +def getFilteredPageContent(page, onlyText=True): retVal = page if isinstance(page, basestring): - retVal = re.sub(r"(?s)||<[^>]+>|\t|\n|\r", " ", page) + retVal = re.sub(r"(?s)|%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else ""), " ", page) while retVal.find(" ") != -1: retVal = retVal.replace(" ", " ")