diff --git a/lib/core/common.py b/lib/core/common.py index 89b92ab50..a31945810 100644 --- a/lib/core/common.py +++ b/lib/core/common.py @@ -1586,7 +1586,7 @@ def getFilteredPageContent(page, onlyText=True): # only if the page's charset has been successfully identified if isinstance(page, unicode): - retVal = re.sub(r"(?s)||%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else ""), " ", page, flags=re.I) + retVal = re.sub(r"(?si)||%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else ""), " ", page) while retVal.find(" ") != -1: retVal = retVal.replace(" ", " ")