From 8ec4bc9d9df86b438c7ad5f741b01d2bb410349b Mon Sep 17 00:00:00 2001
From: Miroslav Stampar
Date: Thu, 9 Jun 2011 06:32:53 +0000
Subject: [PATCH] revert of the last commit. have to think about it

---
 lib/core/common.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/lib/core/common.py b/lib/core/common.py
index 8b07287e8..e096fcb35 100644
--- a/lib/core/common.py
+++ b/lib/core/common.py
@@ -1399,8 +1399,7 @@ def sanitizeAsciiString(subject):
 def getFilteredPageContent(page, onlyText=True):
     retVal = page
 
-    # only if the page's charset had been successfully identified
-    if isinstance(page, unicode):
+    if isinstance(page, basestring):
         retVal = re.sub(r"(?s)||%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else ""), " ", page)
 
         while retVal.find(" ") != -1:
@@ -1413,8 +1412,7 @@ def getFilteredPageContent(page, onlyText=True):
 def getPageTextWordsSet(page):
     retVal = None
 
-    # only if the page's charset had been successfully identified
-    if isinstance(page, unicode):
+    if isinstance(page, basestring):
         page = getFilteredPageContent(page)
         retVal = set(re.findall(r"\w+", page))
 