mirror of
https://github.com/sqlmapproject/sqlmap.git
synced 2025-02-03 05:04:11 +03:00
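Revert of the revert: filter page content only when the page is unicode, i.e. its charset has been successfully identified (it's a good idea to keep it this way because of problems with e.g. --text-only and binary content)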
This commit is contained in:
parent
8ec4bc9d9d
commit
af5fe457bd
|
@ -1399,7 +1399,8 @@ def sanitizeAsciiString(subject):
|
||||||
def getFilteredPageContent(page, onlyText=True):
|
def getFilteredPageContent(page, onlyText=True):
|
||||||
retVal = page
|
retVal = page
|
||||||
|
|
||||||
if isinstance(page, basestring):
|
# only if the page's charset has been successfully identified
|
||||||
|
if isinstance(page, unicode):
|
||||||
retVal = re.sub(r"(?s)<script.+?</script>|<!--.+?-->|<style.+?</style>%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else ""), " ", page)
|
retVal = re.sub(r"(?s)<script.+?</script>|<!--.+?-->|<style.+?</style>%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else ""), " ", page)
|
||||||
|
|
||||||
while retVal.find(" ") != -1:
|
while retVal.find(" ") != -1:
|
||||||
|
@ -1412,7 +1413,8 @@ def getFilteredPageContent(page, onlyText=True):
|
||||||
def getPageTextWordsSet(page):
|
def getPageTextWordsSet(page):
|
||||||
retVal = None
|
retVal = None
|
||||||
|
|
||||||
if isinstance(page, basestring):
|
# only if the page's charset has been successfully identified
|
||||||
|
if isinstance(page, unicode):
|
||||||
page = getFilteredPageContent(page)
|
page = getFilteredPageContent(page)
|
||||||
retVal = set(re.findall(r"\w+", page))
|
retVal = set(re.findall(r"\w+", page))
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user