mirror of https://github.com/sqlmapproject/sqlmap.git
synced 2025-02-09 08:00:36 +03:00
crawler fix (skip binary files)
commit 2a4a284a29
parent 20bb1a685b
@@ -549,9 +549,6 @@ class Tag(PageElement):
                                   val))
         self.attrs = map(convert, self.attrs)
 
-    # Reference: http://bytes.com/topic/python/answers/552874-py-2-5-bug-sgmllib
-    SGMLParser.convert_codepoint = lambda self, codepoint: unichr(codepoint)
-
     def getString(self):
         if (len(self.contents) == 1
             and isinstance(self.contents[0], NavigableString)):
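Judging by the class Tag(PageElement) hunk context, this first change is to the bundled BeautifulSoup module, and the hunk header (9 lines down to 6) indicates the monkey-patch of sgmllib's convert_codepoint was removed rather than added. A minimal Python 2 sketch of what the removed patch did, assuming the stdlib sgmllib module; the surrounding BeautifulSoup code is not reproduced here:

    from sgmllib import SGMLParser

    # Stock sgmllib resolves numeric character references with chr(), which
    # raises ValueError for codepoints above 255; the removed patch forced
    # unichr() so a reference like &#8217; decodes to a unicode character.
    SGMLParser.convert_codepoint = lambda self, codepoint: unichr(codepoint)

    parser = SGMLParser()
    print(repr(parser.convert_codepoint(8217)))  # u'\u2019' instead of a chr() error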
@@ -354,3 +354,6 @@ HTML_TITLE_REGEX = "<title>(?P<result>[^<]+)</title>"
 
 # Chars used to quickly distinguish if the user provided tainted parameter values
 DUMMY_SQL_INJECTION_CHARS = ";()\"'"
+
+# Extensions skipped by crawler
+CRAWL_EXCLUDE_EXTENSIONS = ("gif","jpg","jar","tif","bmp","war","ear","mpg","wmv","mpeg","scm","iso","dmp","dll","cab","so","avi","bin","exe","iso","tar","png","pdf","ps","mp3","zip","rar","gz")
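The HTML_TITLE_REGEX context places this hunk in lib/core/settings.py: it introduces the blacklist of file extensions the crawler should not fetch (note that the committed tuple lists "iso" twice, which is redundant but harmless). A small sketch of the membership test the new constant enables; is_crawlable and the sample URLs are invented for illustration:

    CRAWL_EXCLUDE_EXTENSIONS = ("gif", "jpg", "pdf", "zip")  # abbreviated copy

    def is_crawlable(url):
        # Everything after the last '.' is treated as the extension; a query
        # string such as "page.php?f=x.pdf" would fool this naive split.
        return url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS

    print(is_crawlable("http://target/index.php"))  # True
    print(is_crawlable("http://target/logo.GIF"))   # False; match is case-insensitive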
@@ -18,6 +18,7 @@ from lib.core.data import conf
 from lib.core.data import kb
 from lib.core.data import logger
 from lib.core.exception import sqlmapConnectionException
+from lib.core.settings import CRAWL_EXCLUDE_EXTENSIONS
 from lib.core.threads import getCurrentThreadData
 from lib.core.threads import runThreads
 from lib.request.connect import Connect as Request
@@ -51,10 +52,11 @@ class Crawler:
                         kb.locks.limits.release()
                         break
 
+                content = None
                 try:
-                    content = Request.getPage(url=current, raise404=False)[0]
+                    if current.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
+                        content = Request.getPage(url=current, raise404=False)[0]
                 except sqlmapConnectionException, e:
-                    content = None
                     errMsg = "connection exception detected (%s). skipping " % e
                     errMsg += "url '%s'" % current
                     logger.critical(errMsg)
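In lib/utils/crawler.py (the class Crawler context), the fetch step is reworked: content is initialized to None before the try block, the request is gated on the new extension blacklist, and the except branch no longer needs to reset content. A self-contained sketch of the same flow; fetch_page and FakeConnectionError stand in for Request.getPage and sqlmapConnectionException, whose internals the diff does not show:

    CRAWL_EXCLUDE_EXTENSIONS = ("gif", "jpg", "pdf", "zip")  # abbreviated copy

    class FakeConnectionError(Exception):
        pass

    def fetch_page(url):
        return u"<html><a href='/next'>next</a></html>"

    def crawl_step(current):
        content = None  # stays None when the URL is skipped or the fetch fails
        try:
            if current.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
                content = fetch_page(current)
        except FakeConnectionError, e:  # Python 2 except syntax, as in the diff
            print("connection exception detected (%s). skipping url '%s'" % (e, current))
        return content

    print(crawl_step("http://target/index.php") is not None)  # True
    print(crawl_step("http://target/manual.pdf") is not None)  # False: blacklisted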
@@ -62,7 +64,7 @@ class Crawler:
                 if not kb.threadContinue:
                     break
 
-                if content:
+                if isinstance(content, unicode):
                     soup = BeautifulSoup(content)
                     for tag in soup('a'):
                         if tag.get("href"):
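The last hunk tightens the parse guard from a simple truthiness test to isinstance(content, unicode). Since the extension blacklist can be fooled (extensionless URLs, query strings), this check catches binary bodies a different way: only content decoded to unicode text reaches BeautifulSoup. That getPage leaves undecodable binary bodies as a raw str is an inference from this guard, not something shown in the diff:

    def should_parse(content):
        # Parse only decoded text; raw bytes and skipped/failed fetches fall through.
        return isinstance(content, unicode)

    print(should_parse(u"<html></html>"))      # True: decoded text
    print(should_parse("\x89PNG\r\n\x1a\n"))   # False: str (bytes) in Python 2
    print(should_parse(None))                  # False: skipped or failed fetch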