adding Beautifulsoup (BSD) into extras; adding --crawl to options
parent 8c04aa871a
commit 07e2c72943
extra/beautifulsoup/__init__.py (new file, 37 lines)
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2004-2010, Leonard Richardson
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+#   copyright notice, this list of conditions and the following
+#   disclaimer in the documentation and/or other materials provided
+#   with the distribution.
+#
+# * Neither the name of the the Beautiful Soup Consortium and All
+#   Night Kosher Bakery nor the names of its contributors may be
+#   used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
+#
+
+pass
extra/beautifulsoup/beautifulsoup.py (new file, 2014 lines)
File diff suppressed because it is too large.
@@ -589,7 +589,7 @@ def start():
     if kb.dataOutputFlag and not conf.multipleTargets:
         logger.info("Fetched data logged to text files under '%s'" % conf.outputPath)

-    if conf.multipleTargets:
+    if conf.multipleTargets and conf.resultsFilename:
         infoMsg = "you can find results of scanning in multiple targets "
         infoMsg += "mode inside the CSV file '%s'" % conf.resultsFilename
         logger.info(infoMsg)
@@ -114,6 +114,7 @@ from lib.request.certhandler import HTTPSCertAuthHandler
 from lib.request.rangehandler import HTTPRangeHandler
 from lib.request.redirecthandler import SmartRedirectHandler
 from lib.request.templates import getPageTemplate
+from lib.utils.crawler import Crawler
 from lib.utils.deps import checkDependencies
 from lib.utils.google import Google

@ -388,6 +389,13 @@ def __setRequestFromFile():
|
||||||
|
|
||||||
__feedTargetsDict(conf.requestFile, addedTargetUrls)
|
__feedTargetsDict(conf.requestFile, addedTargetUrls)
|
||||||
|
|
||||||
|
def __setCrawler():
|
||||||
|
if not conf.crawl:
|
||||||
|
return
|
||||||
|
|
||||||
|
crawler = Crawler()
|
||||||
|
crawler.getTargetUrls()
|
||||||
|
|
||||||
def __setGoogleDorking():
|
def __setGoogleDorking():
|
||||||
"""
|
"""
|
||||||
This function checks if the way to request testable hosts is through
|
This function checks if the way to request testable hosts is through
|
||||||
|
@ -1278,7 +1286,7 @@ def __cleanupOptions():
|
||||||
if conf.tmpPath:
|
if conf.tmpPath:
|
||||||
conf.tmpPath = ntToPosixSlashes(normalizePath(conf.tmpPath))
|
conf.tmpPath = ntToPosixSlashes(normalizePath(conf.tmpPath))
|
||||||
|
|
||||||
if conf.googleDork or conf.logFile or conf.bulkFile or conf.forms:
|
if conf.googleDork or conf.logFile or conf.bulkFile or conf.forms or conf.crawl:
|
||||||
conf.multipleTargets = True
|
conf.multipleTargets = True
|
||||||
|
|
||||||
if conf.optimize:
|
if conf.optimize:
|
||||||
@@ -1800,6 +1808,7 @@ def init(inputOptions=advancedDict(), overrideOptions=False):
     __setDNSCache()
     __setSafeUrl()
     __setGoogleDorking()
+    __setCrawler()
     __setBulkMultipleTargets()
     __urllib2Opener()
     __findPageForms()
@ -167,6 +167,8 @@ optDict = {
|
||||||
"beep": "boolean",
|
"beep": "boolean",
|
||||||
"checkPayload": "boolean",
|
"checkPayload": "boolean",
|
||||||
"cleanup": "boolean",
|
"cleanup": "boolean",
|
||||||
|
"crawl": "boolean",
|
||||||
|
"forms": "boolean",
|
||||||
"googlePage": "integer",
|
"googlePage": "integer",
|
||||||
"mobile": "boolean",
|
"mobile": "boolean",
|
||||||
"pageRank": "boolean",
|
"pageRank": "boolean",
|
||||||
|
|
|
@ -511,6 +511,10 @@ def cmdLineParser():
|
||||||
help="Clean up the DBMS by sqlmap specific "
|
help="Clean up the DBMS by sqlmap specific "
|
||||||
"UDF and tables")
|
"UDF and tables")
|
||||||
|
|
||||||
|
miscellaneous.add_option("--crawl", dest="crawl",
|
||||||
|
action="store_true",
|
||||||
|
help="Crawl the website starting from the target url")
|
||||||
|
|
||||||
miscellaneous.add_option("--forms", dest="forms",
|
miscellaneous.add_option("--forms", dest="forms",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Parse and test forms on target url")
|
help="Parse and test forms on target url")
|
||||||
|
|
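The new switch is registered the same way as the existing --forms option. As a standalone sketch (illustrative only, not part of the commit), this is how an optparse store_true switch such as --crawl surfaces as a boolean value on the parsed options, which is what the "crawl": "boolean" entry added to optDict above describes and why __setCrawler() can simply test conf.crawl:

# Minimal sketch, not sqlmap code: a store_true switch becomes a boolean-ish value.
from optparse import OptionParser

parser = OptionParser()
parser.add_option("--crawl", dest="crawl", action="store_true",
                  help="Crawl the website starting from the target url")

opts, _ = parser.parse_args(["--crawl"])
assert opts.crawl is True      # switch present -> True
opts, _ = parser.parse_args([])
assert opts.crawl is None      # switch absent -> None (falsy), hence "if not conf.crawl: return"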
lib/utils/crawler.py (new file, 95 lines)
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+"""
+$Id$
+
+Copyright (c) 2006-2011 sqlmap developers (http://sqlmap.sourceforge.net/)
+See the file 'doc/COPYING' for copying permission
+"""
+
+import re
+import threading
+import urlparse
+
+from lib.core.common import dataToStdout
+from lib.core.data import conf
+from lib.core.data import kb
+from lib.core.data import logger
+from lib.core.exception import sqlmapConnectionException
+from lib.core.threads import getCurrentThreadData
+from lib.core.threads import runThreads
+from lib.request.connect import Connect as Request
+from extra.beautifulsoup.beautifulsoup import BeautifulSoup
+from extra.oset.pyoset import oset
+
+class Crawler:
+    """
+    This class defines methods used to perform crawling (command
+    line option '--crawl'
+    """
+
+    def getTargetUrls(self, depth=1):
+        try:
+            threadData = getCurrentThreadData()
+            threadData.shared.outputs = oset()
+
+            lockNames = ('limits', 'outputs')
+            for lock in lockNames:
+                kb.locks[lock] = threading.Lock()
+
+            def crawlThread():
+                threadData = getCurrentThreadData()
+
+                while kb.threadContinue:
+                    kb.locks.limits.acquire()
+                    if threadData.shared.unprocessed:
+                        current = threadData.shared.unprocessed.pop()
+                        kb.locks.limits.release()
+                    else:
+                        kb.locks.limits.release()
+                        break
+
+                    content = Request.getPage(url=conf.url)[0]
+
+                    if not kb.threadContinue:
+                        break
+
+                    soup = BeautifulSoup(content)
+                    for tag in soup('a'):
+                        if tag.get("href"):
+                            url = urlparse.urljoin(conf.url, tag.get("href"))
+                            # flag to know if we are dealing with the same target host
+                            target = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], [url, conf.url]))
+                            if target:
+                                kb.locks.outputs.acquire()
+                                threadData.shared.deeper.add(url)
+                                if re.search(r"(.*?)\?(.+)", url):
+                                    threadData.shared.outputs.add(url)
+                                kb.locks.outputs.release()
+
+            threadData.shared.deeper = set()
+            threadData.shared.unprocessed = set([conf.url])
+
+            logger.info("starting crawling")
+
+            for i in xrange(depth):
+                numThreads = min(conf.threads, len(threadData.shared.unprocessed))
+                logger.debug("processing depth: %d" % i)
+                runThreads(numThreads, crawlThread)
+                threadData.shared.unprocessed = threadData.shared.deeper
+
+        except KeyboardInterrupt:
+            warnMsg = "user aborted during crawling. sqlmap "
+            warnMsg += "will use partial list"
+            logger.warn(warnMsg)
+
+        except sqlmapConnectionException, e:
+            errMsg = "connection exception detected. sqlmap "
+            errMsg += "will use partial list"
+            errMsg += "'%s'" % e
+            logger.critical(errMsg)
+
+        finally:
+            for url in threadData.shared.outputs:
+                kb.targetUrls.add(( url, None, None, None ))
+            kb.suppressResumeInfo = False
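The same-host test inside crawlThread() is compact, so here is a standalone sketch (illustrative only, not part of the commit; Python 2 like the rest of the codebase) of what it evaluates: both URLs are reduced to their hostname with any port stripped and then compared, so only links pointing back at the target host are queued for the next depth.

# Minimal sketch, not sqlmap code: the same-host check used by crawlThread().
import urlparse

def sameHost(url, baseUrl):
    hostOf = lambda u: urlparse.urlparse(u).netloc.split(':')[0]
    return reduce(lambda x, y: x == y, map(hostOf, [url, baseUrl]))

assert sameHost("http://target.tld:8080/page.php?id=1", "http://target.tld/")
assert not sameHost("http://other.tld/page.php?id=1", "http://target.tld/")

Note that, as committed, crawlThread() requests conf.url on every iteration while the popped current entry goes unused in this hunk.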
@@ -60,7 +60,7 @@ class Google:
         """

         for match in self.__matches:
-            if re.search(r"(.*?)\?(.+)", match, re.I):
+            if re.search(r"(.*?)\?(.+)", match):
                 kb.targetUrls.add(( htmlunescape(htmlunescape(match)), None, None, None ))
             elif re.search(URI_INJECTABLE_REGEX, match, re.I):
                 kb.targetUrls.add(( htmlunescape(htmlunescape("%s" % match)), None, None, None ))
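The only change in this hunk drops the re.I flag from the query-string test; the pattern contains no letters, so case-insensitivity adds nothing. A standalone sketch (illustrative only) of what r"(.*?)\?(.+)" keeps, here and in the crawler above:

# Minimal sketch, not sqlmap code: the query-string filter keeps likely injectable GET URLs.
import re

candidates = ["http://target.tld/news.php?id=1", "http://target.tld/about.html"]
withQuery = [url for url in candidates if re.search(r"(.*?)\?(.+)", url)]
assert withQuery == ["http://target.tld/news.php?id=1"]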
@@ -543,6 +543,10 @@ checkPayload = False
 # Valid: True or False
 cleanup = False

+# Crawl the website starting from the target url
+# Valid: True or False
+crawl = False
+
 # Parse and test forms on target url
 # Valid: True or False
 forms = False