adding Beautifulsoup (BSD) into extras; adding --crawl to options

Miroslav Stampar 2011-06-20 11:32:30 +00:00
parent 8c04aa871a
commit 07e2c72943
9 changed files with 2168 additions and 3 deletions


@@ -0,0 +1,37 @@
#!/usr/bin/env python
#
# Copyright (c) 2004-2010, Leonard Richardson
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
#
# * Neither the name of the the Beautiful Soup Consortium and All
# Night Kosher Bakery nor the names of its contributors may be
# used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
#
pass

File diff suppressed because it is too large.


@@ -589,7 +589,7 @@ def start():
     if kb.dataOutputFlag and not conf.multipleTargets:
         logger.info("Fetched data logged to text files under '%s'" % conf.outputPath)

-    if conf.multipleTargets:
+    if conf.multipleTargets and conf.resultsFilename:
         infoMsg = "you can find results of scanning in multiple targets "
         infoMsg += "mode inside the CSV file '%s'" % conf.resultsFilename
         logger.info(infoMsg)


@@ -114,6 +114,7 @@ from lib.request.certhandler import HTTPSCertAuthHandler
 from lib.request.rangehandler import HTTPRangeHandler
 from lib.request.redirecthandler import SmartRedirectHandler
 from lib.request.templates import getPageTemplate
+from lib.utils.crawler import Crawler
 from lib.utils.deps import checkDependencies
 from lib.utils.google import Google
@@ -388,6 +389,13 @@ def __setRequestFromFile():
     __feedTargetsDict(conf.requestFile, addedTargetUrls)

+def __setCrawler():
+    if not conf.crawl:
+        return
+
+    crawler = Crawler()
+    crawler.getTargetUrls()
+
 def __setGoogleDorking():
     """
     This function checks if the way to request testable hosts is through
@@ -1278,7 +1286,7 @@ def __cleanupOptions():
     if conf.tmpPath:
         conf.tmpPath = ntToPosixSlashes(normalizePath(conf.tmpPath))

-    if conf.googleDork or conf.logFile or conf.bulkFile or conf.forms:
+    if conf.googleDork or conf.logFile or conf.bulkFile or conf.forms or conf.crawl:
         conf.multipleTargets = True

     if conf.optimize:
@@ -1800,6 +1808,7 @@ def init(inputOptions=advancedDict(), overrideOptions=False):
     __setDNSCache()
     __setSafeUrl()
     __setGoogleDorking()
+    __setCrawler()
     __setBulkMultipleTargets()
     __urllib2Opener()
     __findPageForms()


@@ -167,6 +167,8 @@ optDict = {
         "beep": "boolean",
         "checkPayload": "boolean",
         "cleanup": "boolean",
+        "crawl": "boolean",
+        "forms": "boolean",
         "googlePage": "integer",
         "mobile": "boolean",
         "pageRank": "boolean",


@@ -511,6 +511,10 @@ def cmdLineParser():
                                   help="Clean up the DBMS by sqlmap specific "
                                        "UDF and tables")

+        miscellaneous.add_option("--crawl", dest="crawl",
+                                  action="store_true",
+                                  help="Crawl the website starting from the target url")
+
         miscellaneous.add_option("--forms", dest="forms",
                                   action="store_true",
                                   help="Parse and test forms on target url")

lib/utils/crawler.py (new file, 95 lines)

@@ -0,0 +1,95 @@
#!/usr/bin/env python

"""
$Id$

Copyright (c) 2006-2011 sqlmap developers (http://sqlmap.sourceforge.net/)
See the file 'doc/COPYING' for copying permission
"""

import re
import threading
import urlparse

from lib.core.common import dataToStdout
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.exception import sqlmapConnectionException
from lib.core.threads import getCurrentThreadData
from lib.core.threads import runThreads
from lib.request.connect import Connect as Request
from extra.beautifulsoup.beautifulsoup import BeautifulSoup
from extra.oset.pyoset import oset

class Crawler:
    """
    This class defines methods used to perform crawling (command
    line option '--crawl')
    """

    def getTargetUrls(self, depth=1):
        try:
            threadData = getCurrentThreadData()
            threadData.shared.outputs = oset()

            # locks shared by the crawler threads
            lockNames = ('limits', 'outputs')
            for lock in lockNames:
                kb.locks[lock] = threading.Lock()

            def crawlThread():
                threadData = getCurrentThreadData()

                while kb.threadContinue:
                    kb.locks.limits.acquire()
                    if threadData.shared.unprocessed:
                        current = threadData.shared.unprocessed.pop()
                        kb.locks.limits.release()
                    else:
                        kb.locks.limits.release()
                        break

                    content = Request.getPage(url=current)[0]

                    if not kb.threadContinue:
                        break

                    soup = BeautifulSoup(content)
                    for tag in soup('a'):
                        if tag.get("href"):
                            url = urlparse.urljoin(conf.url, tag.get("href"))

                            # flag to know if we are dealing with the same target host
                            target = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], [url, conf.url]))

                            if target:
                                kb.locks.outputs.acquire()
                                threadData.shared.deeper.add(url)
                                if re.search(r"(.*?)\?(.+)", url):
                                    threadData.shared.outputs.add(url)
                                kb.locks.outputs.release()

            threadData.shared.deeper = set()
            threadData.shared.unprocessed = set([conf.url])

            logger.info("starting crawling")

            # breadth-first: each pass processes the links collected at the previous depth
            for i in xrange(depth):
                numThreads = min(conf.threads, len(threadData.shared.unprocessed))
                logger.debug("processing depth: %d" % i)
                runThreads(numThreads, crawlThread)
                threadData.shared.unprocessed = threadData.shared.deeper

        except KeyboardInterrupt:
            warnMsg = "user aborted during crawling. sqlmap "
            warnMsg += "will use partial list"
            logger.warn(warnMsg)

        except sqlmapConnectionException, e:
            errMsg = "connection exception detected. sqlmap "
            errMsg += "will use partial list"
            errMsg += " '%s'" % e
            logger.critical(errMsg)

        finally:
            for url in threadData.shared.outputs:
                kb.targetUrls.add(( url, None, None, None ))

            kb.suppressResumeInfo = False
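
For reference, below is a standalone sketch (not sqlmap code) of the per-page logic getTargetUrls() applies: parse anchor tags, resolve each href against the base URL, keep only links on the same host for the next crawl depth, and queue links that carry a query string as candidate targets. It assumes Python 3 with the beautifulsoup4 package installed, whereas the diff above uses the bundled Python 2 BeautifulSoup, urlparse and reduce; the host comparison is written as a plain equality check, which is what the reduce() expression over the two netlocs amounts to.

import re
from urllib.parse import urljoin, urlparse

from bs4 import BeautifulSoup

def extract_candidates(base_url, html):
    """Return (same_host_links, injectable_candidates) for a single fetched page."""
    base_host = urlparse(base_url).netloc.split(':')[0]
    same_host, candidates = set(), set()

    soup = BeautifulSoup(html, "html.parser")
    for tag in soup("a"):                       # soup("a") is shorthand for soup.find_all("a")
        href = tag.get("href")
        if not href:
            continue

        url = urljoin(base_url, href)           # resolve relative links against the page URL

        # same-target check: compare hostnames with any port stripped, as the diff does
        if urlparse(url).netloc.split(':')[0] != base_host:
            continue

        same_host.add(url)                      # crawled further at the next depth level
        if re.search(r"(.*?)\?(.+)", url):      # only URLs with a query string become test targets
            candidates.add(url)

    return same_host, candidates

if __name__ == "__main__":
    page = ('<a href="/item.php?id=1">a</a>'
            '<a href="/about.html">b</a>'
            '<a href="http://other.example/x.php?y=2">c</a>')
    deeper, targets = extract_candidates("http://www.target.com/index.php", page)
    print(sorted(deeper))   # both same-host links
    print(sorted(targets))  # only the item.php?id=1 link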


@@ -60,7 +60,7 @@ class Google:
         """

         for match in self.__matches:
-            if re.search(r"(.*?)\?(.+)", match, re.I):
+            if re.search(r"(.*?)\?(.+)", match):
                 kb.targetUrls.add(( htmlunescape(htmlunescape(match)), None, None, None ))
             elif re.search(URI_INJECTABLE_REGEX, match, re.I):
                 kb.targetUrls.add(( htmlunescape(htmlunescape("%s" % match)), None, None, None ))
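
The re.I flag is dropped here because the pattern "(.*?)\?(.+)" contains no alphabetic literals, so case-insensitive matching never changed the outcome; the crawler above now uses the identical pattern. A minimal illustration of what this filter keeps (plain Python, hypothetical URLs):

import re

pattern = r"(.*?)\?(.+)"

# URLs carrying a query string are kept as candidate GET injection targets
print(bool(re.search(pattern, "http://www.target.com/page.php?id=1")))  # True
# URLs without parameters are skipped
print(bool(re.search(pattern, "http://www.target.com/page.php")))       # False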


@@ -543,6 +543,10 @@ checkPayload = False
 # Valid: True or False
 cleanup = False

+# Crawl the website starting from the target url
+# Valid: True or False
+crawl = False
+
 # Parse and test forms on target url
 # Valid: True or False
 forms = False