Patch for Issue #169

Author: Miroslav Stampar
Date:   2013-01-09 15:22:21 +01:00
parent 55a552ddc4
commit 3d4f381ab5
2 changed files with 110 additions and 113 deletions

Changed file 1 of 2:

@@ -134,7 +134,7 @@ from lib.request.httpshandler import HTTPSHandler
 from lib.request.rangehandler import HTTPRangeHandler
 from lib.request.redirecthandler import SmartRedirectHandler
 from lib.request.templates import getPageTemplate
-from lib.utils.crawler import Crawler
+from lib.utils.crawler import crawl
 from lib.utils.deps import checkDependencies
 from lib.utils.google import Google
 from thirdparty.colorama.initialise import init as coloramainit
@@ -461,8 +461,7 @@ def _setCrawler():
     if not conf.crawlDepth:
         return
 
-    crawler = Crawler()
-    crawler.getTargetUrls()
+    crawl(conf.url)
 
 def _setGoogleDorking():
     """
@@ -570,15 +569,19 @@ def _findPageForms():
     if not conf.forms or conf.crawlDepth:
         return
 
-    if not checkConnection():
+    if conf.url and not checkConnection():
        return
 
     infoMsg = "searching for forms"
     logger.info(infoMsg)
 
-    page, _ = Request.queryPage(content=True)
-
-    findPageForms(page, conf.url, True, True)
+    if not conf.bulkFile:
+        page, _ = Request.queryPage(content=True)
+        findPageForms(page, conf.url, True, True)
+    else:
+        for target, _, _, _ in kb.targets[:]:
+            page, _, _= Request.getPage(url=target, crawling=True, raise404=False)
+            findPageForms(page, target, False, True)
 
 def _setDBMSAuthentication():
     """
@@ -1961,8 +1964,8 @@ def _basicOptionValidation():
         errMsg = "maximum number of used threads is %d avoiding possible connection issues" % MAX_NUMBER_OF_THREADS
         raise SqlmapSyntaxException(errMsg)
 
-    if conf.forms and not conf.url:
-        errMsg = "switch '--forms' requires usage of option '-u' (--url)"
+    if conf.forms and not any ((conf.url, conf.bulkFile)):
+        errMsg = "switch '--forms' requires usage of option '-u' (--url) or '-m'"
         raise SqlmapSyntaxException(errMsg)
 
     if conf.requestFile and conf.url:
@@ -2005,8 +2008,8 @@ def _basicOptionValidation():
         errMsg = "option '--proxy' is incompatible with switch '--ignore-proxy'"
         raise SqlmapSyntaxException(errMsg)
 
-    if conf.forms and any([conf.logFile, conf.bulkFile, conf.direct, conf.requestFile, conf.googleDork]):
-        errMsg = "switch '--forms' is compatible only with option '-u' (--url)"
+    if conf.forms and any([conf.logFile, conf.direct, conf.requestFile, conf.googleDork]):
+        errMsg = "switch '--forms' is compatible only with options '-u' (--url) and '-m'"
         raise SqlmapSyntaxException(errMsg)
 
     if conf.timeSec < 1:
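
Taken together, the two validation hunks mean '--forms' now pairs with either a single URL (-u) or a bulk file (-m), while remaining incompatible with -l, -d, -r and -g. A standalone sketch of the combined rule, where conf stands in for sqlmap's parsed options and a generic exception replaces SqlmapSyntaxException:

def validateFormsOptions(conf):
    # sketch only; names mirror the two hunks above
    if conf.forms and not any((conf.url, conf.bulkFile)):
        raise Exception("switch '--forms' requires usage of option '-u' (--url) or '-m'")
    if conf.forms and any([conf.logFile, conf.direct, conf.requestFile, conf.googleDork]):
        raise Exception("switch '--forms' is compatible only with options '-u' (--url) and '-m'")
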

Changed file 2 of 2:

@@ -25,116 +25,110 @@ from lib.request.connect import Connect as Request
 from thirdparty.beautifulsoup.beautifulsoup import BeautifulSoup
 from thirdparty.oset.pyoset import oset
 
-class Crawler(object):
-    """
-    This class defines methods used to perform crawling (command
-    line option '--crawl'
-    """
-
-    def getTargetUrls(self):
-        try:
-            threadData = getCurrentThreadData()
-            threadData.shared.value = oset()
-
-            def crawlThread():
-                threadData = getCurrentThreadData()
-
-                while kb.threadContinue:
-                    with kb.locks.limit:
-                        if threadData.shared.unprocessed:
-                            current = threadData.shared.unprocessed.pop()
-                        else:
-                            break
-
-                    content = None
-                    try:
-                        if current:
-                            content = Request.getPage(url=current, crawling=True, raise404=False)[0]
-                    except SqlmapConnectionException, e:
-                        errMsg = "connection exception detected (%s). skipping " % e
-                        errMsg += "url '%s'" % current
-                        logger.critical(errMsg)
-                    except httplib.InvalidURL, e:
-                        errMsg = "invalid url detected (%s). skipping " % e
-                        errMsg += "url '%s'" % current
-                        logger.critical(errMsg)
-
-                    if not kb.threadContinue:
-                        break
-
-                    if isinstance(content, unicode):
-                        try:
-                            match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
-                            if match:
-                                content = "<html>%s</html>" % match.group(1)
-
-                            soup = BeautifulSoup(content)
-                            tags = soup('a')
-
-                            if not tags:
-                                tags = re.finditer(r'(?si)<a[^>]+href="(?P<href>[^>"]+)"', content)
-
-                            for tag in tags:
-                                href = tag.get("href") if hasattr(tag, "get") else tag.group("href")
-
-                                if href:
-                                    url = urlparse.urljoin(conf.url, href)
-
-                                    # flag to know if we are dealing with the same target host
-                                    _ = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], (url, conf.url)))
-
-                                    if conf.scope:
-                                        if not re.search(conf.scope, url, re.I):
-                                            continue
-                                    elif not _:
-                                        continue
-
-                                    if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
-                                        with kb.locks.value:
-                                            threadData.shared.deeper.add(url)
-                                            if re.search(r"(.*?)\?(.+)", url):
-                                                threadData.shared.value.add(url)
-                        except UnicodeEncodeError: # for non-HTML files
-                            pass
-                        finally:
-                            if conf.forms:
-                                findPageForms(content, current, False, True)
-
-                    if conf.verbose in (1, 2):
-                        threadData.shared.count += 1
-                        status = '%d/%d links visited (%d%s)' % (threadData.shared.count, threadData.shared.length, round(100.0*threadData.shared.count/threadData.shared.length), '%')
-                        dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)
-
-            threadData.shared.deeper = set()
-            threadData.shared.unprocessed = set([conf.url])
-
-            logger.info("starting crawler")
-
-            for i in xrange(conf.crawlDepth):
-                if i > 0 and conf.threads == 1:
-                    singleTimeWarnMessage("running in a single-thread mode. This could take a while.")
-                threadData.shared.count = 0
-                threadData.shared.length = len(threadData.shared.unprocessed)
-                numThreads = min(conf.threads, len(threadData.shared.unprocessed))
-                logger.info("searching for links with depth %d" % (i + 1))
-                runThreads(numThreads, crawlThread)
-                clearConsoleLine(True)
-
-                if threadData.shared.deeper:
-                    threadData.shared.unprocessed = set(threadData.shared.deeper)
-                else:
-                    break
-
-        except KeyboardInterrupt:
-            warnMsg = "user aborted during crawling. sqlmap "
-            warnMsg += "will use partial list"
-            logger.warn(warnMsg)
-
-        finally:
-            clearConsoleLine(True)
-
-            if not threadData.shared.value:
-                warnMsg = "no usable links found (with GET parameters)"
-                logger.warn(warnMsg)
-            else:
-                for url in threadData.shared.value:
-                    kb.targets.add(( url, None, None, None ))
+def crawl(target):
+    try:
+        threadData = getCurrentThreadData()
+        threadData.shared.value = oset()
+
+        def crawlThread():
+            threadData = getCurrentThreadData()
+
+            while kb.threadContinue:
+                with kb.locks.limit:
+                    if threadData.shared.unprocessed:
+                        current = threadData.shared.unprocessed.pop()
+                    else:
+                        break
+
+                content = None
+                try:
+                    if current:
+                        content = Request.getPage(url=current, crawling=True, raise404=False)[0]
+                except SqlmapConnectionException, e:
+                    errMsg = "connection exception detected (%s). skipping " % e
+                    errMsg += "url '%s'" % current
+                    logger.critical(errMsg)
+                except httplib.InvalidURL, e:
+                    errMsg = "invalid url detected (%s). skipping " % e
+                    errMsg += "url '%s'" % current
+                    logger.critical(errMsg)
+
+                if not kb.threadContinue:
+                    break
+
+                if isinstance(content, unicode):
+                    try:
+                        match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
+                        if match:
+                            content = "<html>%s</html>" % match.group(1)
+
+                        soup = BeautifulSoup(content)
+                        tags = soup('a')
+
+                        if not tags:
+                            tags = re.finditer(r'(?si)<a[^>]+href="(?P<href>[^>"]+)"', content)
+
+                        for tag in tags:
+                            href = tag.get("href") if hasattr(tag, "get") else tag.group("href")
+
+                            if href:
+                                url = urlparse.urljoin(target, href)
+
+                                # flag to know if we are dealing with the same target host
+                                _ = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], (url, target)))
+
+                                if conf.scope:
+                                    if not re.search(conf.scope, url, re.I):
+                                        continue
+                                elif not _:
+                                    continue
+
+                                if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
+                                    with kb.locks.value:
+                                        threadData.shared.deeper.add(url)
+                                        if re.search(r"(.*?)\?(.+)", url):
+                                            threadData.shared.value.add(url)
+                    except UnicodeEncodeError: # for non-HTML files
+                        pass
+                    finally:
+                        if conf.forms:
+                            findPageForms(content, current, False, True)
+
+                if conf.verbose in (1, 2):
+                    threadData.shared.count += 1
+                    status = '%d/%d links visited (%d%s)' % (threadData.shared.count, threadData.shared.length, round(100.0*threadData.shared.count/threadData.shared.length), '%')
+                    dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)
+
+        threadData.shared.deeper = set()
+        threadData.shared.unprocessed = set([target])
+
+        logger.info("starting crawler")
+
+        for i in xrange(conf.crawlDepth):
+            if i > 0 and conf.threads == 1:
+                singleTimeWarnMessage("running in a single-thread mode. This could take a while.")
+            threadData.shared.count = 0
+            threadData.shared.length = len(threadData.shared.unprocessed)
+            numThreads = min(conf.threads, len(threadData.shared.unprocessed))
+            logger.info("searching for links with depth %d" % (i + 1))
+            runThreads(numThreads, crawlThread)
+            clearConsoleLine(True)
+
+            if threadData.shared.deeper:
+                threadData.shared.unprocessed = set(threadData.shared.deeper)
+            else:
+                break
+
+    except KeyboardInterrupt:
+        warnMsg = "user aborted during crawling. sqlmap "
+        warnMsg += "will use partial list"
+        logger.warn(warnMsg)
+
+    finally:
+        clearConsoleLine(True)
+
+        if not threadData.shared.value:
+            warnMsg = "no usable links found (with GET parameters)"
+            logger.warn(warnMsg)
+        else:
+            for url in threadData.shared.value:
+                kb.targets.add((url, None, None, None))
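
Structurally, crawl() is a breadth-first walk: worker threads pop links of the current depth from a shared set, extract anchors from each response, and feed a "deeper" set that becomes the next round's work queue; only links carrying GET parameters are kept as scan targets. Stripped of sqlmap's threading, locking and logging, the control flow reduces to the following single-threaded sketch (extract_links is a naive stand-in for the BeautifulSoup/regex extraction above, and the whole snippet is illustrative rather than sqlmap code):

import re
import urllib2
import urlparse

def extract_links(url):
    # naive anchor extraction standing in for the BeautifulSoup logic above
    content = urllib2.urlopen(url).read()
    return [urlparse.urljoin(url, href) for href in re.findall(r'href="([^">]+)"', content)]

def crawl_sketch(target, depth):
    found = set()                        # links carrying GET parameters
    unprocessed = set([target])
    for _ in xrange(depth):
        deeper = set()
        for current in unprocessed:
            for url in extract_links(current):
                deeper.add(url)
                if '?' in url:
                    found.add(url)
        if not deeper:
            break
        unprocessed = deeper
    return found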