Patch for an Issue #169

Miroslav Stampar 2013-01-09 15:22:21 +01:00
parent 55a552ddc4
commit 3d4f381ab5
2 changed files with 110 additions and 113 deletions

lib/core/option.py

@@ -134,7 +134,7 @@ from lib.request.httpshandler import HTTPSHandler
 from lib.request.rangehandler import HTTPRangeHandler
 from lib.request.redirecthandler import SmartRedirectHandler
 from lib.request.templates import getPageTemplate
-from lib.utils.crawler import Crawler
+from lib.utils.crawler import crawl
 from lib.utils.deps import checkDependencies
 from lib.utils.google import Google
 from thirdparty.colorama.initialise import init as coloramainit
@@ -461,8 +461,7 @@ def _setCrawler():
     if not conf.crawlDepth:
         return

-    crawler = Crawler()
-    crawler.getTargetUrls()
+    crawl(conf.url)

 def _setGoogleDorking():
     """
@@ -570,15 +569,19 @@ def _findPageForms():
     if not conf.forms or conf.crawlDepth:
         return

-    if not checkConnection():
+    if conf.url and not checkConnection():
         return

     infoMsg = "searching for forms"
     logger.info(infoMsg)

-    page, _ = Request.queryPage(content=True)
-    findPageForms(page, conf.url, True, True)
+    if not conf.bulkFile:
+        page, _ = Request.queryPage(content=True)
+        findPageForms(page, conf.url, True, True)
+    else:
+        for target, _, _, _ in kb.targets[:]:
+            page, _, _ = Request.getPage(url=target, crawling=True, raise404=False)
+            findPageForms(page, target, False, True)

 def _setDBMSAuthentication():
     """
@@ -1961,8 +1964,8 @@ def _basicOptionValidation():
         errMsg = "maximum number of used threads is %d avoiding possible connection issues" % MAX_NUMBER_OF_THREADS
         raise SqlmapSyntaxException(errMsg)

-    if conf.forms and not conf.url:
-        errMsg = "switch '--forms' requires usage of option '-u' (--url)"
+    if conf.forms and not any((conf.url, conf.bulkFile)):
+        errMsg = "switch '--forms' requires usage of option '-u' (--url) or '-m'"
         raise SqlmapSyntaxException(errMsg)

     if conf.requestFile and conf.url:
@@ -2005,8 +2008,8 @@ def _basicOptionValidation():
         errMsg = "option '--proxy' is incompatible with switch '--ignore-proxy'"
         raise SqlmapSyntaxException(errMsg)

-    if conf.forms and any([conf.logFile, conf.bulkFile, conf.direct, conf.requestFile, conf.googleDork]):
-        errMsg = "switch '--forms' is compatible only with option '-u' (--url)"
+    if conf.forms and any([conf.logFile, conf.direct, conf.requestFile, conf.googleDork]):
+        errMsg = "switch '--forms' is compatible only with options '-u' (--url) and '-m'"
         raise SqlmapSyntaxException(errMsg)

     if conf.timeSec < 1:
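
The net effect in _findPageForms() is a simple two-way branch: with '-u' the single target page is queried as before, while with '-m' every target collected from the bulk file is fetched and scanned for forms. A minimal, self-contained sketch of that branching logic (find_page_forms, fetch_page and find_forms are illustrative stand-ins, not sqlmap's API):

# Illustrative sketch of the patched _findPageForms() control flow.
# fetch_page() and find_forms() are placeholder callables, not sqlmap functions.
def find_page_forms(url=None, bulk_targets=None, fetch_page=None, find_forms=None):
    if not bulk_targets:
        # -u: a single target URL was supplied
        page = fetch_page(url)
        find_forms(page, url)
    else:
        # -m: iterate over a snapshot of the bulk-file targets
        for target in list(bulk_targets):
            page = fetch_page(target)
            find_forms(page, target)

# toy usage with dummy callables
find_page_forms(
    bulk_targets=["http://example.com/a", "http://example.com/b"],
    fetch_page=lambda u: "<html>%s</html>" % u,
    find_forms=lambda page, u: print("searching for forms on", u),
)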

lib/utils/crawler.py

@@ -25,116 +25,110 @@ from lib.request.connect import Connect as Request
 from thirdparty.beautifulsoup.beautifulsoup import BeautifulSoup
 from thirdparty.oset.pyoset import oset

-class Crawler(object):
-    """
-    This class defines methods used to perform crawling (command
-    line option '--crawl'
-    """
-
-    def getTargetUrls(self):
-        try:
-            threadData = getCurrentThreadData()
-            threadData.shared.value = oset()
-
-            def crawlThread():
-                threadData = getCurrentThreadData()
-
-                while kb.threadContinue:
-                    with kb.locks.limit:
-                        if threadData.shared.unprocessed:
-                            current = threadData.shared.unprocessed.pop()
-                        else:
-                            break
-
-                    content = None
-                    try:
-                        if current:
-                            content = Request.getPage(url=current, crawling=True, raise404=False)[0]
-                    except SqlmapConnectionException, e:
-                        errMsg = "connection exception detected (%s). skipping " % e
-                        errMsg += "url '%s'" % current
-                        logger.critical(errMsg)
-                    except httplib.InvalidURL, e:
-                        errMsg = "invalid url detected (%s). skipping " % e
-                        errMsg += "url '%s'" % current
-                        logger.critical(errMsg)
-
-                    if not kb.threadContinue:
-                        break
-
-                    if isinstance(content, unicode):
-                        try:
-                            match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
-                            if match:
-                                content = "<html>%s</html>" % match.group(1)
-
-                            soup = BeautifulSoup(content)
-                            tags = soup('a')
-
-                            if not tags:
-                                tags = re.finditer(r'(?si)<a[^>]+href="(?P<href>[^>"]+)"', content)
-
-                            for tag in tags:
-                                href = tag.get("href") if hasattr(tag, "get") else tag.group("href")
-
-                                if href:
-                                    url = urlparse.urljoin(conf.url, href)
-
-                                    # flag to know if we are dealing with the same target host
-                                    _ = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], (url, conf.url)))
-
-                                    if conf.scope:
-                                        if not re.search(conf.scope, url, re.I):
-                                            continue
-                                    elif not _:
-                                        continue
-
-                                    if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
-                                        with kb.locks.value:
-                                            threadData.shared.deeper.add(url)
-                                            if re.search(r"(.*?)\?(.+)", url):
-                                                threadData.shared.value.add(url)
-                        except UnicodeEncodeError: # for non-HTML files
-                            pass
-                        finally:
-                            if conf.forms:
-                                findPageForms(content, current, False, True)
-
-                    if conf.verbose in (1, 2):
-                        threadData.shared.count += 1
-                        status = '%d/%d links visited (%d%s)' % (threadData.shared.count, threadData.shared.length, round(100.0*threadData.shared.count/threadData.shared.length), '%')
-                        dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)
-
-            threadData.shared.deeper = set()
-            threadData.shared.unprocessed = set([conf.url])
-
-            logger.info("starting crawler")
-
-            for i in xrange(conf.crawlDepth):
-                if i > 0 and conf.threads == 1:
-                    singleTimeWarnMessage("running in a single-thread mode. This could take a while.")
-                threadData.shared.count = 0
-                threadData.shared.length = len(threadData.shared.unprocessed)
-                numThreads = min(conf.threads, len(threadData.shared.unprocessed))
-                logger.info("searching for links with depth %d" % (i + 1))
-                runThreads(numThreads, crawlThread)
-                clearConsoleLine(True)
-
-                if threadData.shared.deeper:
-                    threadData.shared.unprocessed = set(threadData.shared.deeper)
-                else:
-                    break
-
-        except KeyboardInterrupt:
-            warnMsg = "user aborted during crawling. sqlmap "
-            warnMsg += "will use partial list"
-            logger.warn(warnMsg)
-
-        finally:
-            clearConsoleLine(True)
-
-            if not threadData.shared.value:
-                warnMsg = "no usable links found (with GET parameters)"
-                logger.warn(warnMsg)
-            else:
-                for url in threadData.shared.value:
-                    kb.targets.add(( url, None, None, None ))
+def crawl(target):
+    try:
+        threadData = getCurrentThreadData()
+        threadData.shared.value = oset()
+
+        def crawlThread():
+            threadData = getCurrentThreadData()
+
+            while kb.threadContinue:
+                with kb.locks.limit:
+                    if threadData.shared.unprocessed:
+                        current = threadData.shared.unprocessed.pop()
+                    else:
+                        break
+
+                content = None
+                try:
+                    if current:
+                        content = Request.getPage(url=current, crawling=True, raise404=False)[0]
+                except SqlmapConnectionException, e:
+                    errMsg = "connection exception detected (%s). skipping " % e
+                    errMsg += "url '%s'" % current
+                    logger.critical(errMsg)
+                except httplib.InvalidURL, e:
+                    errMsg = "invalid url detected (%s). skipping " % e
+                    errMsg += "url '%s'" % current
+                    logger.critical(errMsg)
+
+                if not kb.threadContinue:
+                    break
+
+                if isinstance(content, unicode):
+                    try:
+                        match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
+                        if match:
+                            content = "<html>%s</html>" % match.group(1)
+
+                        soup = BeautifulSoup(content)
+                        tags = soup('a')
+
+                        if not tags:
+                            tags = re.finditer(r'(?si)<a[^>]+href="(?P<href>[^>"]+)"', content)
+
+                        for tag in tags:
+                            href = tag.get("href") if hasattr(tag, "get") else tag.group("href")
+
+                            if href:
+                                url = urlparse.urljoin(target, href)
+
+                                # flag to know if we are dealing with the same target host
+                                _ = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], (url, target)))
+
+                                if conf.scope:
+                                    if not re.search(conf.scope, url, re.I):
+                                        continue
+                                elif not _:
+                                    continue
+
+                                if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
+                                    with kb.locks.value:
+                                        threadData.shared.deeper.add(url)
+                                        if re.search(r"(.*?)\?(.+)", url):
+                                            threadData.shared.value.add(url)
+                    except UnicodeEncodeError: # for non-HTML files
+                        pass
+                    finally:
+                        if conf.forms:
+                            findPageForms(content, current, False, True)
+
+                if conf.verbose in (1, 2):
+                    threadData.shared.count += 1
+                    status = '%d/%d links visited (%d%s)' % (threadData.shared.count, threadData.shared.length, round(100.0*threadData.shared.count/threadData.shared.length), '%')
+                    dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)
+
+        threadData.shared.deeper = set()
+        threadData.shared.unprocessed = set([target])
+
+        logger.info("starting crawler")
+
+        for i in xrange(conf.crawlDepth):
+            if i > 0 and conf.threads == 1:
+                singleTimeWarnMessage("running in a single-thread mode. This could take a while.")
+            threadData.shared.count = 0
+            threadData.shared.length = len(threadData.shared.unprocessed)
+            numThreads = min(conf.threads, len(threadData.shared.unprocessed))
+            logger.info("searching for links with depth %d" % (i + 1))
+            runThreads(numThreads, crawlThread)
+            clearConsoleLine(True)
+
+            if threadData.shared.deeper:
+                threadData.shared.unprocessed = set(threadData.shared.deeper)
+            else:
+                break
+
+    except KeyboardInterrupt:
+        warnMsg = "user aborted during crawling. sqlmap "
+        warnMsg += "will use partial list"
+        logger.warn(warnMsg)
+
+    finally:
+        clearConsoleLine(True)
+
+        if not threadData.shared.value:
+            warnMsg = "no usable links found (with GET parameters)"
+            logger.warn(warnMsg)
+        else:
+            for url in threadData.shared.value:
+                kb.targets.add((url, None, None, None))
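
Taken together, the new crawl(target) performs a breadth-first, depth-limited walk: each pass pops URLs from an unprocessed frontier, keeps only links on the same host as the target, queues them for the next depth level, and records links carrying GET parameters as candidate targets. A single-threaded, standard-library sketch of that loop (crawl_sketch and the injected fetch callable are illustrative names, not sqlmap code):

# Standalone sketch of the depth-limited crawl performed by crawl(target).
# fetch(url) is any callable returning the page body as a string.
import re
from urllib.parse import urljoin, urlparse

def crawl_sketch(target, fetch, depth=2, exclude_extensions=("gif", "jpg", "png")):
    collected = set()        # links with GET parameters (candidate targets)
    unprocessed = {target}   # frontier for the current depth level

    for _ in range(depth):
        deeper = set()       # frontier for the next depth level

        for current in unprocessed:
            content = fetch(current) or ""

            for match in re.finditer(r'(?si)<a[^>]+href="(?P<href>[^>"]+)"', content):
                url = urljoin(target, match.group("href"))

                # only follow links on the same host as the original target
                if urlparse(url).netloc.split(":")[0] != urlparse(target).netloc.split(":")[0]:
                    continue

                # skip obviously static resources
                if url.rsplit(".", 1)[-1].lower() in exclude_extensions:
                    continue

                deeper.add(url)
                if "?" in url:
                    collected.add(url)

        if not deeper:
            break
        unprocessed = deeper

    return collected

Supplying a fetch callable that returns page HTML (for example a thin wrapper around urllib.request.urlopen) is enough to exercise the sketch against a test site.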