Update for Issue #429

This commit is contained in:
stamparm 2013-04-09 11:36:33 +02:00
parent 91054099aa
commit 3948b527dd
2 changed files with 25 additions and 3 deletions

View File

@ -479,7 +479,21 @@ def _setCrawler():
if not conf.crawlDepth:
return
if not conf.bulkFile:
crawl(conf.url)
else:
targets = getFileItems(conf.bulkFile)
for i in xrange(len(targets)):
try:
target = targets[i]
crawl(target)
if conf.verbose in (1, 2):
status = '%d/%d links visited (%d%%)' % (i + 1, len(targets), round(100.0 * (i + 1) / len(targets)))
dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)
except Exception, ex:
errMsg = "problem occured while crawling at '%s' ('%s')" % (target, ex)
logger.error(errMsg)
def _setGoogleDorking():
"""

View File

@ -102,17 +102,25 @@ def crawl(target):
threadData.shared.deeper = set()
threadData.shared.unprocessed = set([target])
logger.info("starting crawler")
infoMsg = "starting crawler"
if conf.bulkFile:
infoMsg += " for target URL '%s'" % target
logger.info(infoMsg)
for i in xrange(conf.crawlDepth):
if i > 0 and conf.threads == 1:
singleTimeWarnMessage("running in a single-thread mode. This could take a while")
threadData.shared.count = 0
threadData.shared.length = len(threadData.shared.unprocessed)
numThreads = min(conf.threads, len(threadData.shared.unprocessed))
if not conf.bulkFile:
logger.info("searching for links with depth %d" % (i + 1))
runThreads(numThreads, crawlThread)
clearConsoleLine(True)
if threadData.shared.deeper:
threadData.shared.unprocessed = set(threadData.shared.deeper)
else: