From 8968c708a05d40f18d88a80dba49237d6a984268 Mon Sep 17 00:00:00 2001
From: Miroslav Stampar
Date: Mon, 20 Jun 2011 14:27:24 +0000
Subject: [PATCH] minor update

---
 lib/core/threads.py  |  2 ++
 lib/utils/crawler.py | 18 ++++++++++++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/lib/core/threads.py b/lib/core/threads.py
index 78e2faa77..61c20bbaf 100644
--- a/lib/core/threads.py
+++ b/lib/core/threads.py
@@ -124,6 +124,8 @@ def runThreads(numThreads, threadFunction, cleanupFunction=None, forwardExceptio
         kb.threadContinue = False
         kb.threadException = True
 
+        print '\r',
+
         logger.info("waiting for threads to finish (Ctrl+C was pressed)")
 
     try:
diff --git a/lib/utils/crawler.py b/lib/utils/crawler.py
index 975b92194..5477377b0 100644
--- a/lib/utils/crawler.py
+++ b/lib/utils/crawler.py
@@ -10,7 +10,9 @@ See the file 'doc/COPYING' for copying permission
 import re
 import threading
 import urlparse
+import time
 
+from lib.core.common import clearConsoleLine
 from lib.core.common import dataToStdout
 from lib.core.data import conf
 from lib.core.data import kb
@@ -33,7 +35,7 @@ class Crawler:
             threadData = getCurrentThreadData()
             threadData.shared.outputs = oset()
 
-            lockNames = ('limits', 'outputs')
+            lockNames = ('limits', 'outputs', 'ioLock')
             for lock in lockNames:
                 kb.locks[lock] = threading.Lock()
 
@@ -74,15 +76,25 @@ class Crawler:
                                     threadData.shared.outputs.add(url)
                                 kb.locks.outputs.release()
 
+                    if conf.verbose in (1, 2):
+                        kb.locks.ioLock.acquire()
+                        threadData.shared.count += 1
+                        status = '%d/%d links visited (%d%s)' % (threadData.shared.count, threadData.shared.length, round(100.0*threadData.shared.count/threadData.shared.length), '%')
+                        dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)
+                        kb.locks.ioLock.release()
+
             threadData.shared.deeper = set()
             threadData.shared.unprocessed = set([conf.url])
 
             logger.info("starting crawler")
 
             for i in xrange(depth):
+                threadData.shared.count = 0
+                threadData.shared.length = len(threadData.shared.unprocessed)
                 numThreads = min(conf.threads, len(threadData.shared.unprocessed))
-                logger.debug("processing depth: %d" % i)
+                logger.info("searching for links with depth %d" % (i + 1))
                 runThreads(numThreads, crawlThread)
+                clearConsoleLine(True)
                 threadData.shared.unprocessed = threadData.shared.deeper
 
         except KeyboardInterrupt:
@@ -97,6 +109,8 @@
             logger.critical(errMsg)
 
         finally:
+            clearConsoleLine(True)
+
             if not threadData.shared.outputs:
                 warnMsg = "no usable links found (with GET parameters)"
                 logger.warn(warnMsg)
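
The crawler change above boils down to one pattern: each worker thread updates a shared visit counter and repaints a single console line via "\r", with both steps guarded by the new dedicated ioLock so concurrent writes cannot interleave. (The threads.py hunk uses the same trick: in Python 2, the trailing comma in "print '\r'," suppresses the newline, so the carriage return only rewinds the cursor before the "waiting for threads" message overwrites the status line.) Below is a minimal standalone sketch of that pattern, not sqlmap code; the names TOTAL, visit(), worker(), shared and io_lock are made up for illustration, while io_lock mirrors kb.locks.ioLock and the final line-clearing write is a rough stand-in for clearConsoleLine(True):

    import sys
    import time
    import threading

    TOTAL = 20                    # hypothetical number of links to visit
    io_lock = threading.Lock()    # plays the role of kb.locks.ioLock
    shared = {"count": 0}         # shared visit counter

    def visit(link):
        time.sleep(0.05)          # stand-in for the real page request

    def worker(links):
        for link in links:
            visit(link)
            io_lock.acquire()
            try:
                # increment and print under one lock so the count and the
                # printed line always agree
                shared["count"] += 1
                status = '%d/%d links visited (%d%%)' % (shared["count"], TOTAL,
                    round(100.0 * shared["count"] / TOTAL))
                # "\r" returns the cursor to column 0, overwriting the line
                sys.stdout.write("\r[%s] [INFO] %s" % (time.strftime("%X"), status))
                sys.stdout.flush()
            finally:
                io_lock.release()

    threads = [threading.Thread(target=worker, args=([None] * 5,)) for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    # rough equivalent of clearConsoleLine(True): blank the line, reset cursor
    sys.stdout.write("\r%s\r" % (" " * 60))

Holding one lock around both the increment and the write is the point of the design: taking separate locks for each would let another thread print between them, producing out-of-order percentages on the status line.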