minor update

This commit is contained in:
Miroslav Stampar 2011-06-20 14:27:24 +00:00
parent 17fac6f67f
commit 8968c708a0
2 changed files with 18 additions and 2 deletions

View File

@ -124,6 +124,8 @@ def runThreads(numThreads, threadFunction, cleanupFunction=None, forwardExceptio
kb.threadContinue = False kb.threadContinue = False
kb.threadException = True kb.threadException = True
print '\r',
logger.info("waiting for threads to finish (Ctrl+C was pressed)") logger.info("waiting for threads to finish (Ctrl+C was pressed)")
try: try:

View File

@ -10,7 +10,9 @@ See the file 'doc/COPYING' for copying permission
import re import re
import threading import threading
import urlparse import urlparse
import time
from lib.core.common import clearConsoleLine
from lib.core.common import dataToStdout from lib.core.common import dataToStdout
from lib.core.data import conf from lib.core.data import conf
from lib.core.data import kb from lib.core.data import kb
@ -33,7 +35,7 @@ class Crawler:
threadData = getCurrentThreadData() threadData = getCurrentThreadData()
threadData.shared.outputs = oset() threadData.shared.outputs = oset()
lockNames = ('limits', 'outputs') lockNames = ('limits', 'outputs', 'ioLock')
for lock in lockNames: for lock in lockNames:
kb.locks[lock] = threading.Lock() kb.locks[lock] = threading.Lock()
@ -74,15 +76,25 @@ class Crawler:
threadData.shared.outputs.add(url) threadData.shared.outputs.add(url)
kb.locks.outputs.release() kb.locks.outputs.release()
if conf.verbose in (1, 2):
kb.locks.ioLock.acquire()
threadData.shared.count += 1
status = '%d/%d links visited (%d%s)' % (threadData.shared.count, threadData.shared.length, round(100.0*threadData.shared.count/threadData.shared.length), '%')
dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)
kb.locks.ioLock.release()
threadData.shared.deeper = set() threadData.shared.deeper = set()
threadData.shared.unprocessed = set([conf.url]) threadData.shared.unprocessed = set([conf.url])
logger.info("starting crawler") logger.info("starting crawler")
for i in xrange(depth): for i in xrange(depth):
threadData.shared.count = 0
threadData.shared.length = len(threadData.shared.unprocessed)
numThreads = min(conf.threads, len(threadData.shared.unprocessed)) numThreads = min(conf.threads, len(threadData.shared.unprocessed))
logger.debug("processing depth: %d" % i) logger.info("searching for links with depth %d" % (i + 1))
runThreads(numThreads, crawlThread) runThreads(numThreads, crawlThread)
clearConsoleLine(True)
threadData.shared.unprocessed = threadData.shared.deeper threadData.shared.unprocessed = threadData.shared.deeper
except KeyboardInterrupt: except KeyboardInterrupt:
@ -97,6 +109,8 @@ class Crawler:
logger.critical(errMsg) logger.critical(errMsg)
finally: finally:
clearConsoleLine(True)
if not threadData.shared.outputs: if not threadData.shared.outputs:
warnMsg = "no usable links found (with GET parameters)" warnMsg = "no usable links found (with GET parameters)"
logger.warn(warnMsg) logger.warn(warnMsg)