some more refactoring

Miroslav Stampar 2012-06-14 13:52:56 +00:00
parent facce2c0df
commit d2dd47fb23
2 changed files with 19 additions and 28 deletions
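The refactoring in both files follows one pattern: explicit kb.locks.*.acquire()/release() pairs are replaced by "with" statements on the same lock objects. A minimal sketch of why that matters, assuming a plain threading.Lock and a made-up shared counter rather than anything from sqlmap: the context manager releases the lock on every exit path, including exceptions, which the manual pairing only guarantees when wrapped in try/finally.

import threading

shared_lock = threading.Lock()   # hypothetical, stands in for a kb.locks.* lock
shared_counter = 0

def bump_unsafe():
    # pre-refactor style: if the body raises between acquire() and
    # release(), the lock stays held and other threads block forever
    global shared_counter
    shared_lock.acquire()
    shared_counter += 1
    shared_lock.release()

def bump_safe():
    # post-refactor style: threading.Lock supports the context-manager
    # protocol, so the lock is released on normal exit and on exceptions
    global shared_counter
    with shared_lock:
        shared_counter += 1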


@@ -1645,15 +1645,12 @@ def readCachedFileContent(filename, mode='rb'):
     """
     if filename not in kb.cache.content:
-        kb.locks.cache.acquire()
-
-        if filename not in kb.cache.content:
-            checkFile(filename)
-            with codecs.open(filename, mode, UNICODE_ENCODING) as f:
-                content = f.read()
-                kb.cache.content[filename] = content
-
-        kb.locks.cache.release()
+        with kb.locks.cache:
+            if filename not in kb.cache.content:
+                checkFile(filename)
+                with codecs.open(filename, mode, UNICODE_ENCODING) as f:
+                    content = f.read()
+                    kb.cache.content[filename] = content

     return kb.cache.content[filename]
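readCachedFileContent keeps its check / lock / re-check structure and only swaps the acquire()/release() pair for a with block. A rough, self-contained sketch of that double-checked caching pattern, using a plain dict and lock instead of the kb object (all names here are made up for illustration):

import threading

_cache = {}                      # stands in for kb.cache.content (hypothetical)
_cache_lock = threading.Lock()   # stands in for kb.locks.cache (hypothetical)

def read_cached(filename, mode="rb"):
    # fast path: a cache hit never touches the lock
    if filename not in _cache:
        with _cache_lock:
            # re-check under the lock: another thread may have filled
            # the entry while this one was blocked on acquire
            if filename not in _cache:
                with open(filename, mode) as f:
                    _cache[filename] = f.read()
    return _cache[filename]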
@@ -2113,13 +2110,10 @@ def logHTTPTraffic(requestLogMsg, responseLogMsg):
     if not conf.trafficFile:
         return

-    kb.locks.log.acquire()
-
-    dataToTrafficFile("%s%s" % (requestLogMsg, os.linesep))
-    dataToTrafficFile("%s%s" % (responseLogMsg, os.linesep))
-    dataToTrafficFile("%s%s%s%s" % (os.linesep, 76 * '#', os.linesep, os.linesep))
-
-    kb.locks.log.release()
+    with kb.locks.log:
+        dataToTrafficFile("%s%s" % (requestLogMsg, os.linesep))
+        dataToTrafficFile("%s%s" % (responseLogMsg, os.linesep))
+        dataToTrafficFile("%s%s%s%s" % (os.linesep, 76 * '#', os.linesep, os.linesep))

 def getPageTemplate(payload, place):
     """


@@ -42,13 +42,11 @@ class Crawler:
             threadData = getCurrentThreadData()

             while kb.threadContinue:
-                kb.locks.limits.acquire()
-                if threadData.shared.unprocessed:
-                    current = threadData.shared.unprocessed.pop()
-                    kb.locks.limits.release()
-                else:
-                    kb.locks.limits.release()
-                    break
+                with kb.locks.limits:
+                    if threadData.shared.unprocessed:
+                        current = threadData.shared.unprocessed.pop()
+                    else:
+                        break

                 content = None

                 try:
@@ -83,11 +81,10 @@ class Crawler:
                                         continue

                                 if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
-                                    kb.locks.outputs.acquire()
-                                    threadData.shared.deeper.add(url)
-                                    if re.search(r"(.*?)\?(.+)", url):
-                                        threadData.shared.outputs.add(url)
-                                    kb.locks.outputs.release()
+                                    with kb.locks.outputs:
+                                        threadData.shared.deeper.add(url)
+                                        if re.search(r"(.*?)\?(.+)", url):
+                                            threadData.shared.outputs.add(url)
                     except UnicodeEncodeError: # for non-HTML files
                         pass
                     finally:
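In the crawler the rewrite also removes the duplicated release() calls: the old loop had to release kb.locks.limits on both the pop branch and the else branch before breaking, whereas the with form releases it on either path automatically. A rough sketch of the same worker-loop shape with stand-in names (pending, results and the two locks are invented for illustration, and the fetch/parse step is elided):

import threading

pending = ["http://example.com/a", "http://example.com/b"]  # stands in for threadData.shared.unprocessed
results = set()                                             # stands in for threadData.shared.outputs
limits_lock = threading.Lock()                              # stands in for kb.locks.limits
outputs_lock = threading.Lock()                             # stands in for kb.locks.outputs

def worker():
    while True:
        # pop the next item under the lock; the break path releases the
        # lock automatically instead of needing an explicit release()
        # on both branches
        with limits_lock:
            if pending:
                current = pending.pop()
            else:
                break

        # ... fetch and parse `current` here ...

        with outputs_lock:
            results.add(current)

threads = [threading.Thread(target=worker) for _ in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()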