some more refactoring

Miroslav Stampar 2012-06-14 13:52:56 +00:00
parent facce2c0df
commit d2dd47fb23
2 changed files with 19 additions and 28 deletions
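
The refactoring swaps manual kb.locks.*.acquire()/release() pairs for with blocks. Python's threading locks are context managers, so "with lock:" acquires on entry and releases on every exit path (normal fall-through, return, break, or an exception), which the explicit pairs below only guaranteed on the happy path. A minimal sketch of the equivalence, using generic names rather than sqlmap's own:

    import threading

    lock = threading.Lock()
    shared = []

    # Before: explicit pairing; an exception raised between acquire()
    # and release() would leave the lock held forever.
    def append_unsafe(item):
        lock.acquire()
        shared.append(item)
        lock.release()

    # After: the lock is used as a context manager, so it is released
    # on any exit path from the block.
    def append_safe(item):
        with lock:
            shared.append(item)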

lib/core/common.py

@@ -1645,16 +1645,13 @@ def readCachedFileContent(filename, mode='rb'):
     """
     if filename not in kb.cache.content:
-        kb.locks.cache.acquire()
-        if filename not in kb.cache.content:
-            checkFile(filename)
-            with codecs.open(filename, mode, UNICODE_ENCODING) as f:
-                content = f.read()
-                kb.cache.content[filename] = content
-        kb.locks.cache.release()
+        with kb.locks.cache:
+            if filename not in kb.cache.content:
+                checkFile(filename)
+                with codecs.open(filename, mode, UNICODE_ENCODING) as f:
+                    content = f.read()
+                    kb.cache.content[filename] = content
     return kb.cache.content[filename]
 
 def readXmlFile(xmlFile):
@@ -2113,14 +2110,11 @@ def logHTTPTraffic(requestLogMsg, responseLogMsg):
     if not conf.trafficFile:
         return
-    kb.locks.log.acquire()
-    dataToTrafficFile("%s%s" % (requestLogMsg, os.linesep))
-    dataToTrafficFile("%s%s" % (responseLogMsg, os.linesep))
-    dataToTrafficFile("%s%s%s%s" % (os.linesep, 76 * '#', os.linesep, os.linesep))
-    kb.locks.log.release()
+    with kb.locks.log:
+        dataToTrafficFile("%s%s" % (requestLogMsg, os.linesep))
+        dataToTrafficFile("%s%s" % (responseLogMsg, os.linesep))
+        dataToTrafficFile("%s%s%s%s" % (os.linesep, 76 * '#', os.linesep, os.linesep))
 
 def getPageTemplate(payload, place):
     """
     Cross-linked method
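
readCachedFileContent keeps its check-lock-check shape after the rewrite: the unlocked test skips the lock entirely on a cache hit, and the second test inside the with block stops a thread that lost the race from reading the file again. A rough sketch of that double-checked pattern with placeholder names (cache, lock and cached_read are illustrations, not sqlmap identifiers):

    import threading

    cache = {}
    lock = threading.Lock()

    def cached_read(path):
        if path not in cache:              # cheap unlocked check for the common hit
            with lock:
                if path not in cache:      # re-check: another thread may have filled it meanwhile
                    with open(path, "rb") as f:
                        cache[path] = f.read()
        return cache[path]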

lib/utils/crawler.py

@@ -42,12 +42,10 @@ class Crawler:
         threadData = getCurrentThreadData()
 
         while kb.threadContinue:
-            kb.locks.limits.acquire()
-            if threadData.shared.unprocessed:
-                current = threadData.shared.unprocessed.pop()
-                kb.locks.limits.release()
-            else:
-                kb.locks.limits.release()
-                break
+            with kb.locks.limits:
+                if threadData.shared.unprocessed:
+                    current = threadData.shared.unprocessed.pop()
+                else:
+                    break
 
             content = None
@@ -83,11 +81,10 @@ class Crawler:
                             continue
 
                         if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
-                            kb.locks.outputs.acquire()
-                            threadData.shared.deeper.add(url)
-                            if re.search(r"(.*?)\?(.+)", url):
-                                threadData.shared.outputs.add(url)
-                            kb.locks.outputs.release()
+                            with kb.locks.outputs:
+                                threadData.shared.deeper.add(url)
+                                if re.search(r"(.*?)\?(.+)", url):
+                                    threadData.shared.outputs.add(url)
 
                 except UnicodeEncodeError: # for non-HTML files
                     pass
                 finally:
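
In the crawler hunk at line 42, break now sits inside the with block; that is still safe, because leaving the block via break (or return, or an exception) runs the lock's __exit__ and releases it, which is exactly what the two separate release() calls in the old code were there for. A small self-contained sketch of that shape, with placeholder data instead of sqlmap's shared thread state:

    import threading

    lock = threading.Lock()
    work = ["a", "b", "c"]   # placeholder queue, not sqlmap's structures

    while True:
        with lock:
            if work:
                item = work.pop()
            else:
                break                    # the lock is still released when the block is left
        print("processing", item)        # the actual work happens outside the lock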