some more refactoring

This commit is contained in:
Miroslav Stampar 2012-06-14 13:52:56 +00:00
parent facce2c0df
commit d2dd47fb23
2 changed files with 19 additions and 28 deletions

View File

@ -1645,16 +1645,13 @@ def readCachedFileContent(filename, mode='rb'):
"""
if filename not in kb.cache.content:
kb.locks.cache.acquire()
with kb.locks.cache:
if filename not in kb.cache.content:
checkFile(filename)
with codecs.open(filename, mode, UNICODE_ENCODING) as f:
content = f.read()
kb.cache.content[filename] = content
kb.locks.cache.release()
return kb.cache.content[filename]
def readXmlFile(xmlFile):
@ -2113,14 +2110,11 @@ def logHTTPTraffic(requestLogMsg, responseLogMsg):
if not conf.trafficFile:
return
kb.locks.log.acquire()
with kb.locks.log:
dataToTrafficFile("%s%s" % (requestLogMsg, os.linesep))
dataToTrafficFile("%s%s" % (responseLogMsg, os.linesep))
dataToTrafficFile("%s%s%s%s" % (os.linesep, 76 * '#', os.linesep, os.linesep))
kb.locks.log.release()
def getPageTemplate(payload, place):
"""
Cross-linked method

View File

@ -42,12 +42,10 @@ class Crawler:
threadData = getCurrentThreadData()
while kb.threadContinue:
kb.locks.limits.acquire()
with kb.locks.limits:
if threadData.shared.unprocessed:
current = threadData.shared.unprocessed.pop()
kb.locks.limits.release()
else:
kb.locks.limits.release()
break
content = None
@ -83,11 +81,10 @@ class Crawler:
continue
if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
kb.locks.outputs.acquire()
with kb.locks.outputs:
threadData.shared.deeper.add(url)
if re.search(r"(.*?)\?(.+)", url):
threadData.shared.outputs.add(url)
kb.locks.outputs.release()
except UnicodeEncodeError: # for non-HTML files
pass
finally: