Fix a bug in the dynamic content engine (dynamic markings are now regex-escaped) and refactor cached file reading and lock handling

This commit is contained in:
Miroslav Stampar 2010-11-02 07:32:08 +00:00
parent 73b33ed765
commit 13e93f564a
5 changed files with 27 additions and 23 deletions

View File

@ -185,7 +185,7 @@ def checkDynamicContent(firstPage, secondPage):
if postfix is None and (blocks[i][0] + blocks[i][2] >= len(firstPage)):
continue
kb.dynamicMarkings.append((prefix[-conf.dynMarkLength:] if prefix else None, postfix[:conf.dynMarkLength] if postfix else None))
kb.dynamicMarkings.append((re.escape(prefix[-conf.dynMarkLength:]) if prefix else None, re.escape(postfix[:conf.dynMarkLength]) if postfix else None))
if len(kb.dynamicMarkings) > 0:
infoMsg = "dynamic content marked for removal (%d region%s)" % (len(kb.dynamicMarkings), 's' if len(kb.dynamicMarkings) > 1 else '')

View File

@ -1239,21 +1239,25 @@ def getConsoleWidth(default=80):
return width if width else default
def parseXmlFile(xmlFile, handler):
    """
    Feed the (cached) content of xmlFile to parse() using the given handler.

    The file content is obtained through readCachedFileContent(), so the
    file is read from disk at most once and the read is serialized there;
    no separate caching or locking is done in this function.

    @param xmlFile: path of the XML file to parse
    @param handler: handler object passed through to parse() as is
    """

    stream = StringIO(readCachedFileContent(xmlFile))

    # Close the in-memory stream even when parse() raises
    try:
        parse(stream, handler)
    finally:
        stream.close()
def readCachedFileContent(filename, mode='rb'):
    """
    Return the content of the given file, reading it from disk only on
    the first request and serving it from kb.cache.content afterwards.

    @param filename: path of the file; also used as the cache key
    @param mode: mode passed to codecs.open() (default 'rb')
    @return: file content as returned by read() on a codecs.open() handle
             opened with conf.dataEncoding
    """

    # Unlocked fast path: nothing to do when the entry is already cached
    if filename not in kb.cache.content:
        kb.data.cacheLock.acquire()

        # try/finally guarantees the lock is released even when checkFile(),
        # codecs.open() or read() raises; otherwise every later caller
        # would block forever on acquire()
        try:
            # Re-check under the lock: another thread may have filled the
            # entry while this one was waiting for the lock
            if filename not in kb.cache.content:
                checkFile(filename)
                xfile = codecs.open(filename, mode, conf.dataEncoding)

                try:
                    kb.cache.content[filename] = xfile.read()
                finally:
                    xfile.close()
        finally:
            kb.data.cacheLock.release()

    return kb.cache.content[filename]
def readXmlFile(xmlFile):
checkFile(xmlFile)
xfile = codecs.open(xmlFile, 'r', conf.dataEncoding)

View File

@ -16,6 +16,7 @@ import os
import re
import socket
import sys
import threading
import urllib2
import urlparse
@ -993,7 +994,6 @@ def __setConfAttributes():
conf.outputPath = None
conf.paramDict = {}
conf.parameters = {}
conf.parseLock = None
conf.path = None
conf.port = None
conf.redirectHandled = False
@ -1001,7 +1001,6 @@ def __setConfAttributes():
conf.scheme = None
#conf.seqMatcher = difflib.SequenceMatcher(lambda x: x in " \t")
conf.seqMatcher = difflib.SequenceMatcher(None)
conf.seqLock = None
conf.sessionFP = None
conf.start = True
conf.threadContinue = True
@ -1027,7 +1026,10 @@ def __setKnowledgeBaseAttributes():
kb.cache.regex = {}
kb.commonOutputs = None
kb.data = advancedDict()
kb.data.cacheLock = threading.Lock()
kb.data.seqLock = None
# Basic back-end DBMS fingerprint
kb.dbms = None

View File

@ -58,8 +58,8 @@ def comparison(page, headers=None, getSeqMatcher=False, pageLength=None):
else:
page = re.sub('(?s)%s.+%s' % (prefix, postfix), '%s%s' % (prefix, postfix), page)
if conf.seqLock:
conf.seqLock.acquire()
if kb.data.seqLock:
kb.data.seqLock.acquire()
if not conf.eRegexp and not conf.eString and kb.nullConnection:
ratio = 1. * pageLength / len(conf.seqMatcher.a)
@ -69,8 +69,8 @@ def comparison(page, headers=None, getSeqMatcher=False, pageLength=None):
conf.seqMatcher.set_seq2(page if not conf.textOnly else getFilteredPageContent(page))
ratio = round(conf.seqMatcher.ratio(), 3)
if conf.seqLock:
conf.seqLock.release()
if kb.data.seqLock:
kb.data.seqLock.release()
# If the URL is stable, the match ratio has not been set yet and the
# current injected value changes the URL page content

View File

@ -251,8 +251,7 @@ def bisection(payload, expression, length=None, charsetType=None, firstChar=None
idxlock = threading.Lock()
iolock = threading.Lock()
valuelock = threading.Lock()
conf.seqLock = threading.Lock()
conf.parseLock = threading.Lock()
kb.data.seqLock = threading.Lock()
conf.threadContinue = True
def downloadThread():
@ -416,8 +415,7 @@ def bisection(payload, expression, length=None, charsetType=None, firstChar=None
if conf.verbose >= 1 and not showEta and infoMsg:
dataToStdout(infoMsg)
conf.seqLock = None
conf.parseLock = None
kb.data.seqLock = None
# No multi-threading (--threads = 1)
else: