Added singleValue parameter for good samaritan (same thing Bernardo wanted) :)

This commit is contained in:
Miroslav Stampar 2010-05-25 13:51:03 +00:00
parent 056d1ad76e
commit 065d5b02ec
2 changed files with 18 additions and 7 deletions

View File

@ -1219,16 +1219,17 @@ def initCommonOutputs():
cfile.close() cfile.close()
def getGoodSamaritanCharsets(part, prevValue, originalCharset): def getGoodSamaritanParameters(part, prevValue, originalCharset):
###wild card . (dot) is supported for compatibility with threading ###wild card . (dot) is supported for compatibility with threading
if kb.commonOutputs is None: if kb.commonOutputs is None:
initCommonOutputs() initCommonOutputs()
if not part or not prevValue: #is not None and != "" if not part or not prevValue: #is not None and != ""
return None, originalCharset return None, None, originalCharset
predictionSet = set() predictionSet = set()
wildIndexes = [] wildIndexes = []
singleValue = None
if prevValue[-1] != '.': if prevValue[-1] != '.':
prevValue += '.' prevValue += '.'
@ -1244,6 +1245,7 @@ def getGoodSamaritanCharsets(part, prevValue, originalCharset):
if part in kb.commonOutputs: if part in kb.commonOutputs:
for item in kb.commonOutputs[part]: for item in kb.commonOutputs[part]:
if re.search('\A%s' % prevValue, item): if re.search('\A%s' % prevValue, item):
singleValue = item
for index in wildIndexes: for index in wildIndexes:
char = item[index] char = item[index]
@ -1261,9 +1263,12 @@ def getGoodSamaritanCharsets(part, prevValue, originalCharset):
predictedCharset.sort() predictedCharset.sort()
return predictedCharset, otherCharset if len(predictedCharset) > 1:
singleValue = None
return singleValue, predictedCharset, otherCharset
else: else:
return None, originalCharset return None, None, originalCharset
def getCompiledRegex(regex): def getCompiledRegex(regex):
if regex in __compiledRegularExpressions: if regex in __compiledRegularExpressions:

View File

@ -30,7 +30,7 @@ from lib.core.agent import agent
from lib.core.common import dataToSessionFile from lib.core.common import dataToSessionFile
from lib.core.common import dataToStdout from lib.core.common import dataToStdout
from lib.core.common import getCharset from lib.core.common import getCharset
from lib.core.common import getGoodSamaritanCharsets from lib.core.common import getGoodSamaritanParameters
from lib.core.common import replaceNewlineTabs from lib.core.common import replaceNewlineTabs
from lib.core.common import safeStringFormat from lib.core.common import safeStringFormat
from lib.core.convert import urlencode from lib.core.convert import urlencode
@ -370,8 +370,14 @@ def bisection(payload, expression, length=None, charsetType=None, firstChar=None
charStart = time.time() charStart = time.time()
if conf.useCommonPrediction: if conf.useCommonPrediction:
predictedCharset, otherCharset = getGoodSamaritanCharsets(kb.partRun, finalValue, asciiTbl) singleValue, predictedCharset, otherCharset = getGoodSamaritanParameters(kb.partRun, finalValue, asciiTbl)
val = getChar(index, predictedCharset, False) if predictedCharset else None if singleValue is None:
val = getChar(index, predictedCharset, False) if predictedCharset else None
else:
##check if that's the value
#finalValue = singleValue
#break
pass
if not val: if not val:
val = getChar(index, otherCharset) val = getChar(index, otherCharset)
else: else: