Important fix for Unicode-based character inference

This commit is contained in:
Miroslav Stampar 2011-01-17 10:15:19 +00:00
parent 99a3a3b89c
commit 5c857779c1
2 changed files with 9 additions and 1 deletions

View File

@ -76,6 +76,9 @@ MIN_TIME_RESPONSES = 15
# after this number of blanks at the end inference should stop (just in case) # after this number of blanks at the end inference should stop (just in case)
INFERENCE_BLANK_BREAK = 10 INFERENCE_BLANK_BREAK = 10
# use this replacement character for cases when inference is not able to retrieve the proper character value
INFERENCE_UNKNOWN_CHAR = '?'
# string used for representation of unknown dbms version # string used for representation of unknown dbms version
UNKNOWN_DBMS_VERSION = "Unknown" UNKNOWN_DBMS_VERSION = "Unknown"

View File

@ -37,6 +37,7 @@ from lib.core.exception import unhandledException
from lib.core.progress import ProgressBar from lib.core.progress import ProgressBar
from lib.core.settings import CHAR_INFERENCE_MARK from lib.core.settings import CHAR_INFERENCE_MARK
from lib.core.settings import INFERENCE_BLANK_BREAK from lib.core.settings import INFERENCE_BLANK_BREAK
from lib.core.settings import INFERENCE_UNKNOWN_CHAR
from lib.core.unescaper import unescaper from lib.core.unescaper import unescaper
from lib.request.connect import Connect as Request from lib.request.connect import Connect as Request
@ -216,6 +217,7 @@ def bisection(payload, expression, length=None, charsetType=None, firstChar=None
# list # list
if expand and shiftTable: if expand and shiftTable:
charTbl = xrange(maxChar + 1, (maxChar + 1) << shiftTable.pop()) charTbl = xrange(maxChar + 1, (maxChar + 1) << shiftTable.pop())
originalTbl = list(charTbl)
maxChar = maxValue = charTbl[-1] maxChar = maxValue = charTbl[-1]
minChar = minValue = charTbl[0] minChar = minValue = charTbl[0]
else: else:
@ -282,7 +284,7 @@ def bisection(payload, expression, length=None, charsetType=None, firstChar=None
charStart = time.time() charStart = time.time()
val = getChar(curidx) val = getChar(curidx)
if val is None: if val is None:
val = '?' val = INFERENCE_UNKNOWN_CHAR
else: else:
break break
@ -496,6 +498,9 @@ def bisection(payload, expression, length=None, charsetType=None, firstChar=None
if len(finalValue) > INFERENCE_BLANK_BREAK and finalValue[-INFERENCE_BLANK_BREAK:].isspace(): if len(finalValue) > INFERENCE_BLANK_BREAK and finalValue[-INFERENCE_BLANK_BREAK:].isspace():
break break
if finalValue:
finalValue = finalValue.rstrip(INFERENCE_UNKNOWN_CHAR)
if conf.verbose in (1, 2) or showEta: if conf.verbose in (1, 2) or showEta:
dataToStdout("\n") dataToStdout("\n")