mirror of https://github.com/sqlmapproject/sqlmap.git (synced 2025-04-05 01:34:18 +03:00)
code cleanup
This commit is contained in:
parent b12aa8a56f
commit eea96c5b8d
@@ -42,7 +42,7 @@ class CharDistributionAnalysis:
        self._mTableSize = None # Size of above table
        self._mTypicalDistributionRatio = None # This is a constant value which varies from language to language, used in calculating confidence. See http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html for further detail.
        self.reset()

    def reset(self):
        """reset analyser, clear any state"""
        self._mDone = constants.False # If this flag is set to constants.True, detection is done and conclusion has been made
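How _mTypicalDistributionRatio feeds into confidence: each language keeps a small set of very frequent characters, and confidence comes from comparing the observed frequent/infrequent ratio against that per-language constant. A minimal sketch of the calculation, assuming chardet-style counters (the _mFreqChars/_mTotalChars bookkeeping is not shown in this hunk):

SURE_YES = 0.99
SURE_NO = 0.01

def distribution_confidence(freq_chars, total_chars, typical_ratio):
    # freq_chars: hits inside the language's high-frequency character set;
    # total_chars: all multi-byte characters examined so far.
    if total_chars <= 0:
        return SURE_NO
    if total_chars != freq_chars:
        # Observed frequent/infrequent ratio, normalised by the language's
        # typical ratio and capped at SURE_YES.
        r = float(freq_chars) / ((total_chars - freq_chars) * typical_ratio)
        if r < SURE_YES:
            return r
    return SURE_YES

print(distribution_confidence(750, 1000, 0.75))  # ratio 4.0, capped to 0.99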
@@ -87,7 +87,7 @@ class CharDistributionAnalysis:
        # convert this encoding string to a number, here called order.
        # This allows multiple encodings of a language to share one frequency table.
        return -1


class EUCTWDistributionAnalysis(CharDistributionAnalysis):
    def __init__(self):
        CharDistributionAnalysis.__init__(self)
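What get_order does here: it turns a two-byte character into an index ("order") into a frequency table that every encoding of the language shares, returning -1 for bytes outside the tracked range. A sketch of the arithmetic for a 94-column code page in the EUC-TW style (the byte ranges are the standard EUC layout, stated as an assumption rather than read from this hunk):

def euctw_order(byte1, byte2):
    # EUC-TW plane 1 uses 0xC4-0xFE for the lead byte and 0xA1-0xFE for the
    # trail byte, a 94-column grid; anything else is not in the table.
    if 0xC4 <= byte1 <= 0xFE and 0xA1 <= byte2 <= 0xFE:
        return 94 * (byte1 - 0xC4) + (byte2 - 0xA1)
    return -1

print(euctw_order(0xC4, 0xA1))  # 0, the first table slot
print(euctw_order(0x41, 0x42))  # -1, an ASCII pair is not tracked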
@@ -34,7 +34,7 @@ class CharSetGroupProber(CharSetProber):
        self._mActiveNum = 0
        self._mProbers = []
        self._mBestGuessProber = None

    def reset(self):
        CharSetProber.reset(self)
        self._mActiveNum = 0
@@ -31,10 +31,10 @@ import constants, re

class CharSetProber:
    def __init__(self):
        pass

    def reset(self):
        self._mState = constants.eDetecting

    def get_charset_name(self):
        return None
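The base class above pins down the prober contract: feed() consumes a chunk and returns the new state, get_state() is one of eDetecting/eFoundIt/eNotMe, and get_confidence() only matters while detection is still open. A sketch of a driver loop over that contract (the loop itself is illustrative, not code from this commit):

eDetecting, eFoundIt, eNotMe = 0, 1, 2   # mirrors the constants module

def run_probers(probers, chunks):
    # Feed data to every live prober; stop early on a definitive answer.
    for chunk in chunks:
        for p in probers:
            if p.get_state() == eNotMe:
                continue             # this prober has ruled itself out
            if p.feed(chunk) == eFoundIt:
                return p.get_charset_name()
    # No prober was certain: fall back to the highest confidence.
    live = [p for p in probers if p.get_state() != eNotMe]
    if not live:
        return None
    return max(live, key=lambda p: p.get_confidence()).get_charset_name()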
@@ -50,11 +50,11 @@ class CharSetProber:
    def filter_high_bit_only(self, aBuf):
        aBuf = re.sub(r'([\x00-\x7F])+', ' ', aBuf)
        return aBuf

    def filter_without_english_letters(self, aBuf):
        aBuf = re.sub(r'([A-Za-z])+', ' ', aBuf)
        return aBuf

    def filter_with_english_letters(self, aBuf):
        # TODO
        return aBuf
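filter_high_bit_only strips everything the multi-byte models ignore: any run of ASCII bytes collapses to a single space, so only high-bit bytes (plus word boundaries) reach the statistics. A quick standalone illustration of the regex from this hunk (the original ran on Python 2 byte strings; on Python 3 the same pattern works character-wise on str):

import re

def filter_high_bit_only(buf):
    # Replace each run of ASCII characters (\x00-\x7F) with one space.
    return re.sub(r'([\x00-\x7F])+', ' ', buf)

sample = 'price: 100\xe5\x86\x86 (tax in)'
print(repr(filter_high_bit_only(sample)))   # ' \xe5\x86\x86 '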
@@ -75,5 +75,5 @@ class EscCharSetProber(CharSetProber):
                    self._mState = constants.eFoundIt
                    self._mDetectedCharset = codingSM.get_coding_state_machine()
                    return self.get_state()

        return self.get_state()
@@ -44,7 +44,7 @@ class EUCJPProber(MultiByteCharSetProber):
    def reset(self):
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        return "EUC-JP"
@@ -69,9 +69,9 @@ class EUCJPProber(MultiByteCharSetProber):
                else:
                    self._mContextAnalyzer.feed(aBuf[i-1:i+1], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen)

        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if self._mContextAnalyzer.got_enough_data() and \
               (self.get_confidence() > constants.SHORTCUT_THRESHOLD):
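The trailing condition is the shortcut optimisation: once the context analyser has seen enough two-character sequences and confidence already clears constants.SHORTCUT_THRESHOLD, the prober can declare eFoundIt without scanning the rest of the input. A toy sketch of the pattern (the 0.95 threshold and the counters here are assumptions for illustration):

SHORTCUT_THRESHOLD = 0.95   # assumed value of constants.SHORTCUT_THRESHOLD

class ShortcutExample:
    def __init__(self):
        self.state = 'detecting'
        self.sequences_seen = 0

    def confidence(self):
        # Placeholder: a real prober derives this from its analysers.
        return min(0.99, self.sequences_seen / 100.0)

    def feed(self, chunk):
        self.sequences_seen += len(chunk)
        # Early exit: enough data plus high confidence ends detection now,
        # instead of waiting for the whole stream.
        if self.sequences_seen >= 100 and self.confidence() > SHORTCUT_THRESHOLD:
            self.state = 'found it'
        return self.state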
@@ -164,7 +164,7 @@ class HebrewProber(CharSetProber):
        self._mPrev = ' '
        self._mBeforePrev = ' '
        # These probers are owned by the group prober.

    def set_model_probers(self, logicalProber, visualProber):
        self._mLogicalProber = logicalProber
        self._mVisualProber = visualProber
@@ -184,7 +184,7 @@ class HebrewProber(CharSetProber):
        # these letters as Non-Final letters outweighs the damage since these words
        # are quite rare.
        return c in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]

    def feed(self, aBuf):
        # Final letter analysis for logical-visual decision.
        # Look for evidence that the received buffer is either logical Hebrew or
@@ -215,7 +215,7 @@ class HebrewProber(CharSetProber):
            return constants.eNotMe

        aBuf = self.filter_high_bit_only(aBuf)

        for cur in aBuf:
            if cur == ' ':
                # We stand on a space - a word just ended
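The idea behind these HebrewProber hunks: five Hebrew letters have distinct word-final forms, so in logical-order (ISO-8859-8-I) text a final form sits at the end of a word, while in visual-order text the stored words are reversed and the final form appears at the stored start of the word. A standalone sketch of that scoring (the codepoints are the standard Unicode final forms; the scoring itself is my illustration of the idea, not this file's tables):

# Word-final Hebrew letters: Kaf, Mem, Nun, Pe, Tsadi sofit.
FINAL_LETTERS = set('\u05da\u05dd\u05df\u05e3\u05e5')

def logical_vs_visual_score(text):
    # Returns (logical_hits, visual_hits) for decoded Hebrew text.
    # Final form right before a space: evidence of logical order.
    # Final form right after a space: evidence of visual (reversed) order.
    logical = visual = 0
    prev = ' '
    for cur in text + ' ':
        if cur == ' ' and prev in FINAL_LETTERS:
            logical += 1        # word ended with a final form
        if prev == ' ' and cur in FINAL_LETTERS:
            visual += 1         # word *started* with a final form
        prev = cur
    return logical, visual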
@@ -123,7 +123,7 @@ jp2CharContext = ( \
class JapaneseContextAnalysis:
    def __init__(self):
        self.reset()

    def reset(self):
        self._mTotalRel = 0 # total sequences received
        self._mRelSample = [0] * NUM_OF_CATEGORY # category counters; each integer counts sequences in its category
@@ -133,7 +133,7 @@ class JapaneseContextAnalysis:

    def feed(self, aBuf, aLen):
        if self._mDone: return

        # The buffer we get is byte oriented, and a character may span more than one
        # buffer. If the last one or two bytes of the previous buffer were not a complete
        # character, we record how many bytes are needed to complete it and skip them here.
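That comment describes the classic chunk-boundary problem: a two-byte character can be split across consecutive feed() calls. A minimal sketch of carrying the tail over (the lead-byte test is simplified to "high bit set means two-byte lead"; the real state machine is more careful):

class ChunkJoiner:
    def __init__(self):
        self._pending = b''          # incomplete character bytes from the last buffer

    def feed(self, buf):
        buf = self._pending + buf
        self._pending = b''
        i = 0
        while i < len(buf):
            if buf[i] < 0x80:        # single-byte character
                i += 1
                continue
            if i + 1 >= len(buf):    # lead byte with no trail byte yet:
                self._pending = buf[i:]   # save it for the next feed()
                break
            self.handle_pair(buf[i:i + 2])
            i += 2

    def handle_pair(self, pair):
        print('two-byte char:', pair)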
@@ -158,7 +158,7 @@ class JapaneseContextAnalysis:

    def got_enough_data(self):
        return self._mTotalRel > ENOUGH_REL_THRESHOLD

    def get_confidence(self):
        # This is just one way to calculate confidence. It works well for me.
        if self._mTotalRel > MINIMUM_DATA_THRESHOLD:
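The confidence this hunk leads into is, in upstream chardet, simply the share of relevant two-character sequences that did not land in the worst category. A hedged sketch (category 0 as the negative bucket follows chardet's convention; treat the exact formula and threshold as assumptions):

DONT_KNOW = -1
MINIMUM_DATA_THRESHOLD = 4     # assumed value; upstream keeps it small

def context_confidence(total_rel, rel_sample):
    # rel_sample[0] counts sequences in the "unlikely for Japanese" category.
    if total_rel > MINIMUM_DATA_THRESHOLD:
        return float(total_rel - rel_sample[0]) / total_rel
    return DONT_KNOW

print(context_confidence(100, [5, 20, 30, 45, 0]))   # 0.95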
@@ -168,7 +168,7 @@ class JapaneseContextAnalysis:

    def get_order(self, aStr):
        return -1, 1


class SJISContextAnalysis(JapaneseContextAnalysis):
    def get_order(self, aStr):
        if not aStr: return -1, 1
@@ -122,7 +122,7 @@ class Latin1Prober(CharSetProber):
    def get_confidence(self):
        if self.get_state() == constants.eNotMe:
            return 0.01

        total = reduce(operator.add, self._mFreqCounter)
        if total < 0.01:
            confidence = 0.0
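reduce(operator.add, self._mFreqCounter) is just a Python 2-era spelling of sum() over the per-class frequency counters; the confidence is then a ratio of likely-looking character pairs to the total. A sketch of the shape of that calculation (the class layout and the damping factor are my simplification; upstream weights the classes differently):

from functools import reduce   # needed on Python 3; a builtin on Python 2
import operator

freq_counter = [3, 10, 40, 120]   # e.g. counts per character-pair class

total = reduce(operator.add, freq_counter)   # identical to sum(freq_counter)
if total < 0.01:
    confidence = 0.0
else:
    confidence = float(freq_counter[-1]) / total
    confidence *= 0.50   # damp it: Latin-1 matches too much by accident
print(round(confidence, 3))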
@@ -68,9 +68,9 @@ class MultiByteCharSetProber(CharSetProber):
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen)

        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if self._mDistributionAnalyzer.got_enough_data() and \
               (self.get_confidence() > constants.SHORTCUT_THRESHOLD):
@@ -37,7 +37,7 @@ SYMBOL_CAT_ORDER = 250
NUMBER_OF_SEQ_CAT = 4
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
#NEGATIVE_CAT = 0


class SingleByteCharSetProber(CharSetProber):
    def __init__(self, model, reversed=constants.False, nameProber=None):
        CharSetProber.__init__(self)
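Single-byte probers score pairs of adjacent characters: a language model maps each pair to one of NUMBER_OF_SEQ_CAT likelihood categories, and hits in POSITIVE_CAT (the highest) drive confidence up. A toy sketch of that scoring (the model contents are invented for illustration; real models are 64x64 tables built from corpora):

NUMBER_OF_SEQ_CAT = 4
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1   # category 3 = very likely pair

# Invented toy model: likelihood category for a few (prev, cur) pairs.
TOY_MODEL = {('t', 'h'): 3, ('h', 'e'): 3, ('q', 'x'): 0}

def score_text(text):
    seq_counters = [0] * NUMBER_OF_SEQ_CAT
    for prev, cur in zip(text, text[1:]):
        cat = TOY_MODEL.get((prev, cur), 1)   # unknown pairs: weakly neutral
        seq_counters[cat] += 1
    total = sum(seq_counters)
    return float(seq_counters[POSITIVE_CAT]) / total if total else 0.0

print(score_text('the'))   # both pairs are POSITIVE_CAT, so 1.0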
@@ -44,7 +44,7 @@ class SJISProber(MultiByteCharSetProber):
    def reset(self):
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        return "SHIFT_JIS"
@@ -69,9 +69,9 @@ class SJISProber(MultiByteCharSetProber):
                else:
                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen : i + 3 - charLen], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1 : i + 1], charLen)

        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if self._mContextAnalyzer.got_enough_data() and \
               (self.get_confidence() > constants.SHORTCUT_THRESHOLD):
@@ -63,7 +63,7 @@ class UniversalDetector:

        aLen = len(aBuf)
        if not aLen: return

        if not self._mGotData:
            # If the data starts with BOM, we know it is UTF
            if aBuf[:3] == '\xEF\xBB\xBF':
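The BOM check is the only case where detection is certain on sight. A sketch of the usual prefix table (the byte sequences are the standard Unicode BOMs; the UTF-32 tests must run before the UTF-16 ones because their prefixes overlap):

def sniff_bom(data):
    # Return (encoding, confidence) if data starts with a known BOM.
    boms = [
        (b'\xEF\xBB\xBF',     'UTF-8'),
        (b'\xFF\xFE\x00\x00', 'UTF-32LE'),   # must precede UTF-16LE
        (b'\x00\x00\xFE\xFF', 'UTF-32BE'),
        (b'\xFF\xFE',         'UTF-16LE'),
        (b'\xFE\xFF',         'UTF-16BE'),
    ]
    for bom, enc in boms:
        if data.startswith(bom):
            return enc, 1.0
    return None

print(sniff_bom(b'\xEF\xBB\xBFhello'))   # ('UTF-8', 1.0)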
@@ -125,7 +125,7 @@ class UniversalDetector:
            sys.stderr.write('no data received!\n')
            return
        self.done = constants.True

        if self._mInputState == ePureAscii:
            self.result = {'encoding': 'ascii', 'confidence': 1.0}
            return self.result
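For context, this detector is driven incrementally: feed() chunks until done flips, then close() fills in result (falling back to the pure-ASCII answer shown above). A usage sketch against the upstream chardet layout (sqlmap bundles its own copy, so the import path here is an assumption):

from chardet.universaldetector import UniversalDetector   # path assumed

detector = UniversalDetector()
with open('unknown.txt', 'rb') as f:
    for chunk in iter(lambda: f.read(4096), b''):
        detector.feed(chunk)
        if detector.done:           # a prober reached a confident answer
            break
detector.close()
print(detector.result)              # e.g. {'encoding': 'ascii', 'confidence': 1.0}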