Mirror of https://github.com/sqlmapproject/sqlmap.git (synced 2025-07-16 03:02:20 +03:00)

Commit eea96c5b8d: code cleanup
Parent: b12aa8a56f

@@ -42,7 +42,7 @@ class CharDistributionAnalysis:
        self._mTableSize = None  # Size of above table
        self._mTypicalDistributionRatio = None  # This is a constant value which varies from language to language, used in calculating confidence. See http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html for further detail.
        self.reset()

    def reset(self):
        """reset analyser, clear any state"""
        self._mDone = constants.False  # If this flag is set to constants.True, detection is done and conclusion has been made

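The typical distribution ratio above feeds directly into the prober's confidence. As a rough illustration of the idea (a minimal sketch, not the exact implementation; the counter names and the 0.99 cap are assumptions):

    def distribution_confidence(freq_chars, total_chars, typical_ratio):
        # Ratio of frequently-used characters to everything else, normalized
        # by the language's typical ratio; a value near 1.0 means the input
        # matches the expected character distribution for that language.
        if total_chars <= 0 or freq_chars <= 0:
            return 0.01
        rare_chars = total_chars - freq_chars
        if rare_chars == 0:
            return 0.99
        r = freq_chars / (rare_chars * typical_ratio)
        return min(r, 0.99)  # cap so one signal never claims certainty
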
@@ -87,7 +87,7 @@ class CharDistributionAnalysis:
        # convert this encoding string to a number, here called order.
        # This allows multiple encodings of a language to share one frequency table.
        return -1

class EUCTWDistributionAnalysis(CharDistributionAnalysis):
    def __init__(self):
        CharDistributionAnalysis.__init__(self)

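The order returned by get_order is an index into a per-language frequency table, which is what lets several encodings of one language share a single table. A hedged sketch of a concrete mapping onto a 94x94 code space (the byte ranges are illustrative assumptions, not the real EUC-TW constants):

    def get_order_94x94(byte_pair, lead_min=0xC4, trail_min=0xA1):
        # Flatten a two-byte character onto a 94x94 grid; characters
        # outside the expected lead-byte range map to -1 and are ignored.
        if len(byte_pair) < 2 or byte_pair[0] < lead_min:
            return -1
        return 94 * (byte_pair[0] - lead_min) + (byte_pair[1] - trail_min)
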
@@ -34,7 +34,7 @@ class CharSetGroupProber(CharSetProber):
        self._mActiveNum = 0
        self._mProbers = []
        self._mBestGuessProber = None

    def reset(self):
        CharSetProber.reset(self)
        self._mActiveNum = 0

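CharSetGroupProber fans each buffer out to its child probers and keeps track of the best guess. A minimal sketch of that delegation pattern (Python 3; the loop body is an assumption, simplified to ignore prober state handling):

    class GroupProberSketch:
        def __init__(self, probers):
            self.probers = probers

        def feed(self, buf):
            # Feed every prober and remember whichever reports the
            # highest confidence so far.
            best, best_conf = None, 0.0
            for prober in self.probers:
                prober.feed(buf)
                conf = prober.get_confidence()
                if conf > best_conf:
                    best, best_conf = prober, conf
            return best
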
@@ -31,10 +31,10 @@ import constants, re
class CharSetProber:
    def __init__(self):
        pass

    def reset(self):
        self._mState = constants.eDetecting

    def get_charset_name(self):
        return None

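This base class fixes the contract every prober follows: feed() consumes bytes, get_state() reports detecting / found-it / not-me, and get_charset_name() names the winner. A toy subclass showing the shape of that contract (states modeled as plain strings, purely for illustration):

    class AsciiOnlyProberSketch:
        def __init__(self):
            self.state = 'detecting'

        def feed(self, buf):
            # Any byte with the high bit set rules this prober out.
            if any(b & 0x80 for b in buf):
                self.state = 'not-me'
            return self.state

        def get_charset_name(self):
            return 'ascii' if self.state != 'not-me' else None
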
@@ -50,11 +50,11 @@ class CharSetProber:
    def filter_high_bit_only(self, aBuf):
        aBuf = re.sub(r'([\x00-\x7F])+', ' ', aBuf)
        return aBuf

    def filter_without_english_letters(self, aBuf):
        aBuf = re.sub(r'([A-Za-z])+', ' ', aBuf)
        return aBuf

    def filter_with_english_letters(self, aBuf):
        # TODO
        return aBuf

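Both filters collapse runs of uninteresting bytes into a single space, which discards content the models should not score while preserving word boundaries. For example (shown on bytes, as one would write it in Python 3):

    import re

    buf = b'abc \xd7\xa9\xd7\x9c\xd7\x95\xd7\x9d def'
    re.sub(rb'([\x00-\x7F])+', b' ', buf)
    # -> b' \xd7\xa9\xd7\x9c\xd7\x95\xd7\x9d '   (each ASCII run becomes one space)
    re.sub(rb'([A-Za-z])+', b' ', buf)
    # -> b'  \xd7\xa9\xd7\x9c\xd7\x95\xd7\x9d  ' (each letter run becomes one space)
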
@@ -75,5 +75,5 @@ class EscCharSetProber(CharSetProber):
                    self._mState = constants.eFoundIt
                    self._mDetectedCharset = codingSM.get_coding_state_machine()
                    return self.get_state()

        return self.get_state()

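EscCharSetProber works by running every byte through one coding state machine per escape-driven encoding (the ISO-2022 variants and the like); the first machine to reach its its-me state decides the charset. A rough sketch of that loop with hypothetical machine objects (next_state and charset_name are illustrative names):

    def esc_feed_sketch(machines, buf):
        # Each state machine consumes one byte at a time; 'itsme' means
        # a complete, unambiguous escape sequence was recognized.
        for byte in buf:
            for sm in machines:
                if sm.next_state(byte) == 'itsme':
                    return sm.charset_name  # detection done
        return None  # still detecting
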
@@ -44,7 +44,7 @@ class EUCJPProber(MultiByteCharSetProber):
    def reset(self):
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        return "EUC-JP"

@@ -69,9 +69,9 @@ class EUCJPProber(MultiByteCharSetProber):
            else:
                self._mContextAnalyzer.feed(aBuf[i-1:i+1], charLen)
                self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen)

        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if self._mContextAnalyzer.got_enough_data() and \
                   (self.get_confidence() > constants.SHORTCUT_THRESHOLD):

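The closing check in feed() is an early-exit optimization shared by the multi-byte probers: once the analyzers have seen enough data and confidence clears SHORTCUT_THRESHOLD, the prober flips to found-it instead of scanning the rest of the input. Schematically (the threshold value here is illustrative):

    SHORTCUT_THRESHOLD = 0.95  # illustrative value

    def maybe_shortcut(state, got_enough_data, confidence):
        # Declare victory early when the evidence is overwhelming,
        # saving a full pass over large inputs.
        if state == 'detecting' and got_enough_data \
                and confidence > SHORTCUT_THRESHOLD:
            return 'found-it'
        return state
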
@@ -164,7 +164,7 @@ class HebrewProber(CharSetProber):
        self._mPrev = ' '
        self._mBeforePrev = ' '
        # These probers are owned by the group prober.

    def set_model_probers(self, logicalProber, visualProber):
        self._mLogicalProber = logicalProber
        self._mVisualProber = visualProber

@@ -184,7 +184,7 @@ class HebrewProber(CharSetProber):
        # these letters as Non-Final letters outweighs the damage since these words
        # are quite rare.
        return c in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]

    def feed(self, aBuf):
        # Final letter analysis for logical-visual decision.
        # Look for evidence that the received buffer is either logical Hebrew or

@@ -215,7 +215,7 @@ class HebrewProber(CharSetProber):
            return constants.eNotMe

        aBuf = self.filter_high_bit_only(aBuf)

        for cur in aBuf:
            if cur == ' ':
                # We stand on a space - a word just ended

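The word-boundary loop above drives the logical-versus-visual decision: in logical Hebrew a final-form letter sits at the end of a word, while in visual Hebrew (stored in reversed visual order) it shows up at what looks like the start. A simplified paraphrase of that scoring idea, not a copy of the prober:

    FINAL_FORMS = {'\u05da', '\u05dd', '\u05df', '\u05e3', '\u05e5'}  # final kaf, mem, nun, pe, tsadi

    def score_words(text):
        logical, visual = 0, 0
        for word in text.split():
            if word[-1] in FINAL_FORMS:
                logical += 1  # final form at word end: logical order
            if word[0] in FINAL_FORMS:
                visual += 1   # final form at word start: visual order
        return logical, visual
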
@@ -123,7 +123,7 @@ jp2CharContext = ( \
class JapaneseContextAnalysis:
    def __init__(self):
        self.reset()

    def reset(self):
        self._mTotalRel = 0  # total sequences received
        self._mRelSample = [0] * NUM_OF_CATEGORY  # category counters; each integer counts the sequences in its category

@@ -133,7 +133,7 @@ class JapaneseContextAnalysis:

    def feed(self, aBuf, aLen):
        if self._mDone: return

        # The buffer we get is byte oriented, and a character may span more than
        # one buffer. In case the last one or two bytes of the previous buffer
        # were not complete, we record how many bytes are needed to complete
        # that character and skip them here.

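The comment describes a stitching problem every streaming detector faces: a multi-byte character can be split across two feed() calls. One way to handle it is to carry the unfinished tail into the next call, as in this sketch (which assumes 2-byte characters with high-bit lead bytes, purely for illustration):

    class StreamSketch:
        def __init__(self):
            self.pending = b''  # unfinished character from the previous buffer

        def feed(self, buf):
            data = self.pending + buf
            self.pending = b''
            i = 0
            while i < len(data):
                if data[i] & 0x80:            # lead byte of a 2-byte character
                    if i + 1 >= len(data):    # trail byte not received yet
                        self.pending = data[i:]
                        break
                    i += 2                    # complete 2-byte character
                else:
                    i += 1                    # single-byte character
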
@@ -158,7 +158,7 @@ class JapaneseContextAnalysis:

    def got_enough_data(self):
        return self._mTotalRel > ENOUGH_REL_THRESHOLD

    def get_confidence(self):
        # This is just one way to calculate confidence. It works well for me.
        if self._mTotalRel > MINIMUM_DATA_THRESHOLD:

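Past the threshold check, the context analyzer's confidence amounts to the fraction of observed character pairs that landed in meaningful categories. A hedged reconstruction of that calculation (treating category 0 as the negative bucket, with a threshold default chosen only for illustration):

    DONT_KNOW = -1  # sentinel returned when there is too little data

    def context_confidence(total_rel, rel_sample, minimum_data_threshold=4):
        if total_rel > minimum_data_threshold:
            # Everything outside the negative category counts as evidence.
            return (total_rel - rel_sample[0]) / float(total_rel)
        return DONT_KNOW
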
@@ -168,7 +168,7 @@ class JapaneseContextAnalysis:

    def get_order(self, aStr):
        return -1, 1

class SJISContextAnalysis(JapaneseContextAnalysis):
    def get_order(self, aStr):
        if not aStr: return -1, 1

@@ -122,7 +122,7 @@ class Latin1Prober(CharSetProber):
    def get_confidence(self):
        if self.get_state() == constants.eNotMe:
            return 0.01

        total = reduce(operator.add, self._mFreqCounter)
        if total < 0.01:
            confidence = 0.0

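A side note on the reduce(operator.add, ...) idiom: it predates the builtin sum, and in Python 3 reduce must be imported from functools. For a list of numbers the forms below are equivalent:

    import operator
    from functools import reduce

    freq_counter = [2, 5, 0, 1]
    total = reduce(operator.add, freq_counter)  # the idiom used here
    total = sum(freq_counter)                   # the modern spelling
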
@@ -68,9 +68,9 @@ class MultiByteCharSetProber(CharSetProber):
                self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
            else:
                self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen)

        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if self._mDistributionAnalyzer.got_enough_data() and \
                   (self.get_confidence() > constants.SHORTCUT_THRESHOLD):

@@ -37,7 +37,7 @@ SYMBOL_CAT_ORDER = 250
NUMBER_OF_SEQ_CAT = 4
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
#NEGATIVE_CAT = 0

class SingleByteCharSetProber(CharSetProber):
    def __init__(self, model, reversed=constants.False, nameProber=None):
        CharSetProber.__init__(self)

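SingleByteCharSetProber scores text against a per-language bigram model: each character maps to a frequency order, and consecutive pairs of orders index a table of sequence-likelihood categories, with POSITIVE_CAT the most likely. The reversed flag lets the same model serve visually-ordered text by scoring pairs backwards. A toy version of the scoring loop (the mapping and table are fabricated for illustration):

    def score_pairs(text, char_to_order, seq_table, num_orders, reverse=False):
        # Count adjacent character pairs by likelihood category.
        seq_counters = [0] * 4   # one counter per sequence category
        last = None
        for ch in text:
            order = char_to_order.get(ch)
            if order is not None and last is not None:
                a, b = (order, last) if reverse else (last, order)
                seq_counters[seq_table[a * num_orders + b]] += 1
            last = order
        return seq_counters
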
@@ -44,7 +44,7 @@ class SJISProber(MultiByteCharSetProber):
    def reset(self):
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        return "SHIFT_JIS"

@@ -69,9 +69,9 @@ class SJISProber(MultiByteCharSetProber):
            else:
                self._mContextAnalyzer.feed(aBuf[i + 1 - charLen : i + 3 - charLen], charLen)
                self._mDistributionAnalyzer.feed(aBuf[i - 1 : i + 1], charLen)

        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if self._mContextAnalyzer.got_enough_data() and \
                   (self.get_confidence() > constants.SHORTCUT_THRESHOLD):

@@ -63,7 +63,7 @@ class UniversalDetector:

        aLen = len(aBuf)
        if not aLen: return

        if not self._mGotData:
            # If the data starts with BOM, we know it is UTF
            if aBuf[:3] == '\xEF\xBB\xBF':

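BOM sniffing is the one unambiguous signal the detector gets, so it runs on the very first buffer. The byte patterns are standard; a standalone version might look like this (note the UTF-32LE check must come before UTF-16LE, whose BOM is a prefix of it):

    def sniff_bom(buf):
        if buf.startswith(b'\xEF\xBB\xBF'):
            return 'UTF-8'
        if buf.startswith(b'\xFF\xFE\x00\x00'):
            return 'UTF-32LE'
        if buf.startswith(b'\x00\x00\xFE\xFF'):
            return 'UTF-32BE'
        if buf.startswith(b'\xFF\xFE'):
            return 'UTF-16LE'
        if buf.startswith(b'\xFE\xFF'):
            return 'UTF-16BE'
        return None
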
@@ -125,7 +125,7 @@ class UniversalDetector:
            sys.stderr.write('no data received!\n')
            return
        self.done = constants.True

        if self._mInputState == ePureAscii:
            self.result = {'encoding': 'ascii', 'confidence': 1.0}
            return self.result

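For reference, the feed/close API shown in these hunks is used incrementally: feed buffers as they arrive, stop early once detector.done flips, then call close() for the verdict. A typical driver loop (the file name, chunk size, and import path are placeholders; the vendored copy lives under a different package):

    from chardet.universaldetector import UniversalDetector  # path may differ

    detector = UniversalDetector()
    with open('sample.bin', 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b''):
            detector.feed(chunk)
            if detector.done:
                break
    detector.close()
    print(detector.result)  # e.g. {'encoding': 'ascii', 'confidence': 1.0}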