mirror of https://github.com/sqlmapproject/sqlmap.git (synced 2024-11-24 18:43:47 +03:00)

Adding new version of chardet

This commit is contained in:
parent d424d4cdc7
commit 439d003753
thirdparty/chardet/__init__.py (vendored) | 10
@@ -15,10 +15,16 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################

__version__ = "2.0.1"
__version__ = "2.3.0"
from sys import version_info


def detect(aBuf):
    import universaldetector
    if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
            (version_info >= (3, 0) and not isinstance(aBuf, bytes))):
        raise ValueError('Expected a bytes object, not a unicode object')

    from . import universaldetector
    u = universaldetector.UniversalDetector()
    u.reset()
    u.feed(aBuf)
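For illustration, not part of this commit: the updated detect() above only accepts a bytes buffer. A minimal usage sketch, assuming the vendored package is importable as "chardet" and that 'somefile' exists.

# Illustrative usage sketch; import path and file name are assumptions.
import chardet

with open('somefile', 'rb') as f:            # read raw bytes, not decoded text
    result = chardet.detect(f.read())
print(result)                                # e.g. {'encoding': 'utf-8', 'confidence': 0.99}

# chardet.detect(u'text') now raises ValueError: only bytes are accepted
# (the version_info/isinstance check added in the hunk above).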
thirdparty/chardet/big5freq.py (vendored) | 4

@@ -45,7 +45,7 @@ BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
#Char to FreqOrder table
BIG5_TABLE_SIZE = 5376

Big5CharToFreqOrder = ( \
Big5CharToFreqOrder = (
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48

@@ -921,3 +921,5 @@ Big5CharToFreqOrder = ( \
13936,13937,13938,13939,13940,13941,13942,13943,13944,13945,13946,13947,13948,13949,13950,13951, #13952
13952,13953,13954,13955,13956,13957,13958,13959,13960,13961,13962,13963,13964,13965,13966,13967, #13968
13968,13969,13970,13971,13972) #13973

# flake8: noqa
thirdparty/chardet/big5prober.py (vendored) | 9

@@ -25,10 +25,11 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################

from mbcharsetprober import MultiByteCharSetProber
from codingstatemachine import CodingStateMachine
from chardistribution import Big5DistributionAnalysis
from mbcssm import Big5SMModel
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import Big5DistributionAnalysis
from .mbcssm import Big5SMModel


class Big5Prober(MultiByteCharSetProber):
    def __init__(self):
thirdparty/chardet/chardetect.py (vendored, new file) | 80
|
@ -0,0 +1,80 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
Script which takes one or more file paths and reports on their detected
|
||||
encodings
|
||||
|
||||
Example::
|
||||
|
||||
% chardetect somefile someotherfile
|
||||
somefile: windows-1252 with confidence 0.5
|
||||
someotherfile: ascii with confidence 1.0
|
||||
|
||||
If no paths are provided, it takes its input from stdin.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from io import open
|
||||
|
||||
from chardet import __version__
|
||||
from chardet.universaldetector import UniversalDetector
|
||||
|
||||
|
||||
def description_of(lines, name='stdin'):
|
||||
"""
|
||||
Return a string describing the probable encoding of a file or
|
||||
list of strings.
|
||||
|
||||
:param lines: The lines to get the encoding of.
|
||||
:type lines: Iterable of bytes
|
||||
:param name: Name of file or collection of lines
|
||||
:type name: str
|
||||
"""
|
||||
u = UniversalDetector()
|
||||
for line in lines:
|
||||
u.feed(line)
|
||||
u.close()
|
||||
result = u.result
|
||||
if result['encoding']:
|
||||
return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
|
||||
result['confidence'])
|
||||
else:
|
||||
return '{0}: no result'.format(name)
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
'''
|
||||
Handles command line arguments and gets things started.
|
||||
|
||||
:param argv: List of arguments, as if specified on the command-line.
|
||||
If None, ``sys.argv[1:]`` is used instead.
|
||||
:type argv: list of str
|
||||
'''
|
||||
# Get command line arguments
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Takes one or more file paths and reports their detected \
|
||||
encodings",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
conflict_handler='resolve')
|
||||
parser.add_argument('input',
|
||||
help='File whose encoding we would like to determine.',
|
||||
type=argparse.FileType('rb'), nargs='*',
|
||||
default=[sys.stdin])
|
||||
parser.add_argument('--version', action='version',
|
||||
version='%(prog)s {0}'.format(__version__))
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
for f in args.input:
|
||||
if f.isatty():
|
||||
print("You are running chardetect interactively. Press " +
|
||||
"CTRL-D twice at the start of a blank line to signal the " +
|
||||
"end of your input. If you want help, run chardetect " +
|
||||
"--help\n", file=sys.stderr)
|
||||
print(description_of(f, f.name))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
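For illustration, not part of this commit: the new chardetect.py added above is a small CLI wrapper around UniversalDetector; a sketch of both the command-line and the programmatic entry point, assuming the vendored package is importable as "chardet" and that 'somefile' exists.

# Illustrative sketch; file name and import path are assumptions.
#   $ chardetect somefile
#   somefile: windows-1252 with confidence 0.5
from chardet.chardetect import description_of

with open('somefile', 'rb') as f:
    print(description_of(f, f.name))   # same report, produced in-process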
thirdparty/chardet/chardistribution.py (vendored) | 127
|
@ -25,35 +25,51 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants
|
||||
from euctwfreq import EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, EUCTW_TYPICAL_DISTRIBUTION_RATIO
|
||||
from euckrfreq import EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, EUCKR_TYPICAL_DISTRIBUTION_RATIO
|
||||
from gb2312freq import GB2312CharToFreqOrder, GB2312_TABLE_SIZE, GB2312_TYPICAL_DISTRIBUTION_RATIO
|
||||
from big5freq import Big5CharToFreqOrder, BIG5_TABLE_SIZE, BIG5_TYPICAL_DISTRIBUTION_RATIO
|
||||
from jisfreq import JISCharToFreqOrder, JIS_TABLE_SIZE, JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,
|
||||
EUCTW_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,
|
||||
EUCKR_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,
|
||||
GB2312_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,
|
||||
BIG5_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,
|
||||
JIS_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .compat import wrap_ord
|
||||
|
||||
ENOUGH_DATA_THRESHOLD = 1024
|
||||
SURE_YES = 0.99
|
||||
SURE_NO = 0.01
|
||||
MINIMUM_DATA_THRESHOLD = 3
|
||||
|
||||
|
||||
class CharDistributionAnalysis:
|
||||
def __init__(self):
|
||||
self._mCharToFreqOrder = None # Mapping table to get frequency order from char order (get from GetOrder())
|
||||
# Mapping table to get frequency order from char order (get from
|
||||
# GetOrder())
|
||||
self._mCharToFreqOrder = None
|
||||
self._mTableSize = None # Size of above table
|
||||
self._mTypicalDistributionRatio = None # This is a constant value which varies from language to language, used in calculating confidence. See http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html for further detail.
|
||||
# This is a constant value which varies from language to language,
|
||||
# used in calculating confidence. See
|
||||
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
|
||||
# for further detail.
|
||||
self._mTypicalDistributionRatio = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
"""reset analyser, clear any state"""
|
||||
self._mDone = constants.False # If this flag is set to constants.True, detection is done and conclusion has been made
|
||||
# If this flag is set to True, detection is done and conclusion has
|
||||
# been made
|
||||
self._mDone = False
|
||||
self._mTotalChars = 0 # Total characters encountered
|
||||
self._mFreqChars = 0 # The number of characters whose frequency order is less than 512
|
||||
# The number of characters whose frequency order is less than 512
|
||||
self._mFreqChars = 0
|
||||
|
||||
def feed(self, aStr, aCharLen):
|
||||
def feed(self, aBuf, aCharLen):
|
||||
"""feed a character with known length"""
|
||||
if aCharLen == 2:
|
||||
# we only care about 2-bytes character in our distribution analysis
|
||||
order = self.get_order(aStr)
|
||||
order = self.get_order(aBuf)
|
||||
else:
|
||||
order = -1
|
||||
if order >= 0:
|
||||
|
@ -65,12 +81,14 @@ class CharDistributionAnalysis:
|
|||
|
||||
def get_confidence(self):
|
||||
"""return confidence based on existing data"""
|
||||
# if we didn't receive any character in our consideration range, return negative answer
|
||||
if self._mTotalChars <= 0:
|
||||
# if we didn't receive any character in our consideration range,
|
||||
# return negative answer
|
||||
if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD:
|
||||
return SURE_NO
|
||||
|
||||
if self._mTotalChars != self._mFreqChars:
|
||||
r = self._mFreqChars / ((self._mTotalChars - self._mFreqChars) * self._mTypicalDistributionRatio)
|
||||
r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars)
|
||||
* self._mTypicalDistributionRatio))
|
||||
if r < SURE_YES:
|
||||
return r
|
||||
|
||||
|
@ -78,16 +96,18 @@ class CharDistributionAnalysis:
|
|||
return SURE_YES
|
||||
|
||||
def got_enough_data(self):
|
||||
# It is not necessary to receive all data to draw conclusion. For charset detection,
|
||||
# certain amount of data is enough
|
||||
# It is not necessary to receive all data to draw conclusion.
|
||||
# For charset detection, certain amount of data is enough
|
||||
return self._mTotalChars > ENOUGH_DATA_THRESHOLD
|
||||
|
||||
def get_order(self, aStr):
|
||||
# We do not handle characters based on the original encoding string, but
|
||||
# convert this encoding string to a number, here called order.
|
||||
# This allows multiple encodings of a language to share one frequency table.
|
||||
def get_order(self, aBuf):
|
||||
# We do not handle characters based on the original encoding string,
|
||||
# but convert this encoding string to a number, here called order.
|
||||
# This allows multiple encodings of a language to share one frequency
|
||||
# table.
|
||||
return -1
|
||||
|
||||
|
||||
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
CharDistributionAnalysis.__init__(self)
|
||||
|
@ -95,16 +115,18 @@ class EUCTWDistributionAnalysis(CharDistributionAnalysis):
|
|||
self._mTableSize = EUCTW_TABLE_SIZE
|
||||
self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, aStr):
|
||||
def get_order(self, aBuf):
|
||||
# for euc-TW encoding, we are interested
|
||||
# first byte range: 0xc4 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
if aStr[0] >= '\xC4':
|
||||
return 94 * (ord(aStr[0]) - 0xC4) + ord(aStr[1]) - 0xA1
|
||||
first_char = wrap_ord(aBuf[0])
|
||||
if first_char >= 0xC4:
|
||||
return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
CharDistributionAnalysis.__init__(self)
|
||||
|
@ -112,15 +134,17 @@ class EUCKRDistributionAnalysis(CharDistributionAnalysis):
|
|||
self._mTableSize = EUCKR_TABLE_SIZE
|
||||
self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, aStr):
|
||||
def get_order(self, aBuf):
|
||||
# for euc-KR encoding, we are interested
|
||||
# first byte range: 0xb0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
if aStr[0] >= '\xB0':
|
||||
return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1
|
||||
first_char = wrap_ord(aBuf[0])
|
||||
if first_char >= 0xB0:
|
||||
return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1
|
||||
else:
|
||||
return -1;
|
||||
return -1
|
||||
|
||||
|
||||
class GB2312DistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
|
@ -129,15 +153,17 @@ class GB2312DistributionAnalysis(CharDistributionAnalysis):
|
|||
self._mTableSize = GB2312_TABLE_SIZE
|
||||
self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, aStr):
|
||||
def get_order(self, aBuf):
|
||||
# for GB2312 encoding, we are interested
|
||||
# first byte range: 0xb0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
if (aStr[0] >= '\xB0') and (aStr[1] >= '\xA1'):
|
||||
return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1
|
||||
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
|
||||
if (first_char >= 0xB0) and (second_char >= 0xA1):
|
||||
return 94 * (first_char - 0xB0) + second_char - 0xA1
|
||||
else:
|
||||
return -1;
|
||||
return -1
|
||||
|
||||
|
||||
class Big5DistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
|
@ -146,19 +172,21 @@ class Big5DistributionAnalysis(CharDistributionAnalysis):
|
|||
self._mTableSize = BIG5_TABLE_SIZE
|
||||
self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, aStr):
|
||||
def get_order(self, aBuf):
|
||||
# for big5 encoding, we are interested
|
||||
# first byte range: 0xa4 -- 0xfe
|
||||
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
if aStr[0] >= '\xA4':
|
||||
if aStr[1] >= '\xA1':
|
||||
return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0xA1 + 63
|
||||
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
|
||||
if first_char >= 0xA4:
|
||||
if second_char >= 0xA1:
|
||||
return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
|
||||
else:
|
||||
return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0x40
|
||||
return 157 * (first_char - 0xA4) + second_char - 0x40
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class SJISDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
CharDistributionAnalysis.__init__(self)
|
||||
|
@ -166,22 +194,24 @@ class SJISDistributionAnalysis(CharDistributionAnalysis):
|
|||
self._mTableSize = JIS_TABLE_SIZE
|
||||
self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, aStr):
|
||||
def get_order(self, aBuf):
|
||||
# for sjis encoding, we are interested
|
||||
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
|
||||
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
|
||||
# no validation needed here. State machine has done that
|
||||
if (aStr[0] >= '\x81') and (aStr[0] <= '\x9F'):
|
||||
order = 188 * (ord(aStr[0]) - 0x81)
|
||||
elif (aStr[0] >= '\xE0') and (aStr[0] <= '\xEF'):
|
||||
order = 188 * (ord(aStr[0]) - 0xE0 + 31)
|
||||
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
|
||||
if (first_char >= 0x81) and (first_char <= 0x9F):
|
||||
order = 188 * (first_char - 0x81)
|
||||
elif (first_char >= 0xE0) and (first_char <= 0xEF):
|
||||
order = 188 * (first_char - 0xE0 + 31)
|
||||
else:
|
||||
return -1;
|
||||
order = order + ord(aStr[1]) - 0x40
|
||||
if aStr[1] > '\x7F':
|
||||
return -1
|
||||
order = order + second_char - 0x40
|
||||
if second_char > 0x7F:
|
||||
order = -1
|
||||
return order
|
||||
|
||||
|
||||
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
CharDistributionAnalysis.__init__(self)
|
||||
|
@ -189,12 +219,13 @@ class EUCJPDistributionAnalysis(CharDistributionAnalysis):
|
|||
self._mTableSize = JIS_TABLE_SIZE
|
||||
self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, aStr):
|
||||
def get_order(self, aBuf):
|
||||
# for euc-JP encoding, we are interested
|
||||
# first byte range: 0xa0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
if aStr[0] >= '\xA0':
|
||||
return 94 * (ord(aStr[0]) - 0xA1) + ord(aStr[1]) - 0xa1
|
||||
char = wrap_ord(aBuf[0])
|
||||
if char >= 0xA0:
|
||||
return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1
|
||||
else:
|
||||
return -1
|
||||
|
|
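For illustration, not part of this commit: the get_order() methods rewritten above map a two-byte character to a frequency-table index with simple lead/trail-byte arithmetic; a worked sketch of the EUC-KR case (94 trail-byte values per lead byte).

# Illustrative sketch of the EUCKRDistributionAnalysis.get_order() arithmetic shown above.
def euckr_order(first_byte, second_byte):
    # valid lead bytes are 0xB0-0xFE, trail bytes 0xA1-0xFE; 94 trail values per row
    if first_byte >= 0xB0:
        return 94 * (first_byte - 0xB0) + second_byte - 0xA1
    return -1

print(euckr_order(0xB0, 0xA1))  # -> 0    (first two-byte EUC-KR hangul syllable)
print(euckr_order(0xC7, 0xD1))  # -> 2210 (94 * 23 + 48)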
thirdparty/chardet/charsetgroupprober.py (vendored) | 34
|
@ -25,8 +25,10 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants, sys
|
||||
from charsetprober import CharSetProber
|
||||
from . import constants
|
||||
import sys
|
||||
from .charsetprober import CharSetProber
|
||||
|
||||
|
||||
class CharSetGroupProber(CharSetProber):
|
||||
def __init__(self):
|
||||
|
@ -41,28 +43,32 @@ class CharSetGroupProber(CharSetProber):
|
|||
for prober in self._mProbers:
|
||||
if prober:
|
||||
prober.reset()
|
||||
prober.active = constants.True
|
||||
prober.active = True
|
||||
self._mActiveNum += 1
|
||||
self._mBestGuessProber = None
|
||||
|
||||
def get_charset_name(self):
|
||||
if not self._mBestGuessProber:
|
||||
self.get_confidence()
|
||||
if not self._mBestGuessProber: return None
|
||||
if not self._mBestGuessProber:
|
||||
return None
|
||||
# self._mBestGuessProber = self._mProbers[0]
|
||||
return self._mBestGuessProber.get_charset_name()
|
||||
|
||||
def feed(self, aBuf):
|
||||
for prober in self._mProbers:
|
||||
if not prober: continue
|
||||
if not prober.active: continue
|
||||
if not prober:
|
||||
continue
|
||||
if not prober.active:
|
||||
continue
|
||||
st = prober.feed(aBuf)
|
||||
if not st: continue
|
||||
if not st:
|
||||
continue
|
||||
if st == constants.eFoundIt:
|
||||
self._mBestGuessProber = prober
|
||||
return self.get_state()
|
||||
elif st == constants.eNotMe:
|
||||
prober.active = constants.False
|
||||
prober.active = False
|
||||
self._mActiveNum -= 1
|
||||
if self._mActiveNum <= 0:
|
||||
self._mState = constants.eNotMe
|
||||
|
@ -78,18 +84,22 @@ class CharSetGroupProber(CharSetProber):
|
|||
bestConf = 0.0
|
||||
self._mBestGuessProber = None
|
||||
for prober in self._mProbers:
|
||||
if not prober: continue
|
||||
if not prober:
|
||||
continue
|
||||
if not prober.active:
|
||||
if constants._debug:
|
||||
sys.stderr.write(prober.get_charset_name() + ' not active\n')
|
||||
sys.stderr.write(prober.get_charset_name()
|
||||
+ ' not active\n')
|
||||
continue
|
||||
cf = prober.get_confidence()
|
||||
if constants._debug:
|
||||
sys.stderr.write('%s confidence = %s\n' % (prober.get_charset_name(), cf))
|
||||
sys.stderr.write('%s confidence = %s\n' %
|
||||
(prober.get_charset_name(), cf))
|
||||
if bestConf < cf:
|
||||
bestConf = cf
|
||||
self._mBestGuessProber = prober
|
||||
if not self._mBestGuessProber: return 0.0
|
||||
if not self._mBestGuessProber:
|
||||
return 0.0
|
||||
return bestConf
|
||||
# else:
|
||||
# self._mBestGuessProber = self._mProbers[0]
|
||||
|
|
thirdparty/chardet/charsetprober.py (vendored) | 8

@@ -26,7 +26,9 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################

import constants, re
from . import constants
import re


class CharSetProber:
    def __init__(self):

@@ -48,11 +50,11 @@ class CharSetProber:
        return 0.0

    def filter_high_bit_only(self, aBuf):
        aBuf = re.sub(r'([\x00-\x7F])+', ' ', aBuf)
        aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf)
        return aBuf

    def filter_without_english_letters(self, aBuf):
        aBuf = re.sub(r'([A-Za-z])+', ' ', aBuf)
        aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf)
        return aBuf

    def filter_with_english_letters(self, aBuf):
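For illustration, not part of this commit: the r'...' to b'...' pattern change above keeps these filters working on Python 3 byte buffers; a quick sketch of what filter_high_bit_only's substitution does.

# Illustrative sketch: runs of ASCII bytes collapse to a single space, so only
# high-bit (non-ASCII) bytes remain for distribution analysis.
import re

buf = b'abc \xe4\xf5 def \xc8'
print(re.sub(b'([\x00-\x7F])+', b' ', buf))   # -> b' \xe4\xf5 \xc8'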
thirdparty/chardet/codingstatemachine.py (vendored) | 11

@@ -25,7 +25,9 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################

from constants import eStart, eError, eItsMe
from .constants import eStart
from .compat import wrap_ord


class CodingStateMachine:
    def __init__(self, sm):

@@ -40,12 +42,15 @@ class CodingStateMachine:
    def next_state(self, c):
        # for each byte we get its class
        # if it is first byte, we also get byte length
        byteCls = self._mModel['classTable'][ord(c)]
        # PY3K: aBuf is a byte stream, so c is an int, not a byte
        byteCls = self._mModel['classTable'][wrap_ord(c)]
        if self._mCurrentState == eStart:
            self._mCurrentBytePos = 0
            self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
        # from byte's class and stateTable, we get its next state
        self._mCurrentState = self._mModel['stateTable'][self._mCurrentState * self._mModel['classFactor'] + byteCls]
        curr_state = (self._mCurrentState * self._mModel['classFactor']
                      + byteCls)
        self._mCurrentState = self._mModel['stateTable'][curr_state]
        self._mCurrentBytePos += 1
        return self._mCurrentState
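For illustration, not part of this commit: next_state() above treats stateTable as a flattened 2-D matrix indexed by state row and byte-class column; a toy sketch of the same lookup (the model dict below is hypothetical, not one of chardet's real SM models).

# Illustrative sketch of the row * classFactor + column indexing used above.
eStart, eError, eItsMe = 0, 1, 2

toy_model = {
    'classTable': [0] * 256,          # every byte falls in class 0 in this toy model
    'classFactor': 4,                 # number of byte classes per state row
    'stateTable': (eStart, eError, eItsMe, eError,   # row for state eStart
                   eError, eError, eError, eError),  # row for state eError
    'charLenTable': (1, 0, 0, 0),
}

state = eStart
byte_cls = toy_model['classTable'][0x41]            # class of byte 0x41 -> 0
state = toy_model['stateTable'][state * toy_model['classFactor'] + byte_cls]
print(state)  # -> 0 (still eStart)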
thirdparty/chardet/compat.py (vendored, new file) | 34

@@ -0,0 +1,34 @@
######################## BEGIN LICENSE BLOCK ########################
# Contributor(s):
#   Ian Cordasco - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################

import sys


if sys.version_info < (3, 0):
    base_str = (str, unicode)
else:
    base_str = (bytes, str)


def wrap_ord(a):
    if sys.version_info < (3, 0) and isinstance(a, base_str):
        return ord(a)
    else:
        return a
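For illustration, not part of this commit: wrap_ord() exists because indexing a byte string yields a one-character str on Python 2 but an int on Python 3; a usage sketch, assuming the vendored package is importable as "chardet".

# Illustrative usage sketch; import path is an assumption.
from chardet.compat import wrap_ord

buf = b'\xc4\xa1'
c = buf[0]           # Python 2: the str '\xc4'; Python 3: the int 196
print(wrap_ord(c))   # -> 196 on both, so table lookups like classTable[wrap_ord(c)]
                     #    behave identically across interpreter versions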
thirdparty/chardet/constants.py (vendored) | 8

@@ -37,11 +37,3 @@ eError = 1
eItsMe = 2

SHORTCUT_THRESHOLD = 0.95

import __builtin__
if not hasattr(__builtin__, 'False'):
    False = 0
    True = 1
else:
    False = __builtin__.False
    True = __builtin__.True
thirdparty/chardet/cp949prober.py (vendored, new file) | 44
|
@ -0,0 +1,44 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCKRDistributionAnalysis
|
||||
from .mbcssm import CP949SMModel
|
||||
|
||||
|
||||
class CP949Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
MultiByteCharSetProber.__init__(self)
|
||||
self._mCodingSM = CodingStateMachine(CP949SMModel)
|
||||
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
|
||||
# not different.
|
||||
self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
def get_charset_name(self):
|
||||
return "CP949"
|
thirdparty/chardet/escprober.py (vendored) | 31
|
@ -25,15 +25,18 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants, sys
|
||||
from escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel
|
||||
from charsetprober import CharSetProber
|
||||
from codingstatemachine import CodingStateMachine
|
||||
from . import constants
|
||||
from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
|
||||
ISO2022KRSMModel)
|
||||
from .charsetprober import CharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .compat import wrap_ord
|
||||
|
||||
|
||||
class EscCharSetProber(CharSetProber):
|
||||
def __init__(self):
|
||||
CharSetProber.__init__(self)
|
||||
self._mCodingSM = [ \
|
||||
self._mCodingSM = [
|
||||
CodingStateMachine(HZSMModel),
|
||||
CodingStateMachine(ISO2022CNSMModel),
|
||||
CodingStateMachine(ISO2022JPSMModel),
|
||||
|
@ -44,8 +47,9 @@ class EscCharSetProber(CharSetProber):
|
|||
def reset(self):
|
||||
CharSetProber.reset(self)
|
||||
for codingSM in self._mCodingSM:
|
||||
if not codingSM: continue
|
||||
codingSM.active = constants.True
|
||||
if not codingSM:
|
||||
continue
|
||||
codingSM.active = True
|
||||
codingSM.reset()
|
||||
self._mActiveSM = len(self._mCodingSM)
|
||||
self._mDetectedCharset = None
|
||||
|
@ -61,19 +65,22 @@ class EscCharSetProber(CharSetProber):
|
|||
|
||||
def feed(self, aBuf):
|
||||
for c in aBuf:
|
||||
# PY3K: aBuf is a byte array, so c is an int, not a byte
|
||||
for codingSM in self._mCodingSM:
|
||||
if not codingSM: continue
|
||||
if not codingSM.active: continue
|
||||
codingState = codingSM.next_state(c)
|
||||
if not codingSM:
|
||||
continue
|
||||
if not codingSM.active:
|
||||
continue
|
||||
codingState = codingSM.next_state(wrap_ord(c))
|
||||
if codingState == constants.eError:
|
||||
codingSM.active = constants.False
|
||||
codingSM.active = False
|
||||
self._mActiveSM -= 1
|
||||
if self._mActiveSM <= 0:
|
||||
self._mState = constants.eNotMe
|
||||
return self.get_state()
|
||||
elif codingState == constants.eItsMe:
|
||||
self._mState = constants.eFoundIt
|
||||
self._mDetectedCharset = codingSM.get_coding_state_machine()
|
||||
self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8
|
||||
return self.get_state()
|
||||
|
||||
return self.get_state()
|
||||
|
|
thirdparty/chardet/escsm.py (vendored) | 20
|
@ -25,9 +25,9 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from constants import eStart, eError, eItsMe
|
||||
from .constants import eStart, eError, eItsMe
|
||||
|
||||
HZ_cls = ( \
|
||||
HZ_cls = (
|
||||
1,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
|
@ -62,7 +62,7 @@ HZ_cls = ( \
|
|||
1,1,1,1,1,1,1,1, # f8 - ff
|
||||
)
|
||||
|
||||
HZ_st = ( \
|
||||
HZ_st = (
|
||||
eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
|
||||
eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17
|
||||
|
@ -79,7 +79,7 @@ HZSMModel = {'classTable': HZ_cls,
|
|||
'charLenTable': HZCharLenTable,
|
||||
'name': "HZ-GB-2312"}
|
||||
|
||||
ISO2022CN_cls = ( \
|
||||
ISO2022CN_cls = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
|
@ -114,7 +114,7 @@ ISO2022CN_cls = ( \
|
|||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022CN_st = ( \
|
||||
ISO2022CN_st = (
|
||||
eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
|
||||
eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f
|
||||
eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
|
||||
|
@ -133,7 +133,7 @@ ISO2022CNSMModel = {'classTable': ISO2022CN_cls,
|
|||
'charLenTable': ISO2022CNCharLenTable,
|
||||
'name': "ISO-2022-CN"}
|
||||
|
||||
ISO2022JP_cls = ( \
|
||||
ISO2022JP_cls = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,2,2, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
|
@ -168,7 +168,7 @@ ISO2022JP_cls = ( \
|
|||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022JP_st = ( \
|
||||
ISO2022JP_st = (
|
||||
eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
|
||||
eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
|
||||
|
@ -188,7 +188,7 @@ ISO2022JPSMModel = {'classTable': ISO2022JP_cls,
|
|||
'charLenTable': ISO2022JPCharLenTable,
|
||||
'name': "ISO-2022-JP"}
|
||||
|
||||
ISO2022KR_cls = ( \
|
||||
ISO2022KR_cls = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
|
@ -223,7 +223,7 @@ ISO2022KR_cls = ( \
|
|||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022KR_st = ( \
|
||||
ISO2022KR_st = (
|
||||
eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
|
||||
eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17
|
||||
|
@ -238,3 +238,5 @@ ISO2022KRSMModel = {'classTable': ISO2022KR_cls,
|
|||
'stateTable': ISO2022KR_st,
|
||||
'charLenTable': ISO2022KRCharLenTable,
|
||||
'name': "ISO-2022-KR"}
|
||||
|
||||
# flake8: noqa
|
||||
|
|
thirdparty/chardet/eucjpprober.py (vendored) | 35
|
@ -25,13 +25,14 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants, sys
|
||||
from constants import eStart, eError, eItsMe
|
||||
from mbcharsetprober import MultiByteCharSetProber
|
||||
from codingstatemachine import CodingStateMachine
|
||||
from chardistribution import EUCJPDistributionAnalysis
|
||||
from jpcntx import EUCJPContextAnalysis
|
||||
from mbcssm import EUCJPSMModel
|
||||
import sys
|
||||
from . import constants
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCJPDistributionAnalysis
|
||||
from .jpcntx import EUCJPContextAnalysis
|
||||
from .mbcssm import EUCJPSMModel
|
||||
|
||||
|
||||
class EUCJPProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
|
@ -50,17 +51,20 @@ class EUCJPProber(MultiByteCharSetProber):
|
|||
|
||||
def feed(self, aBuf):
|
||||
aLen = len(aBuf)
|
||||
for i in xrange(0, aLen):
|
||||
for i in range(0, aLen):
|
||||
# PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
|
||||
codingState = self._mCodingSM.next_state(aBuf[i])
|
||||
if codingState == eError:
|
||||
if codingState == constants.eError:
|
||||
if constants._debug:
|
||||
sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n')
|
||||
sys.stderr.write(self.get_charset_name()
|
||||
+ ' prober hit error at byte ' + str(i)
|
||||
+ '\n')
|
||||
self._mState = constants.eNotMe
|
||||
break
|
||||
elif codingState == eItsMe:
|
||||
elif codingState == constants.eItsMe:
|
||||
self._mState = constants.eFoundIt
|
||||
break
|
||||
elif codingState == eStart:
|
||||
elif codingState == constants.eStart:
|
||||
charLen = self._mCodingSM.get_current_charlen()
|
||||
if i == 0:
|
||||
self._mLastChar[1] = aBuf[0]
|
||||
|
@ -68,13 +72,14 @@ class EUCJPProber(MultiByteCharSetProber):
|
|||
self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
|
||||
else:
|
||||
self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
|
||||
self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen)
|
||||
self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
|
||||
charLen)
|
||||
|
||||
self._mLastChar[0] = aBuf[aLen - 1]
|
||||
|
||||
if self.get_state() == constants.eDetecting:
|
||||
if self._mContextAnalyzer.got_enough_data() and \
|
||||
(self.get_confidence() > constants.SHORTCUT_THRESHOLD):
|
||||
if (self._mContextAnalyzer.got_enough_data() and
|
||||
(self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
|
||||
self._mState = constants.eFoundIt
|
||||
|
||||
return self.get_state()
|
||||
|
|
thirdparty/chardet/euckrfreq.py (vendored) | 2

@@ -592,3 +592,5 @@ EUCKRCharToFreqOrder = ( \
8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,
8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,
8736,8737,8738,8739,8740,8741)

# flake8: noqa
thirdparty/chardet/euckrprober.py (vendored) | 9

@@ -25,10 +25,11 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################

from mbcharsetprober import MultiByteCharSetProber
from codingstatemachine import CodingStateMachine
from chardistribution import EUCKRDistributionAnalysis
from mbcssm import EUCKRSMModel
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCKRDistributionAnalysis
from .mbcssm import EUCKRSMModel


class EUCKRProber(MultiByteCharSetProber):
    def __init__(self):
thirdparty/chardet/euctwfreq.py (vendored) | 4

@@ -46,7 +46,7 @@ EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
# Char to FreqOrder table ,
EUCTW_TABLE_SIZE = 8102

EUCTWCharToFreqOrder = ( \
EUCTWCharToFreqOrder = (
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774

@@ -424,3 +424,5 @@ EUCTWCharToFreqOrder = ( \
8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, # 8710
8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, # 8726
8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741) # 8742

# flake8: noqa
thirdparty/chardet/euctwprober.py (vendored) | 8

@@ -25,10 +25,10 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################

from mbcharsetprober import MultiByteCharSetProber
from codingstatemachine import CodingStateMachine
from chardistribution import EUCTWDistributionAnalysis
from mbcssm import EUCTWSMModel
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCTWDistributionAnalysis
from .mbcssm import EUCTWSMModel

class EUCTWProber(MultiByteCharSetProber):
    def __init__(self):
thirdparty/chardet/gb2312freq.py (vendored) | 3

@@ -43,7 +43,7 @@ GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9

GB2312_TABLE_SIZE = 3760

GB2312CharToFreqOrder = ( \
GB2312CharToFreqOrder = (
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,

@@ -469,3 +469,4 @@ GB2312CharToFreqOrder = ( \
5867,5507,6273,4206,6274,4789,6098,6764,3619,3646,3833,3804,2394,3788,4936,3978,
4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767)

# flake8: noqa
thirdparty/chardet/gb2312prober.py (vendored) | 8

@@ -25,10 +25,10 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################

from mbcharsetprober import MultiByteCharSetProber
from codingstatemachine import CodingStateMachine
from chardistribution import GB2312DistributionAnalysis
from mbcssm import GB2312SMModel
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import GB2312DistributionAnalysis
from .mbcssm import GB2312SMModel

class GB2312Prober(MultiByteCharSetProber):
    def __init__(self):
thirdparty/chardet/hebrewprober.py (vendored) | 132
|
@ -25,8 +25,9 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from charsetprober import CharSetProber
|
||||
import constants
|
||||
from .charsetprober import CharSetProber
|
||||
from .constants import eNotMe, eDetecting
|
||||
from .compat import wrap_ord
|
||||
|
||||
# This prober doesn't actually recognize a language or a charset.
|
||||
# It is a helper prober for the use of the Hebrew model probers
|
||||
|
@ -126,28 +127,31 @@ import constants
|
|||
# charset identified, either "windows-1255" or "ISO-8859-8".
|
||||
|
||||
# windows-1255 / ISO-8859-8 code points of interest
|
||||
FINAL_KAF = '\xea'
|
||||
NORMAL_KAF = '\xeb'
|
||||
FINAL_MEM = '\xed'
|
||||
NORMAL_MEM = '\xee'
|
||||
FINAL_NUN = '\xef'
|
||||
NORMAL_NUN = '\xf0'
|
||||
FINAL_PE = '\xf3'
|
||||
NORMAL_PE = '\xf4'
|
||||
FINAL_TSADI = '\xf5'
|
||||
NORMAL_TSADI = '\xf6'
|
||||
FINAL_KAF = 0xea
|
||||
NORMAL_KAF = 0xeb
|
||||
FINAL_MEM = 0xed
|
||||
NORMAL_MEM = 0xee
|
||||
FINAL_NUN = 0xef
|
||||
NORMAL_NUN = 0xf0
|
||||
FINAL_PE = 0xf3
|
||||
NORMAL_PE = 0xf4
|
||||
FINAL_TSADI = 0xf5
|
||||
NORMAL_TSADI = 0xf6
|
||||
|
||||
# Minimum Visual vs Logical final letter score difference.
|
||||
# If the difference is below this, don't rely solely on the final letter score distance.
|
||||
# If the difference is below this, don't rely solely on the final letter score
|
||||
# distance.
|
||||
MIN_FINAL_CHAR_DISTANCE = 5
|
||||
|
||||
# Minimum Visual vs Logical model score difference.
|
||||
# If the difference is below this, don't rely at all on the model score distance.
|
||||
# If the difference is below this, don't rely at all on the model score
|
||||
# distance.
|
||||
MIN_MODEL_DISTANCE = 0.01
|
||||
|
||||
VISUAL_HEBREW_NAME = "ISO-8859-8"
|
||||
LOGICAL_HEBREW_NAME = "windows-1255"
|
||||
|
||||
|
||||
class HebrewProber(CharSetProber):
|
||||
def __init__(self):
|
||||
CharSetProber.__init__(self)
|
||||
|
@ -159,8 +163,8 @@ class HebrewProber(CharSetProber):
|
|||
self._mFinalCharLogicalScore = 0
|
||||
self._mFinalCharVisualScore = 0
|
||||
# The two last characters seen in the previous buffer,
|
||||
# mPrev and mBeforePrev are initialized to space in order to simulate a word
|
||||
# delimiter at the beginning of the data
|
||||
# mPrev and mBeforePrev are initialized to space in order to simulate
|
||||
# a word delimiter at the beginning of the data
|
||||
self._mPrev = ' '
|
||||
self._mBeforePrev = ' '
|
||||
# These probers are owned by the group prober.
|
||||
|
@ -170,49 +174,52 @@ class HebrewProber(CharSetProber):
|
|||
self._mVisualProber = visualProber
|
||||
|
||||
def is_final(self, c):
|
||||
return c in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, FINAL_TSADI]
|
||||
return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,
|
||||
FINAL_TSADI]
|
||||
|
||||
def is_non_final(self, c):
|
||||
# The normal Tsadi is not a good Non-Final letter due to words like
|
||||
# 'lechotet' (to chat) containing an apostrophe after the tsadi. This
|
||||
# apostrophe is converted to a space in FilterWithoutEnglishLetters causing
|
||||
# the Non-Final tsadi to appear at an end of a word even though this is not
|
||||
# the case in the original text.
|
||||
# The letters Pe and Kaf rarely display a related behavior of not being a
|
||||
# good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' for
|
||||
# example legally end with a Non-Final Pe or Kaf. However, the benefit of
|
||||
# these letters as Non-Final letters outweighs the damage since these words
|
||||
# are quite rare.
|
||||
return c in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
|
||||
# apostrophe is converted to a space in FilterWithoutEnglishLetters
|
||||
# causing the Non-Final tsadi to appear at an end of a word even
|
||||
# though this is not the case in the original text.
|
||||
# The letters Pe and Kaf rarely display a related behavior of not being
|
||||
# a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'
|
||||
# for example legally end with a Non-Final Pe or Kaf. However, the
|
||||
# benefit of these letters as Non-Final letters outweighs the damage
|
||||
# since these words are quite rare.
|
||||
return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
|
||||
|
||||
def feed(self, aBuf):
|
||||
# Final letter analysis for logical-visual decision.
|
||||
# Look for evidence that the received buffer is either logical Hebrew or
|
||||
# visual Hebrew.
|
||||
# Look for evidence that the received buffer is either logical Hebrew
|
||||
# or visual Hebrew.
|
||||
# The following cases are checked:
|
||||
# 1) A word longer than 1 letter, ending with a final letter. This is an
|
||||
# indication that the text is laid out "naturally" since the final letter
|
||||
# really appears at the end. +1 for logical score.
|
||||
# 2) A word longer than 1 letter, ending with a Non-Final letter. In normal
|
||||
# Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, should not end with
|
||||
# the Non-Final form of that letter. Exceptions to this rule are mentioned
|
||||
# above in isNonFinal(). This is an indication that the text is laid out
|
||||
# backwards. +1 for visual score
|
||||
# 3) A word longer than 1 letter, starting with a final letter. Final letters
|
||||
# should not appear at the beginning of a word. This is an indication that
|
||||
# the text is laid out backwards. +1 for visual score.
|
||||
# 1) A word longer than 1 letter, ending with a final letter. This is
|
||||
# an indication that the text is laid out "naturally" since the
|
||||
# final letter really appears at the end. +1 for logical score.
|
||||
# 2) A word longer than 1 letter, ending with a Non-Final letter. In
|
||||
# normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
|
||||
# should not end with the Non-Final form of that letter. Exceptions
|
||||
# to this rule are mentioned above in isNonFinal(). This is an
|
||||
# indication that the text is laid out backwards. +1 for visual
|
||||
# score
|
||||
# 3) A word longer than 1 letter, starting with a final letter. Final
|
||||
# letters should not appear at the beginning of a word. This is an
|
||||
# indication that the text is laid out backwards. +1 for visual
|
||||
# score.
|
||||
#
|
||||
# The visual score and logical score are accumulated throughout the text and
|
||||
# are finally checked against each other in GetCharSetName().
|
||||
# No checking for final letters in the middle of words is done since that case
|
||||
# is not an indication for either Logical or Visual text.
|
||||
# The visual score and logical score are accumulated throughout the
|
||||
# text and are finally checked against each other in GetCharSetName().
|
||||
# No checking for final letters in the middle of words is done since
|
||||
# that case is not an indication for either Logical or Visual text.
|
||||
#
|
||||
# We automatically filter out all 7-bit characters (replace them with spaces)
|
||||
# so the word boundary detection works properly. [MAP]
|
||||
# We automatically filter out all 7-bit characters (replace them with
|
||||
# spaces) so the word boundary detection works properly. [MAP]
|
||||
|
||||
if self.get_state() == constants.eNotMe:
|
||||
if self.get_state() == eNotMe:
|
||||
# Both model probers say it's not them. No reason to continue.
|
||||
return constants.eNotMe
|
||||
return eNotMe
|
||||
|
||||
aBuf = self.filter_high_bit_only(aBuf)
|
||||
|
||||
|
@ -220,23 +227,27 @@ class HebrewProber(CharSetProber):
|
|||
if cur == ' ':
|
||||
# We stand on a space - a word just ended
|
||||
if self._mBeforePrev != ' ':
|
||||
# next-to-last char was not a space so self._mPrev is not a 1 letter word
|
||||
# next-to-last char was not a space so self._mPrev is not a
|
||||
# 1 letter word
|
||||
if self.is_final(self._mPrev):
|
||||
# case (1) [-2:not space][-1:final letter][cur:space]
|
||||
self._mFinalCharLogicalScore += 1
|
||||
elif self.is_non_final(self._mPrev):
|
||||
# case (2) [-2:not space][-1:Non-Final letter][cur:space]
|
||||
# case (2) [-2:not space][-1:Non-Final letter][
|
||||
# cur:space]
|
||||
self._mFinalCharVisualScore += 1
|
||||
else:
|
||||
# Not standing on a space
|
||||
if (self._mBeforePrev == ' ') and (self.is_final(self._mPrev)) and (cur != ' '):
|
||||
if ((self._mBeforePrev == ' ') and
|
||||
(self.is_final(self._mPrev)) and (cur != ' ')):
|
||||
# case (3) [-2:space][-1:final letter][cur:not space]
|
||||
self._mFinalCharVisualScore += 1
|
||||
self._mBeforePrev = self._mPrev
|
||||
self._mPrev = cur
|
||||
|
||||
# Forever detecting, till the end or until both model probers return eNotMe (handled above)
|
||||
return constants.eDetecting
|
||||
# Forever detecting, till the end or until both model probers return
|
||||
# eNotMe (handled above)
|
||||
return eDetecting
|
||||
|
||||
def get_charset_name(self):
|
||||
# Make the decision: is it Logical or Visual?
|
||||
|
@ -248,22 +259,25 @@ class HebrewProber(CharSetProber):
|
|||
return VISUAL_HEBREW_NAME
|
||||
|
||||
# It's not dominant enough, try to rely on the model scores instead.
|
||||
modelsub = self._mLogicalProber.get_confidence() - self._mVisualProber.get_confidence()
|
||||
modelsub = (self._mLogicalProber.get_confidence()
|
||||
- self._mVisualProber.get_confidence())
|
||||
if modelsub > MIN_MODEL_DISTANCE:
|
||||
return LOGICAL_HEBREW_NAME
|
||||
if modelsub < -MIN_MODEL_DISTANCE:
|
||||
return VISUAL_HEBREW_NAME
|
||||
|
||||
# Still no good, back to final letter distance, maybe it'll save the day.
|
||||
# Still no good, back to final letter distance, maybe it'll save the
|
||||
# day.
|
||||
if finalsub < 0.0:
|
||||
return VISUAL_HEBREW_NAME
|
||||
|
||||
# (finalsub > 0 - Logical) or (don't know what to do) default to Logical.
|
||||
# (finalsub > 0 - Logical) or (don't know what to do) default to
|
||||
# Logical.
|
||||
return LOGICAL_HEBREW_NAME
|
||||
|
||||
def get_state(self):
|
||||
# Remain active as long as any of the model probers are active.
|
||||
if (self._mLogicalProber.get_state() == constants.eNotMe) and \
|
||||
(self._mVisualProber.get_state() == constants.eNotMe):
|
||||
return constants.eNotMe
|
||||
return constants.eDetecting
|
||||
if (self._mLogicalProber.get_state() == eNotMe) and \
|
||||
(self._mVisualProber.get_state() == eNotMe):
|
||||
return eNotMe
|
||||
return eDetecting
|
||||
|
|
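For illustration, not part of this commit: HebrewProber.feed() above decides between logical (windows-1255) and visual (ISO-8859-8) Hebrew by checking whether word-final positions carry final-form letters; a Python 3 sketch of that scoring rule on a single word, using the windows-1255 code points listed in the hunk above (the example word bytes are an assumption).

# Illustrative sketch of the final-letter heuristic.
FINAL_FORMS = {0xea, 0xed, 0xef, 0xf3, 0xf5}      # Kaf, Mem, Nun, Pe, Tsadi (final form)
NON_FINAL_FORMS = {0xeb, 0xee, 0xf0, 0xf4}        # the same letters, non-final form

def score_word(word_bytes):
    """Return (+1, 0) for a 'logical' hint, (0, +1) for a 'visual' hint."""
    last = word_bytes[-1]
    if last in FINAL_FORMS:          # word ends with a final form -> text reads naturally
        return 1, 0
    if last in NON_FINAL_FORMS:      # word ends with a non-final form -> likely reversed
        return 0, 1
    return 0, 0

print(score_word(b'\xf9\xec\xe5\xed'))  # "shalom", ends with final Mem (0xed) -> (1, 0)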
thirdparty/chardet/jisfreq.py (vendored) | 4

@@ -46,7 +46,7 @@ JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0
# Char to FreqOrder table ,
JIS_TABLE_SIZE = 4368

JISCharToFreqOrder = ( \
JISCharToFreqOrder = (
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48

@@ -565,3 +565,5 @@ JISCharToFreqOrder = ( \
8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, # 8240
8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, # 8256
8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271) # 8272

# flake8: noqa
thirdparty/chardet/jpcntx.py (vendored) | 79
|
@ -25,7 +25,7 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants
|
||||
from .compat import wrap_ord
|
||||
|
||||
NUM_OF_CATEGORY = 6
|
||||
DONT_KNOW = -1
|
||||
|
@ -34,7 +34,7 @@ MAX_REL_THRESHOLD = 1000
|
|||
MINIMUM_DATA_THRESHOLD = 4
|
||||
|
||||
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
|
||||
jp2CharContext = ( \
|
||||
jp2CharContext = (
|
||||
(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
|
||||
(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
|
||||
(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
|
||||
|
@ -126,19 +126,26 @@ class JapaneseContextAnalysis:
|
|||
|
||||
def reset(self):
|
||||
self._mTotalRel = 0 # total sequence received
|
||||
self._mRelSample = [0] * NUM_OF_CATEGORY # category counters, each interger counts sequence in its category
|
||||
self._mNeedToSkipCharNum = 0 # if last byte in current buffer is not the last byte of a character, we need to know how many bytes to skip in next buffer
|
||||
# category counters, each interger counts sequence in its category
|
||||
self._mRelSample = [0] * NUM_OF_CATEGORY
|
||||
# if last byte in current buffer is not the last byte of a character,
|
||||
# we need to know how many bytes to skip in next buffer
|
||||
self._mNeedToSkipCharNum = 0
|
||||
self._mLastCharOrder = -1 # The order of previous char
|
||||
self._mDone = constants.False # If this flag is set to constants.True, detection is done and conclusion has been made
|
||||
# If this flag is set to True, detection is done and conclusion has
|
||||
# been made
|
||||
self._mDone = False
|
||||
|
||||
def feed(self, aBuf, aLen):
|
||||
if self._mDone: return
|
||||
if self._mDone:
|
||||
return
|
||||
|
||||
# The buffer we got is byte oriented, and a character may span in more than one
|
||||
# buffers. In case the last one or two byte in last buffer is not complete, we
|
||||
# record how many byte needed to complete that character and skip these bytes here.
|
||||
# We can choose to record those bytes as well and analyse the character once it
|
||||
# is complete, but since a character will not make much difference, by simply skipping
|
||||
# buffers. In case the last one or two byte in last buffer is not
|
||||
# complete, we record how many byte needed to complete that character
|
||||
# and skip these bytes here. We can choose to record those bytes as
|
||||
# well and analyse the character once it is complete, but since a
|
||||
# character will not make much difference, by simply skipping
|
||||
# this character will simply our logic and improve performance.
|
||||
i = self._mNeedToSkipCharNum
|
||||
while i < aLen:
|
||||
|
@ -151,7 +158,7 @@ class JapaneseContextAnalysis:
|
|||
if (order != -1) and (self._mLastCharOrder != -1):
|
||||
self._mTotalRel += 1
|
||||
if self._mTotalRel > MAX_REL_THRESHOLD:
|
||||
self._mDone = constants.True
|
||||
self._mDone = True
|
||||
break
|
||||
self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1
|
||||
self._mLastCharOrder = order
|
||||
|
@ -166,45 +173,55 @@ class JapaneseContextAnalysis:
|
|||
else:
|
||||
return DONT_KNOW
|
||||
|
||||
def get_order(self, aStr):
|
||||
def get_order(self, aBuf):
|
||||
return -1, 1
|
||||
|
||||
class SJISContextAnalysis(JapaneseContextAnalysis):
|
||||
def get_order(self, aStr):
|
||||
if not aStr: return -1, 1
|
||||
def __init__(self):
|
||||
self.charset_name = "SHIFT_JIS"
|
||||
|
||||
def get_charset_name(self):
|
||||
return self.charset_name
|
||||
|
||||
def get_order(self, aBuf):
|
||||
if not aBuf:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
if ((aStr[0] >= '\x81') and (aStr[0] <= '\x9F')) or \
|
||||
((aStr[0] >= '\xE0') and (aStr[0] <= '\xFC')):
|
||||
first_char = wrap_ord(aBuf[0])
|
||||
if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
|
||||
charLen = 2
|
||||
if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
|
||||
self.charset_name = "CP932"
|
||||
else:
|
||||
charLen = 1
|
||||
|
||||
# return its order if it is hiragana
|
||||
if len(aStr) > 1:
|
||||
if (aStr[0] == '\202') and \
|
||||
(aStr[1] >= '\x9F') and \
|
||||
(aStr[1] <= '\xF1'):
|
||||
return ord(aStr[1]) - 0x9F, charLen
|
||||
if len(aBuf) > 1:
|
||||
second_char = wrap_ord(aBuf[1])
|
||||
if (first_char == 202) and (0x9F <= second_char <= 0xF1):
|
||||
return second_char - 0x9F, charLen
|
||||
|
||||
return -1, charLen
|
||||
|
||||
class EUCJPContextAnalysis(JapaneseContextAnalysis):
|
||||
def get_order(self, aStr):
|
||||
if not aStr: return -1, 1
|
||||
def get_order(self, aBuf):
|
||||
if not aBuf:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
if (aStr[0] == '\x8E') or \
|
||||
((aStr[0] >= '\xA1') and (aStr[0] <= '\xFE')):
|
||||
first_char = wrap_ord(aBuf[0])
|
||||
if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
|
||||
charLen = 2
|
||||
elif aStr[0] == '\x8F':
|
||||
elif first_char == 0x8F:
|
||||
charLen = 3
|
||||
else:
|
||||
charLen = 1
|
||||
|
||||
# return its order if it is hiragana
|
||||
if len(aStr) > 1:
|
||||
if (aStr[0] == '\xA4') and \
|
||||
(aStr[1] >= '\xA1') and \
|
||||
(aStr[1] <= '\xF3'):
|
||||
return ord(aStr[1]) - 0xA1, charLen
|
||||
if len(aBuf) > 1:
|
||||
second_char = wrap_ord(aBuf[1])
|
||||
if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
|
||||
return second_char - 0xA1, charLen
|
||||
|
||||
return -1, charLen
|
||||
|
||||
# flake8: noqa
|
||||
|
|
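For illustration, not part of this commit: the rewritten SJISContextAnalysis.get_order() above derives the character length from the lead byte and flags CP932-only lead bytes; a small sketch of that lead-byte classification.

# Illustrative sketch of the SHIFT_JIS lead-byte logic used above.
def sjis_char_len(first_byte):
    if 0x81 <= first_byte <= 0x9F or 0xE0 <= first_byte <= 0xFC:
        return 2                      # double-byte character
    return 1                          # single byte (ASCII or half-width kana)

print(sjis_char_len(0x82))   # -> 2 (0x82 0xA0 is hiragana "a" in Shift_JIS)
print(sjis_char_len(0x41))   # -> 1 (plain ASCII 'A')
# Lead bytes 0x87 and 0xFA-0xFC belong to Microsoft's CP932 extension, which is
# why the prober above renames itself from "SHIFT_JIS" to "CP932" on seeing them.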
thirdparty/chardet/langbulgarianmodel.py (vendored) | 19
|
@ -25,8 +25,6 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
|
@ -36,7 +34,7 @@ import constants
|
|||
# this table is modified base on win1251BulgarianCharToOrderMap, so
|
||||
# only number <64 is sure valid
|
||||
|
||||
Latin5_BulgarianCharToOrderMap = ( \
|
||||
Latin5_BulgarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -55,7 +53,7 @@ Latin5_BulgarianCharToOrderMap = ( \
|
|||
62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0
|
||||
)
|
||||
|
||||
win1251BulgarianCharToOrderMap = ( \
|
||||
win1251BulgarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -80,7 +78,7 @@ win1251BulgarianCharToOrderMap = ( \
|
|||
# first 1024 sequences:3.0618%
|
||||
# rest sequences: 0.2992%
|
||||
# negative sequences: 0.0020%
|
||||
BulgarianLangModel = ( \
|
||||
BulgarianLangModel = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
|
||||
3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,
|
||||
|
@ -211,18 +209,21 @@ BulgarianLangModel = ( \
|
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
)
|
||||
|
||||
Latin5BulgarianModel = { \
|
||||
Latin5BulgarianModel = {
|
||||
'charToOrderMap': Latin5_BulgarianCharToOrderMap,
|
||||
'precedenceMatrix': BulgarianLangModel,
|
||||
'mTypicalPositiveRatio': 0.969392,
|
||||
'keepEnglishLetter': constants.False,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "ISO-8859-5"
|
||||
}
|
||||
|
||||
Win1251BulgarianModel = { \
|
||||
Win1251BulgarianModel = {
|
||||
'charToOrderMap': win1251BulgarianCharToOrderMap,
|
||||
'precedenceMatrix': BulgarianLangModel,
|
||||
'mTypicalPositiveRatio': 0.969392,
|
||||
'keepEnglishLetter': constants.False,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "windows-1251"
|
||||
}
|
||||
|
||||
|
||||
# flake8: noqa
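The language-model modules in this commit only drop the constants import, remove the line-continuation backslashes, and switch to Python's own True/False; each model remains a plain dict. As a rough illustration (not the vendored prober), this is how such a dict is consumed, with SAMPLE_SIZE assumed to be the 64-character sampling window used by sbcharsetprober.py later in this commit:

SAMPLE_SIZE = 64  # the probers only track the 64 most frequent characters

def sequence_category(model, prev_byte, cur_byte):
    # map raw byte values to frequency orders, then look up how plausible the
    # pair is in the language's precedence matrix (0 = negative .. 3 = positive)
    prev_order = model['charToOrderMap'][prev_byte]
    cur_order = model['charToOrderMap'][cur_byte]
    if prev_order < SAMPLE_SIZE and cur_order < SAMPLE_SIZE:
        return model['precedenceMatrix'][prev_order * SAMPLE_SIZE + cur_order]
    return None  # at least one character is outside the sampled range

For example, sequence_category(Win1251BulgarianModel, 0xE0, 0xE1) rates the pair of Cyrillic letters at those windows-1251 code points.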
|
||||
|
|
42
thirdparty/chardet/langcyrillicmodel.py
vendored
|
@ -25,11 +25,9 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants
|
||||
|
||||
# KOI8-R language model
|
||||
# Character Mapping Table:
|
||||
KOI8R_CharToOrderMap = ( \
|
||||
KOI8R_CharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -48,7 +46,7 @@ KOI8R_CharToOrderMap = ( \
|
|||
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
|
||||
)
|
||||
|
||||
win1251_CharToOrderMap = ( \
|
||||
win1251_CharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -67,7 +65,7 @@ win1251_CharToOrderMap = ( \
|
|||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||
)
|
||||
|
||||
latin5_CharToOrderMap = ( \
|
||||
latin5_CharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -86,7 +84,7 @@ latin5_CharToOrderMap = ( \
|
|||
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
||||
)
|
||||
|
||||
macCyrillic_CharToOrderMap = ( \
|
||||
macCyrillic_CharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -105,7 +103,7 @@ macCyrillic_CharToOrderMap = ( \
|
|||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
|
||||
)
|
||||
|
||||
IBM855_CharToOrderMap = ( \
|
||||
IBM855_CharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -124,7 +122,7 @@ IBM855_CharToOrderMap = ( \
|
|||
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
|
||||
)
|
||||
|
||||
IBM866_CharToOrderMap = ( \
|
||||
IBM866_CharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -149,7 +147,7 @@ IBM866_CharToOrderMap = ( \
|
|||
# first 1024 sequences: 2.3389%
|
||||
# rest sequences: 0.1237%
|
||||
# negative sequences: 0.0009%
|
||||
RussianLangModel = ( \
|
||||
RussianLangModel = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
|
||||
3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
|
||||
|
@ -280,50 +278,52 @@ RussianLangModel = ( \
|
|||
0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
|
||||
)
|
||||
|
||||
Koi8rModel = { \
|
||||
Koi8rModel = {
|
||||
'charToOrderMap': KOI8R_CharToOrderMap,
|
||||
'precedenceMatrix': RussianLangModel,
|
||||
'mTypicalPositiveRatio': 0.976601,
|
||||
'keepEnglishLetter': constants.False,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "KOI8-R"
|
||||
}
|
||||
|
||||
Win1251CyrillicModel = { \
|
||||
Win1251CyrillicModel = {
|
||||
'charToOrderMap': win1251_CharToOrderMap,
|
||||
'precedenceMatrix': RussianLangModel,
|
||||
'mTypicalPositiveRatio': 0.976601,
|
||||
'keepEnglishLetter': constants.False,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "windows-1251"
|
||||
}
|
||||
|
||||
Latin5CyrillicModel = { \
|
||||
Latin5CyrillicModel = {
|
||||
'charToOrderMap': latin5_CharToOrderMap,
|
||||
'precedenceMatrix': RussianLangModel,
|
||||
'mTypicalPositiveRatio': 0.976601,
|
||||
'keepEnglishLetter': constants.False,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "ISO-8859-5"
|
||||
}
|
||||
|
||||
MacCyrillicModel = { \
|
||||
MacCyrillicModel = {
|
||||
'charToOrderMap': macCyrillic_CharToOrderMap,
|
||||
'precedenceMatrix': RussianLangModel,
|
||||
'mTypicalPositiveRatio': 0.976601,
|
||||
'keepEnglishLetter': constants.False,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "MacCyrillic"
|
||||
};
|
||||
|
||||
Ibm866Model = { \
|
||||
Ibm866Model = {
|
||||
'charToOrderMap': IBM866_CharToOrderMap,
|
||||
'precedenceMatrix': RussianLangModel,
|
||||
'mTypicalPositiveRatio': 0.976601,
|
||||
'keepEnglishLetter': constants.False,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "IBM866"
|
||||
}
|
||||
|
||||
Ibm855Model = { \
|
||||
Ibm855Model = {
|
||||
'charToOrderMap': IBM855_CharToOrderMap,
|
||||
'precedenceMatrix': RussianLangModel,
|
||||
'mTypicalPositiveRatio': 0.976601,
|
||||
'keepEnglishLetter': constants.False,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "IBM855"
|
||||
}
|
||||
|
||||
# flake8: noqa
|
||||
|
|
18
thirdparty/chardet/langgreekmodel.py
vendored
|
@ -25,15 +25,13 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
Latin7_CharToOrderMap = ( \
|
||||
Latin7_CharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -52,7 +50,7 @@ Latin7_CharToOrderMap = ( \
|
|||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
|
||||
)
|
||||
|
||||
win1253_CharToOrderMap = ( \
|
||||
win1253_CharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -77,7 +75,7 @@ win1253_CharToOrderMap = ( \
|
|||
# first 1024 sequences:1.7001%
|
||||
# rest sequences: 0.0359%
|
||||
# negative sequences: 0.0148%
|
||||
GreekLangModel = ( \
|
||||
GreekLangModel = (
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
|
||||
|
@ -208,18 +206,20 @@ GreekLangModel = ( \
|
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
Latin7GreekModel = { \
|
||||
Latin7GreekModel = {
|
||||
'charToOrderMap': Latin7_CharToOrderMap,
|
||||
'precedenceMatrix': GreekLangModel,
|
||||
'mTypicalPositiveRatio': 0.982851,
|
||||
'keepEnglishLetter': constants.False,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "ISO-8859-7"
|
||||
}
|
||||
|
||||
Win1253GreekModel = { \
|
||||
Win1253GreekModel = {
|
||||
'charToOrderMap': win1253_CharToOrderMap,
|
||||
'precedenceMatrix': GreekLangModel,
|
||||
'mTypicalPositiveRatio': 0.982851,
|
||||
'keepEnglishLetter': constants.False,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "windows-1253"
|
||||
}
|
||||
|
||||
# flake8: noqa
|
||||
|
|
12
thirdparty/chardet/langhebrewmodel.py
vendored
|
@ -27,8 +27,6 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
|
@ -36,7 +34,7 @@ import constants
|
|||
|
||||
# Windows-1255 language model
|
||||
# Character Mapping Table:
|
||||
win1255_CharToOrderMap = ( \
|
||||
win1255_CharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -61,7 +59,7 @@ win1255_CharToOrderMap = ( \
|
|||
# first 1024 sequences: 1.5981%
|
||||
# rest sequences: 0.087%
|
||||
# negative sequences: 0.0015%
|
||||
HebrewLangModel = ( \
|
||||
HebrewLangModel = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
|
||||
3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
|
||||
|
@ -192,10 +190,12 @@ HebrewLangModel = ( \
|
|||
0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
|
||||
)
|
||||
|
||||
Win1255HebrewModel = { \
|
||||
Win1255HebrewModel = {
|
||||
'charToOrderMap': win1255_CharToOrderMap,
|
||||
'precedenceMatrix': HebrewLangModel,
|
||||
'mTypicalPositiveRatio': 0.984004,
|
||||
'keepEnglishLetter': constants.False,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "windows-1255"
|
||||
}
|
||||
|
||||
# flake8: noqa
|
||||
|
|
18
thirdparty/chardet/langhungarianmodel.py
vendored
|
@ -25,15 +25,13 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
Latin2_HungarianCharToOrderMap = ( \
|
||||
Latin2_HungarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -52,7 +50,7 @@ Latin2_HungarianCharToOrderMap = ( \
|
|||
245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
|
||||
)
|
||||
|
||||
win1250HungarianCharToOrderMap = ( \
|
||||
win1250HungarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -77,7 +75,7 @@ win1250HungarianCharToOrderMap = ( \
|
|||
# first 1024 sequences:5.2623%
|
||||
# rest sequences: 0.8894%
|
||||
# negative sequences: 0.0009%
|
||||
HungarianLangModel = ( \
|
||||
HungarianLangModel = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
||||
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
|
||||
3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
|
||||
|
@ -208,18 +206,20 @@ HungarianLangModel = ( \
|
|||
0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
Latin2HungarianModel = { \
|
||||
Latin2HungarianModel = {
|
||||
'charToOrderMap': Latin2_HungarianCharToOrderMap,
|
||||
'precedenceMatrix': HungarianLangModel,
|
||||
'mTypicalPositiveRatio': 0.947368,
|
||||
'keepEnglishLetter': constants.True,
|
||||
'keepEnglishLetter': True,
|
||||
'charsetName': "ISO-8859-2"
|
||||
}
|
||||
|
||||
Win1250HungarianModel = { \
|
||||
Win1250HungarianModel = {
|
||||
'charToOrderMap': win1250HungarianCharToOrderMap,
|
||||
'precedenceMatrix': HungarianLangModel,
|
||||
'mTypicalPositiveRatio': 0.947368,
|
||||
'keepEnglishLetter': constants.True,
|
||||
'keepEnglishLetter': True,
|
||||
'charsetName': "windows-1250"
|
||||
}
|
||||
|
||||
# flake8: noqa
|
||||
|
|
12
thirdparty/chardet/langthaimodel.py
vendored
|
@ -25,8 +25,6 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
|
@ -35,7 +33,7 @@ import constants
|
|||
# The following result for thai was collected from a limited sample (1M).
|
||||
|
||||
# Character Mapping Table:
|
||||
TIS620CharToOrderMap = ( \
|
||||
TIS620CharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
|
@ -60,7 +58,7 @@ TIS620CharToOrderMap = ( \
|
|||
# first 1024 sequences:7.3177%
|
||||
# rest sequences: 1.0230%
|
||||
# negative sequences: 0.0436%
|
||||
ThaiLangModel = ( \
|
||||
ThaiLangModel = (
|
||||
0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
|
||||
0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
|
||||
3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
|
||||
|
@ -191,10 +189,12 @@ ThaiLangModel = ( \
|
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
TIS620ThaiModel = { \
|
||||
TIS620ThaiModel = {
|
||||
'charToOrderMap': TIS620CharToOrderMap,
|
||||
'precedenceMatrix': ThaiLangModel,
|
||||
'mTypicalPositiveRatio': 0.926386,
|
||||
'keepEnglishLetter': constants.False,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "TIS-620"
|
||||
}
|
||||
|
||||
# flake8: noqa
|
||||
|
|
31
thirdparty/chardet/latin1prober.py
vendored
|
@ -26,9 +26,9 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from charsetprober import CharSetProber
|
||||
import constants
|
||||
import operator
|
||||
from .charsetprober import CharSetProber
|
||||
from .constants import eNotMe
|
||||
from .compat import wrap_ord
|
||||
|
||||
FREQ_CAT_NUM = 4
|
||||
|
||||
|
@ -42,7 +42,7 @@ ASV = 6 # accent small vowel
|
|||
ASO = 7 # accent small other
|
||||
CLASS_NUM = 8 # total classes
|
||||
|
||||
Latin1_CharToClass = ( \
|
||||
Latin1_CharToClass = (
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
|
||||
|
@ -81,7 +81,7 @@ Latin1_CharToClass = ( \
|
|||
# 1 : very unlikely
|
||||
# 2 : normal
|
||||
# 3 : very likely
|
||||
Latin1ClassModel = ( \
|
||||
Latin1ClassModel = (
|
||||
# UDF OTH ASC ASS ACV ACO ASV ASO
|
||||
0, 0, 0, 0, 0, 0, 0, 0, # UDF
|
||||
0, 3, 3, 3, 3, 3, 3, 3, # OTH
|
||||
|
@ -93,6 +93,7 @@ Latin1ClassModel = ( \
|
|||
0, 3, 1, 3, 1, 1, 3, 3, # ASO
|
||||
)
|
||||
|
||||
|
||||
class Latin1Prober(CharSetProber):
|
||||
def __init__(self):
|
||||
CharSetProber.__init__(self)
|
||||
|
@ -109,10 +110,11 @@ class Latin1Prober(CharSetProber):
|
|||
def feed(self, aBuf):
|
||||
aBuf = self.filter_with_english_letters(aBuf)
|
||||
for c in aBuf:
|
||||
charClass = Latin1_CharToClass[ord(c)]
|
||||
freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) + charClass]
|
||||
charClass = Latin1_CharToClass[wrap_ord(c)]
|
||||
freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
|
||||
+ charClass]
|
||||
if freq == 0:
|
||||
self._mState = constants.eNotMe
|
||||
self._mState = eNotMe
|
||||
break
|
||||
self._mFreqCounter[freq] += 1
|
||||
self._mLastCharClass = charClass
|
||||
|
@ -120,17 +122,18 @@ class Latin1Prober(CharSetProber):
|
|||
return self.get_state()
|
||||
|
||||
def get_confidence(self):
|
||||
if self.get_state() == constants.eNotMe:
|
||||
if self.get_state() == eNotMe:
|
||||
return 0.01
|
||||
|
||||
total = reduce(operator.add, self._mFreqCounter)
|
||||
total = sum(self._mFreqCounter)
|
||||
if total < 0.01:
|
||||
confidence = 0.0
|
||||
else:
|
||||
confidence = (self._mFreqCounter[3] / total) - (self._mFreqCounter[1] * 20.0 / total)
|
||||
confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0)
|
||||
/ total)
|
||||
if confidence < 0.0:
|
||||
confidence = 0.0
|
||||
# lower the confidence of latin1 so that other more accurate detector
|
||||
# can take priority.
|
||||
confidence = confidence * 0.5
|
||||
# lower the confidence of latin1 so that other more accurate
|
||||
# detector can take priority.
|
||||
confidence = confidence * 0.73
|
||||
return confidence
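The reshaped confidence expression above is algebraically identical to the old one ((f3/total) - (f1*20/total) equals (f3 - f1*20)/total); the only behavioural change in this hunk is the damping factor, raised from 0.5 to 0.73. A quick check with invented counter values (index 1 = "very unlikely" pairs, index 3 = "very likely" pairs):

freq_counter = [0, 1, 10, 40]              # hypothetical _mFreqCounter contents
total = float(sum(freq_counter))
old_expr = (freq_counter[3] / total) - (freq_counter[1] * 20.0 / total)
new_expr = (freq_counter[3] - freq_counter[1] * 20.0) / total
assert abs(old_expr - new_expr) < 1e-12
confidence = max(new_expr, 0.0) * 0.73     # new damping factor (was 0.5)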
|
||||
|
|
30
thirdparty/chardet/mbcharsetprober.py
vendored
|
@ -27,16 +27,17 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants, sys
|
||||
from constants import eStart, eError, eItsMe
|
||||
from charsetprober import CharSetProber
|
||||
import sys
|
||||
from . import constants
|
||||
from .charsetprober import CharSetProber
|
||||
|
||||
|
||||
class MultiByteCharSetProber(CharSetProber):
|
||||
def __init__(self):
|
||||
CharSetProber.__init__(self)
|
||||
self._mDistributionAnalyzer = None
|
||||
self._mCodingSM = None
|
||||
self._mLastChar = ['\x00', '\x00']
|
||||
self._mLastChar = [0, 0]
|
||||
|
||||
def reset(self):
|
||||
CharSetProber.reset(self)
|
||||
|
@ -44,36 +45,39 @@ class MultiByteCharSetProber(CharSetProber):
|
|||
self._mCodingSM.reset()
|
||||
if self._mDistributionAnalyzer:
|
||||
self._mDistributionAnalyzer.reset()
|
||||
self._mLastChar = ['\x00', '\x00']
|
||||
self._mLastChar = [0, 0]
|
||||
|
||||
def get_charset_name(self):
|
||||
pass
|
||||
|
||||
def feed(self, aBuf):
|
||||
aLen = len(aBuf)
|
||||
for i in xrange(0, aLen):
|
||||
for i in range(0, aLen):
|
||||
codingState = self._mCodingSM.next_state(aBuf[i])
|
||||
if codingState == eError:
|
||||
if codingState == constants.eError:
|
||||
if constants._debug:
|
||||
sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n')
|
||||
sys.stderr.write(self.get_charset_name()
|
||||
+ ' prober hit error at byte ' + str(i)
|
||||
+ '\n')
|
||||
self._mState = constants.eNotMe
|
||||
break
|
||||
elif codingState == eItsMe:
|
||||
elif codingState == constants.eItsMe:
|
||||
self._mState = constants.eFoundIt
|
||||
break
|
||||
elif codingState == eStart:
|
||||
elif codingState == constants.eStart:
|
||||
charLen = self._mCodingSM.get_current_charlen()
|
||||
if i == 0:
|
||||
self._mLastChar[1] = aBuf[0]
|
||||
self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
|
||||
else:
|
||||
self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen)
|
||||
self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
|
||||
charLen)
|
||||
|
||||
self._mLastChar[0] = aBuf[aLen - 1]
|
||||
|
||||
if self.get_state() == constants.eDetecting:
|
||||
if self._mDistributionAnalyzer.got_enough_data() and \
|
||||
(self.get_confidence() > constants.SHORTCUT_THRESHOLD):
|
||||
if (self._mDistributionAnalyzer.got_enough_data() and
|
||||
(self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
|
||||
self._mState = constants.eFoundIt
|
||||
|
||||
return self.get_state()
|
||||
|
|
24
thirdparty/chardet/mbcsgroupprober.py
vendored
|
@ -27,24 +27,28 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from charsetgroupprober import CharSetGroupProber
|
||||
from utf8prober import UTF8Prober
|
||||
from sjisprober import SJISProber
|
||||
from eucjpprober import EUCJPProber
|
||||
from gb2312prober import GB2312Prober
|
||||
from euckrprober import EUCKRProber
|
||||
from big5prober import Big5Prober
|
||||
from euctwprober import EUCTWProber
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .utf8prober import UTF8Prober
|
||||
from .sjisprober import SJISProber
|
||||
from .eucjpprober import EUCJPProber
|
||||
from .gb2312prober import GB2312Prober
|
||||
from .euckrprober import EUCKRProber
|
||||
from .cp949prober import CP949Prober
|
||||
from .big5prober import Big5Prober
|
||||
from .euctwprober import EUCTWProber
|
||||
|
||||
|
||||
class MBCSGroupProber(CharSetGroupProber):
|
||||
def __init__(self):
|
||||
CharSetGroupProber.__init__(self)
|
||||
self._mProbers = [ \
|
||||
self._mProbers = [
|
||||
UTF8Prober(),
|
||||
SJISProber(),
|
||||
EUCJPProber(),
|
||||
GB2312Prober(),
|
||||
EUCKRProber(),
|
||||
CP949Prober(),
|
||||
Big5Prober(),
|
||||
EUCTWProber()]
|
||||
EUCTWProber()
|
||||
]
|
||||
self.reset()
|
||||
|
|
134
thirdparty/chardet/mbcssm.py
vendored
|
@ -25,11 +25,11 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from constants import eStart, eError, eItsMe
|
||||
from .constants import eStart, eError, eItsMe
|
||||
|
||||
# BIG5
|
||||
|
||||
BIG5_cls = ( \
|
||||
BIG5_cls = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
|
@ -61,12 +61,14 @@ BIG5_cls = ( \
|
|||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,3,3,3, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,3,3,0) # f8 - ff
|
||||
3,3,3,3,3,3,3,0 # f8 - ff
|
||||
)
|
||||
|
||||
BIG5_st = ( \
|
||||
BIG5_st = (
|
||||
eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07
|
||||
eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f
|
||||
eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart)#10-17
|
||||
eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17
|
||||
)
|
||||
|
||||
Big5CharLenTable = (0, 1, 1, 2, 0)
|
||||
|
||||
|
@ -76,9 +78,49 @@ Big5SMModel = {'classTable': BIG5_cls,
|
|||
'charLenTable': Big5CharLenTable,
|
||||
'name': 'Big5'}
|
||||
|
||||
# CP949
|
||||
|
||||
CP949_cls = (
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f
|
||||
1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f
|
||||
4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f
|
||||
1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f
|
||||
5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f
|
||||
0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f
|
||||
6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f
|
||||
6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af
|
||||
7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf
|
||||
7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
|
||||
)
|
||||
|
||||
CP949_st = (
|
||||
#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
|
||||
eError,eStart, 3,eError,eStart,eStart, 4, 5,eError, 6, # eStart
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError
|
||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe
|
||||
eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3
|
||||
eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4
|
||||
eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5
|
||||
eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6
|
||||
)
|
||||
|
||||
CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
|
||||
|
||||
CP949SMModel = {'classTable': CP949_cls,
|
||||
'classFactor': 10,
|
||||
'stateTable': CP949_st,
|
||||
'charLenTable': CP949CharLenTable,
|
||||
'name': 'CP949'}
|
||||
|
||||
# EUC-JP
|
||||
|
||||
EUCJP_cls = ( \
|
||||
EUCJP_cls = (
|
||||
4,4,4,4,4,4,4,4, # 00 - 07
|
||||
4,4,4,4,4,4,5,5, # 08 - 0f
|
||||
4,4,4,4,4,4,4,4, # 10 - 17
|
||||
|
@ -110,14 +152,16 @@ EUCJP_cls = ( \
|
|||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,0,5) # f8 - ff
|
||||
0,0,0,0,0,0,0,5 # f8 - ff
|
||||
)
|
||||
|
||||
EUCJP_st = ( \
|
||||
EUCJP_st = (
|
||||
3, 4, 3, 5,eStart,eError,eError,eError,#00-07
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
|
||||
eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17
|
||||
eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f
|
||||
3,eError,eError,eError,eStart,eStart,eStart,eStart)#20-27
|
||||
3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27
|
||||
)
|
||||
|
||||
EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)
|
||||
|
||||
|
@ -129,7 +173,7 @@ EUCJPSMModel = {'classTable': EUCJP_cls,
|
|||
|
||||
# EUC-KR
|
||||
|
||||
EUCKR_cls = ( \
|
||||
EUCKR_cls = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
|
@ -161,11 +205,13 @@ EUCKR_cls = ( \
|
|||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,0) # f8 - ff
|
||||
2,2,2,2,2,2,2,0 # f8 - ff
|
||||
)
|
||||
|
||||
EUCKR_st = (
|
||||
eError,eStart, 3,eError,eError,eError,eError,eError,#00-07
|
||||
eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart)#08-0f
|
||||
eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f
|
||||
)
|
||||
|
||||
EUCKRCharLenTable = (0, 1, 2, 0)
|
||||
|
||||
|
@ -177,7 +223,7 @@ EUCKRSMModel = {'classTable': EUCKR_cls,
|
|||
|
||||
# EUC-TW
|
||||
|
||||
EUCTW_cls = ( \
|
||||
EUCTW_cls = (
|
||||
2,2,2,2,2,2,2,2, # 00 - 07
|
||||
2,2,2,2,2,2,0,0, # 08 - 0f
|
||||
2,2,2,2,2,2,2,2, # 10 - 17
|
||||
|
@ -209,15 +255,17 @@ EUCTW_cls = ( \
|
|||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,3,3,3, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,3,3,0) # f8 - ff
|
||||
3,3,3,3,3,3,3,0 # f8 - ff
|
||||
)
|
||||
|
||||
EUCTW_st = ( \
|
||||
EUCTW_st = (
|
||||
eError,eError,eStart, 3, 3, 3, 4,eError,#00-07
|
||||
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
|
||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17
|
||||
eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f
|
||||
5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27
|
||||
eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart)#28-2f
|
||||
eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
|
||||
)
|
||||
|
||||
EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)
|
||||
|
||||
|
@ -229,7 +277,7 @@ EUCTWSMModel = {'classTable': EUCTW_cls,
|
|||
|
||||
# GB2312
|
||||
|
||||
GB2312_cls = ( \
|
||||
GB2312_cls = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
|
@ -261,15 +309,17 @@ GB2312_cls = ( \
|
|||
6,6,6,6,6,6,6,6, # e0 - e7
|
||||
6,6,6,6,6,6,6,6, # e8 - ef
|
||||
6,6,6,6,6,6,6,6, # f0 - f7
|
||||
6,6,6,6,6,6,6,0) # f8 - ff
|
||||
6,6,6,6,6,6,6,0 # f8 - ff
|
||||
)
|
||||
|
||||
GB2312_st = ( \
|
||||
GB2312_st = (
|
||||
eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07
|
||||
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
|
||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17
|
||||
4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f
|
||||
eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27
|
||||
eError,eError,eStart,eStart,eStart,eStart,eStart,eStart)#28-2f
|
||||
eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
|
||||
)
|
||||
|
||||
# To be accurate, the length of class 6 can be either 2 or 4.
|
||||
# But it is not necessary to discriminate between the two since
|
||||
|
@ -286,7 +336,7 @@ GB2312SMModel = {'classTable': GB2312_cls,
|
|||
|
||||
# Shift_JIS
|
||||
|
||||
SJIS_cls = ( \
|
||||
SJIS_cls = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
|
@ -303,7 +353,7 @@ SJIS_cls = ( \
|
|||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,1, # 78 - 7f
|
||||
3,3,3,3,3,3,3,3, # 80 - 87
|
||||
3,3,3,3,3,2,2,3, # 80 - 87
|
||||
3,3,3,3,3,3,3,3, # 88 - 8f
|
||||
3,3,3,3,3,3,3,3, # 90 - 97
|
||||
3,3,3,3,3,3,3,3, # 98 - 9f
|
||||
|
@ -319,13 +369,15 @@ SJIS_cls = ( \
|
|||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,4,4,4, # e8 - ef
|
||||
4,4,4,4,4,4,4,4, # f0 - f7
|
||||
4,4,4,4,4,0,0,0) # f8 - ff
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,0,0,0) # f8 - ff
|
||||
|
||||
SJIS_st = ( \
|
||||
|
||||
SJIS_st = (
|
||||
eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
|
||||
eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart)#10-17
|
||||
eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17
|
||||
)
|
||||
|
||||
SJISCharLenTable = (0, 1, 1, 2, 0, 0)
|
||||
|
||||
|
@ -337,7 +389,7 @@ SJISSMModel = {'classTable': SJIS_cls,
|
|||
|
||||
# UCS2-BE
|
||||
|
||||
UCS2BE_cls = ( \
|
||||
UCS2BE_cls = (
|
||||
0,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
|
@ -369,16 +421,18 @@ UCS2BE_cls = ( \
|
|||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,4,5) # f8 - ff
|
||||
0,0,0,0,0,0,4,5 # f8 - ff
|
||||
)
|
||||
|
||||
UCS2BE_st = ( \
|
||||
UCS2BE_st = (
|
||||
5, 7, 7,eError, 4, 3,eError,eError,#00-07
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
|
||||
eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17
|
||||
6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f
|
||||
6, 6, 6, 6, 5, 7, 7,eError,#20-27
|
||||
5, 8, 6, 6,eError, 6, 6, 6,#28-2f
|
||||
6, 6, 6, 6,eError,eError,eStart,eStart)#30-37
|
||||
6, 6, 6, 6,eError,eError,eStart,eStart #30-37
|
||||
)
|
||||
|
||||
UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)
|
||||
|
||||
|
@ -390,7 +444,7 @@ UCS2BESMModel = {'classTable': UCS2BE_cls,
|
|||
|
||||
# UCS2-LE
|
||||
|
||||
UCS2LE_cls = ( \
|
||||
UCS2LE_cls = (
|
||||
0,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
|
@ -422,16 +476,18 @@ UCS2LE_cls = ( \
|
|||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,4,5) # f8 - ff
|
||||
0,0,0,0,0,0,4,5 # f8 - ff
|
||||
)
|
||||
|
||||
UCS2LE_st = ( \
|
||||
UCS2LE_st = (
|
||||
6, 6, 7, 6, 4, 3,eError,eError,#00-07
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
|
||||
eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17
|
||||
5, 5, 5,eError, 5,eError, 6, 6,#18-1f
|
||||
7, 6, 8, 8, 5, 5, 5,eError,#20-27
|
||||
5, 5, 5,eError,eError,eError, 5, 5,#28-2f
|
||||
5, 5, 5,eError, 5,eError,eStart,eStart)#30-37
|
||||
5, 5, 5,eError, 5,eError,eStart,eStart #30-37
|
||||
)
|
||||
|
||||
UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)
|
||||
|
||||
|
@ -443,7 +499,7 @@ UCS2LESMModel = {'classTable': UCS2LE_cls,
|
|||
|
||||
# UTF-8
|
||||
|
||||
UTF8_cls = ( \
|
||||
UTF8_cls = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
|
@ -475,9 +531,10 @@ UTF8_cls = ( \
|
|||
7,8,8,8,8,8,8,8, # e0 - e7
|
||||
8,8,8,8,8,9,8,8, # e8 - ef
|
||||
10,11,11,11,11,11,11,11, # f0 - f7
|
||||
12,13,13,13,14,15,0,0) # f8 - ff
|
||||
12,13,13,13,14,15,0,0 # f8 - ff
|
||||
)
|
||||
|
||||
UTF8_st = ( \
|
||||
UTF8_st = (
|
||||
eError,eStart,eError,eError,eError,eError, 12, 10,#00-07
|
||||
9, 11, 8, 7, 6, 5, 4, 3,#08-0f
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,#10-17
|
||||
|
@ -503,7 +560,8 @@ UTF8_st = ( \
|
|||
eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf
|
||||
eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7
|
||||
eError,eError,eError,eError,eError,eError,eError,eError)#c8-cf
|
||||
eError,eError,eError,eError,eError,eError,eError,eError #c8-cf
|
||||
)
|
||||
|
||||
UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
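Every *_cls / *_st / *CharLenTable triple above drives the same tiny state machine: a byte is first mapped to a character class, then the flattened state table is indexed by current_state * classFactor + class (classFactor being the number of classes, e.g. 10 in CP949SMModel above). An illustrative walker, not the vendored codingstatemachine.py, with the chardet constants assumed to be eStart=0, eError=1, eItsMe=2:

eStart, eError, eItsMe = 0, 1, 2     # assumed to match chardet.constants

def run_state_machine(sm_model, data):
    # walk a sequence of byte values (ints) through a model such as SJISSMModel
    state = eStart
    for byte in data:
        byte_class = sm_model['classTable'][byte]
        state = sm_model['stateTable'][state * sm_model['classFactor'] + byte_class]
        if state in (eError, eItsMe):
            break                     # definite reject or definite match
    return state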
|
||||
|
||||
|
|
44
thirdparty/chardet/sbcharsetprober.py
vendored
|
@ -26,8 +26,10 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants, sys
|
||||
from charsetprober import CharSetProber
|
||||
import sys
|
||||
from . import constants
|
||||
from .charsetprober import CharSetProber
|
||||
from .compat import wrap_ord
|
||||
|
||||
SAMPLE_SIZE = 64
|
||||
SB_ENOUGH_REL_THRESHOLD = 1024
|
||||
|
@ -38,21 +40,26 @@ NUMBER_OF_SEQ_CAT = 4
|
|||
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
|
||||
#NEGATIVE_CAT = 0
|
||||
|
||||
|
||||
class SingleByteCharSetProber(CharSetProber):
|
||||
def __init__(self, model, reversed=constants.False, nameProber=None):
|
||||
def __init__(self, model, reversed=False, nameProber=None):
|
||||
CharSetProber.__init__(self)
|
||||
self._mModel = model
|
||||
self._mReversed = reversed # TRUE if we need to reverse every pair in the model lookup
|
||||
self._mNameProber = nameProber # Optional auxiliary prober for name decision
|
||||
# TRUE if we need to reverse every pair in the model lookup
|
||||
self._mReversed = reversed
|
||||
# Optional auxiliary prober for name decision
|
||||
self._mNameProber = nameProber
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
CharSetProber.reset(self)
|
||||
self._mLastOrder = 255 # char order of last character
|
||||
# char order of last character
|
||||
self._mLastOrder = 255
|
||||
self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
|
||||
self._mTotalSeqs = 0
|
||||
self._mTotalChar = 0
|
||||
self._mFreqChar = 0 # characters that fall in our sampling range
|
||||
# characters that fall in our sampling range
|
||||
self._mFreqChar = 0
|
||||
|
||||
def get_charset_name(self):
|
||||
if self._mNameProber:
|
||||
|
@ -67,7 +74,7 @@ class SingleByteCharSetProber(CharSetProber):
|
|||
if not aLen:
|
||||
return self.get_state()
|
||||
for c in aBuf:
|
||||
order = self._mModel['charToOrderMap'][ord(c)]
|
||||
order = self._mModel['charToOrderMap'][wrap_ord(c)]
|
||||
if order < SYMBOL_CAT_ORDER:
|
||||
self._mTotalChar += 1
|
||||
if order < SAMPLE_SIZE:
|
||||
|
@ -75,9 +82,12 @@ class SingleByteCharSetProber(CharSetProber):
|
|||
if self._mLastOrder < SAMPLE_SIZE:
|
||||
self._mTotalSeqs += 1
|
||||
if not self._mReversed:
|
||||
self._mSeqCounters[self._mModel['precedenceMatrix'][(self._mLastOrder * SAMPLE_SIZE) + order]] += 1
|
||||
i = (self._mLastOrder * SAMPLE_SIZE) + order
|
||||
model = self._mModel['precedenceMatrix'][i]
|
||||
else: # reverse the order of the letters in the lookup
|
||||
self._mSeqCounters[self._mModel['precedenceMatrix'][(order * SAMPLE_SIZE) + self._mLastOrder]] += 1
|
||||
i = (order * SAMPLE_SIZE) + self._mLastOrder
|
||||
model = self._mModel['precedenceMatrix'][i]
|
||||
self._mSeqCounters[model] += 1
|
||||
self._mLastOrder = order
|
||||
|
||||
if self.get_state() == constants.eDetecting:
|
||||
|
@ -85,11 +95,16 @@ class SingleByteCharSetProber(CharSetProber):
|
|||
cf = self.get_confidence()
|
||||
if cf > POSITIVE_SHORTCUT_THRESHOLD:
|
||||
if constants._debug:
|
||||
sys.stderr.write('%s confidence = %s, we have a winner\n' % (self._mModel['charsetName'], cf))
|
||||
sys.stderr.write('%s confidence = %s, we have a'
|
||||
'winner\n' %
|
||||
(self._mModel['charsetName'], cf))
|
||||
self._mState = constants.eFoundIt
|
||||
elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
|
||||
if constants._debug:
|
||||
sys.stderr.write('%s confidence = %s, below negative shortcut threshhold %s\n' % (self._mModel['charsetName'], cf, NEGATIVE_SHORTCUT_THRESHOLD))
|
||||
sys.stderr.write('%s confidence = %s, below negative'
|
||||
'shortcut threshhold %s\n' %
|
||||
(self._mModel['charsetName'], cf,
|
||||
NEGATIVE_SHORTCUT_THRESHOLD))
|
||||
self._mState = constants.eNotMe
|
||||
|
||||
return self.get_state()
|
||||
|
@ -97,9 +112,8 @@ class SingleByteCharSetProber(CharSetProber):
|
|||
def get_confidence(self):
|
||||
r = 0.01
|
||||
if self._mTotalSeqs > 0:
|
||||
# print self._mSeqCounters[POSITIVE_CAT], self._mTotalSeqs, self._mModel['mTypicalPositiveRatio']
|
||||
r = (1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs / self._mModel['mTypicalPositiveRatio']
|
||||
# print r, self._mFreqChar, self._mTotalChar
|
||||
r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
|
||||
/ self._mModel['mTypicalPositiveRatio'])
|
||||
r = r * self._mFreqChar / self._mTotalChar
|
||||
if r >= 1.0:
|
||||
r = 0.99
|
||||
|
|
33
thirdparty/chardet/sbcsgroupprober.py
vendored
|
@ -26,21 +26,23 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants, sys
|
||||
from charsetgroupprober import CharSetGroupProber
|
||||
from sbcharsetprober import SingleByteCharSetProber
|
||||
from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model
|
||||
from langgreekmodel import Latin7GreekModel, Win1253GreekModel
|
||||
from langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
|
||||
from langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
|
||||
from langthaimodel import TIS620ThaiModel
|
||||
from langhebrewmodel import Win1255HebrewModel
|
||||
from hebrewprober import HebrewProber
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .sbcharsetprober import SingleByteCharSetProber
|
||||
from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
|
||||
Latin5CyrillicModel, MacCyrillicModel,
|
||||
Ibm866Model, Ibm855Model)
|
||||
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
|
||||
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
|
||||
from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
|
||||
from .langthaimodel import TIS620ThaiModel
|
||||
from .langhebrewmodel import Win1255HebrewModel
|
||||
from .hebrewprober import HebrewProber
|
||||
|
||||
|
||||
class SBCSGroupProber(CharSetGroupProber):
|
||||
def __init__(self):
|
||||
CharSetGroupProber.__init__(self)
|
||||
self._mProbers = [ \
|
||||
self._mProbers = [
|
||||
SingleByteCharSetProber(Win1251CyrillicModel),
|
||||
SingleByteCharSetProber(Koi8rModel),
|
||||
SingleByteCharSetProber(Latin5CyrillicModel),
|
||||
|
@ -56,9 +58,12 @@ class SBCSGroupProber(CharSetGroupProber):
|
|||
SingleByteCharSetProber(TIS620ThaiModel),
|
||||
]
|
||||
hebrewProber = HebrewProber()
|
||||
logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.False, hebrewProber)
|
||||
visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.True, hebrewProber)
|
||||
logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,
|
||||
False, hebrewProber)
|
||||
visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,
|
||||
hebrewProber)
|
||||
hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)
|
||||
self._mProbers.extend([hebrewProber, logicalHebrewProber, visualHebrewProber])
|
||||
self._mProbers.extend([hebrewProber, logicalHebrewProber,
|
||||
visualHebrewProber])
|
||||
|
||||
self.reset()
|
||||
|
|
42
thirdparty/chardet/sjisprober.py
vendored
|
@ -25,13 +25,14 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from mbcharsetprober import MultiByteCharSetProber
|
||||
from codingstatemachine import CodingStateMachine
|
||||
from chardistribution import SJISDistributionAnalysis
|
||||
from jpcntx import SJISContextAnalysis
|
||||
from mbcssm import SJISSMModel
|
||||
import constants, sys
|
||||
from constants import eStart, eError, eItsMe
|
||||
import sys
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import SJISDistributionAnalysis
|
||||
from .jpcntx import SJISContextAnalysis
|
||||
from .mbcssm import SJISSMModel
|
||||
from . import constants
|
||||
|
||||
|
||||
class SJISProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
|
@ -46,35 +47,40 @@ class SJISProber(MultiByteCharSetProber):
|
|||
self._mContextAnalyzer.reset()
|
||||
|
||||
def get_charset_name(self):
|
||||
return "SHIFT_JIS"
|
||||
return self._mContextAnalyzer.get_charset_name()
|
||||
|
||||
def feed(self, aBuf):
|
||||
aLen = len(aBuf)
|
||||
for i in xrange(0, aLen):
|
||||
for i in range(0, aLen):
|
||||
codingState = self._mCodingSM.next_state(aBuf[i])
|
||||
if codingState == eError:
|
||||
if codingState == constants.eError:
|
||||
if constants._debug:
|
||||
sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n')
|
||||
sys.stderr.write(self.get_charset_name()
|
||||
+ ' prober hit error at byte ' + str(i)
|
||||
+ '\n')
|
||||
self._mState = constants.eNotMe
|
||||
break
|
||||
elif codingState == eItsMe:
|
||||
elif codingState == constants.eItsMe:
|
||||
self._mState = constants.eFoundIt
|
||||
break
|
||||
elif codingState == eStart:
|
||||
elif codingState == constants.eStart:
|
||||
charLen = self._mCodingSM.get_current_charlen()
|
||||
if i == 0:
|
||||
self._mLastChar[1] = aBuf[0]
|
||||
self._mContextAnalyzer.feed(self._mLastChar[2 - charLen :], charLen)
|
||||
self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
|
||||
charLen)
|
||||
self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
|
||||
else:
|
||||
self._mContextAnalyzer.feed(aBuf[i + 1 - charLen : i + 3 - charLen], charLen)
|
||||
self._mDistributionAnalyzer.feed(aBuf[i - 1 : i + 1], charLen)
|
||||
self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
|
||||
- charLen], charLen)
|
||||
self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
|
||||
charLen)
|
||||
|
||||
self._mLastChar[0] = aBuf[aLen - 1]
|
||||
|
||||
if self.get_state() == constants.eDetecting:
|
||||
if self._mContextAnalyzer.got_enough_data() and \
|
||||
(self.get_confidence() > constants.SHORTCUT_THRESHOLD):
|
||||
if (self._mContextAnalyzer.got_enough_data() and
|
||||
(self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
|
||||
self._mState = constants.eFoundIt
|
||||
|
||||
return self.get_state()
|
||||
|
|
20
thirdparty/chardet/test.py
vendored
|
@ -1,20 +0,0 @@
|
|||
import sys, glob
|
||||
sys.path.insert(0, '..')
|
||||
from chardet.universaldetector import UniversalDetector
|
||||
|
||||
count = 0
|
||||
u = UniversalDetector()
|
||||
for f in glob.glob(sys.argv[1]):
|
||||
print f.ljust(60),
|
||||
u.reset()
|
||||
for line in file(f, 'rb'):
|
||||
u.feed(line)
|
||||
if u.done: break
|
||||
u.close()
|
||||
result = u.result
|
||||
if result['encoding']:
|
||||
print result['encoding'], 'with confidence', result['confidence']
|
||||
else:
|
||||
print '******** no result'
|
||||
count += 1
|
||||
print count, 'tests'
|
88
thirdparty/chardet/universaldetector.py
vendored
|
@ -26,11 +26,13 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants, sys
|
||||
from latin1prober import Latin1Prober # windows-1252
|
||||
from mbcsgroupprober import MBCSGroupProber # multi-byte character sets
|
||||
from sbcsgroupprober import SBCSGroupProber # single-byte character sets
|
||||
from escprober import EscCharSetProber # ISO-2122, etc.
|
||||
from . import constants
|
||||
import sys
|
||||
import codecs
|
||||
from .latin1prober import Latin1Prober # windows-1252
|
||||
from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets
|
||||
from .sbcsgroupprober import SBCSGroupProber # single-byte character sets
|
||||
from .escprober import EscCharSetProber # ISO-2122, etc.
|
||||
import re
|
||||
|
||||
MINIMUM_THRESHOLD = 0.20
|
||||
|
@ -38,68 +40,78 @@ ePureAscii = 0
|
|||
eEscAscii = 1
|
||||
eHighbyte = 2
|
||||
|
||||
|
||||
class UniversalDetector:
|
||||
def __init__(self):
|
||||
self._highBitDetector = re.compile(r'[\x80-\xFF]')
|
||||
self._escDetector = re.compile(r'(\033|~{)')
|
||||
self._highBitDetector = re.compile(b'[\x80-\xFF]')
|
||||
self._escDetector = re.compile(b'(\033|~{)')
|
||||
self._mEscCharSetProber = None
|
||||
self._mCharSetProbers = []
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.result = {'encoding': None, 'confidence': 0.0}
|
||||
self.done = constants.False
|
||||
self._mStart = constants.True
|
||||
self._mGotData = constants.False
|
||||
self.done = False
|
||||
self._mStart = True
|
||||
self._mGotData = False
|
||||
self._mInputState = ePureAscii
|
||||
self._mLastChar = ''
|
||||
self._mLastChar = b''
|
||||
if self._mEscCharSetProber:
|
||||
self._mEscCharSetProber.reset()
|
||||
for prober in self._mCharSetProbers:
|
||||
prober.reset()
|
||||
|
||||
def feed(self, aBuf):
|
||||
if self.done: return
|
||||
if self.done:
|
||||
return
|
||||
|
||||
aLen = len(aBuf)
|
||||
if not aLen: return
|
||||
if not aLen:
|
||||
return
|
||||
|
||||
if not self._mGotData:
|
||||
# If the data starts with BOM, we know it is UTF
|
||||
if aBuf[:3] == '\xEF\xBB\xBF':
|
||||
if aBuf[:3] == codecs.BOM_UTF8:
|
||||
# EF BB BF UTF-8 with BOM
|
||||
self.result = {'encoding': "UTF-8", 'confidence': 1.0}
|
||||
elif aBuf[:4] == '\xFF\xFE\x00\x00':
|
||||
self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0}
|
||||
elif aBuf[:4] == codecs.BOM_UTF32_LE:
|
||||
# FF FE 00 00 UTF-32, little-endian BOM
|
||||
self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
|
||||
elif aBuf[:4] == '\x00\x00\xFE\xFF':
|
||||
elif aBuf[:4] == codecs.BOM_UTF32_BE:
|
||||
# 00 00 FE FF UTF-32, big-endian BOM
|
||||
self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
|
||||
elif aBuf[:4] == '\xFE\xFF\x00\x00':
|
||||
elif aBuf[:4] == b'\xFE\xFF\x00\x00':
|
||||
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
|
||||
self.result = {'encoding': "X-ISO-10646-UCS-4-3412", 'confidence': 1.0}
|
||||
elif aBuf[:4] == '\x00\x00\xFF\xFE':
|
||||
self.result = {
|
||||
'encoding': "X-ISO-10646-UCS-4-3412",
|
||||
'confidence': 1.0
|
||||
}
|
||||
elif aBuf[:4] == b'\x00\x00\xFF\xFE':
|
||||
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
||||
self.result = {'encoding': "X-ISO-10646-UCS-4-2143", 'confidence': 1.0}
|
||||
elif aBuf[:2] == '\xFF\xFE':
|
||||
self.result = {
|
||||
'encoding': "X-ISO-10646-UCS-4-2143",
|
||||
'confidence': 1.0
|
||||
}
|
||||
elif aBuf[:2] == codecs.BOM_LE:
|
||||
# FF FE UTF-16, little endian BOM
|
||||
self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
|
||||
elif aBuf[:2] == '\xFE\xFF':
|
||||
elif aBuf[:2] == codecs.BOM_BE:
|
||||
# FE FF UTF-16, big endian BOM
|
||||
self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}
|
||||
|
||||
self._mGotData = constants.True
|
||||
self._mGotData = True
|
||||
if self.result['encoding'] and (self.result['confidence'] > 0.0):
|
||||
self.done = constants.True
|
||||
self.done = True
|
||||
return
|
||||
|
||||
if self._mInputState == ePureAscii:
|
||||
if self._highBitDetector.search(aBuf):
|
||||
self._mInputState = eHighbyte
|
||||
elif (self._mInputState == ePureAscii) and self._escDetector.search(self._mLastChar + aBuf):
|
||||
elif ((self._mInputState == ePureAscii) and
|
||||
self._escDetector.search(self._mLastChar + aBuf)):
|
||||
self._mInputState = eEscAscii
|
||||
|
||||
self._mLastChar = aBuf[-1]
|
||||
self._mLastChar = aBuf[-1:]
|
||||
|
||||
if self._mInputState == eEscAscii:
|
||||
if not self._mEscCharSetProber:
|
||||
|
@ -107,24 +119,26 @@ class UniversalDetector:
|
|||
if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
|
||||
self.result = {'encoding': self._mEscCharSetProber.get_charset_name(),
|
||||
'confidence': self._mEscCharSetProber.get_confidence()}
|
||||
self.done = constants.True
|
||||
self.done = True
|
||||
elif self._mInputState == eHighbyte:
|
||||
if not self._mCharSetProbers:
|
||||
self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(), Latin1Prober()]
|
||||
self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
|
||||
Latin1Prober()]
|
||||
for prober in self._mCharSetProbers:
|
||||
if prober.feed(aBuf) == constants.eFoundIt:
|
||||
self.result = {'encoding': prober.get_charset_name(),
|
||||
'confidence': prober.get_confidence()}
|
||||
self.done = constants.True
|
||||
self.done = True
|
||||
break
|
||||
|
||||
def close(self):
|
||||
if self.done: return
|
||||
if self.done:
|
||||
return
|
||||
if not self._mGotData:
|
||||
if constants._debug:
|
||||
sys.stderr.write('no data received!\n')
|
||||
return
|
||||
self.done = constants.True
|
||||
self.done = True
|
||||
|
||||
if self._mInputState == ePureAscii:
|
||||
self.result = {'encoding': 'ascii', 'confidence': 1.0}
|
||||
|
@ -135,7 +149,8 @@ class UniversalDetector:
|
|||
maxProberConfidence = 0.0
|
||||
maxProber = None
|
||||
for prober in self._mCharSetProbers:
|
||||
if not prober: continue
|
||||
if not prober:
|
||||
continue
|
||||
proberConfidence = prober.get_confidence()
|
||||
if proberConfidence > maxProberConfidence:
|
||||
maxProberConfidence = proberConfidence
|
||||
|
@ -148,7 +163,8 @@ class UniversalDetector:
|
|||
if constants._debug:
|
||||
sys.stderr.write('no probers hit minimum threshhold\n')
|
||||
for prober in self._mCharSetProbers[0].mProbers:
|
||||
if not prober: continue
|
||||
sys.stderr.write('%s confidence = %s\n' % \
|
||||
(prober.get_charset_name(), \
|
||||
if not prober:
|
||||
continue
|
||||
sys.stderr.write('%s confidence = %s\n' %
|
||||
(prober.get_charset_name(),
|
||||
prober.get_confidence()))
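The universaldetector.py hunk above replaces hard-coded BOM byte strings with constants from the codecs module and reports a UTF-8 BOM as UTF-8-SIG. A self-contained sketch of the same lookup, keeping the order that tests the 4-byte UTF-32 BOMs before the 2-byte UTF-16 ones (codecs.BOM_UTF32_LE starts with codecs.BOM_LE):

import codecs

def bom_encoding(data):
    # return the encoding implied by a leading BOM, or None (illustration only)
    if data[:3] == codecs.BOM_UTF8:
        return 'UTF-8-SIG'
    if data[:4] == codecs.BOM_UTF32_LE:
        return 'UTF-32LE'
    if data[:4] == codecs.BOM_UTF32_BE:
        return 'UTF-32BE'
    if data[:2] == codecs.BOM_LE:
        return 'UTF-16LE'
    if data[:2] == codecs.BOM_BE:
        return 'UTF-16BE'
    return None

assert bom_encoding(b'\xff\xfe\x00\x00rest') == 'UTF-32LE'
assert bom_encoding(b'\xff\xfeab') == 'UTF-16LE'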
|
||||
|
|
18
thirdparty/chardet/utf8prober.py
vendored
|
@ -25,14 +25,14 @@
|
|||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import constants, sys
|
||||
from constants import eStart, eError, eItsMe
|
||||
from charsetprober import CharSetProber
|
||||
from codingstatemachine import CodingStateMachine
|
||||
from mbcssm import UTF8SMModel
|
||||
from . import constants
|
||||
from .charsetprober import CharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .mbcssm import UTF8SMModel
|
||||
|
||||
ONE_CHAR_PROB = 0.5
|
||||
|
||||
|
||||
class UTF8Prober(CharSetProber):
|
||||
def __init__(self):
|
||||
CharSetProber.__init__(self)
|
||||
|
@ -50,13 +50,13 @@ class UTF8Prober(CharSetProber):
|
|||
def feed(self, aBuf):
|
||||
for c in aBuf:
|
||||
codingState = self._mCodingSM.next_state(c)
|
||||
if codingState == eError:
|
||||
if codingState == constants.eError:
|
||||
self._mState = constants.eNotMe
|
||||
break
|
||||
elif codingState == eItsMe:
|
||||
elif codingState == constants.eItsMe:
|
||||
self._mState = constants.eFoundIt
|
||||
break
|
||||
elif codingState == eStart:
|
||||
elif codingState == constants.eStart:
|
||||
if self._mCodingSM.get_current_charlen() >= 2:
|
||||
self._mNumOfMBChar += 1
|
||||
|
||||
|
@ -69,7 +69,7 @@ class UTF8Prober(CharSetProber):
|
|||
def get_confidence(self):
|
||||
unlike = 0.99
|
||||
if self._mNumOfMBChar < 6:
|
||||
for i in xrange(0, self._mNumOfMBChar):
|
||||
for i in range(0, self._mNumOfMBChar):
|
||||
unlike = unlike * ONE_CHAR_PROB
|
||||
return 1.0 - unlike
|
||||
else:
|
||||
|
|
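For reference, the UTF-8 confidence hunk above (this page truncates it at the else: branch) amounts to confidence = 1 - 0.99 * 0.5**n while fewer than six multi-byte characters have been seen. A stand-alone version, with the truncated else branch assumed to return the remaining 0.99 directly:

ONE_CHAR_PROB = 0.5

def utf8_confidence(num_mb_chars):
    unlike = 0.99
    if num_mb_chars < 6:
        for _ in range(num_mb_chars):
            unlike *= ONE_CHAR_PROB     # each multi-byte char halves the doubt
        return 1.0 - unlike
    return unlike                       # assumption: the else branch returns 0.99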