From 439d0037539fca01b3b2f9ffdb4808bc5871c342 Mon Sep 17 00:00:00 2001
From: Miroslav Stampar <miroslav.stampar@gmail.com>
Date: Fri, 9 Oct 2015 13:35:48 +0200
Subject: [PATCH] Adding new version of chardet

---
 thirdparty/chardet/__init__.py           |  14 +-
 thirdparty/chardet/big5freq.py           |  20 +-
 thirdparty/chardet/big5prober.py         |  17 +-
 thirdparty/chardet/chardetect.py         |  80 +++
 thirdparty/chardet/chardistribution.py   | 153 +++--
 thirdparty/chardet/charsetgroupprober.py |  34 +-
 thirdparty/chardet/charsetprober.py      |  16 +-
 thirdparty/chardet/codingstatemachine.py |  15 +-
 thirdparty/chardet/compat.py             |  34 +
 thirdparty/chardet/constants.py          |   8 -
 thirdparty/chardet/cp949prober.py        |  44 ++
 thirdparty/chardet/escprober.py          |  37 +-
 thirdparty/chardet/escsm.py              | 336 +++++-----
 thirdparty/chardet/eucjpprober.py        |  41 +-
 thirdparty/chardet/euckrfreq.py          |   2 +
 thirdparty/chardet/euckrprober.py        |  13 +-
 thirdparty/chardet/euctwfreq.py          |  16 +-
 thirdparty/chardet/euctwprober.py        |   8 +-
 thirdparty/chardet/gb2312freq.py         |   9 +-
 thirdparty/chardet/gb2312prober.py       |   8 +-
 thirdparty/chardet/hebrewprober.py       | 178 ++---
 thirdparty/chardet/jisfreq.py            |  16 +-
 thirdparty/chardet/jpcntx.py             |  89 ++-
 thirdparty/chardet/langbulgarianmodel.py |  29 +-
 thirdparty/chardet/langcyrillicmodel.py  |  50 +-
 thirdparty/chardet/langgreekmodel.py     |  26 +-
 thirdparty/chardet/langhebrewmodel.py    |  20 +-
 thirdparty/chardet/langhungarianmodel.py |  26 +-
 thirdparty/chardet/langthaimodel.py      |  22 +-
 thirdparty/chardet/latin1prober.py       | 141 ++--
 thirdparty/chardet/mbcharsetprober.py    |  34 +-
 thirdparty/chardet/mbcsgroupprober.py    |  28 +-
 thirdparty/chardet/mbcssm.py             | 814 ++++++++++++-----------
 thirdparty/chardet/sbcharsetprober.py    |  50 +-
 thirdparty/chardet/sbcsgroupprober.py    |  39 +-
 thirdparty/chardet/sjisprober.py         |  46 +-
 thirdparty/chardet/test.py               |  20 -
 thirdparty/chardet/universaldetector.py  |  92 +--
 thirdparty/chardet/utf8prober.py         |  22 +-
 39 files changed, 1499 insertions(+), 1148 deletions(-)
 create mode 100644 thirdparty/chardet/chardetect.py
 create mode 100644 thirdparty/chardet/compat.py
 create mode 100644 thirdparty/chardet/cp949prober.py
 delete mode 100644 thirdparty/chardet/test.py

diff --git a/thirdparty/chardet/__init__.py b/thirdparty/chardet/__init__.py
index 953b39942..82c2a48d2 100644
--- a/thirdparty/chardet/__init__.py
+++ b/thirdparty/chardet/__init__.py
@@ -3,22 +3,28 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-__version__ = "2.0.1"
+__version__ = "2.3.0"
+from sys import version_info
+
 
 def detect(aBuf):
-    import universaldetector
+    if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
+            (version_info >= (3, 0) and not isinstance(aBuf, bytes))):
+        raise ValueError('Expected a bytes object, not a unicode object')
+
+    from . import universaldetector
     u = universaldetector.UniversalDetector()
     u.reset()
     u.feed(aBuf)
diff --git a/thirdparty/chardet/big5freq.py b/thirdparty/chardet/big5freq.py
index c1b0f3cec..65bffc04b 100644
--- a/thirdparty/chardet/big5freq.py
+++ b/thirdparty/chardet/big5freq.py
@@ -1,11 +1,11 @@
 ######################## BEGIN LICENSE BLOCK ########################
 # The Original Code is Mozilla Communicator client code.
-# 
+#
 # The Initial Developer of the Original Code is
 # Netscape Communications Corporation.
 # Portions created by the Initial Developer are Copyright (C) 1998
 # the Initial Developer. All Rights Reserved.
-# 
+#
 # Contributor(s):
 #   Mark Pilgrim - port to Python
 #
@@ -13,12 +13,12 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
@@ -26,18 +26,18 @@
 ######################### END LICENSE BLOCK #########################
 
 # Big5 frequency table
-# by Taiwan's Mandarin Promotion Council 
+# by Taiwan's Mandarin Promotion Council
 # <http://www.edu.tw:81/mandr/>
-# 
+#
 # 128  --> 0.42261
 # 256  --> 0.57851
 # 512  --> 0.74851
 # 1024 --> 0.89384
 # 2048 --> 0.97583
-# 
+#
 # Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98
 # Random Distribution Ration = 512/(5401-512)=0.105
-# 
+#
 # Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
 
 BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
@@ -45,7 +45,7 @@ BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
 #Char to FreqOrder table
 BIG5_TABLE_SIZE = 5376
 
-Big5CharToFreqOrder = ( \
+Big5CharToFreqOrder = (
    1,1801,1506, 255,1431, 198,   9,  82,   6,5008, 177, 202,3681,1256,2821, 110, #   16
 3814,  33,3274, 261,  76,  44,2114,  16,2946,2187,1176, 659,3971,  26,3451,2653, #   32
 1198,3972,3350,4202, 410,2215, 302, 590, 361,1964,   8, 204,  58,4510,5009,1932, #   48
@@ -921,3 +921,5 @@ Big5CharToFreqOrder = ( \
 13936,13937,13938,13939,13940,13941,13942,13943,13944,13945,13946,13947,13948,13949,13950,13951, #13952
 13952,13953,13954,13955,13956,13957,13958,13959,13960,13961,13962,13963,13964,13965,13966,13967, #13968
 13968,13969,13970,13971,13972) #13973
+
+# flake8: noqa
diff --git a/thirdparty/chardet/big5prober.py b/thirdparty/chardet/big5prober.py
index e6b52aadb..becce81e5 100644
--- a/thirdparty/chardet/big5prober.py
+++ b/thirdparty/chardet/big5prober.py
@@ -1,11 +1,11 @@
 ######################## BEGIN LICENSE BLOCK ########################
 # The Original Code is Mozilla Communicator client code.
-# 
+#
 # The Initial Developer of the Original Code is
 # Netscape Communications Corporation.
 # Portions created by the Initial Developer are Copyright (C) 1998
 # the Initial Developer. All Rights Reserved.
-# 
+#
 # Contributor(s):
 #   Mark Pilgrim - port to Python
 #
@@ -13,22 +13,23 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-from mbcharsetprober import MultiByteCharSetProber
-from codingstatemachine import CodingStateMachine
-from chardistribution import Big5DistributionAnalysis
-from mbcssm import Big5SMModel
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import Big5DistributionAnalysis
+from .mbcssm import Big5SMModel
+
 
 class Big5Prober(MultiByteCharSetProber):
     def __init__(self):
diff --git a/thirdparty/chardet/chardetect.py b/thirdparty/chardet/chardetect.py
new file mode 100644
index 000000000..ffe892f25
--- /dev/null
+++ b/thirdparty/chardet/chardetect.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+"""
+Script which takes one or more file paths and reports on their detected
+encodings
+
+Example::
+
+    % chardetect somefile someotherfile
+    somefile: windows-1252 with confidence 0.5
+    someotherfile: ascii with confidence 1.0
+
+If no paths are provided, it takes its input from stdin.
+
+"""
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import argparse
+import sys
+from io import open
+
+from chardet import __version__
+from chardet.universaldetector import UniversalDetector
+
+
+def description_of(lines, name='stdin'):
+    """
+    Describe the most likely encoding of a sequence of byte chunks.
+
+    Every chunk is fed to a fresh UniversalDetector, which is then closed.
+
+    :param lines: The chunks (e.g. lines of a binary file) to examine.
+    :type lines: Iterable of bytes
+    :param name: Label used as the prefix of the returned description.
+    :type name: str
+    """
+    detector = UniversalDetector()
+    for chunk in lines:
+        detector.feed(chunk)
+    detector.close()
+    outcome = detector.result
+    if not outcome['encoding']:
+        return '{0}: no result'.format(name)
+    return '{0}: {1} with confidence {2}'.format(name, outcome['encoding'],
+                                                 outcome['confidence'])
+
+
+def main(argv=None):
+    """
+    Parse the command line and print the detected encoding of each input.
+
+    :param argv: List of arguments, as if specified on the command-line.
+                 If None, ``sys.argv[1:]`` is used instead.
+    :type argv: list of str
+    """
+    # Parse args; stdin default must yield bytes (sys.stdin.buffer on Py3)
+    parser = argparse.ArgumentParser(
+        description="Takes one or more file paths and reports their detected \
+                     encodings",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        conflict_handler='resolve')
+    parser.add_argument('input',
+                        help='File whose encoding we would like to determine.',
+                        type=argparse.FileType('rb'), nargs='*',
+                        default=[getattr(sys.stdin, 'buffer', sys.stdin)])
+    parser.add_argument('--version', action='version',
+                        version='%(prog)s {0}'.format(__version__))
+    args = parser.parse_args(argv)
+
+    for f in args.input:
+        if f.isatty():
+            print("You are running chardetect interactively. Press " +
+                  "CTRL-D twice at the start of a blank line to signal the " +
+                  "end of your input. If you want help, run chardetect " +
+                  "--help\n", file=sys.stderr)
+        print(description_of(f, f.name))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/thirdparty/chardet/chardistribution.py b/thirdparty/chardet/chardistribution.py
index 1f95fc848..4e64a00be 100644
--- a/thirdparty/chardet/chardistribution.py
+++ b/thirdparty/chardet/chardistribution.py
@@ -1,11 +1,11 @@
 ######################## BEGIN LICENSE BLOCK ########################
 # The Original Code is Mozilla Communicator client code.
-# 
+#
 # The Initial Developer of the Original Code is
 # Netscape Communications Corporation.
 # Portions created by the Initial Developer are Copyright (C) 1998
 # the Initial Developer. All Rights Reserved.
-# 
+#
 # Contributor(s):
 #   Mark Pilgrim - port to Python
 #
@@ -13,47 +13,63 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants
-from euctwfreq import EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, EUCTW_TYPICAL_DISTRIBUTION_RATIO
-from euckrfreq import EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, EUCKR_TYPICAL_DISTRIBUTION_RATIO
-from gb2312freq import GB2312CharToFreqOrder, GB2312_TABLE_SIZE, GB2312_TYPICAL_DISTRIBUTION_RATIO
-from big5freq import Big5CharToFreqOrder, BIG5_TABLE_SIZE, BIG5_TYPICAL_DISTRIBUTION_RATIO
-from jisfreq import JISCharToFreqOrder, JIS_TABLE_SIZE, JIS_TYPICAL_DISTRIBUTION_RATIO
+from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,
+                        EUCTW_TYPICAL_DISTRIBUTION_RATIO)
+from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,
+                        EUCKR_TYPICAL_DISTRIBUTION_RATIO)
+from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,
+                         GB2312_TYPICAL_DISTRIBUTION_RATIO)
+from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,
+                       BIG5_TYPICAL_DISTRIBUTION_RATIO)
+from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,
+                      JIS_TYPICAL_DISTRIBUTION_RATIO)
+from .compat import wrap_ord
 
 ENOUGH_DATA_THRESHOLD = 1024
 SURE_YES = 0.99
 SURE_NO = 0.01
+MINIMUM_DATA_THRESHOLD = 3
+
 
 class CharDistributionAnalysis:
     def __init__(self):
-        self._mCharToFreqOrder = None # Mapping table to get frequency order from char order (get from GetOrder())
-        self._mTableSize = None # Size of above table
-        self._mTypicalDistributionRatio = None # This is a constant value which varies from language to language, used in calculating confidence.  See http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html for further detail.
+        # Mapping table to get frequency order from char order (get from
+        # GetOrder())
+        self._mCharToFreqOrder = None
+        self._mTableSize = None  # Size of above table
+        # This is a constant value which varies from language to language,
+        # used in calculating confidence.  See
+        # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
+        # for further detail.
+        self._mTypicalDistributionRatio = None
         self.reset()
 
     def reset(self):
         """reset analyser, clear any state"""
-        self._mDone = constants.False # If this flag is set to constants.True, detection is done and conclusion has been made
-        self._mTotalChars = 0 # Total characters encountered
-        self._mFreqChars = 0 # The number of characters whose frequency order is less than 512
+        # If this flag is set to True, detection is done and conclusion has
+        # been made
+        self._mDone = False
+        self._mTotalChars = 0  # Total characters encountered
+        # The number of characters whose frequency order is less than 512
+        self._mFreqChars = 0
 
-    def feed(self, aStr, aCharLen):
+    def feed(self, aBuf, aCharLen):
         """feed a character with known length"""
         if aCharLen == 2:
             # we only care about 2-bytes character in our distribution analysis
-            order = self.get_order(aStr)
+            order = self.get_order(aBuf)
         else:
             order = -1
         if order >= 0:
@@ -65,12 +81,14 @@ class CharDistributionAnalysis:
 
     def get_confidence(self):
         """return confidence based on existing data"""
-        # if we didn't receive any character in our consideration range, return negative answer
-        if self._mTotalChars <= 0:
+        # if we didn't receive any character in our consideration range,
+        # return negative answer
+        if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD:
             return SURE_NO
 
         if self._mTotalChars != self._mFreqChars:
-            r = self._mFreqChars / ((self._mTotalChars - self._mFreqChars) * self._mTypicalDistributionRatio)
+            r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars)
+                 * self._mTypicalDistributionRatio))
             if r < SURE_YES:
                 return r
 
@@ -78,16 +96,18 @@ class CharDistributionAnalysis:
         return SURE_YES
 
     def got_enough_data(self):
-        # It is not necessary to receive all data to draw conclusion. For charset detection,
-        # certain amount of data is enough
+        # It is not necessary to receive all data to draw conclusion.
+        # For charset detection, certain amount of data is enough
         return self._mTotalChars > ENOUGH_DATA_THRESHOLD
 
-    def get_order(self, aStr):
-        # We do not handle characters based on the original encoding string, but 
-        # convert this encoding string to a number, here called order.
-        # This allows multiple encodings of a language to share one frequency table.
+    def get_order(self, aBuf):
+        # We do not handle characters based on the original encoding string,
+        # but convert this encoding string to a number, here called order.
+        # This allows multiple encodings of a language to share one frequency
+        # table.
         return -1
 
+
 class EUCTWDistributionAnalysis(CharDistributionAnalysis):
     def __init__(self):
         CharDistributionAnalysis.__init__(self)
@@ -95,16 +115,18 @@ class EUCTWDistributionAnalysis(CharDistributionAnalysis):
         self._mTableSize = EUCTW_TABLE_SIZE
         self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
 
-    def get_order(self, aStr):
-        # for euc-TW encoding, we are interested 
+    def get_order(self, aBuf):
+        # for euc-TW encoding, we are interested
         #   first  byte range: 0xc4 -- 0xfe
         #   second byte range: 0xa1 -- 0xfe
         # no validation needed here. State machine has done that
-        if aStr[0] >= '\xC4':
-            return 94 * (ord(aStr[0]) - 0xC4) + ord(aStr[1]) - 0xA1
+        first_char = wrap_ord(aBuf[0])
+        if first_char >= 0xC4:
+            return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1
         else:
             return -1
 
+
 class EUCKRDistributionAnalysis(CharDistributionAnalysis):
     def __init__(self):
         CharDistributionAnalysis.__init__(self)
@@ -112,15 +134,17 @@ class EUCKRDistributionAnalysis(CharDistributionAnalysis):
         self._mTableSize = EUCKR_TABLE_SIZE
         self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
 
-    def get_order(self, aStr):
-        # for euc-KR encoding, we are interested 
+    def get_order(self, aBuf):
+        # for euc-KR encoding, we are interested
         #   first  byte range: 0xb0 -- 0xfe
         #   second byte range: 0xa1 -- 0xfe
         # no validation needed here. State machine has done that
-        if aStr[0] >= '\xB0':
-            return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1
+        first_char = wrap_ord(aBuf[0])
+        if first_char >= 0xB0:
+            return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1
         else:
-            return -1;
+            return -1
+
 
 class GB2312DistributionAnalysis(CharDistributionAnalysis):
     def __init__(self):
@@ -129,15 +153,17 @@ class GB2312DistributionAnalysis(CharDistributionAnalysis):
         self._mTableSize = GB2312_TABLE_SIZE
         self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
 
-    def get_order(self, aStr):
-        # for GB2312 encoding, we are interested 
+    def get_order(self, aBuf):
+        # for GB2312 encoding, we are interested
         #  first  byte range: 0xb0 -- 0xfe
         #  second byte range: 0xa1 -- 0xfe
         # no validation needed here. State machine has done that
-        if (aStr[0] >= '\xB0') and (aStr[1] >= '\xA1'):
-            return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1
+        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
+        if (first_char >= 0xB0) and (second_char >= 0xA1):
+            return 94 * (first_char - 0xB0) + second_char - 0xA1
         else:
-            return -1;
+            return -1
+
 
 class Big5DistributionAnalysis(CharDistributionAnalysis):
     def __init__(self):
@@ -146,19 +172,21 @@ class Big5DistributionAnalysis(CharDistributionAnalysis):
         self._mTableSize = BIG5_TABLE_SIZE
         self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
 
-    def get_order(self, aStr):
-        # for big5 encoding, we are interested 
+    def get_order(self, aBuf):
+        # for big5 encoding, we are interested
         #   first  byte range: 0xa4 -- 0xfe
         #   second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
         # no validation needed here. State machine has done that
-        if aStr[0] >= '\xA4':
-            if aStr[1] >= '\xA1':
-                return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0xA1 + 63
+        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
+        if first_char >= 0xA4:
+            if second_char >= 0xA1:
+                return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
             else:
-                return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0x40
+                return 157 * (first_char - 0xA4) + second_char - 0x40
         else:
             return -1
 
+
 class SJISDistributionAnalysis(CharDistributionAnalysis):
     def __init__(self):
         CharDistributionAnalysis.__init__(self)
@@ -166,22 +194,24 @@ class SJISDistributionAnalysis(CharDistributionAnalysis):
         self._mTableSize = JIS_TABLE_SIZE
         self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
 
-    def get_order(self, aStr):
-        # for sjis encoding, we are interested 
+    def get_order(self, aBuf):
+        # for sjis encoding, we are interested
         #   first  byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
         #   second byte range: 0x40 -- 0x7e,  0x81 -- oxfe
         # no validation needed here. State machine has done that
-        if (aStr[0] >= '\x81') and (aStr[0] <= '\x9F'):
-            order = 188 * (ord(aStr[0]) - 0x81)
-        elif (aStr[0] >= '\xE0') and (aStr[0] <= '\xEF'):
-            order = 188 * (ord(aStr[0]) - 0xE0 + 31)
+        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
+        if (first_char >= 0x81) and (first_char <= 0x9F):
+            order = 188 * (first_char - 0x81)
+        elif (first_char >= 0xE0) and (first_char <= 0xEF):
+            order = 188 * (first_char - 0xE0 + 31)
         else:
-            return -1;
-        order = order + ord(aStr[1]) - 0x40
-        if aStr[1] > '\x7F':
-            order =- 1
+            return -1
+        order = order + second_char - 0x40
+        if second_char > 0x7F:
+            order = -1
         return order
 
+
 class EUCJPDistributionAnalysis(CharDistributionAnalysis):
     def __init__(self):
         CharDistributionAnalysis.__init__(self)
@@ -189,12 +219,13 @@ class EUCJPDistributionAnalysis(CharDistributionAnalysis):
         self._mTableSize = JIS_TABLE_SIZE
         self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
 
-    def get_order(self, aStr):
-        # for euc-JP encoding, we are interested 
+    def get_order(self, aBuf):
+        # for euc-JP encoding, we are interested
         #   first  byte range: 0xa0 -- 0xfe
         #   second byte range: 0xa1 -- 0xfe
         # no validation needed here. State machine has done that
-        if aStr[0] >= '\xA0':
-            return 94 * (ord(aStr[0]) - 0xA1) + ord(aStr[1]) - 0xa1
+        char = wrap_ord(aBuf[0])
+        if char >= 0xA0:
+            return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1
         else:
             return -1
diff --git a/thirdparty/chardet/charsetgroupprober.py b/thirdparty/chardet/charsetgroupprober.py
index 9037af480..85e7a1c67 100644
--- a/thirdparty/chardet/charsetgroupprober.py
+++ b/thirdparty/chardet/charsetgroupprober.py
@@ -25,8 +25,10 @@
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants, sys
-from charsetprober import CharSetProber
+from . import constants
+import sys
+from .charsetprober import CharSetProber
+
 
 class CharSetGroupProber(CharSetProber):
     def __init__(self):
@@ -41,28 +43,32 @@ class CharSetGroupProber(CharSetProber):
         for prober in self._mProbers:
             if prober:
                 prober.reset()
-                prober.active = constants.True
+                prober.active = True
                 self._mActiveNum += 1
         self._mBestGuessProber = None
 
     def get_charset_name(self):
         if not self._mBestGuessProber:
             self.get_confidence()
-            if not self._mBestGuessProber: return None
+            if not self._mBestGuessProber:
+                return None
 #                self._mBestGuessProber = self._mProbers[0]
         return self._mBestGuessProber.get_charset_name()
 
     def feed(self, aBuf):
         for prober in self._mProbers:
-            if not prober: continue
-            if not prober.active: continue
+            if not prober:
+                continue
+            if not prober.active:
+                continue
             st = prober.feed(aBuf)
-            if not st: continue
+            if not st:
+                continue
             if st == constants.eFoundIt:
                 self._mBestGuessProber = prober
                 return self.get_state()
             elif st == constants.eNotMe:
-                prober.active = constants.False
+                prober.active = False
                 self._mActiveNum -= 1
                 if self._mActiveNum <= 0:
                     self._mState = constants.eNotMe
@@ -78,18 +84,22 @@ class CharSetGroupProber(CharSetProber):
         bestConf = 0.0
         self._mBestGuessProber = None
         for prober in self._mProbers:
-            if not prober: continue
+            if not prober:
+                continue
             if not prober.active:
                 if constants._debug:
-                    sys.stderr.write(prober.get_charset_name() + ' not active\n')
+                    sys.stderr.write(prober.get_charset_name()
+                                     + ' not active\n')
                 continue
             cf = prober.get_confidence()
             if constants._debug:
-                sys.stderr.write('%s confidence = %s\n' % (prober.get_charset_name(), cf))
+                sys.stderr.write('%s confidence = %s\n' %
+                                 (prober.get_charset_name(), cf))
             if bestConf < cf:
                 bestConf = cf
                 self._mBestGuessProber = prober
-        if not self._mBestGuessProber: return 0.0
+        if not self._mBestGuessProber:
+            return 0.0
         return bestConf
 #        else:
 #            self._mBestGuessProber = self._mProbers[0]
diff --git a/thirdparty/chardet/charsetprober.py b/thirdparty/chardet/charsetprober.py
index 6ad198cd4..97581712c 100644
--- a/thirdparty/chardet/charsetprober.py
+++ b/thirdparty/chardet/charsetprober.py
@@ -1,11 +1,11 @@
 ######################## BEGIN LICENSE BLOCK ########################
 # The Original Code is Mozilla Universal charset detector code.
-# 
+#
 # The Initial Developer of the Original Code is
 # Netscape Communications Corporation.
 # Portions created by the Initial Developer are Copyright (C) 2001
 # the Initial Developer. All Rights Reserved.
-# 
+#
 # Contributor(s):
 #   Mark Pilgrim - port to Python
 #   Shy Shalom - original C code
@@ -14,19 +14,21 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants, re
+from . import constants
+import re
+
 
 class CharSetProber:
     def __init__(self):
@@ -48,11 +50,11 @@ class CharSetProber:
         return 0.0
 
     def filter_high_bit_only(self, aBuf):
-        aBuf = re.sub(r'([\x00-\x7F])+', ' ', aBuf)
+        aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf)
         return aBuf
 
     def filter_without_english_letters(self, aBuf):
-        aBuf = re.sub(r'([A-Za-z])+', ' ', aBuf)
+        aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf)
         return aBuf
 
     def filter_with_english_letters(self, aBuf):
diff --git a/thirdparty/chardet/codingstatemachine.py b/thirdparty/chardet/codingstatemachine.py
index 452d3b0a0..8dd8c9179 100644
--- a/thirdparty/chardet/codingstatemachine.py
+++ b/thirdparty/chardet/codingstatemachine.py
@@ -13,19 +13,21 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-from constants import eStart, eError, eItsMe
+from .constants import eStart
+from .compat import wrap_ord
+
 
 class CodingStateMachine:
     def __init__(self, sm):
@@ -40,12 +42,15 @@ class CodingStateMachine:
     def next_state(self, c):
         # for each byte we get its class
         # if it is first byte, we also get byte length
-        byteCls = self._mModel['classTable'][ord(c)]
+        # PY3K: aBuf is a byte stream, so c is an int, not a byte
+        byteCls = self._mModel['classTable'][wrap_ord(c)]
         if self._mCurrentState == eStart:
             self._mCurrentBytePos = 0
             self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
         # from byte's class and stateTable, we get its next state
-        self._mCurrentState = self._mModel['stateTable'][self._mCurrentState * self._mModel['classFactor'] + byteCls]
+        curr_state = (self._mCurrentState * self._mModel['classFactor']
+                      + byteCls)
+        self._mCurrentState = self._mModel['stateTable'][curr_state]
         self._mCurrentBytePos += 1
         return self._mCurrentState
 
diff --git a/thirdparty/chardet/compat.py b/thirdparty/chardet/compat.py
new file mode 100644
index 000000000..d9e30addf
--- /dev/null
+++ b/thirdparty/chardet/compat.py
@@ -0,0 +1,34 @@
+######################## BEGIN LICENSE BLOCK ########################
+# Contributor(s):
+#   Ian Cordasco - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+import sys
+
+
+if sys.version_info < (3, 0):
+    base_str = (str, unicode)
+else:
+    base_str = (bytes, str)
+
+
+def wrap_ord(a):
+    if sys.version_info < (3, 0) and isinstance(a, base_str):
+        return ord(a)
+    else:
+        return a
diff --git a/thirdparty/chardet/constants.py b/thirdparty/chardet/constants.py
index e94e226b0..e4d148b3c 100644
--- a/thirdparty/chardet/constants.py
+++ b/thirdparty/chardet/constants.py
@@ -37,11 +37,3 @@ eError = 1
 eItsMe = 2
 
 SHORTCUT_THRESHOLD = 0.95
-
-import __builtin__
-if not hasattr(__builtin__, 'False'):
-    False = 0
-    True = 1
-else:
-    False = __builtin__.False
-    True = __builtin__.True
diff --git a/thirdparty/chardet/cp949prober.py b/thirdparty/chardet/cp949prober.py
new file mode 100644
index 000000000..ff4272f82
--- /dev/null
+++ b/thirdparty/chardet/cp949prober.py
@@ -0,0 +1,44 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCKRDistributionAnalysis
+from .mbcssm import CP949SMModel
+
+
+class CP949Prober(MultiByteCharSetProber):
+    def __init__(self):
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(CP949SMModel)
+        # NOTE: CP949 is a superset of EUC-KR, so the distribution should
+        #       not be different.
+        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
+        self.reset()
+
+    def get_charset_name(self):
+        return "CP949"
diff --git a/thirdparty/chardet/escprober.py b/thirdparty/chardet/escprober.py
index c2e979e7b..80a844ff3 100644
--- a/thirdparty/chardet/escprober.py
+++ b/thirdparty/chardet/escprober.py
@@ -13,39 +13,43 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants, sys
-from escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel
-from charsetprober import CharSetProber
-from codingstatemachine import CodingStateMachine
+from . import constants
+from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
+                    ISO2022KRSMModel)
+from .charsetprober import CharSetProber
+from .codingstatemachine import CodingStateMachine
+from .compat import wrap_ord
+
 
 class EscCharSetProber(CharSetProber):
     def __init__(self):
         CharSetProber.__init__(self)
-        self._mCodingSM = [ \
+        self._mCodingSM = [
             CodingStateMachine(HZSMModel),
             CodingStateMachine(ISO2022CNSMModel),
             CodingStateMachine(ISO2022JPSMModel),
             CodingStateMachine(ISO2022KRSMModel)
-            ]
+        ]
         self.reset()
 
     def reset(self):
         CharSetProber.reset(self)
         for codingSM in self._mCodingSM:
-            if not codingSM: continue
-            codingSM.active = constants.True
+            if not codingSM:
+                continue
+            codingSM.active = True
             codingSM.reset()
         self._mActiveSM = len(self._mCodingSM)
         self._mDetectedCharset = None
@@ -61,19 +65,22 @@ class EscCharSetProber(CharSetProber):
 
     def feed(self, aBuf):
         for c in aBuf:
+            # PY3K: aBuf is a byte array, so c is an int, not a byte
             for codingSM in self._mCodingSM:
-                if not codingSM: continue
-                if not codingSM.active: continue
-                codingState = codingSM.next_state(c)
+                if not codingSM:
+                    continue
+                if not codingSM.active:
+                    continue
+                codingState = codingSM.next_state(wrap_ord(c))
                 if codingState == constants.eError:
-                    codingSM.active = constants.False
+                    codingSM.active = False
                     self._mActiveSM -= 1
                     if self._mActiveSM <= 0:
                         self._mState = constants.eNotMe
                         return self.get_state()
                 elif codingState == constants.eItsMe:
                     self._mState = constants.eFoundIt
-                    self._mDetectedCharset = codingSM.get_coding_state_machine()
+                    self._mDetectedCharset = codingSM.get_coding_state_machine()  # nopep8
                     return self.get_state()
 
         return self.get_state()
diff --git a/thirdparty/chardet/escsm.py b/thirdparty/chardet/escsm.py
index 9fa22952e..bd302b4c6 100644
--- a/thirdparty/chardet/escsm.py
+++ b/thirdparty/chardet/escsm.py
@@ -13,62 +13,62 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-from constants import eStart, eError, eItsMe
+from .constants import eStart, eError, eItsMe
 
-HZ_cls = ( \
-1,0,0,0,0,0,0,0,  # 00 - 07 
-0,0,0,0,0,0,0,0,  # 08 - 0f 
-0,0,0,0,0,0,0,0,  # 10 - 17 
-0,0,0,1,0,0,0,0,  # 18 - 1f 
-0,0,0,0,0,0,0,0,  # 20 - 27 
-0,0,0,0,0,0,0,0,  # 28 - 2f 
-0,0,0,0,0,0,0,0,  # 30 - 37 
-0,0,0,0,0,0,0,0,  # 38 - 3f 
-0,0,0,0,0,0,0,0,  # 40 - 47 
-0,0,0,0,0,0,0,0,  # 48 - 4f 
-0,0,0,0,0,0,0,0,  # 50 - 57 
-0,0,0,0,0,0,0,0,  # 58 - 5f 
-0,0,0,0,0,0,0,0,  # 60 - 67 
-0,0,0,0,0,0,0,0,  # 68 - 6f 
-0,0,0,0,0,0,0,0,  # 70 - 77 
-0,0,0,4,0,5,2,0,  # 78 - 7f 
-1,1,1,1,1,1,1,1,  # 80 - 87 
-1,1,1,1,1,1,1,1,  # 88 - 8f 
-1,1,1,1,1,1,1,1,  # 90 - 97 
-1,1,1,1,1,1,1,1,  # 98 - 9f 
-1,1,1,1,1,1,1,1,  # a0 - a7 
-1,1,1,1,1,1,1,1,  # a8 - af 
-1,1,1,1,1,1,1,1,  # b0 - b7 
-1,1,1,1,1,1,1,1,  # b8 - bf 
-1,1,1,1,1,1,1,1,  # c0 - c7 
-1,1,1,1,1,1,1,1,  # c8 - cf 
-1,1,1,1,1,1,1,1,  # d0 - d7 
-1,1,1,1,1,1,1,1,  # d8 - df 
-1,1,1,1,1,1,1,1,  # e0 - e7 
-1,1,1,1,1,1,1,1,  # e8 - ef 
-1,1,1,1,1,1,1,1,  # f0 - f7 
-1,1,1,1,1,1,1,1,  # f8 - ff 
+HZ_cls = (
+1,0,0,0,0,0,0,0,  # 00 - 07
+0,0,0,0,0,0,0,0,  # 08 - 0f
+0,0,0,0,0,0,0,0,  # 10 - 17
+0,0,0,1,0,0,0,0,  # 18 - 1f
+0,0,0,0,0,0,0,0,  # 20 - 27
+0,0,0,0,0,0,0,0,  # 28 - 2f
+0,0,0,0,0,0,0,0,  # 30 - 37
+0,0,0,0,0,0,0,0,  # 38 - 3f
+0,0,0,0,0,0,0,0,  # 40 - 47
+0,0,0,0,0,0,0,0,  # 48 - 4f
+0,0,0,0,0,0,0,0,  # 50 - 57
+0,0,0,0,0,0,0,0,  # 58 - 5f
+0,0,0,0,0,0,0,0,  # 60 - 67
+0,0,0,0,0,0,0,0,  # 68 - 6f
+0,0,0,0,0,0,0,0,  # 70 - 77
+0,0,0,4,0,5,2,0,  # 78 - 7f
+1,1,1,1,1,1,1,1,  # 80 - 87
+1,1,1,1,1,1,1,1,  # 88 - 8f
+1,1,1,1,1,1,1,1,  # 90 - 97
+1,1,1,1,1,1,1,1,  # 98 - 9f
+1,1,1,1,1,1,1,1,  # a0 - a7
+1,1,1,1,1,1,1,1,  # a8 - af
+1,1,1,1,1,1,1,1,  # b0 - b7
+1,1,1,1,1,1,1,1,  # b8 - bf
+1,1,1,1,1,1,1,1,  # c0 - c7
+1,1,1,1,1,1,1,1,  # c8 - cf
+1,1,1,1,1,1,1,1,  # d0 - d7
+1,1,1,1,1,1,1,1,  # d8 - df
+1,1,1,1,1,1,1,1,  # e0 - e7
+1,1,1,1,1,1,1,1,  # e8 - ef
+1,1,1,1,1,1,1,1,  # f0 - f7
+1,1,1,1,1,1,1,1,  # f8 - ff
 )
 
-HZ_st = ( \
-eStart,eError,     3,eStart,eStart,eStart,eError,eError,# 00-07 
-eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f 
-eItsMe,eItsMe,eError,eError,eStart,eStart,     4,eError,# 10-17 
-     5,eError,     6,eError,     5,     5,     4,eError,# 18-1f 
-     4,eError,     4,     4,     4,eError,     4,eError,# 20-27 
-     4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f 
+HZ_st = (
+eStart,eError,     3,eStart,eStart,eStart,eError,eError,# 00-07
+eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
+eItsMe,eItsMe,eError,eError,eStart,eStart,     4,eError,# 10-17
+     5,eError,     6,eError,     5,     5,     4,eError,# 18-1f
+     4,eError,     4,     4,     4,eError,     4,eError,# 20-27
+     4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f
 )
 
 HZCharLenTable = (0, 0, 0, 0, 0, 0)
@@ -79,50 +79,50 @@ HZSMModel = {'classTable': HZ_cls,
              'charLenTable': HZCharLenTable,
              'name': "HZ-GB-2312"}
 
-ISO2022CN_cls = ( \
-2,0,0,0,0,0,0,0,  # 00 - 07 
-0,0,0,0,0,0,0,0,  # 08 - 0f 
-0,0,0,0,0,0,0,0,  # 10 - 17 
-0,0,0,1,0,0,0,0,  # 18 - 1f 
-0,0,0,0,0,0,0,0,  # 20 - 27 
-0,3,0,0,0,0,0,0,  # 28 - 2f 
-0,0,0,0,0,0,0,0,  # 30 - 37 
-0,0,0,0,0,0,0,0,  # 38 - 3f 
-0,0,0,4,0,0,0,0,  # 40 - 47 
-0,0,0,0,0,0,0,0,  # 48 - 4f 
-0,0,0,0,0,0,0,0,  # 50 - 57 
-0,0,0,0,0,0,0,0,  # 58 - 5f 
-0,0,0,0,0,0,0,0,  # 60 - 67 
-0,0,0,0,0,0,0,0,  # 68 - 6f 
-0,0,0,0,0,0,0,0,  # 70 - 77 
-0,0,0,0,0,0,0,0,  # 78 - 7f 
-2,2,2,2,2,2,2,2,  # 80 - 87 
-2,2,2,2,2,2,2,2,  # 88 - 8f 
-2,2,2,2,2,2,2,2,  # 90 - 97 
-2,2,2,2,2,2,2,2,  # 98 - 9f 
-2,2,2,2,2,2,2,2,  # a0 - a7 
-2,2,2,2,2,2,2,2,  # a8 - af 
-2,2,2,2,2,2,2,2,  # b0 - b7 
-2,2,2,2,2,2,2,2,  # b8 - bf 
-2,2,2,2,2,2,2,2,  # c0 - c7 
-2,2,2,2,2,2,2,2,  # c8 - cf 
-2,2,2,2,2,2,2,2,  # d0 - d7 
-2,2,2,2,2,2,2,2,  # d8 - df 
-2,2,2,2,2,2,2,2,  # e0 - e7 
-2,2,2,2,2,2,2,2,  # e8 - ef 
-2,2,2,2,2,2,2,2,  # f0 - f7 
-2,2,2,2,2,2,2,2,  # f8 - ff 
+ISO2022CN_cls = (
+2,0,0,0,0,0,0,0,  # 00 - 07
+0,0,0,0,0,0,0,0,  # 08 - 0f
+0,0,0,0,0,0,0,0,  # 10 - 17
+0,0,0,1,0,0,0,0,  # 18 - 1f
+0,0,0,0,0,0,0,0,  # 20 - 27
+0,3,0,0,0,0,0,0,  # 28 - 2f
+0,0,0,0,0,0,0,0,  # 30 - 37
+0,0,0,0,0,0,0,0,  # 38 - 3f
+0,0,0,4,0,0,0,0,  # 40 - 47
+0,0,0,0,0,0,0,0,  # 48 - 4f
+0,0,0,0,0,0,0,0,  # 50 - 57
+0,0,0,0,0,0,0,0,  # 58 - 5f
+0,0,0,0,0,0,0,0,  # 60 - 67
+0,0,0,0,0,0,0,0,  # 68 - 6f
+0,0,0,0,0,0,0,0,  # 70 - 77
+0,0,0,0,0,0,0,0,  # 78 - 7f
+2,2,2,2,2,2,2,2,  # 80 - 87
+2,2,2,2,2,2,2,2,  # 88 - 8f
+2,2,2,2,2,2,2,2,  # 90 - 97
+2,2,2,2,2,2,2,2,  # 98 - 9f
+2,2,2,2,2,2,2,2,  # a0 - a7
+2,2,2,2,2,2,2,2,  # a8 - af
+2,2,2,2,2,2,2,2,  # b0 - b7
+2,2,2,2,2,2,2,2,  # b8 - bf
+2,2,2,2,2,2,2,2,  # c0 - c7
+2,2,2,2,2,2,2,2,  # c8 - cf
+2,2,2,2,2,2,2,2,  # d0 - d7
+2,2,2,2,2,2,2,2,  # d8 - df
+2,2,2,2,2,2,2,2,  # e0 - e7
+2,2,2,2,2,2,2,2,  # e8 - ef
+2,2,2,2,2,2,2,2,  # f0 - f7
+2,2,2,2,2,2,2,2,  # f8 - ff
 )
 
-ISO2022CN_st = ( \
-eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 
-eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f 
-eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 
-eItsMe,eItsMe,eItsMe,eError,eError,eError,     4,eError,# 18-1f 
-eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27 
-     5,     6,eError,eError,eError,eError,eError,eError,# 28-2f 
-eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37 
-eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f 
+ISO2022CN_st = (
+eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
+eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f
+eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
+eItsMe,eItsMe,eItsMe,eError,eError,eError,     4,eError,# 18-1f
+eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27
+     5,     6,eError,eError,eError,eError,eError,eError,# 28-2f
+eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37
+eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f
 )
 
 ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)
@@ -133,51 +133,51 @@ ISO2022CNSMModel = {'classTable': ISO2022CN_cls,
                     'charLenTable': ISO2022CNCharLenTable,
                     'name': "ISO-2022-CN"}
 
-ISO2022JP_cls = ( \
-2,0,0,0,0,0,0,0,  # 00 - 07 
-0,0,0,0,0,0,2,2,  # 08 - 0f 
-0,0,0,0,0,0,0,0,  # 10 - 17 
-0,0,0,1,0,0,0,0,  # 18 - 1f 
-0,0,0,0,7,0,0,0,  # 20 - 27 
-3,0,0,0,0,0,0,0,  # 28 - 2f 
-0,0,0,0,0,0,0,0,  # 30 - 37 
-0,0,0,0,0,0,0,0,  # 38 - 3f 
-6,0,4,0,8,0,0,0,  # 40 - 47 
-0,9,5,0,0,0,0,0,  # 48 - 4f 
-0,0,0,0,0,0,0,0,  # 50 - 57 
-0,0,0,0,0,0,0,0,  # 58 - 5f 
-0,0,0,0,0,0,0,0,  # 60 - 67 
-0,0,0,0,0,0,0,0,  # 68 - 6f 
-0,0,0,0,0,0,0,0,  # 70 - 77 
-0,0,0,0,0,0,0,0,  # 78 - 7f 
-2,2,2,2,2,2,2,2,  # 80 - 87 
-2,2,2,2,2,2,2,2,  # 88 - 8f 
-2,2,2,2,2,2,2,2,  # 90 - 97 
-2,2,2,2,2,2,2,2,  # 98 - 9f 
-2,2,2,2,2,2,2,2,  # a0 - a7 
-2,2,2,2,2,2,2,2,  # a8 - af 
-2,2,2,2,2,2,2,2,  # b0 - b7 
-2,2,2,2,2,2,2,2,  # b8 - bf 
-2,2,2,2,2,2,2,2,  # c0 - c7 
-2,2,2,2,2,2,2,2,  # c8 - cf 
-2,2,2,2,2,2,2,2,  # d0 - d7 
-2,2,2,2,2,2,2,2,  # d8 - df 
-2,2,2,2,2,2,2,2,  # e0 - e7 
-2,2,2,2,2,2,2,2,  # e8 - ef 
-2,2,2,2,2,2,2,2,  # f0 - f7 
-2,2,2,2,2,2,2,2,  # f8 - ff 
+ISO2022JP_cls = (
+2,0,0,0,0,0,0,0,  # 00 - 07
+0,0,0,0,0,0,2,2,  # 08 - 0f
+0,0,0,0,0,0,0,0,  # 10 - 17
+0,0,0,1,0,0,0,0,  # 18 - 1f
+0,0,0,0,7,0,0,0,  # 20 - 27
+3,0,0,0,0,0,0,0,  # 28 - 2f
+0,0,0,0,0,0,0,0,  # 30 - 37
+0,0,0,0,0,0,0,0,  # 38 - 3f
+6,0,4,0,8,0,0,0,  # 40 - 47
+0,9,5,0,0,0,0,0,  # 48 - 4f
+0,0,0,0,0,0,0,0,  # 50 - 57
+0,0,0,0,0,0,0,0,  # 58 - 5f
+0,0,0,0,0,0,0,0,  # 60 - 67
+0,0,0,0,0,0,0,0,  # 68 - 6f
+0,0,0,0,0,0,0,0,  # 70 - 77
+0,0,0,0,0,0,0,0,  # 78 - 7f
+2,2,2,2,2,2,2,2,  # 80 - 87
+2,2,2,2,2,2,2,2,  # 88 - 8f
+2,2,2,2,2,2,2,2,  # 90 - 97
+2,2,2,2,2,2,2,2,  # 98 - 9f
+2,2,2,2,2,2,2,2,  # a0 - a7
+2,2,2,2,2,2,2,2,  # a8 - af
+2,2,2,2,2,2,2,2,  # b0 - b7
+2,2,2,2,2,2,2,2,  # b8 - bf
+2,2,2,2,2,2,2,2,  # c0 - c7
+2,2,2,2,2,2,2,2,  # c8 - cf
+2,2,2,2,2,2,2,2,  # d0 - d7
+2,2,2,2,2,2,2,2,  # d8 - df
+2,2,2,2,2,2,2,2,  # e0 - e7
+2,2,2,2,2,2,2,2,  # e8 - ef
+2,2,2,2,2,2,2,2,  # f0 - f7
+2,2,2,2,2,2,2,2,  # f8 - ff
 )
 
-ISO2022JP_st = ( \
-eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 
-eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f 
-eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 
-eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f 
-eError,     5,eError,eError,eError,     4,eError,eError,# 20-27 
-eError,eError,eError,     6,eItsMe,eError,eItsMe,eError,# 28-2f 
-eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37 
-eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f 
-eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47 
+ISO2022JP_st = (
+eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
+eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f
+eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
+eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f
+eError,     5,eError,eError,eError,     4,eError,eError,# 20-27
+eError,eError,eError,     6,eItsMe,eError,eItsMe,eError,# 28-2f
+eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37
+eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f
+eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47
 )
 
 ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
@@ -188,47 +188,47 @@ ISO2022JPSMModel = {'classTable': ISO2022JP_cls,
                     'charLenTable': ISO2022JPCharLenTable,
                     'name': "ISO-2022-JP"}
 
-ISO2022KR_cls = ( \
-2,0,0,0,0,0,0,0,  # 00 - 07 
-0,0,0,0,0,0,0,0,  # 08 - 0f 
-0,0,0,0,0,0,0,0,  # 10 - 17 
-0,0,0,1,0,0,0,0,  # 18 - 1f 
-0,0,0,0,3,0,0,0,  # 20 - 27 
-0,4,0,0,0,0,0,0,  # 28 - 2f 
-0,0,0,0,0,0,0,0,  # 30 - 37 
-0,0,0,0,0,0,0,0,  # 38 - 3f 
-0,0,0,5,0,0,0,0,  # 40 - 47 
-0,0,0,0,0,0,0,0,  # 48 - 4f 
-0,0,0,0,0,0,0,0,  # 50 - 57 
-0,0,0,0,0,0,0,0,  # 58 - 5f 
-0,0,0,0,0,0,0,0,  # 60 - 67 
-0,0,0,0,0,0,0,0,  # 68 - 6f 
-0,0,0,0,0,0,0,0,  # 70 - 77 
-0,0,0,0,0,0,0,0,  # 78 - 7f 
-2,2,2,2,2,2,2,2,  # 80 - 87 
-2,2,2,2,2,2,2,2,  # 88 - 8f 
-2,2,2,2,2,2,2,2,  # 90 - 97 
-2,2,2,2,2,2,2,2,  # 98 - 9f 
-2,2,2,2,2,2,2,2,  # a0 - a7 
-2,2,2,2,2,2,2,2,  # a8 - af 
-2,2,2,2,2,2,2,2,  # b0 - b7 
-2,2,2,2,2,2,2,2,  # b8 - bf 
-2,2,2,2,2,2,2,2,  # c0 - c7 
-2,2,2,2,2,2,2,2,  # c8 - cf 
-2,2,2,2,2,2,2,2,  # d0 - d7 
-2,2,2,2,2,2,2,2,  # d8 - df 
-2,2,2,2,2,2,2,2,  # e0 - e7 
-2,2,2,2,2,2,2,2,  # e8 - ef 
-2,2,2,2,2,2,2,2,  # f0 - f7 
-2,2,2,2,2,2,2,2,  # f8 - ff 
+ISO2022KR_cls = (
+2,0,0,0,0,0,0,0,  # 00 - 07
+0,0,0,0,0,0,0,0,  # 08 - 0f
+0,0,0,0,0,0,0,0,  # 10 - 17
+0,0,0,1,0,0,0,0,  # 18 - 1f
+0,0,0,0,3,0,0,0,  # 20 - 27
+0,4,0,0,0,0,0,0,  # 28 - 2f
+0,0,0,0,0,0,0,0,  # 30 - 37
+0,0,0,0,0,0,0,0,  # 38 - 3f
+0,0,0,5,0,0,0,0,  # 40 - 47
+0,0,0,0,0,0,0,0,  # 48 - 4f
+0,0,0,0,0,0,0,0,  # 50 - 57
+0,0,0,0,0,0,0,0,  # 58 - 5f
+0,0,0,0,0,0,0,0,  # 60 - 67
+0,0,0,0,0,0,0,0,  # 68 - 6f
+0,0,0,0,0,0,0,0,  # 70 - 77
+0,0,0,0,0,0,0,0,  # 78 - 7f
+2,2,2,2,2,2,2,2,  # 80 - 87
+2,2,2,2,2,2,2,2,  # 88 - 8f
+2,2,2,2,2,2,2,2,  # 90 - 97
+2,2,2,2,2,2,2,2,  # 98 - 9f
+2,2,2,2,2,2,2,2,  # a0 - a7
+2,2,2,2,2,2,2,2,  # a8 - af
+2,2,2,2,2,2,2,2,  # b0 - b7
+2,2,2,2,2,2,2,2,  # b8 - bf
+2,2,2,2,2,2,2,2,  # c0 - c7
+2,2,2,2,2,2,2,2,  # c8 - cf
+2,2,2,2,2,2,2,2,  # d0 - d7
+2,2,2,2,2,2,2,2,  # d8 - df
+2,2,2,2,2,2,2,2,  # e0 - e7
+2,2,2,2,2,2,2,2,  # e8 - ef
+2,2,2,2,2,2,2,2,  # f0 - f7
+2,2,2,2,2,2,2,2,  # f8 - ff
 )
 
-ISO2022KR_st = ( \
-eStart,     3,eError,eStart,eStart,eStart,eError,eError,# 00-07 
-eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f 
-eItsMe,eItsMe,eError,eError,eError,     4,eError,eError,# 10-17 
-eError,eError,eError,eError,     5,eError,eError,eError,# 18-1f 
-eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27 
+ISO2022KR_st = (
+eStart,     3,eError,eStart,eStart,eStart,eError,eError,# 00-07
+eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
+eItsMe,eItsMe,eError,eError,eError,     4,eError,eError,# 10-17
+eError,eError,eError,eError,     5,eError,eError,eError,# 18-1f
+eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27
 )
 
 ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)
@@ -238,3 +238,5 @@ ISO2022KRSMModel = {'classTable': ISO2022KR_cls,
                     'stateTable': ISO2022KR_st,
                     'charLenTable': ISO2022KRCharLenTable,
                     'name': "ISO-2022-KR"}
+
+# flake8: noqa
diff --git a/thirdparty/chardet/eucjpprober.py b/thirdparty/chardet/eucjpprober.py
index faa5cb58d..8e64fdcc2 100644
--- a/thirdparty/chardet/eucjpprober.py
+++ b/thirdparty/chardet/eucjpprober.py
@@ -13,25 +13,26 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants, sys
-from constants import eStart, eError, eItsMe
-from mbcharsetprober import MultiByteCharSetProber
-from codingstatemachine import CodingStateMachine
-from chardistribution import EUCJPDistributionAnalysis
-from jpcntx import EUCJPContextAnalysis
-from mbcssm import EUCJPSMModel
+import sys
+from . import constants
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCJPDistributionAnalysis
+from .jpcntx import EUCJPContextAnalysis
+from .mbcssm import EUCJPSMModel
+
 
 class EUCJPProber(MultiByteCharSetProber):
     def __init__(self):
@@ -50,31 +51,35 @@ class EUCJPProber(MultiByteCharSetProber):
 
     def feed(self, aBuf):
         aLen = len(aBuf)
-        for i in xrange(0, aLen):
+        for i in range(0, aLen):
+            # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
             codingState = self._mCodingSM.next_state(aBuf[i])
-            if codingState == eError:
+            if codingState == constants.eError:
                 if constants._debug:
-                    sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n')
+                    sys.stderr.write(self.get_charset_name()
+                                     + ' prober hit error at byte ' + str(i)
+                                     + '\n')
                 self._mState = constants.eNotMe
                 break
-            elif codingState == eItsMe:
+            elif codingState == constants.eItsMe:
                 self._mState = constants.eFoundIt
                 break
-            elif codingState == eStart:
+            elif codingState == constants.eStart:
                 charLen = self._mCodingSM.get_current_charlen()
                 if i == 0:
                     self._mLastChar[1] = aBuf[0]
                     self._mContextAnalyzer.feed(self._mLastChar, charLen)
                     self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                 else:
-                    self._mContextAnalyzer.feed(aBuf[i-1:i+1], charLen)
-                    self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen)
+                    self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
+                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
+                                                     charLen)
 
         self._mLastChar[0] = aBuf[aLen - 1]
 
         if self.get_state() == constants.eDetecting:
-            if self._mContextAnalyzer.got_enough_data() and \
-                   (self.get_confidence() > constants.SHORTCUT_THRESHOLD):
+            if (self._mContextAnalyzer.got_enough_data() and
+               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                 self._mState = constants.eFoundIt
 
         return self.get_state()
diff --git a/thirdparty/chardet/euckrfreq.py b/thirdparty/chardet/euckrfreq.py
index 1463fa1d8..a179e4c21 100644
--- a/thirdparty/chardet/euckrfreq.py
+++ b/thirdparty/chardet/euckrfreq.py
@@ -592,3 +592,5 @@ EUCKRCharToFreqOrder = ( \
 8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,
 8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,
 8736,8737,8738,8739,8740,8741)
+
+# flake8: noqa
diff --git a/thirdparty/chardet/euckrprober.py b/thirdparty/chardet/euckrprober.py
index bd697ebf3..5982a46b6 100644
--- a/thirdparty/chardet/euckrprober.py
+++ b/thirdparty/chardet/euckrprober.py
@@ -13,22 +13,23 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-from mbcharsetprober import MultiByteCharSetProber
-from codingstatemachine import CodingStateMachine
-from chardistribution import EUCKRDistributionAnalysis
-from mbcssm import EUCKRSMModel
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCKRDistributionAnalysis
+from .mbcssm import EUCKRSMModel
+
 
 class EUCKRProber(MultiByteCharSetProber):
     def __init__(self):
diff --git a/thirdparty/chardet/euctwfreq.py b/thirdparty/chardet/euctwfreq.py
index c05720950..576e7504d 100644
--- a/thirdparty/chardet/euctwfreq.py
+++ b/thirdparty/chardet/euctwfreq.py
@@ -13,12 +13,12 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
@@ -26,8 +26,8 @@
 ######################### END LICENSE BLOCK #########################
 
 # EUCTW frequency table
-# Converted from big5 work 
-# by Taiwan's Mandarin Promotion Council 
+# Converted from big5 work
+# by Taiwan's Mandarin Promotion Council
 # <http:#www.edu.tw:81/mandr/>
 
 # 128  --> 0.42261
@@ -38,15 +38,15 @@
 #
 # Idea Distribution Ratio = 0.74851/(1-0.74851) =2.98
 # Random Distribution Ration = 512/(5401-512)=0.105
-# 
+#
 # Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
 
 EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
 
-# Char to FreqOrder table , 
+# Char to FreqOrder table ,
 EUCTW_TABLE_SIZE = 8102
 
-EUCTWCharToFreqOrder = ( \
+EUCTWCharToFreqOrder = (
    1,1800,1506, 255,1431, 198,   9,  82,   6,7310, 177, 202,3615,1256,2808, 110, # 2742
 3735,  33,3241, 261,  76,  44,2113,  16,2931,2184,1176, 659,3868,  26,3404,2643, # 2758
 1198,3869,3313,4060, 410,2211, 302, 590, 361,1963,   8, 204,  58,4296,7311,1931, # 2774
@@ -424,3 +424,5 @@ EUCTWCharToFreqOrder = ( \
 8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, # 8710
 8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, # 8726
 8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741) # 8742
+
+# flake8: noqa
diff --git a/thirdparty/chardet/euctwprober.py b/thirdparty/chardet/euctwprober.py
index b073f134f..fe652fe37 100644
--- a/thirdparty/chardet/euctwprober.py
+++ b/thirdparty/chardet/euctwprober.py
@@ -25,10 +25,10 @@
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-from mbcharsetprober import MultiByteCharSetProber
-from codingstatemachine import CodingStateMachine
-from chardistribution import EUCTWDistributionAnalysis
-from mbcssm import EUCTWSMModel
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCTWDistributionAnalysis
+from .mbcssm import EUCTWSMModel
 
 class EUCTWProber(MultiByteCharSetProber):
     def __init__(self):
diff --git a/thirdparty/chardet/gb2312freq.py b/thirdparty/chardet/gb2312freq.py
index 7a4d5a1b3..1238f510f 100644
--- a/thirdparty/chardet/gb2312freq.py
+++ b/thirdparty/chardet/gb2312freq.py
@@ -13,12 +13,12 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
@@ -36,14 +36,14 @@
 #
 # Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79
 # Random Distribution Ration = 512 / (3755 - 512) = 0.157
-# 
+#
 # Typical Distribution Ratio about 25% of Ideal one, still much higher that RDR
 
 GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
 
 GB2312_TABLE_SIZE = 3760
 
-GB2312CharToFreqOrder = ( \
+GB2312CharToFreqOrder = (
 1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
 2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
 2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
@@ -469,3 +469,4 @@ GB2312CharToFreqOrder = ( \
 5867,5507,6273,4206,6274,4789,6098,6764,3619,3646,3833,3804,2394,3788,4936,3978,
 4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767)
 
+# flake8: noqa
diff --git a/thirdparty/chardet/gb2312prober.py b/thirdparty/chardet/gb2312prober.py
index 91eb3925a..0325a2d86 100644
--- a/thirdparty/chardet/gb2312prober.py
+++ b/thirdparty/chardet/gb2312prober.py
@@ -25,10 +25,10 @@
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-from mbcharsetprober import MultiByteCharSetProber
-from codingstatemachine import CodingStateMachine
-from chardistribution import GB2312DistributionAnalysis
-from mbcssm import GB2312SMModel
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import GB2312DistributionAnalysis
+from .mbcssm import GB2312SMModel
 
 class GB2312Prober(MultiByteCharSetProber):
     def __init__(self):
diff --git a/thirdparty/chardet/hebrewprober.py b/thirdparty/chardet/hebrewprober.py
index 442c0bf2b..ba225c5ef 100644
--- a/thirdparty/chardet/hebrewprober.py
+++ b/thirdparty/chardet/hebrewprober.py
@@ -13,20 +13,21 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-from charsetprober import CharSetProber
-import constants
+from .charsetprober import CharSetProber
+from .constants import eNotMe, eDetecting
+from .compat import wrap_ord
 
 # This prober doesn't actually recognize a language or a charset.
 # It is a helper prober for the use of the Hebrew model probers
@@ -35,40 +36,40 @@ import constants
 #
 # Four main charsets exist in Hebrew:
 # "ISO-8859-8" - Visual Hebrew
-# "windows-1255" - Logical Hebrew 
+# "windows-1255" - Logical Hebrew
 # "ISO-8859-8-I" - Logical Hebrew
 # "x-mac-hebrew" - ?? Logical Hebrew ??
 #
 # Both "ISO" charsets use a completely identical set of code points, whereas
-# "windows-1255" and "x-mac-hebrew" are two different proper supersets of 
+# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
 # these code points. windows-1255 defines additional characters in the range
-# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific 
+# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
 # diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
-# x-mac-hebrew defines similar additional code points but with a different 
+# x-mac-hebrew defines similar additional code points but with a different
 # mapping.
 #
-# As far as an average Hebrew text with no diacritics is concerned, all four 
-# charsets are identical with respect to code points. Meaning that for the 
-# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters 
+# As far as an average Hebrew text with no diacritics is concerned, all four
+# charsets are identical with respect to code points. Meaning that for the
+# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
 # (including final letters).
 #
 # The dominant difference between these charsets is their directionality.
 # "Visual" directionality means that the text is ordered as if the renderer is
-# not aware of a BIDI rendering algorithm. The renderer sees the text and 
-# draws it from left to right. The text itself when ordered naturally is read 
+# not aware of a BIDI rendering algorithm. The renderer sees the text and
+# draws it from left to right. The text itself when ordered naturally is read
 # backwards. A buffer of Visual Hebrew generally looks like so:
 # "[last word of first line spelled backwards] [whole line ordered backwards
-# and spelled backwards] [first word of first line spelled backwards] 
+# and spelled backwards] [first word of first line spelled backwards]
 # [end of line] [last word of second line] ... etc' "
 # adding punctuation marks, numbers and English text to visual text is
 # naturally also "visual" and from left to right.
-# 
+#
 # "Logical" directionality means the text is ordered "naturally" according to
-# the order it is read. It is the responsibility of the renderer to display 
-# the text from right to left. A BIDI algorithm is used to place general 
+# the order it is read. It is the responsibility of the renderer to display
+# the text from right to left. A BIDI algorithm is used to place general
 # punctuation marks, numbers and English text in the text.
 #
-# Texts in x-mac-hebrew are almost impossible to find on the Internet. From 
+# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
 # what little evidence I could find, it seems that its general directionality
 # is Logical.
 #
@@ -76,17 +77,17 @@ import constants
 # charsets:
 # Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
 #    backwards while line order is natural. For charset recognition purposes
-#    the line order is unimportant (In fact, for this implementation, even 
+#    the line order is unimportant (In fact, for this implementation, even
 #    word order is unimportant).
 # Logical Hebrew - "windows-1255" - normal, naturally ordered text.
 #
-# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be 
+# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
 #    specifically identified.
 # "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
 #    that contain special punctuation marks or diacritics is displayed with
 #    some unconverted characters showing as question marks. This problem might
 #    be corrected using another model prober for x-mac-hebrew. Due to the fact
-#    that x-mac-hebrew texts are so rare, writing another model prober isn't 
+#    that x-mac-hebrew texts are so rare, writing another model prober isn't
 #    worth the effort and performance hit.
 #
 #### The Prober ####
@@ -126,28 +127,31 @@ import constants
 # charset identified, either "windows-1255" or "ISO-8859-8".
 
 # windows-1255 / ISO-8859-8 code points of interest
-FINAL_KAF = '\xea'
-NORMAL_KAF = '\xeb'
-FINAL_MEM = '\xed'
-NORMAL_MEM = '\xee'
-FINAL_NUN = '\xef'
-NORMAL_NUN = '\xf0'
-FINAL_PE = '\xf3'
-NORMAL_PE = '\xf4'
-FINAL_TSADI = '\xf5'
-NORMAL_TSADI = '\xf6'
+FINAL_KAF = 0xea
+NORMAL_KAF = 0xeb
+FINAL_MEM = 0xed
+NORMAL_MEM = 0xee
+FINAL_NUN = 0xef
+NORMAL_NUN = 0xf0
+FINAL_PE = 0xf3
+NORMAL_PE = 0xf4
+FINAL_TSADI = 0xf5
+NORMAL_TSADI = 0xf6
 
 # Minimum Visual vs Logical final letter score difference.
-# If the difference is below this, don't rely solely on the final letter score distance.
+# If the difference is below this, don't rely solely on the final letter score
+# distance.
 MIN_FINAL_CHAR_DISTANCE = 5
 
 # Minimum Visual vs Logical model score difference.
-# If the difference is below this, don't rely at all on the model score distance.
+# If the difference is below this, don't rely at all on the model score
+# distance.
 MIN_MODEL_DISTANCE = 0.01
 
 VISUAL_HEBREW_NAME = "ISO-8859-8"
 LOGICAL_HEBREW_NAME = "windows-1255"
 
+
 class HebrewProber(CharSetProber):
     def __init__(self):
         CharSetProber.__init__(self)
@@ -159,8 +163,8 @@ class HebrewProber(CharSetProber):
         self._mFinalCharLogicalScore = 0
         self._mFinalCharVisualScore = 0
         # The two last characters seen in the previous buffer,
-        # mPrev and mBeforePrev are initialized to space in order to simulate a word 
-        # delimiter at the beginning of the data
+        # mPrev and mBeforePrev are initialized to space in order to simulate
+        # a word delimiter at the beginning of the data
         self._mPrev = ' '
         self._mBeforePrev = ' '
         # These probers are owned by the group prober.
@@ -170,49 +174,52 @@ class HebrewProber(CharSetProber):
         self._mVisualProber = visualProber
 
     def is_final(self, c):
-        return c in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, FINAL_TSADI]
+        return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,
+                               FINAL_TSADI]
 
     def is_non_final(self, c):
-        # The normal Tsadi is not a good Non-Final letter due to words like 
-        # 'lechotet' (to chat) containing an apostrophe after the tsadi. This 
-        # apostrophe is converted to a space in FilterWithoutEnglishLetters causing 
-        # the Non-Final tsadi to appear at an end of a word even though this is not 
-        # the case in the original text.
-        # The letters Pe and Kaf rarely display a related behavior of not being a 
-        # good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' for 
-        # example legally end with a Non-Final Pe or Kaf. However, the benefit of 
-        # these letters as Non-Final letters outweighs the damage since these words 
-        # are quite rare.
-        return c in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
+        # The normal Tsadi is not a good Non-Final letter due to words like
+        # 'lechotet' (to chat) containing an apostrophe after the tsadi. This
+        # apostrophe is converted to a space in FilterWithoutEnglishLetters
+        # causing the Non-Final tsadi to appear at an end of a word even
+        # though this is not the case in the original text.
+        # The letters Pe and Kaf rarely display a related behavior of not being
+        # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'
+        # for example legally end with a Non-Final Pe or Kaf. However, the
+        # benefit of these letters as Non-Final letters outweighs the damage
+        # since these words are quite rare.
+        return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
 
     def feed(self, aBuf):
         # Final letter analysis for logical-visual decision.
-        # Look for evidence that the received buffer is either logical Hebrew or 
-        # visual Hebrew.
+        # Look for evidence that the received buffer is either logical Hebrew
+        # or visual Hebrew.
         # The following cases are checked:
-        # 1) A word longer than 1 letter, ending with a final letter. This is an 
-        #    indication that the text is laid out "naturally" since the final letter 
-        #    really appears at the end. +1 for logical score.
-        # 2) A word longer than 1 letter, ending with a Non-Final letter. In normal
-        #    Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, should not end with
-        #    the Non-Final form of that letter. Exceptions to this rule are mentioned
-        #    above in isNonFinal(). This is an indication that the text is laid out
-        #    backwards. +1 for visual score
-        # 3) A word longer than 1 letter, starting with a final letter. Final letters 
-        #    should not appear at the beginning of a word. This is an indication that 
-        #    the text is laid out backwards. +1 for visual score.
-        # 
-        # The visual score and logical score are accumulated throughout the text and 
-        # are finally checked against each other in GetCharSetName().
-        # No checking for final letters in the middle of words is done since that case
-        # is not an indication for either Logical or Visual text.
-        # 
-        # We automatically filter out all 7-bit characters (replace them with spaces)
-        # so the word boundary detection works properly. [MAP]
+        # 1) A word longer than 1 letter, ending with a final letter. This is
+        #    an indication that the text is laid out "naturally" since the
+        #    final letter really appears at the end. +1 for logical score.
+        # 2) A word longer than 1 letter, ending with a Non-Final letter. In
+        #    normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
+        #    should not end with the Non-Final form of that letter. Exceptions
+        #    to this rule are mentioned above in is_non_final(). This is an
+        #    indication that the text is laid out backwards. +1 for visual
+        #    score
+        # 3) A word longer than 1 letter, starting with a final letter. Final
+        #    letters should not appear at the beginning of a word. This is an
+        #    indication that the text is laid out backwards. +1 for visual
+        #    score.
+        #
+        # The visual score and logical score are accumulated throughout the
+        # text and are finally checked against each other in GetCharSetName().
+        # No checking for final letters in the middle of words is done since
+        # that case is not an indication for either Logical or Visual text.
+        #
+        # We automatically filter out all 7-bit characters (replace them with
+        # spaces) so the word boundary detection works properly. [MAP]
 
-        if self.get_state() == constants.eNotMe:
+        if self.get_state() == eNotMe:
             # Both model probers say it's not them. No reason to continue.
-            return constants.eNotMe
+            return eNotMe
 
         aBuf = self.filter_high_bit_only(aBuf)
 
@@ -220,23 +227,27 @@ class HebrewProber(CharSetProber):
             if cur == ' ':
                 # We stand on a space - a word just ended
                 if self._mBeforePrev != ' ':
-                    # next-to-last char was not a space so self._mPrev is not a 1 letter word
+                    # next-to-last char was not a space so self._mPrev is not a
+                    # 1 letter word
                     if self.is_final(self._mPrev):
                         # case (1) [-2:not space][-1:final letter][cur:space]
                         self._mFinalCharLogicalScore += 1
                     elif self.is_non_final(self._mPrev):
-                        # case (2) [-2:not space][-1:Non-Final letter][cur:space]
+                        # case (2) [-2:not space][-1:Non-Final letter][
+                        #  cur:space]
                         self._mFinalCharVisualScore += 1
             else:
                 # Not standing on a space
-                if (self._mBeforePrev == ' ') and (self.is_final(self._mPrev)) and (cur != ' '):
+                if ((self._mBeforePrev == ' ') and
+                        (self.is_final(self._mPrev)) and (cur != ' ')):
                     # case (3) [-2:space][-1:final letter][cur:not space]
                     self._mFinalCharVisualScore += 1
             self._mBeforePrev = self._mPrev
             self._mPrev = cur
 
-        # Forever detecting, till the end or until both model probers return eNotMe (handled above)
-        return constants.eDetecting
+        # Forever detecting, till the end or until both model probers return
+        # eNotMe (handled above)
+        return eDetecting
 
     def get_charset_name(self):
         # Make the decision: is it Logical or Visual?
@@ -248,22 +259,25 @@ class HebrewProber(CharSetProber):
             return VISUAL_HEBREW_NAME
 
         # It's not dominant enough, try to rely on the model scores instead.
-        modelsub = self._mLogicalProber.get_confidence() - self._mVisualProber.get_confidence()
+        modelsub = (self._mLogicalProber.get_confidence()
+                    - self._mVisualProber.get_confidence())
         if modelsub > MIN_MODEL_DISTANCE:
             return LOGICAL_HEBREW_NAME
         if modelsub < -MIN_MODEL_DISTANCE:
             return VISUAL_HEBREW_NAME
 
-        # Still no good, back to final letter distance, maybe it'll save the day.
+        # Still no good, back to final letter distance, maybe it'll save the
+        # day.
         if finalsub < 0.0:
             return VISUAL_HEBREW_NAME
 
-        # (finalsub > 0 - Logical) or (don't know what to do) default to Logical.
+        # (finalsub > 0 - Logical) or (don't know what to do) default to
+        # Logical.
         return LOGICAL_HEBREW_NAME
 
     def get_state(self):
         # Remain active as long as any of the model probers are active.
-        if (self._mLogicalProber.get_state() == constants.eNotMe) and \
-           (self._mVisualProber.get_state() == constants.eNotMe):
-            return constants.eNotMe
-        return constants.eDetecting
+        if (self._mLogicalProber.get_state() == eNotMe) and \
+           (self._mVisualProber.get_state() == eNotMe):
+            return eNotMe
+        return eDetecting
diff --git a/thirdparty/chardet/jisfreq.py b/thirdparty/chardet/jisfreq.py
index 5fe4a5c3f..064345b08 100644
--- a/thirdparty/chardet/jisfreq.py
+++ b/thirdparty/chardet/jisfreq.py
@@ -13,12 +13,12 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
@@ -28,7 +28,7 @@
 # Sampling from about 20M text materials include literature and computer technology
 #
 # Japanese frequency table, applied to both S-JIS and EUC-JP
-# They are sorted in order. 
+# They are sorted in order.
 
 # 128  --> 0.77094
 # 256  --> 0.85710
@@ -38,15 +38,15 @@
 #
 # Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58
 # Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191
-# 
-# Typical Distribution Ratio, 25% of IDR 
+#
+# Typical Distribution Ratio, 25% of IDR
 
 JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0
 
-# Char to FreqOrder table , 
+# Char to FreqOrder table ,
 JIS_TABLE_SIZE = 4368
 
-JISCharToFreqOrder = ( \
+JISCharToFreqOrder = (
   40,   1,   6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, #   16
 3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247,  18, 179,5071, 856,1661, #   32
 1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, #   48
@@ -565,3 +565,5 @@ JISCharToFreqOrder = ( \
 8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, # 8240
 8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, # 8256
 8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271) # 8272
+
+# flake8: noqa
diff --git a/thirdparty/chardet/jpcntx.py b/thirdparty/chardet/jpcntx.py
index 06d396e5b..59aeb6a87 100644
--- a/thirdparty/chardet/jpcntx.py
+++ b/thirdparty/chardet/jpcntx.py
@@ -13,19 +13,19 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants
+from .compat import wrap_ord
 
 NUM_OF_CATEGORY = 6
 DONT_KNOW = -1
@@ -34,7 +34,7 @@ MAX_REL_THRESHOLD = 1000
 MINIMUM_DATA_THRESHOLD = 4
 
 # This is hiragana 2-char sequence table, the number in each cell represents its frequency category
-jp2CharContext = ( \
+jp2CharContext = (
 (0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
 (2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
 (0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
@@ -125,24 +125,31 @@ class JapaneseContextAnalysis:
         self.reset()
 
     def reset(self):
-        self._mTotalRel = 0 # total sequence received
-        self._mRelSample = [0] * NUM_OF_CATEGORY # category counters, each interger counts sequence in its category
-        self._mNeedToSkipCharNum = 0 # if last byte in current buffer is not the last byte of a character, we need to know how many bytes to skip in next buffer
-        self._mLastCharOrder = -1 # The order of previous char
-        self._mDone = constants.False # If this flag is set to constants.True, detection is done and conclusion has been made
+        self._mTotalRel = 0  # total sequence received
+        # category counters, each integer counts sequences in its category
+        self._mRelSample = [0] * NUM_OF_CATEGORY
+        # if last byte in current buffer is not the last byte of a character,
+        # we need to know how many bytes to skip in next buffer
+        self._mNeedToSkipCharNum = 0
+        self._mLastCharOrder = -1  # The order of previous char
+        # If this flag is set to True, detection is done and conclusion has
+        # been made
+        self._mDone = False
 
     def feed(self, aBuf, aLen):
-        if self._mDone: return
+        if self._mDone:
+            return
 
         # The buffer we got is byte oriented, and a character may span in more than one
-        # buffers. In case the last one or two byte in last buffer is not complete, we 
-        # record how many byte needed to complete that character and skip these bytes here.
-        # We can choose to record those bytes as well and analyse the character once it 
-        # is complete, but since a character will not make much difference, by simply skipping
+        # buffers. In case the last one or two bytes in the last buffer are not
+        # complete, we record how many bytes are needed to complete that
+        # character and skip these bytes here.  We can choose to record those
+        # bytes as well and analyse the character once it is complete, but
+        # since a character will not make much difference, by simply skipping
         # this character will simply our logic and improve performance.
         i = self._mNeedToSkipCharNum
         while i < aLen:
-            order, charLen = self.get_order(aBuf[i:i+2])
+            order, charLen = self.get_order(aBuf[i:i + 2])
             i += charLen
             if i > aLen:
                 self._mNeedToSkipCharNum = i - aLen
@@ -151,7 +158,7 @@ class JapaneseContextAnalysis:
                 if (order != -1) and (self._mLastCharOrder != -1):
                     self._mTotalRel += 1
                     if self._mTotalRel > MAX_REL_THRESHOLD:
-                        self._mDone = constants.True
+                        self._mDone = True
                         break
                     self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1
                 self._mLastCharOrder = order
@@ -166,45 +173,55 @@ class JapaneseContextAnalysis:
         else:
             return DONT_KNOW
 
-    def get_order(self, aStr):
+    def get_order(self, aBuf):
         return -1, 1
 
 class SJISContextAnalysis(JapaneseContextAnalysis):
-    def get_order(self, aStr):
-        if not aStr: return -1, 1
+    def __init__(self):
+        self.charset_name = "SHIFT_JIS"
+
+    def get_charset_name(self):
+        return self.charset_name
+
+    def get_order(self, aBuf):
+        if not aBuf:
+            return -1, 1
         # find out current char's byte length
-        if ((aStr[0] >= '\x81') and (aStr[0] <= '\x9F')) or \
-           ((aStr[0] >= '\xE0') and (aStr[0] <= '\xFC')):
+        first_char = wrap_ord(aBuf[0])
+        if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
             charLen = 2
+            if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
+                self.charset_name = "CP932"
         else:
             charLen = 1
 
         # return its order if it is hiragana
-        if len(aStr) > 1:
-            if (aStr[0] == '\202') and \
-               (aStr[1] >= '\x9F') and \
-               (aStr[1] <= '\xF1'):
-                return ord(aStr[1]) - 0x9F, charLen
+        if len(aBuf) > 1:  # hiragana lead byte 0x82 is octal '\202', not 202
+            second_char = wrap_ord(aBuf[1])
+            if (first_char == 0x82) and (0x9F <= second_char <= 0xF1):
+                return second_char - 0x9F, charLen
 
         return -1, charLen
 
 class EUCJPContextAnalysis(JapaneseContextAnalysis):
-    def get_order(self, aStr):
-        if not aStr: return -1, 1
+    def get_order(self, aBuf):
+        if not aBuf:
+            return -1, 1
         # find out current char's byte length
-        if (aStr[0] == '\x8E') or \
-           ((aStr[0] >= '\xA1') and (aStr[0] <= '\xFE')):
+        first_char = wrap_ord(aBuf[0])
+        if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
             charLen = 2
-        elif aStr[0] == '\x8F':
+        elif first_char == 0x8F:
             charLen = 3
         else:
             charLen = 1
 
         # return its order if it is hiragana
-        if len(aStr) > 1:
-            if (aStr[0] == '\xA4') and \
-               (aStr[1] >= '\xA1') and \
-               (aStr[1] <= '\xF3'):
-                return ord(aStr[1]) - 0xA1, charLen
+        if len(aBuf) > 1:
+            second_char = wrap_ord(aBuf[1])
+            if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
+                return second_char - 0xA1, charLen
 
         return -1, charLen
+
+# flake8: noqa
diff --git a/thirdparty/chardet/langbulgarianmodel.py b/thirdparty/chardet/langbulgarianmodel.py
index bf5641e7b..e5788fc64 100644
--- a/thirdparty/chardet/langbulgarianmodel.py
+++ b/thirdparty/chardet/langbulgarianmodel.py
@@ -13,30 +13,28 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants
-
 # 255: Control characters that usually does not exist in any text
 # 254: Carriage/Return
 # 253: symbol (punctuation) that does not belong to word
 # 252: 0 - 9
 
 # Character Mapping Table:
-# this table is modified base on win1251BulgarianCharToOrderMap, so 
+# this table is modified base on win1251BulgarianCharToOrderMap, so
 # only number <64 is sure valid
 
-Latin5_BulgarianCharToOrderMap = ( \
+Latin5_BulgarianCharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -55,7 +53,7 @@ Latin5_BulgarianCharToOrderMap = ( \
  62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253,  # f0
 )
 
-win1251BulgarianCharToOrderMap = ( \
+win1251BulgarianCharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -74,13 +72,13 @@ win1251BulgarianCharToOrderMap = ( \
   7,  8,  5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16,  # f0
 )
 
-# Model Table: 
+# Model Table:
 # total sequences: 100%
 # first 512 sequences: 96.9392%
 # first 1024 sequences:3.0618%
 # rest  sequences:     0.2992%
-# negative sequences:  0.0020% 
-BulgarianLangModel = ( \
+# negative sequences:  0.0020%
+BulgarianLangModel = (
 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
 3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,
@@ -211,18 +209,21 @@ BulgarianLangModel = ( \
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 )
 
-Latin5BulgarianModel = { \
+Latin5BulgarianModel = {
   'charToOrderMap': Latin5_BulgarianCharToOrderMap,
   'precedenceMatrix': BulgarianLangModel,
   'mTypicalPositiveRatio': 0.969392,
-  'keepEnglishLetter': constants.False,
+  'keepEnglishLetter': False,
   'charsetName': "ISO-8859-5"
 }
 
-Win1251BulgarianModel = { \
+Win1251BulgarianModel = {
   'charToOrderMap': win1251BulgarianCharToOrderMap,
   'precedenceMatrix': BulgarianLangModel,
   'mTypicalPositiveRatio': 0.969392,
-  'keepEnglishLetter': constants.False,
+  'keepEnglishLetter': False,
   'charsetName': "windows-1251"
 }
+
+
+# flake8: noqa
diff --git a/thirdparty/chardet/langcyrillicmodel.py b/thirdparty/chardet/langcyrillicmodel.py
index e604cc73d..a86f54bd5 100644
--- a/thirdparty/chardet/langcyrillicmodel.py
+++ b/thirdparty/chardet/langcyrillicmodel.py
@@ -13,23 +13,21 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants
-
 # KOI8-R language model
 # Character Mapping Table:
-KOI8R_CharToOrderMap = ( \
+KOI8R_CharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -48,7 +46,7 @@ KOI8R_CharToOrderMap = ( \
  35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70,  # f0
 )
 
-win1251_CharToOrderMap = ( \
+win1251_CharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -67,7 +65,7 @@ win1251_CharToOrderMap = ( \
   9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
 )
 
-latin5_CharToOrderMap = ( \
+latin5_CharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -86,7 +84,7 @@ latin5_CharToOrderMap = ( \
 239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
 )
 
-macCyrillic_CharToOrderMap = ( \
+macCyrillic_CharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -105,7 +103,7 @@ macCyrillic_CharToOrderMap = ( \
   9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
 )
 
-IBM855_CharToOrderMap = ( \
+IBM855_CharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -124,7 +122,7 @@ IBM855_CharToOrderMap = ( \
 250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
 )
 
-IBM866_CharToOrderMap = ( \
+IBM866_CharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -143,13 +141,13 @@ IBM866_CharToOrderMap = ( \
 239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
 )
 
-# Model Table: 
+# Model Table:
 # total sequences: 100%
 # first 512 sequences: 97.6601%
 # first 1024 sequences: 2.3389%
 # rest  sequences:      0.1237%
-# negative sequences:   0.0009% 
-RussianLangModel = ( \
+# negative sequences:   0.0009%
+RussianLangModel = (
 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
 3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
@@ -280,50 +278,52 @@ RussianLangModel = ( \
 0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
 )
 
-Koi8rModel = { \
+Koi8rModel = {
   'charToOrderMap': KOI8R_CharToOrderMap,
   'precedenceMatrix': RussianLangModel,
   'mTypicalPositiveRatio': 0.976601,
-  'keepEnglishLetter': constants.False,
+  'keepEnglishLetter': False,
   'charsetName': "KOI8-R"
 }
 
-Win1251CyrillicModel = { \
+Win1251CyrillicModel = {
   'charToOrderMap': win1251_CharToOrderMap,
   'precedenceMatrix': RussianLangModel,
   'mTypicalPositiveRatio': 0.976601,
-  'keepEnglishLetter': constants.False,
+  'keepEnglishLetter': False,
   'charsetName': "windows-1251"
 }
 
-Latin5CyrillicModel = { \
+Latin5CyrillicModel = {
   'charToOrderMap': latin5_CharToOrderMap,
   'precedenceMatrix': RussianLangModel,
   'mTypicalPositiveRatio': 0.976601,
-  'keepEnglishLetter': constants.False,
+  'keepEnglishLetter': False,
   'charsetName': "ISO-8859-5"
 }
 
-MacCyrillicModel = { \
+MacCyrillicModel = {
   'charToOrderMap': macCyrillic_CharToOrderMap,
   'precedenceMatrix': RussianLangModel,
   'mTypicalPositiveRatio': 0.976601,
-  'keepEnglishLetter': constants.False,
+  'keepEnglishLetter': False,
   'charsetName': "MacCyrillic"
 };
 
-Ibm866Model = { \
+Ibm866Model = {
   'charToOrderMap': IBM866_CharToOrderMap,
   'precedenceMatrix': RussianLangModel,
   'mTypicalPositiveRatio': 0.976601,
-  'keepEnglishLetter': constants.False,
+  'keepEnglishLetter': False,
   'charsetName': "IBM866"
 }
 
-Ibm855Model = { \
+Ibm855Model = {
   'charToOrderMap': IBM855_CharToOrderMap,
   'precedenceMatrix': RussianLangModel,
   'mTypicalPositiveRatio': 0.976601,
-  'keepEnglishLetter': constants.False,
+  'keepEnglishLetter': False,
   'charsetName': "IBM855"
 }
+
+# flake8: noqa
diff --git a/thirdparty/chardet/langgreekmodel.py b/thirdparty/chardet/langgreekmodel.py
index ec6d49e80..ddb583765 100644
--- a/thirdparty/chardet/langgreekmodel.py
+++ b/thirdparty/chardet/langgreekmodel.py
@@ -13,27 +13,25 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants
-
 # 255: Control characters that usually does not exist in any text
 # 254: Carriage/Return
 # 253: symbol (punctuation) that does not belong to word
 # 252: 0 - 9
 
 # Character Mapping Table:
-Latin7_CharToOrderMap = ( \
+Latin7_CharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -52,7 +50,7 @@ Latin7_CharToOrderMap = ( \
   9,  8, 14,  7,  2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253,  # f0
 )
 
-win1253_CharToOrderMap = ( \
+win1253_CharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -71,13 +69,13 @@ win1253_CharToOrderMap = ( \
   9,  8, 14,  7,  2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253,  # f0
 )
 
-# Model Table: 
+# Model Table:
 # total sequences: 100%
 # first 512 sequences: 98.2851%
 # first 1024 sequences:1.7001%
 # rest  sequences:     0.0359%
-# negative sequences:  0.0148% 
-GreekLangModel = ( \
+# negative sequences:  0.0148%
+GreekLangModel = (
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
@@ -208,18 +206,20 @@ GreekLangModel = ( \
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 )
 
-Latin7GreekModel = { \
+Latin7GreekModel = {
   'charToOrderMap': Latin7_CharToOrderMap,
   'precedenceMatrix': GreekLangModel,
   'mTypicalPositiveRatio': 0.982851,
-  'keepEnglishLetter': constants.False,
+  'keepEnglishLetter': False,
   'charsetName': "ISO-8859-7"
 }
 
-Win1253GreekModel = { \
+Win1253GreekModel = {
   'charToOrderMap': win1253_CharToOrderMap,
   'precedenceMatrix': GreekLangModel,
   'mTypicalPositiveRatio': 0.982851,
-  'keepEnglishLetter': constants.False,
+  'keepEnglishLetter': False,
   'charsetName': "windows-1253"
 }
+
+# flake8: noqa
diff --git a/thirdparty/chardet/langhebrewmodel.py b/thirdparty/chardet/langhebrewmodel.py
index a8bcc65bf..75f2bc7fe 100644
--- a/thirdparty/chardet/langhebrewmodel.py
+++ b/thirdparty/chardet/langhebrewmodel.py
@@ -15,20 +15,18 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants
-
 # 255: Control characters that usually does not exist in any text
 # 254: Carriage/Return
 # 253: symbol (punctuation) that does not belong to word
@@ -36,7 +34,7 @@ import constants
 
 # Windows-1255 language model
 # Character Mapping Table:
-win1255_CharToOrderMap = ( \
+win1255_CharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -55,13 +53,13 @@ win1255_CharToOrderMap = ( \
  12, 19, 13, 26, 18, 27, 21, 17,  7, 10,  5,251,252,128, 96,253,
 )
 
-# Model Table: 
+# Model Table:
 # total sequences: 100%
 # first 512 sequences: 98.4004%
 # first 1024 sequences: 1.5981%
 # rest  sequences:      0.087%
-# negative sequences:   0.0015% 
-HebrewLangModel = ( \
+# negative sequences:   0.0015%
+HebrewLangModel = (
 0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
 3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
@@ -192,10 +190,12 @@ HebrewLangModel = ( \
 0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
 )
 
-Win1255HebrewModel = { \
+Win1255HebrewModel = {
   'charToOrderMap': win1255_CharToOrderMap,
   'precedenceMatrix': HebrewLangModel,
   'mTypicalPositiveRatio': 0.984004,
-  'keepEnglishLetter': constants.False,
+  'keepEnglishLetter': False,
   'charsetName': "windows-1255"
 }
+
+# flake8: noqa
diff --git a/thirdparty/chardet/langhungarianmodel.py b/thirdparty/chardet/langhungarianmodel.py
index d635f03c2..49d2f0fe7 100644
--- a/thirdparty/chardet/langhungarianmodel.py
+++ b/thirdparty/chardet/langhungarianmodel.py
@@ -13,27 +13,25 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants
-
 # 255: Control characters that usually does not exist in any text
 # 254: Carriage/Return
 # 253: symbol (punctuation) that does not belong to word
 # 252: 0 - 9
 
 # Character Mapping Table:
-Latin2_HungarianCharToOrderMap = ( \
+Latin2_HungarianCharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -52,7 +50,7 @@ Latin2_HungarianCharToOrderMap = ( \
 245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
 )
 
-win1250HungarianCharToOrderMap = ( \
+win1250HungarianCharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -71,13 +69,13 @@ win1250HungarianCharToOrderMap = ( \
 245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
 )
 
-# Model Table: 
+# Model Table:
 # total sequences: 100%
 # first 512 sequences: 94.7368%
 # first 1024 sequences:5.2623%
 # rest  sequences:     0.8894%
-# negative sequences:  0.0009% 
-HungarianLangModel = ( \
+# negative sequences:  0.0009%
+HungarianLangModel = (
 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
 3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
@@ -208,18 +206,20 @@ HungarianLangModel = ( \
 0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
 )
 
-Latin2HungarianModel = { \
+Latin2HungarianModel = {
   'charToOrderMap': Latin2_HungarianCharToOrderMap,
   'precedenceMatrix': HungarianLangModel,
   'mTypicalPositiveRatio': 0.947368,
-  'keepEnglishLetter': constants.True,
+  'keepEnglishLetter': True,
   'charsetName': "ISO-8859-2"
 }
 
-Win1250HungarianModel = { \
+Win1250HungarianModel = {
   'charToOrderMap': win1250HungarianCharToOrderMap,
   'precedenceMatrix': HungarianLangModel,
   'mTypicalPositiveRatio': 0.947368,
-  'keepEnglishLetter': constants.True,
+  'keepEnglishLetter': True,
   'charsetName': "windows-1250"
 }
+
+# flake8: noqa
diff --git a/thirdparty/chardet/langthaimodel.py b/thirdparty/chardet/langthaimodel.py
index 96ec054f2..0508b1b1a 100644
--- a/thirdparty/chardet/langthaimodel.py
+++ b/thirdparty/chardet/langthaimodel.py
@@ -13,29 +13,27 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants
-
 # 255: Control characters that usually does not exist in any text
 # 254: Carriage/Return
 # 253: symbol (punctuation) that does not belong to word
 # 252: 0 - 9
 
-# The following result for thai was collected from a limited sample (1M). 
+# The following result for thai was collected from a limited sample (1M).
 
 # Character Mapping Table:
-TIS620CharToOrderMap = ( \
+TIS620CharToOrderMap = (
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
@@ -54,13 +52,13 @@ TIS620CharToOrderMap = ( \
  68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
 )
 
-# Model Table: 
+# Model Table:
 # total sequences: 100%
 # first 512 sequences: 92.6386%
 # first 1024 sequences:7.3177%
 # rest  sequences:     1.0230%
-# negative sequences:  0.0436% 
-ThaiLangModel = ( \
+# negative sequences:  0.0436%
+ThaiLangModel = (
 0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
 0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
 3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
@@ -191,10 +189,12 @@ ThaiLangModel = ( \
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 )
 
-TIS620ThaiModel = { \
+TIS620ThaiModel = {
   'charToOrderMap': TIS620CharToOrderMap,
   'precedenceMatrix': ThaiLangModel,
   'mTypicalPositiveRatio': 0.926386,
-  'keepEnglishLetter': constants.False,
+  'keepEnglishLetter': False,
   'charsetName': "TIS-620"
 }
+
+# flake8: noqa
diff --git a/thirdparty/chardet/latin1prober.py b/thirdparty/chardet/latin1prober.py
index ae4527c75..eef357354 100644
--- a/thirdparty/chardet/latin1prober.py
+++ b/thirdparty/chardet/latin1prober.py
@@ -14,85 +14,86 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-from charsetprober import CharSetProber
-import constants
-import operator
+from .charsetprober import CharSetProber
+from .constants import eNotMe
+from .compat import wrap_ord
 
 FREQ_CAT_NUM = 4
 
-UDF = 0 # undefined
-OTH = 1 # other
-ASC = 2 # ascii capital letter
-ASS = 3 # ascii small letter
-ACV = 4 # accent capital vowel
-ACO = 5 # accent capital other
-ASV = 6 # accent small vowel
-ASO = 7 # accent small other
-CLASS_NUM = 8 # total classes
+UDF = 0  # undefined
+OTH = 1  # other
+ASC = 2  # ascii capital letter
+ASS = 3  # ascii small letter
+ACV = 4  # accent capital vowel
+ACO = 5  # accent capital other
+ASV = 6  # accent small vowel
+ASO = 7  # accent small other
+CLASS_NUM = 8  # total classes
 
-Latin1_CharToClass = ( \
-  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 00 - 07
-  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 08 - 0F
-  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 10 - 17
-  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 18 - 1F
-  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 20 - 27
-  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 28 - 2F
-  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 30 - 37
-  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 38 - 3F
-  OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 40 - 47
-  ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 48 - 4F
-  ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 50 - 57
-  ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,   # 58 - 5F
-  OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 60 - 67
-  ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 68 - 6F
-  ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 70 - 77
-  ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,   # 78 - 7F
-  OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,   # 80 - 87
-  OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,   # 88 - 8F
-  UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 90 - 97
-  OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,   # 98 - 9F
-  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A0 - A7
-  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A8 - AF
-  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B0 - B7
-  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B8 - BF
-  ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,   # C0 - C7
-  ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,   # C8 - CF
-  ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,   # D0 - D7
-  ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,   # D8 - DF
-  ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,   # E0 - E7
-  ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,   # E8 - EF
-  ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,   # F0 - F7
-  ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,   # F8 - FF
+Latin1_CharToClass = (
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 00 - 07
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 08 - 0F
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 10 - 17
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 18 - 1F
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 20 - 27
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 28 - 2F
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 30 - 37
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 38 - 3F
+    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 40 - 47
+    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 48 - 4F
+    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 50 - 57
+    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,   # 58 - 5F
+    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 60 - 67
+    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 68 - 6F
+    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 70 - 77
+    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,   # 78 - 7F
+    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,   # 80 - 87
+    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,   # 88 - 8F
+    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 90 - 97
+    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,   # 98 - 9F
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A0 - A7
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A8 - AF
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B0 - B7
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B8 - BF
+    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,   # C0 - C7
+    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,   # C8 - CF
+    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,   # D0 - D7
+    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,   # D8 - DF
+    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,   # E0 - E7
+    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,   # E8 - EF
+    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,   # F0 - F7
+    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,   # F8 - FF
 )
 
-# 0 : illegal 
-# 1 : very unlikely 
-# 2 : normal 
+# 0 : illegal
+# 1 : very unlikely
+# 2 : normal
 # 3 : very likely
-Latin1ClassModel = ( \
-# UDF OTH ASC ASS ACV ACO ASV ASO
-   0,  0,  0,  0,  0,  0,  0,  0,  # UDF
-   0,  3,  3,  3,  3,  3,  3,  3,  # OTH
-   0,  3,  3,  3,  3,  3,  3,  3,  # ASC
-   0,  3,  3,  3,  1,  1,  3,  3,  # ASS
-   0,  3,  3,  3,  1,  2,  1,  2,  # ACV
-   0,  3,  3,  3,  3,  3,  3,  3,  # ACO
-   0,  3,  1,  3,  1,  1,  1,  3,  # ASV
-   0,  3,  1,  3,  1,  1,  3,  3,  # ASO
+Latin1ClassModel = (
+    # UDF OTH ASC ASS ACV ACO ASV ASO
+    0,  0,  0,  0,  0,  0,  0,  0,  # UDF
+    0,  3,  3,  3,  3,  3,  3,  3,  # OTH
+    0,  3,  3,  3,  3,  3,  3,  3,  # ASC
+    0,  3,  3,  3,  1,  1,  3,  3,  # ASS
+    0,  3,  3,  3,  1,  2,  1,  2,  # ACV
+    0,  3,  3,  3,  3,  3,  3,  3,  # ACO
+    0,  3,  1,  3,  1,  1,  1,  3,  # ASV
+    0,  3,  1,  3,  1,  1,  3,  3,  # ASO
 )
 
+
 class Latin1Prober(CharSetProber):
     def __init__(self):
         CharSetProber.__init__(self)
@@ -109,10 +110,11 @@ class Latin1Prober(CharSetProber):
     def feed(self, aBuf):
         aBuf = self.filter_with_english_letters(aBuf)
         for c in aBuf:
-            charClass = Latin1_CharToClass[ord(c)]
-            freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) + charClass]
+            charClass = Latin1_CharToClass[wrap_ord(c)]
+            freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
+                                    + charClass]
             if freq == 0:
-                self._mState = constants.eNotMe
+                self._mState = eNotMe
                 break
             self._mFreqCounter[freq] += 1
             self._mLastCharClass = charClass
@@ -120,17 +122,18 @@ class Latin1Prober(CharSetProber):
         return self.get_state()
 
     def get_confidence(self):
-        if self.get_state() == constants.eNotMe:
+        if self.get_state() == eNotMe:
             return 0.01
 
-        total = reduce(operator.add, self._mFreqCounter)
+        total = sum(self._mFreqCounter)
         if total < 0.01:
             confidence = 0.0
         else:
-            confidence = (self._mFreqCounter[3] / total) - (self._mFreqCounter[1] * 20.0 / total)
+            confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0)
+                          / total)
         if confidence < 0.0:
             confidence = 0.0
-        # lower the confidence of latin1 so that other more accurate detector 
-        # can take priority.
-        confidence = confidence * 0.5
+        # lower the confidence of latin1 so that other more accurate
+        # detector can take priority.
+        confidence = confidence * 0.73
         return confidence
diff --git a/thirdparty/chardet/mbcharsetprober.py b/thirdparty/chardet/mbcharsetprober.py
index 09b035e02..bb42f2fb5 100644
--- a/thirdparty/chardet/mbcharsetprober.py
+++ b/thirdparty/chardet/mbcharsetprober.py
@@ -15,28 +15,29 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants, sys
-from constants import eStart, eError, eItsMe
-from charsetprober import CharSetProber
+import sys
+from . import constants
+from .charsetprober import CharSetProber
+
 
 class MultiByteCharSetProber(CharSetProber):
     def __init__(self):
         CharSetProber.__init__(self)
         self._mDistributionAnalyzer = None
         self._mCodingSM = None
-        self._mLastChar = ['\x00', '\x00']
+        self._mLastChar = [0, 0]
 
     def reset(self):
         CharSetProber.reset(self)
@@ -44,36 +45,39 @@ class MultiByteCharSetProber(CharSetProber):
             self._mCodingSM.reset()
         if self._mDistributionAnalyzer:
             self._mDistributionAnalyzer.reset()
-        self._mLastChar = ['\x00', '\x00']
+        self._mLastChar = [0, 0]
 
     def get_charset_name(self):
         pass
 
     def feed(self, aBuf):
         aLen = len(aBuf)
-        for i in xrange(0, aLen):
+        for i in range(0, aLen):
             codingState = self._mCodingSM.next_state(aBuf[i])
-            if codingState == eError:
+            if codingState == constants.eError:
                 if constants._debug:
-                    sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n')
+                    sys.stderr.write(self.get_charset_name()
+                                     + ' prober hit error at byte ' + str(i)
+                                     + '\n')
                 self._mState = constants.eNotMe
                 break
-            elif codingState == eItsMe:
+            elif codingState == constants.eItsMe:
                 self._mState = constants.eFoundIt
                 break
-            elif codingState == eStart:
+            elif codingState == constants.eStart:
                 charLen = self._mCodingSM.get_current_charlen()
                 if i == 0:
                     self._mLastChar[1] = aBuf[0]
                     self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                 else:
-                    self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen)
+                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
+                                                     charLen)
 
         self._mLastChar[0] = aBuf[aLen - 1]
 
         if self.get_state() == constants.eDetecting:
-            if self._mDistributionAnalyzer.got_enough_data() and \
-               (self.get_confidence() > constants.SHORTCUT_THRESHOLD):
+            if (self._mDistributionAnalyzer.got_enough_data() and
+                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                 self._mState = constants.eFoundIt
 
         return self.get_state()
diff --git a/thirdparty/chardet/mbcsgroupprober.py b/thirdparty/chardet/mbcsgroupprober.py
index 941cc3e37..03c9dcf3e 100644
--- a/thirdparty/chardet/mbcsgroupprober.py
+++ b/thirdparty/chardet/mbcsgroupprober.py
@@ -15,36 +15,40 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-from charsetgroupprober import CharSetGroupProber
-from utf8prober import UTF8Prober
-from sjisprober import SJISProber
-from eucjpprober import EUCJPProber
-from gb2312prober import GB2312Prober
-from euckrprober import EUCKRProber
-from big5prober import Big5Prober
-from euctwprober import EUCTWProber
+from .charsetgroupprober import CharSetGroupProber
+from .utf8prober import UTF8Prober
+from .sjisprober import SJISProber
+from .eucjpprober import EUCJPProber
+from .gb2312prober import GB2312Prober
+from .euckrprober import EUCKRProber
+from .cp949prober import CP949Prober
+from .big5prober import Big5Prober
+from .euctwprober import EUCTWProber
+
 
 class MBCSGroupProber(CharSetGroupProber):
     def __init__(self):
         CharSetGroupProber.__init__(self)
-        self._mProbers = [ \
+        self._mProbers = [
             UTF8Prober(),
             SJISProber(),
             EUCJPProber(),
             GB2312Prober(),
             EUCKRProber(),
+            CP949Prober(),
             Big5Prober(),
-            EUCTWProber()]
+            EUCTWProber()
+        ]
         self.reset()
diff --git a/thirdparty/chardet/mbcssm.py b/thirdparty/chardet/mbcssm.py
index 2b68306b0..efe678ca0 100644
--- a/thirdparty/chardet/mbcssm.py
+++ b/thirdparty/chardet/mbcssm.py
@@ -13,60 +13,62 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-from constants import eStart, eError, eItsMe
+from .constants import eStart, eError, eItsMe
 
-# BIG5 
+# BIG5
 
-BIG5_cls = ( \
+BIG5_cls = (
     1,1,1,1,1,1,1,1,  # 00 - 07    #allow 0x00 as legal value
-    1,1,1,1,1,1,0,0,  # 08 - 0f 
-    1,1,1,1,1,1,1,1,  # 10 - 17 
-    1,1,1,0,1,1,1,1,  # 18 - 1f 
-    1,1,1,1,1,1,1,1,  # 20 - 27 
-    1,1,1,1,1,1,1,1,  # 28 - 2f 
-    1,1,1,1,1,1,1,1,  # 30 - 37 
-    1,1,1,1,1,1,1,1,  # 38 - 3f 
-    2,2,2,2,2,2,2,2,  # 40 - 47 
-    2,2,2,2,2,2,2,2,  # 48 - 4f 
-    2,2,2,2,2,2,2,2,  # 50 - 57 
-    2,2,2,2,2,2,2,2,  # 58 - 5f 
-    2,2,2,2,2,2,2,2,  # 60 - 67 
-    2,2,2,2,2,2,2,2,  # 68 - 6f 
-    2,2,2,2,2,2,2,2,  # 70 - 77 
-    2,2,2,2,2,2,2,1,  # 78 - 7f 
-    4,4,4,4,4,4,4,4,  # 80 - 87 
-    4,4,4,4,4,4,4,4,  # 88 - 8f 
-    4,4,4,4,4,4,4,4,  # 90 - 97 
-    4,4,4,4,4,4,4,4,  # 98 - 9f 
-    4,3,3,3,3,3,3,3,  # a0 - a7 
-    3,3,3,3,3,3,3,3,  # a8 - af 
-    3,3,3,3,3,3,3,3,  # b0 - b7 
-    3,3,3,3,3,3,3,3,  # b8 - bf 
-    3,3,3,3,3,3,3,3,  # c0 - c7 
-    3,3,3,3,3,3,3,3,  # c8 - cf 
-    3,3,3,3,3,3,3,3,  # d0 - d7 
-    3,3,3,3,3,3,3,3,  # d8 - df 
-    3,3,3,3,3,3,3,3,  # e0 - e7 
-    3,3,3,3,3,3,3,3,  # e8 - ef 
-    3,3,3,3,3,3,3,3,  # f0 - f7 
-    3,3,3,3,3,3,3,0)  # f8 - ff 
+    1,1,1,1,1,1,0,0,  # 08 - 0f
+    1,1,1,1,1,1,1,1,  # 10 - 17
+    1,1,1,0,1,1,1,1,  # 18 - 1f
+    1,1,1,1,1,1,1,1,  # 20 - 27
+    1,1,1,1,1,1,1,1,  # 28 - 2f
+    1,1,1,1,1,1,1,1,  # 30 - 37
+    1,1,1,1,1,1,1,1,  # 38 - 3f
+    2,2,2,2,2,2,2,2,  # 40 - 47
+    2,2,2,2,2,2,2,2,  # 48 - 4f
+    2,2,2,2,2,2,2,2,  # 50 - 57
+    2,2,2,2,2,2,2,2,  # 58 - 5f
+    2,2,2,2,2,2,2,2,  # 60 - 67
+    2,2,2,2,2,2,2,2,  # 68 - 6f
+    2,2,2,2,2,2,2,2,  # 70 - 77
+    2,2,2,2,2,2,2,1,  # 78 - 7f
+    4,4,4,4,4,4,4,4,  # 80 - 87
+    4,4,4,4,4,4,4,4,  # 88 - 8f
+    4,4,4,4,4,4,4,4,  # 90 - 97
+    4,4,4,4,4,4,4,4,  # 98 - 9f
+    4,3,3,3,3,3,3,3,  # a0 - a7
+    3,3,3,3,3,3,3,3,  # a8 - af
+    3,3,3,3,3,3,3,3,  # b0 - b7
+    3,3,3,3,3,3,3,3,  # b8 - bf
+    3,3,3,3,3,3,3,3,  # c0 - c7
+    3,3,3,3,3,3,3,3,  # c8 - cf
+    3,3,3,3,3,3,3,3,  # d0 - d7
+    3,3,3,3,3,3,3,3,  # d8 - df
+    3,3,3,3,3,3,3,3,  # e0 - e7
+    3,3,3,3,3,3,3,3,  # e8 - ef
+    3,3,3,3,3,3,3,3,  # f0 - f7
+    3,3,3,3,3,3,3,0  # f8 - ff
+)
 
-BIG5_st = ( \
-    eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07 
-    eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f 
-    eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart)#10-17 
+BIG5_st = (
+    eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07
+    eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f
+    eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17
+)
 
 Big5CharLenTable = (0, 1, 1, 2, 0)
 
@@ -76,48 +78,90 @@ Big5SMModel = {'classTable': BIG5_cls,
                'charLenTable': Big5CharLenTable,
                'name': 'Big5'}
 
+# CP949
+
+CP949_cls  = (
+    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0,  # 00 - 0f
+    1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1,  # 10 - 1f
+    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 20 - 2f
+    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 30 - 3f
+    1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4,  # 40 - 4f
+    4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 50 - 5f
+    1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,  # 60 - 6f
+    5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 70 - 7f
+    0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 80 - 8f
+    6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 90 - 9f
+    6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8,  # a0 - af
+    7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,  # b0 - bf
+    7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2,  # c0 - cf
+    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # d0 - df
+    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # e0 - ef
+    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0,  # f0 - ff
+)
+
+CP949_st = (
+#cls=    0      1      2      3      4      5      6      7      8      9  # previous state =
+    eError,eStart,     3,eError,eStart,eStart,     4,     5,eError,     6, # eStart
+    eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError
+    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe
+    eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3
+    eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4
+    eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5
+    eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6
+)
+
+CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
+
+CP949SMModel = {'classTable': CP949_cls,
+                'classFactor': 10,
+                'stateTable': CP949_st,
+                'charLenTable': CP949CharLenTable,
+                'name': 'CP949'}
+
 # EUC-JP
 
-EUCJP_cls = ( \
-    4,4,4,4,4,4,4,4,  # 00 - 07 
-    4,4,4,4,4,4,5,5,  # 08 - 0f 
-    4,4,4,4,4,4,4,4,  # 10 - 17 
-    4,4,4,5,4,4,4,4,  # 18 - 1f 
-    4,4,4,4,4,4,4,4,  # 20 - 27 
-    4,4,4,4,4,4,4,4,  # 28 - 2f 
-    4,4,4,4,4,4,4,4,  # 30 - 37 
-    4,4,4,4,4,4,4,4,  # 38 - 3f 
-    4,4,4,4,4,4,4,4,  # 40 - 47 
-    4,4,4,4,4,4,4,4,  # 48 - 4f 
-    4,4,4,4,4,4,4,4,  # 50 - 57 
-    4,4,4,4,4,4,4,4,  # 58 - 5f 
-    4,4,4,4,4,4,4,4,  # 60 - 67 
-    4,4,4,4,4,4,4,4,  # 68 - 6f 
-    4,4,4,4,4,4,4,4,  # 70 - 77 
-    4,4,4,4,4,4,4,4,  # 78 - 7f 
-    5,5,5,5,5,5,5,5,  # 80 - 87 
-    5,5,5,5,5,5,1,3,  # 88 - 8f 
-    5,5,5,5,5,5,5,5,  # 90 - 97 
-    5,5,5,5,5,5,5,5,  # 98 - 9f 
-    5,2,2,2,2,2,2,2,  # a0 - a7 
-    2,2,2,2,2,2,2,2,  # a8 - af 
-    2,2,2,2,2,2,2,2,  # b0 - b7 
-    2,2,2,2,2,2,2,2,  # b8 - bf 
-    2,2,2,2,2,2,2,2,  # c0 - c7 
-    2,2,2,2,2,2,2,2,  # c8 - cf 
-    2,2,2,2,2,2,2,2,  # d0 - d7 
-    2,2,2,2,2,2,2,2,  # d8 - df 
-    0,0,0,0,0,0,0,0,  # e0 - e7 
-    0,0,0,0,0,0,0,0,  # e8 - ef 
-    0,0,0,0,0,0,0,0,  # f0 - f7 
-    0,0,0,0,0,0,0,5)  # f8 - ff 
+EUCJP_cls = (
+    4,4,4,4,4,4,4,4,  # 00 - 07
+    4,4,4,4,4,4,5,5,  # 08 - 0f
+    4,4,4,4,4,4,4,4,  # 10 - 17
+    4,4,4,5,4,4,4,4,  # 18 - 1f
+    4,4,4,4,4,4,4,4,  # 20 - 27
+    4,4,4,4,4,4,4,4,  # 28 - 2f
+    4,4,4,4,4,4,4,4,  # 30 - 37
+    4,4,4,4,4,4,4,4,  # 38 - 3f
+    4,4,4,4,4,4,4,4,  # 40 - 47
+    4,4,4,4,4,4,4,4,  # 48 - 4f
+    4,4,4,4,4,4,4,4,  # 50 - 57
+    4,4,4,4,4,4,4,4,  # 58 - 5f
+    4,4,4,4,4,4,4,4,  # 60 - 67
+    4,4,4,4,4,4,4,4,  # 68 - 6f
+    4,4,4,4,4,4,4,4,  # 70 - 77
+    4,4,4,4,4,4,4,4,  # 78 - 7f
+    5,5,5,5,5,5,5,5,  # 80 - 87
+    5,5,5,5,5,5,1,3,  # 88 - 8f
+    5,5,5,5,5,5,5,5,  # 90 - 97
+    5,5,5,5,5,5,5,5,  # 98 - 9f
+    5,2,2,2,2,2,2,2,  # a0 - a7
+    2,2,2,2,2,2,2,2,  # a8 - af
+    2,2,2,2,2,2,2,2,  # b0 - b7
+    2,2,2,2,2,2,2,2,  # b8 - bf
+    2,2,2,2,2,2,2,2,  # c0 - c7
+    2,2,2,2,2,2,2,2,  # c8 - cf
+    2,2,2,2,2,2,2,2,  # d0 - d7
+    2,2,2,2,2,2,2,2,  # d8 - df
+    0,0,0,0,0,0,0,0,  # e0 - e7
+    0,0,0,0,0,0,0,0,  # e8 - ef
+    0,0,0,0,0,0,0,0,  # f0 - f7
+    0,0,0,0,0,0,0,5  # f8 - ff
+)
 
-EUCJP_st = ( \
-          3,     4,     3,     5,eStart,eError,eError,eError,#00-07 
-     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f 
-     eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17 
-     eError,eError,eStart,eError,eError,eError,     3,eError,#18-1f 
-          3,eError,eError,eError,eStart,eStart,eStart,eStart)#20-27 
+EUCJP_st = (
+          3,     4,     3,     5,eStart,eError,eError,eError,#00-07
+     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+     eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17
+     eError,eError,eStart,eError,eError,eError,     3,eError,#18-1f
+          3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27
+)
 
 EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)
 
@@ -129,43 +173,45 @@ EUCJPSMModel = {'classTable': EUCJP_cls,
 
 # EUC-KR
 
-EUCKR_cls = ( \
-    1,1,1,1,1,1,1,1,  # 00 - 07 
-    1,1,1,1,1,1,0,0,  # 08 - 0f 
-    1,1,1,1,1,1,1,1,  # 10 - 17 
-    1,1,1,0,1,1,1,1,  # 18 - 1f 
-    1,1,1,1,1,1,1,1,  # 20 - 27 
-    1,1,1,1,1,1,1,1,  # 28 - 2f 
-    1,1,1,1,1,1,1,1,  # 30 - 37 
-    1,1,1,1,1,1,1,1,  # 38 - 3f 
-    1,1,1,1,1,1,1,1,  # 40 - 47 
-    1,1,1,1,1,1,1,1,  # 48 - 4f 
-    1,1,1,1,1,1,1,1,  # 50 - 57 
-    1,1,1,1,1,1,1,1,  # 58 - 5f 
-    1,1,1,1,1,1,1,1,  # 60 - 67 
-    1,1,1,1,1,1,1,1,  # 68 - 6f 
-    1,1,1,1,1,1,1,1,  # 70 - 77 
-    1,1,1,1,1,1,1,1,  # 78 - 7f 
-    0,0,0,0,0,0,0,0,  # 80 - 87 
-    0,0,0,0,0,0,0,0,  # 88 - 8f 
-    0,0,0,0,0,0,0,0,  # 90 - 97 
-    0,0,0,0,0,0,0,0,  # 98 - 9f 
-    0,2,2,2,2,2,2,2,  # a0 - a7 
-    2,2,2,2,2,3,3,3,  # a8 - af 
-    2,2,2,2,2,2,2,2,  # b0 - b7 
-    2,2,2,2,2,2,2,2,  # b8 - bf 
-    2,2,2,2,2,2,2,2,  # c0 - c7 
-    2,3,2,2,2,2,2,2,  # c8 - cf 
-    2,2,2,2,2,2,2,2,  # d0 - d7 
-    2,2,2,2,2,2,2,2,  # d8 - df 
-    2,2,2,2,2,2,2,2,  # e0 - e7 
-    2,2,2,2,2,2,2,2,  # e8 - ef 
-    2,2,2,2,2,2,2,2,  # f0 - f7 
-    2,2,2,2,2,2,2,0)  # f8 - ff 
+EUCKR_cls  = (
+    1,1,1,1,1,1,1,1,  # 00 - 07
+    1,1,1,1,1,1,0,0,  # 08 - 0f
+    1,1,1,1,1,1,1,1,  # 10 - 17
+    1,1,1,0,1,1,1,1,  # 18 - 1f
+    1,1,1,1,1,1,1,1,  # 20 - 27
+    1,1,1,1,1,1,1,1,  # 28 - 2f
+    1,1,1,1,1,1,1,1,  # 30 - 37
+    1,1,1,1,1,1,1,1,  # 38 - 3f
+    1,1,1,1,1,1,1,1,  # 40 - 47
+    1,1,1,1,1,1,1,1,  # 48 - 4f
+    1,1,1,1,1,1,1,1,  # 50 - 57
+    1,1,1,1,1,1,1,1,  # 58 - 5f
+    1,1,1,1,1,1,1,1,  # 60 - 67
+    1,1,1,1,1,1,1,1,  # 68 - 6f
+    1,1,1,1,1,1,1,1,  # 70 - 77
+    1,1,1,1,1,1,1,1,  # 78 - 7f
+    0,0,0,0,0,0,0,0,  # 80 - 87
+    0,0,0,0,0,0,0,0,  # 88 - 8f
+    0,0,0,0,0,0,0,0,  # 90 - 97
+    0,0,0,0,0,0,0,0,  # 98 - 9f
+    0,2,2,2,2,2,2,2,  # a0 - a7
+    2,2,2,2,2,3,3,3,  # a8 - af
+    2,2,2,2,2,2,2,2,  # b0 - b7
+    2,2,2,2,2,2,2,2,  # b8 - bf
+    2,2,2,2,2,2,2,2,  # c0 - c7
+    2,3,2,2,2,2,2,2,  # c8 - cf
+    2,2,2,2,2,2,2,2,  # d0 - d7
+    2,2,2,2,2,2,2,2,  # d8 - df
+    2,2,2,2,2,2,2,2,  # e0 - e7
+    2,2,2,2,2,2,2,2,  # e8 - ef
+    2,2,2,2,2,2,2,2,  # f0 - f7
+    2,2,2,2,2,2,2,0   # f8 - ff
+)
 
 EUCKR_st = (
-    eError,eStart,     3,eError,eError,eError,eError,eError,#00-07 
-    eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart)#08-0f 
+    eError,eStart,     3,eError,eError,eError,eError,eError,#00-07
+    eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f
+)
 
 EUCKRCharLenTable = (0, 1, 2, 0)
 
@@ -177,47 +223,49 @@ EUCKRSMModel = {'classTable': EUCKR_cls,
 
 # EUC-TW
 
-EUCTW_cls = ( \
-    2,2,2,2,2,2,2,2,  # 00 - 07 
-    2,2,2,2,2,2,0,0,  # 08 - 0f 
-    2,2,2,2,2,2,2,2,  # 10 - 17 
-    2,2,2,0,2,2,2,2,  # 18 - 1f 
-    2,2,2,2,2,2,2,2,  # 20 - 27 
-    2,2,2,2,2,2,2,2,  # 28 - 2f 
-    2,2,2,2,2,2,2,2,  # 30 - 37 
-    2,2,2,2,2,2,2,2,  # 38 - 3f 
-    2,2,2,2,2,2,2,2,  # 40 - 47 
-    2,2,2,2,2,2,2,2,  # 48 - 4f 
-    2,2,2,2,2,2,2,2,  # 50 - 57 
-    2,2,2,2,2,2,2,2,  # 58 - 5f 
-    2,2,2,2,2,2,2,2,  # 60 - 67 
-    2,2,2,2,2,2,2,2,  # 68 - 6f 
-    2,2,2,2,2,2,2,2,  # 70 - 77 
-    2,2,2,2,2,2,2,2,  # 78 - 7f 
-    0,0,0,0,0,0,0,0,  # 80 - 87 
-    0,0,0,0,0,0,6,0,  # 88 - 8f 
-    0,0,0,0,0,0,0,0,  # 90 - 97 
-    0,0,0,0,0,0,0,0,  # 98 - 9f 
-    0,3,4,4,4,4,4,4,  # a0 - a7 
-    5,5,1,1,1,1,1,1,  # a8 - af 
-    1,1,1,1,1,1,1,1,  # b0 - b7 
-    1,1,1,1,1,1,1,1,  # b8 - bf 
-    1,1,3,1,3,3,3,3,  # c0 - c7 
-    3,3,3,3,3,3,3,3,  # c8 - cf 
-    3,3,3,3,3,3,3,3,  # d0 - d7 
-    3,3,3,3,3,3,3,3,  # d8 - df 
-    3,3,3,3,3,3,3,3,  # e0 - e7 
-    3,3,3,3,3,3,3,3,  # e8 - ef 
-    3,3,3,3,3,3,3,3,  # f0 - f7 
-    3,3,3,3,3,3,3,0)  # f8 - ff 
+EUCTW_cls = (
+    2,2,2,2,2,2,2,2,  # 00 - 07
+    2,2,2,2,2,2,0,0,  # 08 - 0f
+    2,2,2,2,2,2,2,2,  # 10 - 17
+    2,2,2,0,2,2,2,2,  # 18 - 1f
+    2,2,2,2,2,2,2,2,  # 20 - 27
+    2,2,2,2,2,2,2,2,  # 28 - 2f
+    2,2,2,2,2,2,2,2,  # 30 - 37
+    2,2,2,2,2,2,2,2,  # 38 - 3f
+    2,2,2,2,2,2,2,2,  # 40 - 47
+    2,2,2,2,2,2,2,2,  # 48 - 4f
+    2,2,2,2,2,2,2,2,  # 50 - 57
+    2,2,2,2,2,2,2,2,  # 58 - 5f
+    2,2,2,2,2,2,2,2,  # 60 - 67
+    2,2,2,2,2,2,2,2,  # 68 - 6f
+    2,2,2,2,2,2,2,2,  # 70 - 77
+    2,2,2,2,2,2,2,2,  # 78 - 7f
+    0,0,0,0,0,0,0,0,  # 80 - 87
+    0,0,0,0,0,0,6,0,  # 88 - 8f
+    0,0,0,0,0,0,0,0,  # 90 - 97
+    0,0,0,0,0,0,0,0,  # 98 - 9f
+    0,3,4,4,4,4,4,4,  # a0 - a7
+    5,5,1,1,1,1,1,1,  # a8 - af
+    1,1,1,1,1,1,1,1,  # b0 - b7
+    1,1,1,1,1,1,1,1,  # b8 - bf
+    1,1,3,1,3,3,3,3,  # c0 - c7
+    3,3,3,3,3,3,3,3,  # c8 - cf
+    3,3,3,3,3,3,3,3,  # d0 - d7
+    3,3,3,3,3,3,3,3,  # d8 - df
+    3,3,3,3,3,3,3,3,  # e0 - e7
+    3,3,3,3,3,3,3,3,  # e8 - ef
+    3,3,3,3,3,3,3,3,  # f0 - f7
+    3,3,3,3,3,3,3,0   # f8 - ff
+)
 
-EUCTW_st = ( \
-    eError,eError,eStart,     3,     3,     3,     4,eError,#00-07 
-    eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f 
-    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17 
-    eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f 
-         5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27 
-    eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart)#28-2f 
+EUCTW_st = (
+    eError,eError,eStart,     3,     3,     3,     4,eError,#00-07
+    eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
+    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17
+    eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f
+         5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27
+    eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
+)
 
 EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)
 
@@ -229,53 +277,55 @@ EUCTWSMModel = {'classTable': EUCTW_cls,
 
 # GB2312
 
-GB2312_cls = ( \
-    1,1,1,1,1,1,1,1,  # 00 - 07 
-    1,1,1,1,1,1,0,0,  # 08 - 0f 
-    1,1,1,1,1,1,1,1,  # 10 - 17 
-    1,1,1,0,1,1,1,1,  # 18 - 1f 
-    1,1,1,1,1,1,1,1,  # 20 - 27 
-    1,1,1,1,1,1,1,1,  # 28 - 2f 
-    3,3,3,3,3,3,3,3,  # 30 - 37 
-    3,3,1,1,1,1,1,1,  # 38 - 3f 
-    2,2,2,2,2,2,2,2,  # 40 - 47 
-    2,2,2,2,2,2,2,2,  # 48 - 4f 
-    2,2,2,2,2,2,2,2,  # 50 - 57 
-    2,2,2,2,2,2,2,2,  # 58 - 5f 
-    2,2,2,2,2,2,2,2,  # 60 - 67 
-    2,2,2,2,2,2,2,2,  # 68 - 6f 
-    2,2,2,2,2,2,2,2,  # 70 - 77 
-    2,2,2,2,2,2,2,4,  # 78 - 7f 
-    5,6,6,6,6,6,6,6,  # 80 - 87 
-    6,6,6,6,6,6,6,6,  # 88 - 8f 
-    6,6,6,6,6,6,6,6,  # 90 - 97 
-    6,6,6,6,6,6,6,6,  # 98 - 9f 
-    6,6,6,6,6,6,6,6,  # a0 - a7 
-    6,6,6,6,6,6,6,6,  # a8 - af 
-    6,6,6,6,6,6,6,6,  # b0 - b7 
-    6,6,6,6,6,6,6,6,  # b8 - bf 
-    6,6,6,6,6,6,6,6,  # c0 - c7 
-    6,6,6,6,6,6,6,6,  # c8 - cf 
-    6,6,6,6,6,6,6,6,  # d0 - d7 
-    6,6,6,6,6,6,6,6,  # d8 - df 
-    6,6,6,6,6,6,6,6,  # e0 - e7 
-    6,6,6,6,6,6,6,6,  # e8 - ef 
-    6,6,6,6,6,6,6,6,  # f0 - f7 
-    6,6,6,6,6,6,6,0)  # f8 - ff 
+GB2312_cls = (
+    1,1,1,1,1,1,1,1,  # 00 - 07
+    1,1,1,1,1,1,0,0,  # 08 - 0f
+    1,1,1,1,1,1,1,1,  # 10 - 17
+    1,1,1,0,1,1,1,1,  # 18 - 1f
+    1,1,1,1,1,1,1,1,  # 20 - 27
+    1,1,1,1,1,1,1,1,  # 28 - 2f
+    3,3,3,3,3,3,3,3,  # 30 - 37
+    3,3,1,1,1,1,1,1,  # 38 - 3f
+    2,2,2,2,2,2,2,2,  # 40 - 47
+    2,2,2,2,2,2,2,2,  # 48 - 4f
+    2,2,2,2,2,2,2,2,  # 50 - 57
+    2,2,2,2,2,2,2,2,  # 58 - 5f
+    2,2,2,2,2,2,2,2,  # 60 - 67
+    2,2,2,2,2,2,2,2,  # 68 - 6f
+    2,2,2,2,2,2,2,2,  # 70 - 77
+    2,2,2,2,2,2,2,4,  # 78 - 7f
+    5,6,6,6,6,6,6,6,  # 80 - 87
+    6,6,6,6,6,6,6,6,  # 88 - 8f
+    6,6,6,6,6,6,6,6,  # 90 - 97
+    6,6,6,6,6,6,6,6,  # 98 - 9f
+    6,6,6,6,6,6,6,6,  # a0 - a7
+    6,6,6,6,6,6,6,6,  # a8 - af
+    6,6,6,6,6,6,6,6,  # b0 - b7
+    6,6,6,6,6,6,6,6,  # b8 - bf
+    6,6,6,6,6,6,6,6,  # c0 - c7
+    6,6,6,6,6,6,6,6,  # c8 - cf
+    6,6,6,6,6,6,6,6,  # d0 - d7
+    6,6,6,6,6,6,6,6,  # d8 - df
+    6,6,6,6,6,6,6,6,  # e0 - e7
+    6,6,6,6,6,6,6,6,  # e8 - ef
+    6,6,6,6,6,6,6,6,  # f0 - f7
+    6,6,6,6,6,6,6,0   # f8 - ff
+)
 
-GB2312_st = ( \
-    eError,eStart,eStart,eStart,eStart,eStart,     3,eError,#00-07 
-    eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f 
-    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17 
-         4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f 
-    eError,eError,     5,eError,eError,eError,eItsMe,eError,#20-27 
-    eError,eError,eStart,eStart,eStart,eStart,eStart,eStart)#28-2f 
+GB2312_st = (
+    eError,eStart,eStart,eStart,eStart,eStart,     3,eError,#00-07
+    eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
+    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17
+         4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f
+    eError,eError,     5,eError,eError,eError,eItsMe,eError,#20-27
+    eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
+)
 
-# To be accurate, the length of class 6 can be either 2 or 4. 
-# But it is not necessary to discriminate between the two since 
-# it is used for frequency analysis only, and we are validing 
-# each code range there as well. So it is safe to set it to be 
-# 2 here. 
+# To be accurate, the length of class 6 can be either 2 or 4.
+# But it is not necessary to discriminate between the two since
+# it is used for frequency analysis only, and we are validating
+# each code range there as well. So it is safe to set it to be
+# 2 here.
 GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)
 
 GB2312SMModel = {'classTable': GB2312_cls,
@@ -286,46 +336,48 @@ GB2312SMModel = {'classTable': GB2312_cls,
 
 # Shift_JIS
 
-SJIS_cls = ( \
-    1,1,1,1,1,1,1,1,  # 00 - 07 
-    1,1,1,1,1,1,0,0,  # 08 - 0f 
-    1,1,1,1,1,1,1,1,  # 10 - 17 
-    1,1,1,0,1,1,1,1,  # 18 - 1f 
-    1,1,1,1,1,1,1,1,  # 20 - 27 
-    1,1,1,1,1,1,1,1,  # 28 - 2f 
-    1,1,1,1,1,1,1,1,  # 30 - 37 
-    1,1,1,1,1,1,1,1,  # 38 - 3f 
-    2,2,2,2,2,2,2,2,  # 40 - 47 
-    2,2,2,2,2,2,2,2,  # 48 - 4f 
-    2,2,2,2,2,2,2,2,  # 50 - 57 
-    2,2,2,2,2,2,2,2,  # 58 - 5f 
-    2,2,2,2,2,2,2,2,  # 60 - 67 
-    2,2,2,2,2,2,2,2,  # 68 - 6f 
-    2,2,2,2,2,2,2,2,  # 70 - 77 
-    2,2,2,2,2,2,2,1,  # 78 - 7f 
-    3,3,3,3,3,3,3,3,  # 80 - 87 
-    3,3,3,3,3,3,3,3,  # 88 - 8f 
-    3,3,3,3,3,3,3,3,  # 90 - 97 
-    3,3,3,3,3,3,3,3,  # 98 - 9f 
-    #0xa0 is illegal in sjis encoding, but some pages does 
+SJIS_cls = (
+    1,1,1,1,1,1,1,1,  # 00 - 07
+    1,1,1,1,1,1,0,0,  # 08 - 0f
+    1,1,1,1,1,1,1,1,  # 10 - 17
+    1,1,1,0,1,1,1,1,  # 18 - 1f
+    1,1,1,1,1,1,1,1,  # 20 - 27
+    1,1,1,1,1,1,1,1,  # 28 - 2f
+    1,1,1,1,1,1,1,1,  # 30 - 37
+    1,1,1,1,1,1,1,1,  # 38 - 3f
+    2,2,2,2,2,2,2,2,  # 40 - 47
+    2,2,2,2,2,2,2,2,  # 48 - 4f
+    2,2,2,2,2,2,2,2,  # 50 - 57
+    2,2,2,2,2,2,2,2,  # 58 - 5f
+    2,2,2,2,2,2,2,2,  # 60 - 67
+    2,2,2,2,2,2,2,2,  # 68 - 6f
+    2,2,2,2,2,2,2,2,  # 70 - 77
+    2,2,2,2,2,2,2,1,  # 78 - 7f
+    3,3,3,3,3,2,2,3,  # 80 - 87
+    3,3,3,3,3,3,3,3,  # 88 - 8f
+    3,3,3,3,3,3,3,3,  # 90 - 97
+    3,3,3,3,3,3,3,3,  # 98 - 9f
+    #0xa0 is illegal in sjis encoding, but some pages do
     #contain such byte. We need to be more error forgiven.
-    2,2,2,2,2,2,2,2,  # a0 - a7     
-    2,2,2,2,2,2,2,2,  # a8 - af 
-    2,2,2,2,2,2,2,2,  # b0 - b7 
-    2,2,2,2,2,2,2,2,  # b8 - bf 
-    2,2,2,2,2,2,2,2,  # c0 - c7 
-    2,2,2,2,2,2,2,2,  # c8 - cf 
-    2,2,2,2,2,2,2,2,  # d0 - d7 
-    2,2,2,2,2,2,2,2,  # d8 - df 
-    3,3,3,3,3,3,3,3,  # e0 - e7 
-    3,3,3,3,3,4,4,4,  # e8 - ef 
-    4,4,4,4,4,4,4,4,  # f0 - f7 
-    4,4,4,4,4,0,0,0)  # f8 - ff 
+    2,2,2,2,2,2,2,2,  # a0 - a7
+    2,2,2,2,2,2,2,2,  # a8 - af
+    2,2,2,2,2,2,2,2,  # b0 - b7
+    2,2,2,2,2,2,2,2,  # b8 - bf
+    2,2,2,2,2,2,2,2,  # c0 - c7
+    2,2,2,2,2,2,2,2,  # c8 - cf
+    2,2,2,2,2,2,2,2,  # d0 - d7
+    2,2,2,2,2,2,2,2,  # d8 - df
+    3,3,3,3,3,3,3,3,  # e0 - e7
+    3,3,3,3,3,4,4,4,  # e8 - ef
+    3,3,3,3,3,3,3,3,  # f0 - f7
+    3,3,3,3,3,0,0,0)  # f8 - ff
 
-SJIS_st = ( \
-    eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07 
-    eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f 
-    eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart)#10-17 
+
+SJIS_st = (
+    eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07
+    eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+    eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17
+)
 
 SJISCharLenTable = (0, 1, 1, 2, 0, 0)
 
@@ -337,48 +389,50 @@ SJISSMModel = {'classTable': SJIS_cls,
 
 # UCS2-BE
 
-UCS2BE_cls = ( \
-    0,0,0,0,0,0,0,0,  # 00 - 07 
-    0,0,1,0,0,2,0,0,  # 08 - 0f 
-    0,0,0,0,0,0,0,0,  # 10 - 17 
-    0,0,0,3,0,0,0,0,  # 18 - 1f 
-    0,0,0,0,0,0,0,0,  # 20 - 27 
-    0,3,3,3,3,3,0,0,  # 28 - 2f 
-    0,0,0,0,0,0,0,0,  # 30 - 37 
-    0,0,0,0,0,0,0,0,  # 38 - 3f 
-    0,0,0,0,0,0,0,0,  # 40 - 47 
-    0,0,0,0,0,0,0,0,  # 48 - 4f 
-    0,0,0,0,0,0,0,0,  # 50 - 57 
-    0,0,0,0,0,0,0,0,  # 58 - 5f 
-    0,0,0,0,0,0,0,0,  # 60 - 67 
-    0,0,0,0,0,0,0,0,  # 68 - 6f 
-    0,0,0,0,0,0,0,0,  # 70 - 77 
-    0,0,0,0,0,0,0,0,  # 78 - 7f 
-    0,0,0,0,0,0,0,0,  # 80 - 87 
-    0,0,0,0,0,0,0,0,  # 88 - 8f 
-    0,0,0,0,0,0,0,0,  # 90 - 97 
-    0,0,0,0,0,0,0,0,  # 98 - 9f 
-    0,0,0,0,0,0,0,0,  # a0 - a7 
-    0,0,0,0,0,0,0,0,  # a8 - af 
-    0,0,0,0,0,0,0,0,  # b0 - b7 
-    0,0,0,0,0,0,0,0,  # b8 - bf 
-    0,0,0,0,0,0,0,0,  # c0 - c7 
-    0,0,0,0,0,0,0,0,  # c8 - cf 
-    0,0,0,0,0,0,0,0,  # d0 - d7 
-    0,0,0,0,0,0,0,0,  # d8 - df 
-    0,0,0,0,0,0,0,0,  # e0 - e7 
-    0,0,0,0,0,0,0,0,  # e8 - ef 
-    0,0,0,0,0,0,0,0,  # f0 - f7 
-    0,0,0,0,0,0,4,5)  # f8 - ff 
+UCS2BE_cls = (
+    0,0,0,0,0,0,0,0,  # 00 - 07
+    0,0,1,0,0,2,0,0,  # 08 - 0f
+    0,0,0,0,0,0,0,0,  # 10 - 17
+    0,0,0,3,0,0,0,0,  # 18 - 1f
+    0,0,0,0,0,0,0,0,  # 20 - 27
+    0,3,3,3,3,3,0,0,  # 28 - 2f
+    0,0,0,0,0,0,0,0,  # 30 - 37
+    0,0,0,0,0,0,0,0,  # 38 - 3f
+    0,0,0,0,0,0,0,0,  # 40 - 47
+    0,0,0,0,0,0,0,0,  # 48 - 4f
+    0,0,0,0,0,0,0,0,  # 50 - 57
+    0,0,0,0,0,0,0,0,  # 58 - 5f
+    0,0,0,0,0,0,0,0,  # 60 - 67
+    0,0,0,0,0,0,0,0,  # 68 - 6f
+    0,0,0,0,0,0,0,0,  # 70 - 77
+    0,0,0,0,0,0,0,0,  # 78 - 7f
+    0,0,0,0,0,0,0,0,  # 80 - 87
+    0,0,0,0,0,0,0,0,  # 88 - 8f
+    0,0,0,0,0,0,0,0,  # 90 - 97
+    0,0,0,0,0,0,0,0,  # 98 - 9f
+    0,0,0,0,0,0,0,0,  # a0 - a7
+    0,0,0,0,0,0,0,0,  # a8 - af
+    0,0,0,0,0,0,0,0,  # b0 - b7
+    0,0,0,0,0,0,0,0,  # b8 - bf
+    0,0,0,0,0,0,0,0,  # c0 - c7
+    0,0,0,0,0,0,0,0,  # c8 - cf
+    0,0,0,0,0,0,0,0,  # d0 - d7
+    0,0,0,0,0,0,0,0,  # d8 - df
+    0,0,0,0,0,0,0,0,  # e0 - e7
+    0,0,0,0,0,0,0,0,  # e8 - ef
+    0,0,0,0,0,0,0,0,  # f0 - f7
+    0,0,0,0,0,0,4,5   # f8 - ff
+)
 
-UCS2BE_st = ( \
-          5,     7,     7,eError,     4,     3,eError,eError,#00-07 
-     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f 
-     eItsMe,eItsMe,     6,     6,     6,     6,eError,eError,#10-17 
-          6,     6,     6,     6,     6,eItsMe,     6,     6,#18-1f 
-          6,     6,     6,     6,     5,     7,     7,eError,#20-27 
-          5,     8,     6,     6,eError,     6,     6,     6,#28-2f 
-          6,     6,     6,     6,eError,eError,eStart,eStart)#30-37 
+UCS2BE_st  = (
+          5,     7,     7,eError,     4,     3,eError,eError,#00-07
+     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+     eItsMe,eItsMe,     6,     6,     6,     6,eError,eError,#10-17
+          6,     6,     6,     6,     6,eItsMe,     6,     6,#18-1f
+          6,     6,     6,     6,     5,     7,     7,eError,#20-27
+          5,     8,     6,     6,eError,     6,     6,     6,#28-2f
+          6,     6,     6,     6,eError,eError,eStart,eStart #30-37
+)
 
 UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)
 
@@ -390,48 +444,50 @@ UCS2BESMModel = {'classTable': UCS2BE_cls,
 
 # UCS2-LE
 
-UCS2LE_cls = ( \
-    0,0,0,0,0,0,0,0,  # 00 - 07 
-    0,0,1,0,0,2,0,0,  # 08 - 0f 
-    0,0,0,0,0,0,0,0,  # 10 - 17 
-    0,0,0,3,0,0,0,0,  # 18 - 1f 
-    0,0,0,0,0,0,0,0,  # 20 - 27 
-    0,3,3,3,3,3,0,0,  # 28 - 2f 
-    0,0,0,0,0,0,0,0,  # 30 - 37 
-    0,0,0,0,0,0,0,0,  # 38 - 3f 
-    0,0,0,0,0,0,0,0,  # 40 - 47 
-    0,0,0,0,0,0,0,0,  # 48 - 4f 
-    0,0,0,0,0,0,0,0,  # 50 - 57 
-    0,0,0,0,0,0,0,0,  # 58 - 5f 
-    0,0,0,0,0,0,0,0,  # 60 - 67 
-    0,0,0,0,0,0,0,0,  # 68 - 6f 
-    0,0,0,0,0,0,0,0,  # 70 - 77 
-    0,0,0,0,0,0,0,0,  # 78 - 7f 
-    0,0,0,0,0,0,0,0,  # 80 - 87 
-    0,0,0,0,0,0,0,0,  # 88 - 8f 
-    0,0,0,0,0,0,0,0,  # 90 - 97 
-    0,0,0,0,0,0,0,0,  # 98 - 9f 
-    0,0,0,0,0,0,0,0,  # a0 - a7 
-    0,0,0,0,0,0,0,0,  # a8 - af 
-    0,0,0,0,0,0,0,0,  # b0 - b7 
-    0,0,0,0,0,0,0,0,  # b8 - bf 
-    0,0,0,0,0,0,0,0,  # c0 - c7 
-    0,0,0,0,0,0,0,0,  # c8 - cf 
-    0,0,0,0,0,0,0,0,  # d0 - d7 
-    0,0,0,0,0,0,0,0,  # d8 - df 
-    0,0,0,0,0,0,0,0,  # e0 - e7 
-    0,0,0,0,0,0,0,0,  # e8 - ef 
-    0,0,0,0,0,0,0,0,  # f0 - f7 
-    0,0,0,0,0,0,4,5)  # f8 - ff 
+UCS2LE_cls = (
+    0,0,0,0,0,0,0,0,  # 00 - 07
+    0,0,1,0,0,2,0,0,  # 08 - 0f
+    0,0,0,0,0,0,0,0,  # 10 - 17
+    0,0,0,3,0,0,0,0,  # 18 - 1f
+    0,0,0,0,0,0,0,0,  # 20 - 27
+    0,3,3,3,3,3,0,0,  # 28 - 2f
+    0,0,0,0,0,0,0,0,  # 30 - 37
+    0,0,0,0,0,0,0,0,  # 38 - 3f
+    0,0,0,0,0,0,0,0,  # 40 - 47
+    0,0,0,0,0,0,0,0,  # 48 - 4f
+    0,0,0,0,0,0,0,0,  # 50 - 57
+    0,0,0,0,0,0,0,0,  # 58 - 5f
+    0,0,0,0,0,0,0,0,  # 60 - 67
+    0,0,0,0,0,0,0,0,  # 68 - 6f
+    0,0,0,0,0,0,0,0,  # 70 - 77
+    0,0,0,0,0,0,0,0,  # 78 - 7f
+    0,0,0,0,0,0,0,0,  # 80 - 87
+    0,0,0,0,0,0,0,0,  # 88 - 8f
+    0,0,0,0,0,0,0,0,  # 90 - 97
+    0,0,0,0,0,0,0,0,  # 98 - 9f
+    0,0,0,0,0,0,0,0,  # a0 - a7
+    0,0,0,0,0,0,0,0,  # a8 - af
+    0,0,0,0,0,0,0,0,  # b0 - b7
+    0,0,0,0,0,0,0,0,  # b8 - bf
+    0,0,0,0,0,0,0,0,  # c0 - c7
+    0,0,0,0,0,0,0,0,  # c8 - cf
+    0,0,0,0,0,0,0,0,  # d0 - d7
+    0,0,0,0,0,0,0,0,  # d8 - df
+    0,0,0,0,0,0,0,0,  # e0 - e7
+    0,0,0,0,0,0,0,0,  # e8 - ef
+    0,0,0,0,0,0,0,0,  # f0 - f7
+    0,0,0,0,0,0,4,5   # f8 - ff
+)
 
-UCS2LE_st = ( \
-          6,     6,     7,     6,     4,     3,eError,eError,#00-07 
-     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f 
-     eItsMe,eItsMe,     5,     5,     5,eError,eItsMe,eError,#10-17 
-          5,     5,     5,eError,     5,eError,     6,     6,#18-1f 
-          7,     6,     8,     8,     5,     5,     5,eError,#20-27 
-          5,     5,     5,eError,eError,eError,     5,     5,#28-2f 
-          5,     5,     5,eError,     5,eError,eStart,eStart)#30-37 
+UCS2LE_st = (
+          6,     6,     7,     6,     4,     3,eError,eError,#00-07
+     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+     eItsMe,eItsMe,     5,     5,     5,eError,eItsMe,eError,#10-17
+          5,     5,     5,eError,     5,eError,     6,     6,#18-1f
+          7,     6,     8,     8,     5,     5,     5,eError,#20-27
+          5,     5,     5,eError,eError,eError,     5,     5,#28-2f
+          5,     5,     5,eError,     5,eError,eStart,eStart #30-37
+)
 
 UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)
 
@@ -443,67 +499,69 @@ UCS2LESMModel = {'classTable': UCS2LE_cls,
 
 # UTF-8
 
-UTF8_cls = ( \
+UTF8_cls = (
     1,1,1,1,1,1,1,1,  # 00 - 07  #allow 0x00 as a legal value
-    1,1,1,1,1,1,0,0,  # 08 - 0f 
-    1,1,1,1,1,1,1,1,  # 10 - 17 
-    1,1,1,0,1,1,1,1,  # 18 - 1f 
-    1,1,1,1,1,1,1,1,  # 20 - 27 
-    1,1,1,1,1,1,1,1,  # 28 - 2f 
-    1,1,1,1,1,1,1,1,  # 30 - 37 
-    1,1,1,1,1,1,1,1,  # 38 - 3f 
-    1,1,1,1,1,1,1,1,  # 40 - 47 
-    1,1,1,1,1,1,1,1,  # 48 - 4f 
-    1,1,1,1,1,1,1,1,  # 50 - 57 
-    1,1,1,1,1,1,1,1,  # 58 - 5f 
-    1,1,1,1,1,1,1,1,  # 60 - 67 
-    1,1,1,1,1,1,1,1,  # 68 - 6f 
-    1,1,1,1,1,1,1,1,  # 70 - 77 
-    1,1,1,1,1,1,1,1,  # 78 - 7f 
-    2,2,2,2,3,3,3,3,  # 80 - 87 
-    4,4,4,4,4,4,4,4,  # 88 - 8f 
-    4,4,4,4,4,4,4,4,  # 90 - 97 
-    4,4,4,4,4,4,4,4,  # 98 - 9f 
-    5,5,5,5,5,5,5,5,  # a0 - a7 
-    5,5,5,5,5,5,5,5,  # a8 - af 
-    5,5,5,5,5,5,5,5,  # b0 - b7 
-    5,5,5,5,5,5,5,5,  # b8 - bf 
-    0,0,6,6,6,6,6,6,  # c0 - c7 
-    6,6,6,6,6,6,6,6,  # c8 - cf 
-    6,6,6,6,6,6,6,6,  # d0 - d7 
-    6,6,6,6,6,6,6,6,  # d8 - df 
-    7,8,8,8,8,8,8,8,  # e0 - e7 
-    8,8,8,8,8,9,8,8,  # e8 - ef 
-    10,11,11,11,11,11,11,11,  # f0 - f7 
-    12,13,13,13,14,15,0,0)   # f8 - ff 
+    1,1,1,1,1,1,0,0,  # 08 - 0f
+    1,1,1,1,1,1,1,1,  # 10 - 17
+    1,1,1,0,1,1,1,1,  # 18 - 1f
+    1,1,1,1,1,1,1,1,  # 20 - 27
+    1,1,1,1,1,1,1,1,  # 28 - 2f
+    1,1,1,1,1,1,1,1,  # 30 - 37
+    1,1,1,1,1,1,1,1,  # 38 - 3f
+    1,1,1,1,1,1,1,1,  # 40 - 47
+    1,1,1,1,1,1,1,1,  # 48 - 4f
+    1,1,1,1,1,1,1,1,  # 50 - 57
+    1,1,1,1,1,1,1,1,  # 58 - 5f
+    1,1,1,1,1,1,1,1,  # 60 - 67
+    1,1,1,1,1,1,1,1,  # 68 - 6f
+    1,1,1,1,1,1,1,1,  # 70 - 77
+    1,1,1,1,1,1,1,1,  # 78 - 7f
+    2,2,2,2,3,3,3,3,  # 80 - 87
+    4,4,4,4,4,4,4,4,  # 88 - 8f
+    4,4,4,4,4,4,4,4,  # 90 - 97
+    4,4,4,4,4,4,4,4,  # 98 - 9f
+    5,5,5,5,5,5,5,5,  # a0 - a7
+    5,5,5,5,5,5,5,5,  # a8 - af
+    5,5,5,5,5,5,5,5,  # b0 - b7
+    5,5,5,5,5,5,5,5,  # b8 - bf
+    0,0,6,6,6,6,6,6,  # c0 - c7
+    6,6,6,6,6,6,6,6,  # c8 - cf
+    6,6,6,6,6,6,6,6,  # d0 - d7
+    6,6,6,6,6,6,6,6,  # d8 - df
+    7,8,8,8,8,8,8,8,  # e0 - e7
+    8,8,8,8,8,9,8,8,  # e8 - ef
+    10,11,11,11,11,11,11,11,  # f0 - f7
+    12,13,13,13,14,15,0,0    # f8 - ff
+)
 
-UTF8_st = ( \
-    eError,eStart,eError,eError,eError,eError,     12,   10,#00-07 
-         9,     11,     8,     7,     6,     5,     4,    3,#08-0f 
-    eError,eError,eError,eError,eError,eError,eError,eError,#10-17 
-    eError,eError,eError,eError,eError,eError,eError,eError,#18-1f 
-    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27 
-    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f 
-    eError,eError,     5,     5,     5,     5,eError,eError,#30-37 
-    eError,eError,eError,eError,eError,eError,eError,eError,#38-3f 
-    eError,eError,eError,     5,     5,     5,eError,eError,#40-47 
-    eError,eError,eError,eError,eError,eError,eError,eError,#48-4f 
-    eError,eError,     7,     7,     7,     7,eError,eError,#50-57 
-    eError,eError,eError,eError,eError,eError,eError,eError,#58-5f 
-    eError,eError,eError,eError,     7,     7,eError,eError,#60-67 
-    eError,eError,eError,eError,eError,eError,eError,eError,#68-6f 
-    eError,eError,     9,     9,     9,     9,eError,eError,#70-77 
-    eError,eError,eError,eError,eError,eError,eError,eError,#78-7f 
-    eError,eError,eError,eError,eError,     9,eError,eError,#80-87 
-    eError,eError,eError,eError,eError,eError,eError,eError,#88-8f 
-    eError,eError,    12,    12,    12,    12,eError,eError,#90-97 
-    eError,eError,eError,eError,eError,eError,eError,eError,#98-9f 
-    eError,eError,eError,eError,eError,    12,eError,eError,#a0-a7 
-    eError,eError,eError,eError,eError,eError,eError,eError,#a8-af 
-    eError,eError,    12,    12,    12,eError,eError,eError,#b0-b7 
-    eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf 
-    eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7 
-    eError,eError,eError,eError,eError,eError,eError,eError)#c8-cf 
+UTF8_st = (
+    eError,eStart,eError,eError,eError,eError,     12,   10,#00-07
+         9,     11,     8,     7,     6,     5,     4,    3,#08-0f
+    eError,eError,eError,eError,eError,eError,eError,eError,#10-17
+    eError,eError,eError,eError,eError,eError,eError,eError,#18-1f
+    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27
+    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f
+    eError,eError,     5,     5,     5,     5,eError,eError,#30-37
+    eError,eError,eError,eError,eError,eError,eError,eError,#38-3f
+    eError,eError,eError,     5,     5,     5,eError,eError,#40-47
+    eError,eError,eError,eError,eError,eError,eError,eError,#48-4f
+    eError,eError,     7,     7,     7,     7,eError,eError,#50-57
+    eError,eError,eError,eError,eError,eError,eError,eError,#58-5f
+    eError,eError,eError,eError,     7,     7,eError,eError,#60-67
+    eError,eError,eError,eError,eError,eError,eError,eError,#68-6f
+    eError,eError,     9,     9,     9,     9,eError,eError,#70-77
+    eError,eError,eError,eError,eError,eError,eError,eError,#78-7f
+    eError,eError,eError,eError,eError,     9,eError,eError,#80-87
+    eError,eError,eError,eError,eError,eError,eError,eError,#88-8f
+    eError,eError,    12,    12,    12,    12,eError,eError,#90-97
+    eError,eError,eError,eError,eError,eError,eError,eError,#98-9f
+    eError,eError,eError,eError,eError,    12,eError,eError,#a0-a7
+    eError,eError,eError,eError,eError,eError,eError,eError,#a8-af
+    eError,eError,    12,    12,    12,eError,eError,eError,#b0-b7
+    eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf
+    eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7
+    eError,eError,eError,eError,eError,eError,eError,eError #c8-cf
+)
 
 UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
 
diff --git a/thirdparty/chardet/sbcharsetprober.py b/thirdparty/chardet/sbcharsetprober.py
index f92fc14c8..37291bd27 100644
--- a/thirdparty/chardet/sbcharsetprober.py
+++ b/thirdparty/chardet/sbcharsetprober.py
@@ -14,20 +14,22 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants, sys
-from charsetprober import CharSetProber
+import sys
+from . import constants
+from .charsetprober import CharSetProber
+from .compat import wrap_ord
 
 SAMPLE_SIZE = 64
 SB_ENOUGH_REL_THRESHOLD = 1024
@@ -38,21 +40,26 @@ NUMBER_OF_SEQ_CAT = 4
 POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
 #NEGATIVE_CAT = 0
 
+
 class SingleByteCharSetProber(CharSetProber):
-    def __init__(self, model, reversed=constants.False, nameProber=None):
+    def __init__(self, model, reversed=False, nameProber=None):
         CharSetProber.__init__(self)
         self._mModel = model
-        self._mReversed = reversed # TRUE if we need to reverse every pair in the model lookup
-        self._mNameProber = nameProber # Optional auxiliary prober for name decision
+        # TRUE if we need to reverse every pair in the model lookup
+        self._mReversed = reversed
+        # Optional auxiliary prober for name decision
+        self._mNameProber = nameProber
         self.reset()
 
     def reset(self):
         CharSetProber.reset(self)
-        self._mLastOrder = 255 # char order of last character
+        # char order of last character
+        self._mLastOrder = 255
         self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
         self._mTotalSeqs = 0
         self._mTotalChar = 0
-        self._mFreqChar = 0 # characters that fall in our sampling range
+        # characters that fall in our sampling range
+        self._mFreqChar = 0
 
     def get_charset_name(self):
         if self._mNameProber:
@@ -67,7 +74,7 @@ class SingleByteCharSetProber(CharSetProber):
         if not aLen:
             return self.get_state()
         for c in aBuf:
-            order = self._mModel['charToOrderMap'][ord(c)]
+            order = self._mModel['charToOrderMap'][wrap_ord(c)]
             if order < SYMBOL_CAT_ORDER:
                 self._mTotalChar += 1
             if order < SAMPLE_SIZE:
@@ -75,9 +82,12 @@ class SingleByteCharSetProber(CharSetProber):
                 if self._mLastOrder < SAMPLE_SIZE:
                     self._mTotalSeqs += 1
                     if not self._mReversed:
-                        self._mSeqCounters[self._mModel['precedenceMatrix'][(self._mLastOrder * SAMPLE_SIZE) + order]] += 1
-                    else: # reverse the order of the letters in the lookup
-                        self._mSeqCounters[self._mModel['precedenceMatrix'][(order * SAMPLE_SIZE) + self._mLastOrder]] += 1
+                        i = (self._mLastOrder * SAMPLE_SIZE) + order
+                        model = self._mModel['precedenceMatrix'][i]
+                    else:  # reverse the order of the letters in the lookup
+                        i = (order * SAMPLE_SIZE) + self._mLastOrder
+                        model = self._mModel['precedenceMatrix'][i]
+                    self._mSeqCounters[model] += 1
             self._mLastOrder = order
 
         if self.get_state() == constants.eDetecting:
@@ -85,11 +95,16 @@ class SingleByteCharSetProber(CharSetProber):
                 cf = self.get_confidence()
                 if cf > POSITIVE_SHORTCUT_THRESHOLD:
                     if constants._debug:
-                        sys.stderr.write('%s confidence = %s, we have a winner\n' % (self._mModel['charsetName'], cf))
+                        sys.stderr.write('%s confidence = %s, we have a '
+                                         'winner\n' %
+                                         (self._mModel['charsetName'], cf))
                     self._mState = constants.eFoundIt
                 elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
                     if constants._debug:
-                        sys.stderr.write('%s confidence = %s, below negative shortcut threshhold %s\n' % (self._mModel['charsetName'], cf, NEGATIVE_SHORTCUT_THRESHOLD))
+                        sys.stderr.write('%s confidence = %s, below negative '
+                                         'shortcut threshhold %s\n' %
+                                         (self._mModel['charsetName'], cf,
+                                          NEGATIVE_SHORTCUT_THRESHOLD))
                     self._mState = constants.eNotMe
 
         return self.get_state()
@@ -97,9 +112,8 @@ class SingleByteCharSetProber(CharSetProber):
     def get_confidence(self):
         r = 0.01
         if self._mTotalSeqs > 0:
-#            print self._mSeqCounters[POSITIVE_CAT], self._mTotalSeqs, self._mModel['mTypicalPositiveRatio']
-            r = (1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs / self._mModel['mTypicalPositiveRatio']
-#            print r, self._mFreqChar, self._mTotalChar
+            r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
+                 / self._mModel['mTypicalPositiveRatio'])
             r = r * self._mFreqChar / self._mTotalChar
             if r >= 1.0:
                 r = 0.99
diff --git a/thirdparty/chardet/sbcsgroupprober.py b/thirdparty/chardet/sbcsgroupprober.py
index d19160c86..1b6196cd1 100644
--- a/thirdparty/chardet/sbcsgroupprober.py
+++ b/thirdparty/chardet/sbcsgroupprober.py
@@ -14,33 +14,35 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants, sys
-from charsetgroupprober import CharSetGroupProber
-from sbcharsetprober import SingleByteCharSetProber
-from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model
-from langgreekmodel import Latin7GreekModel, Win1253GreekModel
-from langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
-from langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
-from langthaimodel import TIS620ThaiModel
-from langhebrewmodel import Win1255HebrewModel
-from hebrewprober import HebrewProber
+from .charsetgroupprober import CharSetGroupProber
+from .sbcharsetprober import SingleByteCharSetProber
+from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
+                                Latin5CyrillicModel, MacCyrillicModel,
+                                Ibm866Model, Ibm855Model)
+from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
+from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
+from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
+from .langthaimodel import TIS620ThaiModel
+from .langhebrewmodel import Win1255HebrewModel
+from .hebrewprober import HebrewProber
+
 
 class SBCSGroupProber(CharSetGroupProber):
     def __init__(self):
         CharSetGroupProber.__init__(self)
-        self._mProbers = [ \
+        self._mProbers = [
             SingleByteCharSetProber(Win1251CyrillicModel),
             SingleByteCharSetProber(Koi8rModel),
             SingleByteCharSetProber(Latin5CyrillicModel),
@@ -54,11 +56,14 @@ class SBCSGroupProber(CharSetGroupProber):
             SingleByteCharSetProber(Latin2HungarianModel),
             SingleByteCharSetProber(Win1250HungarianModel),
             SingleByteCharSetProber(TIS620ThaiModel),
-            ]
+        ]
         hebrewProber = HebrewProber()
-        logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.False, hebrewProber)
-        visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.True, hebrewProber)
+        logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,
+                                                      False, hebrewProber)
+        visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,
+                                                     hebrewProber)
         hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)
-        self._mProbers.extend([hebrewProber, logicalHebrewProber, visualHebrewProber])
+        self._mProbers.extend([hebrewProber, logicalHebrewProber,
+                               visualHebrewProber])
 
         self.reset()
diff --git a/thirdparty/chardet/sjisprober.py b/thirdparty/chardet/sjisprober.py
index 8f69f60be..cd0e9e707 100644
--- a/thirdparty/chardet/sjisprober.py
+++ b/thirdparty/chardet/sjisprober.py
@@ -13,25 +13,26 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-from mbcharsetprober import MultiByteCharSetProber
-from codingstatemachine import CodingStateMachine
-from chardistribution import SJISDistributionAnalysis
-from jpcntx import SJISContextAnalysis
-from mbcssm import SJISSMModel
-import constants, sys
-from constants import eStart, eError, eItsMe
+import sys
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import SJISDistributionAnalysis
+from .jpcntx import SJISContextAnalysis
+from .mbcssm import SJISSMModel
+from . import constants
+
 
 class SJISProber(MultiByteCharSetProber):
     def __init__(self):
@@ -46,35 +47,40 @@ class SJISProber(MultiByteCharSetProber):
         self._mContextAnalyzer.reset()
 
     def get_charset_name(self):
-        return "SHIFT_JIS"
+        return self._mContextAnalyzer.get_charset_name()
 
     def feed(self, aBuf):
         aLen = len(aBuf)
-        for i in xrange(0, aLen):
+        for i in range(0, aLen):
             codingState = self._mCodingSM.next_state(aBuf[i])
-            if codingState == eError:
+            if codingState == constants.eError:
                 if constants._debug:
-                    sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n')
+                    sys.stderr.write(self.get_charset_name()
+                                     + ' prober hit error at byte ' + str(i)
+                                     + '\n')
                 self._mState = constants.eNotMe
                 break
-            elif codingState == eItsMe:
+            elif codingState == constants.eItsMe:
                 self._mState = constants.eFoundIt
                 break
-            elif codingState == eStart:
+            elif codingState == constants.eStart:
                 charLen = self._mCodingSM.get_current_charlen()
                 if i == 0:
                     self._mLastChar[1] = aBuf[0]
-                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen :], charLen)
+                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
+                                                charLen)
                     self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                 else:
-                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen : i + 3 - charLen], charLen)
-                    self._mDistributionAnalyzer.feed(aBuf[i - 1 : i + 1], charLen)
+                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
+                                                     - charLen], charLen)
+                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
+                                                     charLen)
 
         self._mLastChar[0] = aBuf[aLen - 1]
 
         if self.get_state() == constants.eDetecting:
-            if self._mContextAnalyzer.got_enough_data() and \
-                   (self.get_confidence() > constants.SHORTCUT_THRESHOLD):
+            if (self._mContextAnalyzer.got_enough_data() and
+               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                 self._mState = constants.eFoundIt
 
         return self.get_state()
diff --git a/thirdparty/chardet/test.py b/thirdparty/chardet/test.py
deleted file mode 100644
index 2ebf3a4dc..000000000
--- a/thirdparty/chardet/test.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import sys, glob
-sys.path.insert(0, '..')
-from chardet.universaldetector import UniversalDetector
-
-count = 0
-u = UniversalDetector()
-for f in glob.glob(sys.argv[1]):
-    print f.ljust(60),
-    u.reset()
-    for line in file(f, 'rb'):
-        u.feed(line)
-        if u.done: break
-    u.close()
-    result = u.result
-    if result['encoding']:
-        print result['encoding'], 'with confidence', result['confidence']
-    else:
-        print '******** no result'
-    count += 1
-print count, 'tests'
diff --git a/thirdparty/chardet/universaldetector.py b/thirdparty/chardet/universaldetector.py
index a08425f87..476522b99 100644
--- a/thirdparty/chardet/universaldetector.py
+++ b/thirdparty/chardet/universaldetector.py
@@ -14,23 +14,25 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants, sys
-from latin1prober import Latin1Prober # windows-1252
-from mbcsgroupprober import MBCSGroupProber # multi-byte character sets
-from sbcsgroupprober import SBCSGroupProber # single-byte character sets
-from escprober import EscCharSetProber # ISO-2122, etc.
+from . import constants
+import sys
+import codecs
+from .latin1prober import Latin1Prober  # windows-1252
+from .mbcsgroupprober import MBCSGroupProber  # multi-byte character sets
+from .sbcsgroupprober import SBCSGroupProber  # single-byte character sets
+from .escprober import EscCharSetProber  # ISO-2022, etc.
 import re
 
 MINIMUM_THRESHOLD = 0.20
@@ -38,68 +40,78 @@ ePureAscii = 0
 eEscAscii = 1
 eHighbyte = 2
 
+
 class UniversalDetector:
     def __init__(self):
-        self._highBitDetector = re.compile(r'[\x80-\xFF]')
-        self._escDetector = re.compile(r'(\033|~{)')
+        self._highBitDetector = re.compile(b'[\x80-\xFF]')
+        self._escDetector = re.compile(b'(\033|~{)')
         self._mEscCharSetProber = None
         self._mCharSetProbers = []
         self.reset()
 
     def reset(self):
         self.result = {'encoding': None, 'confidence': 0.0}
-        self.done = constants.False
-        self._mStart = constants.True
-        self._mGotData = constants.False
+        self.done = False
+        self._mStart = True
+        self._mGotData = False
         self._mInputState = ePureAscii
-        self._mLastChar = ''
+        self._mLastChar = b''
         if self._mEscCharSetProber:
             self._mEscCharSetProber.reset()
         for prober in self._mCharSetProbers:
             prober.reset()
 
     def feed(self, aBuf):
-        if self.done: return
+        if self.done:
+            return
 
         aLen = len(aBuf)
-        if not aLen: return
+        if not aLen:
+            return
 
         if not self._mGotData:
             # If the data starts with BOM, we know it is UTF
-            if aBuf[:3] == '\xEF\xBB\xBF':
+            if aBuf[:3] == codecs.BOM_UTF8:
                 # EF BB BF  UTF-8 with BOM
-                self.result = {'encoding': "UTF-8", 'confidence': 1.0}
-            elif aBuf[:4] == '\xFF\xFE\x00\x00':
+                self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0}
+            elif aBuf[:4] == codecs.BOM_UTF32_LE:
                 # FF FE 00 00  UTF-32, little-endian BOM
                 self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
-            elif aBuf[:4] == '\x00\x00\xFE\xFF': 
+            elif aBuf[:4] == codecs.BOM_UTF32_BE:
                 # 00 00 FE FF  UTF-32, big-endian BOM
                 self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
-            elif aBuf[:4] == '\xFE\xFF\x00\x00':
+            elif aBuf[:4] == b'\xFE\xFF\x00\x00':
                 # FE FF 00 00  UCS-4, unusual octet order BOM (3412)
-                self.result = {'encoding': "X-ISO-10646-UCS-4-3412", 'confidence': 1.0}
-            elif aBuf[:4] == '\x00\x00\xFF\xFE':
+                self.result = {
+                    'encoding': "X-ISO-10646-UCS-4-3412",
+                    'confidence': 1.0
+                }
+            elif aBuf[:4] == b'\x00\x00\xFF\xFE':
                 # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
-                self.result = {'encoding': "X-ISO-10646-UCS-4-2143", 'confidence': 1.0}
-            elif aBuf[:2] == '\xFF\xFE':
+                self.result = {
+                    'encoding': "X-ISO-10646-UCS-4-2143",
+                    'confidence': 1.0
+                }
+            elif aBuf[:2] == codecs.BOM_LE:
                 # FF FE  UTF-16, little endian BOM
                 self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
-            elif aBuf[:2] == '\xFE\xFF':
+            elif aBuf[:2] == codecs.BOM_BE:
                 # FE FF  UTF-16, big endian BOM
                 self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}
 
-        self._mGotData = constants.True
+        self._mGotData = True
         if self.result['encoding'] and (self.result['confidence'] > 0.0):
-            self.done = constants.True
+            self.done = True
             return
 
         if self._mInputState == ePureAscii:
             if self._highBitDetector.search(aBuf):
                 self._mInputState = eHighbyte
-            elif (self._mInputState == ePureAscii) and self._escDetector.search(self._mLastChar + aBuf):
+            elif ((self._mInputState == ePureAscii) and
+                    self._escDetector.search(self._mLastChar + aBuf)):
                 self._mInputState = eEscAscii
 
-        self._mLastChar = aBuf[-1]
+        self._mLastChar = aBuf[-1:]
 
         if self._mInputState == eEscAscii:
             if not self._mEscCharSetProber:
@@ -107,24 +119,26 @@ class UniversalDetector:
             if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
                 self.result = {'encoding': self._mEscCharSetProber.get_charset_name(),
                                'confidence': self._mEscCharSetProber.get_confidence()}
-                self.done = constants.True
+                self.done = True
         elif self._mInputState == eHighbyte:
             if not self._mCharSetProbers:
-                self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(), Latin1Prober()]
+                self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
+                                         Latin1Prober()]
             for prober in self._mCharSetProbers:
                 if prober.feed(aBuf) == constants.eFoundIt:
                     self.result = {'encoding': prober.get_charset_name(),
                                    'confidence': prober.get_confidence()}
-                    self.done = constants.True
+                    self.done = True
                     break
 
     def close(self):
-        if self.done: return
+        if self.done:
+            return
         if not self._mGotData:
             if constants._debug:
                 sys.stderr.write('no data received!\n')
             return
-        self.done = constants.True
+        self.done = True
 
         if self._mInputState == ePureAscii:
             self.result = {'encoding': 'ascii', 'confidence': 1.0}
@@ -135,7 +149,8 @@ class UniversalDetector:
             maxProberConfidence = 0.0
             maxProber = None
             for prober in self._mCharSetProbers:
-                if not prober: continue
+                if not prober:
+                    continue
                 proberConfidence = prober.get_confidence()
                 if proberConfidence > maxProberConfidence:
                     maxProberConfidence = proberConfidence
@@ -148,7 +163,8 @@ class UniversalDetector:
         if constants._debug:
             sys.stderr.write('no probers hit minimum threshhold\n')
             for prober in self._mCharSetProbers[0].mProbers:
-                if not prober: continue
-                sys.stderr.write('%s confidence = %s\n' % \
-                                 (prober.get_charset_name(), \
+                if not prober:
+                    continue
+                sys.stderr.write('%s confidence = %s\n' %
+                                 (prober.get_charset_name(),
                                   prober.get_confidence()))
diff --git a/thirdparty/chardet/utf8prober.py b/thirdparty/chardet/utf8prober.py
index fec8548c8..1c0bb5d8f 100644
--- a/thirdparty/chardet/utf8prober.py
+++ b/thirdparty/chardet/utf8prober.py
@@ -13,26 +13,26 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants, sys
-from constants import eStart, eError, eItsMe
-from charsetprober import CharSetProber
-from codingstatemachine import CodingStateMachine
-from mbcssm import UTF8SMModel
+from . import constants
+from .charsetprober import CharSetProber
+from .codingstatemachine import CodingStateMachine
+from .mbcssm import UTF8SMModel
 
 ONE_CHAR_PROB = 0.5
 
+
 class UTF8Prober(CharSetProber):
     def __init__(self):
         CharSetProber.__init__(self)
@@ -50,13 +50,13 @@ class UTF8Prober(CharSetProber):
     def feed(self, aBuf):
         for c in aBuf:
             codingState = self._mCodingSM.next_state(c)
-            if codingState == eError:
+            if codingState == constants.eError:
                 self._mState = constants.eNotMe
                 break
-            elif codingState == eItsMe:
+            elif codingState == constants.eItsMe:
                 self._mState = constants.eFoundIt
                 break
-            elif codingState == eStart:
+            elif codingState == constants.eStart:
                 if self._mCodingSM.get_current_charlen() >= 2:
                     self._mNumOfMBChar += 1
 
@@ -69,7 +69,7 @@ class UTF8Prober(CharSetProber):
     def get_confidence(self):
         unlike = 0.99
         if self._mNumOfMBChar < 6:
-            for i in xrange(0, self._mNumOfMBChar):
+            for i in range(0, self._mNumOfMBChar):
                 unlike = unlike * ONE_CHAR_PROB
             return 1.0 - unlike
         else: