mirror of
https://github.com/sqlmapproject/sqlmap.git
synced 2024-11-22 01:26:42 +03:00
Update of 3rd party library chardet
This commit is contained in:
parent
75905e0cd9
commit
bacf18832a
|
@ -20,7 +20,7 @@ from thirdparty import six
|
||||||
from thirdparty.six import unichr as _unichr
|
from thirdparty.six import unichr as _unichr
|
||||||
|
|
||||||
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
|
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
|
||||||
VERSION = "1.6.3.1"
|
VERSION = "1.6.3.2"
|
||||||
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
|
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
|
||||||
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
|
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
|
||||||
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
|
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
|
||||||
|
|
31
thirdparty/chardet/__init__.py
vendored
31
thirdparty/chardet/__init__.py
vendored
|
@ -15,18 +15,25 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
__version__ = "2.3.0"
|
|
||||||
from sys import version_info
|
from .compat import PY2, PY3
|
||||||
|
from .universaldetector import UniversalDetector
|
||||||
|
from .version import __version__, VERSION
|
||||||
|
|
||||||
|
|
||||||
def detect(aBuf):
|
def detect(byte_str):
|
||||||
if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
|
"""
|
||||||
(version_info >= (3, 0) and not isinstance(aBuf, bytes))):
|
Detect the encoding of the given byte string.
|
||||||
raise ValueError('Expected a bytes object, not a unicode object')
|
|
||||||
|
|
||||||
from . import universaldetector
|
:param byte_str: The byte sequence to examine.
|
||||||
u = universaldetector.UniversalDetector()
|
:type byte_str: ``bytes`` or ``bytearray``
|
||||||
u.reset()
|
"""
|
||||||
u.feed(aBuf)
|
if not isinstance(byte_str, bytearray):
|
||||||
u.close()
|
if not isinstance(byte_str, bytes):
|
||||||
return u.result
|
raise TypeError('Expected object of type bytes or bytearray, got: '
|
||||||
|
'{0}'.format(type(byte_str)))
|
||||||
|
else:
|
||||||
|
byte_str = bytearray(byte_str)
|
||||||
|
detector = UniversalDetector()
|
||||||
|
detector.feed(byte_str)
|
||||||
|
return detector.close()
|
||||||
|
|
545
thirdparty/chardet/big5freq.py
vendored
545
thirdparty/chardet/big5freq.py
vendored
|
@ -45,7 +45,7 @@ BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
||||||
#Char to FreqOrder table
|
#Char to FreqOrder table
|
||||||
BIG5_TABLE_SIZE = 5376
|
BIG5_TABLE_SIZE = 5376
|
||||||
|
|
||||||
Big5CharToFreqOrder = (
|
BIG5_CHAR_TO_FREQ_ORDER = (
|
||||||
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16
|
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16
|
||||||
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32
|
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32
|
||||||
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48
|
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48
|
||||||
|
@ -381,545 +381,6 @@ Big5CharToFreqOrder = (
|
||||||
938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328
|
938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328
|
||||||
3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344
|
3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344
|
||||||
890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360
|
890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360
|
||||||
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376 #last 512
|
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376
|
||||||
#Everything below is of no interest for detection purpose
|
)
|
||||||
2522,1613,4812,5799,3345,3945,2523,5800,4162,5801,1637,4163,2471,4813,3946,5802, # 5392
|
|
||||||
2500,3034,3800,5803,5804,2195,4814,5805,2163,5806,5807,5808,5809,5810,5811,5812, # 5408
|
|
||||||
5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828, # 5424
|
|
||||||
5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844, # 5440
|
|
||||||
5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856,5857,5858,5859,5860, # 5456
|
|
||||||
5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872,5873,5874,5875,5876, # 5472
|
|
||||||
5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888,5889,5890,5891,5892, # 5488
|
|
||||||
5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905,5906,5907,5908, # 5504
|
|
||||||
5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920,5921,5922,5923,5924, # 5520
|
|
||||||
5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936,5937,5938,5939,5940, # 5536
|
|
||||||
5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952,5953,5954,5955,5956, # 5552
|
|
||||||
5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968,5969,5970,5971,5972, # 5568
|
|
||||||
5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984,5985,5986,5987,5988, # 5584
|
|
||||||
5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004, # 5600
|
|
||||||
6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020, # 5616
|
|
||||||
6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032,6033,6034,6035,6036, # 5632
|
|
||||||
6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052, # 5648
|
|
||||||
6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068, # 5664
|
|
||||||
6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084, # 5680
|
|
||||||
6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100, # 5696
|
|
||||||
6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116, # 5712
|
|
||||||
6117,6118,6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,6132, # 5728
|
|
||||||
6133,6134,6135,6136,6137,6138,6139,6140,6141,6142,6143,6144,6145,6146,6147,6148, # 5744
|
|
||||||
6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163,6164, # 5760
|
|
||||||
6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179,6180, # 5776
|
|
||||||
6181,6182,6183,6184,6185,6186,6187,6188,6189,6190,6191,6192,6193,6194,6195,6196, # 5792
|
|
||||||
6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210,6211,6212, # 5808
|
|
||||||
6213,6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,3670,6224,6225,6226,6227, # 5824
|
|
||||||
6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241,6242,6243, # 5840
|
|
||||||
6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,6254,6255,6256,6257,6258,6259, # 5856
|
|
||||||
6260,6261,6262,6263,6264,6265,6266,6267,6268,6269,6270,6271,6272,6273,6274,6275, # 5872
|
|
||||||
6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,4815,6286,6287,6288,6289,6290, # 5888
|
|
||||||
6291,6292,4816,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,6303,6304,6305, # 5904
|
|
||||||
6306,6307,6308,6309,6310,6311,4817,4818,6312,6313,6314,6315,6316,6317,6318,4819, # 5920
|
|
||||||
6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333,6334, # 5936
|
|
||||||
6335,6336,6337,4820,6338,6339,6340,6341,6342,6343,6344,6345,6346,6347,6348,6349, # 5952
|
|
||||||
6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363,6364,6365, # 5968
|
|
||||||
6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379,6380,6381, # 5984
|
|
||||||
6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395,6396,6397, # 6000
|
|
||||||
6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,3441,6411,6412, # 6016
|
|
||||||
6413,6414,6415,6416,6417,6418,6419,6420,6421,6422,6423,6424,6425,4440,6426,6427, # 6032
|
|
||||||
6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,6439,6440,6441,6442,6443, # 6048
|
|
||||||
6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,4821,6455,6456,6457,6458, # 6064
|
|
||||||
6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472,6473,6474, # 6080
|
|
||||||
6475,6476,6477,3947,3948,6478,6479,6480,6481,3272,4441,6482,6483,6484,6485,4442, # 6096
|
|
||||||
6486,6487,6488,6489,6490,6491,6492,6493,6494,6495,6496,4822,6497,6498,6499,6500, # 6112
|
|
||||||
6501,6502,6503,6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516, # 6128
|
|
||||||
6517,6518,6519,6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532, # 6144
|
|
||||||
6533,6534,6535,6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548, # 6160
|
|
||||||
6549,6550,6551,6552,6553,6554,6555,6556,2784,6557,4823,6558,6559,6560,6561,6562, # 6176
|
|
||||||
6563,6564,6565,6566,6567,6568,6569,3949,6570,6571,6572,4824,6573,6574,6575,6576, # 6192
|
|
||||||
6577,6578,6579,6580,6581,6582,6583,4825,6584,6585,6586,3950,2785,6587,6588,6589, # 6208
|
|
||||||
6590,6591,6592,6593,6594,6595,6596,6597,6598,6599,6600,6601,6602,6603,6604,6605, # 6224
|
|
||||||
6606,6607,6608,6609,6610,6611,6612,4826,6613,6614,6615,4827,6616,6617,6618,6619, # 6240
|
|
||||||
6620,6621,6622,6623,6624,6625,4164,6626,6627,6628,6629,6630,6631,6632,6633,6634, # 6256
|
|
||||||
3547,6635,4828,6636,6637,6638,6639,6640,6641,6642,3951,2984,6643,6644,6645,6646, # 6272
|
|
||||||
6647,6648,6649,4165,6650,4829,6651,6652,4830,6653,6654,6655,6656,6657,6658,6659, # 6288
|
|
||||||
6660,6661,6662,4831,6663,6664,6665,6666,6667,6668,6669,6670,6671,4166,6672,4832, # 6304
|
|
||||||
3952,6673,6674,6675,6676,4833,6677,6678,6679,4167,6680,6681,6682,3198,6683,6684, # 6320
|
|
||||||
6685,6686,6687,6688,6689,6690,6691,6692,6693,6694,6695,6696,6697,4834,6698,6699, # 6336
|
|
||||||
6700,6701,6702,6703,6704,6705,6706,6707,6708,6709,6710,6711,6712,6713,6714,6715, # 6352
|
|
||||||
6716,6717,6718,6719,6720,6721,6722,6723,6724,6725,6726,6727,6728,6729,6730,6731, # 6368
|
|
||||||
6732,6733,6734,4443,6735,6736,6737,6738,6739,6740,6741,6742,6743,6744,6745,4444, # 6384
|
|
||||||
6746,6747,6748,6749,6750,6751,6752,6753,6754,6755,6756,6757,6758,6759,6760,6761, # 6400
|
|
||||||
6762,6763,6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777, # 6416
|
|
||||||
6778,6779,6780,6781,4168,6782,6783,3442,6784,6785,6786,6787,6788,6789,6790,6791, # 6432
|
|
||||||
4169,6792,6793,6794,6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806, # 6448
|
|
||||||
6807,6808,6809,6810,6811,4835,6812,6813,6814,4445,6815,6816,4446,6817,6818,6819, # 6464
|
|
||||||
6820,6821,6822,6823,6824,6825,6826,6827,6828,6829,6830,6831,6832,6833,6834,6835, # 6480
|
|
||||||
3548,6836,6837,6838,6839,6840,6841,6842,6843,6844,6845,6846,4836,6847,6848,6849, # 6496
|
|
||||||
6850,6851,6852,6853,6854,3953,6855,6856,6857,6858,6859,6860,6861,6862,6863,6864, # 6512
|
|
||||||
6865,6866,6867,6868,6869,6870,6871,6872,6873,6874,6875,6876,6877,3199,6878,6879, # 6528
|
|
||||||
6880,6881,6882,4447,6883,6884,6885,6886,6887,6888,6889,6890,6891,6892,6893,6894, # 6544
|
|
||||||
6895,6896,6897,6898,6899,6900,6901,6902,6903,6904,4170,6905,6906,6907,6908,6909, # 6560
|
|
||||||
6910,6911,6912,6913,6914,6915,6916,6917,6918,6919,6920,6921,6922,6923,6924,6925, # 6576
|
|
||||||
6926,6927,4837,6928,6929,6930,6931,6932,6933,6934,6935,6936,3346,6937,6938,4838, # 6592
|
|
||||||
6939,6940,6941,4448,6942,6943,6944,6945,6946,4449,6947,6948,6949,6950,6951,6952, # 6608
|
|
||||||
6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966,6967,6968, # 6624
|
|
||||||
6969,6970,6971,6972,6973,6974,6975,6976,6977,6978,6979,6980,6981,6982,6983,6984, # 6640
|
|
||||||
6985,6986,6987,6988,6989,6990,6991,6992,6993,6994,3671,6995,6996,6997,6998,4839, # 6656
|
|
||||||
6999,7000,7001,7002,3549,7003,7004,7005,7006,7007,7008,7009,7010,7011,7012,7013, # 6672
|
|
||||||
7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027,7028,7029, # 6688
|
|
||||||
7030,4840,7031,7032,7033,7034,7035,7036,7037,7038,4841,7039,7040,7041,7042,7043, # 6704
|
|
||||||
7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058,7059, # 6720
|
|
||||||
7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,2985,7071,7072,7073,7074, # 6736
|
|
||||||
7075,7076,7077,7078,7079,7080,4842,7081,7082,7083,7084,7085,7086,7087,7088,7089, # 6752
|
|
||||||
7090,7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105, # 6768
|
|
||||||
7106,7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,4450,7119,7120, # 6784
|
|
||||||
7121,7122,7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136, # 6800
|
|
||||||
7137,7138,7139,7140,7141,7142,7143,4843,7144,7145,7146,7147,7148,7149,7150,7151, # 6816
|
|
||||||
7152,7153,7154,7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167, # 6832
|
|
||||||
7168,7169,7170,7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183, # 6848
|
|
||||||
7184,7185,7186,7187,7188,4171,4172,7189,7190,7191,7192,7193,7194,7195,7196,7197, # 6864
|
|
||||||
7198,7199,7200,7201,7202,7203,7204,7205,7206,7207,7208,7209,7210,7211,7212,7213, # 6880
|
|
||||||
7214,7215,7216,7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229, # 6896
|
|
||||||
7230,7231,7232,7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245, # 6912
|
|
||||||
7246,7247,7248,7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261, # 6928
|
|
||||||
7262,7263,7264,7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277, # 6944
|
|
||||||
7278,7279,7280,7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293, # 6960
|
|
||||||
7294,7295,7296,4844,7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308, # 6976
|
|
||||||
7309,7310,7311,7312,7313,7314,7315,7316,4451,7317,7318,7319,7320,7321,7322,7323, # 6992
|
|
||||||
7324,7325,7326,7327,7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339, # 7008
|
|
||||||
7340,7341,7342,7343,7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,4173,7354, # 7024
|
|
||||||
7355,4845,7356,7357,7358,7359,7360,7361,7362,7363,7364,7365,7366,7367,7368,7369, # 7040
|
|
||||||
7370,7371,7372,7373,7374,7375,7376,7377,7378,7379,7380,7381,7382,7383,7384,7385, # 7056
|
|
||||||
7386,7387,7388,4846,7389,7390,7391,7392,7393,7394,7395,7396,7397,7398,7399,7400, # 7072
|
|
||||||
7401,7402,7403,7404,7405,3672,7406,7407,7408,7409,7410,7411,7412,7413,7414,7415, # 7088
|
|
||||||
7416,7417,7418,7419,7420,7421,7422,7423,7424,7425,7426,7427,7428,7429,7430,7431, # 7104
|
|
||||||
7432,7433,7434,7435,7436,7437,7438,7439,7440,7441,7442,7443,7444,7445,7446,7447, # 7120
|
|
||||||
7448,7449,7450,7451,7452,7453,4452,7454,3200,7455,7456,7457,7458,7459,7460,7461, # 7136
|
|
||||||
7462,7463,7464,7465,7466,7467,7468,7469,7470,7471,7472,7473,7474,4847,7475,7476, # 7152
|
|
||||||
7477,3133,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487,7488,7489,7490,7491, # 7168
|
|
||||||
7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,3347,7503,7504,7505,7506, # 7184
|
|
||||||
7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519,7520,7521,4848, # 7200
|
|
||||||
7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535,7536,7537, # 7216
|
|
||||||
7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,3801,4849,7550,7551, # 7232
|
|
||||||
7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567, # 7248
|
|
||||||
7568,7569,3035,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582, # 7264
|
|
||||||
7583,7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598, # 7280
|
|
||||||
7599,7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614, # 7296
|
|
||||||
7615,7616,4850,7617,7618,3802,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628, # 7312
|
|
||||||
7629,7630,7631,7632,4851,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643, # 7328
|
|
||||||
7644,7645,7646,7647,7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659, # 7344
|
|
||||||
7660,7661,7662,7663,7664,7665,7666,7667,7668,7669,7670,4453,7671,7672,7673,7674, # 7360
|
|
||||||
7675,7676,7677,7678,7679,7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690, # 7376
|
|
||||||
7691,7692,7693,7694,7695,7696,7697,3443,7698,7699,7700,7701,7702,4454,7703,7704, # 7392
|
|
||||||
7705,7706,7707,7708,7709,7710,7711,7712,7713,2472,7714,7715,7716,7717,7718,7719, # 7408
|
|
||||||
7720,7721,7722,7723,7724,7725,7726,7727,7728,7729,7730,7731,3954,7732,7733,7734, # 7424
|
|
||||||
7735,7736,7737,7738,7739,7740,7741,7742,7743,7744,7745,7746,7747,7748,7749,7750, # 7440
|
|
||||||
3134,7751,7752,4852,7753,7754,7755,4853,7756,7757,7758,7759,7760,4174,7761,7762, # 7456
|
|
||||||
7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,7777,7778, # 7472
|
|
||||||
7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791,7792,7793,7794, # 7488
|
|
||||||
7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,4854,7806,7807,7808,7809, # 7504
|
|
||||||
7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824,7825, # 7520
|
|
||||||
4855,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7536
|
|
||||||
7841,7842,7843,7844,7845,7846,7847,3955,7848,7849,7850,7851,7852,7853,7854,7855, # 7552
|
|
||||||
7856,7857,7858,7859,7860,3444,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870, # 7568
|
|
||||||
7871,7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886, # 7584
|
|
||||||
7887,7888,7889,7890,7891,4175,7892,7893,7894,7895,7896,4856,4857,7897,7898,7899, # 7600
|
|
||||||
7900,2598,7901,7902,7903,7904,7905,7906,7907,7908,4455,7909,7910,7911,7912,7913, # 7616
|
|
||||||
7914,3201,7915,7916,7917,7918,7919,7920,7921,4858,7922,7923,7924,7925,7926,7927, # 7632
|
|
||||||
7928,7929,7930,7931,7932,7933,7934,7935,7936,7937,7938,7939,7940,7941,7942,7943, # 7648
|
|
||||||
7944,7945,7946,7947,7948,7949,7950,7951,7952,7953,7954,7955,7956,7957,7958,7959, # 7664
|
|
||||||
7960,7961,7962,7963,7964,7965,7966,7967,7968,7969,7970,7971,7972,7973,7974,7975, # 7680
|
|
||||||
7976,7977,7978,7979,7980,7981,4859,7982,7983,7984,7985,7986,7987,7988,7989,7990, # 7696
|
|
||||||
7991,7992,7993,7994,7995,7996,4860,7997,7998,7999,8000,8001,8002,8003,8004,8005, # 7712
|
|
||||||
8006,8007,8008,8009,8010,8011,8012,8013,8014,8015,8016,4176,8017,8018,8019,8020, # 7728
|
|
||||||
8021,8022,8023,4861,8024,8025,8026,8027,8028,8029,8030,8031,8032,8033,8034,8035, # 7744
|
|
||||||
8036,4862,4456,8037,8038,8039,8040,4863,8041,8042,8043,8044,8045,8046,8047,8048, # 7760
|
|
||||||
8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063,8064, # 7776
|
|
||||||
8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079,8080, # 7792
|
|
||||||
8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095,8096, # 7808
|
|
||||||
8097,8098,8099,4864,4177,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110, # 7824
|
|
||||||
8111,8112,8113,8114,8115,8116,8117,8118,8119,8120,4178,8121,8122,8123,8124,8125, # 7840
|
|
||||||
8126,8127,8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141, # 7856
|
|
||||||
8142,8143,8144,8145,4865,4866,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155, # 7872
|
|
||||||
8156,8157,8158,8159,8160,8161,8162,8163,8164,8165,4179,8166,8167,8168,8169,8170, # 7888
|
|
||||||
8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181,4457,8182,8183,8184,8185, # 7904
|
|
||||||
8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197,8198,8199,8200,8201, # 7920
|
|
||||||
8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213,8214,8215,8216,8217, # 7936
|
|
||||||
8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229,8230,8231,8232,8233, # 7952
|
|
||||||
8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245,8246,8247,8248,8249, # 7968
|
|
||||||
8250,8251,8252,8253,8254,8255,8256,3445,8257,8258,8259,8260,8261,8262,4458,8263, # 7984
|
|
||||||
8264,8265,8266,8267,8268,8269,8270,8271,8272,4459,8273,8274,8275,8276,3550,8277, # 8000
|
|
||||||
8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,4460,8290,8291,8292, # 8016
|
|
||||||
8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,4867, # 8032
|
|
||||||
8308,8309,8310,8311,8312,3551,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322, # 8048
|
|
||||||
8323,8324,8325,8326,4868,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337, # 8064
|
|
||||||
8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353, # 8080
|
|
||||||
8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,4869,4461,8364,8365,8366,8367, # 8096
|
|
||||||
8368,8369,8370,4870,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382, # 8112
|
|
||||||
8383,8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398, # 8128
|
|
||||||
8399,8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,4871,8411,8412,8413, # 8144
|
|
||||||
8414,8415,8416,8417,8418,8419,8420,8421,8422,4462,8423,8424,8425,8426,8427,8428, # 8160
|
|
||||||
8429,8430,8431,8432,8433,2986,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443, # 8176
|
|
||||||
8444,8445,8446,8447,8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459, # 8192
|
|
||||||
8460,8461,8462,8463,8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475, # 8208
|
|
||||||
8476,8477,8478,4180,8479,8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490, # 8224
|
|
||||||
8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506, # 8240
|
|
||||||
8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522, # 8256
|
|
||||||
8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538, # 8272
|
|
||||||
8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554, # 8288
|
|
||||||
8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,4872,8565,8566,8567,8568,8569, # 8304
|
|
||||||
8570,8571,8572,8573,4873,8574,8575,8576,8577,8578,8579,8580,8581,8582,8583,8584, # 8320
|
|
||||||
8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597,8598,8599,8600, # 8336
|
|
||||||
8601,8602,8603,8604,8605,3803,8606,8607,8608,8609,8610,8611,8612,8613,4874,3804, # 8352
|
|
||||||
8614,8615,8616,8617,8618,8619,8620,8621,3956,8622,8623,8624,8625,8626,8627,8628, # 8368
|
|
||||||
8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,2865,8639,8640,8641,8642,8643, # 8384
|
|
||||||
8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,4463,8657,8658, # 8400
|
|
||||||
8659,4875,4876,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672, # 8416
|
|
||||||
8673,8674,8675,8676,8677,8678,8679,8680,8681,4464,8682,8683,8684,8685,8686,8687, # 8432
|
|
||||||
8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703, # 8448
|
|
||||||
8704,8705,8706,8707,8708,8709,2261,8710,8711,8712,8713,8714,8715,8716,8717,8718, # 8464
|
|
||||||
8719,8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,4181, # 8480
|
|
||||||
8734,8735,8736,8737,8738,8739,8740,8741,8742,8743,8744,8745,8746,8747,8748,8749, # 8496
|
|
||||||
8750,8751,8752,8753,8754,8755,8756,8757,8758,8759,8760,8761,8762,8763,4877,8764, # 8512
|
|
||||||
8765,8766,8767,8768,8769,8770,8771,8772,8773,8774,8775,8776,8777,8778,8779,8780, # 8528
|
|
||||||
8781,8782,8783,8784,8785,8786,8787,8788,4878,8789,4879,8790,8791,8792,4880,8793, # 8544
|
|
||||||
8794,8795,8796,8797,8798,8799,8800,8801,4881,8802,8803,8804,8805,8806,8807,8808, # 8560
|
|
||||||
8809,8810,8811,8812,8813,8814,8815,3957,8816,8817,8818,8819,8820,8821,8822,8823, # 8576
|
|
||||||
8824,8825,8826,8827,8828,8829,8830,8831,8832,8833,8834,8835,8836,8837,8838,8839, # 8592
|
|
||||||
8840,8841,8842,8843,8844,8845,8846,8847,4882,8848,8849,8850,8851,8852,8853,8854, # 8608
|
|
||||||
8855,8856,8857,8858,8859,8860,8861,8862,8863,8864,8865,8866,8867,8868,8869,8870, # 8624
|
|
||||||
8871,8872,8873,8874,8875,8876,8877,8878,8879,8880,8881,8882,8883,8884,3202,8885, # 8640
|
|
||||||
8886,8887,8888,8889,8890,8891,8892,8893,8894,8895,8896,8897,8898,8899,8900,8901, # 8656
|
|
||||||
8902,8903,8904,8905,8906,8907,8908,8909,8910,8911,8912,8913,8914,8915,8916,8917, # 8672
|
|
||||||
8918,8919,8920,8921,8922,8923,8924,4465,8925,8926,8927,8928,8929,8930,8931,8932, # 8688
|
|
||||||
4883,8933,8934,8935,8936,8937,8938,8939,8940,8941,8942,8943,2214,8944,8945,8946, # 8704
|
|
||||||
8947,8948,8949,8950,8951,8952,8953,8954,8955,8956,8957,8958,8959,8960,8961,8962, # 8720
|
|
||||||
8963,8964,8965,4884,8966,8967,8968,8969,8970,8971,8972,8973,8974,8975,8976,8977, # 8736
|
|
||||||
8978,8979,8980,8981,8982,8983,8984,8985,8986,8987,8988,8989,8990,8991,8992,4885, # 8752
|
|
||||||
8993,8994,8995,8996,8997,8998,8999,9000,9001,9002,9003,9004,9005,9006,9007,9008, # 8768
|
|
||||||
9009,9010,9011,9012,9013,9014,9015,9016,9017,9018,9019,9020,9021,4182,9022,9023, # 8784
|
|
||||||
9024,9025,9026,9027,9028,9029,9030,9031,9032,9033,9034,9035,9036,9037,9038,9039, # 8800
|
|
||||||
9040,9041,9042,9043,9044,9045,9046,9047,9048,9049,9050,9051,9052,9053,9054,9055, # 8816
|
|
||||||
9056,9057,9058,9059,9060,9061,9062,9063,4886,9064,9065,9066,9067,9068,9069,4887, # 8832
|
|
||||||
9070,9071,9072,9073,9074,9075,9076,9077,9078,9079,9080,9081,9082,9083,9084,9085, # 8848
|
|
||||||
9086,9087,9088,9089,9090,9091,9092,9093,9094,9095,9096,9097,9098,9099,9100,9101, # 8864
|
|
||||||
9102,9103,9104,9105,9106,9107,9108,9109,9110,9111,9112,9113,9114,9115,9116,9117, # 8880
|
|
||||||
9118,9119,9120,9121,9122,9123,9124,9125,9126,9127,9128,9129,9130,9131,9132,9133, # 8896
|
|
||||||
9134,9135,9136,9137,9138,9139,9140,9141,3958,9142,9143,9144,9145,9146,9147,9148, # 8912
|
|
||||||
9149,9150,9151,4888,9152,9153,9154,9155,9156,9157,9158,9159,9160,9161,9162,9163, # 8928
|
|
||||||
9164,9165,9166,9167,9168,9169,9170,9171,9172,9173,9174,9175,4889,9176,9177,9178, # 8944
|
|
||||||
9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194, # 8960
|
|
||||||
9195,9196,9197,9198,9199,9200,9201,9202,9203,4890,9204,9205,9206,9207,9208,9209, # 8976
|
|
||||||
9210,9211,9212,9213,9214,9215,9216,9217,9218,9219,9220,9221,9222,4466,9223,9224, # 8992
|
|
||||||
9225,9226,9227,9228,9229,9230,9231,9232,9233,9234,9235,9236,9237,9238,9239,9240, # 9008
|
|
||||||
9241,9242,9243,9244,9245,4891,9246,9247,9248,9249,9250,9251,9252,9253,9254,9255, # 9024
|
|
||||||
9256,9257,4892,9258,9259,9260,9261,4893,4894,9262,9263,9264,9265,9266,9267,9268, # 9040
|
|
||||||
9269,9270,9271,9272,9273,4467,9274,9275,9276,9277,9278,9279,9280,9281,9282,9283, # 9056
|
|
||||||
9284,9285,3673,9286,9287,9288,9289,9290,9291,9292,9293,9294,9295,9296,9297,9298, # 9072
|
|
||||||
9299,9300,9301,9302,9303,9304,9305,9306,9307,9308,9309,9310,9311,9312,9313,9314, # 9088
|
|
||||||
9315,9316,9317,9318,9319,9320,9321,9322,4895,9323,9324,9325,9326,9327,9328,9329, # 9104
|
|
||||||
9330,9331,9332,9333,9334,9335,9336,9337,9338,9339,9340,9341,9342,9343,9344,9345, # 9120
|
|
||||||
9346,9347,4468,9348,9349,9350,9351,9352,9353,9354,9355,9356,9357,9358,9359,9360, # 9136
|
|
||||||
9361,9362,9363,9364,9365,9366,9367,9368,9369,9370,9371,9372,9373,4896,9374,4469, # 9152
|
|
||||||
9375,9376,9377,9378,9379,4897,9380,9381,9382,9383,9384,9385,9386,9387,9388,9389, # 9168
|
|
||||||
9390,9391,9392,9393,9394,9395,9396,9397,9398,9399,9400,9401,9402,9403,9404,9405, # 9184
|
|
||||||
9406,4470,9407,2751,9408,9409,3674,3552,9410,9411,9412,9413,9414,9415,9416,9417, # 9200
|
|
||||||
9418,9419,9420,9421,4898,9422,9423,9424,9425,9426,9427,9428,9429,3959,9430,9431, # 9216
|
|
||||||
9432,9433,9434,9435,9436,4471,9437,9438,9439,9440,9441,9442,9443,9444,9445,9446, # 9232
|
|
||||||
9447,9448,9449,9450,3348,9451,9452,9453,9454,9455,9456,9457,9458,9459,9460,9461, # 9248
|
|
||||||
9462,9463,9464,9465,9466,9467,9468,9469,9470,9471,9472,4899,9473,9474,9475,9476, # 9264
|
|
||||||
9477,4900,9478,9479,9480,9481,9482,9483,9484,9485,9486,9487,9488,3349,9489,9490, # 9280
|
|
||||||
9491,9492,9493,9494,9495,9496,9497,9498,9499,9500,9501,9502,9503,9504,9505,9506, # 9296
|
|
||||||
9507,9508,9509,9510,9511,9512,9513,9514,9515,9516,9517,9518,9519,9520,4901,9521, # 9312
|
|
||||||
9522,9523,9524,9525,9526,4902,9527,9528,9529,9530,9531,9532,9533,9534,9535,9536, # 9328
|
|
||||||
9537,9538,9539,9540,9541,9542,9543,9544,9545,9546,9547,9548,9549,9550,9551,9552, # 9344
|
|
||||||
9553,9554,9555,9556,9557,9558,9559,9560,9561,9562,9563,9564,9565,9566,9567,9568, # 9360
|
|
||||||
9569,9570,9571,9572,9573,9574,9575,9576,9577,9578,9579,9580,9581,9582,9583,9584, # 9376
|
|
||||||
3805,9585,9586,9587,9588,9589,9590,9591,9592,9593,9594,9595,9596,9597,9598,9599, # 9392
|
|
||||||
9600,9601,9602,4903,9603,9604,9605,9606,9607,4904,9608,9609,9610,9611,9612,9613, # 9408
|
|
||||||
9614,4905,9615,9616,9617,9618,9619,9620,9621,9622,9623,9624,9625,9626,9627,9628, # 9424
|
|
||||||
9629,9630,9631,9632,4906,9633,9634,9635,9636,9637,9638,9639,9640,9641,9642,9643, # 9440
|
|
||||||
4907,9644,9645,9646,9647,9648,9649,9650,9651,9652,9653,9654,9655,9656,9657,9658, # 9456
|
|
||||||
9659,9660,9661,9662,9663,9664,9665,9666,9667,9668,9669,9670,9671,9672,4183,9673, # 9472
|
|
||||||
9674,9675,9676,9677,4908,9678,9679,9680,9681,4909,9682,9683,9684,9685,9686,9687, # 9488
|
|
||||||
9688,9689,9690,4910,9691,9692,9693,3675,9694,9695,9696,2945,9697,9698,9699,9700, # 9504
|
|
||||||
9701,9702,9703,9704,9705,4911,9706,9707,9708,9709,9710,9711,9712,9713,9714,9715, # 9520
|
|
||||||
9716,9717,9718,9719,9720,9721,9722,9723,9724,9725,9726,9727,9728,9729,9730,9731, # 9536
|
|
||||||
9732,9733,9734,9735,4912,9736,9737,9738,9739,9740,4913,9741,9742,9743,9744,9745, # 9552
|
|
||||||
9746,9747,9748,9749,9750,9751,9752,9753,9754,9755,9756,9757,9758,4914,9759,9760, # 9568
|
|
||||||
9761,9762,9763,9764,9765,9766,9767,9768,9769,9770,9771,9772,9773,9774,9775,9776, # 9584
|
|
||||||
9777,9778,9779,9780,9781,9782,4915,9783,9784,9785,9786,9787,9788,9789,9790,9791, # 9600
|
|
||||||
9792,9793,4916,9794,9795,9796,9797,9798,9799,9800,9801,9802,9803,9804,9805,9806, # 9616
|
|
||||||
9807,9808,9809,9810,9811,9812,9813,9814,9815,9816,9817,9818,9819,9820,9821,9822, # 9632
|
|
||||||
9823,9824,9825,9826,9827,9828,9829,9830,9831,9832,9833,9834,9835,9836,9837,9838, # 9648
|
|
||||||
9839,9840,9841,9842,9843,9844,9845,9846,9847,9848,9849,9850,9851,9852,9853,9854, # 9664
|
|
||||||
9855,9856,9857,9858,9859,9860,9861,9862,9863,9864,9865,9866,9867,9868,4917,9869, # 9680
|
|
||||||
9870,9871,9872,9873,9874,9875,9876,9877,9878,9879,9880,9881,9882,9883,9884,9885, # 9696
|
|
||||||
9886,9887,9888,9889,9890,9891,9892,4472,9893,9894,9895,9896,9897,3806,9898,9899, # 9712
|
|
||||||
9900,9901,9902,9903,9904,9905,9906,9907,9908,9909,9910,9911,9912,9913,9914,4918, # 9728
|
|
||||||
9915,9916,9917,4919,9918,9919,9920,9921,4184,9922,9923,9924,9925,9926,9927,9928, # 9744
|
|
||||||
9929,9930,9931,9932,9933,9934,9935,9936,9937,9938,9939,9940,9941,9942,9943,9944, # 9760
|
|
||||||
9945,9946,4920,9947,9948,9949,9950,9951,9952,9953,9954,9955,4185,9956,9957,9958, # 9776
|
|
||||||
9959,9960,9961,9962,9963,9964,9965,4921,9966,9967,9968,4473,9969,9970,9971,9972, # 9792
|
|
||||||
9973,9974,9975,9976,9977,4474,9978,9979,9980,9981,9982,9983,9984,9985,9986,9987, # 9808
|
|
||||||
9988,9989,9990,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000,10001,10002,10003, # 9824
|
|
||||||
10004,10005,10006,10007,10008,10009,10010,10011,10012,10013,10014,10015,10016,10017,10018,10019, # 9840
|
|
||||||
10020,10021,4922,10022,4923,10023,10024,10025,10026,10027,10028,10029,10030,10031,10032,10033, # 9856
|
|
||||||
10034,10035,10036,10037,10038,10039,10040,10041,10042,10043,10044,10045,10046,10047,10048,4924, # 9872
|
|
||||||
10049,10050,10051,10052,10053,10054,10055,10056,10057,10058,10059,10060,10061,10062,10063,10064, # 9888
|
|
||||||
10065,10066,10067,10068,10069,10070,10071,10072,10073,10074,10075,10076,10077,10078,10079,10080, # 9904
|
|
||||||
10081,10082,10083,10084,10085,10086,10087,4475,10088,10089,10090,10091,10092,10093,10094,10095, # 9920
|
|
||||||
10096,10097,4476,10098,10099,10100,10101,10102,10103,10104,10105,10106,10107,10108,10109,10110, # 9936
|
|
||||||
10111,2174,10112,10113,10114,10115,10116,10117,10118,10119,10120,10121,10122,10123,10124,10125, # 9952
|
|
||||||
10126,10127,10128,10129,10130,10131,10132,10133,10134,10135,10136,10137,10138,10139,10140,3807, # 9968
|
|
||||||
4186,4925,10141,10142,10143,10144,10145,10146,10147,4477,4187,10148,10149,10150,10151,10152, # 9984
|
|
||||||
10153,4188,10154,10155,10156,10157,10158,10159,10160,10161,4926,10162,10163,10164,10165,10166, #10000
|
|
||||||
10167,10168,10169,10170,10171,10172,10173,10174,10175,10176,10177,10178,10179,10180,10181,10182, #10016
|
|
||||||
10183,10184,10185,10186,10187,10188,10189,10190,10191,10192,3203,10193,10194,10195,10196,10197, #10032
|
|
||||||
10198,10199,10200,4478,10201,10202,10203,10204,4479,10205,10206,10207,10208,10209,10210,10211, #10048
|
|
||||||
10212,10213,10214,10215,10216,10217,10218,10219,10220,10221,10222,10223,10224,10225,10226,10227, #10064
|
|
||||||
10228,10229,10230,10231,10232,10233,10234,4927,10235,10236,10237,10238,10239,10240,10241,10242, #10080
|
|
||||||
10243,10244,10245,10246,10247,10248,10249,10250,10251,10252,10253,10254,10255,10256,10257,10258, #10096
|
|
||||||
10259,10260,10261,10262,10263,10264,10265,10266,10267,10268,10269,10270,10271,10272,10273,4480, #10112
|
|
||||||
4928,4929,10274,10275,10276,10277,10278,10279,10280,10281,10282,10283,10284,10285,10286,10287, #10128
|
|
||||||
10288,10289,10290,10291,10292,10293,10294,10295,10296,10297,10298,10299,10300,10301,10302,10303, #10144
|
|
||||||
10304,10305,10306,10307,10308,10309,10310,10311,10312,10313,10314,10315,10316,10317,10318,10319, #10160
|
|
||||||
10320,10321,10322,10323,10324,10325,10326,10327,10328,10329,10330,10331,10332,10333,10334,4930, #10176
|
|
||||||
10335,10336,10337,10338,10339,10340,10341,10342,4931,10343,10344,10345,10346,10347,10348,10349, #10192
|
|
||||||
10350,10351,10352,10353,10354,10355,3088,10356,2786,10357,10358,10359,10360,4189,10361,10362, #10208
|
|
||||||
10363,10364,10365,10366,10367,10368,10369,10370,10371,10372,10373,10374,10375,4932,10376,10377, #10224
|
|
||||||
10378,10379,10380,10381,10382,10383,10384,10385,10386,10387,10388,10389,10390,10391,10392,4933, #10240
|
|
||||||
10393,10394,10395,4934,10396,10397,10398,10399,10400,10401,10402,10403,10404,10405,10406,10407, #10256
|
|
||||||
10408,10409,10410,10411,10412,3446,10413,10414,10415,10416,10417,10418,10419,10420,10421,10422, #10272
|
|
||||||
10423,4935,10424,10425,10426,10427,10428,10429,10430,4936,10431,10432,10433,10434,10435,10436, #10288
|
|
||||||
10437,10438,10439,10440,10441,10442,10443,4937,10444,10445,10446,10447,4481,10448,10449,10450, #10304
|
|
||||||
10451,10452,10453,10454,10455,10456,10457,10458,10459,10460,10461,10462,10463,10464,10465,10466, #10320
|
|
||||||
10467,10468,10469,10470,10471,10472,10473,10474,10475,10476,10477,10478,10479,10480,10481,10482, #10336
|
|
||||||
10483,10484,10485,10486,10487,10488,10489,10490,10491,10492,10493,10494,10495,10496,10497,10498, #10352
|
|
||||||
10499,10500,10501,10502,10503,10504,10505,4938,10506,10507,10508,10509,10510,2552,10511,10512, #10368
|
|
||||||
10513,10514,10515,10516,3447,10517,10518,10519,10520,10521,10522,10523,10524,10525,10526,10527, #10384
|
|
||||||
10528,10529,10530,10531,10532,10533,10534,10535,10536,10537,10538,10539,10540,10541,10542,10543, #10400
|
|
||||||
4482,10544,4939,10545,10546,10547,10548,10549,10550,10551,10552,10553,10554,10555,10556,10557, #10416
|
|
||||||
10558,10559,10560,10561,10562,10563,10564,10565,10566,10567,3676,4483,10568,10569,10570,10571, #10432
|
|
||||||
10572,3448,10573,10574,10575,10576,10577,10578,10579,10580,10581,10582,10583,10584,10585,10586, #10448
|
|
||||||
10587,10588,10589,10590,10591,10592,10593,10594,10595,10596,10597,10598,10599,10600,10601,10602, #10464
|
|
||||||
10603,10604,10605,10606,10607,10608,10609,10610,10611,10612,10613,10614,10615,10616,10617,10618, #10480
|
|
||||||
10619,10620,10621,10622,10623,10624,10625,10626,10627,4484,10628,10629,10630,10631,10632,4940, #10496
|
|
||||||
10633,10634,10635,10636,10637,10638,10639,10640,10641,10642,10643,10644,10645,10646,10647,10648, #10512
|
|
||||||
10649,10650,10651,10652,10653,10654,10655,10656,4941,10657,10658,10659,2599,10660,10661,10662, #10528
|
|
||||||
10663,10664,10665,10666,3089,10667,10668,10669,10670,10671,10672,10673,10674,10675,10676,10677, #10544
|
|
||||||
10678,10679,10680,4942,10681,10682,10683,10684,10685,10686,10687,10688,10689,10690,10691,10692, #10560
|
|
||||||
10693,10694,10695,10696,10697,4485,10698,10699,10700,10701,10702,10703,10704,4943,10705,3677, #10576
|
|
||||||
10706,10707,10708,10709,10710,10711,10712,4944,10713,10714,10715,10716,10717,10718,10719,10720, #10592
|
|
||||||
10721,10722,10723,10724,10725,10726,10727,10728,4945,10729,10730,10731,10732,10733,10734,10735, #10608
|
|
||||||
10736,10737,10738,10739,10740,10741,10742,10743,10744,10745,10746,10747,10748,10749,10750,10751, #10624
|
|
||||||
10752,10753,10754,10755,10756,10757,10758,10759,10760,10761,4946,10762,10763,10764,10765,10766, #10640
|
|
||||||
10767,4947,4948,10768,10769,10770,10771,10772,10773,10774,10775,10776,10777,10778,10779,10780, #10656
|
|
||||||
10781,10782,10783,10784,10785,10786,10787,10788,10789,10790,10791,10792,10793,10794,10795,10796, #10672
|
|
||||||
10797,10798,10799,10800,10801,10802,10803,10804,10805,10806,10807,10808,10809,10810,10811,10812, #10688
|
|
||||||
10813,10814,10815,10816,10817,10818,10819,10820,10821,10822,10823,10824,10825,10826,10827,10828, #10704
|
|
||||||
10829,10830,10831,10832,10833,10834,10835,10836,10837,10838,10839,10840,10841,10842,10843,10844, #10720
|
|
||||||
10845,10846,10847,10848,10849,10850,10851,10852,10853,10854,10855,10856,10857,10858,10859,10860, #10736
|
|
||||||
10861,10862,10863,10864,10865,10866,10867,10868,10869,10870,10871,10872,10873,10874,10875,10876, #10752
|
|
||||||
10877,10878,4486,10879,10880,10881,10882,10883,10884,10885,4949,10886,10887,10888,10889,10890, #10768
|
|
||||||
10891,10892,10893,10894,10895,10896,10897,10898,10899,10900,10901,10902,10903,10904,10905,10906, #10784
|
|
||||||
10907,10908,10909,10910,10911,10912,10913,10914,10915,10916,10917,10918,10919,4487,10920,10921, #10800
|
|
||||||
10922,10923,10924,10925,10926,10927,10928,10929,10930,10931,10932,4950,10933,10934,10935,10936, #10816
|
|
||||||
10937,10938,10939,10940,10941,10942,10943,10944,10945,10946,10947,10948,10949,4488,10950,10951, #10832
|
|
||||||
10952,10953,10954,10955,10956,10957,10958,10959,4190,10960,10961,10962,10963,10964,10965,10966, #10848
|
|
||||||
10967,10968,10969,10970,10971,10972,10973,10974,10975,10976,10977,10978,10979,10980,10981,10982, #10864
|
|
||||||
10983,10984,10985,10986,10987,10988,10989,10990,10991,10992,10993,10994,10995,10996,10997,10998, #10880
|
|
||||||
10999,11000,11001,11002,11003,11004,11005,11006,3960,11007,11008,11009,11010,11011,11012,11013, #10896
|
|
||||||
11014,11015,11016,11017,11018,11019,11020,11021,11022,11023,11024,11025,11026,11027,11028,11029, #10912
|
|
||||||
11030,11031,11032,4951,11033,11034,11035,11036,11037,11038,11039,11040,11041,11042,11043,11044, #10928
|
|
||||||
11045,11046,11047,4489,11048,11049,11050,11051,4952,11052,11053,11054,11055,11056,11057,11058, #10944
|
|
||||||
4953,11059,11060,11061,11062,11063,11064,11065,11066,11067,11068,11069,11070,11071,4954,11072, #10960
|
|
||||||
11073,11074,11075,11076,11077,11078,11079,11080,11081,11082,11083,11084,11085,11086,11087,11088, #10976
|
|
||||||
11089,11090,11091,11092,11093,11094,11095,11096,11097,11098,11099,11100,11101,11102,11103,11104, #10992
|
|
||||||
11105,11106,11107,11108,11109,11110,11111,11112,11113,11114,11115,3808,11116,11117,11118,11119, #11008
|
|
||||||
11120,11121,11122,11123,11124,11125,11126,11127,11128,11129,11130,11131,11132,11133,11134,4955, #11024
|
|
||||||
11135,11136,11137,11138,11139,11140,11141,11142,11143,11144,11145,11146,11147,11148,11149,11150, #11040
|
|
||||||
11151,11152,11153,11154,11155,11156,11157,11158,11159,11160,11161,4956,11162,11163,11164,11165, #11056
|
|
||||||
11166,11167,11168,11169,11170,11171,11172,11173,11174,11175,11176,11177,11178,11179,11180,4957, #11072
|
|
||||||
11181,11182,11183,11184,11185,11186,4958,11187,11188,11189,11190,11191,11192,11193,11194,11195, #11088
|
|
||||||
11196,11197,11198,11199,11200,3678,11201,11202,11203,11204,11205,11206,4191,11207,11208,11209, #11104
|
|
||||||
11210,11211,11212,11213,11214,11215,11216,11217,11218,11219,11220,11221,11222,11223,11224,11225, #11120
|
|
||||||
11226,11227,11228,11229,11230,11231,11232,11233,11234,11235,11236,11237,11238,11239,11240,11241, #11136
|
|
||||||
11242,11243,11244,11245,11246,11247,11248,11249,11250,11251,4959,11252,11253,11254,11255,11256, #11152
|
|
||||||
11257,11258,11259,11260,11261,11262,11263,11264,11265,11266,11267,11268,11269,11270,11271,11272, #11168
|
|
||||||
11273,11274,11275,11276,11277,11278,11279,11280,11281,11282,11283,11284,11285,11286,11287,11288, #11184
|
|
||||||
11289,11290,11291,11292,11293,11294,11295,11296,11297,11298,11299,11300,11301,11302,11303,11304, #11200
|
|
||||||
11305,11306,11307,11308,11309,11310,11311,11312,11313,11314,3679,11315,11316,11317,11318,4490, #11216
|
|
||||||
11319,11320,11321,11322,11323,11324,11325,11326,11327,11328,11329,11330,11331,11332,11333,11334, #11232
|
|
||||||
11335,11336,11337,11338,11339,11340,11341,11342,11343,11344,11345,11346,11347,4960,11348,11349, #11248
|
|
||||||
11350,11351,11352,11353,11354,11355,11356,11357,11358,11359,11360,11361,11362,11363,11364,11365, #11264
|
|
||||||
11366,11367,11368,11369,11370,11371,11372,11373,11374,11375,11376,11377,3961,4961,11378,11379, #11280
|
|
||||||
11380,11381,11382,11383,11384,11385,11386,11387,11388,11389,11390,11391,11392,11393,11394,11395, #11296
|
|
||||||
11396,11397,4192,11398,11399,11400,11401,11402,11403,11404,11405,11406,11407,11408,11409,11410, #11312
|
|
||||||
11411,4962,11412,11413,11414,11415,11416,11417,11418,11419,11420,11421,11422,11423,11424,11425, #11328
|
|
||||||
11426,11427,11428,11429,11430,11431,11432,11433,11434,11435,11436,11437,11438,11439,11440,11441, #11344
|
|
||||||
11442,11443,11444,11445,11446,11447,11448,11449,11450,11451,11452,11453,11454,11455,11456,11457, #11360
|
|
||||||
11458,11459,11460,11461,11462,11463,11464,11465,11466,11467,11468,11469,4963,11470,11471,4491, #11376
|
|
||||||
11472,11473,11474,11475,4964,11476,11477,11478,11479,11480,11481,11482,11483,11484,11485,11486, #11392
|
|
||||||
11487,11488,11489,11490,11491,11492,4965,11493,11494,11495,11496,11497,11498,11499,11500,11501, #11408
|
|
||||||
11502,11503,11504,11505,11506,11507,11508,11509,11510,11511,11512,11513,11514,11515,11516,11517, #11424
|
|
||||||
11518,11519,11520,11521,11522,11523,11524,11525,11526,11527,11528,11529,3962,11530,11531,11532, #11440
|
|
||||||
11533,11534,11535,11536,11537,11538,11539,11540,11541,11542,11543,11544,11545,11546,11547,11548, #11456
|
|
||||||
11549,11550,11551,11552,11553,11554,11555,11556,11557,11558,11559,11560,11561,11562,11563,11564, #11472
|
|
||||||
4193,4194,11565,11566,11567,11568,11569,11570,11571,11572,11573,11574,11575,11576,11577,11578, #11488
|
|
||||||
11579,11580,11581,11582,11583,11584,11585,11586,11587,11588,11589,11590,11591,4966,4195,11592, #11504
|
|
||||||
11593,11594,11595,11596,11597,11598,11599,11600,11601,11602,11603,11604,3090,11605,11606,11607, #11520
|
|
||||||
11608,11609,11610,4967,11611,11612,11613,11614,11615,11616,11617,11618,11619,11620,11621,11622, #11536
|
|
||||||
11623,11624,11625,11626,11627,11628,11629,11630,11631,11632,11633,11634,11635,11636,11637,11638, #11552
|
|
||||||
11639,11640,11641,11642,11643,11644,11645,11646,11647,11648,11649,11650,11651,11652,11653,11654, #11568
|
|
||||||
11655,11656,11657,11658,11659,11660,11661,11662,11663,11664,11665,11666,11667,11668,11669,11670, #11584
|
|
||||||
11671,11672,11673,11674,4968,11675,11676,11677,11678,11679,11680,11681,11682,11683,11684,11685, #11600
|
|
||||||
11686,11687,11688,11689,11690,11691,11692,11693,3809,11694,11695,11696,11697,11698,11699,11700, #11616
|
|
||||||
11701,11702,11703,11704,11705,11706,11707,11708,11709,11710,11711,11712,11713,11714,11715,11716, #11632
|
|
||||||
11717,11718,3553,11719,11720,11721,11722,11723,11724,11725,11726,11727,11728,11729,11730,4969, #11648
|
|
||||||
11731,11732,11733,11734,11735,11736,11737,11738,11739,11740,4492,11741,11742,11743,11744,11745, #11664
|
|
||||||
11746,11747,11748,11749,11750,11751,11752,4970,11753,11754,11755,11756,11757,11758,11759,11760, #11680
|
|
||||||
11761,11762,11763,11764,11765,11766,11767,11768,11769,11770,11771,11772,11773,11774,11775,11776, #11696
|
|
||||||
11777,11778,11779,11780,11781,11782,11783,11784,11785,11786,11787,11788,11789,11790,4971,11791, #11712
|
|
||||||
11792,11793,11794,11795,11796,11797,4972,11798,11799,11800,11801,11802,11803,11804,11805,11806, #11728
|
|
||||||
11807,11808,11809,11810,4973,11811,11812,11813,11814,11815,11816,11817,11818,11819,11820,11821, #11744
|
|
||||||
11822,11823,11824,11825,11826,11827,11828,11829,11830,11831,11832,11833,11834,3680,3810,11835, #11760
|
|
||||||
11836,4974,11837,11838,11839,11840,11841,11842,11843,11844,11845,11846,11847,11848,11849,11850, #11776
|
|
||||||
11851,11852,11853,11854,11855,11856,11857,11858,11859,11860,11861,11862,11863,11864,11865,11866, #11792
|
|
||||||
11867,11868,11869,11870,11871,11872,11873,11874,11875,11876,11877,11878,11879,11880,11881,11882, #11808
|
|
||||||
11883,11884,4493,11885,11886,11887,11888,11889,11890,11891,11892,11893,11894,11895,11896,11897, #11824
|
|
||||||
11898,11899,11900,11901,11902,11903,11904,11905,11906,11907,11908,11909,11910,11911,11912,11913, #11840
|
|
||||||
11914,11915,4975,11916,11917,11918,11919,11920,11921,11922,11923,11924,11925,11926,11927,11928, #11856
|
|
||||||
11929,11930,11931,11932,11933,11934,11935,11936,11937,11938,11939,11940,11941,11942,11943,11944, #11872
|
|
||||||
11945,11946,11947,11948,11949,4976,11950,11951,11952,11953,11954,11955,11956,11957,11958,11959, #11888
|
|
||||||
11960,11961,11962,11963,11964,11965,11966,11967,11968,11969,11970,11971,11972,11973,11974,11975, #11904
|
|
||||||
11976,11977,11978,11979,11980,11981,11982,11983,11984,11985,11986,11987,4196,11988,11989,11990, #11920
|
|
||||||
11991,11992,4977,11993,11994,11995,11996,11997,11998,11999,12000,12001,12002,12003,12004,12005, #11936
|
|
||||||
12006,12007,12008,12009,12010,12011,12012,12013,12014,12015,12016,12017,12018,12019,12020,12021, #11952
|
|
||||||
12022,12023,12024,12025,12026,12027,12028,12029,12030,12031,12032,12033,12034,12035,12036,12037, #11968
|
|
||||||
12038,12039,12040,12041,12042,12043,12044,12045,12046,12047,12048,12049,12050,12051,12052,12053, #11984
|
|
||||||
12054,12055,12056,12057,12058,12059,12060,12061,4978,12062,12063,12064,12065,12066,12067,12068, #12000
|
|
||||||
12069,12070,12071,12072,12073,12074,12075,12076,12077,12078,12079,12080,12081,12082,12083,12084, #12016
|
|
||||||
12085,12086,12087,12088,12089,12090,12091,12092,12093,12094,12095,12096,12097,12098,12099,12100, #12032
|
|
||||||
12101,12102,12103,12104,12105,12106,12107,12108,12109,12110,12111,12112,12113,12114,12115,12116, #12048
|
|
||||||
12117,12118,12119,12120,12121,12122,12123,4979,12124,12125,12126,12127,12128,4197,12129,12130, #12064
|
|
||||||
12131,12132,12133,12134,12135,12136,12137,12138,12139,12140,12141,12142,12143,12144,12145,12146, #12080
|
|
||||||
12147,12148,12149,12150,12151,12152,12153,12154,4980,12155,12156,12157,12158,12159,12160,4494, #12096
|
|
||||||
12161,12162,12163,12164,3811,12165,12166,12167,12168,12169,4495,12170,12171,4496,12172,12173, #12112
|
|
||||||
12174,12175,12176,3812,12177,12178,12179,12180,12181,12182,12183,12184,12185,12186,12187,12188, #12128
|
|
||||||
12189,12190,12191,12192,12193,12194,12195,12196,12197,12198,12199,12200,12201,12202,12203,12204, #12144
|
|
||||||
12205,12206,12207,12208,12209,12210,12211,12212,12213,12214,12215,12216,12217,12218,12219,12220, #12160
|
|
||||||
12221,4981,12222,12223,12224,12225,12226,12227,12228,12229,12230,12231,12232,12233,12234,12235, #12176
|
|
||||||
4982,12236,12237,12238,12239,12240,12241,12242,12243,12244,12245,4983,12246,12247,12248,12249, #12192
|
|
||||||
4984,12250,12251,12252,12253,12254,12255,12256,12257,12258,12259,12260,12261,12262,12263,12264, #12208
|
|
||||||
4985,12265,4497,12266,12267,12268,12269,12270,12271,12272,12273,12274,12275,12276,12277,12278, #12224
|
|
||||||
12279,12280,12281,12282,12283,12284,12285,12286,12287,4986,12288,12289,12290,12291,12292,12293, #12240
|
|
||||||
12294,12295,12296,2473,12297,12298,12299,12300,12301,12302,12303,12304,12305,12306,12307,12308, #12256
|
|
||||||
12309,12310,12311,12312,12313,12314,12315,12316,12317,12318,12319,3963,12320,12321,12322,12323, #12272
|
|
||||||
12324,12325,12326,12327,12328,12329,12330,12331,12332,4987,12333,12334,12335,12336,12337,12338, #12288
|
|
||||||
12339,12340,12341,12342,12343,12344,12345,12346,12347,12348,12349,12350,12351,12352,12353,12354, #12304
|
|
||||||
12355,12356,12357,12358,12359,3964,12360,12361,12362,12363,12364,12365,12366,12367,12368,12369, #12320
|
|
||||||
12370,3965,12371,12372,12373,12374,12375,12376,12377,12378,12379,12380,12381,12382,12383,12384, #12336
|
|
||||||
12385,12386,12387,12388,12389,12390,12391,12392,12393,12394,12395,12396,12397,12398,12399,12400, #12352
|
|
||||||
12401,12402,12403,12404,12405,12406,12407,12408,4988,12409,12410,12411,12412,12413,12414,12415, #12368
|
|
||||||
12416,12417,12418,12419,12420,12421,12422,12423,12424,12425,12426,12427,12428,12429,12430,12431, #12384
|
|
||||||
12432,12433,12434,12435,12436,12437,12438,3554,12439,12440,12441,12442,12443,12444,12445,12446, #12400
|
|
||||||
12447,12448,12449,12450,12451,12452,12453,12454,12455,12456,12457,12458,12459,12460,12461,12462, #12416
|
|
||||||
12463,12464,4989,12465,12466,12467,12468,12469,12470,12471,12472,12473,12474,12475,12476,12477, #12432
|
|
||||||
12478,12479,12480,4990,12481,12482,12483,12484,12485,12486,12487,12488,12489,4498,12490,12491, #12448
|
|
||||||
12492,12493,12494,12495,12496,12497,12498,12499,12500,12501,12502,12503,12504,12505,12506,12507, #12464
|
|
||||||
12508,12509,12510,12511,12512,12513,12514,12515,12516,12517,12518,12519,12520,12521,12522,12523, #12480
|
|
||||||
12524,12525,12526,12527,12528,12529,12530,12531,12532,12533,12534,12535,12536,12537,12538,12539, #12496
|
|
||||||
12540,12541,12542,12543,12544,12545,12546,12547,12548,12549,12550,12551,4991,12552,12553,12554, #12512
|
|
||||||
12555,12556,12557,12558,12559,12560,12561,12562,12563,12564,12565,12566,12567,12568,12569,12570, #12528
|
|
||||||
12571,12572,12573,12574,12575,12576,12577,12578,3036,12579,12580,12581,12582,12583,3966,12584, #12544
|
|
||||||
12585,12586,12587,12588,12589,12590,12591,12592,12593,12594,12595,12596,12597,12598,12599,12600, #12560
|
|
||||||
12601,12602,12603,12604,12605,12606,12607,12608,12609,12610,12611,12612,12613,12614,12615,12616, #12576
|
|
||||||
12617,12618,12619,12620,12621,12622,12623,12624,12625,12626,12627,12628,12629,12630,12631,12632, #12592
|
|
||||||
12633,12634,12635,12636,12637,12638,12639,12640,12641,12642,12643,12644,12645,12646,4499,12647, #12608
|
|
||||||
12648,12649,12650,12651,12652,12653,12654,12655,12656,12657,12658,12659,12660,12661,12662,12663, #12624
|
|
||||||
12664,12665,12666,12667,12668,12669,12670,12671,12672,12673,12674,12675,12676,12677,12678,12679, #12640
|
|
||||||
12680,12681,12682,12683,12684,12685,12686,12687,12688,12689,12690,12691,12692,12693,12694,12695, #12656
|
|
||||||
12696,12697,12698,4992,12699,12700,12701,12702,12703,12704,12705,12706,12707,12708,12709,12710, #12672
|
|
||||||
12711,12712,12713,12714,12715,12716,12717,12718,12719,12720,12721,12722,12723,12724,12725,12726, #12688
|
|
||||||
12727,12728,12729,12730,12731,12732,12733,12734,12735,12736,12737,12738,12739,12740,12741,12742, #12704
|
|
||||||
12743,12744,12745,12746,12747,12748,12749,12750,12751,12752,12753,12754,12755,12756,12757,12758, #12720
|
|
||||||
12759,12760,12761,12762,12763,12764,12765,12766,12767,12768,12769,12770,12771,12772,12773,12774, #12736
|
|
||||||
12775,12776,12777,12778,4993,2175,12779,12780,12781,12782,12783,12784,12785,12786,4500,12787, #12752
|
|
||||||
12788,12789,12790,12791,12792,12793,12794,12795,12796,12797,12798,12799,12800,12801,12802,12803, #12768
|
|
||||||
12804,12805,12806,12807,12808,12809,12810,12811,12812,12813,12814,12815,12816,12817,12818,12819, #12784
|
|
||||||
12820,12821,12822,12823,12824,12825,12826,4198,3967,12827,12828,12829,12830,12831,12832,12833, #12800
|
|
||||||
12834,12835,12836,12837,12838,12839,12840,12841,12842,12843,12844,12845,12846,12847,12848,12849, #12816
|
|
||||||
12850,12851,12852,12853,12854,12855,12856,12857,12858,12859,12860,12861,4199,12862,12863,12864, #12832
|
|
||||||
12865,12866,12867,12868,12869,12870,12871,12872,12873,12874,12875,12876,12877,12878,12879,12880, #12848
|
|
||||||
12881,12882,12883,12884,12885,12886,12887,4501,12888,12889,12890,12891,12892,12893,12894,12895, #12864
|
|
||||||
12896,12897,12898,12899,12900,12901,12902,12903,12904,12905,12906,12907,12908,12909,12910,12911, #12880
|
|
||||||
12912,4994,12913,12914,12915,12916,12917,12918,12919,12920,12921,12922,12923,12924,12925,12926, #12896
|
|
||||||
12927,12928,12929,12930,12931,12932,12933,12934,12935,12936,12937,12938,12939,12940,12941,12942, #12912
|
|
||||||
12943,12944,12945,12946,12947,12948,12949,12950,12951,12952,12953,12954,12955,12956,1772,12957, #12928
|
|
||||||
12958,12959,12960,12961,12962,12963,12964,12965,12966,12967,12968,12969,12970,12971,12972,12973, #12944
|
|
||||||
12974,12975,12976,12977,12978,12979,12980,12981,12982,12983,12984,12985,12986,12987,12988,12989, #12960
|
|
||||||
12990,12991,12992,12993,12994,12995,12996,12997,4502,12998,4503,12999,13000,13001,13002,13003, #12976
|
|
||||||
4504,13004,13005,13006,13007,13008,13009,13010,13011,13012,13013,13014,13015,13016,13017,13018, #12992
|
|
||||||
13019,13020,13021,13022,13023,13024,13025,13026,13027,13028,13029,3449,13030,13031,13032,13033, #13008
|
|
||||||
13034,13035,13036,13037,13038,13039,13040,13041,13042,13043,13044,13045,13046,13047,13048,13049, #13024
|
|
||||||
13050,13051,13052,13053,13054,13055,13056,13057,13058,13059,13060,13061,13062,13063,13064,13065, #13040
|
|
||||||
13066,13067,13068,13069,13070,13071,13072,13073,13074,13075,13076,13077,13078,13079,13080,13081, #13056
|
|
||||||
13082,13083,13084,13085,13086,13087,13088,13089,13090,13091,13092,13093,13094,13095,13096,13097, #13072
|
|
||||||
13098,13099,13100,13101,13102,13103,13104,13105,13106,13107,13108,13109,13110,13111,13112,13113, #13088
|
|
||||||
13114,13115,13116,13117,13118,3968,13119,4995,13120,13121,13122,13123,13124,13125,13126,13127, #13104
|
|
||||||
4505,13128,13129,13130,13131,13132,13133,13134,4996,4506,13135,13136,13137,13138,13139,4997, #13120
|
|
||||||
13140,13141,13142,13143,13144,13145,13146,13147,13148,13149,13150,13151,13152,13153,13154,13155, #13136
|
|
||||||
13156,13157,13158,13159,4998,13160,13161,13162,13163,13164,13165,13166,13167,13168,13169,13170, #13152
|
|
||||||
13171,13172,13173,13174,13175,13176,4999,13177,13178,13179,13180,13181,13182,13183,13184,13185, #13168
|
|
||||||
13186,13187,13188,13189,13190,13191,13192,13193,13194,13195,13196,13197,13198,13199,13200,13201, #13184
|
|
||||||
13202,13203,13204,13205,13206,5000,13207,13208,13209,13210,13211,13212,13213,13214,13215,13216, #13200
|
|
||||||
13217,13218,13219,13220,13221,13222,13223,13224,13225,13226,13227,4200,5001,13228,13229,13230, #13216
|
|
||||||
13231,13232,13233,13234,13235,13236,13237,13238,13239,13240,3969,13241,13242,13243,13244,3970, #13232
|
|
||||||
13245,13246,13247,13248,13249,13250,13251,13252,13253,13254,13255,13256,13257,13258,13259,13260, #13248
|
|
||||||
13261,13262,13263,13264,13265,13266,13267,13268,3450,13269,13270,13271,13272,13273,13274,13275, #13264
|
|
||||||
13276,5002,13277,13278,13279,13280,13281,13282,13283,13284,13285,13286,13287,13288,13289,13290, #13280
|
|
||||||
13291,13292,13293,13294,13295,13296,13297,13298,13299,13300,13301,13302,3813,13303,13304,13305, #13296
|
|
||||||
13306,13307,13308,13309,13310,13311,13312,13313,13314,13315,13316,13317,13318,13319,13320,13321, #13312
|
|
||||||
13322,13323,13324,13325,13326,13327,13328,4507,13329,13330,13331,13332,13333,13334,13335,13336, #13328
|
|
||||||
13337,13338,13339,13340,13341,5003,13342,13343,13344,13345,13346,13347,13348,13349,13350,13351, #13344
|
|
||||||
13352,13353,13354,13355,13356,13357,13358,13359,13360,13361,13362,13363,13364,13365,13366,13367, #13360
|
|
||||||
5004,13368,13369,13370,13371,13372,13373,13374,13375,13376,13377,13378,13379,13380,13381,13382, #13376
|
|
||||||
13383,13384,13385,13386,13387,13388,13389,13390,13391,13392,13393,13394,13395,13396,13397,13398, #13392
|
|
||||||
13399,13400,13401,13402,13403,13404,13405,13406,13407,13408,13409,13410,13411,13412,13413,13414, #13408
|
|
||||||
13415,13416,13417,13418,13419,13420,13421,13422,13423,13424,13425,13426,13427,13428,13429,13430, #13424
|
|
||||||
13431,13432,4508,13433,13434,13435,4201,13436,13437,13438,13439,13440,13441,13442,13443,13444, #13440
|
|
||||||
13445,13446,13447,13448,13449,13450,13451,13452,13453,13454,13455,13456,13457,5005,13458,13459, #13456
|
|
||||||
13460,13461,13462,13463,13464,13465,13466,13467,13468,13469,13470,4509,13471,13472,13473,13474, #13472
|
|
||||||
13475,13476,13477,13478,13479,13480,13481,13482,13483,13484,13485,13486,13487,13488,13489,13490, #13488
|
|
||||||
13491,13492,13493,13494,13495,13496,13497,13498,13499,13500,13501,13502,13503,13504,13505,13506, #13504
|
|
||||||
13507,13508,13509,13510,13511,13512,13513,13514,13515,13516,13517,13518,13519,13520,13521,13522, #13520
|
|
||||||
13523,13524,13525,13526,13527,13528,13529,13530,13531,13532,13533,13534,13535,13536,13537,13538, #13536
|
|
||||||
13539,13540,13541,13542,13543,13544,13545,13546,13547,13548,13549,13550,13551,13552,13553,13554, #13552
|
|
||||||
13555,13556,13557,13558,13559,13560,13561,13562,13563,13564,13565,13566,13567,13568,13569,13570, #13568
|
|
||||||
13571,13572,13573,13574,13575,13576,13577,13578,13579,13580,13581,13582,13583,13584,13585,13586, #13584
|
|
||||||
13587,13588,13589,13590,13591,13592,13593,13594,13595,13596,13597,13598,13599,13600,13601,13602, #13600
|
|
||||||
13603,13604,13605,13606,13607,13608,13609,13610,13611,13612,13613,13614,13615,13616,13617,13618, #13616
|
|
||||||
13619,13620,13621,13622,13623,13624,13625,13626,13627,13628,13629,13630,13631,13632,13633,13634, #13632
|
|
||||||
13635,13636,13637,13638,13639,13640,13641,13642,5006,13643,13644,13645,13646,13647,13648,13649, #13648
|
|
||||||
13650,13651,5007,13652,13653,13654,13655,13656,13657,13658,13659,13660,13661,13662,13663,13664, #13664
|
|
||||||
13665,13666,13667,13668,13669,13670,13671,13672,13673,13674,13675,13676,13677,13678,13679,13680, #13680
|
|
||||||
13681,13682,13683,13684,13685,13686,13687,13688,13689,13690,13691,13692,13693,13694,13695,13696, #13696
|
|
||||||
13697,13698,13699,13700,13701,13702,13703,13704,13705,13706,13707,13708,13709,13710,13711,13712, #13712
|
|
||||||
13713,13714,13715,13716,13717,13718,13719,13720,13721,13722,13723,13724,13725,13726,13727,13728, #13728
|
|
||||||
13729,13730,13731,13732,13733,13734,13735,13736,13737,13738,13739,13740,13741,13742,13743,13744, #13744
|
|
||||||
13745,13746,13747,13748,13749,13750,13751,13752,13753,13754,13755,13756,13757,13758,13759,13760, #13760
|
|
||||||
13761,13762,13763,13764,13765,13766,13767,13768,13769,13770,13771,13772,13773,13774,3273,13775, #13776
|
|
||||||
13776,13777,13778,13779,13780,13781,13782,13783,13784,13785,13786,13787,13788,13789,13790,13791, #13792
|
|
||||||
13792,13793,13794,13795,13796,13797,13798,13799,13800,13801,13802,13803,13804,13805,13806,13807, #13808
|
|
||||||
13808,13809,13810,13811,13812,13813,13814,13815,13816,13817,13818,13819,13820,13821,13822,13823, #13824
|
|
||||||
13824,13825,13826,13827,13828,13829,13830,13831,13832,13833,13834,13835,13836,13837,13838,13839, #13840
|
|
||||||
13840,13841,13842,13843,13844,13845,13846,13847,13848,13849,13850,13851,13852,13853,13854,13855, #13856
|
|
||||||
13856,13857,13858,13859,13860,13861,13862,13863,13864,13865,13866,13867,13868,13869,13870,13871, #13872
|
|
||||||
13872,13873,13874,13875,13876,13877,13878,13879,13880,13881,13882,13883,13884,13885,13886,13887, #13888
|
|
||||||
13888,13889,13890,13891,13892,13893,13894,13895,13896,13897,13898,13899,13900,13901,13902,13903, #13904
|
|
||||||
13904,13905,13906,13907,13908,13909,13910,13911,13912,13913,13914,13915,13916,13917,13918,13919, #13920
|
|
||||||
13920,13921,13922,13923,13924,13925,13926,13927,13928,13929,13930,13931,13932,13933,13934,13935, #13936
|
|
||||||
13936,13937,13938,13939,13940,13941,13942,13943,13944,13945,13946,13947,13948,13949,13950,13951, #13952
|
|
||||||
13952,13953,13954,13955,13956,13957,13958,13959,13960,13961,13962,13963,13964,13965,13966,13967, #13968
|
|
||||||
13968,13969,13970,13971,13972) #13973
|
|
||||||
|
|
||||||
# flake8: noqa
|
|
||||||
|
|
15
thirdparty/chardet/big5prober.py
vendored
15
thirdparty/chardet/big5prober.py
vendored
|
@ -28,15 +28,20 @@
|
||||||
from .mbcharsetprober import MultiByteCharSetProber
|
from .mbcharsetprober import MultiByteCharSetProber
|
||||||
from .codingstatemachine import CodingStateMachine
|
from .codingstatemachine import CodingStateMachine
|
||||||
from .chardistribution import Big5DistributionAnalysis
|
from .chardistribution import Big5DistributionAnalysis
|
||||||
from .mbcssm import Big5SMModel
|
from .mbcssm import BIG5_SM_MODEL
|
||||||
|
|
||||||
|
|
||||||
class Big5Prober(MultiByteCharSetProber):
|
class Big5Prober(MultiByteCharSetProber):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
MultiByteCharSetProber.__init__(self)
|
super(Big5Prober, self).__init__()
|
||||||
self._mCodingSM = CodingStateMachine(Big5SMModel)
|
self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
|
||||||
self._mDistributionAnalyzer = Big5DistributionAnalysis()
|
self.distribution_analyzer = Big5DistributionAnalysis()
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
|
def charset_name(self):
|
||||||
return "Big5"
|
return "Big5"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def language(self):
|
||||||
|
return "Chinese"
|
||||||
|
|
80
thirdparty/chardet/chardetect.py
vendored
80
thirdparty/chardet/chardetect.py
vendored
|
@ -1,80 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
"""
|
|
||||||
Script which takes one or more file paths and reports on their detected
|
|
||||||
encodings
|
|
||||||
|
|
||||||
Example::
|
|
||||||
|
|
||||||
% chardetect somefile someotherfile
|
|
||||||
somefile: windows-1252 with confidence 0.5
|
|
||||||
someotherfile: ascii with confidence 1.0
|
|
||||||
|
|
||||||
If no paths are provided, it takes its input from stdin.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import, print_function, unicode_literals
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import sys
|
|
||||||
from io import open
|
|
||||||
|
|
||||||
from chardet import __version__
|
|
||||||
from chardet.universaldetector import UniversalDetector
|
|
||||||
|
|
||||||
|
|
||||||
def description_of(lines, name='stdin'):
|
|
||||||
"""
|
|
||||||
Return a string describing the probable encoding of a file or
|
|
||||||
list of strings.
|
|
||||||
|
|
||||||
:param lines: The lines to get the encoding of.
|
|
||||||
:type lines: Iterable of bytes
|
|
||||||
:param name: Name of file or collection of lines
|
|
||||||
:type name: str
|
|
||||||
"""
|
|
||||||
u = UniversalDetector()
|
|
||||||
for line in lines:
|
|
||||||
u.feed(line)
|
|
||||||
u.close()
|
|
||||||
result = u.result
|
|
||||||
if result['encoding']:
|
|
||||||
return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
|
|
||||||
result['confidence'])
|
|
||||||
else:
|
|
||||||
return '{0}: no result'.format(name)
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv=None):
|
|
||||||
'''
|
|
||||||
Handles command line arguments and gets things started.
|
|
||||||
|
|
||||||
:param argv: List of arguments, as if specified on the command-line.
|
|
||||||
If None, ``sys.argv[1:]`` is used instead.
|
|
||||||
:type argv: list of str
|
|
||||||
'''
|
|
||||||
# Get command line arguments
|
|
||||||
parser = argparse.ArgumentParser(
|
|
||||||
description="Takes one or more file paths and reports their detected \
|
|
||||||
encodings",
|
|
||||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
||||||
conflict_handler='resolve')
|
|
||||||
parser.add_argument('input',
|
|
||||||
help='File whose encoding we would like to determine.',
|
|
||||||
type=argparse.FileType('rb'), nargs='*',
|
|
||||||
default=[sys.stdin])
|
|
||||||
parser.add_argument('--version', action='version',
|
|
||||||
version='%(prog)s {0}'.format(__version__))
|
|
||||||
args = parser.parse_args(argv)
|
|
||||||
|
|
||||||
for f in args.input:
|
|
||||||
if f.isatty():
|
|
||||||
print("You are running chardetect interactively. Press " +
|
|
||||||
"CTRL-D twice at the start of a blank line to signal the " +
|
|
||||||
"end of your input. If you want help, run chardetect " +
|
|
||||||
"--help\n", file=sys.stderr)
|
|
||||||
print(description_of(f, f.name))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
148
thirdparty/chardet/chardistribution.py
vendored
148
thirdparty/chardet/chardistribution.py
vendored
|
@ -25,82 +25,84 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,
|
from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE,
|
||||||
EUCTW_TYPICAL_DISTRIBUTION_RATIO)
|
EUCTW_TYPICAL_DISTRIBUTION_RATIO)
|
||||||
from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,
|
from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE,
|
||||||
EUCKR_TYPICAL_DISTRIBUTION_RATIO)
|
EUCKR_TYPICAL_DISTRIBUTION_RATIO)
|
||||||
from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,
|
from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE,
|
||||||
GB2312_TYPICAL_DISTRIBUTION_RATIO)
|
GB2312_TYPICAL_DISTRIBUTION_RATIO)
|
||||||
from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,
|
from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE,
|
||||||
BIG5_TYPICAL_DISTRIBUTION_RATIO)
|
BIG5_TYPICAL_DISTRIBUTION_RATIO)
|
||||||
from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,
|
from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE,
|
||||||
JIS_TYPICAL_DISTRIBUTION_RATIO)
|
JIS_TYPICAL_DISTRIBUTION_RATIO)
|
||||||
from .compat import wrap_ord
|
|
||||||
|
|
||||||
ENOUGH_DATA_THRESHOLD = 1024
|
|
||||||
SURE_YES = 0.99
|
|
||||||
SURE_NO = 0.01
|
|
||||||
MINIMUM_DATA_THRESHOLD = 3
|
|
||||||
|
|
||||||
|
|
||||||
class CharDistributionAnalysis:
|
class CharDistributionAnalysis(object):
|
||||||
|
ENOUGH_DATA_THRESHOLD = 1024
|
||||||
|
SURE_YES = 0.99
|
||||||
|
SURE_NO = 0.01
|
||||||
|
MINIMUM_DATA_THRESHOLD = 3
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# Mapping table to get frequency order from char order (get from
|
# Mapping table to get frequency order from char order (get from
|
||||||
# GetOrder())
|
# GetOrder())
|
||||||
self._mCharToFreqOrder = None
|
self._char_to_freq_order = None
|
||||||
self._mTableSize = None # Size of above table
|
self._table_size = None # Size of above table
|
||||||
# This is a constant value which varies from language to language,
|
# This is a constant value which varies from language to language,
|
||||||
# used in calculating confidence. See
|
# used in calculating confidence. See
|
||||||
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
|
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
|
||||||
# for further detail.
|
# for further detail.
|
||||||
self._mTypicalDistributionRatio = None
|
self.typical_distribution_ratio = None
|
||||||
|
self._done = None
|
||||||
|
self._total_chars = None
|
||||||
|
self._freq_chars = None
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
"""reset analyser, clear any state"""
|
"""reset analyser, clear any state"""
|
||||||
# If this flag is set to True, detection is done and conclusion has
|
# If this flag is set to True, detection is done and conclusion has
|
||||||
# been made
|
# been made
|
||||||
self._mDone = False
|
self._done = False
|
||||||
self._mTotalChars = 0 # Total characters encountered
|
self._total_chars = 0 # Total characters encountered
|
||||||
# The number of characters whose frequency order is less than 512
|
# The number of characters whose frequency order is less than 512
|
||||||
self._mFreqChars = 0
|
self._freq_chars = 0
|
||||||
|
|
||||||
def feed(self, aBuf, aCharLen):
|
def feed(self, char, char_len):
|
||||||
"""feed a character with known length"""
|
"""feed a character with known length"""
|
||||||
if aCharLen == 2:
|
if char_len == 2:
|
||||||
# we only care about 2-bytes character in our distribution analysis
|
# we only care about 2-bytes character in our distribution analysis
|
||||||
order = self.get_order(aBuf)
|
order = self.get_order(char)
|
||||||
else:
|
else:
|
||||||
order = -1
|
order = -1
|
||||||
if order >= 0:
|
if order >= 0:
|
||||||
self._mTotalChars += 1
|
self._total_chars += 1
|
||||||
# order is valid
|
# order is valid
|
||||||
if order < self._mTableSize:
|
if order < self._table_size:
|
||||||
if 512 > self._mCharToFreqOrder[order]:
|
if 512 > self._char_to_freq_order[order]:
|
||||||
self._mFreqChars += 1
|
self._freq_chars += 1
|
||||||
|
|
||||||
def get_confidence(self):
|
def get_confidence(self):
|
||||||
"""return confidence based on existing data"""
|
"""return confidence based on existing data"""
|
||||||
# if we didn't receive any character in our consideration range,
|
# if we didn't receive any character in our consideration range,
|
||||||
# return negative answer
|
# return negative answer
|
||||||
if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD:
|
if self._total_chars <= 0 or self._freq_chars <= self.MINIMUM_DATA_THRESHOLD:
|
||||||
return SURE_NO
|
return self.SURE_NO
|
||||||
|
|
||||||
if self._mTotalChars != self._mFreqChars:
|
if self._total_chars != self._freq_chars:
|
||||||
r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars)
|
r = (self._freq_chars / ((self._total_chars - self._freq_chars)
|
||||||
* self._mTypicalDistributionRatio))
|
* self.typical_distribution_ratio))
|
||||||
if r < SURE_YES:
|
if r < self.SURE_YES:
|
||||||
return r
|
return r
|
||||||
|
|
||||||
# normalize confidence (we don't want to be 100% sure)
|
# normalize confidence (we don't want to be 100% sure)
|
||||||
return SURE_YES
|
return self.SURE_YES
|
||||||
|
|
||||||
def got_enough_data(self):
|
def got_enough_data(self):
|
||||||
# It is not necessary to receive all data to draw conclusion.
|
# It is not necessary to receive all data to draw conclusion.
|
||||||
# For charset detection, certain amount of data is enough
|
# For charset detection, certain amount of data is enough
|
||||||
return self._mTotalChars > ENOUGH_DATA_THRESHOLD
|
return self._total_chars > self.ENOUGH_DATA_THRESHOLD
|
||||||
|
|
||||||
def get_order(self, aBuf):
|
def get_order(self, byte_str):
|
||||||
# We do not handle characters based on the original encoding string,
|
# We do not handle characters based on the original encoding string,
|
||||||
# but convert this encoding string to a number, here called order.
|
# but convert this encoding string to a number, here called order.
|
||||||
# This allows multiple encodings of a language to share one frequency
|
# This allows multiple encodings of a language to share one frequency
|
||||||
|
@ -110,55 +112,55 @@ class CharDistributionAnalysis:
|
||||||
|
|
||||||
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
|
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
CharDistributionAnalysis.__init__(self)
|
super(EUCTWDistributionAnalysis, self).__init__()
|
||||||
self._mCharToFreqOrder = EUCTWCharToFreqOrder
|
self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
|
||||||
self._mTableSize = EUCTW_TABLE_SIZE
|
self._table_size = EUCTW_TABLE_SIZE
|
||||||
self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
|
self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
|
||||||
|
|
||||||
def get_order(self, aBuf):
|
def get_order(self, byte_str):
|
||||||
# for euc-TW encoding, we are interested
|
# for euc-TW encoding, we are interested
|
||||||
# first byte range: 0xc4 -- 0xfe
|
# first byte range: 0xc4 -- 0xfe
|
||||||
# second byte range: 0xa1 -- 0xfe
|
# second byte range: 0xa1 -- 0xfe
|
||||||
# no validation needed here. State machine has done that
|
# no validation needed here. State machine has done that
|
||||||
first_char = wrap_ord(aBuf[0])
|
first_char = byte_str[0]
|
||||||
if first_char >= 0xC4:
|
if first_char >= 0xC4:
|
||||||
return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1
|
return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1
|
||||||
else:
|
else:
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
|
|
||||||
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
|
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
CharDistributionAnalysis.__init__(self)
|
super(EUCKRDistributionAnalysis, self).__init__()
|
||||||
self._mCharToFreqOrder = EUCKRCharToFreqOrder
|
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
|
||||||
self._mTableSize = EUCKR_TABLE_SIZE
|
self._table_size = EUCKR_TABLE_SIZE
|
||||||
self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
|
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
|
||||||
|
|
||||||
def get_order(self, aBuf):
|
def get_order(self, byte_str):
|
||||||
# for euc-KR encoding, we are interested
|
# for euc-KR encoding, we are interested
|
||||||
# first byte range: 0xb0 -- 0xfe
|
# first byte range: 0xb0 -- 0xfe
|
||||||
# second byte range: 0xa1 -- 0xfe
|
# second byte range: 0xa1 -- 0xfe
|
||||||
# no validation needed here. State machine has done that
|
# no validation needed here. State machine has done that
|
||||||
first_char = wrap_ord(aBuf[0])
|
first_char = byte_str[0]
|
||||||
if first_char >= 0xB0:
|
if first_char >= 0xB0:
|
||||||
return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1
|
return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1
|
||||||
else:
|
else:
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
|
|
||||||
class GB2312DistributionAnalysis(CharDistributionAnalysis):
|
class GB2312DistributionAnalysis(CharDistributionAnalysis):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
CharDistributionAnalysis.__init__(self)
|
super(GB2312DistributionAnalysis, self).__init__()
|
||||||
self._mCharToFreqOrder = GB2312CharToFreqOrder
|
self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
|
||||||
self._mTableSize = GB2312_TABLE_SIZE
|
self._table_size = GB2312_TABLE_SIZE
|
||||||
self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
|
self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
|
||||||
|
|
||||||
def get_order(self, aBuf):
|
def get_order(self, byte_str):
|
||||||
# for GB2312 encoding, we are interested
|
# for GB2312 encoding, we are interested
|
||||||
# first byte range: 0xb0 -- 0xfe
|
# first byte range: 0xb0 -- 0xfe
|
||||||
# second byte range: 0xa1 -- 0xfe
|
# second byte range: 0xa1 -- 0xfe
|
||||||
# no validation needed here. State machine has done that
|
# no validation needed here. State machine has done that
|
||||||
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
|
first_char, second_char = byte_str[0], byte_str[1]
|
||||||
if (first_char >= 0xB0) and (second_char >= 0xA1):
|
if (first_char >= 0xB0) and (second_char >= 0xA1):
|
||||||
return 94 * (first_char - 0xB0) + second_char - 0xA1
|
return 94 * (first_char - 0xB0) + second_char - 0xA1
|
||||||
else:
|
else:
|
||||||
|
@ -167,17 +169,17 @@ class GB2312DistributionAnalysis(CharDistributionAnalysis):
|
||||||
|
|
||||||
class Big5DistributionAnalysis(CharDistributionAnalysis):
|
class Big5DistributionAnalysis(CharDistributionAnalysis):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
CharDistributionAnalysis.__init__(self)
|
super(Big5DistributionAnalysis, self).__init__()
|
||||||
self._mCharToFreqOrder = Big5CharToFreqOrder
|
self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
|
||||||
self._mTableSize = BIG5_TABLE_SIZE
|
self._table_size = BIG5_TABLE_SIZE
|
||||||
self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
|
self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
|
||||||
|
|
||||||
def get_order(self, aBuf):
|
def get_order(self, byte_str):
|
||||||
# for big5 encoding, we are interested
|
# for big5 encoding, we are interested
|
||||||
# first byte range: 0xa4 -- 0xfe
|
# first byte range: 0xa4 -- 0xfe
|
||||||
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
|
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
|
||||||
# no validation needed here. State machine has done that
|
# no validation needed here. State machine has done that
|
||||||
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
|
first_char, second_char = byte_str[0], byte_str[1]
|
||||||
if first_char >= 0xA4:
|
if first_char >= 0xA4:
|
||||||
if second_char >= 0xA1:
|
if second_char >= 0xA1:
|
||||||
return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
|
return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
|
||||||
|
@ -189,17 +191,17 @@ class Big5DistributionAnalysis(CharDistributionAnalysis):
|
||||||
|
|
||||||
class SJISDistributionAnalysis(CharDistributionAnalysis):
|
class SJISDistributionAnalysis(CharDistributionAnalysis):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
CharDistributionAnalysis.__init__(self)
|
super(SJISDistributionAnalysis, self).__init__()
|
||||||
self._mCharToFreqOrder = JISCharToFreqOrder
|
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
|
||||||
self._mTableSize = JIS_TABLE_SIZE
|
self._table_size = JIS_TABLE_SIZE
|
||||||
self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||||
|
|
||||||
def get_order(self, aBuf):
|
def get_order(self, byte_str):
|
||||||
# for sjis encoding, we are interested
|
# for sjis encoding, we are interested
|
||||||
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
|
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
|
||||||
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
|
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
|
||||||
# no validation needed here. State machine has done that
|
# no validation needed here. State machine has done that
|
||||||
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
|
first_char, second_char = byte_str[0], byte_str[1]
|
||||||
if (first_char >= 0x81) and (first_char <= 0x9F):
|
if (first_char >= 0x81) and (first_char <= 0x9F):
|
||||||
order = 188 * (first_char - 0x81)
|
order = 188 * (first_char - 0x81)
|
||||||
elif (first_char >= 0xE0) and (first_char <= 0xEF):
|
elif (first_char >= 0xE0) and (first_char <= 0xEF):
|
||||||
|
@ -214,18 +216,18 @@ class SJISDistributionAnalysis(CharDistributionAnalysis):
|
||||||
|
|
||||||
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
|
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
CharDistributionAnalysis.__init__(self)
|
super(EUCJPDistributionAnalysis, self).__init__()
|
||||||
self._mCharToFreqOrder = JISCharToFreqOrder
|
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
|
||||||
self._mTableSize = JIS_TABLE_SIZE
|
self._table_size = JIS_TABLE_SIZE
|
||||||
self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||||
|
|
||||||
def get_order(self, aBuf):
|
def get_order(self, byte_str):
|
||||||
# for euc-JP encoding, we are interested
|
# for euc-JP encoding, we are interested
|
||||||
# first byte range: 0xa0 -- 0xfe
|
# first byte range: 0xa0 -- 0xfe
|
||||||
# second byte range: 0xa1 -- 0xfe
|
# second byte range: 0xa1 -- 0xfe
|
||||||
# no validation needed here. State machine has done that
|
# no validation needed here. State machine has done that
|
||||||
char = wrap_ord(aBuf[0])
|
char = byte_str[0]
|
||||||
if char >= 0xA0:
|
if char >= 0xA0:
|
||||||
return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1
|
return 94 * (char - 0xA1) + byte_str[1] - 0xa1
|
||||||
else:
|
else:
|
||||||
return -1
|
return -1
|
||||||
|
|
102
thirdparty/chardet/charsetgroupprober.py
vendored
102
thirdparty/chardet/charsetgroupprober.py
vendored
|
@ -25,82 +25,82 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
from . import constants
|
from .enums import ProbingState
|
||||||
import sys
|
|
||||||
from .charsetprober import CharSetProber
|
from .charsetprober import CharSetProber
|
||||||
|
|
||||||
|
|
||||||
class CharSetGroupProber(CharSetProber):
|
class CharSetGroupProber(CharSetProber):
|
||||||
def __init__(self):
|
def __init__(self, lang_filter=None):
|
||||||
CharSetProber.__init__(self)
|
super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
|
||||||
self._mActiveNum = 0
|
self._active_num = 0
|
||||||
self._mProbers = []
|
self.probers = []
|
||||||
self._mBestGuessProber = None
|
self._best_guess_prober = None
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
CharSetProber.reset(self)
|
super(CharSetGroupProber, self).reset()
|
||||||
self._mActiveNum = 0
|
self._active_num = 0
|
||||||
for prober in self._mProbers:
|
for prober in self.probers:
|
||||||
if prober:
|
if prober:
|
||||||
prober.reset()
|
prober.reset()
|
||||||
prober.active = True
|
prober.active = True
|
||||||
self._mActiveNum += 1
|
self._active_num += 1
|
||||||
self._mBestGuessProber = None
|
self._best_guess_prober = None
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
if not self._mBestGuessProber:
|
def charset_name(self):
|
||||||
|
if not self._best_guess_prober:
|
||||||
self.get_confidence()
|
self.get_confidence()
|
||||||
if not self._mBestGuessProber:
|
if not self._best_guess_prober:
|
||||||
return None
|
return None
|
||||||
# self._mBestGuessProber = self._mProbers[0]
|
return self._best_guess_prober.charset_name
|
||||||
return self._mBestGuessProber.get_charset_name()
|
|
||||||
|
|
||||||
def feed(self, aBuf):
|
@property
|
||||||
for prober in self._mProbers:
|
def language(self):
|
||||||
|
if not self._best_guess_prober:
|
||||||
|
self.get_confidence()
|
||||||
|
if not self._best_guess_prober:
|
||||||
|
return None
|
||||||
|
return self._best_guess_prober.language
|
||||||
|
|
||||||
|
def feed(self, byte_str):
|
||||||
|
for prober in self.probers:
|
||||||
if not prober:
|
if not prober:
|
||||||
continue
|
continue
|
||||||
if not prober.active:
|
if not prober.active:
|
||||||
continue
|
continue
|
||||||
st = prober.feed(aBuf)
|
state = prober.feed(byte_str)
|
||||||
if not st:
|
if not state:
|
||||||
continue
|
continue
|
||||||
if st == constants.eFoundIt:
|
if state == ProbingState.FOUND_IT:
|
||||||
self._mBestGuessProber = prober
|
self._best_guess_prober = prober
|
||||||
return self.get_state()
|
return self.state
|
||||||
elif st == constants.eNotMe:
|
elif state == ProbingState.NOT_ME:
|
||||||
prober.active = False
|
prober.active = False
|
||||||
self._mActiveNum -= 1
|
self._active_num -= 1
|
||||||
if self._mActiveNum <= 0:
|
if self._active_num <= 0:
|
||||||
self._mState = constants.eNotMe
|
self._state = ProbingState.NOT_ME
|
||||||
return self.get_state()
|
return self.state
|
||||||
return self.get_state()
|
return self.state
|
||||||
|
|
||||||
def get_confidence(self):
|
def get_confidence(self):
|
||||||
st = self.get_state()
|
state = self.state
|
||||||
if st == constants.eFoundIt:
|
if state == ProbingState.FOUND_IT:
|
||||||
return 0.99
|
return 0.99
|
||||||
elif st == constants.eNotMe:
|
elif state == ProbingState.NOT_ME:
|
||||||
return 0.01
|
return 0.01
|
||||||
bestConf = 0.0
|
best_conf = 0.0
|
||||||
self._mBestGuessProber = None
|
self._best_guess_prober = None
|
||||||
for prober in self._mProbers:
|
for prober in self.probers:
|
||||||
if not prober:
|
if not prober:
|
||||||
continue
|
continue
|
||||||
if not prober.active:
|
if not prober.active:
|
||||||
if constants._debug:
|
self.logger.debug('%s not active', prober.charset_name)
|
||||||
sys.stderr.write(prober.get_charset_name()
|
|
||||||
+ ' not active\n')
|
|
||||||
continue
|
continue
|
||||||
cf = prober.get_confidence()
|
conf = prober.get_confidence()
|
||||||
if constants._debug:
|
self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
|
||||||
sys.stderr.write('%s confidence = %s\n' %
|
if best_conf < conf:
|
||||||
(prober.get_charset_name(), cf))
|
best_conf = conf
|
||||||
if bestConf < cf:
|
self._best_guess_prober = prober
|
||||||
bestConf = cf
|
if not self._best_guess_prober:
|
||||||
self._mBestGuessProber = prober
|
|
||||||
if not self._mBestGuessProber:
|
|
||||||
return 0.0
|
return 0.0
|
||||||
return bestConf
|
return best_conf
|
||||||
# else:
|
|
||||||
# self._mBestGuessProber = self._mProbers[0]
|
|
||||||
# return self._mBestGuessProber.get_confidence()
|
|
||||||
|
|
119
thirdparty/chardet/charsetprober.py
vendored
119
thirdparty/chardet/charsetprober.py
vendored
|
@ -26,37 +26,120 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
from . import constants
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from .enums import ProbingState
|
||||||
|
|
||||||
class CharSetProber:
|
|
||||||
def __init__(self):
|
class CharSetProber(object):
|
||||||
pass
|
|
||||||
|
SHORTCUT_THRESHOLD = 0.95
|
||||||
|
|
||||||
|
def __init__(self, lang_filter=None):
|
||||||
|
self._state = None
|
||||||
|
self.lang_filter = lang_filter
|
||||||
|
self.logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self._mState = constants.eDetecting
|
self._state = ProbingState.DETECTING
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
|
def charset_name(self):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def feed(self, aBuf):
|
def feed(self, buf):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def get_state(self):
|
@property
|
||||||
return self._mState
|
def state(self):
|
||||||
|
return self._state
|
||||||
|
|
||||||
def get_confidence(self):
|
def get_confidence(self):
|
||||||
return 0.0
|
return 0.0
|
||||||
|
|
||||||
def filter_high_bit_only(self, aBuf):
|
@staticmethod
|
||||||
aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf)
|
def filter_high_byte_only(buf):
|
||||||
return aBuf
|
buf = re.sub(b'([\x00-\x7F])+', b' ', buf)
|
||||||
|
return buf
|
||||||
|
|
||||||
def filter_without_english_letters(self, aBuf):
|
@staticmethod
|
||||||
aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf)
|
def filter_international_words(buf):
|
||||||
return aBuf
|
"""
|
||||||
|
We define three types of bytes:
|
||||||
|
alphabet: english alphabets [a-zA-Z]
|
||||||
|
international: international characters [\x80-\xFF]
|
||||||
|
marker: everything else [^a-zA-Z\x80-\xFF]
|
||||||
|
|
||||||
def filter_with_english_letters(self, aBuf):
|
The input buffer can be thought to contain a series of words delimited
|
||||||
# TODO
|
by markers. This function works to filter all words that contain at
|
||||||
return aBuf
|
least one international character. All contiguous sequences of markers
|
||||||
|
are replaced by a single space ascii character.
|
||||||
|
|
||||||
|
This filter applies to all scripts which do not use English characters.
|
||||||
|
"""
|
||||||
|
filtered = bytearray()
|
||||||
|
|
||||||
|
# This regex expression filters out only words that have at-least one
|
||||||
|
# international character. The word may include one marker character at
|
||||||
|
# the end.
|
||||||
|
words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?',
|
||||||
|
buf)
|
||||||
|
|
||||||
|
for word in words:
|
||||||
|
filtered.extend(word[:-1])
|
||||||
|
|
||||||
|
# If the last character in the word is a marker, replace it with a
|
||||||
|
# space as markers shouldn't affect our analysis (they are used
|
||||||
|
# similarly across all languages and may thus have similar
|
||||||
|
# frequencies).
|
||||||
|
last_char = word[-1:]
|
||||||
|
if not last_char.isalpha() and last_char < b'\x80':
|
||||||
|
last_char = b' '
|
||||||
|
filtered.extend(last_char)
|
||||||
|
|
||||||
|
return filtered
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def filter_with_english_letters(buf):
|
||||||
|
"""
|
||||||
|
Returns a copy of ``buf`` that retains only the sequences of English
|
||||||
|
alphabet and high byte characters that are not between <> characters.
|
||||||
|
Also retains English alphabet and high byte characters immediately
|
||||||
|
before occurrences of >.
|
||||||
|
|
||||||
|
This filter can be applied to all scripts which contain both English
|
||||||
|
characters and extended ASCII characters, but is currently only used by
|
||||||
|
``Latin1Prober``.
|
||||||
|
"""
|
||||||
|
filtered = bytearray()
|
||||||
|
in_tag = False
|
||||||
|
prev = 0
|
||||||
|
|
||||||
|
for curr in range(len(buf)):
|
||||||
|
# Slice here to get bytes instead of an int with Python 3
|
||||||
|
buf_char = buf[curr:curr + 1]
|
||||||
|
# Check if we're coming out of or entering an HTML tag
|
||||||
|
if buf_char == b'>':
|
||||||
|
in_tag = False
|
||||||
|
elif buf_char == b'<':
|
||||||
|
in_tag = True
|
||||||
|
|
||||||
|
# If current character is not extended-ASCII and not alphabetic...
|
||||||
|
if buf_char < b'\x80' and not buf_char.isalpha():
|
||||||
|
# ...and we're not in a tag
|
||||||
|
if curr > prev and not in_tag:
|
||||||
|
# Keep everything after last non-extended-ASCII,
|
||||||
|
# non-alphabetic character
|
||||||
|
filtered.extend(buf[prev:curr])
|
||||||
|
# Output a space to delimit stretch we kept
|
||||||
|
filtered.extend(b' ')
|
||||||
|
prev = curr + 1
|
||||||
|
|
||||||
|
# If we're not in a tag...
|
||||||
|
if not in_tag:
|
||||||
|
# Keep everything after last non-extended-ASCII, non-alphabetic
|
||||||
|
# character
|
||||||
|
filtered.extend(buf[prev:])
|
||||||
|
|
||||||
|
return filtered
|
||||||
|
|
67
thirdparty/chardet/codingstatemachine.py
vendored
67
thirdparty/chardet/codingstatemachine.py
vendored
|
@ -25,37 +25,64 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
from .constants import eStart
|
import logging
|
||||||
from .compat import wrap_ord
|
|
||||||
|
from .enums import MachineState
|
||||||
|
|
||||||
|
|
||||||
class CodingStateMachine:
|
class CodingStateMachine(object):
|
||||||
|
"""
|
||||||
|
A state machine to verify a byte sequence for a particular encoding. For
|
||||||
|
each byte the detector receives, it will feed that byte to every active
|
||||||
|
state machine available, one byte at a time. The state machine changes its
|
||||||
|
state based on its previous state and the byte it receives. There are 3
|
||||||
|
states in a state machine that are of interest to an auto-detector:
|
||||||
|
|
||||||
|
START state: This is the state to start with, or a legal byte sequence
|
||||||
|
(i.e. a valid code point) for character has been identified.
|
||||||
|
|
||||||
|
ME state: This indicates that the state machine identified a byte sequence
|
||||||
|
that is specific to the charset it is designed for and that
|
||||||
|
there is no other possible encoding which can contain this byte
|
||||||
|
sequence. This will to lead to an immediate positive answer for
|
||||||
|
the detector.
|
||||||
|
|
||||||
|
ERROR state: This indicates the state machine identified an illegal byte
|
||||||
|
sequence for that encoding. This will lead to an immediate
|
||||||
|
negative answer for this encoding. Detector will exclude this
|
||||||
|
encoding from consideration from here on.
|
||||||
|
"""
|
||||||
def __init__(self, sm):
|
def __init__(self, sm):
|
||||||
self._mModel = sm
|
self._model = sm
|
||||||
self._mCurrentBytePos = 0
|
self._curr_byte_pos = 0
|
||||||
self._mCurrentCharLen = 0
|
self._curr_char_len = 0
|
||||||
|
self._curr_state = None
|
||||||
|
self.logger = logging.getLogger(__name__)
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self._mCurrentState = eStart
|
self._curr_state = MachineState.START
|
||||||
|
|
||||||
def next_state(self, c):
|
def next_state(self, c):
|
||||||
# for each byte we get its class
|
# for each byte we get its class
|
||||||
# if it is first byte, we also get byte length
|
# if it is first byte, we also get byte length
|
||||||
# PY3K: aBuf is a byte stream, so c is an int, not a byte
|
byte_class = self._model['class_table'][c]
|
||||||
byteCls = self._mModel['classTable'][wrap_ord(c)]
|
if self._curr_state == MachineState.START:
|
||||||
if self._mCurrentState == eStart:
|
self._curr_byte_pos = 0
|
||||||
self._mCurrentBytePos = 0
|
self._curr_char_len = self._model['char_len_table'][byte_class]
|
||||||
self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
|
# from byte's class and state_table, we get its next state
|
||||||
# from byte's class and stateTable, we get its next state
|
curr_state = (self._curr_state * self._model['class_factor']
|
||||||
curr_state = (self._mCurrentState * self._mModel['classFactor']
|
+ byte_class)
|
||||||
+ byteCls)
|
self._curr_state = self._model['state_table'][curr_state]
|
||||||
self._mCurrentState = self._mModel['stateTable'][curr_state]
|
self._curr_byte_pos += 1
|
||||||
self._mCurrentBytePos += 1
|
return self._curr_state
|
||||||
return self._mCurrentState
|
|
||||||
|
|
||||||
def get_current_charlen(self):
|
def get_current_charlen(self):
|
||||||
return self._mCurrentCharLen
|
return self._curr_char_len
|
||||||
|
|
||||||
def get_coding_state_machine(self):
|
def get_coding_state_machine(self):
|
||||||
return self._mModel['name']
|
return self._model['name']
|
||||||
|
|
||||||
|
@property
|
||||||
|
def language(self):
|
||||||
|
return self._model['language']
|
||||||
|
|
16
thirdparty/chardet/compat.py
vendored
16
thirdparty/chardet/compat.py
vendored
|
@ -1,6 +1,7 @@
|
||||||
######################## BEGIN LICENSE BLOCK ########################
|
######################## BEGIN LICENSE BLOCK ########################
|
||||||
# Contributor(s):
|
# Contributor(s):
|
||||||
# Ian Cordasco - port to Python
|
# Dan Blanchard
|
||||||
|
# Ian Cordasco
|
||||||
#
|
#
|
||||||
# This library is free software; you can redistribute it and/or
|
# This library is free software; you can redistribute it and/or
|
||||||
# modify it under the terms of the GNU Lesser General Public
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
|
@ -22,13 +23,12 @@ import sys
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
|
PY2 = True
|
||||||
|
PY3 = False
|
||||||
base_str = (str, unicode)
|
base_str = (str, unicode)
|
||||||
|
text_type = unicode
|
||||||
else:
|
else:
|
||||||
|
PY2 = False
|
||||||
|
PY3 = True
|
||||||
base_str = (bytes, str)
|
base_str = (bytes, str)
|
||||||
|
text_type = str
|
||||||
|
|
||||||
def wrap_ord(a):
|
|
||||||
if sys.version_info < (3, 0) and isinstance(a, base_str):
|
|
||||||
return ord(a)
|
|
||||||
else:
|
|
||||||
return a
|
|
||||||
|
|
39
thirdparty/chardet/constants.py
vendored
39
thirdparty/chardet/constants.py
vendored
|
@ -1,39 +0,0 @@
|
||||||
######################## BEGIN LICENSE BLOCK ########################
|
|
||||||
# The Original Code is Mozilla Universal charset detector code.
|
|
||||||
#
|
|
||||||
# The Initial Developer of the Original Code is
|
|
||||||
# Netscape Communications Corporation.
|
|
||||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
|
||||||
# the Initial Developer. All Rights Reserved.
|
|
||||||
#
|
|
||||||
# Contributor(s):
|
|
||||||
# Mark Pilgrim - port to Python
|
|
||||||
# Shy Shalom - original C code
|
|
||||||
#
|
|
||||||
# This library is free software; you can redistribute it and/or
|
|
||||||
# modify it under the terms of the GNU Lesser General Public
|
|
||||||
# License as published by the Free Software Foundation; either
|
|
||||||
# version 2.1 of the License, or (at your option) any later version.
|
|
||||||
#
|
|
||||||
# This library is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
# Lesser General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU Lesser General Public
|
|
||||||
# License along with this library; if not, write to the Free Software
|
|
||||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
|
||||||
# 02110-1301 USA
|
|
||||||
######################### END LICENSE BLOCK #########################
|
|
||||||
|
|
||||||
_debug = 0
|
|
||||||
|
|
||||||
eDetecting = 0
|
|
||||||
eFoundIt = 1
|
|
||||||
eNotMe = 2
|
|
||||||
|
|
||||||
eStart = 0
|
|
||||||
eError = 1
|
|
||||||
eItsMe = 2
|
|
||||||
|
|
||||||
SHORTCUT_THRESHOLD = 0.95
|
|
19
thirdparty/chardet/cp949prober.py
vendored
19
thirdparty/chardet/cp949prober.py
vendored
|
@ -25,20 +25,25 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
from .mbcharsetprober import MultiByteCharSetProber
|
|
||||||
from .codingstatemachine import CodingStateMachine
|
|
||||||
from .chardistribution import EUCKRDistributionAnalysis
|
from .chardistribution import EUCKRDistributionAnalysis
|
||||||
from .mbcssm import CP949SMModel
|
from .codingstatemachine import CodingStateMachine
|
||||||
|
from .mbcharsetprober import MultiByteCharSetProber
|
||||||
|
from .mbcssm import CP949_SM_MODEL
|
||||||
|
|
||||||
|
|
||||||
class CP949Prober(MultiByteCharSetProber):
|
class CP949Prober(MultiByteCharSetProber):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
MultiByteCharSetProber.__init__(self)
|
super(CP949Prober, self).__init__()
|
||||||
self._mCodingSM = CodingStateMachine(CP949SMModel)
|
self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
|
||||||
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
|
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
|
||||||
# not different.
|
# not different.
|
||||||
self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
|
self.distribution_analyzer = EUCKRDistributionAnalysis()
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
|
def charset_name(self):
|
||||||
return "CP949"
|
return "CP949"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def language(self):
|
||||||
|
return "Korean"
|
||||||
|
|
76
thirdparty/chardet/enums.py
vendored
Normal file
76
thirdparty/chardet/enums.py
vendored
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
"""
|
||||||
|
All of the Enums that are used throughout the chardet package.
|
||||||
|
|
||||||
|
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class InputState(object):
|
||||||
|
"""
|
||||||
|
This enum represents the different states a universal detector can be in.
|
||||||
|
"""
|
||||||
|
PURE_ASCII = 0
|
||||||
|
ESC_ASCII = 1
|
||||||
|
HIGH_BYTE = 2
|
||||||
|
|
||||||
|
|
||||||
|
class LanguageFilter(object):
|
||||||
|
"""
|
||||||
|
This enum represents the different language filters we can apply to a
|
||||||
|
``UniversalDetector``.
|
||||||
|
"""
|
||||||
|
CHINESE_SIMPLIFIED = 0x01
|
||||||
|
CHINESE_TRADITIONAL = 0x02
|
||||||
|
JAPANESE = 0x04
|
||||||
|
KOREAN = 0x08
|
||||||
|
NON_CJK = 0x10
|
||||||
|
ALL = 0x1F
|
||||||
|
CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
|
||||||
|
CJK = CHINESE | JAPANESE | KOREAN
|
||||||
|
|
||||||
|
|
||||||
|
class ProbingState(object):
|
||||||
|
"""
|
||||||
|
This enum represents the different states a prober can be in.
|
||||||
|
"""
|
||||||
|
DETECTING = 0
|
||||||
|
FOUND_IT = 1
|
||||||
|
NOT_ME = 2
|
||||||
|
|
||||||
|
|
||||||
|
class MachineState(object):
|
||||||
|
"""
|
||||||
|
This enum represents the different states a state machine can be in.
|
||||||
|
"""
|
||||||
|
START = 0
|
||||||
|
ERROR = 1
|
||||||
|
ITS_ME = 2
|
||||||
|
|
||||||
|
|
||||||
|
class SequenceLikelihood(object):
|
||||||
|
"""
|
||||||
|
This enum represents the likelihood of a character following the previous one.
|
||||||
|
"""
|
||||||
|
NEGATIVE = 0
|
||||||
|
UNLIKELY = 1
|
||||||
|
LIKELY = 2
|
||||||
|
POSITIVE = 3
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_num_categories(cls):
|
||||||
|
""":returns: The number of likelihood categories in the enum."""
|
||||||
|
return 4
|
||||||
|
|
||||||
|
|
||||||
|
class CharacterCategory(object):
|
||||||
|
"""
|
||||||
|
This enum represents the different categories language models for
|
||||||
|
``SingleByteCharsetProber`` put characters into.
|
||||||
|
|
||||||
|
Anything less than CONTROL is considered a letter.
|
||||||
|
"""
|
||||||
|
UNDEFINED = 255
|
||||||
|
LINE_BREAK = 254
|
||||||
|
SYMBOL = 253
|
||||||
|
DIGIT = 252
|
||||||
|
CONTROL = 251
|
97
thirdparty/chardet/escprober.py
vendored
97
thirdparty/chardet/escprober.py
vendored
|
@ -25,62 +25,77 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
from . import constants
|
|
||||||
from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
|
|
||||||
ISO2022KRSMModel)
|
|
||||||
from .charsetprober import CharSetProber
|
from .charsetprober import CharSetProber
|
||||||
from .codingstatemachine import CodingStateMachine
|
from .codingstatemachine import CodingStateMachine
|
||||||
from .compat import wrap_ord
|
from .enums import LanguageFilter, ProbingState, MachineState
|
||||||
|
from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL,
|
||||||
|
ISO2022KR_SM_MODEL)
|
||||||
|
|
||||||
|
|
||||||
class EscCharSetProber(CharSetProber):
|
class EscCharSetProber(CharSetProber):
|
||||||
def __init__(self):
|
"""
|
||||||
CharSetProber.__init__(self)
|
This CharSetProber uses a "code scheme" approach for detecting encodings,
|
||||||
self._mCodingSM = [
|
whereby easily recognizable escape or shift sequences are relied on to
|
||||||
CodingStateMachine(HZSMModel),
|
identify these encodings.
|
||||||
CodingStateMachine(ISO2022CNSMModel),
|
"""
|
||||||
CodingStateMachine(ISO2022JPSMModel),
|
|
||||||
CodingStateMachine(ISO2022KRSMModel)
|
def __init__(self, lang_filter=None):
|
||||||
]
|
super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
|
||||||
|
self.coding_sm = []
|
||||||
|
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
|
||||||
|
self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
|
||||||
|
self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
|
||||||
|
if self.lang_filter & LanguageFilter.JAPANESE:
|
||||||
|
self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
|
||||||
|
if self.lang_filter & LanguageFilter.KOREAN:
|
||||||
|
self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
|
||||||
|
self.active_sm_count = None
|
||||||
|
self._detected_charset = None
|
||||||
|
self._detected_language = None
|
||||||
|
self._state = None
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
CharSetProber.reset(self)
|
super(EscCharSetProber, self).reset()
|
||||||
for codingSM in self._mCodingSM:
|
for coding_sm in self.coding_sm:
|
||||||
if not codingSM:
|
if not coding_sm:
|
||||||
continue
|
continue
|
||||||
codingSM.active = True
|
coding_sm.active = True
|
||||||
codingSM.reset()
|
coding_sm.reset()
|
||||||
self._mActiveSM = len(self._mCodingSM)
|
self.active_sm_count = len(self.coding_sm)
|
||||||
self._mDetectedCharset = None
|
self._detected_charset = None
|
||||||
|
self._detected_language = None
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
return self._mDetectedCharset
|
def charset_name(self):
|
||||||
|
return self._detected_charset
|
||||||
|
|
||||||
|
@property
|
||||||
|
def language(self):
|
||||||
|
return self._detected_language
|
||||||
|
|
||||||
def get_confidence(self):
|
def get_confidence(self):
|
||||||
if self._mDetectedCharset:
|
if self._detected_charset:
|
||||||
return 0.99
|
return 0.99
|
||||||
else:
|
else:
|
||||||
return 0.00
|
return 0.00
|
||||||
|
|
||||||
def feed(self, aBuf):
|
def feed(self, byte_str):
|
||||||
for c in aBuf:
|
for c in byte_str:
|
||||||
# PY3K: aBuf is a byte array, so c is an int, not a byte
|
for coding_sm in self.coding_sm:
|
||||||
for codingSM in self._mCodingSM:
|
if not coding_sm or not coding_sm.active:
|
||||||
if not codingSM:
|
|
||||||
continue
|
continue
|
||||||
if not codingSM.active:
|
coding_state = coding_sm.next_state(c)
|
||||||
continue
|
if coding_state == MachineState.ERROR:
|
||||||
codingState = codingSM.next_state(wrap_ord(c))
|
coding_sm.active = False
|
||||||
if codingState == constants.eError:
|
self.active_sm_count -= 1
|
||||||
codingSM.active = False
|
if self.active_sm_count <= 0:
|
||||||
self._mActiveSM -= 1
|
self._state = ProbingState.NOT_ME
|
||||||
if self._mActiveSM <= 0:
|
return self.state
|
||||||
self._mState = constants.eNotMe
|
elif coding_state == MachineState.ITS_ME:
|
||||||
return self.get_state()
|
self._state = ProbingState.FOUND_IT
|
||||||
elif codingState == constants.eItsMe:
|
self._detected_charset = coding_sm.get_coding_state_machine()
|
||||||
self._mState = constants.eFoundIt
|
self._detected_language = coding_sm.language
|
||||||
self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8
|
return self.state
|
||||||
return self.get_state()
|
|
||||||
|
|
||||||
return self.get_state()
|
return self.state
|
||||||
|
|
128
thirdparty/chardet/escsm.py
vendored
128
thirdparty/chardet/escsm.py
vendored
|
@ -25,9 +25,9 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
from .constants import eStart, eError, eItsMe
|
from .enums import MachineState
|
||||||
|
|
||||||
HZ_cls = (
|
HZ_CLS = (
|
||||||
1,0,0,0,0,0,0,0, # 00 - 07
|
1,0,0,0,0,0,0,0, # 00 - 07
|
||||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||||
0,0,0,0,0,0,0,0, # 10 - 17
|
0,0,0,0,0,0,0,0, # 10 - 17
|
||||||
|
@ -62,24 +62,25 @@ HZ_cls = (
|
||||||
1,1,1,1,1,1,1,1, # f8 - ff
|
1,1,1,1,1,1,1,1, # f8 - ff
|
||||||
)
|
)
|
||||||
|
|
||||||
HZ_st = (
|
HZ_ST = (
|
||||||
eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07
|
MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||||
eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17
|
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17
|
||||||
5,eError, 6,eError, 5, 5, 4,eError,# 18-1f
|
5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f
|
||||||
4,eError, 4, 4, 4,eError, 4,eError,# 20-27
|
4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27
|
||||||
4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f
|
4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f
|
||||||
)
|
)
|
||||||
|
|
||||||
HZCharLenTable = (0, 0, 0, 0, 0, 0)
|
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||||
|
|
||||||
HZSMModel = {'classTable': HZ_cls,
|
HZ_SM_MODEL = {'class_table': HZ_CLS,
|
||||||
'classFactor': 6,
|
'class_factor': 6,
|
||||||
'stateTable': HZ_st,
|
'state_table': HZ_ST,
|
||||||
'charLenTable': HZCharLenTable,
|
'char_len_table': HZ_CHAR_LEN_TABLE,
|
||||||
'name': "HZ-GB-2312"}
|
'name': "HZ-GB-2312",
|
||||||
|
'language': 'Chinese'}
|
||||||
|
|
||||||
ISO2022CN_cls = (
|
ISO2022CN_CLS = (
|
||||||
2,0,0,0,0,0,0,0, # 00 - 07
|
2,0,0,0,0,0,0,0, # 00 - 07
|
||||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||||
0,0,0,0,0,0,0,0, # 10 - 17
|
0,0,0,0,0,0,0,0, # 10 - 17
|
||||||
|
@ -114,26 +115,27 @@ ISO2022CN_cls = (
|
||||||
2,2,2,2,2,2,2,2, # f8 - ff
|
2,2,2,2,2,2,2,2, # f8 - ff
|
||||||
)
|
)
|
||||||
|
|
||||||
ISO2022CN_st = (
|
ISO2022CN_ST = (
|
||||||
eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
|
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||||
eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f
|
MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||||
eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
|
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||||
eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f
|
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f
|
||||||
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||||
5, 6,eError,eError,eError,eError,eError,eError,# 28-2f
|
5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f
|
||||||
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37
|
||||||
eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f
|
||||||
)
|
)
|
||||||
|
|
||||||
ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||||
|
|
||||||
ISO2022CNSMModel = {'classTable': ISO2022CN_cls,
|
ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS,
|
||||||
'classFactor': 9,
|
'class_factor': 9,
|
||||||
'stateTable': ISO2022CN_st,
|
'state_table': ISO2022CN_ST,
|
||||||
'charLenTable': ISO2022CNCharLenTable,
|
'char_len_table': ISO2022CN_CHAR_LEN_TABLE,
|
||||||
'name': "ISO-2022-CN"}
|
'name': "ISO-2022-CN",
|
||||||
|
'language': 'Chinese'}
|
||||||
|
|
||||||
ISO2022JP_cls = (
|
ISO2022JP_CLS = (
|
||||||
2,0,0,0,0,0,0,0, # 00 - 07
|
2,0,0,0,0,0,0,0, # 00 - 07
|
||||||
0,0,0,0,0,0,2,2, # 08 - 0f
|
0,0,0,0,0,0,2,2, # 08 - 0f
|
||||||
0,0,0,0,0,0,0,0, # 10 - 17
|
0,0,0,0,0,0,0,0, # 10 - 17
|
||||||
|
@ -168,27 +170,28 @@ ISO2022JP_cls = (
|
||||||
2,2,2,2,2,2,2,2, # f8 - ff
|
2,2,2,2,2,2,2,2, # f8 - ff
|
||||||
)
|
)
|
||||||
|
|
||||||
ISO2022JP_st = (
|
ISO2022JP_ST = (
|
||||||
eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
|
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||||
eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f
|
MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f
|
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||||
eError, 5,eError,eError,eError, 4,eError,eError,# 20-27
|
MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||||
eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f
|
||||||
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37
|
||||||
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f
|
||||||
eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47
|
||||||
)
|
)
|
||||||
|
|
||||||
ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||||
|
|
||||||
ISO2022JPSMModel = {'classTable': ISO2022JP_cls,
|
ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS,
|
||||||
'classFactor': 10,
|
'class_factor': 10,
|
||||||
'stateTable': ISO2022JP_st,
|
'state_table': ISO2022JP_ST,
|
||||||
'charLenTable': ISO2022JPCharLenTable,
|
'char_len_table': ISO2022JP_CHAR_LEN_TABLE,
|
||||||
'name': "ISO-2022-JP"}
|
'name': "ISO-2022-JP",
|
||||||
|
'language': 'Japanese'}
|
||||||
|
|
||||||
ISO2022KR_cls = (
|
ISO2022KR_CLS = (
|
||||||
2,0,0,0,0,0,0,0, # 00 - 07
|
2,0,0,0,0,0,0,0, # 00 - 07
|
||||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||||
0,0,0,0,0,0,0,0, # 10 - 17
|
0,0,0,0,0,0,0,0, # 10 - 17
|
||||||
|
@ -223,20 +226,21 @@ ISO2022KR_cls = (
|
||||||
2,2,2,2,2,2,2,2, # f8 - ff
|
2,2,2,2,2,2,2,2, # f8 - ff
|
||||||
)
|
)
|
||||||
|
|
||||||
ISO2022KR_st = (
|
ISO2022KR_ST = (
|
||||||
eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07
|
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||||
eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17
|
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17
|
||||||
eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||||
eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27
|
||||||
)
|
)
|
||||||
|
|
||||||
ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)
|
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||||
|
|
||||||
|
ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS,
|
||||||
|
'class_factor': 6,
|
||||||
|
'state_table': ISO2022KR_ST,
|
||||||
|
'char_len_table': ISO2022KR_CHAR_LEN_TABLE,
|
||||||
|
'name': "ISO-2022-KR",
|
||||||
|
'language': 'Korean'}
|
||||||
|
|
||||||
ISO2022KRSMModel = {'classTable': ISO2022KR_cls,
|
|
||||||
'classFactor': 6,
|
|
||||||
'stateTable': ISO2022KR_st,
|
|
||||||
'charLenTable': ISO2022KRCharLenTable,
|
|
||||||
'name': "ISO-2022-KR"}
|
|
||||||
|
|
||||||
# flake8: noqa
|
|
||||||
|
|
84
thirdparty/chardet/eucjpprober.py
vendored
84
thirdparty/chardet/eucjpprober.py
vendored
|
@ -25,68 +25,68 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
import sys
|
from .enums import ProbingState, MachineState
|
||||||
from . import constants
|
|
||||||
from .mbcharsetprober import MultiByteCharSetProber
|
from .mbcharsetprober import MultiByteCharSetProber
|
||||||
from .codingstatemachine import CodingStateMachine
|
from .codingstatemachine import CodingStateMachine
|
||||||
from .chardistribution import EUCJPDistributionAnalysis
|
from .chardistribution import EUCJPDistributionAnalysis
|
||||||
from .jpcntx import EUCJPContextAnalysis
|
from .jpcntx import EUCJPContextAnalysis
|
||||||
from .mbcssm import EUCJPSMModel
|
from .mbcssm import EUCJP_SM_MODEL
|
||||||
|
|
||||||
if sys.version_info >= (3, 0):
|
|
||||||
xrange = range
|
|
||||||
|
|
||||||
class EUCJPProber(MultiByteCharSetProber):
|
class EUCJPProber(MultiByteCharSetProber):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
MultiByteCharSetProber.__init__(self)
|
super(EUCJPProber, self).__init__()
|
||||||
self._mCodingSM = CodingStateMachine(EUCJPSMModel)
|
self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
|
||||||
self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
|
self.distribution_analyzer = EUCJPDistributionAnalysis()
|
||||||
self._mContextAnalyzer = EUCJPContextAnalysis()
|
self.context_analyzer = EUCJPContextAnalysis()
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
MultiByteCharSetProber.reset(self)
|
super(EUCJPProber, self).reset()
|
||||||
self._mContextAnalyzer.reset()
|
self.context_analyzer.reset()
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
|
def charset_name(self):
|
||||||
return "EUC-JP"
|
return "EUC-JP"
|
||||||
|
|
||||||
def feed(self, aBuf):
|
@property
|
||||||
aLen = len(aBuf)
|
def language(self):
|
||||||
for i in xrange(0, aLen):
|
return "Japanese"
|
||||||
# PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
|
|
||||||
codingState = self._mCodingSM.next_state(aBuf[i])
|
def feed(self, byte_str):
|
||||||
if codingState == constants.eError:
|
for i in range(len(byte_str)):
|
||||||
if constants._debug:
|
# PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
|
||||||
sys.stderr.write(self.get_charset_name()
|
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||||
+ ' prober hit error at byte ' + str(i)
|
if coding_state == MachineState.ERROR:
|
||||||
+ '\n')
|
self.logger.debug('%s %s prober hit error at byte %s',
|
||||||
self._mState = constants.eNotMe
|
self.charset_name, self.language, i)
|
||||||
|
self._state = ProbingState.NOT_ME
|
||||||
break
|
break
|
||||||
elif codingState == constants.eItsMe:
|
elif coding_state == MachineState.ITS_ME:
|
||||||
self._mState = constants.eFoundIt
|
self._state = ProbingState.FOUND_IT
|
||||||
break
|
break
|
||||||
elif codingState == constants.eStart:
|
elif coding_state == MachineState.START:
|
||||||
charLen = self._mCodingSM.get_current_charlen()
|
char_len = self.coding_sm.get_current_charlen()
|
||||||
if i == 0:
|
if i == 0:
|
||||||
self._mLastChar[1] = aBuf[0]
|
self._last_char[1] = byte_str[0]
|
||||||
self._mContextAnalyzer.feed(self._mLastChar, charLen)
|
self.context_analyzer.feed(self._last_char, char_len)
|
||||||
self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
|
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||||
else:
|
else:
|
||||||
self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
|
self.context_analyzer.feed(byte_str[i - 1:i + 1],
|
||||||
self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
|
char_len)
|
||||||
charLen)
|
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||||
|
char_len)
|
||||||
|
|
||||||
self._mLastChar[0] = aBuf[aLen - 1]
|
self._last_char[0] = byte_str[-1]
|
||||||
|
|
||||||
if self.get_state() == constants.eDetecting:
|
if self.state == ProbingState.DETECTING:
|
||||||
if (self._mContextAnalyzer.got_enough_data() and
|
if (self.context_analyzer.got_enough_data() and
|
||||||
(self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
|
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||||
self._mState = constants.eFoundIt
|
self._state = ProbingState.FOUND_IT
|
||||||
|
|
||||||
return self.get_state()
|
return self.state
|
||||||
|
|
||||||
def get_confidence(self):
|
def get_confidence(self):
|
||||||
contxtCf = self._mContextAnalyzer.get_confidence()
|
context_conf = self.context_analyzer.get_confidence()
|
||||||
distribCf = self._mDistributionAnalyzer.get_confidence()
|
distrib_conf = self.distribution_analyzer.get_confidence()
|
||||||
return max(contxtCf, distribCf)
|
return max(context_conf, distrib_conf)
|
||||||
|
|
405
thirdparty/chardet/euckrfreq.py
vendored
405
thirdparty/chardet/euckrfreq.py
vendored
|
@ -43,7 +43,7 @@ EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0
|
||||||
EUCKR_TABLE_SIZE = 2352
|
EUCKR_TABLE_SIZE = 2352
|
||||||
|
|
||||||
# Char to FreqOrder table ,
|
# Char to FreqOrder table ,
|
||||||
EUCKRCharToFreqOrder = ( \
|
EUCKR_CHAR_TO_FREQ_ORDER = (
|
||||||
13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87,
|
13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87,
|
||||||
1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398,
|
1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398,
|
||||||
1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734,
|
1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734,
|
||||||
|
@ -191,406 +191,5 @@ EUCKRCharToFreqOrder = ( \
|
||||||
1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628,
|
1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628,
|
||||||
2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042,
|
2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042,
|
||||||
670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256
|
670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256
|
||||||
#Everything below is of no interest for detection purpose
|
)
|
||||||
2643,2644,2645,2646,2647,2648,2649,2650,2651,2652,2653,2654,2655,2656,2657,2658,
|
|
||||||
2659,2660,2661,2662,2663,2664,2665,2666,2667,2668,2669,2670,2671,2672,2673,2674,
|
|
||||||
2675,2676,2677,2678,2679,2680,2681,2682,2683,2684,2685,2686,2687,2688,2689,2690,
|
|
||||||
2691,2692,2693,2694,2695,2696,2697,2698,2699,1542, 880,2700,2701,2702,2703,2704,
|
|
||||||
2705,2706,2707,2708,2709,2710,2711,2712,2713,2714,2715,2716,2717,2718,2719,2720,
|
|
||||||
2721,2722,2723,2724,2725,1543,2726,2727,2728,2729,2730,2731,2732,1544,2733,2734,
|
|
||||||
2735,2736,2737,2738,2739,2740,2741,2742,2743,2744,2745,2746,2747,2748,2749,2750,
|
|
||||||
2751,2752,2753,2754,1545,2755,2756,2757,2758,2759,2760,2761,2762,2763,2764,2765,
|
|
||||||
2766,1546,2767,1547,2768,2769,2770,2771,2772,2773,2774,2775,2776,2777,2778,2779,
|
|
||||||
2780,2781,2782,2783,2784,2785,2786,1548,2787,2788,2789,1109,2790,2791,2792,2793,
|
|
||||||
2794,2795,2796,2797,2798,2799,2800,2801,2802,2803,2804,2805,2806,2807,2808,2809,
|
|
||||||
2810,2811,2812,1329,2813,2814,2815,2816,2817,2818,2819,2820,2821,2822,2823,2824,
|
|
||||||
2825,2826,2827,2828,2829,2830,2831,2832,2833,2834,2835,2836,2837,2838,2839,2840,
|
|
||||||
2841,2842,2843,2844,2845,2846,2847,2848,2849,2850,2851,2852,2853,2854,2855,2856,
|
|
||||||
1549,2857,2858,2859,2860,1550,2861,2862,1551,2863,2864,2865,2866,2867,2868,2869,
|
|
||||||
2870,2871,2872,2873,2874,1110,1330,2875,2876,2877,2878,2879,2880,2881,2882,2883,
|
|
||||||
2884,2885,2886,2887,2888,2889,2890,2891,2892,2893,2894,2895,2896,2897,2898,2899,
|
|
||||||
2900,2901,2902,2903,2904,2905,2906,2907,2908,2909,2910,2911,2912,2913,2914,2915,
|
|
||||||
2916,2917,2918,2919,2920,2921,2922,2923,2924,2925,2926,2927,2928,2929,2930,1331,
|
|
||||||
2931,2932,2933,2934,2935,2936,2937,2938,2939,2940,2941,2942,2943,1552,2944,2945,
|
|
||||||
2946,2947,2948,2949,2950,2951,2952,2953,2954,2955,2956,2957,2958,2959,2960,2961,
|
|
||||||
2962,2963,2964,1252,2965,2966,2967,2968,2969,2970,2971,2972,2973,2974,2975,2976,
|
|
||||||
2977,2978,2979,2980,2981,2982,2983,2984,2985,2986,2987,2988,2989,2990,2991,2992,
|
|
||||||
2993,2994,2995,2996,2997,2998,2999,3000,3001,3002,3003,3004,3005,3006,3007,3008,
|
|
||||||
3009,3010,3011,3012,1553,3013,3014,3015,3016,3017,1554,3018,1332,3019,3020,3021,
|
|
||||||
3022,3023,3024,3025,3026,3027,3028,3029,3030,3031,3032,3033,3034,3035,3036,3037,
|
|
||||||
3038,3039,3040,3041,3042,3043,3044,3045,3046,3047,3048,3049,3050,1555,3051,3052,
|
|
||||||
3053,1556,1557,3054,3055,3056,3057,3058,3059,3060,3061,3062,3063,3064,3065,3066,
|
|
||||||
3067,1558,3068,3069,3070,3071,3072,3073,3074,3075,3076,1559,3077,3078,3079,3080,
|
|
||||||
3081,3082,3083,1253,3084,3085,3086,3087,3088,3089,3090,3091,3092,3093,3094,3095,
|
|
||||||
3096,3097,3098,3099,3100,3101,3102,3103,3104,3105,3106,3107,3108,1152,3109,3110,
|
|
||||||
3111,3112,3113,1560,3114,3115,3116,3117,1111,3118,3119,3120,3121,3122,3123,3124,
|
|
||||||
3125,3126,3127,3128,3129,3130,3131,3132,3133,3134,3135,3136,3137,3138,3139,3140,
|
|
||||||
3141,3142,3143,3144,3145,3146,3147,3148,3149,3150,3151,3152,3153,3154,3155,3156,
|
|
||||||
3157,3158,3159,3160,3161,3162,3163,3164,3165,3166,3167,3168,3169,3170,3171,3172,
|
|
||||||
3173,3174,3175,3176,1333,3177,3178,3179,3180,3181,3182,3183,3184,3185,3186,3187,
|
|
||||||
3188,3189,1561,3190,3191,1334,3192,3193,3194,3195,3196,3197,3198,3199,3200,3201,
|
|
||||||
3202,3203,3204,3205,3206,3207,3208,3209,3210,3211,3212,3213,3214,3215,3216,3217,
|
|
||||||
3218,3219,3220,3221,3222,3223,3224,3225,3226,3227,3228,3229,3230,3231,3232,3233,
|
|
||||||
3234,1562,3235,3236,3237,3238,3239,3240,3241,3242,3243,3244,3245,3246,3247,3248,
|
|
||||||
3249,3250,3251,3252,3253,3254,3255,3256,3257,3258,3259,3260,3261,3262,3263,3264,
|
|
||||||
3265,3266,3267,3268,3269,3270,3271,3272,3273,3274,3275,3276,3277,1563,3278,3279,
|
|
||||||
3280,3281,3282,3283,3284,3285,3286,3287,3288,3289,3290,3291,3292,3293,3294,3295,
|
|
||||||
3296,3297,3298,3299,3300,3301,3302,3303,3304,3305,3306,3307,3308,3309,3310,3311,
|
|
||||||
3312,3313,3314,3315,3316,3317,3318,3319,3320,3321,3322,3323,3324,3325,3326,3327,
|
|
||||||
3328,3329,3330,3331,3332,3333,3334,3335,3336,3337,3338,3339,3340,3341,3342,3343,
|
|
||||||
3344,3345,3346,3347,3348,3349,3350,3351,3352,3353,3354,3355,3356,3357,3358,3359,
|
|
||||||
3360,3361,3362,3363,3364,1335,3365,3366,3367,3368,3369,3370,3371,3372,3373,3374,
|
|
||||||
3375,3376,3377,3378,3379,3380,3381,3382,3383,3384,3385,3386,3387,1336,3388,3389,
|
|
||||||
3390,3391,3392,3393,3394,3395,3396,3397,3398,3399,3400,3401,3402,3403,3404,3405,
|
|
||||||
3406,3407,3408,3409,3410,3411,3412,3413,3414,1337,3415,3416,3417,3418,3419,1338,
|
|
||||||
3420,3421,3422,1564,1565,3423,3424,3425,3426,3427,3428,3429,3430,3431,1254,3432,
|
|
||||||
3433,3434,1339,3435,3436,3437,3438,3439,1566,3440,3441,3442,3443,3444,3445,3446,
|
|
||||||
3447,3448,3449,3450,3451,3452,3453,3454,1255,3455,3456,3457,3458,3459,1567,1191,
|
|
||||||
3460,1568,1569,3461,3462,3463,1570,3464,3465,3466,3467,3468,1571,3469,3470,3471,
|
|
||||||
3472,3473,1572,3474,3475,3476,3477,3478,3479,3480,3481,3482,3483,3484,3485,3486,
|
|
||||||
1340,3487,3488,3489,3490,3491,3492,1021,3493,3494,3495,3496,3497,3498,1573,3499,
|
|
||||||
1341,3500,3501,3502,3503,3504,3505,3506,3507,3508,3509,3510,3511,1342,3512,3513,
|
|
||||||
3514,3515,3516,1574,1343,3517,3518,3519,1575,3520,1576,3521,3522,3523,3524,3525,
|
|
||||||
3526,3527,3528,3529,3530,3531,3532,3533,3534,3535,3536,3537,3538,3539,3540,3541,
|
|
||||||
3542,3543,3544,3545,3546,3547,3548,3549,3550,3551,3552,3553,3554,3555,3556,3557,
|
|
||||||
3558,3559,3560,3561,3562,3563,3564,3565,3566,3567,3568,3569,3570,3571,3572,3573,
|
|
||||||
3574,3575,3576,3577,3578,3579,3580,1577,3581,3582,1578,3583,3584,3585,3586,3587,
|
|
||||||
3588,3589,3590,3591,3592,3593,3594,3595,3596,3597,3598,3599,3600,3601,3602,3603,
|
|
||||||
3604,1579,3605,3606,3607,3608,3609,3610,3611,3612,3613,3614,3615,3616,3617,3618,
|
|
||||||
3619,3620,3621,3622,3623,3624,3625,3626,3627,3628,3629,1580,3630,3631,1581,3632,
|
|
||||||
3633,3634,3635,3636,3637,3638,3639,3640,3641,3642,3643,3644,3645,3646,3647,3648,
|
|
||||||
3649,3650,3651,3652,3653,3654,3655,3656,1582,3657,3658,3659,3660,3661,3662,3663,
|
|
||||||
3664,3665,3666,3667,3668,3669,3670,3671,3672,3673,3674,3675,3676,3677,3678,3679,
|
|
||||||
3680,3681,3682,3683,3684,3685,3686,3687,3688,3689,3690,3691,3692,3693,3694,3695,
|
|
||||||
3696,3697,3698,3699,3700,1192,3701,3702,3703,3704,1256,3705,3706,3707,3708,1583,
|
|
||||||
1257,3709,3710,3711,3712,3713,3714,3715,3716,1584,3717,3718,3719,3720,3721,3722,
|
|
||||||
3723,3724,3725,3726,3727,3728,3729,3730,3731,3732,3733,3734,3735,3736,3737,3738,
|
|
||||||
3739,3740,3741,3742,3743,3744,3745,1344,3746,3747,3748,3749,3750,3751,3752,3753,
|
|
||||||
3754,3755,3756,1585,3757,3758,3759,3760,3761,3762,3763,3764,3765,3766,1586,3767,
|
|
||||||
3768,3769,3770,3771,3772,3773,3774,3775,3776,3777,3778,1345,3779,3780,3781,3782,
|
|
||||||
3783,3784,3785,3786,3787,3788,3789,3790,3791,3792,3793,3794,3795,1346,1587,3796,
|
|
||||||
3797,1588,3798,3799,3800,3801,3802,3803,3804,3805,3806,1347,3807,3808,3809,3810,
|
|
||||||
3811,1589,3812,3813,3814,3815,3816,3817,3818,3819,3820,3821,1590,3822,3823,1591,
|
|
||||||
1348,3824,3825,3826,3827,3828,3829,3830,1592,3831,3832,1593,3833,3834,3835,3836,
|
|
||||||
3837,3838,3839,3840,3841,3842,3843,3844,1349,3845,3846,3847,3848,3849,3850,3851,
|
|
||||||
3852,3853,3854,3855,3856,3857,3858,1594,3859,3860,3861,3862,3863,3864,3865,3866,
|
|
||||||
3867,3868,3869,1595,3870,3871,3872,3873,1596,3874,3875,3876,3877,3878,3879,3880,
|
|
||||||
3881,3882,3883,3884,3885,3886,1597,3887,3888,3889,3890,3891,3892,3893,3894,3895,
|
|
||||||
1598,3896,3897,3898,1599,1600,3899,1350,3900,1351,3901,3902,1352,3903,3904,3905,
|
|
||||||
3906,3907,3908,3909,3910,3911,3912,3913,3914,3915,3916,3917,3918,3919,3920,3921,
|
|
||||||
3922,3923,3924,1258,3925,3926,3927,3928,3929,3930,3931,1193,3932,1601,3933,3934,
|
|
||||||
3935,3936,3937,3938,3939,3940,3941,3942,3943,1602,3944,3945,3946,3947,3948,1603,
|
|
||||||
3949,3950,3951,3952,3953,3954,3955,3956,3957,3958,3959,3960,3961,3962,3963,3964,
|
|
||||||
3965,1604,3966,3967,3968,3969,3970,3971,3972,3973,3974,3975,3976,3977,1353,3978,
|
|
||||||
3979,3980,3981,3982,3983,3984,3985,3986,3987,3988,3989,3990,3991,1354,3992,3993,
|
|
||||||
3994,3995,3996,3997,3998,3999,4000,4001,4002,4003,4004,4005,4006,4007,4008,4009,
|
|
||||||
4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,1355,4024,
|
|
||||||
4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040,
|
|
||||||
1605,4041,4042,4043,4044,4045,4046,4047,4048,4049,4050,4051,4052,4053,4054,4055,
|
|
||||||
4056,4057,4058,4059,4060,1606,4061,4062,4063,4064,1607,4065,4066,4067,4068,4069,
|
|
||||||
4070,4071,4072,4073,4074,4075,4076,1194,4077,4078,1608,4079,4080,4081,4082,4083,
|
|
||||||
4084,4085,4086,4087,1609,4088,4089,4090,4091,4092,4093,4094,4095,4096,4097,4098,
|
|
||||||
4099,4100,4101,4102,4103,4104,4105,4106,4107,4108,1259,4109,4110,4111,4112,4113,
|
|
||||||
4114,4115,4116,4117,4118,4119,4120,4121,4122,4123,4124,1195,4125,4126,4127,1610,
|
|
||||||
4128,4129,4130,4131,4132,4133,4134,4135,4136,4137,1356,4138,4139,4140,4141,4142,
|
|
||||||
4143,4144,1611,4145,4146,4147,4148,4149,4150,4151,4152,4153,4154,4155,4156,4157,
|
|
||||||
4158,4159,4160,4161,4162,4163,4164,4165,4166,4167,4168,4169,4170,4171,4172,4173,
|
|
||||||
4174,4175,4176,4177,4178,4179,4180,4181,4182,4183,4184,4185,4186,4187,4188,4189,
|
|
||||||
4190,4191,4192,4193,4194,4195,4196,4197,4198,4199,4200,4201,4202,4203,4204,4205,
|
|
||||||
4206,4207,4208,4209,4210,4211,4212,4213,4214,4215,4216,4217,4218,4219,1612,4220,
|
|
||||||
4221,4222,4223,4224,4225,4226,4227,1357,4228,1613,4229,4230,4231,4232,4233,4234,
|
|
||||||
4235,4236,4237,4238,4239,4240,4241,4242,4243,1614,4244,4245,4246,4247,4248,4249,
|
|
||||||
4250,4251,4252,4253,4254,4255,4256,4257,4258,4259,4260,4261,4262,4263,4264,4265,
|
|
||||||
4266,4267,4268,4269,4270,1196,1358,4271,4272,4273,4274,4275,4276,4277,4278,4279,
|
|
||||||
4280,4281,4282,4283,4284,4285,4286,4287,1615,4288,4289,4290,4291,4292,4293,4294,
|
|
||||||
4295,4296,4297,4298,4299,4300,4301,4302,4303,4304,4305,4306,4307,4308,4309,4310,
|
|
||||||
4311,4312,4313,4314,4315,4316,4317,4318,4319,4320,4321,4322,4323,4324,4325,4326,
|
|
||||||
4327,4328,4329,4330,4331,4332,4333,4334,1616,4335,4336,4337,4338,4339,4340,4341,
|
|
||||||
4342,4343,4344,4345,4346,4347,4348,4349,4350,4351,4352,4353,4354,4355,4356,4357,
|
|
||||||
4358,4359,4360,1617,4361,4362,4363,4364,4365,1618,4366,4367,4368,4369,4370,4371,
|
|
||||||
4372,4373,4374,4375,4376,4377,4378,4379,4380,4381,4382,4383,4384,4385,4386,4387,
|
|
||||||
4388,4389,4390,4391,4392,4393,4394,4395,4396,4397,4398,4399,4400,4401,4402,4403,
|
|
||||||
4404,4405,4406,4407,4408,4409,4410,4411,4412,4413,4414,4415,4416,1619,4417,4418,
|
|
||||||
4419,4420,4421,4422,4423,4424,4425,1112,4426,4427,4428,4429,4430,1620,4431,4432,
|
|
||||||
4433,4434,4435,4436,4437,4438,4439,4440,4441,4442,1260,1261,4443,4444,4445,4446,
|
|
||||||
4447,4448,4449,4450,4451,4452,4453,4454,4455,1359,4456,4457,4458,4459,4460,4461,
|
|
||||||
4462,4463,4464,4465,1621,4466,4467,4468,4469,4470,4471,4472,4473,4474,4475,4476,
|
|
||||||
4477,4478,4479,4480,4481,4482,4483,4484,4485,4486,4487,4488,4489,1055,4490,4491,
|
|
||||||
4492,4493,4494,4495,4496,4497,4498,4499,4500,4501,4502,4503,4504,4505,4506,4507,
|
|
||||||
4508,4509,4510,4511,4512,4513,4514,4515,4516,4517,4518,1622,4519,4520,4521,1623,
|
|
||||||
4522,4523,4524,4525,4526,4527,4528,4529,4530,4531,4532,4533,4534,4535,1360,4536,
|
|
||||||
4537,4538,4539,4540,4541,4542,4543, 975,4544,4545,4546,4547,4548,4549,4550,4551,
|
|
||||||
4552,4553,4554,4555,4556,4557,4558,4559,4560,4561,4562,4563,4564,4565,4566,4567,
|
|
||||||
4568,4569,4570,4571,1624,4572,4573,4574,4575,4576,1625,4577,4578,4579,4580,4581,
|
|
||||||
4582,4583,4584,1626,4585,4586,4587,4588,4589,4590,4591,4592,4593,4594,4595,1627,
|
|
||||||
4596,4597,4598,4599,4600,4601,4602,4603,4604,4605,4606,4607,4608,4609,4610,4611,
|
|
||||||
4612,4613,4614,4615,1628,4616,4617,4618,4619,4620,4621,4622,4623,4624,4625,4626,
|
|
||||||
4627,4628,4629,4630,4631,4632,4633,4634,4635,4636,4637,4638,4639,4640,4641,4642,
|
|
||||||
4643,4644,4645,4646,4647,4648,4649,1361,4650,4651,4652,4653,4654,4655,4656,4657,
|
|
||||||
4658,4659,4660,4661,1362,4662,4663,4664,4665,4666,4667,4668,4669,4670,4671,4672,
|
|
||||||
4673,4674,4675,4676,4677,4678,4679,4680,4681,4682,1629,4683,4684,4685,4686,4687,
|
|
||||||
1630,4688,4689,4690,4691,1153,4692,4693,4694,1113,4695,4696,4697,4698,4699,4700,
|
|
||||||
4701,4702,4703,4704,4705,4706,4707,4708,4709,4710,4711,1197,4712,4713,4714,4715,
|
|
||||||
4716,4717,4718,4719,4720,4721,4722,4723,4724,4725,4726,4727,4728,4729,4730,4731,
|
|
||||||
4732,4733,4734,4735,1631,4736,1632,4737,4738,4739,4740,4741,4742,4743,4744,1633,
|
|
||||||
4745,4746,4747,4748,4749,1262,4750,4751,4752,4753,4754,1363,4755,4756,4757,4758,
|
|
||||||
4759,4760,4761,4762,4763,4764,4765,4766,4767,4768,1634,4769,4770,4771,4772,4773,
|
|
||||||
4774,4775,4776,4777,4778,1635,4779,4780,4781,4782,4783,4784,4785,4786,4787,4788,
|
|
||||||
4789,1636,4790,4791,4792,4793,4794,4795,4796,4797,4798,4799,4800,4801,4802,4803,
|
|
||||||
4804,4805,4806,1637,4807,4808,4809,1638,4810,4811,4812,4813,4814,4815,4816,4817,
|
|
||||||
4818,1639,4819,4820,4821,4822,4823,4824,4825,4826,4827,4828,4829,4830,4831,4832,
|
|
||||||
4833,1077,4834,4835,4836,4837,4838,4839,4840,4841,4842,4843,4844,4845,4846,4847,
|
|
||||||
4848,4849,4850,4851,4852,4853,4854,4855,4856,4857,4858,4859,4860,4861,4862,4863,
|
|
||||||
4864,4865,4866,4867,4868,4869,4870,4871,4872,4873,4874,4875,4876,4877,4878,4879,
|
|
||||||
4880,4881,4882,4883,1640,4884,4885,1641,4886,4887,4888,4889,4890,4891,4892,4893,
|
|
||||||
4894,4895,4896,4897,4898,4899,4900,4901,4902,4903,4904,4905,4906,4907,4908,4909,
|
|
||||||
4910,4911,1642,4912,4913,4914,1364,4915,4916,4917,4918,4919,4920,4921,4922,4923,
|
|
||||||
4924,4925,4926,4927,4928,4929,4930,4931,1643,4932,4933,4934,4935,4936,4937,4938,
|
|
||||||
4939,4940,4941,4942,4943,4944,4945,4946,4947,4948,4949,4950,4951,4952,4953,4954,
|
|
||||||
4955,4956,4957,4958,4959,4960,4961,4962,4963,4964,4965,4966,4967,4968,4969,4970,
|
|
||||||
4971,4972,4973,4974,4975,4976,4977,4978,4979,4980,1644,4981,4982,4983,4984,1645,
|
|
||||||
4985,4986,1646,4987,4988,4989,4990,4991,4992,4993,4994,4995,4996,4997,4998,4999,
|
|
||||||
5000,5001,5002,5003,5004,5005,1647,5006,1648,5007,5008,5009,5010,5011,5012,1078,
|
|
||||||
5013,5014,5015,5016,5017,5018,5019,5020,5021,5022,5023,5024,5025,5026,5027,5028,
|
|
||||||
1365,5029,5030,5031,5032,5033,5034,5035,5036,5037,5038,5039,1649,5040,5041,5042,
|
|
||||||
5043,5044,5045,1366,5046,5047,5048,5049,5050,5051,5052,5053,5054,5055,1650,5056,
|
|
||||||
5057,5058,5059,5060,5061,5062,5063,5064,5065,5066,5067,5068,5069,5070,5071,5072,
|
|
||||||
5073,5074,5075,5076,5077,1651,5078,5079,5080,5081,5082,5083,5084,5085,5086,5087,
|
|
||||||
5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102,5103,
|
|
||||||
5104,5105,5106,5107,5108,5109,5110,1652,5111,5112,5113,5114,5115,5116,5117,5118,
|
|
||||||
1367,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,1653,5130,5131,5132,
|
|
||||||
5133,5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,
|
|
||||||
5149,1368,5150,1654,5151,1369,5152,5153,5154,5155,5156,5157,5158,5159,5160,5161,
|
|
||||||
5162,5163,5164,5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,5176,5177,
|
|
||||||
5178,1370,5179,5180,5181,5182,5183,5184,5185,5186,5187,5188,5189,5190,5191,5192,
|
|
||||||
5193,5194,5195,5196,5197,5198,1655,5199,5200,5201,5202,1656,5203,5204,5205,5206,
|
|
||||||
1371,5207,1372,5208,5209,5210,5211,1373,5212,5213,1374,5214,5215,5216,5217,5218,
|
|
||||||
5219,5220,5221,5222,5223,5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,
|
|
||||||
5235,5236,5237,5238,5239,5240,5241,5242,5243,5244,5245,5246,5247,1657,5248,5249,
|
|
||||||
5250,5251,1658,1263,5252,5253,5254,5255,5256,1375,5257,5258,5259,5260,5261,5262,
|
|
||||||
5263,5264,5265,5266,5267,5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,
|
|
||||||
5279,5280,5281,5282,5283,1659,5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,
|
|
||||||
5294,5295,5296,5297,5298,5299,5300,1660,5301,5302,5303,5304,5305,5306,5307,5308,
|
|
||||||
5309,5310,5311,5312,5313,5314,5315,5316,5317,5318,5319,5320,5321,1376,5322,5323,
|
|
||||||
5324,5325,5326,5327,5328,5329,5330,5331,5332,5333,1198,5334,5335,5336,5337,5338,
|
|
||||||
5339,5340,5341,5342,5343,1661,5344,5345,5346,5347,5348,5349,5350,5351,5352,5353,
|
|
||||||
5354,5355,5356,5357,5358,5359,5360,5361,5362,5363,5364,5365,5366,5367,5368,5369,
|
|
||||||
5370,5371,5372,5373,5374,5375,5376,5377,5378,5379,5380,5381,5382,5383,5384,5385,
|
|
||||||
5386,5387,5388,5389,5390,5391,5392,5393,5394,5395,5396,5397,5398,1264,5399,5400,
|
|
||||||
5401,5402,5403,5404,5405,5406,5407,5408,5409,5410,5411,5412,1662,5413,5414,5415,
|
|
||||||
5416,1663,5417,5418,5419,5420,5421,5422,5423,5424,5425,5426,5427,5428,5429,5430,
|
|
||||||
5431,5432,5433,5434,5435,5436,5437,5438,1664,5439,5440,5441,5442,5443,5444,5445,
|
|
||||||
5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456,5457,5458,5459,5460,5461,
|
|
||||||
5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472,5473,5474,5475,5476,5477,
|
|
||||||
5478,1154,5479,5480,5481,5482,5483,5484,5485,1665,5486,5487,5488,5489,5490,5491,
|
|
||||||
5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504,5505,5506,5507,
|
|
||||||
5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520,5521,5522,5523,
|
|
||||||
5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536,5537,5538,5539,
|
|
||||||
5540,5541,5542,5543,5544,5545,5546,5547,5548,1377,5549,5550,5551,5552,5553,5554,
|
|
||||||
5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568,5569,5570,
|
|
||||||
1114,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584,5585,
|
|
||||||
5586,5587,5588,5589,5590,5591,5592,1378,5593,5594,5595,5596,5597,5598,5599,5600,
|
|
||||||
5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,1379,5615,
|
|
||||||
5616,5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,
|
|
||||||
5632,5633,5634,1380,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,
|
|
||||||
5647,5648,5649,1381,1056,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,
|
|
||||||
1666,5661,5662,5663,5664,5665,5666,5667,5668,1667,5669,1668,5670,5671,5672,5673,
|
|
||||||
5674,5675,5676,5677,5678,1155,5679,5680,5681,5682,5683,5684,5685,5686,5687,5688,
|
|
||||||
5689,5690,5691,5692,5693,5694,5695,5696,5697,5698,1669,5699,5700,5701,5702,5703,
|
|
||||||
5704,5705,1670,5706,5707,5708,5709,5710,1671,5711,5712,5713,5714,1382,5715,5716,
|
|
||||||
5717,5718,5719,5720,5721,5722,5723,5724,5725,1672,5726,5727,1673,1674,5728,5729,
|
|
||||||
5730,5731,5732,5733,5734,5735,5736,1675,5737,5738,5739,5740,5741,5742,5743,5744,
|
|
||||||
1676,5745,5746,5747,5748,5749,5750,5751,1383,5752,5753,5754,5755,5756,5757,5758,
|
|
||||||
5759,5760,5761,5762,5763,5764,5765,5766,5767,5768,1677,5769,5770,5771,5772,5773,
|
|
||||||
1678,5774,5775,5776, 998,5777,5778,5779,5780,5781,5782,5783,5784,5785,1384,5786,
|
|
||||||
5787,5788,5789,5790,5791,5792,5793,5794,5795,5796,5797,5798,5799,5800,1679,5801,
|
|
||||||
5802,5803,1115,1116,5804,5805,5806,5807,5808,5809,5810,5811,5812,5813,5814,5815,
|
|
||||||
5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828,5829,5830,5831,
|
|
||||||
5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844,5845,5846,5847,
|
|
||||||
5848,5849,5850,5851,5852,5853,5854,5855,1680,5856,5857,5858,5859,5860,5861,5862,
|
|
||||||
5863,5864,1681,5865,5866,5867,1682,5868,5869,5870,5871,5872,5873,5874,5875,5876,
|
|
||||||
5877,5878,5879,1683,5880,1684,5881,5882,5883,5884,1685,5885,5886,5887,5888,5889,
|
|
||||||
5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905,
|
|
||||||
5906,5907,1686,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920,
|
|
||||||
5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,1687,
|
|
||||||
5936,5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,
|
|
||||||
5952,1688,1689,5953,1199,5954,5955,5956,5957,5958,5959,5960,5961,1690,5962,5963,
|
|
||||||
5964,5965,5966,5967,5968,5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,
|
|
||||||
5980,5981,1385,5982,1386,5983,5984,5985,5986,5987,5988,5989,5990,5991,5992,5993,
|
|
||||||
5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004,6005,6006,6007,6008,6009,
|
|
||||||
6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020,6021,6022,6023,6024,6025,
|
|
||||||
6026,6027,1265,6028,6029,1691,6030,6031,6032,6033,6034,6035,6036,6037,6038,6039,
|
|
||||||
6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052,6053,6054,6055,
|
|
||||||
6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068,6069,6070,6071,
|
|
||||||
6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084,1692,6085,6086,
|
|
||||||
6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100,6101,6102,
|
|
||||||
6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116,6117,6118,
|
|
||||||
6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,1693,6132,6133,
|
|
||||||
6134,6135,6136,1694,6137,6138,6139,6140,6141,1695,6142,6143,6144,6145,6146,6147,
|
|
||||||
6148,6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163,
|
|
||||||
6164,6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179,
|
|
||||||
6180,6181,6182,6183,6184,6185,1696,6186,6187,6188,6189,6190,6191,6192,6193,6194,
|
|
||||||
6195,6196,6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210,
|
|
||||||
6211,6212,6213,6214,6215,6216,6217,6218,6219,1697,6220,6221,6222,6223,6224,6225,
|
|
||||||
6226,6227,6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241,
|
|
||||||
6242,6243,6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,1698,6254,6255,6256,
|
|
||||||
6257,6258,6259,6260,6261,6262,6263,1200,6264,6265,6266,6267,6268,6269,6270,6271, #1024
|
|
||||||
6272,6273,6274,6275,6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,6286,6287,
|
|
||||||
6288,6289,6290,6291,6292,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,1699,
|
|
||||||
6303,6304,1700,6305,6306,6307,6308,6309,6310,6311,6312,6313,6314,6315,6316,6317,
|
|
||||||
6318,6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333,
|
|
||||||
6334,6335,6336,6337,6338,6339,1701,6340,6341,6342,6343,6344,1387,6345,6346,6347,
|
|
||||||
6348,6349,6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363,
|
|
||||||
6364,6365,6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379,
|
|
||||||
6380,6381,6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395,
|
|
||||||
6396,6397,6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,6411,
|
|
||||||
6412,6413,1702,6414,6415,6416,6417,6418,6419,6420,6421,6422,1703,6423,6424,6425,
|
|
||||||
6426,6427,6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,1704,6439,6440,
|
|
||||||
6441,6442,6443,6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,6455,6456,
|
|
||||||
6457,6458,6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472,
|
|
||||||
6473,6474,6475,6476,6477,6478,6479,6480,6481,6482,6483,6484,6485,6486,6487,6488,
|
|
||||||
6489,6490,6491,6492,6493,6494,6495,6496,6497,6498,6499,6500,6501,6502,6503,1266,
|
|
||||||
6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,
|
|
||||||
6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532,6533,6534,6535,
|
|
||||||
6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548,6549,6550,6551,
|
|
||||||
1705,1706,6552,6553,6554,6555,6556,6557,6558,6559,6560,6561,6562,6563,6564,6565,
|
|
||||||
6566,6567,6568,6569,6570,6571,6572,6573,6574,6575,6576,6577,6578,6579,6580,6581,
|
|
||||||
6582,6583,6584,6585,6586,6587,6588,6589,6590,6591,6592,6593,6594,6595,6596,6597,
|
|
||||||
6598,6599,6600,6601,6602,6603,6604,6605,6606,6607,6608,6609,6610,6611,6612,6613,
|
|
||||||
6614,6615,6616,6617,6618,6619,6620,6621,6622,6623,6624,6625,6626,6627,6628,6629,
|
|
||||||
6630,6631,6632,6633,6634,6635,6636,6637,1388,6638,6639,6640,6641,6642,6643,6644,
|
|
||||||
1707,6645,6646,6647,6648,6649,6650,6651,6652,6653,6654,6655,6656,6657,6658,6659,
|
|
||||||
6660,6661,6662,6663,1708,6664,6665,6666,6667,6668,6669,6670,6671,6672,6673,6674,
|
|
||||||
1201,6675,6676,6677,6678,6679,6680,6681,6682,6683,6684,6685,6686,6687,6688,6689,
|
|
||||||
6690,6691,6692,6693,6694,6695,6696,6697,6698,6699,6700,6701,6702,6703,6704,6705,
|
|
||||||
6706,6707,6708,6709,6710,6711,6712,6713,6714,6715,6716,6717,6718,6719,6720,6721,
|
|
||||||
6722,6723,6724,6725,1389,6726,6727,6728,6729,6730,6731,6732,6733,6734,6735,6736,
|
|
||||||
1390,1709,6737,6738,6739,6740,6741,6742,1710,6743,6744,6745,6746,1391,6747,6748,
|
|
||||||
6749,6750,6751,6752,6753,6754,6755,6756,6757,1392,6758,6759,6760,6761,6762,6763,
|
|
||||||
6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777,6778,6779,
|
|
||||||
6780,1202,6781,6782,6783,6784,6785,6786,6787,6788,6789,6790,6791,6792,6793,6794,
|
|
||||||
6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806,6807,6808,6809,1711,
|
|
||||||
6810,6811,6812,6813,6814,6815,6816,6817,6818,6819,6820,6821,6822,6823,6824,6825,
|
|
||||||
6826,6827,6828,6829,6830,6831,6832,6833,6834,6835,6836,1393,6837,6838,6839,6840,
|
|
||||||
6841,6842,6843,6844,6845,6846,6847,6848,6849,6850,6851,6852,6853,6854,6855,6856,
|
|
||||||
6857,6858,6859,6860,6861,6862,6863,6864,6865,6866,6867,6868,6869,6870,6871,6872,
|
|
||||||
6873,6874,6875,6876,6877,6878,6879,6880,6881,6882,6883,6884,6885,6886,6887,6888,
|
|
||||||
6889,6890,6891,6892,6893,6894,6895,6896,6897,6898,6899,6900,6901,6902,1712,6903,
|
|
||||||
6904,6905,6906,6907,6908,6909,6910,1713,6911,6912,6913,6914,6915,6916,6917,6918,
|
|
||||||
6919,6920,6921,6922,6923,6924,6925,6926,6927,6928,6929,6930,6931,6932,6933,6934,
|
|
||||||
6935,6936,6937,6938,6939,6940,6941,6942,6943,6944,6945,6946,6947,6948,6949,6950,
|
|
||||||
6951,6952,6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966,
|
|
||||||
6967,6968,6969,6970,6971,6972,6973,6974,1714,6975,6976,6977,6978,6979,6980,6981,
|
|
||||||
6982,6983,6984,6985,6986,6987,6988,1394,6989,6990,6991,6992,6993,6994,6995,6996,
|
|
||||||
6997,6998,6999,7000,1715,7001,7002,7003,7004,7005,7006,7007,7008,7009,7010,7011,
|
|
||||||
7012,7013,7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027,
|
|
||||||
7028,1716,7029,7030,7031,7032,7033,7034,7035,7036,7037,7038,7039,7040,7041,7042,
|
|
||||||
7043,7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058,
|
|
||||||
7059,7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,7071,7072,7073,7074,
|
|
||||||
7075,7076,7077,7078,7079,7080,7081,7082,7083,7084,7085,7086,7087,7088,7089,7090,
|
|
||||||
7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105,7106,
|
|
||||||
7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,7119,7120,7121,7122,
|
|
||||||
7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136,7137,7138,
|
|
||||||
7139,7140,7141,7142,7143,7144,7145,7146,7147,7148,7149,7150,7151,7152,7153,7154,
|
|
||||||
7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167,7168,7169,7170,
|
|
||||||
7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183,7184,7185,7186,
|
|
||||||
7187,7188,7189,7190,7191,7192,7193,7194,7195,7196,7197,7198,7199,7200,7201,7202,
|
|
||||||
7203,7204,7205,7206,7207,1395,7208,7209,7210,7211,7212,7213,1717,7214,7215,7216,
|
|
||||||
7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229,7230,7231,7232,
|
|
||||||
7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245,7246,7247,7248,
|
|
||||||
7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261,7262,7263,7264,
|
|
||||||
7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277,7278,7279,7280,
|
|
||||||
7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293,7294,7295,7296,
|
|
||||||
7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308,7309,7310,7311,7312,
|
|
||||||
7313,1718,7314,7315,7316,7317,7318,7319,7320,7321,7322,7323,7324,7325,7326,7327,
|
|
||||||
7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339,7340,7341,7342,7343,
|
|
||||||
7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,7354,7355,7356,7357,7358,7359,
|
|
||||||
7360,7361,7362,7363,7364,7365,7366,7367,7368,7369,7370,7371,7372,7373,7374,7375,
|
|
||||||
7376,7377,7378,7379,7380,7381,7382,7383,7384,7385,7386,7387,7388,7389,7390,7391,
|
|
||||||
7392,7393,7394,7395,7396,7397,7398,7399,7400,7401,7402,7403,7404,7405,7406,7407,
|
|
||||||
7408,7409,7410,7411,7412,7413,7414,7415,7416,7417,7418,7419,7420,7421,7422,7423,
|
|
||||||
7424,7425,7426,7427,7428,7429,7430,7431,7432,7433,7434,7435,7436,7437,7438,7439,
|
|
||||||
7440,7441,7442,7443,7444,7445,7446,7447,7448,7449,7450,7451,7452,7453,7454,7455,
|
|
||||||
7456,7457,7458,7459,7460,7461,7462,7463,7464,7465,7466,7467,7468,7469,7470,7471,
|
|
||||||
7472,7473,7474,7475,7476,7477,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487,
|
|
||||||
7488,7489,7490,7491,7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,7503,
|
|
||||||
7504,7505,7506,7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519,
|
|
||||||
7520,7521,7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535,
|
|
||||||
7536,7537,7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,7550,7551,
|
|
||||||
7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567,
|
|
||||||
7568,7569,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582,7583,
|
|
||||||
7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598,7599,
|
|
||||||
7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614,7615,
|
|
||||||
7616,7617,7618,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628,7629,7630,7631,
|
|
||||||
7632,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643,7644,7645,7646,7647,
|
|
||||||
7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659,7660,7661,7662,7663,
|
|
||||||
7664,7665,7666,7667,7668,7669,7670,7671,7672,7673,7674,7675,7676,7677,7678,7679,
|
|
||||||
7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690,7691,7692,7693,7694,7695,
|
|
||||||
7696,7697,7698,7699,7700,7701,7702,7703,7704,7705,7706,7707,7708,7709,7710,7711,
|
|
||||||
7712,7713,7714,7715,7716,7717,7718,7719,7720,7721,7722,7723,7724,7725,7726,7727,
|
|
||||||
7728,7729,7730,7731,7732,7733,7734,7735,7736,7737,7738,7739,7740,7741,7742,7743,
|
|
||||||
7744,7745,7746,7747,7748,7749,7750,7751,7752,7753,7754,7755,7756,7757,7758,7759,
|
|
||||||
7760,7761,7762,7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775,
|
|
||||||
7776,7777,7778,7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791,
|
|
||||||
7792,7793,7794,7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,7806,7807,
|
|
||||||
7808,7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,
|
|
||||||
7824,7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,
|
|
||||||
7840,7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,
|
|
||||||
7856,7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,
|
|
||||||
7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,
|
|
||||||
7888,7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,
|
|
||||||
7904,7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,
|
|
||||||
7920,7921,7922,7923,7924,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935,
|
|
||||||
7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951,
|
|
||||||
7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967,
|
|
||||||
7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983,
|
|
||||||
7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999,
|
|
||||||
8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015,
|
|
||||||
8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031,
|
|
||||||
8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047,
|
|
||||||
8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063,
|
|
||||||
8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079,
|
|
||||||
8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095,
|
|
||||||
8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111,
|
|
||||||
8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127,
|
|
||||||
8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143,
|
|
||||||
8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159,
|
|
||||||
8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175,
|
|
||||||
8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191,
|
|
||||||
8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,
|
|
||||||
8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223,
|
|
||||||
8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239,
|
|
||||||
8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255,
|
|
||||||
8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271,
|
|
||||||
8272,8273,8274,8275,8276,8277,8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,
|
|
||||||
8288,8289,8290,8291,8292,8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,
|
|
||||||
8304,8305,8306,8307,8308,8309,8310,8311,8312,8313,8314,8315,8316,8317,8318,8319,
|
|
||||||
8320,8321,8322,8323,8324,8325,8326,8327,8328,8329,8330,8331,8332,8333,8334,8335,
|
|
||||||
8336,8337,8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,
|
|
||||||
8352,8353,8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,8364,8365,8366,8367,
|
|
||||||
8368,8369,8370,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382,8383,
|
|
||||||
8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398,8399,
|
|
||||||
8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,8411,8412,8413,8414,8415,
|
|
||||||
8416,8417,8418,8419,8420,8421,8422,8423,8424,8425,8426,8427,8428,8429,8430,8431,
|
|
||||||
8432,8433,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443,8444,8445,8446,8447,
|
|
||||||
8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459,8460,8461,8462,8463,
|
|
||||||
8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475,8476,8477,8478,8479,
|
|
||||||
8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490,8491,8492,8493,8494,8495,
|
|
||||||
8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506,8507,8508,8509,8510,8511,
|
|
||||||
8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522,8523,8524,8525,8526,8527,
|
|
||||||
8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538,8539,8540,8541,8542,8543,
|
|
||||||
8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,
|
|
||||||
8560,8561,8562,8563,8564,8565,8566,8567,8568,8569,8570,8571,8572,8573,8574,8575,
|
|
||||||
8576,8577,8578,8579,8580,8581,8582,8583,8584,8585,8586,8587,8588,8589,8590,8591,
|
|
||||||
8592,8593,8594,8595,8596,8597,8598,8599,8600,8601,8602,8603,8604,8605,8606,8607,
|
|
||||||
8608,8609,8610,8611,8612,8613,8614,8615,8616,8617,8618,8619,8620,8621,8622,8623,
|
|
||||||
8624,8625,8626,8627,8628,8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,8639,
|
|
||||||
8640,8641,8642,8643,8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,
|
|
||||||
8656,8657,8658,8659,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,
|
|
||||||
8672,8673,8674,8675,8676,8677,8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,
|
|
||||||
8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,
|
|
||||||
8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,
|
|
||||||
8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,
|
|
||||||
8736,8737,8738,8739,8740,8741)
|
|
||||||
|
|
||||||
# flake8: noqa
|
|
||||||
|
|
15
thirdparty/chardet/euckrprober.py
vendored
15
thirdparty/chardet/euckrprober.py
vendored
|
@ -28,15 +28,20 @@
|
||||||
from .mbcharsetprober import MultiByteCharSetProber
|
from .mbcharsetprober import MultiByteCharSetProber
|
||||||
from .codingstatemachine import CodingStateMachine
|
from .codingstatemachine import CodingStateMachine
|
||||||
from .chardistribution import EUCKRDistributionAnalysis
|
from .chardistribution import EUCKRDistributionAnalysis
|
||||||
from .mbcssm import EUCKRSMModel
|
from .mbcssm import EUCKR_SM_MODEL
|
||||||
|
|
||||||
|
|
||||||
class EUCKRProber(MultiByteCharSetProber):
|
class EUCKRProber(MultiByteCharSetProber):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
MultiByteCharSetProber.__init__(self)
|
super(EUCKRProber, self).__init__()
|
||||||
self._mCodingSM = CodingStateMachine(EUCKRSMModel)
|
self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
|
||||||
self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
|
self.distribution_analyzer = EUCKRDistributionAnalysis()
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
|
def charset_name(self):
|
||||||
return "EUC-KR"
|
return "EUC-KR"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def language(self):
|
||||||
|
return "Korean"
|
||||||
|
|
47
thirdparty/chardet/euctwfreq.py
vendored
47
thirdparty/chardet/euctwfreq.py
vendored
|
@ -44,9 +44,9 @@
|
||||||
EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
||||||
|
|
||||||
# Char to FreqOrder table ,
|
# Char to FreqOrder table ,
|
||||||
EUCTW_TABLE_SIZE = 8102
|
EUCTW_TABLE_SIZE = 5376
|
||||||
|
|
||||||
EUCTWCharToFreqOrder = (
|
EUCTW_CHAR_TO_FREQ_ORDER = (
|
||||||
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
|
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
|
||||||
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
|
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
|
||||||
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
|
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
|
||||||
|
@ -383,46 +383,5 @@ EUCTWCharToFreqOrder = (
|
||||||
3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070
|
3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070
|
||||||
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086
|
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086
|
||||||
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102
|
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102
|
||||||
#Everything below is of no interest for detection purpose
|
)
|
||||||
2515,1613,4582,8119,3312,3866,2516,8120,4058,8121,1637,4059,2466,4583,3867,8122, # 8118
|
|
||||||
2493,3016,3734,8123,8124,2192,8125,8126,2162,8127,8128,8129,8130,8131,8132,8133, # 8134
|
|
||||||
8134,8135,8136,8137,8138,8139,8140,8141,8142,8143,8144,8145,8146,8147,8148,8149, # 8150
|
|
||||||
8150,8151,8152,8153,8154,8155,8156,8157,8158,8159,8160,8161,8162,8163,8164,8165, # 8166
|
|
||||||
8166,8167,8168,8169,8170,8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181, # 8182
|
|
||||||
8182,8183,8184,8185,8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197, # 8198
|
|
||||||
8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213, # 8214
|
|
||||||
8214,8215,8216,8217,8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229, # 8230
|
|
||||||
8230,8231,8232,8233,8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245, # 8246
|
|
||||||
8246,8247,8248,8249,8250,8251,8252,8253,8254,8255,8256,8257,8258,8259,8260,8261, # 8262
|
|
||||||
8262,8263,8264,8265,8266,8267,8268,8269,8270,8271,8272,8273,8274,8275,8276,8277, # 8278
|
|
||||||
8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,8290,8291,8292,8293, # 8294
|
|
||||||
8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,8308,8309, # 8310
|
|
||||||
8310,8311,8312,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322,8323,8324,8325, # 8326
|
|
||||||
8326,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337,8338,8339,8340,8341, # 8342
|
|
||||||
8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353,8354,8355,8356,8357, # 8358
|
|
||||||
8358,8359,8360,8361,8362,8363,8364,8365,8366,8367,8368,8369,8370,8371,8372,8373, # 8374
|
|
||||||
8374,8375,8376,8377,8378,8379,8380,8381,8382,8383,8384,8385,8386,8387,8388,8389, # 8390
|
|
||||||
8390,8391,8392,8393,8394,8395,8396,8397,8398,8399,8400,8401,8402,8403,8404,8405, # 8406
|
|
||||||
8406,8407,8408,8409,8410,8411,8412,8413,8414,8415,8416,8417,8418,8419,8420,8421, # 8422
|
|
||||||
8422,8423,8424,8425,8426,8427,8428,8429,8430,8431,8432,8433,8434,8435,8436,8437, # 8438
|
|
||||||
8438,8439,8440,8441,8442,8443,8444,8445,8446,8447,8448,8449,8450,8451,8452,8453, # 8454
|
|
||||||
8454,8455,8456,8457,8458,8459,8460,8461,8462,8463,8464,8465,8466,8467,8468,8469, # 8470
|
|
||||||
8470,8471,8472,8473,8474,8475,8476,8477,8478,8479,8480,8481,8482,8483,8484,8485, # 8486
|
|
||||||
8486,8487,8488,8489,8490,8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501, # 8502
|
|
||||||
8502,8503,8504,8505,8506,8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517, # 8518
|
|
||||||
8518,8519,8520,8521,8522,8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533, # 8534
|
|
||||||
8534,8535,8536,8537,8538,8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549, # 8550
|
|
||||||
8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,8565, # 8566
|
|
||||||
8566,8567,8568,8569,8570,8571,8572,8573,8574,8575,8576,8577,8578,8579,8580,8581, # 8582
|
|
||||||
8582,8583,8584,8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597, # 8598
|
|
||||||
8598,8599,8600,8601,8602,8603,8604,8605,8606,8607,8608,8609,8610,8611,8612,8613, # 8614
|
|
||||||
8614,8615,8616,8617,8618,8619,8620,8621,8622,8623,8624,8625,8626,8627,8628,8629, # 8630
|
|
||||||
8630,8631,8632,8633,8634,8635,8636,8637,8638,8639,8640,8641,8642,8643,8644,8645, # 8646
|
|
||||||
8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,8657,8658,8659,8660,8661, # 8662
|
|
||||||
8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672,8673,8674,8675,8676,8677, # 8678
|
|
||||||
8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,8688,8689,8690,8691,8692,8693, # 8694
|
|
||||||
8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, # 8710
|
|
||||||
8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, # 8726
|
|
||||||
8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741) # 8742
|
|
||||||
|
|
||||||
# flake8: noqa
|
|
||||||
|
|
15
thirdparty/chardet/euctwprober.py
vendored
15
thirdparty/chardet/euctwprober.py
vendored
|
@ -28,14 +28,19 @@
|
||||||
from .mbcharsetprober import MultiByteCharSetProber
|
from .mbcharsetprober import MultiByteCharSetProber
|
||||||
from .codingstatemachine import CodingStateMachine
|
from .codingstatemachine import CodingStateMachine
|
||||||
from .chardistribution import EUCTWDistributionAnalysis
|
from .chardistribution import EUCTWDistributionAnalysis
|
||||||
from .mbcssm import EUCTWSMModel
|
from .mbcssm import EUCTW_SM_MODEL
|
||||||
|
|
||||||
class EUCTWProber(MultiByteCharSetProber):
|
class EUCTWProber(MultiByteCharSetProber):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
MultiByteCharSetProber.__init__(self)
|
super(EUCTWProber, self).__init__()
|
||||||
self._mCodingSM = CodingStateMachine(EUCTWSMModel)
|
self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
|
||||||
self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
|
self.distribution_analyzer = EUCTWDistributionAnalysis()
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
|
def charset_name(self):
|
||||||
return "EUC-TW"
|
return "EUC-TW"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def language(self):
|
||||||
|
return "Taiwan"
|
||||||
|
|
195
thirdparty/chardet/gb2312freq.py
vendored
195
thirdparty/chardet/gb2312freq.py
vendored
|
@ -43,7 +43,7 @@ GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
|
||||||
|
|
||||||
GB2312_TABLE_SIZE = 3760
|
GB2312_TABLE_SIZE = 3760
|
||||||
|
|
||||||
GB2312CharToFreqOrder = (
|
GB2312_CHAR_TO_FREQ_ORDER = (
|
||||||
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
|
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
|
||||||
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
|
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
|
||||||
2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
|
2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
|
||||||
|
@ -278,195 +278,6 @@ GB2312CharToFreqOrder = (
|
||||||
1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232,
|
1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232,
|
||||||
1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624,
|
1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624,
|
||||||
381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189,
|
381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189,
|
||||||
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, # last 512
|
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, #last 512
|
||||||
#Everything below is of no interest for detection purpose
|
)
|
||||||
5508,6484,3900,3414,3974,4441,4024,3537,4037,5628,5099,3633,6485,3148,6486,3636,
|
|
||||||
5509,3257,5510,5973,5445,5872,4941,4403,3174,4627,5873,6276,2286,4230,5446,5874,
|
|
||||||
5122,6102,6103,4162,5447,5123,5323,4849,6277,3980,3851,5066,4246,5774,5067,6278,
|
|
||||||
3001,2807,5695,3346,5775,5974,5158,5448,6487,5975,5976,5776,3598,6279,5696,4806,
|
|
||||||
4211,4154,6280,6488,6489,6490,6281,4212,5037,3374,4171,6491,4562,4807,4722,4827,
|
|
||||||
5977,6104,4532,4079,5159,5324,5160,4404,3858,5359,5875,3975,4288,4610,3486,4512,
|
|
||||||
5325,3893,5360,6282,6283,5560,2522,4231,5978,5186,5449,2569,3878,6284,5401,3578,
|
|
||||||
4415,6285,4656,5124,5979,2506,4247,4449,3219,3417,4334,4969,4329,6492,4576,4828,
|
|
||||||
4172,4416,4829,5402,6286,3927,3852,5361,4369,4830,4477,4867,5876,4173,6493,6105,
|
|
||||||
4657,6287,6106,5877,5450,6494,4155,4868,5451,3700,5629,4384,6288,6289,5878,3189,
|
|
||||||
4881,6107,6290,6495,4513,6496,4692,4515,4723,5100,3356,6497,6291,3810,4080,5561,
|
|
||||||
3570,4430,5980,6498,4355,5697,6499,4724,6108,6109,3764,4050,5038,5879,4093,3226,
|
|
||||||
6292,5068,5217,4693,3342,5630,3504,4831,4377,4466,4309,5698,4431,5777,6293,5778,
|
|
||||||
4272,3706,6110,5326,3752,4676,5327,4273,5403,4767,5631,6500,5699,5880,3475,5039,
|
|
||||||
6294,5562,5125,4348,4301,4482,4068,5126,4593,5700,3380,3462,5981,5563,3824,5404,
|
|
||||||
4970,5511,3825,4738,6295,6501,5452,4516,6111,5881,5564,6502,6296,5982,6503,4213,
|
|
||||||
4163,3454,6504,6112,4009,4450,6113,4658,6297,6114,3035,6505,6115,3995,4904,4739,
|
|
||||||
4563,4942,4110,5040,3661,3928,5362,3674,6506,5292,3612,4791,5565,4149,5983,5328,
|
|
||||||
5259,5021,4725,4577,4564,4517,4364,6298,5405,4578,5260,4594,4156,4157,5453,3592,
|
|
||||||
3491,6507,5127,5512,4709,4922,5984,5701,4726,4289,6508,4015,6116,5128,4628,3424,
|
|
||||||
4241,5779,6299,4905,6509,6510,5454,5702,5780,6300,4365,4923,3971,6511,5161,3270,
|
|
||||||
3158,5985,4100, 867,5129,5703,6117,5363,3695,3301,5513,4467,6118,6512,5455,4232,
|
|
||||||
4242,4629,6513,3959,4478,6514,5514,5329,5986,4850,5162,5566,3846,4694,6119,5456,
|
|
||||||
4869,5781,3779,6301,5704,5987,5515,4710,6302,5882,6120,4392,5364,5705,6515,6121,
|
|
||||||
6516,6517,3736,5988,5457,5989,4695,2457,5883,4551,5782,6303,6304,6305,5130,4971,
|
|
||||||
6122,5163,6123,4870,3263,5365,3150,4871,6518,6306,5783,5069,5706,3513,3498,4409,
|
|
||||||
5330,5632,5366,5458,5459,3991,5990,4502,3324,5991,5784,3696,4518,5633,4119,6519,
|
|
||||||
4630,5634,4417,5707,4832,5992,3418,6124,5993,5567,4768,5218,6520,4595,3458,5367,
|
|
||||||
6125,5635,6126,4202,6521,4740,4924,6307,3981,4069,4385,6308,3883,2675,4051,3834,
|
|
||||||
4302,4483,5568,5994,4972,4101,5368,6309,5164,5884,3922,6127,6522,6523,5261,5460,
|
|
||||||
5187,4164,5219,3538,5516,4111,3524,5995,6310,6311,5369,3181,3386,2484,5188,3464,
|
|
||||||
5569,3627,5708,6524,5406,5165,4677,4492,6312,4872,4851,5885,4468,5996,6313,5709,
|
|
||||||
5710,6128,2470,5886,6314,5293,4882,5785,3325,5461,5101,6129,5711,5786,6525,4906,
|
|
||||||
6526,6527,4418,5887,5712,4808,2907,3701,5713,5888,6528,3765,5636,5331,6529,6530,
|
|
||||||
3593,5889,3637,4943,3692,5714,5787,4925,6315,6130,5462,4405,6131,6132,6316,5262,
|
|
||||||
6531,6532,5715,3859,5716,5070,4696,5102,3929,5788,3987,4792,5997,6533,6534,3920,
|
|
||||||
4809,5000,5998,6535,2974,5370,6317,5189,5263,5717,3826,6536,3953,5001,4883,3190,
|
|
||||||
5463,5890,4973,5999,4741,6133,6134,3607,5570,6000,4711,3362,3630,4552,5041,6318,
|
|
||||||
6001,2950,2953,5637,4646,5371,4944,6002,2044,4120,3429,6319,6537,5103,4833,6538,
|
|
||||||
6539,4884,4647,3884,6003,6004,4758,3835,5220,5789,4565,5407,6540,6135,5294,4697,
|
|
||||||
4852,6320,6321,3206,4907,6541,6322,4945,6542,6136,6543,6323,6005,4631,3519,6544,
|
|
||||||
5891,6545,5464,3784,5221,6546,5571,4659,6547,6324,6137,5190,6548,3853,6549,4016,
|
|
||||||
4834,3954,6138,5332,3827,4017,3210,3546,4469,5408,5718,3505,4648,5790,5131,5638,
|
|
||||||
5791,5465,4727,4318,6325,6326,5792,4553,4010,4698,3439,4974,3638,4335,3085,6006,
|
|
||||||
5104,5042,5166,5892,5572,6327,4356,4519,5222,5573,5333,5793,5043,6550,5639,5071,
|
|
||||||
4503,6328,6139,6551,6140,3914,3901,5372,6007,5640,4728,4793,3976,3836,4885,6552,
|
|
||||||
4127,6553,4451,4102,5002,6554,3686,5105,6555,5191,5072,5295,4611,5794,5296,6556,
|
|
||||||
5893,5264,5894,4975,5466,5265,4699,4976,4370,4056,3492,5044,4886,6557,5795,4432,
|
|
||||||
4769,4357,5467,3940,4660,4290,6141,4484,4770,4661,3992,6329,4025,4662,5022,4632,
|
|
||||||
4835,4070,5297,4663,4596,5574,5132,5409,5895,6142,4504,5192,4664,5796,5896,3885,
|
|
||||||
5575,5797,5023,4810,5798,3732,5223,4712,5298,4084,5334,5468,6143,4052,4053,4336,
|
|
||||||
4977,4794,6558,5335,4908,5576,5224,4233,5024,4128,5469,5225,4873,6008,5045,4729,
|
|
||||||
4742,4633,3675,4597,6559,5897,5133,5577,5003,5641,5719,6330,6560,3017,2382,3854,
|
|
||||||
4406,4811,6331,4393,3964,4946,6561,2420,3722,6562,4926,4378,3247,1736,4442,6332,
|
|
||||||
5134,6333,5226,3996,2918,5470,4319,4003,4598,4743,4744,4485,3785,3902,5167,5004,
|
|
||||||
5373,4394,5898,6144,4874,1793,3997,6334,4085,4214,5106,5642,4909,5799,6009,4419,
|
|
||||||
4189,3330,5899,4165,4420,5299,5720,5227,3347,6145,4081,6335,2876,3930,6146,3293,
|
|
||||||
3786,3910,3998,5900,5300,5578,2840,6563,5901,5579,6147,3531,5374,6564,6565,5580,
|
|
||||||
4759,5375,6566,6148,3559,5643,6336,6010,5517,6337,6338,5721,5902,3873,6011,6339,
|
|
||||||
6567,5518,3868,3649,5722,6568,4771,4947,6569,6149,4812,6570,2853,5471,6340,6341,
|
|
||||||
5644,4795,6342,6012,5723,6343,5724,6013,4349,6344,3160,6150,5193,4599,4514,4493,
|
|
||||||
5168,4320,6345,4927,3666,4745,5169,5903,5005,4928,6346,5725,6014,4730,4203,5046,
|
|
||||||
4948,3395,5170,6015,4150,6016,5726,5519,6347,5047,3550,6151,6348,4197,4310,5904,
|
|
||||||
6571,5581,2965,6152,4978,3960,4291,5135,6572,5301,5727,4129,4026,5905,4853,5728,
|
|
||||||
5472,6153,6349,4533,2700,4505,5336,4678,3583,5073,2994,4486,3043,4554,5520,6350,
|
|
||||||
6017,5800,4487,6351,3931,4103,5376,6352,4011,4321,4311,4190,5136,6018,3988,3233,
|
|
||||||
4350,5906,5645,4198,6573,5107,3432,4191,3435,5582,6574,4139,5410,6353,5411,3944,
|
|
||||||
5583,5074,3198,6575,6354,4358,6576,5302,4600,5584,5194,5412,6577,6578,5585,5413,
|
|
||||||
5303,4248,5414,3879,4433,6579,4479,5025,4854,5415,6355,4760,4772,3683,2978,4700,
|
|
||||||
3797,4452,3965,3932,3721,4910,5801,6580,5195,3551,5907,3221,3471,3029,6019,3999,
|
|
||||||
5908,5909,5266,5267,3444,3023,3828,3170,4796,5646,4979,4259,6356,5647,5337,3694,
|
|
||||||
6357,5648,5338,4520,4322,5802,3031,3759,4071,6020,5586,4836,4386,5048,6581,3571,
|
|
||||||
4679,4174,4949,6154,4813,3787,3402,3822,3958,3215,3552,5268,4387,3933,4950,4359,
|
|
||||||
6021,5910,5075,3579,6358,4234,4566,5521,6359,3613,5049,6022,5911,3375,3702,3178,
|
|
||||||
4911,5339,4521,6582,6583,4395,3087,3811,5377,6023,6360,6155,4027,5171,5649,4421,
|
|
||||||
4249,2804,6584,2270,6585,4000,4235,3045,6156,5137,5729,4140,4312,3886,6361,4330,
|
|
||||||
6157,4215,6158,3500,3676,4929,4331,3713,4930,5912,4265,3776,3368,5587,4470,4855,
|
|
||||||
3038,4980,3631,6159,6160,4132,4680,6161,6362,3923,4379,5588,4255,6586,4121,6587,
|
|
||||||
6363,4649,6364,3288,4773,4774,6162,6024,6365,3543,6588,4274,3107,3737,5050,5803,
|
|
||||||
4797,4522,5589,5051,5730,3714,4887,5378,4001,4523,6163,5026,5522,4701,4175,2791,
|
|
||||||
3760,6589,5473,4224,4133,3847,4814,4815,4775,3259,5416,6590,2738,6164,6025,5304,
|
|
||||||
3733,5076,5650,4816,5590,6591,6165,6592,3934,5269,6593,3396,5340,6594,5804,3445,
|
|
||||||
3602,4042,4488,5731,5732,3525,5591,4601,5196,6166,6026,5172,3642,4612,3202,4506,
|
|
||||||
4798,6366,3818,5108,4303,5138,5139,4776,3332,4304,2915,3415,4434,5077,5109,4856,
|
|
||||||
2879,5305,4817,6595,5913,3104,3144,3903,4634,5341,3133,5110,5651,5805,6167,4057,
|
|
||||||
5592,2945,4371,5593,6596,3474,4182,6367,6597,6168,4507,4279,6598,2822,6599,4777,
|
|
||||||
4713,5594,3829,6169,3887,5417,6170,3653,5474,6368,4216,2971,5228,3790,4579,6369,
|
|
||||||
5733,6600,6601,4951,4746,4555,6602,5418,5475,6027,3400,4665,5806,6171,4799,6028,
|
|
||||||
5052,6172,3343,4800,4747,5006,6370,4556,4217,5476,4396,5229,5379,5477,3839,5914,
|
|
||||||
5652,5807,4714,3068,4635,5808,6173,5342,4192,5078,5419,5523,5734,6174,4557,6175,
|
|
||||||
4602,6371,6176,6603,5809,6372,5735,4260,3869,5111,5230,6029,5112,6177,3126,4681,
|
|
||||||
5524,5915,2706,3563,4748,3130,6178,4018,5525,6604,6605,5478,4012,4837,6606,4534,
|
|
||||||
4193,5810,4857,3615,5479,6030,4082,3697,3539,4086,5270,3662,4508,4931,5916,4912,
|
|
||||||
5811,5027,3888,6607,4397,3527,3302,3798,2775,2921,2637,3966,4122,4388,4028,4054,
|
|
||||||
1633,4858,5079,3024,5007,3982,3412,5736,6608,3426,3236,5595,3030,6179,3427,3336,
|
|
||||||
3279,3110,6373,3874,3039,5080,5917,5140,4489,3119,6374,5812,3405,4494,6031,4666,
|
|
||||||
4141,6180,4166,6032,5813,4981,6609,5081,4422,4982,4112,3915,5653,3296,3983,6375,
|
|
||||||
4266,4410,5654,6610,6181,3436,5082,6611,5380,6033,3819,5596,4535,5231,5306,5113,
|
|
||||||
6612,4952,5918,4275,3113,6613,6376,6182,6183,5814,3073,4731,4838,5008,3831,6614,
|
|
||||||
4888,3090,3848,4280,5526,5232,3014,5655,5009,5737,5420,5527,6615,5815,5343,5173,
|
|
||||||
5381,4818,6616,3151,4953,6617,5738,2796,3204,4360,2989,4281,5739,5174,5421,5197,
|
|
||||||
3132,5141,3849,5142,5528,5083,3799,3904,4839,5480,2880,4495,3448,6377,6184,5271,
|
|
||||||
5919,3771,3193,6034,6035,5920,5010,6036,5597,6037,6378,6038,3106,5422,6618,5423,
|
|
||||||
5424,4142,6619,4889,5084,4890,4313,5740,6620,3437,5175,5307,5816,4199,5198,5529,
|
|
||||||
5817,5199,5656,4913,5028,5344,3850,6185,2955,5272,5011,5818,4567,4580,5029,5921,
|
|
||||||
3616,5233,6621,6622,6186,4176,6039,6379,6380,3352,5200,5273,2908,5598,5234,3837,
|
|
||||||
5308,6623,6624,5819,4496,4323,5309,5201,6625,6626,4983,3194,3838,4167,5530,5922,
|
|
||||||
5274,6381,6382,3860,3861,5599,3333,4292,4509,6383,3553,5481,5820,5531,4778,6187,
|
|
||||||
3955,3956,4324,4389,4218,3945,4325,3397,2681,5923,4779,5085,4019,5482,4891,5382,
|
|
||||||
5383,6040,4682,3425,5275,4094,6627,5310,3015,5483,5657,4398,5924,3168,4819,6628,
|
|
||||||
5925,6629,5532,4932,4613,6041,6630,4636,6384,4780,4204,5658,4423,5821,3989,4683,
|
|
||||||
5822,6385,4954,6631,5345,6188,5425,5012,5384,3894,6386,4490,4104,6632,5741,5053,
|
|
||||||
6633,5823,5926,5659,5660,5927,6634,5235,5742,5824,4840,4933,4820,6387,4859,5928,
|
|
||||||
4955,6388,4143,3584,5825,5346,5013,6635,5661,6389,5014,5484,5743,4337,5176,5662,
|
|
||||||
6390,2836,6391,3268,6392,6636,6042,5236,6637,4158,6638,5744,5663,4471,5347,3663,
|
|
||||||
4123,5143,4293,3895,6639,6640,5311,5929,5826,3800,6189,6393,6190,5664,5348,3554,
|
|
||||||
3594,4749,4603,6641,5385,4801,6043,5827,4183,6642,5312,5426,4761,6394,5665,6191,
|
|
||||||
4715,2669,6643,6644,5533,3185,5427,5086,5930,5931,5386,6192,6044,6645,4781,4013,
|
|
||||||
5745,4282,4435,5534,4390,4267,6045,5746,4984,6046,2743,6193,3501,4087,5485,5932,
|
|
||||||
5428,4184,4095,5747,4061,5054,3058,3862,5933,5600,6646,5144,3618,6395,3131,5055,
|
|
||||||
5313,6396,4650,4956,3855,6194,3896,5202,4985,4029,4225,6195,6647,5828,5486,5829,
|
|
||||||
3589,3002,6648,6397,4782,5276,6649,6196,6650,4105,3803,4043,5237,5830,6398,4096,
|
|
||||||
3643,6399,3528,6651,4453,3315,4637,6652,3984,6197,5535,3182,3339,6653,3096,2660,
|
|
||||||
6400,6654,3449,5934,4250,4236,6047,6401,5831,6655,5487,3753,4062,5832,6198,6199,
|
|
||||||
6656,3766,6657,3403,4667,6048,6658,4338,2897,5833,3880,2797,3780,4326,6659,5748,
|
|
||||||
5015,6660,5387,4351,5601,4411,6661,3654,4424,5935,4339,4072,5277,4568,5536,6402,
|
|
||||||
6662,5238,6663,5349,5203,6200,5204,6201,5145,4536,5016,5056,4762,5834,4399,4957,
|
|
||||||
6202,6403,5666,5749,6664,4340,6665,5936,5177,5667,6666,6667,3459,4668,6404,6668,
|
|
||||||
6669,4543,6203,6670,4276,6405,4480,5537,6671,4614,5205,5668,6672,3348,2193,4763,
|
|
||||||
6406,6204,5937,5602,4177,5669,3419,6673,4020,6205,4443,4569,5388,3715,3639,6407,
|
|
||||||
6049,4058,6206,6674,5938,4544,6050,4185,4294,4841,4651,4615,5488,6207,6408,6051,
|
|
||||||
5178,3241,3509,5835,6208,4958,5836,4341,5489,5278,6209,2823,5538,5350,5206,5429,
|
|
||||||
6675,4638,4875,4073,3516,4684,4914,4860,5939,5603,5389,6052,5057,3237,5490,3791,
|
|
||||||
6676,6409,6677,4821,4915,4106,5351,5058,4243,5539,4244,5604,4842,4916,5239,3028,
|
|
||||||
3716,5837,5114,5605,5390,5940,5430,6210,4332,6678,5540,4732,3667,3840,6053,4305,
|
|
||||||
3408,5670,5541,6410,2744,5240,5750,6679,3234,5606,6680,5607,5671,3608,4283,4159,
|
|
||||||
4400,5352,4783,6681,6411,6682,4491,4802,6211,6412,5941,6413,6414,5542,5751,6683,
|
|
||||||
4669,3734,5942,6684,6415,5943,5059,3328,4670,4144,4268,6685,6686,6687,6688,4372,
|
|
||||||
3603,6689,5944,5491,4373,3440,6416,5543,4784,4822,5608,3792,4616,5838,5672,3514,
|
|
||||||
5391,6417,4892,6690,4639,6691,6054,5673,5839,6055,6692,6056,5392,6212,4038,5544,
|
|
||||||
5674,4497,6057,6693,5840,4284,5675,4021,4545,5609,6418,4454,6419,6213,4113,4472,
|
|
||||||
5314,3738,5087,5279,4074,5610,4959,4063,3179,4750,6058,6420,6214,3476,4498,4716,
|
|
||||||
5431,4960,4685,6215,5241,6694,6421,6216,6695,5841,5945,6422,3748,5946,5179,3905,
|
|
||||||
5752,5545,5947,4374,6217,4455,6423,4412,6218,4803,5353,6696,3832,5280,6219,4327,
|
|
||||||
4702,6220,6221,6059,4652,5432,6424,3749,4751,6425,5753,4986,5393,4917,5948,5030,
|
|
||||||
5754,4861,4733,6426,4703,6697,6222,4671,5949,4546,4961,5180,6223,5031,3316,5281,
|
|
||||||
6698,4862,4295,4934,5207,3644,6427,5842,5950,6428,6429,4570,5843,5282,6430,6224,
|
|
||||||
5088,3239,6060,6699,5844,5755,6061,6431,2701,5546,6432,5115,5676,4039,3993,3327,
|
|
||||||
4752,4425,5315,6433,3941,6434,5677,4617,4604,3074,4581,6225,5433,6435,6226,6062,
|
|
||||||
4823,5756,5116,6227,3717,5678,4717,5845,6436,5679,5846,6063,5847,6064,3977,3354,
|
|
||||||
6437,3863,5117,6228,5547,5394,4499,4524,6229,4605,6230,4306,4500,6700,5951,6065,
|
|
||||||
3693,5952,5089,4366,4918,6701,6231,5548,6232,6702,6438,4704,5434,6703,6704,5953,
|
|
||||||
4168,6705,5680,3420,6706,5242,4407,6066,3812,5757,5090,5954,4672,4525,3481,5681,
|
|
||||||
4618,5395,5354,5316,5955,6439,4962,6707,4526,6440,3465,4673,6067,6441,5682,6708,
|
|
||||||
5435,5492,5758,5683,4619,4571,4674,4804,4893,4686,5493,4753,6233,6068,4269,6442,
|
|
||||||
6234,5032,4705,5146,5243,5208,5848,6235,6443,4963,5033,4640,4226,6236,5849,3387,
|
|
||||||
6444,6445,4436,4437,5850,4843,5494,4785,4894,6709,4361,6710,5091,5956,3331,6237,
|
|
||||||
4987,5549,6069,6711,4342,3517,4473,5317,6070,6712,6071,4706,6446,5017,5355,6713,
|
|
||||||
6714,4988,5436,6447,4734,5759,6715,4735,4547,4456,4754,6448,5851,6449,6450,3547,
|
|
||||||
5852,5318,6451,6452,5092,4205,6716,6238,4620,4219,5611,6239,6072,4481,5760,5957,
|
|
||||||
5958,4059,6240,6453,4227,4537,6241,5761,4030,4186,5244,5209,3761,4457,4876,3337,
|
|
||||||
5495,5181,6242,5959,5319,5612,5684,5853,3493,5854,6073,4169,5613,5147,4895,6074,
|
|
||||||
5210,6717,5182,6718,3830,6243,2798,3841,6075,6244,5855,5614,3604,4606,5496,5685,
|
|
||||||
5118,5356,6719,6454,5960,5357,5961,6720,4145,3935,4621,5119,5962,4261,6721,6455,
|
|
||||||
4786,5963,4375,4582,6245,6246,6247,6076,5437,4877,5856,3376,4380,6248,4160,6722,
|
|
||||||
5148,6456,5211,6457,6723,4718,6458,6724,6249,5358,4044,3297,6459,6250,5857,5615,
|
|
||||||
5497,5245,6460,5498,6725,6251,6252,5550,3793,5499,2959,5396,6461,6462,4572,5093,
|
|
||||||
5500,5964,3806,4146,6463,4426,5762,5858,6077,6253,4755,3967,4220,5965,6254,4989,
|
|
||||||
5501,6464,4352,6726,6078,4764,2290,5246,3906,5438,5283,3767,4964,2861,5763,5094,
|
|
||||||
6255,6256,4622,5616,5859,5860,4707,6727,4285,4708,4824,5617,6257,5551,4787,5212,
|
|
||||||
4965,4935,4687,6465,6728,6466,5686,6079,3494,4413,2995,5247,5966,5618,6729,5967,
|
|
||||||
5764,5765,5687,5502,6730,6731,6080,5397,6467,4990,6258,6732,4538,5060,5619,6733,
|
|
||||||
4719,5688,5439,5018,5149,5284,5503,6734,6081,4607,6259,5120,3645,5861,4583,6260,
|
|
||||||
4584,4675,5620,4098,5440,6261,4863,2379,3306,4585,5552,5689,4586,5285,6735,4864,
|
|
||||||
6736,5286,6082,6737,4623,3010,4788,4381,4558,5621,4587,4896,3698,3161,5248,4353,
|
|
||||||
4045,6262,3754,5183,4588,6738,6263,6739,6740,5622,3936,6741,6468,6742,6264,5095,
|
|
||||||
6469,4991,5968,6743,4992,6744,6083,4897,6745,4256,5766,4307,3108,3968,4444,5287,
|
|
||||||
3889,4343,6084,4510,6085,4559,6086,4898,5969,6746,5623,5061,4919,5249,5250,5504,
|
|
||||||
5441,6265,5320,4878,3242,5862,5251,3428,6087,6747,4237,5624,5442,6266,5553,4539,
|
|
||||||
6748,2585,3533,5398,4262,6088,5150,4736,4438,6089,6267,5505,4966,6749,6268,6750,
|
|
||||||
6269,5288,5554,3650,6090,6091,4624,6092,5690,6751,5863,4270,5691,4277,5555,5864,
|
|
||||||
6752,5692,4720,4865,6470,5151,4688,4825,6753,3094,6754,6471,3235,4653,6755,5213,
|
|
||||||
5399,6756,3201,4589,5865,4967,6472,5866,6473,5019,3016,6757,5321,4756,3957,4573,
|
|
||||||
6093,4993,5767,4721,6474,6758,5625,6759,4458,6475,6270,6760,5556,4994,5214,5252,
|
|
||||||
6271,3875,5768,6094,5034,5506,4376,5769,6761,2120,6476,5253,5770,6762,5771,5970,
|
|
||||||
3990,5971,5557,5558,5772,6477,6095,2787,4641,5972,5121,6096,6097,6272,6763,3703,
|
|
||||||
5867,5507,6273,4206,6274,4789,6098,6764,3619,3646,3833,3804,2394,3788,4936,3978,
|
|
||||||
4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767)
|
|
||||||
|
|
||||||
# flake8: noqa
|
|
||||||
|
|
15
thirdparty/chardet/gb2312prober.py
vendored
15
thirdparty/chardet/gb2312prober.py
vendored
|
@ -28,14 +28,19 @@
|
||||||
from .mbcharsetprober import MultiByteCharSetProber
|
from .mbcharsetprober import MultiByteCharSetProber
|
||||||
from .codingstatemachine import CodingStateMachine
|
from .codingstatemachine import CodingStateMachine
|
||||||
from .chardistribution import GB2312DistributionAnalysis
|
from .chardistribution import GB2312DistributionAnalysis
|
||||||
from .mbcssm import GB2312SMModel
|
from .mbcssm import GB2312_SM_MODEL
|
||||||
|
|
||||||
class GB2312Prober(MultiByteCharSetProber):
|
class GB2312Prober(MultiByteCharSetProber):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
MultiByteCharSetProber.__init__(self)
|
super(GB2312Prober, self).__init__()
|
||||||
self._mCodingSM = CodingStateMachine(GB2312SMModel)
|
self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
|
||||||
self._mDistributionAnalyzer = GB2312DistributionAnalysis()
|
self.distribution_analyzer = GB2312DistributionAnalysis()
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
|
def charset_name(self):
|
||||||
return "GB2312"
|
return "GB2312"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def language(self):
|
||||||
|
return "Chinese"
|
||||||
|
|
163
thirdparty/chardet/hebrewprober.py
vendored
163
thirdparty/chardet/hebrewprober.py
vendored
|
@ -26,8 +26,7 @@
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
from .charsetprober import CharSetProber
|
from .charsetprober import CharSetProber
|
||||||
from .constants import eNotMe, eDetecting
|
from .enums import ProbingState
|
||||||
from .compat import wrap_ord
|
|
||||||
|
|
||||||
# This prober doesn't actually recognize a language or a charset.
|
# This prober doesn't actually recognize a language or a charset.
|
||||||
# It is a helper prober for the use of the Hebrew model probers
|
# It is a helper prober for the use of the Hebrew model probers
|
||||||
|
@ -126,56 +125,59 @@ from .compat import wrap_ord
|
||||||
# model probers scores. The answer is returned in the form of the name of the
|
# model probers scores. The answer is returned in the form of the name of the
|
||||||
# charset identified, either "windows-1255" or "ISO-8859-8".
|
# charset identified, either "windows-1255" or "ISO-8859-8".
|
||||||
|
|
||||||
# windows-1255 / ISO-8859-8 code points of interest
|
|
||||||
FINAL_KAF = 0xea
|
|
||||||
NORMAL_KAF = 0xeb
|
|
||||||
FINAL_MEM = 0xed
|
|
||||||
NORMAL_MEM = 0xee
|
|
||||||
FINAL_NUN = 0xef
|
|
||||||
NORMAL_NUN = 0xf0
|
|
||||||
FINAL_PE = 0xf3
|
|
||||||
NORMAL_PE = 0xf4
|
|
||||||
FINAL_TSADI = 0xf5
|
|
||||||
NORMAL_TSADI = 0xf6
|
|
||||||
|
|
||||||
# Minimum Visual vs Logical final letter score difference.
|
|
||||||
# If the difference is below this, don't rely solely on the final letter score
|
|
||||||
# distance.
|
|
||||||
MIN_FINAL_CHAR_DISTANCE = 5
|
|
||||||
|
|
||||||
# Minimum Visual vs Logical model score difference.
|
|
||||||
# If the difference is below this, don't rely at all on the model score
|
|
||||||
# distance.
|
|
||||||
MIN_MODEL_DISTANCE = 0.01
|
|
||||||
|
|
||||||
VISUAL_HEBREW_NAME = "ISO-8859-8"
|
|
||||||
LOGICAL_HEBREW_NAME = "windows-1255"
|
|
||||||
|
|
||||||
|
|
||||||
class HebrewProber(CharSetProber):
|
class HebrewProber(CharSetProber):
|
||||||
|
# windows-1255 / ISO-8859-8 code points of interest
|
||||||
|
FINAL_KAF = 0xea
|
||||||
|
NORMAL_KAF = 0xeb
|
||||||
|
FINAL_MEM = 0xed
|
||||||
|
NORMAL_MEM = 0xee
|
||||||
|
FINAL_NUN = 0xef
|
||||||
|
NORMAL_NUN = 0xf0
|
||||||
|
FINAL_PE = 0xf3
|
||||||
|
NORMAL_PE = 0xf4
|
||||||
|
FINAL_TSADI = 0xf5
|
||||||
|
NORMAL_TSADI = 0xf6
|
||||||
|
|
||||||
|
# Minimum Visual vs Logical final letter score difference.
|
||||||
|
# If the difference is below this, don't rely solely on the final letter score
|
||||||
|
# distance.
|
||||||
|
MIN_FINAL_CHAR_DISTANCE = 5
|
||||||
|
|
||||||
|
# Minimum Visual vs Logical model score difference.
|
||||||
|
# If the difference is below this, don't rely at all on the model score
|
||||||
|
# distance.
|
||||||
|
MIN_MODEL_DISTANCE = 0.01
|
||||||
|
|
||||||
|
VISUAL_HEBREW_NAME = "ISO-8859-8"
|
||||||
|
LOGICAL_HEBREW_NAME = "windows-1255"
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
CharSetProber.__init__(self)
|
super(HebrewProber, self).__init__()
|
||||||
self._mLogicalProber = None
|
self._final_char_logical_score = None
|
||||||
self._mVisualProber = None
|
self._final_char_visual_score = None
|
||||||
|
self._prev = None
|
||||||
|
self._before_prev = None
|
||||||
|
self._logical_prober = None
|
||||||
|
self._visual_prober = None
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self._mFinalCharLogicalScore = 0
|
self._final_char_logical_score = 0
|
||||||
self._mFinalCharVisualScore = 0
|
self._final_char_visual_score = 0
|
||||||
# The two last characters seen in the previous buffer,
|
# The two last characters seen in the previous buffer,
|
||||||
# mPrev and mBeforePrev are initialized to space in order to simulate
|
# mPrev and mBeforePrev are initialized to space in order to simulate
|
||||||
# a word delimiter at the beginning of the data
|
# a word delimiter at the beginning of the data
|
||||||
self._mPrev = ' '
|
self._prev = ' '
|
||||||
self._mBeforePrev = ' '
|
self._before_prev = ' '
|
||||||
# These probers are owned by the group prober.
|
# These probers are owned by the group prober.
|
||||||
|
|
||||||
def set_model_probers(self, logicalProber, visualProber):
|
def set_model_probers(self, logicalProber, visualProber):
|
||||||
self._mLogicalProber = logicalProber
|
self._logical_prober = logicalProber
|
||||||
self._mVisualProber = visualProber
|
self._visual_prober = visualProber
|
||||||
|
|
||||||
def is_final(self, c):
|
def is_final(self, c):
|
||||||
return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,
|
return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN,
|
||||||
FINAL_TSADI]
|
self.FINAL_PE, self.FINAL_TSADI]
|
||||||
|
|
||||||
def is_non_final(self, c):
|
def is_non_final(self, c):
|
||||||
# The normal Tsadi is not a good Non-Final letter due to words like
|
# The normal Tsadi is not a good Non-Final letter due to words like
|
||||||
|
@ -188,9 +190,10 @@ class HebrewProber(CharSetProber):
|
||||||
# for example legally end with a Non-Final Pe or Kaf. However, the
|
# for example legally end with a Non-Final Pe or Kaf. However, the
|
||||||
# benefit of these letters as Non-Final letters outweighs the damage
|
# benefit of these letters as Non-Final letters outweighs the damage
|
||||||
# since these words are quite rare.
|
# since these words are quite rare.
|
||||||
return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
|
return c in [self.NORMAL_KAF, self.NORMAL_MEM,
|
||||||
|
self.NORMAL_NUN, self.NORMAL_PE]
|
||||||
|
|
||||||
def feed(self, aBuf):
|
def feed(self, byte_str):
|
||||||
# Final letter analysis for logical-visual decision.
|
# Final letter analysis for logical-visual decision.
|
||||||
# Look for evidence that the received buffer is either logical Hebrew
|
# Look for evidence that the received buffer is either logical Hebrew
|
||||||
# or visual Hebrew.
|
# or visual Hebrew.
|
||||||
|
@ -217,67 +220,73 @@ class HebrewProber(CharSetProber):
|
||||||
# We automatically filter out all 7-bit characters (replace them with
|
# We automatically filter out all 7-bit characters (replace them with
|
||||||
# spaces) so the word boundary detection works properly. [MAP]
|
# spaces) so the word boundary detection works properly. [MAP]
|
||||||
|
|
||||||
if self.get_state() == eNotMe:
|
if self.state == ProbingState.NOT_ME:
|
||||||
# Both model probers say it's not them. No reason to continue.
|
# Both model probers say it's not them. No reason to continue.
|
||||||
return eNotMe
|
return ProbingState.NOT_ME
|
||||||
|
|
||||||
aBuf = self.filter_high_bit_only(aBuf)
|
byte_str = self.filter_high_byte_only(byte_str)
|
||||||
|
|
||||||
for cur in aBuf:
|
for cur in byte_str:
|
||||||
if cur == ' ':
|
if cur == ' ':
|
||||||
# We stand on a space - a word just ended
|
# We stand on a space - a word just ended
|
||||||
if self._mBeforePrev != ' ':
|
if self._before_prev != ' ':
|
||||||
# next-to-last char was not a space so self._mPrev is not a
|
# next-to-last char was not a space so self._prev is not a
|
||||||
# 1 letter word
|
# 1 letter word
|
||||||
if self.is_final(self._mPrev):
|
if self.is_final(self._prev):
|
||||||
# case (1) [-2:not space][-1:final letter][cur:space]
|
# case (1) [-2:not space][-1:final letter][cur:space]
|
||||||
self._mFinalCharLogicalScore += 1
|
self._final_char_logical_score += 1
|
||||||
elif self.is_non_final(self._mPrev):
|
elif self.is_non_final(self._prev):
|
||||||
# case (2) [-2:not space][-1:Non-Final letter][
|
# case (2) [-2:not space][-1:Non-Final letter][
|
||||||
# cur:space]
|
# cur:space]
|
||||||
self._mFinalCharVisualScore += 1
|
self._final_char_visual_score += 1
|
||||||
else:
|
else:
|
||||||
# Not standing on a space
|
# Not standing on a space
|
||||||
if ((self._mBeforePrev == ' ') and
|
if ((self._before_prev == ' ') and
|
||||||
(self.is_final(self._mPrev)) and (cur != ' ')):
|
(self.is_final(self._prev)) and (cur != ' ')):
|
||||||
# case (3) [-2:space][-1:final letter][cur:not space]
|
# case (3) [-2:space][-1:final letter][cur:not space]
|
||||||
self._mFinalCharVisualScore += 1
|
self._final_char_visual_score += 1
|
||||||
self._mBeforePrev = self._mPrev
|
self._before_prev = self._prev
|
||||||
self._mPrev = cur
|
self._prev = cur
|
||||||
|
|
||||||
# Forever detecting, till the end or until both model probers return
|
# Forever detecting, till the end or until both model probers return
|
||||||
# eNotMe (handled above)
|
# ProbingState.NOT_ME (handled above)
|
||||||
return eDetecting
|
return ProbingState.DETECTING
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
|
def charset_name(self):
|
||||||
# Make the decision: is it Logical or Visual?
|
# Make the decision: is it Logical or Visual?
|
||||||
# If the final letter score distance is dominant enough, rely on it.
|
# If the final letter score distance is dominant enough, rely on it.
|
||||||
finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore
|
finalsub = self._final_char_logical_score - self._final_char_visual_score
|
||||||
if finalsub >= MIN_FINAL_CHAR_DISTANCE:
|
if finalsub >= self.MIN_FINAL_CHAR_DISTANCE:
|
||||||
return LOGICAL_HEBREW_NAME
|
return self.LOGICAL_HEBREW_NAME
|
||||||
if finalsub <= -MIN_FINAL_CHAR_DISTANCE:
|
if finalsub <= -self.MIN_FINAL_CHAR_DISTANCE:
|
||||||
return VISUAL_HEBREW_NAME
|
return self.VISUAL_HEBREW_NAME
|
||||||
|
|
||||||
# It's not dominant enough, try to rely on the model scores instead.
|
# It's not dominant enough, try to rely on the model scores instead.
|
||||||
modelsub = (self._mLogicalProber.get_confidence()
|
modelsub = (self._logical_prober.get_confidence()
|
||||||
- self._mVisualProber.get_confidence())
|
- self._visual_prober.get_confidence())
|
||||||
if modelsub > MIN_MODEL_DISTANCE:
|
if modelsub > self.MIN_MODEL_DISTANCE:
|
||||||
return LOGICAL_HEBREW_NAME
|
return self.LOGICAL_HEBREW_NAME
|
||||||
if modelsub < -MIN_MODEL_DISTANCE:
|
if modelsub < -self.MIN_MODEL_DISTANCE:
|
||||||
return VISUAL_HEBREW_NAME
|
return self.VISUAL_HEBREW_NAME
|
||||||
|
|
||||||
# Still no good, back to final letter distance, maybe it'll save the
|
# Still no good, back to final letter distance, maybe it'll save the
|
||||||
# day.
|
# day.
|
||||||
if finalsub < 0.0:
|
if finalsub < 0.0:
|
||||||
return VISUAL_HEBREW_NAME
|
return self.VISUAL_HEBREW_NAME
|
||||||
|
|
||||||
# (finalsub > 0 - Logical) or (don't know what to do) default to
|
# (finalsub > 0 - Logical) or (don't know what to do) default to
|
||||||
# Logical.
|
# Logical.
|
||||||
return LOGICAL_HEBREW_NAME
|
return self.LOGICAL_HEBREW_NAME
|
||||||
|
|
||||||
def get_state(self):
|
@property
|
||||||
|
def language(self):
|
||||||
|
return 'Hebrew'
|
||||||
|
|
||||||
|
@property
|
||||||
|
def state(self):
|
||||||
# Remain active as long as any of the model probers are active.
|
# Remain active as long as any of the model probers are active.
|
||||||
if (self._mLogicalProber.get_state() == eNotMe) and \
|
if (self._logical_prober.state == ProbingState.NOT_ME) and \
|
||||||
(self._mVisualProber.get_state() == eNotMe):
|
(self._visual_prober.state == ProbingState.NOT_ME):
|
||||||
return eNotMe
|
return ProbingState.NOT_ME
|
||||||
return eDetecting
|
return ProbingState.DETECTING
|
||||||
|
|
250
thirdparty/chardet/jisfreq.py
vendored
250
thirdparty/chardet/jisfreq.py
vendored
|
@ -46,7 +46,7 @@ JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0
|
||||||
# Char to FreqOrder table ,
|
# Char to FreqOrder table ,
|
||||||
JIS_TABLE_SIZE = 4368
|
JIS_TABLE_SIZE = 4368
|
||||||
|
|
||||||
JISCharToFreqOrder = (
|
JIS_CHAR_TO_FREQ_ORDER = (
|
||||||
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16
|
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16
|
||||||
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32
|
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32
|
||||||
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48
|
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48
|
||||||
|
@ -320,250 +320,6 @@ JISCharToFreqOrder = (
|
||||||
2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336
|
2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336
|
||||||
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352
|
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352
|
||||||
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512
|
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512
|
||||||
#Everything below is of no interest for detection purpose
|
)
|
||||||
2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, # 4384
|
|
||||||
6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, # 4400
|
|
||||||
6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, # 4416
|
|
||||||
6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, # 4432
|
|
||||||
6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, # 4448
|
|
||||||
4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, # 4464
|
|
||||||
4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, # 4480
|
|
||||||
3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, # 4496
|
|
||||||
3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, # 4512
|
|
||||||
4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, # 4528
|
|
||||||
3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, # 4544
|
|
||||||
6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, # 4560
|
|
||||||
4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, # 4576
|
|
||||||
6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, # 4592
|
|
||||||
6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, # 4608
|
|
||||||
6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, # 4624
|
|
||||||
6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, # 4640
|
|
||||||
6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, # 4656
|
|
||||||
6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, # 4672
|
|
||||||
3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, # 4688
|
|
||||||
3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, # 4704
|
|
||||||
6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, # 4720
|
|
||||||
2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, # 4736
|
|
||||||
4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, # 4752
|
|
||||||
4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, # 4768
|
|
||||||
4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, # 4784
|
|
||||||
6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, # 4800
|
|
||||||
3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, # 4816
|
|
||||||
4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, # 4832
|
|
||||||
4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, # 4848
|
|
||||||
6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, # 4864
|
|
||||||
4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, # 4880
|
|
||||||
6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, # 4896
|
|
||||||
3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, # 4912
|
|
||||||
2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, # 4928
|
|
||||||
4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, # 4944
|
|
||||||
2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, # 4960
|
|
||||||
6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, # 4976
|
|
||||||
4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, # 4992
|
|
||||||
6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, # 5008
|
|
||||||
6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, # 5024
|
|
||||||
6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, # 5040
|
|
||||||
4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, # 5056
|
|
||||||
6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, # 5072
|
|
||||||
2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, # 5088
|
|
||||||
6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, # 5104
|
|
||||||
4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, # 5120
|
|
||||||
6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, # 5136
|
|
||||||
4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, # 5152
|
|
||||||
4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, # 5168
|
|
||||||
6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, # 5184
|
|
||||||
6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, # 5200
|
|
||||||
6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, # 5216
|
|
||||||
3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, # 5232
|
|
||||||
1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, # 5248
|
|
||||||
3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, # 5264
|
|
||||||
3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, # 5280
|
|
||||||
4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, # 5296
|
|
||||||
6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, # 5312
|
|
||||||
3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, # 5328
|
|
||||||
6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, # 5344
|
|
||||||
3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, # 5360
|
|
||||||
3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, # 5376
|
|
||||||
2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, # 5392
|
|
||||||
6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, # 5408
|
|
||||||
6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, # 5424
|
|
||||||
3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, # 5440
|
|
||||||
6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, # 5456
|
|
||||||
3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, # 5472
|
|
||||||
6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, # 5488
|
|
||||||
6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, # 5504
|
|
||||||
6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, # 5520
|
|
||||||
4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, # 5536
|
|
||||||
6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, # 5552
|
|
||||||
4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, # 5568
|
|
||||||
3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, # 5584
|
|
||||||
3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, # 5600
|
|
||||||
6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, # 5616
|
|
||||||
6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, # 5632
|
|
||||||
4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, # 5648
|
|
||||||
6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, # 5664
|
|
||||||
6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, # 5680
|
|
||||||
6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, # 5696
|
|
||||||
6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, # 5712
|
|
||||||
6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, # 5728
|
|
||||||
6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, # 5744
|
|
||||||
4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, # 5760
|
|
||||||
4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, # 5776
|
|
||||||
3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, # 5792
|
|
||||||
6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, # 5808
|
|
||||||
4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, # 5824
|
|
||||||
2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, # 5840
|
|
||||||
6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, # 5856
|
|
||||||
6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, # 5872
|
|
||||||
4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, # 5888
|
|
||||||
2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, # 5904
|
|
||||||
4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, # 5920
|
|
||||||
2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, # 5936
|
|
||||||
4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, # 5952
|
|
||||||
4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, # 5968
|
|
||||||
4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, # 5984
|
|
||||||
6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, # 6000
|
|
||||||
3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, # 6016
|
|
||||||
6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, # 6032
|
|
||||||
3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, # 6048
|
|
||||||
6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, # 6064
|
|
||||||
2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, # 6080
|
|
||||||
3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, # 6096
|
|
||||||
7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, # 6112
|
|
||||||
2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, # 6128
|
|
||||||
3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, # 6144
|
|
||||||
3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, # 6160
|
|
||||||
3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, # 6176
|
|
||||||
3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, # 6192
|
|
||||||
7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, # 6208
|
|
||||||
7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, # 6224
|
|
||||||
7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, # 6240
|
|
||||||
7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, # 6256
|
|
||||||
7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, # 6272
|
|
||||||
4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, # 6288
|
|
||||||
3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, # 6304
|
|
||||||
3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, # 6320
|
|
||||||
4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, # 6336
|
|
||||||
3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, # 6352
|
|
||||||
3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, # 6368
|
|
||||||
7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, # 6384
|
|
||||||
4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, # 6400
|
|
||||||
7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, # 6416
|
|
||||||
7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, # 6432
|
|
||||||
7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, # 6448
|
|
||||||
7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, # 6464
|
|
||||||
7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, # 6480
|
|
||||||
4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, # 6496
|
|
||||||
4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, # 6512
|
|
||||||
7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, # 6528
|
|
||||||
3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, # 6544
|
|
||||||
4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, # 6560
|
|
||||||
7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, # 6576
|
|
||||||
7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, # 6592
|
|
||||||
4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, # 6608
|
|
||||||
3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, # 6624
|
|
||||||
3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, # 6640
|
|
||||||
7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, # 6656
|
|
||||||
4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, # 6672
|
|
||||||
4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, # 6688
|
|
||||||
4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, # 6704
|
|
||||||
4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, # 6720
|
|
||||||
4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, # 6736
|
|
||||||
4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, # 6752
|
|
||||||
7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, # 6768
|
|
||||||
7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, # 6784
|
|
||||||
7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, # 6800
|
|
||||||
7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, # 6816
|
|
||||||
7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, # 6832
|
|
||||||
2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, # 6848
|
|
||||||
3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, # 6864
|
|
||||||
7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, # 6880
|
|
||||||
7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, # 6896
|
|
||||||
3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, # 6912
|
|
||||||
4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, # 6928
|
|
||||||
3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, # 6944
|
|
||||||
3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, # 6960
|
|
||||||
2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, # 6976
|
|
||||||
7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, # 6992
|
|
||||||
7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, # 7008
|
|
||||||
4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, # 7024
|
|
||||||
3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, # 7040
|
|
||||||
3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, # 7056
|
|
||||||
7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, # 7072
|
|
||||||
7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, # 7088
|
|
||||||
7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, # 7104
|
|
||||||
4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, # 7120
|
|
||||||
7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, # 7136
|
|
||||||
2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, # 7152
|
|
||||||
3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, # 7168
|
|
||||||
4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, # 7184
|
|
||||||
7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, # 7200
|
|
||||||
4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, # 7216
|
|
||||||
4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, # 7232
|
|
||||||
7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, # 7248
|
|
||||||
7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, # 7264
|
|
||||||
5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, # 7280
|
|
||||||
7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, # 7296
|
|
||||||
7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, # 7312
|
|
||||||
7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, # 7328
|
|
||||||
7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, # 7344
|
|
||||||
7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, # 7360
|
|
||||||
5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, # 7376
|
|
||||||
5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, # 7392
|
|
||||||
7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, # 7408
|
|
||||||
3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, # 7424
|
|
||||||
7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, # 7440
|
|
||||||
7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, # 7456
|
|
||||||
3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, # 7472
|
|
||||||
7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, # 7488
|
|
||||||
7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, # 7504
|
|
||||||
1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, # 7520
|
|
||||||
3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, # 7536
|
|
||||||
4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, # 7552
|
|
||||||
2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, # 7568
|
|
||||||
3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, # 7584
|
|
||||||
2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, # 7600
|
|
||||||
5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, # 7616
|
|
||||||
4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, # 7632
|
|
||||||
4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, # 7648
|
|
||||||
5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, # 7664
|
|
||||||
7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, # 7680
|
|
||||||
7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, # 7696
|
|
||||||
7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, # 7712
|
|
||||||
7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, # 7728
|
|
||||||
3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, # 7744
|
|
||||||
7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, # 7760
|
|
||||||
3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, # 7776
|
|
||||||
7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, # 7792
|
|
||||||
4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, # 7808
|
|
||||||
7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, # 7824
|
|
||||||
7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7840
|
|
||||||
7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, # 7856
|
|
||||||
7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, # 7872
|
|
||||||
7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, # 7888
|
|
||||||
7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, # 7904
|
|
||||||
7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, # 7920
|
|
||||||
7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, # 7936
|
|
||||||
7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, # 7952
|
|
||||||
7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, # 7968
|
|
||||||
7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, # 7984
|
|
||||||
7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, # 8000
|
|
||||||
8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, # 8016
|
|
||||||
8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, # 8032
|
|
||||||
8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, # 8048
|
|
||||||
8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, # 8064
|
|
||||||
8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, # 8080
|
|
||||||
8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, # 8096
|
|
||||||
8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, # 8112
|
|
||||||
8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, # 8128
|
|
||||||
8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, # 8144
|
|
||||||
8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, # 8160
|
|
||||||
8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, # 8176
|
|
||||||
8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, # 8192
|
|
||||||
8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, # 8208
|
|
||||||
8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, # 8224
|
|
||||||
8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, # 8240
|
|
||||||
8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, # 8256
|
|
||||||
8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271) # 8272
|
|
||||||
|
|
||||||
# flake8: noqa
|
|
||||||
|
|
124
thirdparty/chardet/jpcntx.py
vendored
124
thirdparty/chardet/jpcntx.py
vendored
|
@ -25,13 +25,6 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
from .compat import wrap_ord
|
|
||||||
|
|
||||||
NUM_OF_CATEGORY = 6
|
|
||||||
DONT_KNOW = -1
|
|
||||||
ENOUGH_REL_THRESHOLD = 100
|
|
||||||
MAX_REL_THRESHOLD = 1000
|
|
||||||
MINIMUM_DATA_THRESHOLD = 4
|
|
||||||
|
|
||||||
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
|
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
|
||||||
jp2CharContext = (
|
jp2CharContext = (
|
||||||
|
@ -120,24 +113,35 @@ jp2CharContext = (
|
||||||
(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
|
(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
|
||||||
)
|
)
|
||||||
|
|
||||||
class JapaneseContextAnalysis:
|
class JapaneseContextAnalysis(object):
|
||||||
|
NUM_OF_CATEGORY = 6
|
||||||
|
DONT_KNOW = -1
|
||||||
|
ENOUGH_REL_THRESHOLD = 100
|
||||||
|
MAX_REL_THRESHOLD = 1000
|
||||||
|
MINIMUM_DATA_THRESHOLD = 4
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
self._total_rel = None
|
||||||
|
self._rel_sample = None
|
||||||
|
self._need_to_skip_char_num = None
|
||||||
|
self._last_char_order = None
|
||||||
|
self._done = None
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self._mTotalRel = 0 # total sequence received
|
self._total_rel = 0 # total sequence received
|
||||||
# category counters, each interger counts sequence in its category
|
# category counters, each integer counts sequence in its category
|
||||||
self._mRelSample = [0] * NUM_OF_CATEGORY
|
self._rel_sample = [0] * self.NUM_OF_CATEGORY
|
||||||
# if last byte in current buffer is not the last byte of a character,
|
# if last byte in current buffer is not the last byte of a character,
|
||||||
# we need to know how many bytes to skip in next buffer
|
# we need to know how many bytes to skip in next buffer
|
||||||
self._mNeedToSkipCharNum = 0
|
self._need_to_skip_char_num = 0
|
||||||
self._mLastCharOrder = -1 # The order of previous char
|
self._last_char_order = -1 # The order of previous char
|
||||||
# If this flag is set to True, detection is done and conclusion has
|
# If this flag is set to True, detection is done and conclusion has
|
||||||
# been made
|
# been made
|
||||||
self._mDone = False
|
self._done = False
|
||||||
|
|
||||||
def feed(self, aBuf, aLen):
|
def feed(self, byte_str, num_bytes):
|
||||||
if self._mDone:
|
if self._done:
|
||||||
return
|
return
|
||||||
|
|
||||||
# The buffer we got is byte oriented, and a character may span in more than one
|
# The buffer we got is byte oriented, and a character may span in more than one
|
||||||
|
@ -147,81 +151,83 @@ class JapaneseContextAnalysis:
|
||||||
# well and analyse the character once it is complete, but since a
|
# well and analyse the character once it is complete, but since a
|
||||||
# character will not make much difference, by simply skipping
|
# character will not make much difference, by simply skipping
|
||||||
# this character will simply our logic and improve performance.
|
# this character will simply our logic and improve performance.
|
||||||
i = self._mNeedToSkipCharNum
|
i = self._need_to_skip_char_num
|
||||||
while i < aLen:
|
while i < num_bytes:
|
||||||
order, charLen = self.get_order(aBuf[i:i + 2])
|
order, char_len = self.get_order(byte_str[i:i + 2])
|
||||||
i += charLen
|
i += char_len
|
||||||
if i > aLen:
|
if i > num_bytes:
|
||||||
self._mNeedToSkipCharNum = i - aLen
|
self._need_to_skip_char_num = i - num_bytes
|
||||||
self._mLastCharOrder = -1
|
self._last_char_order = -1
|
||||||
else:
|
else:
|
||||||
if (order != -1) and (self._mLastCharOrder != -1):
|
if (order != -1) and (self._last_char_order != -1):
|
||||||
self._mTotalRel += 1
|
self._total_rel += 1
|
||||||
if self._mTotalRel > MAX_REL_THRESHOLD:
|
if self._total_rel > self.MAX_REL_THRESHOLD:
|
||||||
self._mDone = True
|
self._done = True
|
||||||
break
|
break
|
||||||
self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1
|
self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1
|
||||||
self._mLastCharOrder = order
|
self._last_char_order = order
|
||||||
|
|
||||||
def got_enough_data(self):
|
def got_enough_data(self):
|
||||||
return self._mTotalRel > ENOUGH_REL_THRESHOLD
|
return self._total_rel > self.ENOUGH_REL_THRESHOLD
|
||||||
|
|
||||||
def get_confidence(self):
|
def get_confidence(self):
|
||||||
# This is just one way to calculate confidence. It works well for me.
|
# This is just one way to calculate confidence. It works well for me.
|
||||||
if self._mTotalRel > MINIMUM_DATA_THRESHOLD:
|
if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
|
||||||
return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel
|
return (self._total_rel - self._rel_sample[0]) / self._total_rel
|
||||||
else:
|
else:
|
||||||
return DONT_KNOW
|
return self.DONT_KNOW
|
||||||
|
|
||||||
def get_order(self, aBuf):
|
def get_order(self, byte_str):
|
||||||
return -1, 1
|
return -1, 1
|
||||||
|
|
||||||
class SJISContextAnalysis(JapaneseContextAnalysis):
|
class SJISContextAnalysis(JapaneseContextAnalysis):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.charset_name = "SHIFT_JIS"
|
super(SJISContextAnalysis, self).__init__()
|
||||||
|
self._charset_name = "SHIFT_JIS"
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
return self.charset_name
|
def charset_name(self):
|
||||||
|
return self._charset_name
|
||||||
|
|
||||||
def get_order(self, aBuf):
|
def get_order(self, byte_str):
|
||||||
if not aBuf:
|
if not byte_str:
|
||||||
return -1, 1
|
return -1, 1
|
||||||
# find out current char's byte length
|
# find out current char's byte length
|
||||||
first_char = wrap_ord(aBuf[0])
|
first_char = byte_str[0]
|
||||||
if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
|
if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC):
|
||||||
charLen = 2
|
char_len = 2
|
||||||
if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
|
if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
|
||||||
self.charset_name = "CP932"
|
self._charset_name = "CP932"
|
||||||
else:
|
else:
|
||||||
charLen = 1
|
char_len = 1
|
||||||
|
|
||||||
# return its order if it is hiragana
|
# return its order if it is hiragana
|
||||||
if len(aBuf) > 1:
|
if len(byte_str) > 1:
|
||||||
second_char = wrap_ord(aBuf[1])
|
second_char = byte_str[1]
|
||||||
if (first_char == 202) and (0x9F <= second_char <= 0xF1):
|
if (first_char == 202) and (0x9F <= second_char <= 0xF1):
|
||||||
return second_char - 0x9F, charLen
|
return second_char - 0x9F, char_len
|
||||||
|
|
||||||
return -1, charLen
|
return -1, char_len
|
||||||
|
|
||||||
class EUCJPContextAnalysis(JapaneseContextAnalysis):
|
class EUCJPContextAnalysis(JapaneseContextAnalysis):
|
||||||
def get_order(self, aBuf):
|
def get_order(self, byte_str):
|
||||||
if not aBuf:
|
if not byte_str:
|
||||||
return -1, 1
|
return -1, 1
|
||||||
# find out current char's byte length
|
# find out current char's byte length
|
||||||
first_char = wrap_ord(aBuf[0])
|
first_char = byte_str[0]
|
||||||
if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
|
if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
|
||||||
charLen = 2
|
char_len = 2
|
||||||
elif first_char == 0x8F:
|
elif first_char == 0x8F:
|
||||||
charLen = 3
|
char_len = 3
|
||||||
else:
|
else:
|
||||||
charLen = 1
|
char_len = 1
|
||||||
|
|
||||||
# return its order if it is hiragana
|
# return its order if it is hiragana
|
||||||
if len(aBuf) > 1:
|
if len(byte_str) > 1:
|
||||||
second_char = wrap_ord(aBuf[1])
|
second_char = byte_str[1]
|
||||||
if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
|
if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
|
||||||
return second_char - 0xA1, charLen
|
return second_char - 0xA1, char_len
|
||||||
|
|
||||||
|
return -1, char_len
|
||||||
|
|
||||||
return -1, charLen
|
|
||||||
|
|
||||||
# flake8: noqa
|
|
||||||
|
|
25
thirdparty/chardet/langbulgarianmodel.py
vendored
25
thirdparty/chardet/langbulgarianmodel.py
vendored
|
@ -210,20 +210,19 @@ BulgarianLangModel = (
|
||||||
)
|
)
|
||||||
|
|
||||||
Latin5BulgarianModel = {
|
Latin5BulgarianModel = {
|
||||||
'charToOrderMap': Latin5_BulgarianCharToOrderMap,
|
'char_to_order_map': Latin5_BulgarianCharToOrderMap,
|
||||||
'precedenceMatrix': BulgarianLangModel,
|
'precedence_matrix': BulgarianLangModel,
|
||||||
'mTypicalPositiveRatio': 0.969392,
|
'typical_positive_ratio': 0.969392,
|
||||||
'keepEnglishLetter': False,
|
'keep_english_letter': False,
|
||||||
'charsetName': "ISO-8859-5"
|
'charset_name': "ISO-8859-5",
|
||||||
|
'language': 'Bulgairan',
|
||||||
}
|
}
|
||||||
|
|
||||||
Win1251BulgarianModel = {
|
Win1251BulgarianModel = {
|
||||||
'charToOrderMap': win1251BulgarianCharToOrderMap,
|
'char_to_order_map': win1251BulgarianCharToOrderMap,
|
||||||
'precedenceMatrix': BulgarianLangModel,
|
'precedence_matrix': BulgarianLangModel,
|
||||||
'mTypicalPositiveRatio': 0.969392,
|
'typical_positive_ratio': 0.969392,
|
||||||
'keepEnglishLetter': False,
|
'keep_english_letter': False,
|
||||||
'charsetName': "windows-1251"
|
'charset_name': "windows-1251",
|
||||||
|
'language': 'Bulgarian',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# flake8: noqa
|
|
||||||
|
|
82
thirdparty/chardet/langcyrillicmodel.py
vendored
82
thirdparty/chardet/langcyrillicmodel.py
vendored
|
@ -27,7 +27,7 @@
|
||||||
|
|
||||||
# KOI8-R language model
|
# KOI8-R language model
|
||||||
# Character Mapping Table:
|
# Character Mapping Table:
|
||||||
KOI8R_CharToOrderMap = (
|
KOI8R_char_to_order_map = (
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||||
|
@ -46,7 +46,7 @@ KOI8R_CharToOrderMap = (
|
||||||
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
|
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
|
||||||
)
|
)
|
||||||
|
|
||||||
win1251_CharToOrderMap = (
|
win1251_char_to_order_map = (
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||||
|
@ -65,7 +65,7 @@ win1251_CharToOrderMap = (
|
||||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||||
)
|
)
|
||||||
|
|
||||||
latin5_CharToOrderMap = (
|
latin5_char_to_order_map = (
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||||
|
@ -84,7 +84,7 @@ latin5_CharToOrderMap = (
|
||||||
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
||||||
)
|
)
|
||||||
|
|
||||||
macCyrillic_CharToOrderMap = (
|
macCyrillic_char_to_order_map = (
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||||
|
@ -103,7 +103,7 @@ macCyrillic_CharToOrderMap = (
|
||||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
|
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
|
||||||
)
|
)
|
||||||
|
|
||||||
IBM855_CharToOrderMap = (
|
IBM855_char_to_order_map = (
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||||
|
@ -122,7 +122,7 @@ IBM855_CharToOrderMap = (
|
||||||
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
|
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
|
||||||
)
|
)
|
||||||
|
|
||||||
IBM866_CharToOrderMap = (
|
IBM866_char_to_order_map = (
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||||
|
@ -279,51 +279,55 @@ RussianLangModel = (
|
||||||
)
|
)
|
||||||
|
|
||||||
Koi8rModel = {
|
Koi8rModel = {
|
||||||
'charToOrderMap': KOI8R_CharToOrderMap,
|
'char_to_order_map': KOI8R_char_to_order_map,
|
||||||
'precedenceMatrix': RussianLangModel,
|
'precedence_matrix': RussianLangModel,
|
||||||
'mTypicalPositiveRatio': 0.976601,
|
'typical_positive_ratio': 0.976601,
|
||||||
'keepEnglishLetter': False,
|
'keep_english_letter': False,
|
||||||
'charsetName': "KOI8-R"
|
'charset_name': "KOI8-R",
|
||||||
|
'language': 'Russian',
|
||||||
}
|
}
|
||||||
|
|
||||||
Win1251CyrillicModel = {
|
Win1251CyrillicModel = {
|
||||||
'charToOrderMap': win1251_CharToOrderMap,
|
'char_to_order_map': win1251_char_to_order_map,
|
||||||
'precedenceMatrix': RussianLangModel,
|
'precedence_matrix': RussianLangModel,
|
||||||
'mTypicalPositiveRatio': 0.976601,
|
'typical_positive_ratio': 0.976601,
|
||||||
'keepEnglishLetter': False,
|
'keep_english_letter': False,
|
||||||
'charsetName': "windows-1251"
|
'charset_name': "windows-1251",
|
||||||
|
'language': 'Russian',
|
||||||
}
|
}
|
||||||
|
|
||||||
Latin5CyrillicModel = {
|
Latin5CyrillicModel = {
|
||||||
'charToOrderMap': latin5_CharToOrderMap,
|
'char_to_order_map': latin5_char_to_order_map,
|
||||||
'precedenceMatrix': RussianLangModel,
|
'precedence_matrix': RussianLangModel,
|
||||||
'mTypicalPositiveRatio': 0.976601,
|
'typical_positive_ratio': 0.976601,
|
||||||
'keepEnglishLetter': False,
|
'keep_english_letter': False,
|
||||||
'charsetName': "ISO-8859-5"
|
'charset_name': "ISO-8859-5",
|
||||||
|
'language': 'Russian',
|
||||||
}
|
}
|
||||||
|
|
||||||
MacCyrillicModel = {
|
MacCyrillicModel = {
|
||||||
'charToOrderMap': macCyrillic_CharToOrderMap,
|
'char_to_order_map': macCyrillic_char_to_order_map,
|
||||||
'precedenceMatrix': RussianLangModel,
|
'precedence_matrix': RussianLangModel,
|
||||||
'mTypicalPositiveRatio': 0.976601,
|
'typical_positive_ratio': 0.976601,
|
||||||
'keepEnglishLetter': False,
|
'keep_english_letter': False,
|
||||||
'charsetName': "MacCyrillic"
|
'charset_name': "MacCyrillic",
|
||||||
};
|
'language': 'Russian',
|
||||||
|
}
|
||||||
|
|
||||||
Ibm866Model = {
|
Ibm866Model = {
|
||||||
'charToOrderMap': IBM866_CharToOrderMap,
|
'char_to_order_map': IBM866_char_to_order_map,
|
||||||
'precedenceMatrix': RussianLangModel,
|
'precedence_matrix': RussianLangModel,
|
||||||
'mTypicalPositiveRatio': 0.976601,
|
'typical_positive_ratio': 0.976601,
|
||||||
'keepEnglishLetter': False,
|
'keep_english_letter': False,
|
||||||
'charsetName': "IBM866"
|
'charset_name': "IBM866",
|
||||||
|
'language': 'Russian',
|
||||||
}
|
}
|
||||||
|
|
||||||
Ibm855Model = {
|
Ibm855Model = {
|
||||||
'charToOrderMap': IBM855_CharToOrderMap,
|
'char_to_order_map': IBM855_char_to_order_map,
|
||||||
'precedenceMatrix': RussianLangModel,
|
'precedence_matrix': RussianLangModel,
|
||||||
'mTypicalPositiveRatio': 0.976601,
|
'typical_positive_ratio': 0.976601,
|
||||||
'keepEnglishLetter': False,
|
'keep_english_letter': False,
|
||||||
'charsetName': "IBM855"
|
'charset_name': "IBM855",
|
||||||
|
'language': 'Russian',
|
||||||
}
|
}
|
||||||
|
|
||||||
# flake8: noqa
|
|
||||||
|
|
28
thirdparty/chardet/langgreekmodel.py
vendored
28
thirdparty/chardet/langgreekmodel.py
vendored
|
@ -31,7 +31,7 @@
|
||||||
# 252: 0 - 9
|
# 252: 0 - 9
|
||||||
|
|
||||||
# Character Mapping Table:
|
# Character Mapping Table:
|
||||||
Latin7_CharToOrderMap = (
|
Latin7_char_to_order_map = (
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||||
|
@ -50,7 +50,7 @@ Latin7_CharToOrderMap = (
|
||||||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
|
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
|
||||||
)
|
)
|
||||||
|
|
||||||
win1253_CharToOrderMap = (
|
win1253_char_to_order_map = (
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||||
|
@ -207,19 +207,19 @@ GreekLangModel = (
|
||||||
)
|
)
|
||||||
|
|
||||||
Latin7GreekModel = {
|
Latin7GreekModel = {
|
||||||
'charToOrderMap': Latin7_CharToOrderMap,
|
'char_to_order_map': Latin7_char_to_order_map,
|
||||||
'precedenceMatrix': GreekLangModel,
|
'precedence_matrix': GreekLangModel,
|
||||||
'mTypicalPositiveRatio': 0.982851,
|
'typical_positive_ratio': 0.982851,
|
||||||
'keepEnglishLetter': False,
|
'keep_english_letter': False,
|
||||||
'charsetName': "ISO-8859-7"
|
'charset_name': "ISO-8859-7",
|
||||||
|
'language': 'Greek',
|
||||||
}
|
}
|
||||||
|
|
||||||
Win1253GreekModel = {
|
Win1253GreekModel = {
|
||||||
'charToOrderMap': win1253_CharToOrderMap,
|
'char_to_order_map': win1253_char_to_order_map,
|
||||||
'precedenceMatrix': GreekLangModel,
|
'precedence_matrix': GreekLangModel,
|
||||||
'mTypicalPositiveRatio': 0.982851,
|
'typical_positive_ratio': 0.982851,
|
||||||
'keepEnglishLetter': False,
|
'keep_english_letter': False,
|
||||||
'charsetName': "windows-1253"
|
'charset_name': "windows-1253",
|
||||||
|
'language': 'Greek',
|
||||||
}
|
}
|
||||||
|
|
||||||
# flake8: noqa
|
|
||||||
|
|
17
thirdparty/chardet/langhebrewmodel.py
vendored
17
thirdparty/chardet/langhebrewmodel.py
vendored
|
@ -34,7 +34,7 @@
|
||||||
|
|
||||||
# Windows-1255 language model
|
# Windows-1255 language model
|
||||||
# Character Mapping Table:
|
# Character Mapping Table:
|
||||||
win1255_CharToOrderMap = (
|
WIN1255_CHAR_TO_ORDER_MAP = (
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||||
|
@ -59,7 +59,7 @@ win1255_CharToOrderMap = (
|
||||||
# first 1024 sequences: 1.5981%
|
# first 1024 sequences: 1.5981%
|
||||||
# rest sequences: 0.087%
|
# rest sequences: 0.087%
|
||||||
# negative sequences: 0.0015%
|
# negative sequences: 0.0015%
|
||||||
HebrewLangModel = (
|
HEBREW_LANG_MODEL = (
|
||||||
0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
|
0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
|
||||||
3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
|
3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
|
||||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
|
||||||
|
@ -191,11 +191,10 @@ HebrewLangModel = (
|
||||||
)
|
)
|
||||||
|
|
||||||
Win1255HebrewModel = {
|
Win1255HebrewModel = {
|
||||||
'charToOrderMap': win1255_CharToOrderMap,
|
'char_to_order_map': WIN1255_CHAR_TO_ORDER_MAP,
|
||||||
'precedenceMatrix': HebrewLangModel,
|
'precedence_matrix': HEBREW_LANG_MODEL,
|
||||||
'mTypicalPositiveRatio': 0.984004,
|
'typical_positive_ratio': 0.984004,
|
||||||
'keepEnglishLetter': False,
|
'keep_english_letter': False,
|
||||||
'charsetName': "windows-1255"
|
'charset_name': "windows-1255",
|
||||||
|
'language': 'Hebrew',
|
||||||
}
|
}
|
||||||
|
|
||||||
# flake8: noqa
|
|
||||||
|
|
24
thirdparty/chardet/langhungarianmodel.py
vendored
24
thirdparty/chardet/langhungarianmodel.py
vendored
|
@ -207,19 +207,19 @@ HungarianLangModel = (
|
||||||
)
|
)
|
||||||
|
|
||||||
Latin2HungarianModel = {
|
Latin2HungarianModel = {
|
||||||
'charToOrderMap': Latin2_HungarianCharToOrderMap,
|
'char_to_order_map': Latin2_HungarianCharToOrderMap,
|
||||||
'precedenceMatrix': HungarianLangModel,
|
'precedence_matrix': HungarianLangModel,
|
||||||
'mTypicalPositiveRatio': 0.947368,
|
'typical_positive_ratio': 0.947368,
|
||||||
'keepEnglishLetter': True,
|
'keep_english_letter': True,
|
||||||
'charsetName': "ISO-8859-2"
|
'charset_name': "ISO-8859-2",
|
||||||
|
'language': 'Hungarian',
|
||||||
}
|
}
|
||||||
|
|
||||||
Win1250HungarianModel = {
|
Win1250HungarianModel = {
|
||||||
'charToOrderMap': win1250HungarianCharToOrderMap,
|
'char_to_order_map': win1250HungarianCharToOrderMap,
|
||||||
'precedenceMatrix': HungarianLangModel,
|
'precedence_matrix': HungarianLangModel,
|
||||||
'mTypicalPositiveRatio': 0.947368,
|
'typical_positive_ratio': 0.947368,
|
||||||
'keepEnglishLetter': True,
|
'keep_english_letter': True,
|
||||||
'charsetName': "windows-1250"
|
'charset_name': "windows-1250",
|
||||||
|
'language': 'Hungarian',
|
||||||
}
|
}
|
||||||
|
|
||||||
# flake8: noqa
|
|
||||||
|
|
13
thirdparty/chardet/langthaimodel.py
vendored
13
thirdparty/chardet/langthaimodel.py
vendored
|
@ -190,11 +190,10 @@ ThaiLangModel = (
|
||||||
)
|
)
|
||||||
|
|
||||||
TIS620ThaiModel = {
|
TIS620ThaiModel = {
|
||||||
'charToOrderMap': TIS620CharToOrderMap,
|
'char_to_order_map': TIS620CharToOrderMap,
|
||||||
'precedenceMatrix': ThaiLangModel,
|
'precedence_matrix': ThaiLangModel,
|
||||||
'mTypicalPositiveRatio': 0.926386,
|
'typical_positive_ratio': 0.926386,
|
||||||
'keepEnglishLetter': False,
|
'keep_english_letter': False,
|
||||||
'charsetName': "TIS-620"
|
'charset_name': "TIS-620",
|
||||||
|
'language': 'Thai',
|
||||||
}
|
}
|
||||||
|
|
||||||
# flake8: noqa
|
|
||||||
|
|
193
thirdparty/chardet/langturkishmodel.py
vendored
Normal file
193
thirdparty/chardet/langturkishmodel.py
vendored
Normal file
|
@ -0,0 +1,193 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
######################## BEGIN LICENSE BLOCK ########################
|
||||||
|
# The Original Code is Mozilla Communicator client code.
|
||||||
|
#
|
||||||
|
# The Initial Developer of the Original Code is
|
||||||
|
# Netscape Communications Corporation.
|
||||||
|
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||||
|
# the Initial Developer. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Contributor(s):
|
||||||
|
# Mark Pilgrim - port to Python
|
||||||
|
# Özgür Baskın - Turkish Language Model
|
||||||
|
#
|
||||||
|
# This library is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
|
# License as published by the Free Software Foundation; either
|
||||||
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This library is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
# Lesser General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
|
# License along with this library; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||||
|
# 02110-1301 USA
|
||||||
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
|
# 255: Control characters that usually does not exist in any text
|
||||||
|
# 254: Carriage/Return
|
||||||
|
# 253: symbol (punctuation) that does not belong to word
|
||||||
|
# 252: 0 - 9
|
||||||
|
|
||||||
|
# Character Mapping Table:
|
||||||
|
Latin5_TurkishCharToOrderMap = (
|
||||||
|
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||||
|
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||||
|
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||||
|
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||||
|
255, 23, 37, 47, 39, 29, 52, 36, 45, 53, 60, 16, 49, 20, 46, 42,
|
||||||
|
48, 69, 44, 35, 31, 51, 38, 62, 65, 43, 56,255,255,255,255,255,
|
||||||
|
255, 1, 21, 28, 12, 2, 18, 27, 25, 3, 24, 10, 5, 13, 4, 15,
|
||||||
|
26, 64, 7, 8, 9, 14, 32, 57, 58, 11, 22,255,255,255,255,255,
|
||||||
|
180,179,178,177,176,175,174,173,172,171,170,169,168,167,166,165,
|
||||||
|
164,163,162,161,160,159,101,158,157,156,155,154,153,152,151,106,
|
||||||
|
150,149,148,147,146,145,144,100,143,142,141,140,139,138,137,136,
|
||||||
|
94, 80, 93,135,105,134,133, 63,132,131,130,129,128,127,126,125,
|
||||||
|
124,104, 73, 99, 79, 85,123, 54,122, 98, 92,121,120, 91,103,119,
|
||||||
|
68,118,117, 97,116,115, 50, 90,114,113,112,111, 55, 41, 40, 86,
|
||||||
|
89, 70, 59, 78, 71, 82, 88, 33, 77, 66, 84, 83,110, 75, 61, 96,
|
||||||
|
30, 67,109, 74, 87,102, 34, 95, 81,108, 76, 72, 17, 6, 19,107,
|
||||||
|
)
|
||||||
|
|
||||||
|
TurkishLangModel = (
|
||||||
|
3,2,3,3,3,1,3,3,3,3,3,3,3,3,2,1,1,3,3,1,3,3,0,3,3,3,3,3,0,3,1,3,
|
||||||
|
3,2,1,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
|
||||||
|
3,2,2,3,3,0,3,3,3,3,3,3,3,2,3,1,0,3,3,1,3,3,0,3,3,3,3,3,0,3,0,3,
|
||||||
|
3,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,0,1,0,1,
|
||||||
|
3,3,2,3,3,0,3,3,3,3,3,3,3,2,3,1,1,3,3,0,3,3,1,2,3,3,3,3,0,3,0,3,
|
||||||
|
3,1,1,0,0,0,1,0,0,0,0,1,1,0,1,2,1,0,0,0,1,0,0,0,0,2,0,0,0,0,0,1,
|
||||||
|
3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,2,0,3,2,1,2,2,1,3,3,0,0,0,2,
|
||||||
|
2,2,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,
|
||||||
|
3,3,3,2,3,3,1,2,3,3,3,3,3,3,3,1,3,2,1,0,3,2,0,1,2,3,3,2,1,0,0,2,
|
||||||
|
2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0,
|
||||||
|
1,0,1,3,3,1,3,3,3,3,3,3,3,1,2,0,0,2,3,0,2,3,0,0,2,2,2,3,0,3,0,1,
|
||||||
|
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,0,2,3,2,3,3,1,0,0,2,
|
||||||
|
3,2,0,0,1,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,2,0,0,1,
|
||||||
|
3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,0,3,3,0,0,2,1,0,0,2,3,2,2,0,0,0,2,
|
||||||
|
2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,2,0,0,1,
|
||||||
|
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,0,1,3,2,1,1,3,2,3,2,1,0,0,2,
|
||||||
|
2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,
|
||||||
|
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,2,0,2,3,0,0,2,2,2,2,0,0,0,2,
|
||||||
|
3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,2,2,2,2,3,2,3,3,0,3,3,1,1,2,2,0,0,2,2,3,2,0,0,1,3,
|
||||||
|
0,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,
|
||||||
|
3,3,3,2,3,3,3,2,1,2,2,3,2,3,3,0,3,2,0,0,1,1,0,1,1,2,1,2,0,0,0,1,
|
||||||
|
0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,
|
||||||
|
3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,1,3,1,1,0,3,2,1,1,3,3,2,3,1,0,0,1,
|
||||||
|
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,1,
|
||||||
|
3,2,2,3,3,0,3,3,3,3,3,3,3,2,2,1,0,3,3,1,3,3,0,1,3,3,2,3,0,3,0,3,
|
||||||
|
2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||||
|
2,2,2,3,3,0,3,3,3,3,3,3,3,3,3,0,0,3,2,0,3,3,0,3,2,3,3,3,0,3,1,3,
|
||||||
|
2,0,0,0,0,0,0,0,0,0,0,1,0,1,2,0,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
|
||||||
|
3,3,3,1,2,3,3,1,0,0,1,0,0,3,3,2,3,0,0,2,0,0,2,0,2,0,0,0,2,0,2,0,
|
||||||
|
0,3,1,0,1,0,0,0,2,2,1,0,1,1,2,1,2,2,2,0,2,1,1,0,0,0,2,0,0,0,0,0,
|
||||||
|
1,2,1,3,3,0,3,3,3,3,3,2,3,0,0,0,0,2,3,0,2,3,1,0,2,3,1,3,0,3,0,2,
|
||||||
|
3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,1,3,3,2,2,3,2,2,0,1,2,3,0,1,2,1,0,1,0,0,0,1,0,2,2,0,0,0,1,
|
||||||
|
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,
|
||||||
|
3,3,3,1,3,3,1,1,3,3,1,1,3,3,1,0,2,1,2,0,2,1,0,0,1,1,2,1,0,0,0,2,
|
||||||
|
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,1,0,2,1,3,0,0,2,0,0,3,3,0,3,0,0,1,0,1,2,0,0,1,1,2,2,0,1,0,
|
||||||
|
0,1,2,1,1,0,1,0,1,1,1,1,1,0,1,1,1,2,2,1,2,0,1,0,0,0,0,0,0,1,0,0,
|
||||||
|
3,3,3,2,3,2,3,3,0,2,2,2,3,3,3,0,3,0,0,0,2,2,0,1,2,1,1,1,0,0,0,1,
|
||||||
|
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||||
|
3,3,3,3,3,3,2,1,2,2,3,3,3,3,2,0,2,0,0,0,2,2,0,0,2,1,3,3,0,0,1,1,
|
||||||
|
1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,
|
||||||
|
1,1,2,3,3,0,3,3,3,3,3,3,2,2,0,2,0,2,3,2,3,2,2,2,2,2,2,2,1,3,2,3,
|
||||||
|
2,0,2,1,2,2,2,2,1,1,2,2,1,2,2,1,2,0,0,2,1,1,0,2,1,0,0,1,0,0,0,1,
|
||||||
|
2,3,3,1,1,1,0,1,1,1,2,3,2,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,
|
||||||
|
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,2,2,2,3,2,3,2,2,1,3,3,3,0,2,1,2,0,2,1,0,0,1,1,1,1,1,0,0,1,
|
||||||
|
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
|
||||||
|
3,3,3,2,3,3,3,3,3,2,3,1,2,3,3,1,2,0,0,0,0,0,0,0,3,2,1,1,0,0,0,0,
|
||||||
|
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||||
|
3,3,3,2,2,3,3,2,1,1,1,1,1,3,3,0,3,1,0,0,1,1,0,0,3,1,2,1,0,0,0,0,
|
||||||
|
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,
|
||||||
|
3,3,3,2,2,3,2,2,2,3,2,1,1,3,3,0,3,0,0,0,0,1,0,0,3,1,1,2,0,0,0,1,
|
||||||
|
1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||||
|
1,1,1,3,3,0,3,3,3,3,3,2,2,2,1,2,0,2,1,2,2,1,1,0,1,2,2,2,2,2,2,2,
|
||||||
|
0,0,2,1,2,1,2,1,0,1,1,3,1,2,1,1,2,0,0,2,0,1,0,1,0,1,0,0,0,1,0,1,
|
||||||
|
3,3,3,1,3,3,3,0,1,1,0,2,2,3,1,0,3,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,
|
||||||
|
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,2,0,0,2,2,1,0,0,1,0,0,3,3,1,3,0,0,1,1,0,2,0,3,0,0,0,2,0,1,1,
|
||||||
|
0,1,2,0,1,2,2,0,2,2,2,2,1,0,2,1,1,0,2,0,2,1,2,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,1,3,2,3,2,0,2,2,2,1,3,2,0,2,1,2,0,1,2,0,0,1,0,2,2,0,0,0,2,
|
||||||
|
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,
|
||||||
|
3,3,3,0,3,3,1,1,2,3,1,0,3,2,3,0,3,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,
|
||||||
|
1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,3,3,0,3,3,2,3,3,2,2,0,0,0,0,1,2,0,1,3,0,0,0,3,1,1,0,3,0,2,
|
||||||
|
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,1,2,2,1,0,3,1,1,1,1,3,3,2,3,0,0,1,0,1,2,0,2,2,0,2,2,0,2,1,
|
||||||
|
0,2,2,1,1,1,1,0,2,1,1,0,1,1,1,1,2,1,2,1,2,0,1,0,1,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,0,1,1,3,0,0,1,1,0,0,2,2,0,3,0,0,1,1,0,1,0,0,0,0,0,2,0,0,0,
|
||||||
|
0,3,1,0,1,0,1,0,2,0,0,1,0,1,0,1,1,1,2,1,1,0,2,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,0,2,0,2,0,1,1,1,0,0,3,3,0,2,0,0,1,0,0,2,1,1,0,1,0,1,0,1,0,
|
||||||
|
0,2,0,1,2,0,2,0,2,1,1,0,1,0,2,1,1,0,2,1,1,0,1,0,0,0,1,1,0,0,0,0,
|
||||||
|
3,2,3,0,1,0,0,0,0,0,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,0,2,0,0,0,
|
||||||
|
0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,0,0,2,3,0,0,1,0,1,0,2,3,2,3,0,0,1,3,0,2,1,0,0,0,0,2,0,1,0,
|
||||||
|
0,2,1,0,0,1,1,0,2,1,0,0,1,0,0,1,1,0,1,1,2,0,1,0,0,0,0,1,0,0,0,0,
|
||||||
|
3,2,2,0,0,1,1,0,0,0,0,0,0,3,1,1,1,0,0,0,0,0,1,0,0,0,0,0,2,0,1,0,
|
||||||
|
0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,3,3,0,2,3,2,2,1,2,2,1,1,2,0,1,3,2,2,2,0,0,2,2,0,0,0,1,2,1,
|
||||||
|
3,0,2,1,1,0,1,1,1,0,1,2,2,2,1,1,2,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,
|
||||||
|
0,1,1,2,3,0,3,3,3,2,2,2,2,1,0,1,0,1,0,1,2,2,0,0,2,2,1,3,1,1,2,1,
|
||||||
|
0,0,1,1,2,0,1,1,0,0,1,2,0,2,1,1,2,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,
|
||||||
|
3,3,2,0,0,3,1,0,0,0,0,0,0,3,2,1,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
|
||||||
|
0,2,1,1,0,0,1,0,1,2,0,0,1,1,0,0,2,1,1,1,1,0,2,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,2,0,0,1,0,0,0,0,1,0,0,3,3,2,2,0,0,1,0,0,2,0,1,0,0,0,2,0,1,0,
|
||||||
|
0,0,1,1,0,0,2,0,2,1,0,0,1,1,2,1,2,0,2,1,2,1,1,1,0,0,1,1,0,0,0,0,
|
||||||
|
3,3,2,0,0,2,2,0,0,0,1,1,0,2,2,1,3,1,0,1,0,1,2,0,0,0,0,0,1,0,1,0,
|
||||||
|
0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,2,0,0,0,1,0,0,1,0,0,2,3,1,2,0,0,1,0,0,2,0,0,0,1,0,2,0,2,0,
|
||||||
|
0,1,1,2,2,1,2,0,2,1,1,0,0,1,1,0,1,1,1,1,2,1,1,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,0,2,1,2,1,0,0,1,1,0,3,3,1,2,0,0,1,0,0,2,0,2,0,1,1,2,0,0,0,
|
||||||
|
0,0,1,1,1,1,2,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
|
||||||
|
3,3,3,0,2,2,3,2,0,0,1,0,0,2,3,1,0,0,0,0,0,0,2,0,2,0,0,0,2,0,0,0,
|
||||||
|
0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,2,3,0,0,0,0,0,0,0,1,0,0,2,2,2,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
|
||||||
|
0,0,2,1,1,0,1,0,2,1,1,0,0,1,1,2,1,0,2,0,2,0,1,0,0,0,2,0,0,0,0,0,
|
||||||
|
0,0,0,2,2,0,2,1,1,1,1,2,2,0,0,1,0,1,0,0,1,3,0,0,0,0,1,0,0,2,1,0,
|
||||||
|
0,0,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,2,3,0,2,3,1,2,2,0,2,0,0,2,0,2,1,1,1,2,1,0,0,1,2,1,1,2,1,0,
|
||||||
|
1,0,2,0,1,0,1,1,0,0,2,2,1,2,1,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,0,2,1,2,0,0,0,1,0,0,3,2,0,1,0,0,1,0,0,2,0,0,0,1,2,1,0,1,0,
|
||||||
|
0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,2,2,0,2,2,1,1,0,1,1,1,1,1,0,0,1,2,1,1,1,0,1,0,0,0,1,1,1,1,
|
||||||
|
0,0,2,1,0,1,1,1,0,1,1,2,1,2,1,1,2,0,1,1,2,1,0,2,0,0,0,0,0,0,0,0,
|
||||||
|
3,2,2,0,0,2,0,0,0,0,0,0,0,2,2,0,2,0,0,1,0,0,2,0,0,0,0,0,2,0,0,0,
|
||||||
|
0,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,3,2,0,2,2,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,
|
||||||
|
2,0,1,0,1,0,1,1,0,0,1,2,0,1,0,1,1,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,
|
||||||
|
2,2,2,0,1,1,0,0,0,1,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,1,2,0,1,0,
|
||||||
|
0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,2,2,2,1,0,1,1,1,0,0,0,0,1,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||||
|
1,1,2,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,
|
||||||
|
0,0,1,2,2,0,2,1,2,1,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,0,0,0,1,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||||
|
2,2,2,0,0,0,1,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,2,2,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
)
|
||||||
|
|
||||||
|
Latin5TurkishModel = {
|
||||||
|
'char_to_order_map': Latin5_TurkishCharToOrderMap,
|
||||||
|
'precedence_matrix': TurkishLangModel,
|
||||||
|
'typical_positive_ratio': 0.970290,
|
||||||
|
'keep_english_letter': True,
|
||||||
|
'charset_name': "ISO-8859-9",
|
||||||
|
'language': 'Turkish',
|
||||||
|
}
|
48
thirdparty/chardet/latin1prober.py
vendored
48
thirdparty/chardet/latin1prober.py
vendored
|
@ -27,8 +27,7 @@
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
from .charsetprober import CharSetProber
|
from .charsetprober import CharSetProber
|
||||||
from .constants import eNotMe
|
from .enums import ProbingState
|
||||||
from .compat import wrap_ord
|
|
||||||
|
|
||||||
FREQ_CAT_NUM = 4
|
FREQ_CAT_NUM = 4
|
||||||
|
|
||||||
|
@ -82,7 +81,7 @@ Latin1_CharToClass = (
|
||||||
# 2 : normal
|
# 2 : normal
|
||||||
# 3 : very likely
|
# 3 : very likely
|
||||||
Latin1ClassModel = (
|
Latin1ClassModel = (
|
||||||
# UDF OTH ASC ASS ACV ACO ASV ASO
|
# UDF OTH ASC ASS ACV ACO ASV ASO
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, # UDF
|
0, 0, 0, 0, 0, 0, 0, 0, # UDF
|
||||||
0, 3, 3, 3, 3, 3, 3, 3, # OTH
|
0, 3, 3, 3, 3, 3, 3, 3, # OTH
|
||||||
0, 3, 3, 3, 3, 3, 3, 3, # ASC
|
0, 3, 3, 3, 3, 3, 3, 3, # ASC
|
||||||
|
@ -96,40 +95,47 @@ Latin1ClassModel = (
|
||||||
|
|
||||||
class Latin1Prober(CharSetProber):
|
class Latin1Prober(CharSetProber):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
CharSetProber.__init__(self)
|
super(Latin1Prober, self).__init__()
|
||||||
|
self._last_char_class = None
|
||||||
|
self._freq_counter = None
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self._mLastCharClass = OTH
|
self._last_char_class = OTH
|
||||||
self._mFreqCounter = [0] * FREQ_CAT_NUM
|
self._freq_counter = [0] * FREQ_CAT_NUM
|
||||||
CharSetProber.reset(self)
|
CharSetProber.reset(self)
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
return "windows-1252"
|
def charset_name(self):
|
||||||
|
return "ISO-8859-1"
|
||||||
|
|
||||||
def feed(self, aBuf):
|
@property
|
||||||
aBuf = self.filter_with_english_letters(aBuf)
|
def language(self):
|
||||||
for c in aBuf:
|
return ""
|
||||||
charClass = Latin1_CharToClass[wrap_ord(c)]
|
|
||||||
freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
|
def feed(self, byte_str):
|
||||||
+ charClass]
|
byte_str = self.filter_with_english_letters(byte_str)
|
||||||
|
for c in byte_str:
|
||||||
|
char_class = Latin1_CharToClass[c]
|
||||||
|
freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM)
|
||||||
|
+ char_class]
|
||||||
if freq == 0:
|
if freq == 0:
|
||||||
self._mState = eNotMe
|
self._state = ProbingState.NOT_ME
|
||||||
break
|
break
|
||||||
self._mFreqCounter[freq] += 1
|
self._freq_counter[freq] += 1
|
||||||
self._mLastCharClass = charClass
|
self._last_char_class = char_class
|
||||||
|
|
||||||
return self.get_state()
|
return self.state
|
||||||
|
|
||||||
def get_confidence(self):
|
def get_confidence(self):
|
||||||
if self.get_state() == eNotMe:
|
if self.state == ProbingState.NOT_ME:
|
||||||
return 0.01
|
return 0.01
|
||||||
|
|
||||||
total = sum(self._mFreqCounter)
|
total = sum(self._freq_counter)
|
||||||
if total < 0.01:
|
if total < 0.01:
|
||||||
confidence = 0.0
|
confidence = 0.0
|
||||||
else:
|
else:
|
||||||
confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0)
|
confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0)
|
||||||
/ total)
|
/ total)
|
||||||
if confidence < 0.0:
|
if confidence < 0.0:
|
||||||
confidence = 0.0
|
confidence = 0.0
|
||||||
|
|
87
thirdparty/chardet/mbcharsetprober.py
vendored
87
thirdparty/chardet/mbcharsetprober.py
vendored
|
@ -27,62 +27,65 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
import sys
|
|
||||||
from . import constants
|
|
||||||
from .charsetprober import CharSetProber
|
from .charsetprober import CharSetProber
|
||||||
|
from .enums import ProbingState, MachineState
|
||||||
|
|
||||||
if sys.version_info >= (3, 0):
|
|
||||||
xrange = range
|
|
||||||
|
|
||||||
class MultiByteCharSetProber(CharSetProber):
|
class MultiByteCharSetProber(CharSetProber):
|
||||||
def __init__(self):
|
"""
|
||||||
CharSetProber.__init__(self)
|
MultiByteCharSetProber
|
||||||
self._mDistributionAnalyzer = None
|
"""
|
||||||
self._mCodingSM = None
|
|
||||||
self._mLastChar = [0, 0]
|
def __init__(self, lang_filter=None):
|
||||||
|
super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter)
|
||||||
|
self.distribution_analyzer = None
|
||||||
|
self.coding_sm = None
|
||||||
|
self._last_char = [0, 0]
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
CharSetProber.reset(self)
|
super(MultiByteCharSetProber, self).reset()
|
||||||
if self._mCodingSM:
|
if self.coding_sm:
|
||||||
self._mCodingSM.reset()
|
self.coding_sm.reset()
|
||||||
if self._mDistributionAnalyzer:
|
if self.distribution_analyzer:
|
||||||
self._mDistributionAnalyzer.reset()
|
self.distribution_analyzer.reset()
|
||||||
self._mLastChar = [0, 0]
|
self._last_char = [0, 0]
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
pass
|
def charset_name(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
def feed(self, aBuf):
|
@property
|
||||||
aLen = len(aBuf)
|
def language(self):
|
||||||
for i in xrange(0, aLen):
|
raise NotImplementedError
|
||||||
codingState = self._mCodingSM.next_state(aBuf[i])
|
|
||||||
if codingState == constants.eError:
|
def feed(self, byte_str):
|
||||||
if constants._debug:
|
for i in range(len(byte_str)):
|
||||||
sys.stderr.write(self.get_charset_name()
|
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||||
+ ' prober hit error at byte ' + str(i)
|
if coding_state == MachineState.ERROR:
|
||||||
+ '\n')
|
self.logger.debug('%s %s prober hit error at byte %s',
|
||||||
self._mState = constants.eNotMe
|
self.charset_name, self.language, i)
|
||||||
|
self._state = ProbingState.NOT_ME
|
||||||
break
|
break
|
||||||
elif codingState == constants.eItsMe:
|
elif coding_state == MachineState.ITS_ME:
|
||||||
self._mState = constants.eFoundIt
|
self._state = ProbingState.FOUND_IT
|
||||||
break
|
break
|
||||||
elif codingState == constants.eStart:
|
elif coding_state == MachineState.START:
|
||||||
charLen = self._mCodingSM.get_current_charlen()
|
char_len = self.coding_sm.get_current_charlen()
|
||||||
if i == 0:
|
if i == 0:
|
||||||
self._mLastChar[1] = aBuf[0]
|
self._last_char[1] = byte_str[0]
|
||||||
self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
|
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||||
else:
|
else:
|
||||||
self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
|
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||||
charLen)
|
char_len)
|
||||||
|
|
||||||
self._mLastChar[0] = aBuf[aLen - 1]
|
self._last_char[0] = byte_str[-1]
|
||||||
|
|
||||||
if self.get_state() == constants.eDetecting:
|
if self.state == ProbingState.DETECTING:
|
||||||
if (self._mDistributionAnalyzer.got_enough_data() and
|
if (self.distribution_analyzer.got_enough_data() and
|
||||||
(self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
|
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||||
self._mState = constants.eFoundIt
|
self._state = ProbingState.FOUND_IT
|
||||||
|
|
||||||
return self.get_state()
|
return self.state
|
||||||
|
|
||||||
def get_confidence(self):
|
def get_confidence(self):
|
||||||
return self._mDistributionAnalyzer.get_confidence()
|
return self.distribution_analyzer.get_confidence()
|
||||||
|
|
6
thirdparty/chardet/mbcsgroupprober.py
vendored
6
thirdparty/chardet/mbcsgroupprober.py
vendored
|
@ -39,9 +39,9 @@ from .euctwprober import EUCTWProber
|
||||||
|
|
||||||
|
|
||||||
class MBCSGroupProber(CharSetGroupProber):
|
class MBCSGroupProber(CharSetGroupProber):
|
||||||
def __init__(self):
|
def __init__(self, lang_filter=None):
|
||||||
CharSetGroupProber.__init__(self)
|
super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
|
||||||
self._mProbers = [
|
self.probers = [
|
||||||
UTF8Prober(),
|
UTF8Prober(),
|
||||||
SJISProber(),
|
SJISProber(),
|
||||||
EUCJPProber(),
|
EUCJPProber(),
|
||||||
|
|
286
thirdparty/chardet/mbcssm.py
vendored
286
thirdparty/chardet/mbcssm.py
vendored
|
@ -25,11 +25,11 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
from .constants import eStart, eError, eItsMe
|
from .enums import MachineState
|
||||||
|
|
||||||
# BIG5
|
# BIG5
|
||||||
|
|
||||||
BIG5_cls = (
|
BIG5_CLS = (
|
||||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
|
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
|
||||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||||
1,1,1,1,1,1,1,1, # 10 - 17
|
1,1,1,1,1,1,1,1, # 10 - 17
|
||||||
|
@ -64,23 +64,23 @@ BIG5_cls = (
|
||||||
3,3,3,3,3,3,3,0 # f8 - ff
|
3,3,3,3,3,3,3,0 # f8 - ff
|
||||||
)
|
)
|
||||||
|
|
||||||
BIG5_st = (
|
BIG5_ST = (
|
||||||
eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07
|
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||||
eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f
|
||||||
eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17
|
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17
|
||||||
)
|
)
|
||||||
|
|
||||||
Big5CharLenTable = (0, 1, 1, 2, 0)
|
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
|
||||||
|
|
||||||
Big5SMModel = {'classTable': BIG5_cls,
|
BIG5_SM_MODEL = {'class_table': BIG5_CLS,
|
||||||
'classFactor': 5,
|
'class_factor': 5,
|
||||||
'stateTable': BIG5_st,
|
'state_table': BIG5_ST,
|
||||||
'charLenTable': Big5CharLenTable,
|
'char_len_table': BIG5_CHAR_LEN_TABLE,
|
||||||
'name': 'Big5'}
|
'name': 'Big5'}
|
||||||
|
|
||||||
# CP949
|
# CP949
|
||||||
|
|
||||||
CP949_cls = (
|
CP949_CLS = (
|
||||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
|
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
|
||||||
1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
|
1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
|
||||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
|
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
|
||||||
|
@ -99,28 +99,28 @@ CP949_cls = (
|
||||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
|
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
|
||||||
)
|
)
|
||||||
|
|
||||||
CP949_st = (
|
CP949_ST = (
|
||||||
#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
|
#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
|
||||||
eError,eStart, 3,eError,eStart,eStart, 4, 5,eError, 6, # eStart
|
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START
|
||||||
eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR
|
||||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe
|
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME
|
||||||
eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3
|
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3
|
||||||
eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4
|
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4
|
||||||
eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5
|
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5
|
||||||
eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6
|
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6
|
||||||
)
|
)
|
||||||
|
|
||||||
CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
|
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
|
||||||
|
|
||||||
CP949SMModel = {'classTable': CP949_cls,
|
CP949_SM_MODEL = {'class_table': CP949_CLS,
|
||||||
'classFactor': 10,
|
'class_factor': 10,
|
||||||
'stateTable': CP949_st,
|
'state_table': CP949_ST,
|
||||||
'charLenTable': CP949CharLenTable,
|
'char_len_table': CP949_CHAR_LEN_TABLE,
|
||||||
'name': 'CP949'}
|
'name': 'CP949'}
|
||||||
|
|
||||||
# EUC-JP
|
# EUC-JP
|
||||||
|
|
||||||
EUCJP_cls = (
|
EUCJP_CLS = (
|
||||||
4,4,4,4,4,4,4,4, # 00 - 07
|
4,4,4,4,4,4,4,4, # 00 - 07
|
||||||
4,4,4,4,4,4,5,5, # 08 - 0f
|
4,4,4,4,4,4,5,5, # 08 - 0f
|
||||||
4,4,4,4,4,4,4,4, # 10 - 17
|
4,4,4,4,4,4,4,4, # 10 - 17
|
||||||
|
@ -155,25 +155,25 @@ EUCJP_cls = (
|
||||||
0,0,0,0,0,0,0,5 # f8 - ff
|
0,0,0,0,0,0,0,5 # f8 - ff
|
||||||
)
|
)
|
||||||
|
|
||||||
EUCJP_st = (
|
EUCJP_ST = (
|
||||||
3, 4, 3, 5,eStart,eError,eError,eError,#00-07
|
3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||||
eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17
|
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||||
eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f
|
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f
|
||||||
3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27
|
3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27
|
||||||
)
|
)
|
||||||
|
|
||||||
EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)
|
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
|
||||||
|
|
||||||
EUCJPSMModel = {'classTable': EUCJP_cls,
|
EUCJP_SM_MODEL = {'class_table': EUCJP_CLS,
|
||||||
'classFactor': 6,
|
'class_factor': 6,
|
||||||
'stateTable': EUCJP_st,
|
'state_table': EUCJP_ST,
|
||||||
'charLenTable': EUCJPCharLenTable,
|
'char_len_table': EUCJP_CHAR_LEN_TABLE,
|
||||||
'name': 'EUC-JP'}
|
'name': 'EUC-JP'}
|
||||||
|
|
||||||
# EUC-KR
|
# EUC-KR
|
||||||
|
|
||||||
EUCKR_cls = (
|
EUCKR_CLS = (
|
||||||
1,1,1,1,1,1,1,1, # 00 - 07
|
1,1,1,1,1,1,1,1, # 00 - 07
|
||||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||||
1,1,1,1,1,1,1,1, # 10 - 17
|
1,1,1,1,1,1,1,1, # 10 - 17
|
||||||
|
@ -208,22 +208,22 @@ EUCKR_cls = (
|
||||||
2,2,2,2,2,2,2,0 # f8 - ff
|
2,2,2,2,2,2,2,0 # f8 - ff
|
||||||
)
|
)
|
||||||
|
|
||||||
EUCKR_st = (
|
EUCKR_ST = (
|
||||||
eError,eStart, 3,eError,eError,eError,eError,eError,#00-07
|
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||||
eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f
|
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
|
||||||
)
|
)
|
||||||
|
|
||||||
EUCKRCharLenTable = (0, 1, 2, 0)
|
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
|
||||||
|
|
||||||
EUCKRSMModel = {'classTable': EUCKR_cls,
|
EUCKR_SM_MODEL = {'class_table': EUCKR_CLS,
|
||||||
'classFactor': 4,
|
'class_factor': 4,
|
||||||
'stateTable': EUCKR_st,
|
'state_table': EUCKR_ST,
|
||||||
'charLenTable': EUCKRCharLenTable,
|
'char_len_table': EUCKR_CHAR_LEN_TABLE,
|
||||||
'name': 'EUC-KR'}
|
'name': 'EUC-KR'}
|
||||||
|
|
||||||
# EUC-TW
|
# EUC-TW
|
||||||
|
|
||||||
EUCTW_cls = (
|
EUCTW_CLS = (
|
||||||
2,2,2,2,2,2,2,2, # 00 - 07
|
2,2,2,2,2,2,2,2, # 00 - 07
|
||||||
2,2,2,2,2,2,0,0, # 08 - 0f
|
2,2,2,2,2,2,0,0, # 08 - 0f
|
||||||
2,2,2,2,2,2,2,2, # 10 - 17
|
2,2,2,2,2,2,2,2, # 10 - 17
|
||||||
|
@ -258,26 +258,26 @@ EUCTW_cls = (
|
||||||
3,3,3,3,3,3,3,0 # f8 - ff
|
3,3,3,3,3,3,3,0 # f8 - ff
|
||||||
)
|
)
|
||||||
|
|
||||||
EUCTW_st = (
|
EUCTW_ST = (
|
||||||
eError,eError,eStart, 3, 3, 3, 4,eError,#00-07
|
MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07
|
||||||
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17
|
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17
|
||||||
eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f
|
MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||||
5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27
|
5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27
|
||||||
eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
|
MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
|
||||||
)
|
)
|
||||||
|
|
||||||
EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)
|
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
|
||||||
|
|
||||||
EUCTWSMModel = {'classTable': EUCTW_cls,
|
EUCTW_SM_MODEL = {'class_table': EUCTW_CLS,
|
||||||
'classFactor': 7,
|
'class_factor': 7,
|
||||||
'stateTable': EUCTW_st,
|
'state_table': EUCTW_ST,
|
||||||
'charLenTable': EUCTWCharLenTable,
|
'char_len_table': EUCTW_CHAR_LEN_TABLE,
|
||||||
'name': 'x-euc-tw'}
|
'name': 'x-euc-tw'}
|
||||||
|
|
||||||
# GB2312
|
# GB2312
|
||||||
|
|
||||||
GB2312_cls = (
|
GB2312_CLS = (
|
||||||
1,1,1,1,1,1,1,1, # 00 - 07
|
1,1,1,1,1,1,1,1, # 00 - 07
|
||||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||||
1,1,1,1,1,1,1,1, # 10 - 17
|
1,1,1,1,1,1,1,1, # 10 - 17
|
||||||
|
@ -312,31 +312,31 @@ GB2312_cls = (
|
||||||
6,6,6,6,6,6,6,0 # f8 - ff
|
6,6,6,6,6,6,6,0 # f8 - ff
|
||||||
)
|
)
|
||||||
|
|
||||||
GB2312_st = (
|
GB2312_ST = (
|
||||||
eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07
|
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07
|
||||||
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17
|
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17
|
||||||
4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f
|
4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||||
eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27
|
MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27
|
||||||
eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
|
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
|
||||||
)
|
)
|
||||||
|
|
||||||
# To be accurate, the length of class 6 can be either 2 or 4.
|
# To be accurate, the length of class 6 can be either 2 or 4.
|
||||||
# But it is not necessary to discriminate between the two since
|
# But it is not necessary to discriminate between the two since
|
||||||
# it is used for frequency analysis only, and we are validing
|
# it is used for frequency analysis only, and we are validating
|
||||||
# each code range there as well. So it is safe to set it to be
|
# each code range there as well. So it is safe to set it to be
|
||||||
# 2 here.
|
# 2 here.
|
||||||
GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)
|
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
|
||||||
|
|
||||||
GB2312SMModel = {'classTable': GB2312_cls,
|
GB2312_SM_MODEL = {'class_table': GB2312_CLS,
|
||||||
'classFactor': 7,
|
'class_factor': 7,
|
||||||
'stateTable': GB2312_st,
|
'state_table': GB2312_ST,
|
||||||
'charLenTable': GB2312CharLenTable,
|
'char_len_table': GB2312_CHAR_LEN_TABLE,
|
||||||
'name': 'GB2312'}
|
'name': 'GB2312'}
|
||||||
|
|
||||||
# Shift_JIS
|
# Shift_JIS
|
||||||
|
|
||||||
SJIS_cls = (
|
SJIS_CLS = (
|
||||||
1,1,1,1,1,1,1,1, # 00 - 07
|
1,1,1,1,1,1,1,1, # 00 - 07
|
||||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||||
1,1,1,1,1,1,1,1, # 10 - 17
|
1,1,1,1,1,1,1,1, # 10 - 17
|
||||||
|
@ -373,23 +373,23 @@ SJIS_cls = (
|
||||||
3,3,3,3,3,0,0,0) # f8 - ff
|
3,3,3,3,3,0,0,0) # f8 - ff
|
||||||
|
|
||||||
|
|
||||||
SJIS_st = (
|
SJIS_ST = (
|
||||||
eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07
|
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||||
eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17
|
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17
|
||||||
)
|
)
|
||||||
|
|
||||||
SJISCharLenTable = (0, 1, 1, 2, 0, 0)
|
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
|
||||||
|
|
||||||
SJISSMModel = {'classTable': SJIS_cls,
|
SJIS_SM_MODEL = {'class_table': SJIS_CLS,
|
||||||
'classFactor': 6,
|
'class_factor': 6,
|
||||||
'stateTable': SJIS_st,
|
'state_table': SJIS_ST,
|
||||||
'charLenTable': SJISCharLenTable,
|
'char_len_table': SJIS_CHAR_LEN_TABLE,
|
||||||
'name': 'Shift_JIS'}
|
'name': 'Shift_JIS'}
|
||||||
|
|
||||||
# UCS2-BE
|
# UCS2-BE
|
||||||
|
|
||||||
UCS2BE_cls = (
|
UCS2BE_CLS = (
|
||||||
0,0,0,0,0,0,0,0, # 00 - 07
|
0,0,0,0,0,0,0,0, # 00 - 07
|
||||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||||
0,0,0,0,0,0,0,0, # 10 - 17
|
0,0,0,0,0,0,0,0, # 10 - 17
|
||||||
|
@ -424,27 +424,27 @@ UCS2BE_cls = (
|
||||||
0,0,0,0,0,0,4,5 # f8 - ff
|
0,0,0,0,0,0,4,5 # f8 - ff
|
||||||
)
|
)
|
||||||
|
|
||||||
UCS2BE_st = (
|
UCS2BE_ST = (
|
||||||
5, 7, 7,eError, 4, 3,eError,eError,#00-07
|
5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||||
eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17
|
MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||||
6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f
|
6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f
|
||||||
6, 6, 6, 6, 5, 7, 7,eError,#20-27
|
6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27
|
||||||
5, 8, 6, 6,eError, 6, 6, 6,#28-2f
|
5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f
|
||||||
6, 6, 6, 6,eError,eError,eStart,eStart #30-37
|
6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37
|
||||||
)
|
)
|
||||||
|
|
||||||
UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)
|
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
|
||||||
|
|
||||||
UCS2BESMModel = {'classTable': UCS2BE_cls,
|
UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS,
|
||||||
'classFactor': 6,
|
'class_factor': 6,
|
||||||
'stateTable': UCS2BE_st,
|
'state_table': UCS2BE_ST,
|
||||||
'charLenTable': UCS2BECharLenTable,
|
'char_len_table': UCS2BE_CHAR_LEN_TABLE,
|
||||||
'name': 'UTF-16BE'}
|
'name': 'UTF-16BE'}
|
||||||
|
|
||||||
# UCS2-LE
|
# UCS2-LE
|
||||||
|
|
||||||
UCS2LE_cls = (
|
UCS2LE_CLS = (
|
||||||
0,0,0,0,0,0,0,0, # 00 - 07
|
0,0,0,0,0,0,0,0, # 00 - 07
|
||||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||||
0,0,0,0,0,0,0,0, # 10 - 17
|
0,0,0,0,0,0,0,0, # 10 - 17
|
||||||
|
@ -479,27 +479,27 @@ UCS2LE_cls = (
|
||||||
0,0,0,0,0,0,4,5 # f8 - ff
|
0,0,0,0,0,0,4,5 # f8 - ff
|
||||||
)
|
)
|
||||||
|
|
||||||
UCS2LE_st = (
|
UCS2LE_ST = (
|
||||||
6, 6, 7, 6, 4, 3,eError,eError,#00-07
|
6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||||
eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17
|
MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17
|
||||||
5, 5, 5,eError, 5,eError, 6, 6,#18-1f
|
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f
|
||||||
7, 6, 8, 8, 5, 5, 5,eError,#20-27
|
7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27
|
||||||
5, 5, 5,eError,eError,eError, 5, 5,#28-2f
|
5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f
|
||||||
5, 5, 5,eError, 5,eError,eStart,eStart #30-37
|
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37
|
||||||
)
|
)
|
||||||
|
|
||||||
UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)
|
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
|
||||||
|
|
||||||
UCS2LESMModel = {'classTable': UCS2LE_cls,
|
UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS,
|
||||||
'classFactor': 6,
|
'class_factor': 6,
|
||||||
'stateTable': UCS2LE_st,
|
'state_table': UCS2LE_ST,
|
||||||
'charLenTable': UCS2LECharLenTable,
|
'char_len_table': UCS2LE_CHAR_LEN_TABLE,
|
||||||
'name': 'UTF-16LE'}
|
'name': 'UTF-16LE'}
|
||||||
|
|
||||||
# UTF-8
|
# UTF-8
|
||||||
|
|
||||||
UTF8_cls = (
|
UTF8_CLS = (
|
||||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
|
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
|
||||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||||
1,1,1,1,1,1,1,1, # 10 - 17
|
1,1,1,1,1,1,1,1, # 10 - 17
|
||||||
|
@ -534,39 +534,39 @@ UTF8_cls = (
|
||||||
12,13,13,13,14,15,0,0 # f8 - ff
|
12,13,13,13,14,15,0,0 # f8 - ff
|
||||||
)
|
)
|
||||||
|
|
||||||
UTF8_st = (
|
UTF8_ST = (
|
||||||
eError,eStart,eError,eError,eError,eError, 12, 10,#00-07
|
MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07
|
||||||
9, 11, 8, 7, 6, 5, 4, 3,#08-0f
|
9, 11, 8, 7, 6, 5, 4, 3,#08-0f
|
||||||
eError,eError,eError,eError,eError,eError,eError,eError,#10-17
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||||
eError,eError,eError,eError,eError,eError,eError,eError,#18-1f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27
|
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27
|
||||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f
|
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f
|
||||||
eError,eError, 5, 5, 5, 5,eError,eError,#30-37
|
MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37
|
||||||
eError,eError,eError,eError,eError,eError,eError,eError,#38-3f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f
|
||||||
eError,eError,eError, 5, 5, 5,eError,eError,#40-47
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47
|
||||||
eError,eError,eError,eError,eError,eError,eError,eError,#48-4f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f
|
||||||
eError,eError, 7, 7, 7, 7,eError,eError,#50-57
|
MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57
|
||||||
eError,eError,eError,eError,eError,eError,eError,eError,#58-5f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f
|
||||||
eError,eError,eError,eError, 7, 7,eError,eError,#60-67
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67
|
||||||
eError,eError,eError,eError,eError,eError,eError,eError,#68-6f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f
|
||||||
eError,eError, 9, 9, 9, 9,eError,eError,#70-77
|
MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77
|
||||||
eError,eError,eError,eError,eError,eError,eError,eError,#78-7f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f
|
||||||
eError,eError,eError,eError,eError, 9,eError,eError,#80-87
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87
|
||||||
eError,eError,eError,eError,eError,eError,eError,eError,#88-8f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f
|
||||||
eError,eError, 12, 12, 12, 12,eError,eError,#90-97
|
MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97
|
||||||
eError,eError,eError,eError,eError,eError,eError,eError,#98-9f
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f
|
||||||
eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7
|
||||||
eError,eError,eError,eError,eError,eError,eError,eError,#a8-af
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af
|
||||||
eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7
|
MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7
|
||||||
eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf
|
||||||
eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7
|
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7
|
||||||
eError,eError,eError,eError,eError,eError,eError,eError #c8-cf
|
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf
|
||||||
)
|
)
|
||||||
|
|
||||||
UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
|
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
|
||||||
|
|
||||||
UTF8SMModel = {'classTable': UTF8_cls,
|
UTF8_SM_MODEL = {'class_table': UTF8_CLS,
|
||||||
'classFactor': 16,
|
'class_factor': 16,
|
||||||
'stateTable': UTF8_st,
|
'state_table': UTF8_ST,
|
||||||
'charLenTable': UTF8CharLenTable,
|
'char_len_table': UTF8_CHAR_LEN_TABLE,
|
||||||
'name': 'UTF-8'}
|
'name': 'UTF-8'}
|
||||||
|
|
150
thirdparty/chardet/sbcharsetprober.py
vendored
150
thirdparty/chardet/sbcharsetprober.py
vendored
|
@ -26,95 +26,107 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
import sys
|
|
||||||
from . import constants
|
|
||||||
from .charsetprober import CharSetProber
|
from .charsetprober import CharSetProber
|
||||||
from .compat import wrap_ord
|
from .enums import CharacterCategory, ProbingState, SequenceLikelihood
|
||||||
|
|
||||||
SAMPLE_SIZE = 64
|
|
||||||
SB_ENOUGH_REL_THRESHOLD = 1024
|
|
||||||
POSITIVE_SHORTCUT_THRESHOLD = 0.95
|
|
||||||
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
|
|
||||||
SYMBOL_CAT_ORDER = 250
|
|
||||||
NUMBER_OF_SEQ_CAT = 4
|
|
||||||
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
|
|
||||||
#NEGATIVE_CAT = 0
|
|
||||||
|
|
||||||
|
|
||||||
class SingleByteCharSetProber(CharSetProber):
|
class SingleByteCharSetProber(CharSetProber):
|
||||||
def __init__(self, model, reversed=False, nameProber=None):
|
SAMPLE_SIZE = 64
|
||||||
CharSetProber.__init__(self)
|
SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2
|
||||||
self._mModel = model
|
POSITIVE_SHORTCUT_THRESHOLD = 0.95
|
||||||
|
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
|
||||||
|
|
||||||
|
def __init__(self, model, reversed=False, name_prober=None):
|
||||||
|
super(SingleByteCharSetProber, self).__init__()
|
||||||
|
self._model = model
|
||||||
# TRUE if we need to reverse every pair in the model lookup
|
# TRUE if we need to reverse every pair in the model lookup
|
||||||
self._mReversed = reversed
|
self._reversed = reversed
|
||||||
# Optional auxiliary prober for name decision
|
# Optional auxiliary prober for name decision
|
||||||
self._mNameProber = nameProber
|
self._name_prober = name_prober
|
||||||
|
self._last_order = None
|
||||||
|
self._seq_counters = None
|
||||||
|
self._total_seqs = None
|
||||||
|
self._total_char = None
|
||||||
|
self._freq_char = None
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
CharSetProber.reset(self)
|
super(SingleByteCharSetProber, self).reset()
|
||||||
# char order of last character
|
# char order of last character
|
||||||
self._mLastOrder = 255
|
self._last_order = 255
|
||||||
self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
|
self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
|
||||||
self._mTotalSeqs = 0
|
self._total_seqs = 0
|
||||||
self._mTotalChar = 0
|
self._total_char = 0
|
||||||
# characters that fall in our sampling range
|
# characters that fall in our sampling range
|
||||||
self._mFreqChar = 0
|
self._freq_char = 0
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
if self._mNameProber:
|
def charset_name(self):
|
||||||
return self._mNameProber.get_charset_name()
|
if self._name_prober:
|
||||||
|
return self._name_prober.charset_name
|
||||||
else:
|
else:
|
||||||
return self._mModel['charsetName']
|
return self._model['charset_name']
|
||||||
|
|
||||||
def feed(self, aBuf):
|
@property
|
||||||
if not self._mModel['keepEnglishLetter']:
|
def language(self):
|
||||||
aBuf = self.filter_without_english_letters(aBuf)
|
if self._name_prober:
|
||||||
aLen = len(aBuf)
|
return self._name_prober.language
|
||||||
if not aLen:
|
else:
|
||||||
return self.get_state()
|
return self._model.get('language')
|
||||||
for c in aBuf:
|
|
||||||
order = self._mModel['charToOrderMap'][wrap_ord(c)]
|
def feed(self, byte_str):
|
||||||
if order < SYMBOL_CAT_ORDER:
|
if not self._model['keep_english_letter']:
|
||||||
self._mTotalChar += 1
|
byte_str = self.filter_international_words(byte_str)
|
||||||
if order < SAMPLE_SIZE:
|
if not byte_str:
|
||||||
self._mFreqChar += 1
|
return self.state
|
||||||
if self._mLastOrder < SAMPLE_SIZE:
|
char_to_order_map = self._model['char_to_order_map']
|
||||||
self._mTotalSeqs += 1
|
for i, c in enumerate(byte_str):
|
||||||
if not self._mReversed:
|
# XXX: Order is in range 1-64, so one would think we want 0-63 here,
|
||||||
i = (self._mLastOrder * SAMPLE_SIZE) + order
|
# but that leads to 27 more test failures than before.
|
||||||
model = self._mModel['precedenceMatrix'][i]
|
order = char_to_order_map[c]
|
||||||
|
# XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
|
||||||
|
# CharacterCategory.SYMBOL is actually 253, so we use CONTROL
|
||||||
|
# to make it closer to the original intent. The only difference
|
||||||
|
# is whether or not we count digits and control characters for
|
||||||
|
# _total_char purposes.
|
||||||
|
if order < CharacterCategory.CONTROL:
|
||||||
|
self._total_char += 1
|
||||||
|
if order < self.SAMPLE_SIZE:
|
||||||
|
self._freq_char += 1
|
||||||
|
if self._last_order < self.SAMPLE_SIZE:
|
||||||
|
self._total_seqs += 1
|
||||||
|
if not self._reversed:
|
||||||
|
i = (self._last_order * self.SAMPLE_SIZE) + order
|
||||||
|
model = self._model['precedence_matrix'][i]
|
||||||
else: # reverse the order of the letters in the lookup
|
else: # reverse the order of the letters in the lookup
|
||||||
i = (order * SAMPLE_SIZE) + self._mLastOrder
|
i = (order * self.SAMPLE_SIZE) + self._last_order
|
||||||
model = self._mModel['precedenceMatrix'][i]
|
model = self._model['precedence_matrix'][i]
|
||||||
self._mSeqCounters[model] += 1
|
self._seq_counters[model] += 1
|
||||||
self._mLastOrder = order
|
self._last_order = order
|
||||||
|
|
||||||
if self.get_state() == constants.eDetecting:
|
charset_name = self._model['charset_name']
|
||||||
if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:
|
if self.state == ProbingState.DETECTING:
|
||||||
cf = self.get_confidence()
|
if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
|
||||||
if cf > POSITIVE_SHORTCUT_THRESHOLD:
|
confidence = self.get_confidence()
|
||||||
if constants._debug:
|
if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
|
||||||
sys.stderr.write('%s confidence = %s, we have a'
|
self.logger.debug('%s confidence = %s, we have a winner',
|
||||||
'winner\n' %
|
charset_name, confidence)
|
||||||
(self._mModel['charsetName'], cf))
|
self._state = ProbingState.FOUND_IT
|
||||||
self._mState = constants.eFoundIt
|
elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
|
||||||
elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
|
self.logger.debug('%s confidence = %s, below negative '
|
||||||
if constants._debug:
|
'shortcut threshhold %s', charset_name,
|
||||||
sys.stderr.write('%s confidence = %s, below negative'
|
confidence,
|
||||||
'shortcut threshhold %s\n' %
|
self.NEGATIVE_SHORTCUT_THRESHOLD)
|
||||||
(self._mModel['charsetName'], cf,
|
self._state = ProbingState.NOT_ME
|
||||||
NEGATIVE_SHORTCUT_THRESHOLD))
|
|
||||||
self._mState = constants.eNotMe
|
|
||||||
|
|
||||||
return self.get_state()
|
return self.state
|
||||||
|
|
||||||
def get_confidence(self):
|
def get_confidence(self):
|
||||||
r = 0.01
|
r = 0.01
|
||||||
if self._mTotalSeqs > 0:
|
if self._total_seqs > 0:
|
||||||
r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
|
r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
|
||||||
/ self._mModel['mTypicalPositiveRatio'])
|
self._total_seqs / self._model['typical_positive_ratio'])
|
||||||
r = r * self._mFreqChar / self._mTotalChar
|
r = r * self._freq_char / self._total_char
|
||||||
if r >= 1.0:
|
if r >= 1.0:
|
||||||
r = 0.99
|
r = 0.99
|
||||||
return r
|
return r
|
||||||
|
|
30
thirdparty/chardet/sbcsgroupprober.py
vendored
30
thirdparty/chardet/sbcsgroupprober.py
vendored
|
@ -33,16 +33,17 @@ from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
|
||||||
Ibm866Model, Ibm855Model)
|
Ibm866Model, Ibm855Model)
|
||||||
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
|
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
|
||||||
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
|
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
|
||||||
from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
|
# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
|
||||||
from .langthaimodel import TIS620ThaiModel
|
from .langthaimodel import TIS620ThaiModel
|
||||||
from .langhebrewmodel import Win1255HebrewModel
|
from .langhebrewmodel import Win1255HebrewModel
|
||||||
from .hebrewprober import HebrewProber
|
from .hebrewprober import HebrewProber
|
||||||
|
from .langturkishmodel import Latin5TurkishModel
|
||||||
|
|
||||||
|
|
||||||
class SBCSGroupProber(CharSetGroupProber):
|
class SBCSGroupProber(CharSetGroupProber):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
CharSetGroupProber.__init__(self)
|
super(SBCSGroupProber, self).__init__()
|
||||||
self._mProbers = [
|
self.probers = [
|
||||||
SingleByteCharSetProber(Win1251CyrillicModel),
|
SingleByteCharSetProber(Win1251CyrillicModel),
|
||||||
SingleByteCharSetProber(Koi8rModel),
|
SingleByteCharSetProber(Koi8rModel),
|
||||||
SingleByteCharSetProber(Latin5CyrillicModel),
|
SingleByteCharSetProber(Latin5CyrillicModel),
|
||||||
|
@ -53,17 +54,20 @@ class SBCSGroupProber(CharSetGroupProber):
|
||||||
SingleByteCharSetProber(Win1253GreekModel),
|
SingleByteCharSetProber(Win1253GreekModel),
|
||||||
SingleByteCharSetProber(Latin5BulgarianModel),
|
SingleByteCharSetProber(Latin5BulgarianModel),
|
||||||
SingleByteCharSetProber(Win1251BulgarianModel),
|
SingleByteCharSetProber(Win1251BulgarianModel),
|
||||||
SingleByteCharSetProber(Latin2HungarianModel),
|
# TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
|
||||||
SingleByteCharSetProber(Win1250HungarianModel),
|
# after we retrain model.
|
||||||
|
# SingleByteCharSetProber(Latin2HungarianModel),
|
||||||
|
# SingleByteCharSetProber(Win1250HungarianModel),
|
||||||
SingleByteCharSetProber(TIS620ThaiModel),
|
SingleByteCharSetProber(TIS620ThaiModel),
|
||||||
|
SingleByteCharSetProber(Latin5TurkishModel),
|
||||||
]
|
]
|
||||||
hebrewProber = HebrewProber()
|
hebrew_prober = HebrewProber()
|
||||||
logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,
|
logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel,
|
||||||
False, hebrewProber)
|
False, hebrew_prober)
|
||||||
visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,
|
visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True,
|
||||||
hebrewProber)
|
hebrew_prober)
|
||||||
hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)
|
hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober)
|
||||||
self._mProbers.extend([hebrewProber, logicalHebrewProber,
|
self.probers.extend([hebrew_prober, logical_hebrew_prober,
|
||||||
visualHebrewProber])
|
visual_hebrew_prober])
|
||||||
|
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
87
thirdparty/chardet/sjisprober.py
vendored
87
thirdparty/chardet/sjisprober.py
vendored
|
@ -25,69 +25,68 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
import sys
|
|
||||||
from .mbcharsetprober import MultiByteCharSetProber
|
from .mbcharsetprober import MultiByteCharSetProber
|
||||||
from .codingstatemachine import CodingStateMachine
|
from .codingstatemachine import CodingStateMachine
|
||||||
from .chardistribution import SJISDistributionAnalysis
|
from .chardistribution import SJISDistributionAnalysis
|
||||||
from .jpcntx import SJISContextAnalysis
|
from .jpcntx import SJISContextAnalysis
|
||||||
from .mbcssm import SJISSMModel
|
from .mbcssm import SJIS_SM_MODEL
|
||||||
from . import constants
|
from .enums import ProbingState, MachineState
|
||||||
|
|
||||||
if sys.version_info >= (3, 0):
|
|
||||||
xrange = range
|
|
||||||
|
|
||||||
class SJISProber(MultiByteCharSetProber):
|
class SJISProber(MultiByteCharSetProber):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
MultiByteCharSetProber.__init__(self)
|
super(SJISProber, self).__init__()
|
||||||
self._mCodingSM = CodingStateMachine(SJISSMModel)
|
self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
|
||||||
self._mDistributionAnalyzer = SJISDistributionAnalysis()
|
self.distribution_analyzer = SJISDistributionAnalysis()
|
||||||
self._mContextAnalyzer = SJISContextAnalysis()
|
self.context_analyzer = SJISContextAnalysis()
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
MultiByteCharSetProber.reset(self)
|
super(SJISProber, self).reset()
|
||||||
self._mContextAnalyzer.reset()
|
self.context_analyzer.reset()
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
return self._mContextAnalyzer.get_charset_name()
|
def charset_name(self):
|
||||||
|
return self.context_analyzer.charset_name
|
||||||
|
|
||||||
def feed(self, aBuf):
|
@property
|
||||||
aLen = len(aBuf)
|
def language(self):
|
||||||
for i in xrange(0, aLen):
|
return "Japanese"
|
||||||
codingState = self._mCodingSM.next_state(aBuf[i])
|
|
||||||
if codingState == constants.eError:
|
def feed(self, byte_str):
|
||||||
if constants._debug:
|
for i in range(len(byte_str)):
|
||||||
sys.stderr.write(self.get_charset_name()
|
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||||
+ ' prober hit error at byte ' + str(i)
|
if coding_state == MachineState.ERROR:
|
||||||
+ '\n')
|
self.logger.debug('%s %s prober hit error at byte %s',
|
||||||
self._mState = constants.eNotMe
|
self.charset_name, self.language, i)
|
||||||
|
self._state = ProbingState.NOT_ME
|
||||||
break
|
break
|
||||||
elif codingState == constants.eItsMe:
|
elif coding_state == MachineState.ITS_ME:
|
||||||
self._mState = constants.eFoundIt
|
self._state = ProbingState.FOUND_IT
|
||||||
break
|
break
|
||||||
elif codingState == constants.eStart:
|
elif coding_state == MachineState.START:
|
||||||
charLen = self._mCodingSM.get_current_charlen()
|
char_len = self.coding_sm.get_current_charlen()
|
||||||
if i == 0:
|
if i == 0:
|
||||||
self._mLastChar[1] = aBuf[0]
|
self._last_char[1] = byte_str[0]
|
||||||
self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
|
self.context_analyzer.feed(self._last_char[2 - char_len:],
|
||||||
charLen)
|
char_len)
|
||||||
self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
|
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||||
else:
|
else:
|
||||||
self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
|
self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3
|
||||||
- charLen], charLen)
|
- char_len], char_len)
|
||||||
self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
|
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||||
charLen)
|
char_len)
|
||||||
|
|
||||||
self._mLastChar[0] = aBuf[aLen - 1]
|
self._last_char[0] = byte_str[-1]
|
||||||
|
|
||||||
if self.get_state() == constants.eDetecting:
|
if self.state == ProbingState.DETECTING:
|
||||||
if (self._mContextAnalyzer.got_enough_data() and
|
if (self.context_analyzer.got_enough_data() and
|
||||||
(self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
|
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||||
self._mState = constants.eFoundIt
|
self._state = ProbingState.FOUND_IT
|
||||||
|
|
||||||
return self.get_state()
|
return self.state
|
||||||
|
|
||||||
def get_confidence(self):
|
def get_confidence(self):
|
||||||
contxtCf = self._mContextAnalyzer.get_confidence()
|
context_conf = self.context_analyzer.get_confidence()
|
||||||
distribCf = self._mDistributionAnalyzer.get_confidence()
|
distrib_conf = self.distribution_analyzer.get_confidence()
|
||||||
return max(contxtCf, distribCf)
|
return max(context_conf, distrib_conf)
|
||||||
|
|
314
thirdparty/chardet/universaldetector.py
vendored
314
thirdparty/chardet/universaldetector.py
vendored
|
@ -25,146 +25,262 @@
|
||||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
"""
|
||||||
|
Module containing the UniversalDetector detector class, which is the primary
|
||||||
|
class a user of ``chardet`` should use.
|
||||||
|
|
||||||
|
:author: Mark Pilgrim (initial port to Python)
|
||||||
|
:author: Shy Shalom (original C code)
|
||||||
|
:author: Dan Blanchard (major refactoring for 3.0)
|
||||||
|
:author: Ian Cordasco
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
from . import constants
|
|
||||||
import sys
|
|
||||||
import codecs
|
import codecs
|
||||||
from .latin1prober import Latin1Prober # windows-1252
|
import logging
|
||||||
from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets
|
|
||||||
from .sbcsgroupprober import SBCSGroupProber # single-byte character sets
|
|
||||||
from .escprober import EscCharSetProber # ISO-2122, etc.
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
MINIMUM_THRESHOLD = 0.20
|
from .charsetgroupprober import CharSetGroupProber
|
||||||
ePureAscii = 0
|
from .enums import InputState, LanguageFilter, ProbingState
|
||||||
eEscAscii = 1
|
from .escprober import EscCharSetProber
|
||||||
eHighbyte = 2
|
from .latin1prober import Latin1Prober
|
||||||
|
from .mbcsgroupprober import MBCSGroupProber
|
||||||
|
from .sbcsgroupprober import SBCSGroupProber
|
||||||
|
|
||||||
|
|
||||||
class UniversalDetector:
|
class UniversalDetector(object):
|
||||||
def __init__(self):
|
"""
|
||||||
self._highBitDetector = re.compile(b'[\x80-\xFF]')
|
The ``UniversalDetector`` class underlies the ``chardet.detect`` function
|
||||||
self._escDetector = re.compile(b'(\033|~{)')
|
and coordinates all of the different charset probers.
|
||||||
self._mEscCharSetProber = None
|
|
||||||
self._mCharSetProbers = []
|
To get a ``dict`` containing an encoding and its confidence, you can simply
|
||||||
|
run:
|
||||||
|
|
||||||
|
.. code::
|
||||||
|
|
||||||
|
u = UniversalDetector()
|
||||||
|
u.feed(some_bytes)
|
||||||
|
u.close()
|
||||||
|
detected = u.result
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
MINIMUM_THRESHOLD = 0.20
|
||||||
|
HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]')
|
||||||
|
ESC_DETECTOR = re.compile(b'(\033|~{)')
|
||||||
|
WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]')
|
||||||
|
ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252',
|
||||||
|
'iso-8859-2': 'Windows-1250',
|
||||||
|
'iso-8859-5': 'Windows-1251',
|
||||||
|
'iso-8859-6': 'Windows-1256',
|
||||||
|
'iso-8859-7': 'Windows-1253',
|
||||||
|
'iso-8859-8': 'Windows-1255',
|
||||||
|
'iso-8859-9': 'Windows-1254',
|
||||||
|
'iso-8859-13': 'Windows-1257'}
|
||||||
|
|
||||||
|
def __init__(self, lang_filter=LanguageFilter.ALL):
|
||||||
|
self._esc_charset_prober = None
|
||||||
|
self._charset_probers = []
|
||||||
|
self.result = None
|
||||||
|
self.done = None
|
||||||
|
self._got_data = None
|
||||||
|
self._input_state = None
|
||||||
|
self._last_char = None
|
||||||
|
self.lang_filter = lang_filter
|
||||||
|
self.logger = logging.getLogger(__name__)
|
||||||
|
self._has_win_bytes = None
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self.result = {'encoding': None, 'confidence': 0.0}
|
"""
|
||||||
|
Reset the UniversalDetector and all of its probers back to their
|
||||||
|
initial states. This is called by ``__init__``, so you only need to
|
||||||
|
call this directly in between analyses of different documents.
|
||||||
|
"""
|
||||||
|
self.result = {'encoding': None, 'confidence': 0.0, 'language': None}
|
||||||
self.done = False
|
self.done = False
|
||||||
self._mStart = True
|
self._got_data = False
|
||||||
self._mGotData = False
|
self._has_win_bytes = False
|
||||||
self._mInputState = ePureAscii
|
self._input_state = InputState.PURE_ASCII
|
||||||
self._mLastChar = b''
|
self._last_char = b''
|
||||||
if self._mEscCharSetProber:
|
if self._esc_charset_prober:
|
||||||
self._mEscCharSetProber.reset()
|
self._esc_charset_prober.reset()
|
||||||
for prober in self._mCharSetProbers:
|
for prober in self._charset_probers:
|
||||||
prober.reset()
|
prober.reset()
|
||||||
|
|
||||||
def feed(self, aBuf):
|
def feed(self, byte_str):
|
||||||
|
"""
|
||||||
|
Takes a chunk of a document and feeds it through all of the relevant
|
||||||
|
charset probers.
|
||||||
|
|
||||||
|
After calling ``feed``, you can check the value of the ``done``
|
||||||
|
attribute to see if you need to continue feeding the
|
||||||
|
``UniversalDetector`` more data, or if it has made a prediction
|
||||||
|
(in the ``result`` attribute).
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
You should always call ``close`` when you're done feeding in your
|
||||||
|
document if ``done`` is not already ``True``.
|
||||||
|
"""
|
||||||
if self.done:
|
if self.done:
|
||||||
return
|
return
|
||||||
|
|
||||||
aLen = len(aBuf)
|
if not len(byte_str):
|
||||||
if not aLen:
|
|
||||||
return
|
return
|
||||||
|
|
||||||
if not self._mGotData:
|
if not isinstance(byte_str, bytearray):
|
||||||
|
byte_str = bytearray(byte_str)
|
||||||
|
|
||||||
|
# First check for known BOMs, since these are guaranteed to be correct
|
||||||
|
if not self._got_data:
|
||||||
# If the data starts with BOM, we know it is UTF
|
# If the data starts with BOM, we know it is UTF
|
||||||
if aBuf[:3] == codecs.BOM_UTF8:
|
if byte_str.startswith(codecs.BOM_UTF8):
|
||||||
# EF BB BF UTF-8 with BOM
|
# EF BB BF UTF-8 with BOM
|
||||||
self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0}
|
self.result = {'encoding': "UTF-8-SIG",
|
||||||
elif aBuf[:4] == codecs.BOM_UTF32_LE:
|
'confidence': 1.0,
|
||||||
|
'language': ''}
|
||||||
|
elif byte_str.startswith((codecs.BOM_UTF32_LE,
|
||||||
|
codecs.BOM_UTF32_BE)):
|
||||||
# FF FE 00 00 UTF-32, little-endian BOM
|
# FF FE 00 00 UTF-32, little-endian BOM
|
||||||
self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
|
|
||||||
elif aBuf[:4] == codecs.BOM_UTF32_BE:
|
|
||||||
# 00 00 FE FF UTF-32, big-endian BOM
|
# 00 00 FE FF UTF-32, big-endian BOM
|
||||||
self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
|
self.result = {'encoding': "UTF-32",
|
||||||
elif aBuf[:4] == b'\xFE\xFF\x00\x00':
|
'confidence': 1.0,
|
||||||
|
'language': ''}
|
||||||
|
elif byte_str.startswith(b'\xFE\xFF\x00\x00'):
|
||||||
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
|
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
|
||||||
self.result = {
|
self.result = {'encoding': "X-ISO-10646-UCS-4-3412",
|
||||||
'encoding': "X-ISO-10646-UCS-4-3412",
|
'confidence': 1.0,
|
||||||
'confidence': 1.0
|
'language': ''}
|
||||||
}
|
elif byte_str.startswith(b'\x00\x00\xFF\xFE'):
|
||||||
elif aBuf[:4] == b'\x00\x00\xFF\xFE':
|
|
||||||
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
||||||
self.result = {
|
self.result = {'encoding': "X-ISO-10646-UCS-4-2143",
|
||||||
'encoding': "X-ISO-10646-UCS-4-2143",
|
'confidence': 1.0,
|
||||||
'confidence': 1.0
|
'language': ''}
|
||||||
}
|
elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)):
|
||||||
elif aBuf[:2] == codecs.BOM_LE:
|
|
||||||
# FF FE UTF-16, little endian BOM
|
# FF FE UTF-16, little endian BOM
|
||||||
self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
|
|
||||||
elif aBuf[:2] == codecs.BOM_BE:
|
|
||||||
# FE FF UTF-16, big endian BOM
|
# FE FF UTF-16, big endian BOM
|
||||||
self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}
|
self.result = {'encoding': "UTF-16",
|
||||||
|
'confidence': 1.0,
|
||||||
|
'language': ''}
|
||||||
|
|
||||||
self._mGotData = True
|
self._got_data = True
|
||||||
if self.result['encoding'] and (self.result['confidence'] > 0.0):
|
if self.result['encoding'] is not None:
|
||||||
self.done = True
|
self.done = True
|
||||||
return
|
return
|
||||||
|
|
||||||
if self._mInputState == ePureAscii:
|
# If none of those matched and we've only seen ASCII so far, check
|
||||||
if self._highBitDetector.search(aBuf):
|
# for high bytes and escape sequences
|
||||||
self._mInputState = eHighbyte
|
if self._input_state == InputState.PURE_ASCII:
|
||||||
elif ((self._mInputState == ePureAscii) and
|
if self.HIGH_BYTE_DETECTOR.search(byte_str):
|
||||||
self._escDetector.search(self._mLastChar + aBuf)):
|
self._input_state = InputState.HIGH_BYTE
|
||||||
self._mInputState = eEscAscii
|
elif self._input_state == InputState.PURE_ASCII and \
|
||||||
|
self.ESC_DETECTOR.search(self._last_char + byte_str):
|
||||||
|
self._input_state = InputState.ESC_ASCII
|
||||||
|
|
||||||
self._mLastChar = aBuf[-1:]
|
self._last_char = byte_str[-1:]
|
||||||
|
|
||||||
if self._mInputState == eEscAscii:
|
# If we've seen escape sequences, use the EscCharSetProber, which
|
||||||
if not self._mEscCharSetProber:
|
# uses a simple state machine to check for known escape sequences in
|
||||||
self._mEscCharSetProber = EscCharSetProber()
|
# HZ and ISO-2022 encodings, since those are the only encodings that
|
||||||
if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
|
# use such sequences.
|
||||||
self.result = {'encoding': self._mEscCharSetProber.get_charset_name(),
|
if self._input_state == InputState.ESC_ASCII:
|
||||||
'confidence': self._mEscCharSetProber.get_confidence()}
|
if not self._esc_charset_prober:
|
||||||
|
self._esc_charset_prober = EscCharSetProber(self.lang_filter)
|
||||||
|
if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT:
|
||||||
|
self.result = {'encoding':
|
||||||
|
self._esc_charset_prober.charset_name,
|
||||||
|
'confidence':
|
||||||
|
self._esc_charset_prober.get_confidence(),
|
||||||
|
'language':
|
||||||
|
self._esc_charset_prober.language}
|
||||||
self.done = True
|
self.done = True
|
||||||
elif self._mInputState == eHighbyte:
|
# If we've seen high bytes (i.e., those with values greater than 127),
|
||||||
if not self._mCharSetProbers:
|
# we need to do more complicated checks using all our multi-byte and
|
||||||
self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
|
# single-byte probers that are left. The single-byte probers
|
||||||
Latin1Prober()]
|
# use character bigram distributions to determine the encoding, whereas
|
||||||
for prober in self._mCharSetProbers:
|
# the multi-byte probers use a combination of character unigram and
|
||||||
if prober.feed(aBuf) == constants.eFoundIt:
|
# bigram distributions.
|
||||||
self.result = {'encoding': prober.get_charset_name(),
|
elif self._input_state == InputState.HIGH_BYTE:
|
||||||
'confidence': prober.get_confidence()}
|
if not self._charset_probers:
|
||||||
|
self._charset_probers = [MBCSGroupProber(self.lang_filter)]
|
||||||
|
# If we're checking non-CJK encodings, use single-byte prober
|
||||||
|
if self.lang_filter & LanguageFilter.NON_CJK:
|
||||||
|
self._charset_probers.append(SBCSGroupProber())
|
||||||
|
self._charset_probers.append(Latin1Prober())
|
||||||
|
for prober in self._charset_probers:
|
||||||
|
if prober.feed(byte_str) == ProbingState.FOUND_IT:
|
||||||
|
self.result = {'encoding': prober.charset_name,
|
||||||
|
'confidence': prober.get_confidence(),
|
||||||
|
'language': prober.language}
|
||||||
self.done = True
|
self.done = True
|
||||||
break
|
break
|
||||||
|
if self.WIN_BYTE_DETECTOR.search(byte_str):
|
||||||
|
self._has_win_bytes = True
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
|
"""
|
||||||
|
Stop analyzing the current document and come up with a final
|
||||||
|
prediction.
|
||||||
|
|
||||||
|
:returns: The ``result`` attribute, a ``dict`` with the keys
|
||||||
|
`encoding`, `confidence`, and `language`.
|
||||||
|
"""
|
||||||
|
# Don't bother with checks if we're already done
|
||||||
if self.done:
|
if self.done:
|
||||||
return
|
return self.result
|
||||||
if not self._mGotData:
|
|
||||||
if constants._debug:
|
|
||||||
sys.stderr.write('no data received!\n')
|
|
||||||
return
|
|
||||||
self.done = True
|
self.done = True
|
||||||
|
|
||||||
if self._mInputState == ePureAscii:
|
if not self._got_data:
|
||||||
self.result = {'encoding': 'ascii', 'confidence': 1.0}
|
self.logger.debug('no data received!')
|
||||||
return self.result
|
|
||||||
|
|
||||||
if self._mInputState == eHighbyte:
|
# Default to ASCII if it is all we've seen so far
|
||||||
proberConfidence = None
|
elif self._input_state == InputState.PURE_ASCII:
|
||||||
maxProberConfidence = 0.0
|
self.result = {'encoding': 'ascii',
|
||||||
maxProber = None
|
'confidence': 1.0,
|
||||||
for prober in self._mCharSetProbers:
|
'language': ''}
|
||||||
|
|
||||||
|
# If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD
|
||||||
|
elif self._input_state == InputState.HIGH_BYTE:
|
||||||
|
prober_confidence = None
|
||||||
|
max_prober_confidence = 0.0
|
||||||
|
max_prober = None
|
||||||
|
for prober in self._charset_probers:
|
||||||
if not prober:
|
if not prober:
|
||||||
continue
|
continue
|
||||||
proberConfidence = prober.get_confidence()
|
prober_confidence = prober.get_confidence()
|
||||||
if proberConfidence > maxProberConfidence:
|
if prober_confidence > max_prober_confidence:
|
||||||
maxProberConfidence = proberConfidence
|
max_prober_confidence = prober_confidence
|
||||||
maxProber = prober
|
max_prober = prober
|
||||||
if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):
|
if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD):
|
||||||
self.result = {'encoding': maxProber.get_charset_name(),
|
charset_name = max_prober.charset_name
|
||||||
'confidence': maxProber.get_confidence()}
|
lower_charset_name = max_prober.charset_name.lower()
|
||||||
return self.result
|
confidence = max_prober.get_confidence()
|
||||||
|
# Use Windows encoding name instead of ISO-8859 if we saw any
|
||||||
|
# extra Windows-specific bytes
|
||||||
|
if lower_charset_name.startswith('iso-8859'):
|
||||||
|
if self._has_win_bytes:
|
||||||
|
charset_name = self.ISO_WIN_MAP.get(lower_charset_name,
|
||||||
|
charset_name)
|
||||||
|
self.result = {'encoding': charset_name,
|
||||||
|
'confidence': confidence,
|
||||||
|
'language': max_prober.language}
|
||||||
|
|
||||||
if constants._debug:
|
# Log all prober confidences if none met MINIMUM_THRESHOLD
|
||||||
sys.stderr.write('no probers hit minimum threshhold\n')
|
if self.logger.getEffectiveLevel() == logging.DEBUG:
|
||||||
for prober in self._mCharSetProbers[0].mProbers:
|
if self.result['encoding'] is None:
|
||||||
if not prober:
|
self.logger.debug('no probers hit minimum threshold')
|
||||||
|
for group_prober in self._charset_probers:
|
||||||
|
if not group_prober:
|
||||||
continue
|
continue
|
||||||
sys.stderr.write('%s confidence = %s\n' %
|
if isinstance(group_prober, CharSetGroupProber):
|
||||||
(prober.get_charset_name(),
|
for prober in group_prober.probers:
|
||||||
prober.get_confidence()))
|
self.logger.debug('%s %s confidence = %s',
|
||||||
|
prober.charset_name,
|
||||||
|
prober.language,
|
||||||
|
prober.get_confidence())
|
||||||
|
else:
|
||||||
|
self.logger.debug('%s %s confidence = %s',
|
||||||
|
prober.charset_name,
|
||||||
|
prober.language,
|
||||||
|
prober.get_confidence())
|
||||||
|
return self.result
|
||||||
|
|
66
thirdparty/chardet/utf8prober.py
vendored
66
thirdparty/chardet/utf8prober.py
vendored
|
@ -25,56 +25,58 @@
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
import sys
|
|
||||||
from . import constants
|
|
||||||
from .charsetprober import CharSetProber
|
from .charsetprober import CharSetProber
|
||||||
|
from .enums import ProbingState, MachineState
|
||||||
from .codingstatemachine import CodingStateMachine
|
from .codingstatemachine import CodingStateMachine
|
||||||
from .mbcssm import UTF8SMModel
|
from .mbcssm import UTF8_SM_MODEL
|
||||||
|
|
||||||
if sys.version_info >= (3, 0):
|
|
||||||
xrange = range
|
|
||||||
|
|
||||||
ONE_CHAR_PROB = 0.5
|
|
||||||
|
|
||||||
|
|
||||||
class UTF8Prober(CharSetProber):
|
class UTF8Prober(CharSetProber):
|
||||||
|
ONE_CHAR_PROB = 0.5
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
CharSetProber.__init__(self)
|
super(UTF8Prober, self).__init__()
|
||||||
self._mCodingSM = CodingStateMachine(UTF8SMModel)
|
self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
|
||||||
|
self._num_mb_chars = None
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
CharSetProber.reset(self)
|
super(UTF8Prober, self).reset()
|
||||||
self._mCodingSM.reset()
|
self.coding_sm.reset()
|
||||||
self._mNumOfMBChar = 0
|
self._num_mb_chars = 0
|
||||||
|
|
||||||
def get_charset_name(self):
|
@property
|
||||||
|
def charset_name(self):
|
||||||
return "utf-8"
|
return "utf-8"
|
||||||
|
|
||||||
def feed(self, aBuf):
|
@property
|
||||||
for c in aBuf:
|
def language(self):
|
||||||
codingState = self._mCodingSM.next_state(c)
|
return ""
|
||||||
if codingState == constants.eError:
|
|
||||||
self._mState = constants.eNotMe
|
|
||||||
break
|
|
||||||
elif codingState == constants.eItsMe:
|
|
||||||
self._mState = constants.eFoundIt
|
|
||||||
break
|
|
||||||
elif codingState == constants.eStart:
|
|
||||||
if self._mCodingSM.get_current_charlen() >= 2:
|
|
||||||
self._mNumOfMBChar += 1
|
|
||||||
|
|
||||||
if self.get_state() == constants.eDetecting:
|
def feed(self, byte_str):
|
||||||
if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
|
for c in byte_str:
|
||||||
self._mState = constants.eFoundIt
|
coding_state = self.coding_sm.next_state(c)
|
||||||
|
if coding_state == MachineState.ERROR:
|
||||||
|
self._state = ProbingState.NOT_ME
|
||||||
|
break
|
||||||
|
elif coding_state == MachineState.ITS_ME:
|
||||||
|
self._state = ProbingState.FOUND_IT
|
||||||
|
break
|
||||||
|
elif coding_state == MachineState.START:
|
||||||
|
if self.coding_sm.get_current_charlen() >= 2:
|
||||||
|
self._num_mb_chars += 1
|
||||||
|
|
||||||
return self.get_state()
|
if self.state == ProbingState.DETECTING:
|
||||||
|
if self.get_confidence() > self.SHORTCUT_THRESHOLD:
|
||||||
|
self._state = ProbingState.FOUND_IT
|
||||||
|
|
||||||
|
return self.state
|
||||||
|
|
||||||
def get_confidence(self):
|
def get_confidence(self):
|
||||||
unlike = 0.99
|
unlike = 0.99
|
||||||
if self._mNumOfMBChar < 6:
|
if self._num_mb_chars < 6:
|
||||||
for i in xrange(0, self._mNumOfMBChar):
|
unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars
|
||||||
unlike = unlike * ONE_CHAR_PROB
|
|
||||||
return 1.0 - unlike
|
return 1.0 - unlike
|
||||||
else:
|
else:
|
||||||
return unlike
|
return unlike
|
||||||
|
|
9
thirdparty/chardet/version.py
vendored
Normal file
9
thirdparty/chardet/version.py
vendored
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
"""
|
||||||
|
This module exists only to simplify retrieving the version number of chardet
|
||||||
|
from within setup.py and from chardet subpackages.
|
||||||
|
|
||||||
|
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||||
|
"""
|
||||||
|
|
||||||
|
__version__ = "3.0.4"
|
||||||
|
VERSION = __version__.split('.')
|
Loading…
Reference in New Issue
Block a user