mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 20:28:20 +03:00
f02b0af821
500 was too large and caused laggy rendering
102 lines
46 KiB
JSON
102 lines
46 KiB
JSON
{"lang": "en", "settings": {"oov_prob": -20.502029418945312}}
|
|
{"orth": ".", "id": 1, "lower": ".", "norm": ".", "shape": ".", "prefix": ".", "suffix": ".", "length": 1, "cluster": "8", "prob": -3.0678977966308594, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": ",", "id": 2, "lower": ",", "norm": ",", "shape": ",", "prefix": ",", "suffix": ",", "length": 1, "cluster": "4", "prob": -3.4549596309661865, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "the", "id": 3, "lower": "the", "norm": "the", "shape": "xxx", "prefix": "t", "suffix": "the", "length": 3, "cluster": "11", "prob": -3.528766632080078, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "I", "id": 4, "lower": "i", "norm": "I", "shape": "X", "prefix": "I", "suffix": "I", "length": 1, "cluster": "346", "prob": -3.791565179824829, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": true, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "to", "id": 5, "lower": "to", "norm": "to", "shape": "xx", "prefix": "t", "suffix": "to", "length": 2, "cluster": "12", "prob": -3.8560216426849365, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "a", "id": 6, "lower": "a", "norm": "a", "shape": "x", "prefix": "a", "suffix": "a", "length": 1, "cluster": "19", "prob": -3.92978835105896, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "and", "id": 7, "lower": "and", "norm": "and", "shape": "xxx", "prefix": "a", "suffix": "and", "length": 3, "cluster": "20", "prob": -4.113108158111572, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "of", "id": 8, "lower": "of", "norm": "of", "shape": "xx", "prefix": "o", "suffix": "of", "length": 2, "cluster": "28", "prob": -4.27587366104126, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "you", "id": 9, "lower": "you", "norm": "you", "shape": "xxx", "prefix": "y", "suffix": "you", "length": 3, "cluster": "602", "prob": -4.373791217803955, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "it", "id": 10, "lower": "it", "norm": "it", "shape": "xx", "prefix": "i", "suffix": "it", "length": 2, "cluster": "474", "prob": -4.388050079345703, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "is", "id": 11, "lower": "is", "norm": "is", "shape": "xx", "prefix": "i", "suffix": "is", "length": 2, "cluster": "762", "prob": -4.457748889923096, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "that", "id": 12, "lower": "that", "norm": "that", "shape": "xxxx", "prefix": "t", "suffix": "hat", "length": 4, "cluster": "84", "prob": -4.464504718780518, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "\n\n", "id": 0, "lower": "\n\n", "norm": "\n\n", "shape": "\n\n", "prefix": "\n", "suffix": "\n\n", "length": 2, "cluster": "0", "prob": -4.606560707092285, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "in", "id": 13, "lower": "in", "norm": "in", "shape": "xx", "prefix": "i", "suffix": "in", "length": 2, "cluster": "60", "prob": -4.619071960449219, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "'s", "id": 14, "lower": "'s", "norm": "'s", "shape": "'x", "prefix": "'", "suffix": "'s", "length": 2, "cluster": "52", "prob": -4.830559253692627, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "n't", "id": 15, "lower": "n't", "norm": "n't", "shape": "x'x", "prefix": "n", "suffix": "n't", "length": 3, "cluster": "74", "prob": -4.859938621520996, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "for", "id": 16, "lower": "for", "norm": "for", "shape": "xxx", "prefix": "f", "suffix": "for", "length": 3, "cluster": "508", "prob": -4.8801093101501465, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "\"", "id": 17, "lower": "\"", "norm": "\"", "shape": "\"", "prefix": "\"", "suffix": "\"", "length": 1, "cluster": "0", "prob": -5.02677583694458, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": true, "is_left_punct": true, "is_right_punct": true}
|
|
{"orth": "?", "id": 18, "lower": "?", "norm": "?", "shape": "?", "prefix": "?", "suffix": "?", "length": 1, "cluster": "0", "prob": -5.05924654006958, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": " ", "id": 0, "lower": " ", "norm": " ", "shape": " ", "prefix": " ", "suffix": " ", "length": 1, "cluster": "0", "prob": -5.129165172576904, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "have", "id": 19, "lower": "have", "norm": "have", "shape": "xxxx", "prefix": "h", "suffix": "ave", "length": 4, "cluster": "378", "prob": -5.156484603881836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "on", "id": 20, "lower": "on", "norm": "on", "shape": "xx", "prefix": "o", "suffix": "on", "length": 2, "cluster": "2044", "prob": -5.172736167907715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "*", "id": 21, "lower": "*", "norm": "*", "shape": "*", "prefix": "*", "suffix": "*", "length": 1, "cluster": "5098", "prob": -5.1977410316467285, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": ")", "id": 22, "lower": ")", "norm": ")", "shape": ")", "prefix": ")", "suffix": ")", "length": 1, "cluster": "0", "prob": -5.197994232177734, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": true}
|
|
{"orth": "be", "id": 23, "lower": "be", "norm": "be", "shape": "xx", "prefix": "b", "suffix": "be", "length": 2, "cluster": "458", "prob": -5.225094318389893, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "with", "id": 24, "lower": "with", "norm": "with", "shape": "xxxx", "prefix": "w", "suffix": "ith", "length": 4, "cluster": "1020", "prob": -5.243249893188477, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "do", "id": 25, "lower": "do", "norm": "do", "shape": "xx", "prefix": "d", "suffix": "do", "length": 2, "cluster": "2042", "prob": -5.246996879577637, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "was", "id": 26, "lower": "was", "norm": "was", "shape": "xxx", "prefix": "w", "suffix": "was", "length": 3, "cluster": "250", "prob": -5.252320289611816, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "are", "id": 27, "lower": "are", "norm": "are", "shape": "xxx", "prefix": "a", "suffix": "are", "length": 3, "cluster": "1530", "prob": -5.271068096160889, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "not", "id": 28, "lower": "not", "norm": "not", "shape": "xxx", "prefix": "n", "suffix": "not", "length": 3, "cluster": "1258", "prob": -5.332601070404053, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "but", "id": 29, "lower": "but", "norm": "but", "shape": "xxx", "prefix": "b", "suffix": "but", "length": 3, "cluster": "148", "prob": -5.3419694900512695, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "!", "id": 30, "lower": "!", "norm": "!", "shape": "!", "prefix": "!", "suffix": "!", "length": 1, "cluster": "0", "prob": -5.359641075134277, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "this", "id": 31, "lower": "this", "norm": "this", "shape": "xxxx", "prefix": "t", "suffix": "his", "length": 4, "cluster": "63", "prob": -5.36181640625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "[", "id": 32, "lower": "[", "norm": "[", "shape": "[", "prefix": "[", "suffix": "[", "length": 1, "cluster": "0", "prob": -5.438112258911133, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": true, "is_right_punct": false}
|
|
{"orth": "-", "id": 33, "lower": "-", "norm": "-", "shape": "-", "prefix": "-", "suffix": "-", "length": 1, "cluster": "36", "prob": -5.468655109405518, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "my", "id": 34, "lower": "my", "norm": "my", "shape": "xx", "prefix": "m", "suffix": "my", "length": 2, "cluster": "251", "prob": -5.491642951965332, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "they", "id": 35, "lower": "they", "norm": "they", "shape": "xxxx", "prefix": "t", "suffix": "hey", "length": 4, "cluster": "90", "prob": -5.5243682861328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "as", "id": 36, "lower": "as", "norm": "as", "shape": "xx", "prefix": "a", "suffix": "as", "length": 2, "cluster": "212", "prob": -5.53448486328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "like", "id": 37, "lower": "like", "norm": "like", "shape": "xxxx", "prefix": "l", "suffix": "ike", "length": 4, "cluster": "1684", "prob": -5.610429763793945, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "just", "id": 38, "lower": "just", "norm": "just", "shape": "xxxx", "prefix": "j", "suffix": "ust", "length": 4, "cluster": "31978", "prob": -5.630868434906006, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "your", "id": 39, "lower": "your", "norm": "your", "shape": "xxxx", "prefix": "y", "suffix": "our", "length": 4, "cluster": "251", "prob": -5.650108814239502, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "or", "id": 40, "lower": "or", "norm": "or", "shape": "xx", "prefix": "o", "suffix": "or", "length": 2, "cluster": "404", "prob": -5.654984951019287, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "(", "id": 41, "lower": "(", "norm": "(", "shape": "(", "prefix": "(", "suffix": "(", "length": 1, "cluster": "0", "prob": -5.75598669052124, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": true, "is_right_punct": false}
|
|
{"orth": "at", "id": 42, "lower": "at", "norm": "at", "shape": "xx", "prefix": "a", "suffix": "at", "length": 2, "cluster": "124", "prob": -5.763442516326904, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "if", "id": 43, "lower": "if", "norm": "if", "shape": "xx", "prefix": "i", "suffix": "if", "length": 2, "cluster": "4052", "prob": -5.763589859008789, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "would", "id": 44, "lower": "would", "norm": "would", "shape": "xxxx", "prefix": "w", "suffix": "uld", "length": 5, "cluster": "1978", "prob": -5.772674560546875, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "so", "id": 45, "lower": "so", "norm": "so", "shape": "xx", "prefix": "s", "suffix": "so", "length": 2, "cluster": "2282", "prob": -5.823773384094238, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "can", "id": 46, "lower": "can", "norm": "can", "shape": "xxx", "prefix": "c", "suffix": "can", "length": 3, "cluster": "58", "prob": -5.827763080596924, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "me", "id": 47, "lower": "me", "norm": "me", "shape": "xx", "prefix": "m", "suffix": "me", "length": 2, "cluster": "1898", "prob": -5.846089839935303, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "about", "id": 48, "lower": "about", "norm": "about", "shape": "xxxx", "prefix": "a", "suffix": "out", "length": 5, "cluster": "618", "prob": -5.906808853149414, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "he", "id": 49, "lower": "he", "norm": "he", "shape": "xx", "prefix": "h", "suffix": "he", "length": 2, "cluster": "218", "prob": -5.9319047927856445, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "It", "id": 50, "lower": "it", "norm": "It", "shape": "Xx", "prefix": "I", "suffix": "It", "length": 2, "cluster": "894", "prob": -5.93662691116333, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "all", "id": 51, "lower": "all", "norm": "all", "shape": "xxx", "prefix": "a", "suffix": "all", "length": 3, "cluster": "6122", "prob": -5.936640739440918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "The", "id": 52, "lower": "the", "norm": "The", "shape": "Xxx", "prefix": "T", "suffix": "The", "length": 3, "cluster": "30", "prob": -5.958707332611084, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "get", "id": 53, "lower": "get", "norm": "get", "shape": "xxx", "prefix": "g", "suffix": "get", "length": 3, "cluster": "2570", "prob": -5.992605686187744, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "one", "id": 54, "lower": "one", "norm": "one", "shape": "xxx", "prefix": "o", "suffix": "one", "length": 3, "cluster": "8170", "prob": -5.996385097503662, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "'m", "id": 55, "lower": "'m", "norm": "'m", "shape": "'x", "prefix": "'", "suffix": "'m", "length": 2, "cluster": "3066", "prob": -5.9999823570251465, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "out", "id": 56, "lower": "out", "norm": "out", "shape": "xxx", "prefix": "o", "suffix": "out", "length": 3, "cluster": "1386", "prob": -6.0027008056640625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "from", "id": 57, "lower": "from", "norm": "from", "shape": "xxxx", "prefix": "f", "suffix": "rom", "length": 4, "cluster": "380", "prob": -6.010132312774658, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "an", "id": 58, "lower": "an", "norm": "an", "shape": "xx", "prefix": "a", "suffix": "an", "length": 2, "cluster": "3", "prob": -6.014852046966553, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "what", "id": 59, "lower": "what", "norm": "what", "shape": "xxxx", "prefix": "w", "suffix": "hat", "length": 4, "cluster": "2026", "prob": -6.023346424102783, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "up", "id": 60, "lower": "up", "norm": "up", "shape": "xx", "prefix": "u", "suffix": "up", "length": 2, "cluster": "362", "prob": -6.028695583343506, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "]", "id": 61, "lower": "]", "norm": "]", "shape": "]", "prefix": "]", "suffix": "]", "length": 1, "cluster": "0", "prob": -6.0386552810668945, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": true}
|
|
{"orth": "\n", "id": 0, "lower": "\n", "norm": "\n", "shape": "\n", "prefix": "\n", "suffix": "\n", "length": 1, "cluster": "0", "prob": -6.0506510734558105, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "people", "id": 62, "lower": "people", "norm": "people", "shape": "xxxx", "prefix": "p", "suffix": "ple", "length": 6, "cluster": "365", "prob": -6.0715765953063965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "more", "id": 63, "lower": "more", "norm": "more", "shape": "xxxx", "prefix": "m", "suffix": "ore", "length": 4, "cluster": "1514", "prob": -6.081598281860352, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": ":", "id": 64, "lower": ":", "norm": ":", "shape": ":", "prefix": ":", "suffix": ":", "length": 1, "cluster": "228", "prob": -6.128875732421875, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "there", "id": 65, "lower": "there", "norm": "there", "shape": "xxxx", "prefix": "t", "suffix": "ere", "length": 5, "cluster": "986", "prob": -6.135282039642334, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "deleted", "id": 66, "lower": "deleted", "norm": "deleted", "shape": "xxxx", "prefix": "d", "suffix": "ted", "length": 7, "cluster": "1706", "prob": -6.1543049812316895, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "think", "id": 67, "lower": "think", "norm": "think", "shape": "xxxx", "prefix": "t", "suffix": "ink", "length": 5, "cluster": "1674", "prob": -6.180924892425537, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "will", "id": 68, "lower": "will", "norm": "will", "shape": "xxxx", "prefix": "w", "suffix": "ill", "length": 4, "cluster": "442", "prob": -6.199834823608398, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "them", "id": 69, "lower": "them", "norm": "them", "shape": "xxxx", "prefix": "t", "suffix": "hem", "length": 4, "cluster": "5994", "prob": -6.2177276611328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "we", "id": 70, "lower": "we", "norm": "we", "shape": "xx", "prefix": "w", "suffix": "we", "length": 2, "cluster": "1626", "prob": -6.230024337768555, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "'re", "id": 71, "lower": "'re", "norm": "'re", "shape": "'xx", "prefix": "'", "suffix": "'re", "length": 3, "cluster": "7162", "prob": -6.255462646484375, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "when", "id": 72, "lower": "when", "norm": "when", "shape": "xxxx", "prefix": "w", "suffix": "hen", "length": 4, "cluster": "16340", "prob": -6.2623114585876465, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "You", "id": 73, "lower": "you", "norm": "You", "shape": "Xxx", "prefix": "Y", "suffix": "You", "length": 3, "cluster": "858", "prob": -6.276494026184082, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "...", "id": 74, "lower": "...", "norm": "...", "shape": "...", "prefix": ".", "suffix": "...", "length": 3, "cluster": "966", "prob": -6.278521537780762, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "some", "id": 75, "lower": "some", "norm": "some", "shape": "xxxx", "prefix": "s", "suffix": "ome", "length": 4, "cluster": "239", "prob": -6.318882465362549, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "has", "id": 76, "lower": "has", "norm": "has", "shape": "xxx", "prefix": "h", "suffix": "has", "length": 3, "cluster": "890", "prob": -6.325605392456055, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "because", "id": 77, "lower": "because", "norm": "because", "shape": "xxxx", "prefix": "b", "suffix": "use", "length": 7, "cluster": "980", "prob": -6.349620342254639, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "know", "id": 78, "lower": "know", "norm": "know", "shape": "xxxx", "prefix": "k", "suffix": "now", "length": 4, "cluster": "3722", "prob": -6.368943214416504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "really", "id": 79, "lower": "really", "norm": "really", "shape": "xxxx", "prefix": "r", "suffix": "lly", "length": 6, "cluster": "7802", "prob": -6.370757102966309, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "by", "id": 80, "lower": "by", "norm": "by", "shape": "xx", "prefix": "b", "suffix": "by", "length": 2, "cluster": "252", "prob": -6.375086784362793, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "time", "id": 81, "lower": "time", "norm": "time", "shape": "xxxx", "prefix": "t", "suffix": "ime", "length": 4, "cluster": "477", "prob": -6.3782219886779785, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "did", "id": 82, "lower": "did", "norm": "did", "shape": "xxx", "prefix": "d", "suffix": "did", "length": 3, "cluster": "8186", "prob": -6.389003753662109, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "no", "id": 83, "lower": "no", "norm": "no", "shape": "xx", "prefix": "n", "suffix": "no", "length": 2, "cluster": "4074", "prob": -6.402691841125488, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "had", "id": 84, "lower": "had", "norm": "had", "shape": "xxx", "prefix": "h", "suffix": "had", "length": 3, "cluster": "1914", "prob": -6.45427131652832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "their", "id": 85, "lower": "their", "norm": "their", "shape": "xxxx", "prefix": "t", "suffix": "eir", "length": 5, "cluster": "187", "prob": -6.461463928222656, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "If", "id": 86, "lower": "if", "norm": "If", "shape": "Xx", "prefix": "I", "suffix": "If", "length": 2, "cluster": "190", "prob": -6.469156742095947, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "how", "id": 87, "lower": "how", "norm": "how", "shape": "xxx", "prefix": "h", "suffix": "how", "length": 3, "cluster": "10218", "prob": -6.496722221374512, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "does", "id": 88, "lower": "does", "norm": "does", "shape": "xxxx", "prefix": "d", "suffix": "oes", "length": 4, "cluster": "4090", "prob": -6.500738143920898, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "who", "id": 89, "lower": "who", "norm": "who", "shape": "xxx", "prefix": "w", "suffix": "who", "length": 3, "cluster": "410", "prob": -6.504637241363525, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "than", "id": 90, "lower": "than", "norm": "than", "shape": "xxxx", "prefix": "t", "suffix": "han", "length": 4, "cluster": "106", "prob": -6.512253761291504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "good", "id": 91, "lower": "good", "norm": "good", "shape": "xxxx", "prefix": "g", "suffix": "ood", "length": 4, "cluster": "551", "prob": -6.518923759460449, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "only", "id": 92, "lower": "only", "norm": "only", "shape": "xxxx", "prefix": "o", "suffix": "nly", "length": 4, "cluster": "15594", "prob": -6.535442352294922, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "his", "id": 93, "lower": "his", "norm": "his", "shape": "xxx", "prefix": "h", "suffix": "his", "length": 3, "cluster": "123", "prob": -6.574275016784668, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "much", "id": 94, "lower": "much", "norm": "much", "shape": "xxxx", "prefix": "m", "suffix": "uch", "length": 4, "cluster": "2794", "prob": -6.584301948547363, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": ";", "id": 95, "lower": ";", "norm": ";", "shape": ";", "prefix": ";", "suffix": ";", "length": 1, "cluster": "36", "prob": -6.586422920227051, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "'ve", "id": 96, "lower": "'ve", "norm": "'ve", "shape": "'xx", "prefix": "'", "suffix": "'ve", "length": 3, "cluster": "1018", "prob": -6.593011379241943, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
{"orth": "could", "id": 97, "lower": "could", "norm": "could", "shape": "xxxx", "prefix": "c", "suffix": "uld", "length": 5, "cluster": "954", "prob": -6.595959186553955, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|