mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Update training data docs and add vocab JSONL
This commit is contained in:
parent
57534253e6
commit
18dde7869a
500
examples/training/vocab-data.jsonl
Normal file
500
examples/training/vocab-data.jsonl
Normal file
|
@ -0,0 +1,500 @@
|
|||
{"lang": "en", "settings": {"oov_prob": -20.502029418945312}}
|
||||
{"orth": ".", "id": 1, "lower": ".", "norm": ".", "shape": ".", "prefix": ".", "suffix": ".", "length": 1, "cluster": "8", "prob": -3.0678977966308594, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": ",", "id": 2, "lower": ",", "norm": ",", "shape": ",", "prefix": ",", "suffix": ",", "length": 1, "cluster": "4", "prob": -3.4549596309661865, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "the", "id": 3, "lower": "the", "norm": "the", "shape": "xxx", "prefix": "t", "suffix": "the", "length": 3, "cluster": "11", "prob": -3.528766632080078, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "I", "id": 4, "lower": "i", "norm": "I", "shape": "X", "prefix": "I", "suffix": "I", "length": 1, "cluster": "346", "prob": -3.791565179824829, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": true, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "to", "id": 5, "lower": "to", "norm": "to", "shape": "xx", "prefix": "t", "suffix": "to", "length": 2, "cluster": "12", "prob": -3.8560216426849365, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "a", "id": 6, "lower": "a", "norm": "a", "shape": "x", "prefix": "a", "suffix": "a", "length": 1, "cluster": "19", "prob": -3.92978835105896, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "and", "id": 7, "lower": "and", "norm": "and", "shape": "xxx", "prefix": "a", "suffix": "and", "length": 3, "cluster": "20", "prob": -4.113108158111572, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "of", "id": 8, "lower": "of", "norm": "of", "shape": "xx", "prefix": "o", "suffix": "of", "length": 2, "cluster": "28", "prob": -4.27587366104126, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "you", "id": 9, "lower": "you", "norm": "you", "shape": "xxx", "prefix": "y", "suffix": "you", "length": 3, "cluster": "602", "prob": -4.373791217803955, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "it", "id": 10, "lower": "it", "norm": "it", "shape": "xx", "prefix": "i", "suffix": "it", "length": 2, "cluster": "474", "prob": -4.388050079345703, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "is", "id": 11, "lower": "is", "norm": "is", "shape": "xx", "prefix": "i", "suffix": "is", "length": 2, "cluster": "762", "prob": -4.457748889923096, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "that", "id": 12, "lower": "that", "norm": "that", "shape": "xxxx", "prefix": "t", "suffix": "hat", "length": 4, "cluster": "84", "prob": -4.464504718780518, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "\n\n", "id": 0, "lower": "\n\n", "norm": "\n\n", "shape": "\n\n", "prefix": "\n", "suffix": "\n\n", "length": 2, "cluster": "0", "prob": -4.606560707092285, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "in", "id": 13, "lower": "in", "norm": "in", "shape": "xx", "prefix": "i", "suffix": "in", "length": 2, "cluster": "60", "prob": -4.619071960449219, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "'s", "id": 14, "lower": "'s", "norm": "'s", "shape": "'x", "prefix": "'", "suffix": "'s", "length": 2, "cluster": "52", "prob": -4.830559253692627, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "n't", "id": 15, "lower": "n't", "norm": "n't", "shape": "x'x", "prefix": "n", "suffix": "n't", "length": 3, "cluster": "74", "prob": -4.859938621520996, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "for", "id": 16, "lower": "for", "norm": "for", "shape": "xxx", "prefix": "f", "suffix": "for", "length": 3, "cluster": "508", "prob": -4.8801093101501465, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "\"", "id": 17, "lower": "\"", "norm": "\"", "shape": "\"", "prefix": "\"", "suffix": "\"", "length": 1, "cluster": "0", "prob": -5.02677583694458, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": true, "is_left_punct": true, "is_right_punct": true}
|
||||
{"orth": "?", "id": 18, "lower": "?", "norm": "?", "shape": "?", "prefix": "?", "suffix": "?", "length": 1, "cluster": "0", "prob": -5.05924654006958, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": " ", "id": 0, "lower": " ", "norm": " ", "shape": " ", "prefix": " ", "suffix": " ", "length": 1, "cluster": "0", "prob": -5.129165172576904, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "have", "id": 19, "lower": "have", "norm": "have", "shape": "xxxx", "prefix": "h", "suffix": "ave", "length": 4, "cluster": "378", "prob": -5.156484603881836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "on", "id": 20, "lower": "on", "norm": "on", "shape": "xx", "prefix": "o", "suffix": "on", "length": 2, "cluster": "2044", "prob": -5.172736167907715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "*", "id": 21, "lower": "*", "norm": "*", "shape": "*", "prefix": "*", "suffix": "*", "length": 1, "cluster": "5098", "prob": -5.1977410316467285, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": ")", "id": 22, "lower": ")", "norm": ")", "shape": ")", "prefix": ")", "suffix": ")", "length": 1, "cluster": "0", "prob": -5.197994232177734, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": true}
|
||||
{"orth": "be", "id": 23, "lower": "be", "norm": "be", "shape": "xx", "prefix": "b", "suffix": "be", "length": 2, "cluster": "458", "prob": -5.225094318389893, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "with", "id": 24, "lower": "with", "norm": "with", "shape": "xxxx", "prefix": "w", "suffix": "ith", "length": 4, "cluster": "1020", "prob": -5.243249893188477, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "do", "id": 25, "lower": "do", "norm": "do", "shape": "xx", "prefix": "d", "suffix": "do", "length": 2, "cluster": "2042", "prob": -5.246996879577637, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "was", "id": 26, "lower": "was", "norm": "was", "shape": "xxx", "prefix": "w", "suffix": "was", "length": 3, "cluster": "250", "prob": -5.252320289611816, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "are", "id": 27, "lower": "are", "norm": "are", "shape": "xxx", "prefix": "a", "suffix": "are", "length": 3, "cluster": "1530", "prob": -5.271068096160889, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "not", "id": 28, "lower": "not", "norm": "not", "shape": "xxx", "prefix": "n", "suffix": "not", "length": 3, "cluster": "1258", "prob": -5.332601070404053, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "but", "id": 29, "lower": "but", "norm": "but", "shape": "xxx", "prefix": "b", "suffix": "but", "length": 3, "cluster": "148", "prob": -5.3419694900512695, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "!", "id": 30, "lower": "!", "norm": "!", "shape": "!", "prefix": "!", "suffix": "!", "length": 1, "cluster": "0", "prob": -5.359641075134277, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "this", "id": 31, "lower": "this", "norm": "this", "shape": "xxxx", "prefix": "t", "suffix": "his", "length": 4, "cluster": "63", "prob": -5.36181640625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "[", "id": 32, "lower": "[", "norm": "[", "shape": "[", "prefix": "[", "suffix": "[", "length": 1, "cluster": "0", "prob": -5.438112258911133, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": true, "is_right_punct": false}
|
||||
{"orth": "-", "id": 33, "lower": "-", "norm": "-", "shape": "-", "prefix": "-", "suffix": "-", "length": 1, "cluster": "36", "prob": -5.468655109405518, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "my", "id": 34, "lower": "my", "norm": "my", "shape": "xx", "prefix": "m", "suffix": "my", "length": 2, "cluster": "251", "prob": -5.491642951965332, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "they", "id": 35, "lower": "they", "norm": "they", "shape": "xxxx", "prefix": "t", "suffix": "hey", "length": 4, "cluster": "90", "prob": -5.5243682861328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "as", "id": 36, "lower": "as", "norm": "as", "shape": "xx", "prefix": "a", "suffix": "as", "length": 2, "cluster": "212", "prob": -5.53448486328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "like", "id": 37, "lower": "like", "norm": "like", "shape": "xxxx", "prefix": "l", "suffix": "ike", "length": 4, "cluster": "1684", "prob": -5.610429763793945, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "just", "id": 38, "lower": "just", "norm": "just", "shape": "xxxx", "prefix": "j", "suffix": "ust", "length": 4, "cluster": "31978", "prob": -5.630868434906006, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "your", "id": 39, "lower": "your", "norm": "your", "shape": "xxxx", "prefix": "y", "suffix": "our", "length": 4, "cluster": "251", "prob": -5.650108814239502, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "or", "id": 40, "lower": "or", "norm": "or", "shape": "xx", "prefix": "o", "suffix": "or", "length": 2, "cluster": "404", "prob": -5.654984951019287, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "(", "id": 41, "lower": "(", "norm": "(", "shape": "(", "prefix": "(", "suffix": "(", "length": 1, "cluster": "0", "prob": -5.75598669052124, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": true, "is_right_punct": false}
|
||||
{"orth": "at", "id": 42, "lower": "at", "norm": "at", "shape": "xx", "prefix": "a", "suffix": "at", "length": 2, "cluster": "124", "prob": -5.763442516326904, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "if", "id": 43, "lower": "if", "norm": "if", "shape": "xx", "prefix": "i", "suffix": "if", "length": 2, "cluster": "4052", "prob": -5.763589859008789, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "would", "id": 44, "lower": "would", "norm": "would", "shape": "xxxx", "prefix": "w", "suffix": "uld", "length": 5, "cluster": "1978", "prob": -5.772674560546875, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "so", "id": 45, "lower": "so", "norm": "so", "shape": "xx", "prefix": "s", "suffix": "so", "length": 2, "cluster": "2282", "prob": -5.823773384094238, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "can", "id": 46, "lower": "can", "norm": "can", "shape": "xxx", "prefix": "c", "suffix": "can", "length": 3, "cluster": "58", "prob": -5.827763080596924, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "me", "id": 47, "lower": "me", "norm": "me", "shape": "xx", "prefix": "m", "suffix": "me", "length": 2, "cluster": "1898", "prob": -5.846089839935303, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "about", "id": 48, "lower": "about", "norm": "about", "shape": "xxxx", "prefix": "a", "suffix": "out", "length": 5, "cluster": "618", "prob": -5.906808853149414, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "he", "id": 49, "lower": "he", "norm": "he", "shape": "xx", "prefix": "h", "suffix": "he", "length": 2, "cluster": "218", "prob": -5.9319047927856445, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "It", "id": 50, "lower": "it", "norm": "It", "shape": "Xx", "prefix": "I", "suffix": "It", "length": 2, "cluster": "894", "prob": -5.93662691116333, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "all", "id": 51, "lower": "all", "norm": "all", "shape": "xxx", "prefix": "a", "suffix": "all", "length": 3, "cluster": "6122", "prob": -5.936640739440918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "The", "id": 52, "lower": "the", "norm": "The", "shape": "Xxx", "prefix": "T", "suffix": "The", "length": 3, "cluster": "30", "prob": -5.958707332611084, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "get", "id": 53, "lower": "get", "norm": "get", "shape": "xxx", "prefix": "g", "suffix": "get", "length": 3, "cluster": "2570", "prob": -5.992605686187744, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "one", "id": 54, "lower": "one", "norm": "one", "shape": "xxx", "prefix": "o", "suffix": "one", "length": 3, "cluster": "8170", "prob": -5.996385097503662, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "'m", "id": 55, "lower": "'m", "norm": "'m", "shape": "'x", "prefix": "'", "suffix": "'m", "length": 2, "cluster": "3066", "prob": -5.9999823570251465, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "out", "id": 56, "lower": "out", "norm": "out", "shape": "xxx", "prefix": "o", "suffix": "out", "length": 3, "cluster": "1386", "prob": -6.0027008056640625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "from", "id": 57, "lower": "from", "norm": "from", "shape": "xxxx", "prefix": "f", "suffix": "rom", "length": 4, "cluster": "380", "prob": -6.010132312774658, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "an", "id": 58, "lower": "an", "norm": "an", "shape": "xx", "prefix": "a", "suffix": "an", "length": 2, "cluster": "3", "prob": -6.014852046966553, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "what", "id": 59, "lower": "what", "norm": "what", "shape": "xxxx", "prefix": "w", "suffix": "hat", "length": 4, "cluster": "2026", "prob": -6.023346424102783, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "up", "id": 60, "lower": "up", "norm": "up", "shape": "xx", "prefix": "u", "suffix": "up", "length": 2, "cluster": "362", "prob": -6.028695583343506, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "]", "id": 61, "lower": "]", "norm": "]", "shape": "]", "prefix": "]", "suffix": "]", "length": 1, "cluster": "0", "prob": -6.0386552810668945, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": true}
|
||||
{"orth": "\n", "id": 0, "lower": "\n", "norm": "\n", "shape": "\n", "prefix": "\n", "suffix": "\n", "length": 1, "cluster": "0", "prob": -6.0506510734558105, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "people", "id": 62, "lower": "people", "norm": "people", "shape": "xxxx", "prefix": "p", "suffix": "ple", "length": 6, "cluster": "365", "prob": -6.0715765953063965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "more", "id": 63, "lower": "more", "norm": "more", "shape": "xxxx", "prefix": "m", "suffix": "ore", "length": 4, "cluster": "1514", "prob": -6.081598281860352, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": ":", "id": 64, "lower": ":", "norm": ":", "shape": ":", "prefix": ":", "suffix": ":", "length": 1, "cluster": "228", "prob": -6.128875732421875, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "there", "id": 65, "lower": "there", "norm": "there", "shape": "xxxx", "prefix": "t", "suffix": "ere", "length": 5, "cluster": "986", "prob": -6.135282039642334, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "deleted", "id": 66, "lower": "deleted", "norm": "deleted", "shape": "xxxx", "prefix": "d", "suffix": "ted", "length": 7, "cluster": "1706", "prob": -6.1543049812316895, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "think", "id": 67, "lower": "think", "norm": "think", "shape": "xxxx", "prefix": "t", "suffix": "ink", "length": 5, "cluster": "1674", "prob": -6.180924892425537, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "will", "id": 68, "lower": "will", "norm": "will", "shape": "xxxx", "prefix": "w", "suffix": "ill", "length": 4, "cluster": "442", "prob": -6.199834823608398, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "them", "id": 69, "lower": "them", "norm": "them", "shape": "xxxx", "prefix": "t", "suffix": "hem", "length": 4, "cluster": "5994", "prob": -6.2177276611328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "we", "id": 70, "lower": "we", "norm": "we", "shape": "xx", "prefix": "w", "suffix": "we", "length": 2, "cluster": "1626", "prob": -6.230024337768555, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "'re", "id": 71, "lower": "'re", "norm": "'re", "shape": "'xx", "prefix": "'", "suffix": "'re", "length": 3, "cluster": "7162", "prob": -6.255462646484375, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "when", "id": 72, "lower": "when", "norm": "when", "shape": "xxxx", "prefix": "w", "suffix": "hen", "length": 4, "cluster": "16340", "prob": -6.2623114585876465, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "You", "id": 73, "lower": "you", "norm": "You", "shape": "Xxx", "prefix": "Y", "suffix": "You", "length": 3, "cluster": "858", "prob": -6.276494026184082, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "...", "id": 74, "lower": "...", "norm": "...", "shape": "...", "prefix": ".", "suffix": "...", "length": 3, "cluster": "966", "prob": -6.278521537780762, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "some", "id": 75, "lower": "some", "norm": "some", "shape": "xxxx", "prefix": "s", "suffix": "ome", "length": 4, "cluster": "239", "prob": -6.318882465362549, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "has", "id": 76, "lower": "has", "norm": "has", "shape": "xxx", "prefix": "h", "suffix": "has", "length": 3, "cluster": "890", "prob": -6.325605392456055, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "because", "id": 77, "lower": "because", "norm": "because", "shape": "xxxx", "prefix": "b", "suffix": "use", "length": 7, "cluster": "980", "prob": -6.349620342254639, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "know", "id": 78, "lower": "know", "norm": "know", "shape": "xxxx", "prefix": "k", "suffix": "now", "length": 4, "cluster": "3722", "prob": -6.368943214416504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "really", "id": 79, "lower": "really", "norm": "really", "shape": "xxxx", "prefix": "r", "suffix": "lly", "length": 6, "cluster": "7802", "prob": -6.370757102966309, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "by", "id": 80, "lower": "by", "norm": "by", "shape": "xx", "prefix": "b", "suffix": "by", "length": 2, "cluster": "252", "prob": -6.375086784362793, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "time", "id": 81, "lower": "time", "norm": "time", "shape": "xxxx", "prefix": "t", "suffix": "ime", "length": 4, "cluster": "477", "prob": -6.3782219886779785, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "did", "id": 82, "lower": "did", "norm": "did", "shape": "xxx", "prefix": "d", "suffix": "did", "length": 3, "cluster": "8186", "prob": -6.389003753662109, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "no", "id": 83, "lower": "no", "norm": "no", "shape": "xx", "prefix": "n", "suffix": "no", "length": 2, "cluster": "4074", "prob": -6.402691841125488, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "had", "id": 84, "lower": "had", "norm": "had", "shape": "xxx", "prefix": "h", "suffix": "had", "length": 3, "cluster": "1914", "prob": -6.45427131652832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "their", "id": 85, "lower": "their", "norm": "their", "shape": "xxxx", "prefix": "t", "suffix": "eir", "length": 5, "cluster": "187", "prob": -6.461463928222656, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "If", "id": 86, "lower": "if", "norm": "If", "shape": "Xx", "prefix": "I", "suffix": "If", "length": 2, "cluster": "190", "prob": -6.469156742095947, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "how", "id": 87, "lower": "how", "norm": "how", "shape": "xxx", "prefix": "h", "suffix": "how", "length": 3, "cluster": "10218", "prob": -6.496722221374512, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "does", "id": 88, "lower": "does", "norm": "does", "shape": "xxxx", "prefix": "d", "suffix": "oes", "length": 4, "cluster": "4090", "prob": -6.500738143920898, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "who", "id": 89, "lower": "who", "norm": "who", "shape": "xxx", "prefix": "w", "suffix": "who", "length": 3, "cluster": "410", "prob": -6.504637241363525, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "than", "id": 90, "lower": "than", "norm": "than", "shape": "xxxx", "prefix": "t", "suffix": "han", "length": 4, "cluster": "106", "prob": -6.512253761291504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "good", "id": 91, "lower": "good", "norm": "good", "shape": "xxxx", "prefix": "g", "suffix": "ood", "length": 4, "cluster": "551", "prob": -6.518923759460449, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "only", "id": 92, "lower": "only", "norm": "only", "shape": "xxxx", "prefix": "o", "suffix": "nly", "length": 4, "cluster": "15594", "prob": -6.535442352294922, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "his", "id": 93, "lower": "his", "norm": "his", "shape": "xxx", "prefix": "h", "suffix": "his", "length": 3, "cluster": "123", "prob": -6.574275016784668, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "much", "id": 94, "lower": "much", "norm": "much", "shape": "xxxx", "prefix": "m", "suffix": "uch", "length": 4, "cluster": "2794", "prob": -6.584301948547363, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": ";", "id": 95, "lower": ";", "norm": ";", "shape": ";", "prefix": ";", "suffix": ";", "length": 1, "cluster": "36", "prob": -6.586422920227051, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "'ve", "id": 96, "lower": "'ve", "norm": "'ve", "shape": "'xx", "prefix": "'", "suffix": "'ve", "length": 3, "cluster": "1018", "prob": -6.593011379241943, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "could", "id": 97, "lower": "could", "norm": "could", "shape": "xxxx", "prefix": "c", "suffix": "uld", "length": 5, "cluster": "954", "prob": -6.595959186553955, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "then", "id": 98, "lower": "then", "norm": "then", "shape": "xxxx", "prefix": "t", "suffix": "hen", "length": 4, "cluster": "9962", "prob": -6.598200798034668, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "other", "id": 99, "lower": "other", "norm": "other", "shape": "xxxx", "prefix": "o", "suffix": "her", "length": 5, "cluster": "47", "prob": -6.6438727378845215, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "make", "id": 100, "lower": "make", "norm": "make", "shape": "xxxx", "prefix": "m", "suffix": "ake", "length": 4, "cluster": "4618", "prob": -6.66980504989624, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "been", "id": 101, "lower": "been", "norm": "been", "shape": "xxxx", "prefix": "b", "suffix": "een", "length": 4, "cluster": "202", "prob": -6.670916557312012, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "were", "id": 102, "lower": "were", "norm": "were", "shape": "xxxx", "prefix": "w", "suffix": "ere", "length": 4, "cluster": "506", "prob": -6.673174858093262, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "see", "id": 103, "lower": "see", "norm": "see", "shape": "xxx", "prefix": "s", "suffix": "see", "length": 3, "cluster": "1546", "prob": -6.6828837394714355, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "That", "id": 104, "lower": "that", "norm": "That", "shape": "Xxxx", "prefix": "T", "suffix": "hat", "length": 4, "cluster": "1406", "prob": -6.688080310821533, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "i", "id": 105, "lower": "i", "norm": "i", "shape": "x", "prefix": "i", "suffix": "i", "length": 1, "cluster": "966", "prob": -6.6887712478637695, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "any", "id": 106, "lower": "any", "norm": "any", "shape": "xxx", "prefix": "a", "suffix": "any", "length": 3, "cluster": "12266", "prob": -6.689523220062256, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "want", "id": 107, "lower": "want", "norm": "want", "shape": "xxxx", "prefix": "w", "suffix": "ant", "length": 4, "cluster": "906", "prob": -6.694204807281494, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "even", "id": 108, "lower": "even", "norm": "even", "shape": "xxxx", "prefix": "e", "suffix": "ven", "length": 4, "cluster": "3306", "prob": -6.702912330627441, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "should", "id": 109, "lower": "should", "norm": "should", "shape": "xxxx", "prefix": "s", "suffix": "uld", "length": 6, "cluster": "698", "prob": -6.733259677886963, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "way", "id": 110, "lower": "way", "norm": "way", "shape": "xxx", "prefix": "w", "suffix": "way", "length": 3, "cluster": "1349", "prob": -6.73627233505249, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "'", "id": 111, "lower": "'", "norm": "'", "shape": "'", "prefix": "'", "suffix": "'", "length": 1, "cluster": "916", "prob": -6.73720121383667, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": true, "is_left_punct": true, "is_right_punct": true}
|
||||
{"orth": "too", "id": 112, "lower": "too", "norm": "too", "shape": "xxx", "prefix": "t", "suffix": "too", "length": 3, "cluster": "6378", "prob": -6.77581787109375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "go", "id": 113, "lower": "go", "norm": "go", "shape": "xx", "prefix": "g", "suffix": "go", "length": 2, "cluster": "3466", "prob": -6.775965213775635, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "him", "id": 114, "lower": "him", "norm": "him", "shape": "xxx", "prefix": "h", "suffix": "him", "length": 3, "cluster": "1898", "prob": -6.783067226409912, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "This", "id": 115, "lower": "this", "norm": "This", "shape": "Xxxx", "prefix": "T", "suffix": "his", "length": 4, "cluster": "382", "prob": -6.78391695022583, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "her", "id": 116, "lower": "her", "norm": "her", "shape": "xxx", "prefix": "h", "suffix": "her", "length": 3, "cluster": "507", "prob": -6.798486709594727, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "going", "id": 117, "lower": "going", "norm": "going", "shape": "xxxx", "prefix": "g", "suffix": "ing", "length": 5, "cluster": "2090", "prob": -6.833367824554443, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "now", "id": 118, "lower": "now", "norm": "now", "shape": "xxx", "prefix": "n", "suffix": "now", "length": 3, "cluster": "1770", "prob": -6.834407329559326, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "being", "id": 119, "lower": "being", "norm": "being", "shape": "xxxx", "prefix": "b", "suffix": "ing", "length": 5, "cluster": "3818", "prob": -6.845808029174805, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "still", "id": 120, "lower": "still", "norm": "still", "shape": "xxxx", "prefix": "s", "suffix": "ill", "length": 5, "cluster": "1658", "prob": -6.867525100708008, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "into", "id": 121, "lower": "into", "norm": "into", "shape": "xxxx", "prefix": "i", "suffix": "nto", "length": 4, "cluster": "8188", "prob": -6.87359094619751, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "which", "id": 122, "lower": "which", "norm": "which", "shape": "xxxx", "prefix": "w", "suffix": "ich", "length": 5, "cluster": "154", "prob": -6.877470970153809, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "something", "id": 123, "lower": "something", "norm": "something", "shape": "xxxx", "prefix": "s", "suffix": "ing", "length": 9, "cluster": "14314", "prob": -6.887354850769043, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "she", "id": 124, "lower": "she", "norm": "she", "shape": "xxx", "prefix": "s", "suffix": "she", "length": 3, "cluster": "218", "prob": -6.90155553817749, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "also", "id": 125, "lower": "also", "norm": "also", "shape": "xxxx", "prefix": "a", "suffix": "lso", "length": 4, "cluster": "122", "prob": -6.928974151611328, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "very", "id": 126, "lower": "very", "norm": "very", "shape": "xxxx", "prefix": "v", "suffix": "ery", "length": 4, "cluster": "234", "prob": -6.93242883682251, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "right", "id": 127, "lower": "right", "norm": "right", "shape": "xxxx", "prefix": "r", "suffix": "ght", "length": 5, "cluster": "14122", "prob": -6.933711051940918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "game", "id": 128, "lower": "game", "norm": "game", "shape": "xxxx", "prefix": "g", "suffix": "ame", "length": 4, "cluster": "7973", "prob": -6.940612316131592, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "say", "id": 129, "lower": "say", "norm": "say", "shape": "xxx", "prefix": "s", "suffix": "say", "length": 3, "cluster": "1162", "prob": -6.950479984283447, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "'ll", "id": 130, "lower": "'ll", "norm": "'ll", "shape": "'xx", "prefix": "'", "suffix": "'ll", "length": 3, "cluster": "5114", "prob": -6.958071231842041, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "got", "id": 131, "lower": "got", "norm": "got", "shape": "xxx", "prefix": "g", "suffix": "got", "length": 3, "cluster": "10666", "prob": -6.98855447769165, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "well", "id": 132, "lower": "well", "norm": "well", "shape": "xxxx", "prefix": "w", "suffix": "ell", "length": 4, "cluster": "746", "prob": -6.995903968811035, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "need", "id": 133, "lower": "need", "norm": "need", "shape": "xxxx", "prefix": "n", "suffix": "eed", "length": 4, "cluster": "2954", "prob": -7.008103370666504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "And", "id": 134, "lower": "and", "norm": "And", "shape": "Xxx", "prefix": "A", "suffix": "And", "length": 3, "cluster": "1470", "prob": -7.012199401855469, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "But", "id": 135, "lower": "but", "norm": "But", "shape": "Xxx", "prefix": "B", "suffix": "But", "length": 3, "cluster": "1470", "prob": -7.0142974853515625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "over", "id": 136, "lower": "over", "norm": "over", "shape": "xxxx", "prefix": "o", "suffix": "ver", "length": 4, "cluster": "49148", "prob": -7.027544975280762, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "back", "id": 137, "lower": "back", "norm": "back", "shape": "xxxx", "prefix": "b", "suffix": "ack", "length": 4, "cluster": "7530", "prob": -7.033305644989014, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "same", "id": 138, "lower": "same", "norm": "same", "shape": "xxxx", "prefix": "s", "suffix": "ame", "length": 4, "cluster": "991", "prob": -7.053191661834717, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "thing", "id": 139, "lower": "thing", "norm": "thing", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 5, "cluster": "2013", "prob": -7.063167572021484, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "first", "id": 140, "lower": "first", "norm": "first", "shape": "xxxx", "prefix": "f", "suffix": "rst", "length": 5, "cluster": "159", "prob": -7.063716888427734, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "most", "id": 141, "lower": "most", "norm": "most", "shape": "xxxx", "prefix": "m", "suffix": "ost", "length": 4, "cluster": "175", "prob": -7.0663957595825195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "here", "id": 142, "lower": "here", "norm": "here", "shape": "xxxx", "prefix": "h", "suffix": "ere", "length": 4, "cluster": "3946", "prob": -7.0680251121521, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "ca", "id": 143, "lower": "ca", "norm": "ca", "shape": "xx", "prefix": "c", "suffix": "ca", "length": 2, "cluster": "0", "prob": -7.071251392364502, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "off", "id": 144, "lower": "off", "norm": "off", "shape": "xxx", "prefix": "o", "suffix": "off", "length": 3, "cluster": "6506", "prob": -7.073742389678955, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "'d", "id": 145, "lower": "'d", "norm": "'d", "shape": "'x", "prefix": "'", "suffix": "'d", "length": 2, "cluster": "5114", "prob": -7.075286865234375, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "They", "id": 146, "lower": "they", "norm": "They", "shape": "Xxxx", "prefix": "T", "suffix": "hey", "length": 4, "cluster": "1882", "prob": -7.0789008140563965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "work", "id": 147, "lower": "work", "norm": "work", "shape": "xxxx", "prefix": "w", "suffix": "ork", "length": 4, "cluster": "1973", "prob": -7.081293106079102, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "use", "id": 148, "lower": "use", "norm": "use", "shape": "xxx", "prefix": "u", "suffix": "use", "length": 3, "cluster": "2741", "prob": -7.083596229553223, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "never", "id": 149, "lower": "never", "norm": "never", "shape": "xxxx", "prefix": "n", "suffix": "ver", "length": 5, "cluster": "15994", "prob": -7.084620475769043, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "better", "id": 150, "lower": "better", "norm": "better", "shape": "xxxx", "prefix": "b", "suffix": "ter", "length": 6, "cluster": "7658", "prob": -7.1072587966918945, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "though", "id": 151, "lower": "though", "norm": "though", "shape": "xxxx", "prefix": "t", "suffix": "ugh", "length": 6, "cluster": "2004", "prob": -7.113335132598877, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "lot", "id": 152, "lower": "lot", "norm": "lot", "shape": "xxx", "prefix": "l", "suffix": "lot", "length": 3, "cluster": "853", "prob": -7.113600254058838, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "pretty", "id": 153, "lower": "pretty", "norm": "pretty", "shape": "xxxx", "prefix": "p", "suffix": "tty", "length": 6, "cluster": "234", "prob": -7.1256103515625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "where", "id": 154, "lower": "where", "norm": "where", "shape": "xxxx", "prefix": "w", "suffix": "ere", "length": 5, "cluster": "8148", "prob": -7.146170139312744, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "am", "id": 155, "lower": "am", "norm": "am", "shape": "xx", "prefix": "a", "suffix": "am", "length": 2, "cluster": "3066", "prob": -7.149725437164307, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "things", "id": 156, "lower": "things", "norm": "things", "shape": "xxxx", "prefix": "t", "suffix": "ngs", "length": 6, "cluster": "3917", "prob": -7.154941082000732, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "sure", "id": 157, "lower": "sure", "norm": "sure", "shape": "xxxx", "prefix": "s", "suffix": "ure", "length": 4, "cluster": "490", "prob": -7.157395839691162, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "actually", "id": 158, "lower": "actually", "norm": "actually", "shape": "xxxx", "prefix": "a", "suffix": "lly", "length": 8, "cluster": "7802", "prob": -7.160778045654297, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "He", "id": 159, "lower": "he", "norm": "He", "shape": "Xx", "prefix": "H", "suffix": "He", "length": 2, "cluster": "126", "prob": -7.162238121032715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "those", "id": 160, "lower": "those", "norm": "those", "shape": "xxxx", "prefix": "t", "suffix": "ose", "length": 5, "cluster": "495", "prob": -7.169255256652832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "why", "id": 161, "lower": "why", "norm": "why", "shape": "xxx", "prefix": "w", "suffix": "why", "length": 3, "cluster": "18410", "prob": -7.178915500640869, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "So", "id": 162, "lower": "so", "norm": "So", "shape": "Xx", "prefix": "S", "suffix": "So", "length": 2, "cluster": "1726", "prob": -7.199381351470947, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "take", "id": 163, "lower": "take", "norm": "take", "shape": "xxxx", "prefix": "t", "suffix": "ake", "length": 4, "cluster": "6666", "prob": -7.209812641143799, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "down", "id": 164, "lower": "down", "norm": "down", "shape": "xxxx", "prefix": "d", "suffix": "own", "length": 4, "cluster": "2410", "prob": -7.223586082458496, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "What", "id": 165, "lower": "what", "norm": "What", "shape": "Xxxx", "prefix": "W", "suffix": "hat", "length": 4, "cluster": "702", "prob": -7.226758003234863, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "someone", "id": 166, "lower": "someone", "norm": "someone", "shape": "xxxx", "prefix": "s", "suffix": "one", "length": 7, "cluster": "30698", "prob": -7.249640464782715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "before", "id": 167, "lower": "before", "norm": "before", "shape": "xxxx", "prefix": "b", "suffix": "ore", "length": 6, "cluster": "1492", "prob": -7.253359794616699, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "said", "id": 168, "lower": "said", "norm": "said", "shape": "xxxx", "prefix": "s", "suffix": "aid", "length": 4, "cluster": "116", "prob": -7.258025169372559, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "after", "id": 169, "lower": "after", "norm": "after", "shape": "xxxx", "prefix": "a", "suffix": "ter", "length": 5, "cluster": "3540", "prob": -7.265651702880859, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "around", "id": 170, "lower": "around", "norm": "around", "shape": "xxxx", "prefix": "a", "suffix": "und", "length": 6, "cluster": "245756", "prob": -7.313362121582031, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "its", "id": 171, "lower": "its", "norm": "its", "shape": "xxx", "prefix": "i", "suffix": "its", "length": 3, "cluster": "27", "prob": -7.321457862854004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "2", "id": 172, "lower": "2", "norm": "2", "shape": "d", "prefix": "2", "suffix": "2", "length": 1, "cluster": "818", "prob": -7.324268341064453, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "feel", "id": 173, "lower": "feel", "norm": "feel", "shape": "xxxx", "prefix": "f", "suffix": "eel", "length": 4, "cluster": "1674", "prob": -7.342533588409424, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "My", "id": 174, "lower": "my", "norm": "My", "shape": "Xx", "prefix": "M", "suffix": "My", "length": 2, "cluster": "94", "prob": -7.345071792602539, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "There", "id": 175, "lower": "there", "norm": "There", "shape": "Xxxxx", "prefix": "T", "suffix": "ere", "length": 5, "cluster": "1918", "prob": -7.347356796264648, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "look", "id": 176, "lower": "look", "norm": "look", "shape": "xxxx", "prefix": "l", "suffix": "ook", "length": 4, "cluster": "2442", "prob": -7.352481365203857, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "these", "id": 177, "lower": "these", "norm": "these", "shape": "xxxx", "prefix": "t", "suffix": "ese", "length": 5, "cluster": "1519", "prob": -7.36269474029541, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "years", "id": 178, "lower": "years", "norm": "years", "shape": "xxxx", "prefix": "y", "suffix": "ars", "length": 5, "cluster": "189", "prob": -7.368987560272217, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "love", "id": 179, "lower": "love", "norm": "love", "shape": "xxxx", "prefix": "l", "suffix": "ove", "length": 4, "cluster": "2661", "prob": -7.372685432434082, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "always", "id": 180, "lower": "always", "norm": "always", "shape": "xxxx", "prefix": "a", "suffix": "ays", "length": 6, "cluster": "15994", "prob": -7.37296724319458, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "many", "id": 181, "lower": "many", "norm": "many", "shape": "xxxx", "prefix": "m", "suffix": "any", "length": 4, "cluster": "751", "prob": -7.377613067626953, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": ">", "id": 0, "lower": ">", "norm": ">", "shape": "&xx", "prefix": "&", "suffix": ">", "length": 3, "cluster": "0", "prob": -7.38146448135376, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "A", "id": 182, "lower": "a", "norm": "A", "shape": "X", "prefix": "A", "suffix": "A", "length": 1, "cluster": "222", "prob": -7.38541841506958, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": true, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "point", "id": 183, "lower": "point", "norm": "point", "shape": "xxxx", "prefix": "p", "suffix": "int", "length": 5, "cluster": "389", "prob": -7.386973857879639, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "find", "id": 184, "lower": "find", "norm": "find", "shape": "xxxx", "prefix": "f", "suffix": "ind", "length": 4, "cluster": "5642", "prob": -7.387212753295898, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "probably", "id": 185, "lower": "probably", "norm": "probably", "shape": "xxxx", "prefix": "p", "suffix": "bly", "length": 8, "cluster": "5754", "prob": -7.395048141479492, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "new", "id": 186, "lower": "new", "norm": "new", "shape": "xxx", "prefix": "n", "suffix": "new", "length": 3, "cluster": "199", "prob": -7.398182392120361, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "made", "id": 187, "lower": "made", "norm": "made", "shape": "xxxx", "prefix": "m", "suffix": "ade", "length": 4, "cluster": "120490", "prob": -7.399899005889893, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "day", "id": 188, "lower": "day", "norm": "day", "shape": "xxx", "prefix": "d", "suffix": "day", "length": 3, "cluster": "989", "prob": -7.400947093963623, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "We", "id": 189, "lower": "we", "norm": "We", "shape": "Xx", "prefix": "W", "suffix": "We", "length": 2, "cluster": "858", "prob": -7.402578353881836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "every", "id": 190, "lower": "every", "norm": "every", "shape": "xxxx", "prefix": "e", "suffix": "ery", "length": 5, "cluster": "61418", "prob": -7.414647579193115, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "great", "id": 191, "lower": "great", "norm": "great", "shape": "xxxx", "prefix": "g", "suffix": "eat", "length": 5, "cluster": "1831", "prob": -7.420454502105713, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "our", "id": 192, "lower": "our", "norm": "our", "shape": "xxx", "prefix": "o", "suffix": "our", "length": 3, "cluster": "59", "prob": -7.4210286140441895, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "two", "id": 193, "lower": "two", "norm": "two", "shape": "xxx", "prefix": "t", "suffix": "two", "length": 3, "cluster": "15", "prob": -7.433600425720215, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "anything", "id": 194, "lower": "anything", "norm": "anything", "shape": "xxxx", "prefix": "a", "suffix": "ing", "length": 8, "cluster": "14314", "prob": -7.439383506774902, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "while", "id": 195, "lower": "while", "norm": "while", "shape": "xxxx", "prefix": "w", "suffix": "ile", "length": 5, "cluster": "6100", "prob": -7.440170764923096, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "few", "id": 196, "lower": "few", "norm": "few", "shape": "xxx", "prefix": "f", "suffix": "few", "length": 3, "cluster": "79", "prob": -7.440912246704102, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "$", "id": 197, "lower": "$", "norm": "$", "shape": "$", "prefix": "$", "suffix": "$", "length": 1, "cluster": "18", "prob": -7.450106620788574, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "bad", "id": 198, "lower": "bad", "norm": "bad", "shape": "xxx", "prefix": "b", "suffix": "bad", "length": 3, "cluster": "551", "prob": -7.452563762664795, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "No", "id": 199, "lower": "no", "norm": "No", "shape": "Xx", "prefix": "N", "suffix": "No", "length": 2, "cluster": "94", "prob": -7.456389427185059, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "little", "id": 200, "lower": "little", "norm": "little", "shape": "xxxx", "prefix": "l", "suffix": "tle", "length": 6, "cluster": "1959", "prob": -7.480203628540039, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "might", "id": 201, "lower": "might", "norm": "might", "shape": "xxxx", "prefix": "m", "suffix": "ght", "length": 5, "cluster": "186", "prob": -7.490107536315918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "best", "id": 202, "lower": "best", "norm": "best", "shape": "xxxx", "prefix": "b", "suffix": "est", "length": 4, "cluster": "479", "prob": -7.492556571960449, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "play", "id": 203, "lower": "play", "norm": "play", "shape": "xxxx", "prefix": "p", "suffix": "lay", "length": 4, "cluster": "1717", "prob": -7.50220251083374, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "shit", "id": 204, "lower": "shit", "norm": "shit", "shape": "xxxx", "prefix": "s", "suffix": "hit", "length": 4, "cluster": "0", "prob": -7.522359371185303, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "try", "id": 205, "lower": "try", "norm": "try", "shape": "xxx", "prefix": "t", "suffix": "try", "length": 3, "cluster": "1930", "prob": -7.540920734405518, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "used", "id": 206, "lower": "used", "norm": "used", "shape": "xxxx", "prefix": "u", "suffix": "sed", "length": 4, "cluster": "15402", "prob": -7.542972087860107, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "long", "id": 207, "lower": "long", "norm": "long", "shape": "xxxx", "prefix": "l", "suffix": "ong", "length": 4, "cluster": "935", "prob": -7.544892311096191, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "doing", "id": 208, "lower": "doing", "norm": "doing", "shape": "xxxx", "prefix": "d", "suffix": "ing", "length": 5, "cluster": "15338", "prob": -7.553442478179932, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "getting", "id": 209, "lower": "getting", "norm": "getting", "shape": "xxxx", "prefix": "g", "suffix": "ing", "length": 7, "cluster": "31722", "prob": -7.564762115478516, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "post", "id": 210, "lower": "post", "norm": "post", "shape": "xxxx", "prefix": "p", "suffix": "ost", "length": 4, "cluster": "3733", "prob": -7.565684795379639, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "year", "id": 211, "lower": "year", "norm": "year", "shape": "xxxx", "prefix": "y", "suffix": "ear", "length": 4, "cluster": "29", "prob": -7.567681312561035, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Do", "id": 212, "lower": "do", "norm": "Do", "shape": "Xx", "prefix": "D", "suffix": "Do", "length": 2, "cluster": "702", "prob": -7.570033073425293, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "life", "id": 213, "lower": "life", "norm": "life", "shape": "xxxx", "prefix": "l", "suffix": "ife", "length": 4, "cluster": "1893", "prob": -7.574200630187988, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "through", "id": 214, "lower": "through", "norm": "through", "shape": "xxxx", "prefix": "t", "suffix": "ugh", "length": 7, "cluster": "65532", "prob": -7.575429439544678, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "guy", "id": 215, "lower": "guy", "norm": "guy", "shape": "xxx", "prefix": "g", "suffix": "guy", "length": 3, "cluster": "549", "prob": -7.582011699676514, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "enough", "id": 216, "lower": "enough", "norm": "enough", "shape": "xxxx", "prefix": "e", "suffix": "ugh", "length": 6, "cluster": "1834", "prob": -7.586349010467529, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "ever", "id": 217, "lower": "ever", "norm": "ever", "shape": "xxxx", "prefix": "e", "suffix": "ver", "length": 4, "cluster": "14058", "prob": -7.591183662414551, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "In", "id": 218, "lower": "in", "norm": "In", "shape": "Xx", "prefix": "I", "suffix": "In", "length": 2, "cluster": "62", "prob": -7.603263854980469, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "give", "id": 219, "lower": "give", "norm": "give", "shape": "xxxx", "prefix": "g", "suffix": "ive", "length": 4, "cluster": "522", "prob": -7.611863136291504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "mean", "id": 220, "lower": "mean", "norm": "mean", "shape": "xxxx", "prefix": "m", "suffix": "ean", "length": 4, "cluster": "3082", "prob": -7.611870765686035, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "thought", "id": 221, "lower": "thought", "norm": "thought", "shape": "xxxx", "prefix": "t", "suffix": "ght", "length": 7, "cluster": "650", "prob": -7.614910125732422, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "since", "id": 222, "lower": "since", "norm": "since", "shape": "xxxx", "prefix": "s", "suffix": "nce", "length": 5, "cluster": "468", "prob": -7.615171909332275, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "|", "id": 223, "lower": "|", "norm": "|", "shape": "|", "prefix": "|", "suffix": "|", "length": 1, "cluster": "0", "prob": -7.6297454833984375, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "different", "id": 224, "lower": "different", "norm": "different", "shape": "xxxx", "prefix": "d", "suffix": "ent", "length": 9, "cluster": "1319", "prob": -7.630640506744385, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "3", "id": 225, "lower": "3", "norm": "3", "shape": "d", "prefix": "3", "suffix": "3", "length": 1, "cluster": "818", "prob": -7.636006832122803, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "last", "id": 226, "lower": "last", "norm": "last", "shape": "xxxx", "prefix": "l", "suffix": "ast", "length": 4, "cluster": "127", "prob": -7.636077404022217, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "own", "id": 227, "lower": "own", "norm": "own", "shape": "xxx", "prefix": "o", "suffix": "own", "length": 3, "cluster": "217", "prob": -7.636797904968262, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "1", "id": 228, "lower": "1", "norm": "1", "shape": "d", "prefix": "1", "suffix": "1", "length": 1, "cluster": "306", "prob": -7.639832973480225, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "us", "id": 229, "lower": "us", "norm": "us", "shape": "xx", "prefix": "u", "suffix": "us", "length": 2, "cluster": "1898", "prob": -7.643693923950195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "%", "id": 230, "lower": "%", "norm": "%", "shape": "%", "prefix": "%", "suffix": "%", "length": 1, "cluster": "34", "prob": -7.645323753356934, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Not", "id": 231, "lower": "not", "norm": "Not", "shape": "Xxx", "prefix": "N", "suffix": "Not", "length": 3, "cluster": "1982", "prob": -7.65825080871582, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "put", "id": 232, "lower": "put", "norm": "put", "shape": "xxx", "prefix": "p", "suffix": "put", "length": 3, "cluster": "6314", "prob": -7.666473865509033, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "man", "id": 233, "lower": "man", "norm": "man", "shape": "xxx", "prefix": "m", "suffix": "man", "length": 3, "cluster": "549", "prob": -7.668745517730713, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "may", "id": 234, "lower": "may", "norm": "may", "shape": "xxx", "prefix": "m", "suffix": "may", "length": 3, "cluster": "186", "prob": -7.678494930267334, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "makes", "id": 235, "lower": "makes", "norm": "makes", "shape": "xxxx", "prefix": "m", "suffix": "kes", "length": 5, "cluster": "426", "prob": -7.684445858001709, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "money", "id": 236, "lower": "money", "norm": "money", "shape": "xxxx", "prefix": "m", "suffix": "ney", "length": 5, "cluster": "357", "prob": -7.693631172180176, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": ":)", "id": 237, "lower": ":)", "norm": ":)", "shape": ":)", "prefix": ":", "suffix": ":)", "length": 2, "cluster": "0", "prob": -7.694086074829102, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "without", "id": 238, "lower": "without", "norm": "without", "shape": "xxxx", "prefix": "w", "suffix": "out", "length": 7, "cluster": "57340", "prob": -7.694504261016846, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "bit", "id": 239, "lower": "bit", "norm": "bit", "shape": "xxx", "prefix": "b", "suffix": "bit", "length": 3, "cluster": "853", "prob": -7.721855640411377, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "person", "id": 240, "lower": "person", "norm": "person", "shape": "xxxx", "prefix": "p", "suffix": "son", "length": 6, "cluster": "549", "prob": -7.727076530456543, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Also", "id": 241, "lower": "also", "norm": "Also", "shape": "Xxxx", "prefix": "A", "suffix": "lso", "length": 4, "cluster": "254", "prob": -7.734253406524658, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "again", "id": 242, "lower": "again", "norm": "again", "shape": "xxxx", "prefix": "a", "suffix": "ain", "length": 5, "cluster": "28522", "prob": -7.7370924949646, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Just", "id": 243, "lower": "just", "norm": "Just", "shape": "Xxxx", "prefix": "J", "suffix": "ust", "length": 4, "cluster": "1982", "prob": -7.743429183959961, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "both", "id": 244, "lower": "both", "norm": "both", "shape": "xxxx", "prefix": "b", "suffix": "oth", "length": 4, "cluster": "1007", "prob": -7.750914573669434, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "help", "id": 245, "lower": "help", "norm": "help", "shape": "xxxx", "prefix": "h", "suffix": "elp", "length": 4, "cluster": "309", "prob": -7.758815288543701, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "trying", "id": 246, "lower": "trying", "norm": "trying", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 6, "cluster": "14378", "prob": -7.759474754333496, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "least", "id": 247, "lower": "least", "norm": "least", "shape": "xxxx", "prefix": "l", "suffix": "ast", "length": 5, "cluster": "3690", "prob": -7.7660088539123535, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "come", "id": 248, "lower": "come", "norm": "come", "shape": "xxxx", "prefix": "c", "suffix": "ome", "length": 4, "cluster": "7562", "prob": -7.775856971740723, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "keep", "id": 249, "lower": "keep", "norm": "keep", "shape": "xxxx", "prefix": "k", "suffix": "eep", "length": 4, "cluster": "3338", "prob": -7.778285980224609, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Thanks", "id": 250, "lower": "thanks", "norm": "Thanks", "shape": "Xxxxx", "prefix": "T", "suffix": "nks", "length": 6, "cluster": "510", "prob": -7.781467914581299, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "read", "id": 251, "lower": "read", "norm": "read", "shape": "xxxx", "prefix": "r", "suffix": "ead", "length": 4, "cluster": "6314", "prob": -7.787075042724609, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "nt", "id": 252, "lower": "nt", "norm": "nt", "shape": "xx", "prefix": "n", "suffix": "nt", "length": 2, "cluster": "3685", "prob": -7.788322925567627, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "part", "id": 253, "lower": "part", "norm": "part", "shape": "xxxx", "prefix": "p", "suffix": "art", "length": 4, "cluster": "725", "prob": -7.791079521179199, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "let", "id": 254, "lower": "let", "norm": "let", "shape": "xxx", "prefix": "l", "suffix": "let", "length": 3, "cluster": "522", "prob": -7.795135974884033, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "hard", "id": 255, "lower": "hard", "norm": "hard", "shape": "xxxx", "prefix": "h", "suffix": "ard", "length": 4, "cluster": "2538", "prob": -7.795384407043457, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "another", "id": 256, "lower": "another", "norm": "another", "shape": "xxxx", "prefix": "a", "suffix": "her", "length": 7, "cluster": "28650", "prob": -7.801506519317627, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "end", "id": 257, "lower": "end", "norm": "end", "shape": "xxx", "prefix": "e", "suffix": "end", "length": 3, "cluster": "21", "prob": -7.816553115844727, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "having", "id": 258, "lower": "having", "norm": "having", "shape": "xxxx", "prefix": "h", "suffix": "ing", "length": 6, "cluster": "130026", "prob": -7.818792819976807, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "As", "id": 259, "lower": "as", "norm": "As", "shape": "Xx", "prefix": "A", "suffix": "As", "length": 2, "cluster": "958", "prob": -7.836142539978027, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "games", "id": 260, "lower": "games", "norm": "games", "shape": "xxxx", "prefix": "g", "suffix": "mes", "length": 5, "cluster": "1485", "prob": -7.836157321929932, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "already", "id": 261, "lower": "already", "norm": "already", "shape": "xxxx", "prefix": "a", "suffix": "ady", "length": 7, "cluster": "634", "prob": -7.838688850402832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "..", "id": 0, "lower": "..", "norm": "..", "shape": "..", "prefix": ".", "suffix": "..", "length": 2, "cluster": "4906", "prob": -7.840396404266357, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "problem", "id": 262, "lower": "problem", "norm": "problem", "shape": "xxxx", "prefix": "p", "suffix": "lem", "length": 7, "cluster": "16069", "prob": -7.841479301452637, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "kind", "id": 263, "lower": "kind", "norm": "kind", "shape": "xxxx", "prefix": "k", "suffix": "ind", "length": 4, "cluster": "213", "prob": -7.844367980957031, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "old", "id": 264, "lower": "old", "norm": "old", "shape": "xxx", "prefix": "o", "suffix": "old", "length": 3, "cluster": "2346", "prob": -7.845602989196777, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "everyone", "id": 265, "lower": "everyone", "norm": "everyone", "shape": "xxxx", "prefix": "e", "suffix": "one", "length": 8, "cluster": "30698", "prob": -7.850788116455078, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "saying", "id": 266, "lower": "saying", "norm": "saying", "shape": "xxxx", "prefix": "s", "suffix": "ing", "length": 6, "cluster": "3732", "prob": -7.854340076446533, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "idea", "id": 267, "lower": "idea", "norm": "idea", "shape": "xxxx", "prefix": "i", "suffix": "dea", "length": 4, "cluster": "709", "prob": -7.855560779571533, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "else", "id": 268, "lower": "else", "norm": "else", "shape": "xxxx", "prefix": "e", "suffix": "lse", "length": 4, "cluster": "2013", "prob": -7.86043643951416, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "reason", "id": 269, "lower": "reason", "norm": "reason", "shape": "xxxx", "prefix": "r", "suffix": "son", "length": 6, "cluster": "113", "prob": -7.867291450500488, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Well", "id": 270, "lower": "well", "norm": "Well", "shape": "Xxxx", "prefix": "W", "suffix": "ell", "length": 4, "cluster": "1726", "prob": -7.871857643127441, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "less", "id": 271, "lower": "less", "norm": "less", "shape": "xxxx", "prefix": "l", "suffix": "ess", "length": 4, "cluster": "5610", "prob": -7.872425079345703, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "world", "id": 272, "lower": "world", "norm": "world", "shape": "xxxx", "prefix": "w", "suffix": "rld", "length": 5, "cluster": "329", "prob": -7.8744120597839355, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "wrong", "id": 273, "lower": "wrong", "norm": "wrong", "shape": "xxxx", "prefix": "w", "suffix": "ong", "length": 5, "cluster": "4586", "prob": -7.876842021942139, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "How", "id": 274, "lower": "how", "norm": "How", "shape": "Xxx", "prefix": "H", "suffix": "How", "length": 3, "cluster": "702", "prob": -7.879385948181152, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "far", "id": 275, "lower": "far", "norm": "far", "shape": "xxx", "prefix": "f", "suffix": "far", "length": 3, "cluster": "6890", "prob": -7.8802924156188965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "big", "id": 276, "lower": "big", "norm": "big", "shape": "xxx", "prefix": "b", "suffix": "big", "length": 3, "cluster": "135", "prob": -7.880735874176025, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "done", "id": 277, "lower": "done", "norm": "done", "shape": "xxxx", "prefix": "d", "suffix": "one", "length": 4, "cluster": "26282", "prob": -7.886453151702881, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "believe", "id": 278, "lower": "believe", "norm": "believe", "shape": "xxxx", "prefix": "b", "suffix": "eve", "length": 7, "cluster": "138", "prob": -7.886724948883057, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Yeah", "id": 279, "lower": "yeah", "norm": "Yeah", "shape": "Xxxx", "prefix": "Y", "suffix": "eah", "length": 4, "cluster": "1726", "prob": -7.890377044677734, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "such", "id": 280, "lower": "such", "norm": "such", "shape": "xxxx", "prefix": "s", "suffix": "uch", "length": 4, "cluster": "111", "prob": -7.894707679748535, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "stuff", "id": 281, "lower": "stuff", "norm": "stuff", "shape": "xxxx", "prefix": "s", "suffix": "uff", "length": 5, "cluster": "6853", "prob": -7.898244380950928, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "away", "id": 282, "lower": "away", "norm": "away", "shape": "xxxx", "prefix": "a", "suffix": "way", "length": 4, "cluster": "3434", "prob": -7.9017462730407715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "nothing", "id": 283, "lower": "nothing", "norm": "nothing", "shape": "xxxx", "prefix": "n", "suffix": "ing", "length": 7, "cluster": "14314", "prob": -7.909971714019775, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "tell", "id": 284, "lower": "tell", "norm": "tell", "shape": "xxxx", "prefix": "t", "suffix": "ell", "length": 4, "cluster": "1546", "prob": -7.910365581512451, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "looking", "id": 285, "lower": "looking", "norm": "looking", "shape": "xxxx", "prefix": "l", "suffix": "ing", "length": 7, "cluster": "1066", "prob": -7.911639213562012, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "start", "id": 286, "lower": "start", "norm": "start", "shape": "xxxx", "prefix": "s", "suffix": "art", "length": 5, "cluster": "3978", "prob": -7.923925876617432, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "using", "id": 287, "lower": "using", "norm": "using", "shape": "xxxx", "prefix": "u", "suffix": "ing", "length": 5, "cluster": "7146", "prob": -7.938363075256348, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "able", "id": 288, "lower": "able", "norm": "able", "shape": "xxxx", "prefix": "a", "suffix": "ble", "length": 4, "cluster": "6186", "prob": -7.939544677734375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "place", "id": 289, "lower": "place", "norm": "place", "shape": "xxxx", "prefix": "p", "suffix": "ace", "length": 5, "cluster": "6245", "prob": -7.954748153686523, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "high", "id": 290, "lower": "high", "norm": "high", "shape": "xxxx", "prefix": "h", "suffix": "igh", "length": 4, "cluster": "167", "prob": -7.963760852813721, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "until", "id": 291, "lower": "until", "norm": "until", "shape": "xxxx", "prefix": "u", "suffix": "til", "length": 5, "cluster": "2516", "prob": -7.964784622192383, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "either", "id": 292, "lower": "either", "norm": "either", "shape": "xxxx", "prefix": "e", "suffix": "her", "length": 6, "cluster": "30698", "prob": -7.965897560119629, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "seen", "id": 293, "lower": "seen", "norm": "seen", "shape": "xxxx", "prefix": "s", "suffix": "een", "length": 4, "cluster": "26282", "prob": -7.97322416305542, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "times", "id": 294, "lower": "times", "norm": "times", "shape": "xxxx", "prefix": "t", "suffix": "mes", "length": 5, "cluster": "61", "prob": -7.9734907150268555, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "real", "id": 295, "lower": "real", "norm": "real", "shape": "xxxx", "prefix": "r", "suffix": "eal", "length": 4, "cluster": "503", "prob": -7.981620788574219, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "When", "id": 296, "lower": "when", "norm": "When", "shape": "Xxxx", "prefix": "W", "suffix": "hen", "length": 4, "cluster": "190", "prob": -7.982150554656982, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "making", "id": 297, "lower": "making", "norm": "making", "shape": "xxxx", "prefix": "m", "suffix": "ing", "length": 6, "cluster": "7146", "prob": -7.985988616943359, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "seems", "id": 298, "lower": "seems", "norm": "seems", "shape": "xxxx", "prefix": "s", "suffix": "ems", "length": 5, "cluster": "16298", "prob": -7.989145278930664, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "fuck", "id": 299, "lower": "fuck", "norm": "fuck", "shape": "xxxx", "prefix": "f", "suffix": "uck", "length": 4, "cluster": "0", "prob": -7.992913246154785, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "fucking", "id": 300, "lower": "fucking", "norm": "fucking", "shape": "xxxx", "prefix": "f", "suffix": "ing", "length": 7, "cluster": "0", "prob": -7.993165969848633, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "\n\n\n", "id": 0, "lower": "\n\n\n", "norm": "\n\n\n", "shape": "\n\n\n", "prefix": "\n", "suffix": "\n\n\n", "length": 3, "cluster": "0", "prob": -7.996075630187988, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "next", "id": 301, "lower": "next", "norm": "next", "shape": "xxxx", "prefix": "n", "suffix": "ext", "length": 4, "cluster": "255", "prob": -7.996739864349365, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "anyone", "id": 302, "lower": "anyone", "norm": "anyone", "shape": "xxxx", "prefix": "a", "suffix": "one", "length": 6, "cluster": "30698", "prob": -7.997350215911865, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "#", "id": 303, "lower": "#", "norm": "#", "shape": "#", "prefix": "#", "suffix": "#", "length": 1, "cluster": "18", "prob": -8.001263618469238, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "looks", "id": 304, "lower": "looks", "norm": "looks", "shape": "xxxx", "prefix": "l", "suffix": "oks", "length": 5, "cluster": "2442", "prob": -8.001678466796875, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "everything", "id": 305, "lower": "everything", "norm": "everything", "shape": "xxxx", "prefix": "e", "suffix": "ing", "length": 10, "cluster": "14314", "prob": -8.00584602355957, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Oh", "id": 306, "lower": "oh", "norm": "Oh", "shape": "Xx", "prefix": "O", "suffix": "Oh", "length": 2, "cluster": "1726", "prob": -8.007224082946777, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "nice", "id": 307, "lower": "nice", "norm": "nice", "shape": "xxxx", "prefix": "n", "suffix": "ice", "length": 4, "cluster": "551", "prob": -8.009806632995605, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "once", "id": 308, "lower": "once", "norm": "once", "shape": "xxxx", "prefix": "o", "suffix": "nce", "length": 4, "cluster": "22250", "prob": -8.010163307189941, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "show", "id": 309, "lower": "show", "norm": "show", "shape": "xxxx", "prefix": "s", "suffix": "how", "length": 4, "cluster": "7690", "prob": -8.011373519897461, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "maybe", "id": 310, "lower": "maybe", "norm": "maybe", "shape": "xxxx", "prefix": "m", "suffix": "ybe", "length": 5, "cluster": "60650", "prob": -8.020626068115234, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "fact", "id": 311, "lower": "fact", "norm": "fact", "shape": "xxxx", "prefix": "f", "suffix": "act", "length": 4, "cluster": "369", "prob": -8.032754898071289, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "wo", "id": 312, "lower": "wo", "norm": "wo", "shape": "xx", "prefix": "w", "suffix": "wo", "length": 2, "cluster": "26", "prob": -8.0400972366333, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "5", "id": 313, "lower": "5", "norm": "5", "shape": "d", "prefix": "5", "suffix": "5", "length": 1, "cluster": "818", "prob": -8.040534019470215, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "free", "id": 314, "lower": "free", "norm": "free", "shape": "xxxx", "prefix": "f", "suffix": "ree", "length": 4, "cluster": "6634", "prob": -8.0440092086792, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "understand", "id": 315, "lower": "understand", "norm": "understand", "shape": "xxxx", "prefix": "u", "suffix": "and", "length": 10, "cluster": "3722", "prob": -8.052404403686523, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "team", "id": 316, "lower": "team", "norm": "team", "shape": "xxxx", "prefix": "t", "suffix": "eam", "length": 4, "cluster": "1061", "prob": -8.053070068359375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "....", "id": 317, "lower": "....", "norm": "....", "shape": "....", "prefix": ".", "suffix": "...", "length": 4, "cluster": "1202", "prob": -8.05477523803711, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "against", "id": 318, "lower": "against", "norm": "against", "shape": "xxxx", "prefix": "a", "suffix": "nst", "length": 7, "cluster": "24572", "prob": -8.064282417297363, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "live", "id": 319, "lower": "live", "norm": "live", "shape": "xxxx", "prefix": "l", "suffix": "ive", "length": 4, "cluster": "1418", "prob": -8.065953254699707, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": " \n\n", "id": 0, "lower": " \n\n", "norm": " \n\n", "shape": " \n\n", "prefix": " ", "suffix": " \n\n", "length": 3, "cluster": "0", "prob": -8.068946838378906, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Why", "id": 320, "lower": "why", "norm": "Why", "shape": "Xxx", "prefix": "W", "suffix": "Why", "length": 3, "cluster": "702", "prob": -8.06901741027832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "whole", "id": 321, "lower": "whole", "norm": "whole", "shape": "xxxx", "prefix": "w", "suffix": "ole", "length": 5, "cluster": "71", "prob": -8.070209503173828, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "For", "id": 322, "lower": "for", "norm": "For", "shape": "Xxx", "prefix": "F", "suffix": "For", "length": 3, "cluster": "1342", "prob": -8.072200775146484, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "guys", "id": 323, "lower": "guys", "norm": "guys", "shape": "xxxx", "prefix": "g", "suffix": "uys", "length": 4, "cluster": "365", "prob": -8.075167655944824, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "job", "id": 324, "lower": "job", "norm": "job", "shape": "xxx", "prefix": "j", "suffix": "job", "length": 3, "cluster": "37", "prob": -8.082273483276367, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "etc", "id": 325, "lower": "etc", "norm": "etc", "shape": "xxx", "prefix": "e", "suffix": "etc", "length": 3, "cluster": "26", "prob": -8.087606430053711, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "4", "id": 326, "lower": "4", "norm": "4", "shape": "d", "prefix": "4", "suffix": "4", "length": 1, "cluster": "818", "prob": -8.088510513305664, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "went", "id": 327, "lower": "went", "norm": "went", "shape": "xxxx", "prefix": "w", "suffix": "ent", "length": 4, "cluster": "7338", "prob": -8.091073989868164, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "school", "id": 328, "lower": "school", "norm": "school", "shape": "xxxx", "prefix": "s", "suffix": "ool", "length": 6, "cluster": "1829", "prob": -8.096077919006348, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "guess", "id": 329, "lower": "guess", "norm": "guess", "shape": "xxxx", "prefix": "g", "suffix": "ess", "length": 5, "cluster": "650", "prob": -8.097951889038086, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "friends", "id": 330, "lower": "friends", "norm": "friends", "shape": "xxxx", "prefix": "f", "suffix": "nds", "length": 7, "cluster": "3565", "prob": -8.10158634185791, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "between", "id": 331, "lower": "between", "norm": "between", "shape": "xxxx", "prefix": "b", "suffix": "een", "length": 7, "cluster": "12284", "prob": -8.106386184692383, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "case", "id": 332, "lower": "case", "norm": "case", "shape": "xxxx", "prefix": "c", "suffix": "ase", "length": 4, "cluster": "3269", "prob": -8.106882095336914, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "She", "id": 333, "lower": "she", "norm": "She", "shape": "Xxx", "prefix": "S", "suffix": "She", "length": 3, "cluster": "126", "prob": -8.119241714477539, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "each", "id": 334, "lower": "each", "norm": "each", "shape": "xxxx", "prefix": "e", "suffix": "ach", "length": 4, "cluster": "32746", "prob": -8.123948097229004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "fun", "id": 335, "lower": "fun", "norm": "fun", "shape": "xxx", "prefix": "f", "suffix": "fun", "length": 3, "cluster": "16229", "prob": -8.124406814575195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "agree", "id": 336, "lower": "agree", "norm": "agree", "shape": "xxxx", "prefix": "a", "suffix": "ree", "length": 5, "cluster": "394", "prob": -8.12778091430664, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Is", "id": 337, "lower": "is", "norm": "Is", "shape": "Xx", "prefix": "I", "suffix": "Is", "length": 2, "cluster": "1214", "prob": -8.129456520080566, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "buy", "id": 338, "lower": "buy", "norm": "buy", "shape": "xxx", "prefix": "b", "suffix": "buy", "length": 3, "cluster": "2826", "prob": -8.142950057983398, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Yes", "id": 339, "lower": "yes", "norm": "Yes", "shape": "Xxx", "prefix": "Y", "suffix": "Yes", "length": 3, "cluster": "1726", "prob": -8.147512435913086, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "run", "id": 340, "lower": "run", "norm": "run", "shape": "xxx", "prefix": "r", "suffix": "run", "length": 3, "cluster": "437", "prob": -8.156776428222656, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "change", "id": 341, "lower": "change", "norm": "change", "shape": "xxxx", "prefix": "c", "suffix": "nge", "length": 6, "cluster": "2997", "prob": -8.157740592956543, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "found", "id": 342, "lower": "found", "norm": "found", "shape": "xxxx", "prefix": "f", "suffix": "und", "length": 5, "cluster": "13738", "prob": -8.182107925415039, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "question", "id": 343, "lower": "question", "norm": "question", "shape": "xxxx", "prefix": "q", "suffix": "ion", "length": 8, "cluster": "709", "prob": -8.185464859008789, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "top", "id": 344, "lower": "top", "norm": "top", "shape": "xxx", "prefix": "t", "suffix": "top", "length": 3, "cluster": "1479", "prob": -8.191086769104004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "playing", "id": 345, "lower": "playing", "norm": "playing", "shape": "xxxx", "prefix": "p", "suffix": "ing", "length": 7, "cluster": "11242", "prob": -8.191595077514648, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "name", "id": 346, "lower": "name", "norm": "name", "shape": "xxxx", "prefix": "n", "suffix": "ame", "length": 4, "cluster": "4021", "prob": -8.19616985321045, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "mind", "id": 347, "lower": "mind", "norm": "mind", "shape": "xxxx", "prefix": "m", "suffix": "ind", "length": 4, "cluster": "1893", "prob": -8.197138786315918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "myself", "id": 348, "lower": "myself", "norm": "myself", "shape": "xxxx", "prefix": "m", "suffix": "elf", "length": 6, "cluster": "8042", "prob": -8.200143814086914, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "gets", "id": 349, "lower": "gets", "norm": "gets", "shape": "xxxx", "prefix": "g", "suffix": "ets", "length": 4, "cluster": "10666", "prob": -8.202808380126953, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "ago", "id": 350, "lower": "ago", "norm": "ago", "shape": "xxx", "prefix": "a", "suffix": "ago", "length": 3, "cluster": "6442", "prob": -8.206598281860352, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "friend", "id": 351, "lower": "friend", "norm": "friend", "shape": "xxxx", "prefix": "f", "suffix": "end", "length": 6, "cluster": "1061", "prob": -8.210515975952148, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "talking", "id": 352, "lower": "talking", "norm": "talking", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 7, "cluster": "4586", "prob": -8.22729778289795, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "days", "id": 353, "lower": "days", "norm": "days", "shape": "xxxx", "prefix": "d", "suffix": "ays", "length": 4, "cluster": "317", "prob": -8.227437973022461, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "yet", "id": 354, "lower": "yet", "norm": "yet", "shape": "xxx", "prefix": "y", "suffix": "yet", "length": 3, "cluster": "32490", "prob": -8.229137420654297, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "means", "id": 355, "lower": "means", "norm": "means", "shape": "xxxx", "prefix": "m", "suffix": "ans", "length": 5, "cluster": "31146", "prob": -8.234617233276367, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "hope", "id": 356, "lower": "hope", "norm": "hope", "shape": "xxxx", "prefix": "h", "suffix": "ope", "length": 4, "cluster": "650", "prob": -8.236272811889648, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "almost", "id": 357, "lower": "almost", "norm": "almost", "shape": "xxxx", "prefix": "a", "suffix": "ost", "length": 6, "cluster": "7402", "prob": -8.236738204956055, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "yourself", "id": 358, "lower": "yourself", "norm": "yourself", "shape": "xxxx", "prefix": "y", "suffix": "elf", "length": 8, "cluster": "8042", "prob": -8.2402982711792, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "awesome", "id": 359, "lower": "awesome", "norm": "awesome", "shape": "xxxx", "prefix": "a", "suffix": "ome", "length": 7, "cluster": "871", "prob": -8.247021675109863, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "care", "id": 360, "lower": "care", "norm": "care", "shape": "xxxx", "prefix": "c", "suffix": "are", "length": 4, "cluster": "1229", "prob": -8.248679161071777, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "quite", "id": 361, "lower": "quite", "norm": "quite", "shape": "xxxx", "prefix": "q", "suffix": "ite", "length": 5, "cluster": "15338", "prob": -8.254060745239258, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "10", "id": 362, "lower": "10", "norm": "10", "shape": "dd", "prefix": "1", "suffix": "10", "length": 2, "cluster": "1970", "prob": -8.258377075195312, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "true", "id": 363, "lower": "true", "norm": "true", "shape": "xxxx", "prefix": "t", "suffix": "rue", "length": 4, "cluster": "4586", "prob": -8.259368896484375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "remember", "id": 364, "lower": "remember", "norm": "remember", "shape": "xxxx", "prefix": "r", "suffix": "ber", "length": 8, "cluster": "3722", "prob": -8.259916305541992, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "definitely", "id": 365, "lower": "definitely", "norm": "definitely", "shape": "xxxx", "prefix": "d", "suffix": "ely", "length": 10, "cluster": "7802", "prob": -8.264209747314453, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "call", "id": 366, "lower": "call", "norm": "call", "shape": "xxxx", "prefix": "c", "suffix": "all", "length": 4, "cluster": "3765", "prob": -8.267317771911621, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "pay", "id": 367, "lower": "pay", "norm": "pay", "shape": "xxx", "prefix": "p", "suffix": "pay", "length": 3, "cluster": "7946", "prob": -8.26932144165039, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "stop", "id": 368, "lower": "stop", "norm": "stop", "shape": "xxxx", "prefix": "s", "suffix": "top", "length": 4, "cluster": "3338", "prob": -8.272970199584961, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "set", "id": 369, "lower": "set", "norm": "set", "shape": "xxx", "prefix": "s", "suffix": "set", "length": 3, "cluster": "2218", "prob": -8.285635948181152, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "started", "id": 370, "lower": "started", "norm": "started", "shape": "xxxx", "prefix": "s", "suffix": "ted", "length": 7, "cluster": "3242", "prob": -8.286487579345703, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "instead", "id": 371, "lower": "instead", "norm": "instead", "shape": "xxxx", "prefix": "i", "suffix": "ead", "length": 7, "cluster": "2005", "prob": -8.292781829833984, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "story", "id": 372, "lower": "story", "norm": "story", "shape": "xxxx", "prefix": "s", "suffix": "ory", "length": 5, "cluster": "6853", "prob": -8.293317794799805, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "level", "id": 373, "lower": "level", "norm": "level", "shape": "xxxx", "prefix": "l", "suffix": "vel", "length": 5, "cluster": "6117", "prob": -8.29642391204834, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "left", "id": 374, "lower": "left", "norm": "left", "shape": "xxxx", "prefix": "l", "suffix": "eft", "length": 4, "cluster": "54954", "prob": -8.296669006347656, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "week", "id": 375, "lower": "week", "norm": "week", "shape": "xxxx", "prefix": "w", "suffix": "eek", "length": 4, "cluster": "157", "prob": -8.300933837890625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "system", "id": 376, "lower": "system", "norm": "system", "shape": "xxxx", "prefix": "s", "suffix": "tem", "length": 6, "cluster": "4901", "prob": -8.303738594055176, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "full", "id": 377, "lower": "full", "norm": "full", "shape": "xxxx", "prefix": "f", "suffix": "ull", "length": 4, "cluster": "4071", "prob": -8.303950309753418, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "rather", "id": 378, "lower": "rather", "norm": "rather", "shape": "xxxx", "prefix": "r", "suffix": "her", "length": 6, "cluster": "6698", "prob": -8.312031745910645, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "video", "id": 379, "lower": "video", "norm": "video", "shape": "xxxx", "prefix": "v", "suffix": "deo", "length": 5, "cluster": "1975", "prob": -8.316000938415527, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "home", "id": 380, "lower": "home", "norm": "home", "shape": "xxxx", "prefix": "h", "suffix": "ome", "length": 4, "cluster": "1013", "prob": -8.316133499145508, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "women", "id": 381, "lower": "women", "norm": "women", "shape": "xxxx", "prefix": "w", "suffix": "men", "length": 5, "cluster": "877", "prob": -8.317564964294434, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "usually", "id": 382, "lower": "usually", "norm": "usually", "shape": "xxxx", "prefix": "u", "suffix": "lly", "length": 7, "cluster": "3706", "prob": -8.324220657348633, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "side", "id": 383, "lower": "side", "norm": "side", "shape": "xxxx", "prefix": "s", "suffix": "ide", "length": 4, "cluster": "8037", "prob": -8.327798843383789, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "wanted", "id": 384, "lower": "wanted", "norm": "wanted", "shape": "xxxx", "prefix": "w", "suffix": "ted", "length": 6, "cluster": "30634", "prob": -8.329934120178223, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "sense", "id": 385, "lower": "sense", "norm": "sense", "shape": "xxxx", "prefix": "s", "suffix": "nse", "length": 5, "cluster": "613", "prob": -8.338400840759277, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Your", "id": 386, "lower": "your", "norm": "Your", "shape": "Xxxx", "prefix": "Y", "suffix": "our", "length": 4, "cluster": "94", "prob": -8.347208023071289, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "second", "id": 387, "lower": "second", "norm": "second", "shape": "xxxx", "prefix": "s", "suffix": "ond", "length": 6, "cluster": "31", "prob": -8.351142883300781, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "comment", "id": 388, "lower": "comment", "norm": "comment", "shape": "xxxx", "prefix": "c", "suffix": "ent", "length": 7, "cluster": "757", "prob": -8.35578727722168, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "course", "id": 389, "lower": "course", "norm": "course", "shape": "xxxx", "prefix": "c", "suffix": "rse", "length": 6, "cluster": "1009", "prob": -8.35777759552002, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "ask", "id": 390, "lower": "ask", "norm": "ask", "shape": "xxx", "prefix": "a", "suffix": "ask", "length": 3, "cluster": "1546", "prob": -8.35922622680664, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Or", "id": 391, "lower": "or", "norm": "Or", "shape": "Xx", "prefix": "O", "suffix": "Or", "length": 2, "cluster": "1726", "prob": -8.361105918884277, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "seem", "id": 392, "lower": "seem", "norm": "seem", "shape": "xxxx", "prefix": "s", "suffix": "eem", "length": 4, "cluster": "906", "prob": -8.363061904907227, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Maybe", "id": 393, "lower": "maybe", "norm": "Maybe", "shape": "Xxxxx", "prefix": "M", "suffix": "ybe", "length": 5, "cluster": "190", "prob": -8.364654541015625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "must", "id": 394, "lower": "must", "norm": "must", "shape": "xxxx", "prefix": "m", "suffix": "ust", "length": 4, "cluster": "698", "prob": -8.365957260131836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Then", "id": 395, "lower": "then", "norm": "Then", "shape": "Xxxx", "prefix": "T", "suffix": "hen", "length": 4, "cluster": "1726", "prob": -8.369159698486328, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "small", "id": 396, "lower": "small", "norm": "small", "shape": "xxxx", "prefix": "s", "suffix": "all", "length": 5, "cluster": "391", "prob": -8.371565818786621, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "car", "id": 397, "lower": "car", "norm": "car", "shape": "xxx", "prefix": "c", "suffix": "car", "length": 3, "cluster": "1145", "prob": -8.374984741210938, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "hate", "id": 398, "lower": "hate", "norm": "hate", "shape": "xxxx", "prefix": "h", "suffix": "ate", "length": 4, "cluster": "906", "prob": -8.380099296569824, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "came", "id": 399, "lower": "came", "norm": "came", "shape": "xxxx", "prefix": "c", "suffix": "ame", "length": 4, "cluster": "15530", "prob": -8.382718086242676, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "watch", "id": 400, "lower": "watch", "norm": "watch", "shape": "xxxx", "prefix": "w", "suffix": "tch", "length": 5, "cluster": "3765", "prob": -8.386272430419922, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "experience", "id": 401, "lower": "experience", "norm": "experience", "shape": "xxxx", "prefix": "e", "suffix": "nce", "length": 10, "cluster": "2917", "prob": -8.387101173400879, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "cool", "id": 402, "lower": "cool", "norm": "cool", "shape": "xxxx", "prefix": "c", "suffix": "ool", "length": 4, "cluster": "565", "prob": -8.393746376037598, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "matter", "id": 403, "lower": "matter", "norm": "matter", "shape": "xxxx", "prefix": "m", "suffix": "ter", "length": 6, "cluster": "4805", "prob": -8.395515441894531, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "others", "id": 404, "lower": "others", "norm": "others", "shape": "xxxx", "prefix": "o", "suffix": "ers", "length": 6, "cluster": "1901", "prob": -8.396527290344238, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "completely", "id": 405, "lower": "completely", "norm": "completely", "shape": "xxxx", "prefix": "c", "suffix": "ely", "length": 10, "cluster": "12010", "prob": -8.40324592590332, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "All", "id": 406, "lower": "all", "norm": "All", "shape": "Xxx", "prefix": "A", "suffix": "All", "length": 3, "cluster": "1214", "prob": -8.403707504272461, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "called", "id": 407, "lower": "called", "norm": "called", "shape": "xxxx", "prefix": "c", "suffix": "led", "length": 6, "cluster": "11946", "prob": -8.404229164123535, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "under", "id": 408, "lower": "under", "norm": "under", "shape": "xxxx", "prefix": "u", "suffix": "der", "length": 5, "cluster": "32764", "prob": -8.406200408935547, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "yes", "id": 409, "lower": "yes", "norm": "yes", "shape": "xxx", "prefix": "y", "suffix": "yes", "length": 3, "cluster": "15146", "prob": -8.41097354888916, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Now", "id": 410, "lower": "now", "norm": "Now", "shape": "Xxx", "prefix": "N", "suffix": "Now", "length": 3, "cluster": "1726", "prob": -8.417712211608887, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Please", "id": 411, "lower": "please", "norm": "Please", "shape": "Xxxxx", "prefix": "P", "suffix": "ase", "length": 6, "cluster": "3582", "prob": -8.41897964477539, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "worth", "id": 412, "lower": "worth", "norm": "worth", "shape": "xxxx", "prefix": "w", "suffix": "rth", "length": 5, "cluster": "981", "prob": -8.423324584960938, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "says", "id": 413, "lower": "says", "norm": "says", "shape": "xxxx", "prefix": "s", "suffix": "ays", "length": 4, "cluster": "244", "prob": -8.426565170288086, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "comes", "id": 414, "lower": "comes", "norm": "comes", "shape": "xxxx", "prefix": "c", "suffix": "mes", "length": 5, "cluster": "15530", "prob": -8.428640365600586, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "fine", "id": 415, "lower": "fine", "norm": "fine", "shape": "xxxx", "prefix": "f", "suffix": "ine", "length": 4, "cluster": "8057", "prob": -8.428781509399414, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Thank", "id": 416, "lower": "thank", "norm": "Thank", "shape": "Xxxxx", "prefix": "T", "suffix": "ank", "length": 5, "cluster": "190", "prob": -8.434432983398438, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": " \n", "id": 0, "lower": " \n", "norm": " \n", "shape": " \n", "prefix": " ", "suffix": " \n", "length": 2, "cluster": "0", "prob": -8.435208320617676, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "works", "id": 417, "lower": "works", "norm": "works", "shape": "xxxx", "prefix": "w", "suffix": "rks", "length": 5, "cluster": "77", "prob": -8.436944961547852, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "exactly", "id": 418, "lower": "exactly", "norm": "exactly", "shape": "xxxx", "prefix": "e", "suffix": "tly", "length": 7, "cluster": "15338", "prob": -8.43747615814209, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "heard", "id": 419, "lower": "heard", "norm": "heard", "shape": "xxxx", "prefix": "h", "suffix": "ard", "length": 5, "cluster": "26282", "prob": -8.4396333694458, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "possible", "id": 420, "lower": "possible", "norm": "possible", "shape": "xxxx", "prefix": "p", "suffix": "ble", "length": 8, "cluster": "2535", "prob": -8.44277572631836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "thinking", "id": 421, "lower": "thinking", "norm": "thinking", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 8, "cluster": "4586", "prob": -8.442947387695312, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "hours", "id": 422, "lower": "hours", "norm": "hours", "shape": "xxxx", "prefix": "h", "suffix": "urs", "length": 5, "cluster": "957", "prob": -8.445417404174805, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "working", "id": 423, "lower": "working", "norm": "working", "shape": "xxxx", "prefix": "w", "suffix": "ing", "length": 7, "cluster": "27626", "prob": -8.44786262512207, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "took", "id": 424, "lower": "took", "norm": "took", "shape": "xxxx", "prefix": "t", "suffix": "ook", "length": 4, "cluster": "27050", "prob": -8.452874183654785, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "thanks", "id": 425, "lower": "thanks", "norm": "thanks", "shape": "xxxx", "prefix": "t", "suffix": "nks", "length": 6, "cluster": "554", "prob": -8.457283973693848, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "head", "id": 426, "lower": "head", "norm": "head", "shape": "xxxx", "prefix": "h", "suffix": "ead", "length": 4, "cluster": "1813", "prob": -8.458500862121582, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "power", "id": 427, "lower": "power", "norm": "power", "shape": "xxxx", "prefix": "p", "suffix": "wer", "length": 5, "cluster": "11621", "prob": -8.460216522216797, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "happen", "id": 428, "lower": "happen", "norm": "happen", "shape": "xxxx", "prefix": "h", "suffix": "pen", "length": 6, "cluster": "3466", "prob": -8.465093612670898, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "goes", "id": 429, "lower": "goes", "norm": "goes", "shape": "xxxx", "prefix": "g", "suffix": "oes", "length": 4, "cluster": "7338", "prob": -8.465673446655273, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Good", "id": 430, "lower": "good", "norm": "Good", "shape": "Xxxx", "prefix": "G", "suffix": "ood", "length": 4, "cluster": "614", "prob": -8.468016624450684, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "saw", "id": 431, "lower": "saw", "norm": "saw", "shape": "xxx", "prefix": "s", "suffix": "saw", "length": 3, "cluster": "6570", "prob": -8.472514152526855, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "please", "id": 432, "lower": "please", "norm": "please", "shape": "xxxx", "prefix": "p", "suffix": "ase", "length": 6, "cluster": "309", "prob": -8.473013877868652, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "couple", "id": 433, "lower": "couple", "norm": "couple", "shape": "xxxx", "prefix": "c", "suffix": "ple", "length": 6, "cluster": "853", "prob": -8.47309398651123, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "hit", "id": 434, "lower": "hit", "norm": "hit", "shape": "xxx", "prefix": "h", "suffix": "hit", "length": 3, "cluster": "682", "prob": -8.473491668701172, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "likely", "id": 435, "lower": "likely", "norm": "likely", "shape": "xxxx", "prefix": "l", "suffix": "ely", "length": 6, "cluster": "42", "prob": -8.47359561920166, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "ones", "id": 436, "lower": "ones", "norm": "ones", "shape": "xxxx", "prefix": "o", "suffix": "nes", "length": 4, "cluster": "15821", "prob": -8.474469184875488, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "often", "id": 437, "lower": "often", "norm": "often", "shape": "xxxx", "prefix": "o", "suffix": "ten", "length": 5, "cluster": "3706", "prob": -8.476237297058105, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "talk", "id": 438, "lower": "talk", "norm": "talk", "shape": "xxxx", "prefix": "t", "suffix": "alk", "length": 4, "cluster": "394", "prob": -8.479889869689941, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "issue", "id": 439, "lower": "issue", "norm": "issue", "shape": "xxxx", "prefix": "i", "suffix": "sue", "length": 5, "cluster": "3525", "prob": -8.48391342163086, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "easy", "id": 440, "lower": "easy", "norm": "easy", "shape": "xxxx", "prefix": "e", "suffix": "asy", "length": 4, "cluster": "2538", "prob": -8.489182472229004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "One", "id": 441, "lower": "one", "norm": "One", "shape": "Xxx", "prefix": "O", "suffix": "One", "length": 3, "cluster": "350", "prob": -8.494391441345215, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "needs", "id": 442, "lower": "needs", "norm": "needs", "shape": "xxxx", "prefix": "n", "suffix": "eds", "length": 5, "cluster": "14250", "prob": -8.49528694152832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "add", "id": 443, "lower": "add", "norm": "add", "shape": "xxx", "prefix": "a", "suffix": "add", "length": 3, "cluster": "3594", "prob": -8.496837615966797, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "support", "id": 444, "lower": "support", "norm": "support", "shape": "xxxx", "prefix": "s", "suffix": "ort", "length": 7, "cluster": "7861", "prob": -8.503355026245117, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "face", "id": 445, "lower": "face", "norm": "face", "shape": "xxxx", "prefix": "f", "suffix": "ace", "length": 4, "cluster": "1685", "prob": -8.504852294921875, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "hand", "id": 446, "lower": "hand", "norm": "hand", "shape": "xxxx", "prefix": "h", "suffix": "and", "length": 4, "cluster": "8037", "prob": -8.504961967468262, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "half", "id": 447, "lower": "half", "norm": "half", "shape": "xxxx", "prefix": "h", "suffix": "alf", "length": 4, "cluster": "469", "prob": -8.508658409118652, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "check", "id": 448, "lower": "check", "norm": "check", "shape": "xxxx", "prefix": "c", "suffix": "eck", "length": 5, "cluster": "2485", "prob": -8.512067794799805, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "night", "id": 449, "lower": "night", "norm": "night", "shape": "xxxx", "prefix": "n", "suffix": "ght", "length": 5, "cluster": "93", "prob": -8.517072677612305, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "months", "id": 450, "lower": "months", "norm": "months", "shape": "xxxx", "prefix": "m", "suffix": "ths", "length": 6, "cluster": "445", "prob": -8.517988204956055, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "kids", "id": 451, "lower": "kids", "norm": "kids", "shape": "xxxx", "prefix": "k", "suffix": "ids", "length": 4, "cluster": "877", "prob": -8.520237922668457, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "players", "id": 452, "lower": "players", "norm": "players", "shape": "xxxx", "prefix": "p", "suffix": "ers", "length": 7, "cluster": "3565", "prob": -8.520515441894531, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "line", "id": 453, "lower": "line", "norm": "line", "shape": "xxxx", "prefix": "l", "suffix": "ine", "length": 4, "cluster": "3941", "prob": -8.522600173950195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "told", "id": 454, "lower": "told", "norm": "told", "shape": "xxxx", "prefix": "t", "suffix": "old", "length": 4, "cluster": "20138", "prob": -8.52303409576416, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "example", "id": 455, "lower": "example", "norm": "example", "shape": "xxxx", "prefix": "e", "suffix": "ple", "length": 7, "cluster": "497", "prob": -8.523116111755371, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "played", "id": 456, "lower": "played", "norm": "played", "shape": "xxxx", "prefix": "p", "suffix": "yed", "length": 6, "cluster": "32426", "prob": -8.528886795043945, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "reddit", "id": 457, "lower": "reddit", "norm": "reddit", "shape": "xxxx", "prefix": "r", "suffix": "dit", "length": 6, "cluster": "0", "prob": -8.52908992767334, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "based", "id": 458, "lower": "based", "norm": "based", "shape": "xxxx", "prefix": "b", "suffix": "sed", "length": 5, "cluster": "1578", "prob": -8.53032112121582, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "tried", "id": 459, "lower": "tried", "norm": "tried", "shape": "xxxx", "prefix": "t", "suffix": "ied", "length": 5, "cluster": "28586", "prob": -8.532145500183105, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "sounds", "id": 460, "lower": "sounds", "norm": "sounds", "shape": "xxxx", "prefix": "s", "suffix": "nds", "length": 6, "cluster": "2442", "prob": -8.53985595703125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "link", "id": 461, "lower": "link", "norm": "link", "shape": "xxxx", "prefix": "l", "suffix": "ink", "length": 4, "cluster": "5829", "prob": -8.540618896484375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "girl", "id": 462, "lower": "girl", "norm": "girl", "shape": "xxxx", "prefix": "g", "suffix": "irl", "length": 4, "cluster": "549", "prob": -8.542597770690918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "open", "id": 463, "lower": "open", "norm": "open", "shape": "xxxx", "prefix": "o", "suffix": "pen", "length": 4, "cluster": "1589", "prob": -8.553583145141602, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "To", "id": 464, "lower": "to", "norm": "To", "shape": "Xx", "prefix": "T", "suffix": "To", "length": 2, "cluster": "3582", "prob": -8.557126998901367, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "taking", "id": 465, "lower": "taking", "norm": "taking", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 6, "cluster": "31722", "prob": -8.55748462677002, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "happened", "id": 466, "lower": "happened", "norm": "happened", "shape": "xxxx", "prefix": "h", "suffix": "ned", "length": 8, "cluster": "5290", "prob": -8.559469223022461, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "during", "id": 467, "lower": "during", "norm": "during", "shape": "xxxx", "prefix": "d", "suffix": "ing", "length": 6, "cluster": "262140", "prob": -8.559581756591797, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "deal", "id": 468, "lower": "deal", "norm": "deal", "shape": "xxxx", "prefix": "d", "suffix": "eal", "length": 4, "cluster": "5829", "prob": -8.560197830200195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "single", "id": 469, "lower": "single", "norm": "single", "shape": "xxxx", "prefix": "s", "suffix": "gle", "length": 6, "cluster": "71", "prob": -8.571329116821289, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "family", "id": 470, "lower": "family", "norm": "family", "shape": "xxxx", "prefix": "f", "suffix": "ily", "length": 6, "cluster": "1061", "prob": -8.571907043457031, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "close", "id": 471, "lower": "close", "norm": "close", "shape": "xxxx", "prefix": "c", "suffix": "ose", "length": 5, "cluster": "53", "prob": -8.581155776977539, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "happy", "id": 472, "lower": "happy", "norm": "happy", "shape": "xxxx", "prefix": "h", "suffix": "ppy", "length": 5, "cluster": "4586", "prob": -8.581560134887695, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "move", "id": 473, "lower": "move", "norm": "move", "shape": "xxxx", "prefix": "m", "suffix": "ove", "length": 4, "cluster": "7093", "prob": -8.582797050476074, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "number", "id": 474, "lower": "number", "norm": "number", "shape": "xxxx", "prefix": "n", "suffix": "ber", "length": 6, "cluster": "341", "prob": -8.584420204162598, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "US", "id": 475, "lower": "us", "norm": "US", "shape": "XX", "prefix": "U", "suffix": "US", "length": 2, "cluster": "1642", "prob": -8.585862159729004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": true, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "water", "id": 476, "lower": "water", "norm": "water", "shape": "xxxx", "prefix": "w", "suffix": "ter", "length": 5, "cluster": "3705", "prob": -8.589462280273438, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "men", "id": 477, "lower": "men", "norm": "men", "shape": "xxx", "prefix": "m", "suffix": "men", "length": 3, "cluster": "877", "prob": -8.59007453918457, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "yeah", "id": 478, "lower": "yeah", "norm": "yeah", "shape": "xxxx", "prefix": "y", "suffix": "eah", "length": 4, "cluster": "26", "prob": -8.593489646911621, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "later", "id": 479, "lower": "later", "norm": "later", "shape": "xxxx", "prefix": "l", "suffix": "ter", "length": 5, "cluster": "5866", "prob": -8.603795051574707, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "whatever", "id": 480, "lower": "whatever", "norm": "whatever", "shape": "xxxx", "prefix": "w", "suffix": "ver", "length": 8, "cluster": "2026", "prob": -8.610091209411621, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "government", "id": 481, "lower": "government", "norm": "government", "shape": "xxxx", "prefix": "g", "suffix": "ent", "length": 10, "cluster": "297", "prob": -8.610445022583008, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "6", "id": 482, "lower": "6", "norm": "6", "shape": "d", "prefix": "6", "suffix": "6", "length": 1, "cluster": "50", "prob": -8.611133575439453, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "house", "id": 483, "lower": "house", "norm": "house", "shape": "xxxx", "prefix": "h", "suffix": "use", "length": 5, "cluster": "37", "prob": -8.613367080688477, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "similar", "id": 484, "lower": "similar", "norm": "similar", "shape": "xxxx", "prefix": "s", "suffix": "lar", "length": 7, "cluster": "295", "prob": -8.613471031188965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "wait", "id": 485, "lower": "wait", "norm": "wait", "shape": "xxxx", "prefix": "w", "suffix": "ait", "length": 4, "cluster": "3765", "prob": -8.613734245300293, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "questions", "id": 486, "lower": "questions", "norm": "questions", "shape": "xxxx", "prefix": "q", "suffix": "ons", "length": 9, "cluster": "1165", "prob": -8.613752365112305, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "sex", "id": 487, "lower": "sex", "norm": "sex", "shape": "xxx", "prefix": "s", "suffix": "sex", "length": 3, "cluster": "633", "prob": -8.613862991333008, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "especially", "id": 488, "lower": "especially", "norm": "especially", "shape": "xxxx", "prefix": "e", "suffix": "lly", "length": 10, "cluster": "27882", "prob": -8.616527557373047, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "lol", "id": 489, "lower": "lol", "norm": "lol", "shape": "xxx", "prefix": "l", "suffix": "lol", "length": 3, "cluster": "0", "prob": -8.621257781982422, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "Because", "id": 490, "lower": "because", "norm": "Because", "shape": "Xxxxx", "prefix": "B", "suffix": "use", "length": 7, "cluster": "1214", "prob": -8.623008728027344, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
||||
{"orth": "God", "id": 491, "lower": "god", "norm": "God", "shape": "Xxx", "prefix": "G", "suffix": "God", "length": 3, "cluster": "422", "prob": -8.62376594543457, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
|
|
@ -1,5 +1,7 @@
|
|||
//- 💫 DOCS > API > ANNOTATION > TRAINING
|
||||
|
||||
+h(3, "json-input") JSON input format for training
|
||||
|
||||
p
|
||||
| spaCy takes training data in JSON format. The built-in
|
||||
| #[+api("cli#convert") #[code convert]] command helps you convert the
|
||||
|
@ -46,3 +48,57 @@ p
|
|||
| Treebank:
|
||||
|
||||
+github("spacy", "examples/training/training-data.json", false, false, "json")
|
||||
|
||||
+h(3, "vocab-jsonl") Lexical data for vocabulary
|
||||
+tag-new(2)
|
||||
|
||||
p
|
||||
| To populate a model's vocabulary, you can use the
|
||||
| #[+api("cli#vocab") #[code spacy vocab]] command and load in a
|
||||
| #[+a("https://jsonlines.readthedocs.io/en/latest/") newline-delimited JSON]
|
||||
| (JSONL) file containing one lexical entry per line. The first line
|
||||
| defines the language and vocabulary settings. All other lines are
|
||||
| expected to be JSON objects describing an individual lexeme. The lexical
|
||||
| attributes will then be set as attributes on spaCy's
|
||||
| #[+api("lexeme#attributes") #[code Lexeme]] object. The #[code vocab]
|
||||
| command outputs a ready-to-use spaCy model with a #[code Vocab]
|
||||
| containing the lexical data.
|
||||
|
||||
+code("First line").
|
||||
{"lang": "en", "settings": {"oov_prob": -20.502029418945312}}
|
||||
|
||||
+code("Entry structure").
|
||||
{
|
||||
"orth": string,
|
||||
"id": int,
|
||||
"lower": string,
|
||||
"norm": string,
|
||||
"shape": string,
|
||||
"prefix": string,
|
||||
"suffix": string,
|
||||
"length": int,
|
||||
"cluster": string,
|
||||
"prob": float,
|
||||
"is_alpha": bool,
|
||||
"is_ascii": bool,
|
||||
"is_digit": bool,
|
||||
"is_lower": bool,
|
||||
"is_punct": bool,
|
||||
"is_space": bool,
|
||||
"is_title": bool,
|
||||
"is_upper": bool,
|
||||
"like_url": bool,
|
||||
"like_num": bool,
|
||||
"like_email": bool,
|
||||
"is_stop": bool,
|
||||
"is_oov": bool,
|
||||
"is_quote": bool,
|
||||
"is_left_punct": bool,
|
||||
"is_right_punct": bool
|
||||
}
|
||||
|
||||
p
|
||||
| Here's an example of the 500 most frequent lexemes in the English
|
||||
| training data:
|
||||
|
||||
+github("spacy", "examples/training/vocab-data.jsonl", false, false, "json")
|
||||
|
|
|
@ -220,7 +220,7 @@
|
|||
"Lemmatization": "lemmatization",
|
||||
"Dependencies": "dependency-parsing",
|
||||
"Named Entities": "named-entities",
|
||||
"Training Data": "training"
|
||||
"Models & Training": "training"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -99,6 +99,6 @@ p This document describes the target annotations spaCy is trained to predict.
|
|||
include _annotation/_biluo
|
||||
|
||||
+section("training")
|
||||
+h(2, "json-input") JSON input format for training
|
||||
+h(2, "training") Models and training data
|
||||
|
||||
include _annotation/_training
|
||||
|
|
Loading…
Reference in New Issue
Block a user