Merge branch 'develop' of https://github.com/explosion/spaCy into develop

2025-11-14 23:06:01 +03:00 · 2017-10-30 19:44:58 +01:00 · 2017-10-30 19:44:58 +01:00 · 4e3006cec7
commit 4e3006cec7
parent 4112a991ec f02b0af821
15 changed files with 392 additions and 69 deletions
--- a/examples/training/vocab-data.jsonl
+++ b/examples/training/vocab-data.jsonl
@ -0,0 +1,101 @@
 {"lang": "en", "settings": {"oov_prob": -20.502029418945312}}
 {"orth": ".", "id": 1, "lower": ".", "norm": ".", "shape": ".", "prefix": ".", "suffix": ".", "length": 1, "cluster": "8", "prob": -3.0678977966308594, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": ",", "id": 2, "lower": ",", "norm": ",", "shape": ",", "prefix": ",", "suffix": ",", "length": 1, "cluster": "4", "prob": -3.4549596309661865, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "the", "id": 3, "lower": "the", "norm": "the", "shape": "xxx", "prefix": "t", "suffix": "the", "length": 3, "cluster": "11", "prob": -3.528766632080078, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "I", "id": 4, "lower": "i", "norm": "I", "shape": "X", "prefix": "I", "suffix": "I", "length": 1, "cluster": "346", "prob": -3.791565179824829, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": true, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "to", "id": 5, "lower": "to", "norm": "to", "shape": "xx", "prefix": "t", "suffix": "to", "length": 2, "cluster": "12", "prob": -3.8560216426849365, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "a", "id": 6, "lower": "a", "norm": "a", "shape": "x", "prefix": "a", "suffix": "a", "length": 1, "cluster": "19", "prob": -3.92978835105896, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "and", "id": 7, "lower": "and", "norm": "and", "shape": "xxx", "prefix": "a", "suffix": "and", "length": 3, "cluster": "20", "prob": -4.113108158111572, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "of", "id": 8, "lower": "of", "norm": "of", "shape": "xx", "prefix": "o", "suffix": "of", "length": 2, "cluster": "28", "prob": -4.27587366104126, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "you", "id": 9, "lower": "you", "norm": "you", "shape": "xxx", "prefix": "y", "suffix": "you", "length": 3, "cluster": "602", "prob": -4.373791217803955, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "it", "id": 10, "lower": "it", "norm": "it", "shape": "xx", "prefix": "i", "suffix": "it", "length": 2, "cluster": "474", "prob": -4.388050079345703, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "is", "id": 11, "lower": "is", "norm": "is", "shape": "xx", "prefix": "i", "suffix": "is", "length": 2, "cluster": "762", "prob": -4.457748889923096, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "that", "id": 12, "lower": "that", "norm": "that", "shape": "xxxx", "prefix": "t", "suffix": "hat", "length": 4, "cluster": "84", "prob": -4.464504718780518, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "\n\n", "id": 0, "lower": "\n\n", "norm": "\n\n", "shape": "\n\n", "prefix": "\n", "suffix": "\n\n", "length": 2, "cluster": "0", "prob": -4.606560707092285, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "in", "id": 13, "lower": "in", "norm": "in", "shape": "xx", "prefix": "i", "suffix": "in", "length": 2, "cluster": "60", "prob": -4.619071960449219, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "'s", "id": 14, "lower": "'s", "norm": "'s", "shape": "'x", "prefix": "'", "suffix": "'s", "length": 2, "cluster": "52", "prob": -4.830559253692627, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "n't", "id": 15, "lower": "n't", "norm": "n't", "shape": "x'x", "prefix": "n", "suffix": "n't", "length": 3, "cluster": "74", "prob": -4.859938621520996, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "for", "id": 16, "lower": "for", "norm": "for", "shape": "xxx", "prefix": "f", "suffix": "for", "length": 3, "cluster": "508", "prob": -4.8801093101501465, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "\"", "id": 17, "lower": "\"", "norm": "\"", "shape": "\"", "prefix": "\"", "suffix": "\"", "length": 1, "cluster": "0", "prob": -5.02677583694458, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": true, "is_left_punct": true, "is_right_punct": true}
 {"orth": "?", "id": 18, "lower": "?", "norm": "?", "shape": "?", "prefix": "?", "suffix": "?", "length": 1, "cluster": "0", "prob": -5.05924654006958, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": " ", "id": 0, "lower": " ", "norm": " ", "shape": " ", "prefix": " ", "suffix": " ", "length": 1, "cluster": "0", "prob": -5.129165172576904, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "have", "id": 19, "lower": "have", "norm": "have", "shape": "xxxx", "prefix": "h", "suffix": "ave", "length": 4, "cluster": "378", "prob": -5.156484603881836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "on", "id": 20, "lower": "on", "norm": "on", "shape": "xx", "prefix": "o", "suffix": "on", "length": 2, "cluster": "2044", "prob": -5.172736167907715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "*", "id": 21, "lower": "*", "norm": "*", "shape": "*", "prefix": "*", "suffix": "*", "length": 1, "cluster": "5098", "prob": -5.1977410316467285, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": ")", "id": 22, "lower": ")", "norm": ")", "shape": ")", "prefix": ")", "suffix": ")", "length": 1, "cluster": "0", "prob": -5.197994232177734, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": true}
 {"orth": "be", "id": 23, "lower": "be", "norm": "be", "shape": "xx", "prefix": "b", "suffix": "be", "length": 2, "cluster": "458", "prob": -5.225094318389893, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "with", "id": 24, "lower": "with", "norm": "with", "shape": "xxxx", "prefix": "w", "suffix": "ith", "length": 4, "cluster": "1020", "prob": -5.243249893188477, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "do", "id": 25, "lower": "do", "norm": "do", "shape": "xx", "prefix": "d", "suffix": "do", "length": 2, "cluster": "2042", "prob": -5.246996879577637, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "was", "id": 26, "lower": "was", "norm": "was", "shape": "xxx", "prefix": "w", "suffix": "was", "length": 3, "cluster": "250", "prob": -5.252320289611816, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "are", "id": 27, "lower": "are", "norm": "are", "shape": "xxx", "prefix": "a", "suffix": "are", "length": 3, "cluster": "1530", "prob": -5.271068096160889, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "not", "id": 28, "lower": "not", "norm": "not", "shape": "xxx", "prefix": "n", "suffix": "not", "length": 3, "cluster": "1258", "prob": -5.332601070404053, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "but", "id": 29, "lower": "but", "norm": "but", "shape": "xxx", "prefix": "b", "suffix": "but", "length": 3, "cluster": "148", "prob": -5.3419694900512695, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "!", "id": 30, "lower": "!", "norm": "!", "shape": "!", "prefix": "!", "suffix": "!", "length": 1, "cluster": "0", "prob": -5.359641075134277, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "this", "id": 31, "lower": "this", "norm": "this", "shape": "xxxx", "prefix": "t", "suffix": "his", "length": 4, "cluster": "63", "prob": -5.36181640625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "[", "id": 32, "lower": "[", "norm": "[", "shape": "[", "prefix": "[", "suffix": "[", "length": 1, "cluster": "0", "prob": -5.438112258911133, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": true, "is_right_punct": false}
 {"orth": "-", "id": 33, "lower": "-", "norm": "-", "shape": "-", "prefix": "-", "suffix": "-", "length": 1, "cluster": "36", "prob": -5.468655109405518, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "my", "id": 34, "lower": "my", "norm": "my", "shape": "xx", "prefix": "m", "suffix": "my", "length": 2, "cluster": "251", "prob": -5.491642951965332, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "they", "id": 35, "lower": "they", "norm": "they", "shape": "xxxx", "prefix": "t", "suffix": "hey", "length": 4, "cluster": "90", "prob": -5.5243682861328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "as", "id": 36, "lower": "as", "norm": "as", "shape": "xx", "prefix": "a", "suffix": "as", "length": 2, "cluster": "212", "prob": -5.53448486328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "like", "id": 37, "lower": "like", "norm": "like", "shape": "xxxx", "prefix": "l", "suffix": "ike", "length": 4, "cluster": "1684", "prob": -5.610429763793945, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "just", "id": 38, "lower": "just", "norm": "just", "shape": "xxxx", "prefix": "j", "suffix": "ust", "length": 4, "cluster": "31978", "prob": -5.630868434906006, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "your", "id": 39, "lower": "your", "norm": "your", "shape": "xxxx", "prefix": "y", "suffix": "our", "length": 4, "cluster": "251", "prob": -5.650108814239502, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "or", "id": 40, "lower": "or", "norm": "or", "shape": "xx", "prefix": "o", "suffix": "or", "length": 2, "cluster": "404", "prob": -5.654984951019287, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "(", "id": 41, "lower": "(", "norm": "(", "shape": "(", "prefix": "(", "suffix": "(", "length": 1, "cluster": "0", "prob": -5.75598669052124, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": true, "is_right_punct": false}
 {"orth": "at", "id": 42, "lower": "at", "norm": "at", "shape": "xx", "prefix": "a", "suffix": "at", "length": 2, "cluster": "124", "prob": -5.763442516326904, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "if", "id": 43, "lower": "if", "norm": "if", "shape": "xx", "prefix": "i", "suffix": "if", "length": 2, "cluster": "4052", "prob": -5.763589859008789, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "would", "id": 44, "lower": "would", "norm": "would", "shape": "xxxx", "prefix": "w", "suffix": "uld", "length": 5, "cluster": "1978", "prob": -5.772674560546875, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "so", "id": 45, "lower": "so", "norm": "so", "shape": "xx", "prefix": "s", "suffix": "so", "length": 2, "cluster": "2282", "prob": -5.823773384094238, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "can", "id": 46, "lower": "can", "norm": "can", "shape": "xxx", "prefix": "c", "suffix": "can", "length": 3, "cluster": "58", "prob": -5.827763080596924, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "me", "id": 47, "lower": "me", "norm": "me", "shape": "xx", "prefix": "m", "suffix": "me", "length": 2, "cluster": "1898", "prob": -5.846089839935303, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "about", "id": 48, "lower": "about", "norm": "about", "shape": "xxxx", "prefix": "a", "suffix": "out", "length": 5, "cluster": "618", "prob": -5.906808853149414, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "he", "id": 49, "lower": "he", "norm": "he", "shape": "xx", "prefix": "h", "suffix": "he", "length": 2, "cluster": "218", "prob": -5.9319047927856445, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "It", "id": 50, "lower": "it", "norm": "It", "shape": "Xx", "prefix": "I", "suffix": "It", "length": 2, "cluster": "894", "prob": -5.93662691116333, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "all", "id": 51, "lower": "all", "norm": "all", "shape": "xxx", "prefix": "a", "suffix": "all", "length": 3, "cluster": "6122", "prob": -5.936640739440918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "The", "id": 52, "lower": "the", "norm": "The", "shape": "Xxx", "prefix": "T", "suffix": "The", "length": 3, "cluster": "30", "prob": -5.958707332611084, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "get", "id": 53, "lower": "get", "norm": "get", "shape": "xxx", "prefix": "g", "suffix": "get", "length": 3, "cluster": "2570", "prob": -5.992605686187744, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "one", "id": 54, "lower": "one", "norm": "one", "shape": "xxx", "prefix": "o", "suffix": "one", "length": 3, "cluster": "8170", "prob": -5.996385097503662, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "'m", "id": 55, "lower": "'m", "norm": "'m", "shape": "'x", "prefix": "'", "suffix": "'m", "length": 2, "cluster": "3066", "prob": -5.9999823570251465, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "out", "id": 56, "lower": "out", "norm": "out", "shape": "xxx", "prefix": "o", "suffix": "out", "length": 3, "cluster": "1386", "prob": -6.0027008056640625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "from", "id": 57, "lower": "from", "norm": "from", "shape": "xxxx", "prefix": "f", "suffix": "rom", "length": 4, "cluster": "380", "prob": -6.010132312774658, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "an", "id": 58, "lower": "an", "norm": "an", "shape": "xx", "prefix": "a", "suffix": "an", "length": 2, "cluster": "3", "prob": -6.014852046966553, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "what", "id": 59, "lower": "what", "norm": "what", "shape": "xxxx", "prefix": "w", "suffix": "hat", "length": 4, "cluster": "2026", "prob": -6.023346424102783, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "up", "id": 60, "lower": "up", "norm": "up", "shape": "xx", "prefix": "u", "suffix": "up", "length": 2, "cluster": "362", "prob": -6.028695583343506, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "]", "id": 61, "lower": "]", "norm": "]", "shape": "]", "prefix": "]", "suffix": "]", "length": 1, "cluster": "0", "prob": -6.0386552810668945, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": true}
 {"orth": "\n", "id": 0, "lower": "\n", "norm": "\n", "shape": "\n", "prefix": "\n", "suffix": "\n", "length": 1, "cluster": "0", "prob": -6.0506510734558105, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "people", "id": 62, "lower": "people", "norm": "people", "shape": "xxxx", "prefix": "p", "suffix": "ple", "length": 6, "cluster": "365", "prob": -6.0715765953063965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "more", "id": 63, "lower": "more", "norm": "more", "shape": "xxxx", "prefix": "m", "suffix": "ore", "length": 4, "cluster": "1514", "prob": -6.081598281860352, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": ":", "id": 64, "lower": ":", "norm": ":", "shape": ":", "prefix": ":", "suffix": ":", "length": 1, "cluster": "228", "prob": -6.128875732421875, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "there", "id": 65, "lower": "there", "norm": "there", "shape": "xxxx", "prefix": "t", "suffix": "ere", "length": 5, "cluster": "986", "prob": -6.135282039642334, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "deleted", "id": 66, "lower": "deleted", "norm": "deleted", "shape": "xxxx", "prefix": "d", "suffix": "ted", "length": 7, "cluster": "1706", "prob": -6.1543049812316895, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "think", "id": 67, "lower": "think", "norm": "think", "shape": "xxxx", "prefix": "t", "suffix": "ink", "length": 5, "cluster": "1674", "prob": -6.180924892425537, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "will", "id": 68, "lower": "will", "norm": "will", "shape": "xxxx", "prefix": "w", "suffix": "ill", "length": 4, "cluster": "442", "prob": -6.199834823608398, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "them", "id": 69, "lower": "them", "norm": "them", "shape": "xxxx", "prefix": "t", "suffix": "hem", "length": 4, "cluster": "5994", "prob": -6.2177276611328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "we", "id": 70, "lower": "we", "norm": "we", "shape": "xx", "prefix": "w", "suffix": "we", "length": 2, "cluster": "1626", "prob": -6.230024337768555, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "'re", "id": 71, "lower": "'re", "norm": "'re", "shape": "'xx", "prefix": "'", "suffix": "'re", "length": 3, "cluster": "7162", "prob": -6.255462646484375, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "when", "id": 72, "lower": "when", "norm": "when", "shape": "xxxx", "prefix": "w", "suffix": "hen", "length": 4, "cluster": "16340", "prob": -6.2623114585876465, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "You", "id": 73, "lower": "you", "norm": "You", "shape": "Xxx", "prefix": "Y", "suffix": "You", "length": 3, "cluster": "858", "prob": -6.276494026184082, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "...", "id": 74, "lower": "...", "norm": "...", "shape": "...", "prefix": ".", "suffix": "...", "length": 3, "cluster": "966", "prob": -6.278521537780762, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "some", "id": 75, "lower": "some", "norm": "some", "shape": "xxxx", "prefix": "s", "suffix": "ome", "length": 4, "cluster": "239", "prob": -6.318882465362549, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "has", "id": 76, "lower": "has", "norm": "has", "shape": "xxx", "prefix": "h", "suffix": "has", "length": 3, "cluster": "890", "prob": -6.325605392456055, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "because", "id": 77, "lower": "because", "norm": "because", "shape": "xxxx", "prefix": "b", "suffix": "use", "length": 7, "cluster": "980", "prob": -6.349620342254639, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "know", "id": 78, "lower": "know", "norm": "know", "shape": "xxxx", "prefix": "k", "suffix": "now", "length": 4, "cluster": "3722", "prob": -6.368943214416504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "really", "id": 79, "lower": "really", "norm": "really", "shape": "xxxx", "prefix": "r", "suffix": "lly", "length": 6, "cluster": "7802", "prob": -6.370757102966309, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "by", "id": 80, "lower": "by", "norm": "by", "shape": "xx", "prefix": "b", "suffix": "by", "length": 2, "cluster": "252", "prob": -6.375086784362793, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "time", "id": 81, "lower": "time", "norm": "time", "shape": "xxxx", "prefix": "t", "suffix": "ime", "length": 4, "cluster": "477", "prob": -6.3782219886779785, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "did", "id": 82, "lower": "did", "norm": "did", "shape": "xxx", "prefix": "d", "suffix": "did", "length": 3, "cluster": "8186", "prob": -6.389003753662109, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "no", "id": 83, "lower": "no", "norm": "no", "shape": "xx", "prefix": "n", "suffix": "no", "length": 2, "cluster": "4074", "prob": -6.402691841125488, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "had", "id": 84, "lower": "had", "norm": "had", "shape": "xxx", "prefix": "h", "suffix": "had", "length": 3, "cluster": "1914", "prob": -6.45427131652832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "their", "id": 85, "lower": "their", "norm": "their", "shape": "xxxx", "prefix": "t", "suffix": "eir", "length": 5, "cluster": "187", "prob": -6.461463928222656, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "If", "id": 86, "lower": "if", "norm": "If", "shape": "Xx", "prefix": "I", "suffix": "If", "length": 2, "cluster": "190", "prob": -6.469156742095947, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "how", "id": 87, "lower": "how", "norm": "how", "shape": "xxx", "prefix": "h", "suffix": "how", "length": 3, "cluster": "10218", "prob": -6.496722221374512, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "does", "id": 88, "lower": "does", "norm": "does", "shape": "xxxx", "prefix": "d", "suffix": "oes", "length": 4, "cluster": "4090", "prob": -6.500738143920898, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "who", "id": 89, "lower": "who", "norm": "who", "shape": "xxx", "prefix": "w", "suffix": "who", "length": 3, "cluster": "410", "prob": -6.504637241363525, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "than", "id": 90, "lower": "than", "norm": "than", "shape": "xxxx", "prefix": "t", "suffix": "han", "length": 4, "cluster": "106", "prob": -6.512253761291504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "good", "id": 91, "lower": "good", "norm": "good", "shape": "xxxx", "prefix": "g", "suffix": "ood", "length": 4, "cluster": "551", "prob": -6.518923759460449, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "only", "id": 92, "lower": "only", "norm": "only", "shape": "xxxx", "prefix": "o", "suffix": "nly", "length": 4, "cluster": "15594", "prob": -6.535442352294922, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "his", "id": 93, "lower": "his", "norm": "his", "shape": "xxx", "prefix": "h", "suffix": "his", "length": 3, "cluster": "123", "prob": -6.574275016784668, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "much", "id": 94, "lower": "much", "norm": "much", "shape": "xxxx", "prefix": "m", "suffix": "uch", "length": 4, "cluster": "2794", "prob": -6.584301948547363, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": ";", "id": 95, "lower": ";", "norm": ";", "shape": ";", "prefix": ";", "suffix": ";", "length": 1, "cluster": "36", "prob": -6.586422920227051, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "'ve", "id": 96, "lower": "'ve", "norm": "'ve", "shape": "'xx", "prefix": "'", "suffix": "'ve", "length": 3, "cluster": "1018", "prob": -6.593011379241943, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
 {"orth": "could", "id": 97, "lower": "could", "norm": "could", "shape": "xxxx", "prefix": "c", "suffix": "uld", "length": 5, "cluster": "954", "prob": -6.595959186553955, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false}
--- a/spacy/main.py
+++ b/spacy/main.py
@ -19,7 +19,7 @@ if __name__ == '__main__':
        'convert': convert,
        'package': package,
        'model': model,
-        'model': vocab,
+        'vocab': vocab,
        'profile': profile,
        'validate': validate
    }
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@ -17,14 +17,14 @@ numpy.random.seed(0)
@plac.annotations(
-    model=("Model name or path", "positional", None, str),
+    model=("model name or path", "positional", None, str),
-    data_path=("Location of JSON-formatted evaluation data", "positional",
+    data_path=("location of JSON-formatted evaluation data", "positional",
               None, str),
-    gold_preproc=("Use gold preprocessing", "flag", "G", bool),
+    gold_preproc=("use gold preprocessing", "flag", "G", bool),
-    gpu_id=("Use GPU", "option", "g", int),
+    gpu_id=("use GPU", "option", "g", int),
-    displacy_path=("Directory to output rendered parses as HTML", "option",
+    displacy_path=("directory to output rendered parses as HTML", "option",
                   "dp", str),
-    displacy_limit=("Limit of parses to render as HTML", "option", "dl", int))
+    displacy_limit=("limit of parses to render as HTML", "option", "dl", int))
 def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False,
             displacy_path=None, displacy_limit=25):
    """
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@ -16,10 +16,11 @@ from .. import about
    input_dir=("directory with model data", "positional", None, str),
    output_dir=("output parent directory", "positional", None, str),
    meta_path=("path to meta.json", "option", "m", str),
-    create_meta=("create meta.json, even if one exists in directory", "flag",
+    create_meta=("create meta.json, even if one exists in directory – if "
-                 "c", bool),
+                 "existing meta is found, entries are shown as defaults in "
-    force=("force overwriting of existing folder in output directory", "flag",
+                 "the command line prompt", "flag", "c", bool),
-           "f", bool))
+    force=("force overwriting of existing model directory in output directory",
           "flag", "f", bool))
 def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False,
            force=False):
    """
@ -41,13 +42,13 @@ def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False,
    template_manifest = get_template('MANIFEST.in')
    template_init = get_template('xx_model_name/__init__.py')
    meta_path = meta_path or input_path / 'meta.json'
-    if not create_meta and meta_path.is_file():
+    if meta_path.is_file():
        prints(meta_path, title="Reading meta.json from file")
        meta = util.read_json(meta_path)
        if not create_meta:  # only print this if user doesn't want to overwrite
            prints(meta_path, title="Loaded meta.json from file")
        else:
-        meta = generate_meta(input_dir)
+            meta = generate_meta(input_dir, meta)
    meta = validate_meta(meta, ['lang', 'name', 'version'])
    model_name = meta['lang'] + '_' + meta['name']
    model_name_v = model_name + '-' + meta['version']
    main_path = output_path / model_name_v
@ -82,18 +83,19 @@ def create_file(file_path, contents):
    file_path.open('w', encoding='utf-8').write(contents)
-def generate_meta(model_path):
+def generate_meta(model_path, existing_meta):
-    meta = {}
+    meta = existing_meta or {}
-    settings = [('lang', 'Model language', 'en'),
+    settings = [('lang', 'Model language', meta.get('lang', 'en')),
-                ('name', 'Model name', 'model'),
+                ('name', 'Model name', meta.get('name', 'model')),
-                ('version', 'Model version', '0.0.0'),
+                ('version', 'Model version', meta.get('version', '0.0.0')),
                ('spacy_version', 'Required spaCy version',
                 '>=%s,<3.0.0' % about.__version__),
-                ('description', 'Model description', False),
+                ('description', 'Model description',
-                ('author', 'Author', False),
+                  meta.get('description', False)),
-                ('email', 'Author email', False),
+                ('author', 'Author', meta.get('author', False)),
-                ('url', 'Author website', False),
+                ('email', 'Author email', meta.get('email', False)),
-                ('license', 'License', 'CC BY-NC 3.0')]
+                ('url', 'Author website', meta.get('url', False)),
                ('license', 'License', meta.get('license', 'CC BY-SA 3.0'))]
    nlp = util.load_model_from_path(Path(model_path))
    meta['pipeline'] = nlp.pipe_names
    meta['vectors'] = {'width': nlp.vocab.vectors_length,
--- a/spacy/cli/vocab.py
+++ b/spacy/cli/vocab.py
@ -1,31 +1,33 @@
 '''Compile a vocabulary from a lexicon jsonl file and word vectors.'''
 # coding: utf8
 from __future__ import unicode_literals
 from pathlib import Path
 import plac
 import json
 import spacy
 import numpy
-from spacy.util import ensure_path
+from pathlib import Path
 from ..util import prints, ensure_path
@plac.annotations(
    lang=("model language", "positional", None, str),
-    output_dir=("output directory to store model in", "positional", None, str),
+    output_dir=("model output directory", "positional", None, Path),
    lexemes_loc=("location of JSONL-formatted lexical data", "positional",
-                None, str),
+                 None, Path),
-    vectors_loc=("location of vectors data, as numpy .npz (optional)",
+    vectors_loc=("optional: location of vectors data, as numpy .npz",
-              "positional", None, str),
+                 "positional", None, str))
-    version=("Model version", "option", "V", str),
+def make_vocab(cmd, lang, output_dir, lexemes_loc, vectors_loc=None):
-)
+    """Compile a vocabulary from a lexicon jsonl file and word vectors."""
-def make_vocab(lang, output_dir, lexemes_loc, vectors_loc=None, version=None):
+    if not lexemes_loc.exists():
-    out_dir = ensure_path(output_dir)
+        prints(lexemes_loc, title="Can't find lexical data", exits=1)
-    jsonl_loc = ensure_path(lexemes_loc)
+    vectors_loc = ensure_path(vectors_loc)
    nlp = spacy.blank(lang)
    for word in nlp.vocab:
        word.rank = 0
-    with jsonl_loc.open() as file_:
+    lex_added = 0
    vec_added = 0
    with lexemes_loc.open() as file_:
        for line in file_:
            if line.strip():
                attrs = json.loads(line)
@ -35,14 +37,18 @@ def make_vocab(lang, output_dir, lexemes_loc, vectors_loc=None, version=None):
                    lex = nlp.vocab[attrs['orth']]
                    lex.set_attrs(**attrs)
                    assert lex.rank == attrs['id']
                lex_added += 1
    if vectors_loc is not None:
        vector_data = numpy.load(open(vectors_loc, 'rb'))
        nlp.vocab.clear_vectors(width=vector_data.shape[1])
        added = 0
        for word in nlp.vocab:
            if word.rank:
                nlp.vocab.vectors.add(word.orth_, row=word.rank,
                                      vector=vector_data[word.rank])
-                added += 1
+                vec_added += 1
-    nlp.to_disk(out_dir)
+    if not output_dir.exists():
        output_dir.mkdir()
    nlp.to_disk(output_dir)
    prints("{} entries, {} vectors".format(lex_added, vec_added), output_dir,
           title="Sucessfully compiled vocab and vectors, and saved model")
    return nlp
--- a/spacy/language.py
+++ b/spacy/language.py
@ -154,6 +154,8 @@ class Language(object):
        self._meta.setdefault('email', '')
        self._meta.setdefault('url', '')
        self._meta.setdefault('license', '')
        self._meta['vectors'] = {'width': self.vocab.vectors_length,
                                 'entries': len(self.vocab.vectors)}
        self._meta['pipeline'] = self.pipe_names
        return self._meta
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@ -263,6 +263,15 @@ cdef class Vocab:
        The similarities are judged by cosine. The original vectors may
        be large, so the cosines are calculated in minibatches, to reduce
        memory usage.
        nr_row (int): The number of rows to keep in the vector table.
        batch_size (int): Batch of vectors for calculating the similarities.
            Larger batch sizes might be faster, while temporarily requiring
            more memory.
        RETURNS (dict): A dictionary keyed by removed words mapped to
            `(string, score)` tuples, where `string` is the entry the removed
            word was mapped to, and `score` the similarity score between the
            two words.
        """
        xp = get_array_module(self.vectors.data)
        # Work in batches, to avoid memory problems.
@ -294,6 +303,7 @@ cdef class Vocab:
                self.vectors.key2row[key] = neighbours[row-nr_row]
        # Make copy, to encourage the original table to be garbage collected.
        self.vectors.data = xp.ascontiguousarray(self.vectors.data[:nr_row])
        # TODO: return new mapping
    def get_vector(self, orth):
        """Retrieve a vector for a word in the vocabulary. Words can be looked
--- a/website/_includes/_functions.jade
+++ b/website/_includes/_functions.jade
@ -41,9 +41,6 @@
 -           var comps = path.split('#');
 -           return "top-level#" + comps[0] + '.' + comps[1];
 -       }
 -       else if (path.startsWith('cli#')) {
 -           return "top-level#" + path.split('#')[1];
 -       }
 -       return path;
 -   }
--- a/website/api/_annotation/_training.jade
+++ b/website/api/_annotation/_training.jade
@ -1,5 +1,7 @@
 //- 💫 DOCS > API > ANNOTATION > TRAINING
 +h(3, "json-input") JSON input format for training
 p
    |  spaCy takes training data in JSON format. The built-in
    |  #[+api("cli#convert") #[code convert]] command helps you convert the
@ -46,3 +48,57 @@ p
    |  Treebank:
 +github("spacy", "examples/training/training-data.json", false, false, "json")
 +h(3, "vocab-jsonl") Lexical data for vocabulary
    +tag-new(2)
 p
    |  The populate a model's vocabulary, you can use the
    |  #[+api("cli#vocab") #[code spacy vocab]] command and load in a
    |  #[+a("https://jsonlines.readthedocs.io/en/latest/") newline-delimited JSON]
    |  (JSONL) file containing one lexical entry per line. The first line
    |  defines the language and vocabulary settings. All other lines are
    |  expected to be JSON objects describing an individual lexeme. The lexical
    |  attributes will be then set as attributes on spaCy's
    |  #[+api("lexeme#attributes") #[code Lexeme]] object. The #[code vocab]
    |  command outputs a ready-to-use spaCy model with a #[code Vocab]
    |  containing the lexical data.
 +code("First line").
    {"lang": "en", "settings": {"oov_prob": -20.502029418945312}}
 +code("Entry structure").
    {
        "orth": string,
        "id": int,
        "lower": string,
        "norm": string,
        "shape": string
        "prefix": string,
        "suffix": string,
        "length": int,
        "cluster": string,
        "prob": float,
        "is_alpha": bool,
        "is_ascii": bool,
        "is_digit": bool,
        "is_lower": bool,
        "is_punct": bool,
        "is_space": bool,
        "is_title": bool,
        "is_upper": bool,
        "like_url": bool,
        "like_num": bool,
        "like_email": bool,
        "is_stop": bool,
        "is_oov": bool,
        "is_quote": bool,
        "is_left_punct": bool,
        "is_right_punct": bool
    }
 p
    |  Here's an example of the 100 most frequent lexemes in the English
    |  training data:
 +github("spacy", "examples/training/vocab-data.jsonl", false, false, "json")
--- a/website/api/_data.json
+++ b/website/api/_data.json
@ -3,8 +3,10 @@
        "Overview": {
            "Architecture": "./",
            "Annotation Specs": "annotation",
            "Command Line": "cli",
            "Functions": "top-level"
        },
        "Containers": {
            "Doc": "doc",
            "Token": "token",
@ -45,14 +47,19 @@
        }
    },
    "cli": {
        "title": "Command Line Interface",
        "teaser": "Download, train and package models, and debug spaCy.",
        "source": "spacy/cli"
    },
    "top-level": {
        "title": "Top-level Functions",
        "menu": {
            "spacy": "spacy",
            "displacy": "displacy",
            "Utility Functions": "util",
-            "Compatibility": "compat",
+            "Compatibility": "compat"
            "Command Line": "cli"
        }
    },
@ -213,7 +220,7 @@
            "Lemmatization": "lemmatization",
            "Dependencies": "dependency-parsing",
            "Named Entities": "named-entities",
-            "Training Data": "training"
+            "Models & Training": "training"
        }
    }
 }
--- a/website/api/_top-level/_spacy.jade
+++ b/website/api/_top-level/_spacy.jade
@ -85,7 +85,9 @@ p
    +row
        +cell #[code name]
        +cell unicode
-        +cell ISO code of the language class to load.
+        +cell
            |  #[+a("https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes") ISO code]
            |  of the language class to load.
    +row
        +cell #[code disable]
--- a/website/api/annotation.jade
+++ b/website/api/annotation.jade
@ -99,6 +99,6 @@ p This document describes the target annotations spaCy is trained to predict.
    include _annotation/_biluo
 +section("training")
-    +h(2, "json-input") JSON input format for training
+    +h(2, "training") Models and training data
    include _annotation/_training
--- a/website/api/_top-level/_cli.jade
+++ b/website/api/_top-level/_cli.jade
@ -1,4 +1,6 @@
-//- 💫 DOCS > API > TOP-LEVEL > COMMAND LINE INTERFACE
+//- 💫 DOCS > API > COMMAND LINE INTERFACE
 include ../_includes/_mixins
 p
    |  As of v1.7.0, spaCy comes with new command line helpers to download and
@ -34,6 +36,13 @@ p
        +cell flag
        +cell Show help message and available arguments.
    +row("foot")
        +cell creates
        +cell directory, symlink
        +cell
            |  The installed model package in your #[code site-packages]
            |  directory and a shortcut link as a symlink in #[code spacy/data].
 +aside("Downloading best practices")
    |  The #[code download] command is mostly intended as a convenient,
    |  interactive wrapper – it performs compatibility checks and prints
@ -86,6 +95,13 @@ p
        +cell flag
        +cell Show help message and available arguments.
    +row("foot")
        +cell creates
        +cell symlink
        +cell
            |  A shortcut link of the given name as a symlink in
            |  #[code spacy/data].
 +h(3, "info") Info
 p
@ -113,6 +129,11 @@ p
        +cell flag
        +cell Show help message and available arguments.
    +row("foot")
        +cell prints
        +cell #[code stdout]
        +cell Information about your spaCy installation.
 +h(3, "validate") Validate
    +tag-new(2)
@ -129,6 +150,12 @@ p
 +code(false, "bash", "$").
    spacy validate
 +table(["Argument", "Type", "Description"])
    +row("foot")
        +cell prints
        +cell #[code stdout]
        +cell Details about the compatibility of your installed models.
 +h(3, "convert") Convert
 p
@ -172,6 +199,11 @@ p
        +cell flag
        +cell Show help message and available arguments.
    +row("foot")
        +cell creates
        +cell JSON
        +cell Data in spaCy's #[+a("/api/annotation#json-input") JSON format].
 p The following converters are available:
 +table(["ID", "Description"])
@ -286,6 +318,11 @@ p
        +cell flag
        +cell Show help message and available arguments.
    +row("foot")
        +cell creates
        +cell model, pickle
        +cell A spaCy model on each epoch, and a final #[code .pickle] file.
 +h(4, "train-hyperparams") Environment variables for hyperparameters
    +tag-new(2)
@ -395,6 +432,50 @@ p
        +cell Gradient L2 norm constraint.
        +cell #[code 1.0]
 +h(3, "vocab") Vocab
    +tag-new(2)
 p
    |  Compile a vocabulary from a
    |  #[+a("/api/annotation#vocab-jsonl") lexicon JSONL] file and optional
    |  word vectors. Will save out a valid spaCy model that you can load via
    |  #[+api("spacy#load") #[code spacy.load]] or package using the
    |  #[+api("cli#package") #[code package]] command.
 +code(false, "bash", "$").
    spacy vocab [lang] [output_dir] [lexemes_loc] [vectors_loc]
 +table(["Argument", "Type", "Description"])
    +row
        +cell #[code lang]
        +cell positional
        +cell
            |  Model language
            |  #[+a("https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes") ISO code],
            |  e.g. #[code en].
    +row
        +cell #[code output_dir]
        +cell positional
        +cell Model output directory. Will be created if it doesn't exist.
    +row
        +cell #[code lexemes_loc]
        +cell positional
        +cell
            |  Location of lexical data in spaCy's
            |  #[+a("/api/annotation#vocab-jsonl") JSONL format].
    +row
        +cell #[code vectors_loc]
        +cell positional
        +cell Optional location of vectors data as numpy #[code .npz] file.
    +row("foot")
        +cell creates
        +cell model
        +cell A spaCy model containing the vocab and vectors.
 +h(3, "evaluate") Evaluate
    +tag-new(2)
@ -447,22 +528,36 @@ p
        +cell flag
        +cell Use gold preprocessing.
    +row("foot")
        +cell prints / creates
        +cell #[code stdout], HTML
        +cell Training results and optional displaCy visualizations.
 +h(3, "package") Package
 p
    |  Generate a #[+a("/usage/training#models-generating") model Python package]
    |  from an existing model data directory. All data files are copied over.
-    |  If the path to a meta.json is supplied, or a meta.json is found in the
+    |  If the path to a #[code meta.json] is supplied, or a #[code meta.json] is
-    |  input directory, this file is used. Otherwise, the data can be entered
+    |  found in the input directory, this file is used. Otherwise, the data can
-    |  directly from the command line. The required file templates are downloaded
+    |  be entered directly from the command line. The required file templates
-    |  from #[+src(gh("spacy-dev-resources", "templates/model")) GitHub] to make
+    |  are downloaded from
    |  #[+src(gh("spacy-dev-resources", "templates/model")) GitHub] to make
    |  sure you're always using the latest versions. This means you need to be
-    |  connected to the internet to use this command.
+    |  connected to the internet to use this command. After packaging, you
    |  can run #[code python setup.py sdist] from the newly created directory
    |  to turn your model into an installable archive file.
 +code(false, "bash", "$", false, false, true).
    spacy package [input_dir] [output_dir] [--meta-path] [--create-meta] [--force]
 +aside-code("Example", "bash").
    spacy package /input /output
    cd /output/en_model-0.0.0
    python setup.py sdist
    pip install dist/en_model-0.0.0.tar.gz
 +table(["Argument", "Type", "Description"])
    +row
        +cell #[code input_dir]
@ -477,15 +572,16 @@ p
    +row
        +cell #[code --meta-path], #[code -m]
        +cell option
-        +cell #[+tag-new(2)] Path to meta.json file (optional).
+        +cell #[+tag-new(2)] Path to #[code meta.json] file (optional).
    +row
        +cell #[code --create-meta], #[code -c]
        +cell flag
        +cell
-            |  #[+tag-new(2)] Create a meta.json file on the command line, even
+            |  #[+tag-new(2)] Create a #[code meta.json] file on the command
-            |  if one already exists in the directory.
+            |  line, even if one already exists in the directory. If an
-
+            |  existing file is found, its entries will be shown as the defaults
            |  in the command line prompt.
    +row
        +cell #[code --force], #[code -f]
        +cell flag
@ -495,3 +591,8 @@ p
        +cell #[code --help], #[code -h]
        +cell flag
        +cell Show help message and available arguments.
    +row("foot")
        +cell creates
        +cell directory
        +cell A Python package containing the spaCy model.
--- a/website/api/top-level.jade
+++ b/website/api/top-level.jade
@ -18,7 +18,3 @@ include ../_includes/_mixins
 +section("compat")
    +h(2, "compat", "spacy/compaty.py") Compatibility functions
    include _top-level/_compat
 +section("cli", "spacy/cli")
    +h(2, "cli") Command line
    include _top-level/_cli
--- a/website/api/vocab.jade
+++ b/website/api/vocab.jade
@ -162,7 +162,7 @@ p
        +cell int
        +cell The integer ID by which the flag value can be checked.
-+h(2, "add_flag") Vocab.clear_vectors
+h(2, "clear_vectors") Vocab.clear_vectors
    +tag method
    +tag-new(2)
@ -181,7 +181,50 @@ p
            |  Number of dimensions of the new vectors. If #[code None], size
            |  is not changed.
-+h(2, "add_flag") Vocab.get_vector
+h(2, "prune_vectors") Vocab.prune_vectors
    +tag method
    +tag-new(2)
 p
    |  Reduce the current vector table to #[code nr_row] unique entries. Words
    |  mapped to the discarded vectors will be remapped to the closest vector
    |  among those remaining. For example, suppose the original table had
    |  vectors for the words:
    |  #[code.u-break ['sat', 'cat', 'feline', 'reclined']]. If we prune the
    |  vector table to, two rows, we would discard the vectors for "feline"
    |  and "reclined". These words would then be remapped to the closest
    |  remaining vector – so "feline" would have the same vector as "cat",
    |  and "reclined" would have the same vector as "sat". The similarities are
    |  judged by cosine. The original vectors may be large, so the cosines are
    |  calculated in minibatches, to reduce memory usage.
 +aside-code("Example").
    nlp.vocab.prune_vectors(10000)
    assert len(nlp.vocab.vectors) &lt;= 1000
 +table(["Name", "Type", "Description"])
    +row
        +cell #[code nr_row]
        +cell int
        +cell The number of rows to keep in the vector table.
    +row
        +cell #[code batch_size]
        +cell int
        +cell
            |  Batch of vectors for calculating the similarities. Larger batch
            |  sizes might be faster, while temporarily requiring more memory.
    +row("foot")
        +cell returns
        +cell dict
        +cell
            |  A dictionary keyed by removed words mapped to
            |  #[code (string, score)] tuples, where #[code string] is the entry
            |  the removed word was mapped to, and #[code score] the similarity
            |  score between the two words.
 +h(2, "get_vector") Vocab.get_vector
    +tag method
    +tag-new(2)
@ -206,7 +249,7 @@ p
            |  A word vector. Size and shape are determined by the
            |  #[code Vocab.vectors] instance.
-+h(2, "add_flag") Vocab.set_vector
+h(2, "set_vector") Vocab.set_vector
    +tag method
    +tag-new(2)
@ -228,7 +271,7 @@ p
        +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
        +cell The vector to set.
-+h(2, "add_flag") Vocab.has_vector
+h(2, "has_vector") Vocab.has_vector
    +tag method
    +tag-new(2)