From bf24f7f67242d0b0786e29f81483824d0f6ecfc7 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Wed, 22 Jul 2020 16:02:51 +0200
Subject: [PATCH 01/71] Update invalid tag maps (#5796)

* Remove copy of (old?) PTB tag map for: bn, eu
* Remove unsupported features from: hy, pl, ro, ru
---
 spacy/lang/bn/__init__.py |    2 -
 spacy/lang/bn/tag_map.py  |   82 ---
 spacy/lang/eu/__init__.py |    2 -
 spacy/lang/eu/tag_map.py  |   71 --
 spacy/lang/hy/tag_map.py  |  423 ++++-------
 spacy/lang/pl/tag_map.py  |  348 ++++-----
 spacy/lang/ro/tag_map.py  |  682 ++++++++---------
 spacy/lang/ru/tag_map.py  | 1472 ++++++++++++++++++-------------------
 8 files changed, 1375 insertions(+), 1707 deletions(-)
 delete mode 100644 spacy/lang/bn/tag_map.py
 delete mode 100644 spacy/lang/eu/tag_map.py

diff --git a/spacy/lang/bn/__init__.py b/spacy/lang/bn/__init__.py
index e70232552..7da50ff2d 100644
--- a/spacy/lang/bn/__init__.py
+++ b/spacy/lang/bn/__init__.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
-from .tag_map import TAG_MAP
 from .stop_words import STOP_WORDS
 
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
@@ -16,7 +15,6 @@ class BengaliDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
     lex_attr_getters[LANG] = lambda text: "bn"
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    tag_map = TAG_MAP
     stop_words = STOP_WORDS
     prefixes = TOKENIZER_PREFIXES
     suffixes = TOKENIZER_SUFFIXES
diff --git a/spacy/lang/bn/tag_map.py b/spacy/lang/bn/tag_map.py
deleted file mode 100644
index 1efb35858..000000000
--- a/spacy/lang/bn/tag_map.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, ADJ, CONJ, SCONJ, NUM, DET, ADV, ADP, X, VERB
-from ...symbols import CCONJ, NOUN, PROPN, PART, INTJ, SPACE, PRON, AUX, SYM
-
-
-TAG_MAP = {
-    ".": {POS: PUNCT, "PunctType": "peri"},
-    ",": {POS: PUNCT, "PunctType": "comm"},
-    "-LRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "ini"},
-    "-RRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "fin"},
-    "``": {POS: PUNCT, "PunctType": "quot", "PunctSide": "ini"},
-    '""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
-    "''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
-    ":": {POS: PUNCT},
-    "৳": {POS: SYM, "Other": {"SymType": "currency"}},
-    "#": {POS: SYM, "Other": {"SymType": "numbersign"}},
-    "AFX": {POS: ADJ, "Hyph": "yes"},
-    "CC": {POS: CONJ, "ConjType": "coor"},
-    "CD": {POS: NUM, "NumType": "card"},
-    "DT": {POS: DET},
-    "EX": {POS: ADV, "AdvType": "ex"},
-    "FW": {POS: X, "Foreign": "yes"},
-    "HYPH": {POS: PUNCT, "PunctType": "dash"},
-    "IN": {POS: ADP},
-    "JJ": {POS: ADJ, "Degree": "pos"},
-    "JJR": {POS: ADJ, "Degree": "comp"},
-    "JJS": {POS: ADJ, "Degree": "sup"},
-    "LS": {POS: PUNCT, "NumType": "ord"},
-    "MD": {POS: VERB, "VerbType": "mod"},
-    "NIL": {POS: ""},
-    "NN": {POS: NOUN, "Number": "sing"},
-    "NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"},
-    "NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"},
-    "NNS": {POS: NOUN, "Number": "plur"},
-    "PDT": {POS: ADJ, "AdjType": "pdt", "PronType": "prn"},
-    "POS": {POS: PART, "Poss": "yes"},
-    "PRP": {POS: PRON, "PronType": "prs"},
-    "PRP$": {POS: ADJ, "PronType": "prs", "Poss": "yes"},
-    "RB": {POS: ADV, "Degree": "pos"},
-    "RBR": {POS: ADV, "Degree": "comp"},
-    "RBS": {POS: ADV, "Degree": "sup"},
-    "RP": {POS: PART},
-    "TO": {POS: PART, "PartType": "inf", "VerbForm": "inf"},
-    "UH": {POS: INTJ},
-    "VB": {POS: VERB, "VerbForm": "inf"},
-    "VBD": {POS: VERB, "VerbForm": "fin", "Tense": "past"},
-    "VBG": {POS: VERB, "VerbForm": "part", "Tense": "pres", "Aspect": "prog"},
-    "VBN": {POS: VERB, "VerbForm": "part", "Tense": "past", "Aspect": "perf"},
-    "VBP": {POS: VERB, "VerbForm": "fin", "Tense": "pres"},
-    "VBZ": {
-        POS: VERB,
-        "VerbForm": "fin",
-        "Tense": "pres",
-        "Number": "sing",
-        "Person": 3,
-    },
-    "WDT": {POS: ADJ, "PronType": "int|rel"},
-    "WP": {POS: NOUN, "PronType": "int|rel"},
-    "WP$": {POS: ADJ, "Poss": "yes", "PronType": "int|rel"},
-    "WRB": {POS: ADV, "PronType": "int|rel"},
-    "SP": {POS: SPACE},
-    "ADV": {POS: ADV},
-    "NOUN": {POS: NOUN},
-    "ADP": {POS: ADP},
-    "PRON": {POS: PRON},
-    "SCONJ": {POS: SCONJ},
-    "PROPN": {POS: PROPN},
-    "DET": {POS: DET},
-    "SYM": {POS: SYM},
-    "INTJ": {POS: INTJ},
-    "PUNCT": {POS: PUNCT},
-    "NUM": {POS: NUM},
-    "AUX": {POS: AUX},
-    "X": {POS: X},
-    "CONJ": {POS: CONJ},
-    "CCONJ": {POS: CCONJ},
-    "ADJ": {POS: ADJ},
-    "VERB": {POS: VERB},
-    "PART": {POS: PART},
-}
diff --git a/spacy/lang/eu/__init__.py b/spacy/lang/eu/__init__.py
index 4f3338c1d..b72529fab 100644
--- a/spacy/lang/eu/__init__.py
+++ b/spacy/lang/eu/__init__.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 from .stop_words import STOP_WORDS
 from .lex_attrs import LEX_ATTRS
 from .punctuation import TOKENIZER_SUFFIXES
-from .tag_map import TAG_MAP
 
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 from ...language import Language
@@ -17,7 +16,6 @@ class BasqueDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: "eu"
 
     tokenizer_exceptions = BASE_EXCEPTIONS
-    tag_map = TAG_MAP
     stop_words = STOP_WORDS
     suffixes = TOKENIZER_SUFFIXES
 
diff --git a/spacy/lang/eu/tag_map.py b/spacy/lang/eu/tag_map.py
deleted file mode 100644
index 2499d7e3e..000000000
--- a/spacy/lang/eu/tag_map.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, SYM, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
-from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON
-
-TAG_MAP = {
-    ".": {POS: PUNCT, "PunctType": "peri"},
-    ",": {POS: PUNCT, "PunctType": "comm"},
-    "-LRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "ini"},
-    "-RRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "fin"},
-    "``": {POS: PUNCT, "PunctType": "quot", "PunctSide": "ini"},
-    '""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
-    "''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
-    ":": {POS: PUNCT},
-    "$": {POS: SYM, "Other": {"SymType": "currency"}},
-    "#": {POS: SYM, "Other": {"SymType": "numbersign"}},
-    "AFX": {POS: ADJ, "Hyph": "yes"},
-    "CC": {POS: CCONJ, "ConjType": "coor"},
-    "CD": {POS: NUM, "NumType": "card"},
-    "DT": {POS: DET},
-    "EX": {POS: ADV, "AdvType": "ex"},
-    "FW": {POS: X, "Foreign": "yes"},
-    "HYPH": {POS: PUNCT, "PunctType": "dash"},
-    "IN": {POS: ADP},
-    "JJ": {POS: ADJ, "Degree": "pos"},
-    "JJR": {POS: ADJ, "Degree": "comp"},
-    "JJS": {POS: ADJ, "Degree": "sup"},
-    "LS": {POS: PUNCT, "NumType": "ord"},
-    "MD": {POS: VERB, "VerbType": "mod"},
-    "NIL": {POS: ""},
-    "NN": {POS: NOUN, "Number": "sing"},
-    "NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"},
-    "NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"},
-    "NNS": {POS: NOUN, "Number": "plur"},
-    "PDT": {POS: ADJ, "AdjType": "pdt", "PronType": "prn"},
-    "POS": {POS: PART, "Poss": "yes"},
-    "PRP": {POS: PRON, "PronType": "prs"},
-    "PRP$": {POS: ADJ, "PronType": "prs", "Poss": "yes"},
-    "RB": {POS: ADV, "Degree": "pos"},
-    "RBR": {POS: ADV, "Degree": "comp"},
-    "RBS": {POS: ADV, "Degree": "sup"},
-    "RP": {POS: PART},
-    "SP": {POS: SPACE},
-    "SYM": {POS: SYM},
-    "TO": {POS: PART, "PartType": "inf", "VerbForm": "inf"},
-    "UH": {POS: INTJ},
-    "VB": {POS: VERB, "VerbForm": "inf"},
-    "VBD": {POS: VERB, "VerbForm": "fin", "Tense": "past"},
-    "VBG": {POS: VERB, "VerbForm": "part", "Tense": "pres", "Aspect": "prog"},
-    "VBN": {POS: VERB, "VerbForm": "part", "Tense": "past", "Aspect": "perf"},
-    "VBP": {POS: VERB, "VerbForm": "fin", "Tense": "pres"},
-    "VBZ": {
-        POS: VERB,
-        "VerbForm": "fin",
-        "Tense": "pres",
-        "Number": "sing",
-        "Person": 3,
-    },
-    "WDT": {POS: ADJ, "PronType": "int|rel"},
-    "WP": {POS: NOUN, "PronType": "int|rel"},
-    "WP$": {POS: ADJ, "Poss": "yes", "PronType": "int|rel"},
-    "WRB": {POS: ADV, "PronType": "int|rel"},
-    "ADD": {POS: X},
-    "NFP": {POS: PUNCT},
-    "GW": {POS: X},
-    "XX": {POS: X},
-    "BES": {POS: VERB},
-    "HVS": {POS: VERB},
-    "_SP": {POS: SPACE},
-}
diff --git a/spacy/lang/hy/tag_map.py b/spacy/lang/hy/tag_map.py
index 722270110..4d5b6e918 100644
--- a/spacy/lang/hy/tag_map.py
+++ b/spacy/lang/hy/tag_map.py
@@ -27,7 +27,7 @@ TAG_MAP = {
         POS: ADP,
         "AdpType": "Post",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
     },
     "ADP_AdpType=Post": {POS: ADP, "AdpType": "Post"},
     "ADP_AdpType=Prep": {POS: ADP, "AdpType": "Prep"},
@@ -35,12 +35,11 @@ TAG_MAP = {
     "ADV_Degree=Cmp": {POS: ADV, "Degree": "Cmp"},
     "ADV_Degree=Pos": {POS: ADV, "Degree": "Pos"},
     "ADV_Degree=Sup": {POS: ADV, "Degree": "Sup"},
-    "ADV_Distance=Dist|PronType=Dem": {POS: ADV, "Distance": "Dist", "PronType": "Dem"},
-    "ADV_Distance=Dist|PronType=Exc": {POS: ADV, "Distance": "Dist", "PronType": "Exc"},
-    "ADV_Distance=Med|PronType=Dem": {POS: ADV, "Distance": "Med", "PronType": "Dem"},
+    "ADV_Distance=Dist|PronType=Dem": {POS: ADV, "PronType": "Dem"},
+    "ADV_Distance=Dist|PronType=Exc": {POS: ADV, "PronType": "Exc"},
+    "ADV_Distance=Med|PronType=Dem": {POS: ADV, "PronType": "Dem"},
     "ADV_Distance=Med|PronType=Dem|Style=Coll": {
         POS: ADV,
-        "Distance": "Med",
         "PronType": "Dem",
         "Style": "Coll",
     },
@@ -63,7 +62,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -73,7 +72,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Pos",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -83,7 +82,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
         "Tense": "Imp",
         "VerbForm": "Fin",
@@ -93,7 +92,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -103,7 +102,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
         "Tense": "Imp",
         "VerbForm": "Fin",
@@ -113,7 +112,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -123,7 +122,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
         "Tense": "Imp",
         "VerbForm": "Fin",
@@ -133,7 +132,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -143,7 +142,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
         "Tense": "Imp",
         "VerbForm": "Fin",
@@ -153,7 +152,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -163,7 +162,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Neg",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -173,7 +172,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Pos",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -183,7 +182,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
         "Tense": "Imp",
         "VerbForm": "Fin",
@@ -193,7 +192,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -203,7 +202,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
         "Tense": "Imp",
         "VerbForm": "Fin",
@@ -213,7 +212,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -227,7 +226,6 @@ TAG_MAP = {
     "DET_Case=Gen|Distance=Med|Number=Plur|Poss=Yes|PronType=Dem": {
         POS: DET,
         "Case": "Gen",
-        "Distance": "Med",
         "Number": "Plur",
         "Poss": "Yes",
         "PronType": "Dem",
@@ -235,7 +233,6 @@ TAG_MAP = {
     "DET_Case=Gen|Distance=Med|Number=Sing|Poss=Yes|PronType=Dem": {
         POS: DET,
         "Case": "Gen",
-        "Distance": "Med",
         "Number": "Sing",
         "Poss": "Yes",
         "PronType": "Dem",
@@ -244,7 +241,7 @@ TAG_MAP = {
         POS: DET,
         "Case": "Gen",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "Poss": "Yes",
         "PronType": "Prs",
     },
@@ -252,8 +249,7 @@ TAG_MAP = {
         POS: DET,
         "Case": "Gen",
         "Number": "Plur",
-        "Person": "2",
-        "Polite": "Infm",
+        "Person": "two",
         "Poss": "Yes",
         "PronType": "Prs",
     },
@@ -261,24 +257,22 @@ TAG_MAP = {
         POS: DET,
         "Case": "Gen",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
-        "PronType": "Emp",
     },
     "DET_Case=Gen|Number=Plur|Person=3|Poss=Yes|PronType=Emp|Reflex=Yes": {
         POS: DET,
         "Case": "Gen",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
-        "PronType": "Emp",
         "Reflex": "Yes",
     },
     "DET_Case=Gen|Number=Sing|Person=1|Poss=Yes|PronType=Prs": {
         POS: DET,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Poss": "Yes",
         "PronType": "Prs",
     },
@@ -286,8 +280,7 @@ TAG_MAP = {
         POS: DET,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "2",
-        "Polite": "Infm",
+        "Person": "two",
         "Poss": "Yes",
         "PronType": "Prs",
     },
@@ -295,24 +288,22 @@ TAG_MAP = {
         POS: DET,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
-        "PronType": "Emp",
     },
     "DET_Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Emp|Reflex=Yes": {
         POS: DET,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
-        "PronType": "Emp",
         "Reflex": "Yes",
     },
     "DET_Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {
         POS: DET,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
         "PronType": "Prs",
     },
@@ -323,30 +314,26 @@ TAG_MAP = {
         "Poss": "Yes",
         "PronType": "Rel",
     },
-    "DET_Distance=Dist|PronType=Dem": {POS: DET, "Distance": "Dist", "PronType": "Dem"},
+    "DET_Distance=Dist|PronType=Dem": {POS: DET, "PronType": "Dem"},
     "DET_Distance=Dist|PronType=Dem|Style=Coll": {
         POS: DET,
-        "Distance": "Dist",
         "PronType": "Dem",
         "Style": "Coll",
     },
     "DET_Distance=Dist|PronType=Dem|Style=Vrnc": {
         POS: DET,
-        "Distance": "Dist",
         "PronType": "Dem",
         "Style": "Vrnc",
     },
-    "DET_Distance=Med|PronType=Dem": {POS: DET, "Distance": "Med", "PronType": "Dem"},
+    "DET_Distance=Med|PronType=Dem": {POS: DET, "PronType": "Dem"},
     "DET_Distance=Med|PronType=Dem|Style=Coll": {
         POS: DET,
-        "Distance": "Med",
         "PronType": "Dem",
         "Style": "Coll",
     },
-    "DET_Distance=Prox|PronType=Dem": {POS: DET, "Distance": "Prox", "PronType": "Dem"},
+    "DET_Distance=Prox|PronType=Dem": {POS: DET, "PronType": "Dem"},
     "DET_Distance=Prox|PronType=Dem|Style=Coll": {
         POS: DET,
-        "Distance": "Prox",
         "PronType": "Dem",
         "Style": "Coll",
     },
@@ -386,7 +373,6 @@ TAG_MAP = {
         "Case": "Abl",
         "Definite": "Ind",
         "Number": "Plur",
-        "Style": "Slng",
     },
     "NOUN_Animacy=Hum|Case=Abl|Definite=Ind|Number=Sing": {
         POS: NOUN,
@@ -415,14 +401,12 @@ TAG_MAP = {
         "Case": "Dat",
         "Definite": "Def",
         "Number": "Sing",
-        "Style": "Slng",
     },
     "NOUN_Animacy=Hum|Case=Dat|Definite=Ind|Number=Assoc": {
         POS: NOUN,
         "Animacy": "Hum",
         "Case": "Dat",
         "Definite": "Ind",
-        "Number": "Assoc",
     },
     "NOUN_Animacy=Hum|Case=Dat|Definite=Ind|Number=Plur": {
         POS: NOUN,
@@ -445,7 +429,6 @@ TAG_MAP = {
         "Case": "Dat",
         "Definite": "Ind",
         "Number": "Plur",
-        "Style": "Slng",
     },
     "NOUN_Animacy=Hum|Case=Dat|Definite=Ind|Number=Sing": {
         POS: NOUN,
@@ -468,7 +451,7 @@ TAG_MAP = {
         "Case": "Dat",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
     },
     "NOUN_Animacy=Hum|Case=Dat|Number=Sing|Number=Sing|Person=1|Style=Coll": {
         POS: NOUN,
@@ -476,7 +459,7 @@ TAG_MAP = {
         "Case": "Dat",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Style": "Coll",
     },
     "NOUN_Animacy=Hum|Case=Ins|Definite=Ind|Number=Sing": {
@@ -499,7 +482,6 @@ TAG_MAP = {
         "Case": "Nom",
         "Definite": "Def",
         "Number": "Plur",
-        "Style": "Slng",
     },
     "NOUN_Animacy=Hum|Case=Nom|Definite=Def|Number=Sing": {
         POS: NOUN,
@@ -521,7 +503,6 @@ TAG_MAP = {
         "Animacy": "Hum",
         "Case": "Nom",
         "Definite": "Ind",
-        "Number": "Assoc",
     },
     "NOUN_Animacy=Hum|Case=Nom|Definite=Ind|Number=Plur": {
         POS: NOUN,
@@ -544,7 +525,6 @@ TAG_MAP = {
         "Case": "Nom",
         "Definite": "Ind",
         "Number": "Plur",
-        "Style": "Slng",
     },
     "NOUN_Animacy=Hum|Case=Nom|Definite=Ind|Number=Plur|Typo=Yes": {
         POS: NOUN,
@@ -575,14 +555,13 @@ TAG_MAP = {
         "Case": "Nom",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
     },
     "NOUN_Animacy=Nhum|Case=Abl|Definite=Ind|Number=Coll": {
         POS: NOUN,
         "Animacy": "Nhum",
         "Case": "Abl",
         "Definite": "Ind",
-        "Number": "Coll",
     },
     "NOUN_Animacy=Nhum|Case=Abl|Definite=Ind|Number=Plur": {
         POS: NOUN,
@@ -612,14 +591,13 @@ TAG_MAP = {
         "Case": "Abl",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
     },
     "NOUN_Animacy=Nhum|Case=Dat|Definite=Def|Number=Coll": {
         POS: NOUN,
         "Animacy": "Nhum",
         "Case": "Dat",
         "Definite": "Def",
-        "Number": "Coll",
     },
     "NOUN_Animacy=Nhum|Case=Dat|Definite=Def|Number=Plur": {
         POS: NOUN,
@@ -672,7 +650,6 @@ TAG_MAP = {
         "Animacy": "Nhum",
         "Case": "Dat",
         "Definite": "Ind",
-        "Number": "Coll",
     },
     "NOUN_Animacy=Nhum|Case=Dat|Definite=Ind|Number=Plur": {
         POS: NOUN,
@@ -716,9 +693,9 @@ TAG_MAP = {
         POS: NOUN,
         "Animacy": "Nhum",
         "Case": "Dat",
-        # "Number": "Coll",
+        #
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
     },
     "NOUN_Animacy=Nhum|Case=Dat|Number=Sing|Number=Sing|Person=1": {
         POS: NOUN,
@@ -726,7 +703,7 @@ TAG_MAP = {
         "Case": "Dat",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
     },
     "NOUN_Animacy=Nhum|Case=Dat|Number=Sing|Number=Sing|Person=2": {
         POS: NOUN,
@@ -734,7 +711,7 @@ TAG_MAP = {
         "Case": "Dat",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
     },
     "NOUN_Animacy=Nhum|Case=Gen|Definite=Ind|Number=Sing|Style=Arch": {
         POS: NOUN,
@@ -749,7 +726,6 @@ TAG_MAP = {
         "Animacy": "Nhum",
         "Case": "Ins",
         "Definite": "Ind",
-        "Number": "Coll",
     },
     "NOUN_Animacy=Nhum|Case=Ins|Definite=Ind|Number=Plur": {
         POS: NOUN,
@@ -779,7 +755,7 @@ TAG_MAP = {
         "Case": "Ins",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
     },
     "NOUN_Animacy=Nhum|Case=Loc|Definite=Ind|Number=Plur": {
         POS: NOUN,
@@ -801,14 +777,13 @@ TAG_MAP = {
         "Case": "Loc",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
     },
     "NOUN_Animacy=Nhum|Case=Nom|Definite=Def|Number=Coll": {
         POS: NOUN,
         "Animacy": "Nhum",
         "Case": "Nom",
         "Definite": "Def",
-        "Number": "Coll",
     },
     "NOUN_Animacy=Nhum|Case=Nom|Definite=Def|Number=Plur|Number=Sing|Poss=Yes": {
         POS: NOUN,
@@ -846,14 +821,12 @@ TAG_MAP = {
         "Animacy": "Nhum",
         "Case": "Nom",
         "Definite": "Ind",
-        "Number": "Coll",
     },
     "NOUN_Animacy=Nhum|Case=Nom|Definite=Ind|Number=Coll|Typo=Yes": {
         POS: NOUN,
         "Animacy": "Nhum",
         "Case": "Nom",
         "Definite": "Ind",
-        "Number": "Coll",
         "Typo": "Yes",
     },
     "NOUN_Animacy=Nhum|Case=Nom|Definite=Ind|Number=Plur": {
@@ -882,7 +855,7 @@ TAG_MAP = {
         "Case": "Nom",
         # "Number": "Plur",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
     },
     "NOUN_Animacy=Nhum|Case=Nom|Number=Sing|Number=Sing|Person=1": {
         POS: NOUN,
@@ -890,7 +863,7 @@ TAG_MAP = {
         "Case": "Nom",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
     },
     "NOUN_Animacy=Nhum|Case=Nom|Number=Sing|Number=Sing|Person=2": {
         POS: NOUN,
@@ -898,7 +871,7 @@ TAG_MAP = {
         "Case": "Nom",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
     },
     "NUM_NumForm=Digit|NumType=Card": {POS: NUM, "NumForm": "Digit", "NumType": "Card"},
     "NUM_NumForm=Digit|NumType=Frac|Typo=Yes": {
@@ -907,43 +880,37 @@ TAG_MAP = {
         "NumType": "Frac",
         "Typo": "Yes",
     },
-    "NUM_NumForm=Digit|NumType=Range": {
-        POS: NUM,
-        "NumForm": "Digit",
-        "NumType": "Range",
-    },
+    "NUM_NumForm=Digit|NumType=Range": {POS: NUM, "NumForm": "Digit",},
     "NUM_NumForm=Word|NumType=Card": {POS: NUM, "NumForm": "Word", "NumType": "Card"},
     "NUM_NumForm=Word|NumType=Dist": {POS: NUM, "NumForm": "Word", "NumType": "Dist"},
-    "NUM_NumForm=Word|NumType=Range": {POS: NUM, "NumForm": "Word", "NumType": "Range"},
+    "NUM_NumForm=Word|NumType=Range": {POS: NUM, "NumForm": "Word",},
     "PART_Polarity=Neg": {POS: PART, "Polarity": "Neg"},
     "PRON_Case=Abl|Definite=Ind|Number=Sing|Person=3|PronType=Prs": {
         POS: PRON,
         "Case": "Abl",
         "Definite": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Abl|Number=Plur|Person=3|PronType=Prs": {
         POS: PRON,
         "Case": "Abl",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Abl|Number=Sing|Person=2|Polite=Infm|PronType=Prs": {
         POS: PRON,
         "Case": "Abl",
         "Number": "Sing",
-        "Person": "2",
-        "Polite": "Infm",
+        "Person": "two",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Definite=Def|Distance=Dist|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Dat",
         "Definite": "Def",
-        "Distance": "Dist",
         "Number": "Sing",
         "PronType": "Dem",
     },
@@ -952,7 +919,7 @@ TAG_MAP = {
         "Case": "Dat",
         "Definite": "Def",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Definite=Ind|Number=Sing|PronType=Int": {
@@ -965,14 +932,12 @@ TAG_MAP = {
     "PRON_Case=Dat|Distance=Dist|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Dat",
-        "Distance": "Dist",
         "Number": "Sing",
         "PronType": "Dem",
     },
     "PRON_Case=Dat|Distance=Med|Number=Plur|PronType=Dem": {
         POS: PRON,
         "Case": "Dat",
-        "Distance": "Med",
         "Number": "Plur",
         "PronType": "Dem",
     },
@@ -980,30 +945,28 @@ TAG_MAP = {
         POS: PRON,
         "Case": "Dat",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Number=Plur|Person=2|Polite=Infm|PronType=Prs": {
         POS: PRON,
         "Case": "Dat",
         "Number": "Plur",
-        "Person": "2",
-        "Polite": "Infm",
+        "Person": "two",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Number=Plur|Person=3|PronType=Emp|Reflex=Yes": {
         POS: PRON,
         "Case": "Dat",
         "Number": "Plur",
-        "Person": "3",
-        "PronType": "Emp",
+        "Person": "three",
         "Reflex": "Yes",
     },
     "PRON_Case=Dat|Number=Plur|Person=3|PronType=Prs": {
         POS: PRON,
         "Case": "Dat",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Number=Plur|PronType=Rcp": {
@@ -1016,30 +979,27 @@ TAG_MAP = {
         POS: PRON,
         "Case": "Dat",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Number=Sing|Person=2|Polite=Infm|PronType=Prs": {
         POS: PRON,
         "Case": "Dat",
         "Number": "Sing",
-        "Person": "2",
-        "Polite": "Infm",
+        "Person": "two",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Number=Sing|Person=3|PronType=Emp": {
         POS: PRON,
         "Case": "Dat",
         "Number": "Sing",
-        "Person": "3",
-        "PronType": "Emp",
+        "Person": "three",
     },
     "PRON_Case=Dat|Number=Sing|Person=3|PronType=Emp|Reflex=Yes": {
         POS: PRON,
         "Case": "Dat",
         "Number": "Sing",
-        "Person": "3",
-        "PronType": "Emp",
+        "Person": "three",
         "Reflex": "Yes",
     },
     "PRON_Case=Dat|Number=Sing|PronType=Int": {
@@ -1058,7 +1018,6 @@ TAG_MAP = {
     "PRON_Case=Gen|Distance=Med|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Gen",
-        "Distance": "Med",
         "Number": "Sing",
         "PronType": "Dem",
     },
@@ -1066,21 +1025,21 @@ TAG_MAP = {
         POS: PRON,
         "Case": "Gen",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "PRON_Case=Gen|Number=Sing|Person=2|PronType=Prs": {
         POS: PRON,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "PronType": "Prs",
     },
     "PRON_Case=Gen|Number=Sing|Person=3|PronType=Prs": {
         POS: PRON,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Gen|PronType=Tot": {POS: PRON, "Case": "Gen", "PronType": "Tot"},
@@ -1094,7 +1053,6 @@ TAG_MAP = {
     "PRON_Case=Ins|Distance=Med|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Ins",
-        "Distance": "Med",
         "Number": "Sing",
         "PronType": "Dem",
     },
@@ -1108,7 +1066,6 @@ TAG_MAP = {
     "PRON_Case=Loc|Distance=Med|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Loc",
-        "Distance": "Med",
         "Number": "Sing",
         "PronType": "Dem",
     },
@@ -1116,7 +1073,6 @@ TAG_MAP = {
         POS: PRON,
         "Case": "Nom",
         "Definite": "Def",
-        "Distance": "Dist",
         "Number": "Plur",
         "PronType": "Dem",
     },
@@ -1124,7 +1080,6 @@ TAG_MAP = {
         POS: PRON,
         "Case": "Nom",
         "Definite": "Def",
-        "Distance": "Med",
         "Number": "Sing",
         "PronType": "Dem",
         "Style": "Coll",
@@ -1167,29 +1122,25 @@ TAG_MAP = {
     "PRON_Case=Nom|Distance=Dist|Number=Plur|Person=1|PronType=Dem": {
         POS: PRON,
         "Case": "Nom",
-        "Distance": "Dist",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Dem",
     },
     "PRON_Case=Nom|Distance=Med|Number=Plur|PronType=Dem": {
         POS: PRON,
         "Case": "Nom",
-        "Distance": "Med",
         "Number": "Plur",
         "PronType": "Dem",
     },
     "PRON_Case=Nom|Distance=Med|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Nom",
-        "Distance": "Med",
         "Number": "Sing",
         "PronType": "Dem",
     },
     "PRON_Case=Nom|Distance=Prox|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Nom",
-        "Distance": "Prox",
         "Number": "Sing",
         "PronType": "Dem",
     },
@@ -1197,21 +1148,20 @@ TAG_MAP = {
         POS: PRON,
         "Case": "Nom",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "PRON_Case=Nom|Number=Plur|Person=3|PronType=Emp": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Plur",
-        "Person": "3",
-        "PronType": "Emp",
+        "Person": "three",
     },
     "PRON_Case=Nom|Number=Plur|Person=3|PronType=Prs": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Nom|Number=Plur|PronType=Rel": {
@@ -1225,44 +1175,41 @@ TAG_MAP = {
         "Case": "Nom",
         # "Number": "Sing",
         "Number": "Plur",
-        # "Person": "3",
-        "Person": "1",
-        "PronType": "Emp",
+        # "Person": "three",
+        "Person": "one",
     },
     "PRON_Case=Nom|Number=Sing|Person=1|PronType=Int": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Int",
     },
     "PRON_Case=Nom|Number=Sing|Person=1|PronType=Prs": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "PRON_Case=Nom|Number=Sing|Person=2|Polite=Infm|PronType=Prs": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Sing",
-        "Person": "2",
-        "Polite": "Infm",
+        "Person": "two",
         "PronType": "Prs",
     },
     "PRON_Case=Nom|Number=Sing|Person=3|PronType=Emp": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Sing",
-        "Person": "3",
-        "PronType": "Emp",
+        "Person": "three",
     },
     "PRON_Case=Nom|Number=Sing|Person=3|PronType=Prs": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Nom|Number=Sing|PronType=Int": {
@@ -1280,26 +1227,23 @@ TAG_MAP = {
     "PRON_Case=Nom|Person=1|PronType=Tot": {
         POS: PRON,
         "Case": "Nom",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Tot",
     },
     "PRON_Case=Nom|PronType=Ind": {POS: PRON, "Case": "Nom", "PronType": "Ind"},
     "PRON_Case=Nom|PronType=Tot": {POS: PRON, "Case": "Nom", "PronType": "Tot"},
     "PRON_Distance=Dist|Number=Sing|PronType=Dem": {
         POS: PRON,
-        "Distance": "Dist",
         "Number": "Sing",
         "PronType": "Dem",
     },
     "PRON_Distance=Med|PronType=Dem|Style=Coll": {
         POS: PRON,
-        "Distance": "Med",
         "PronType": "Dem",
         "Style": "Coll",
     },
     "PRON_Distance=Prox|PronType=Dem|Style=Coll": {
         POS: PRON,
-        "Distance": "Prox",
         "PronType": "Dem",
         "Style": "Coll",
     },
@@ -1384,7 +1328,6 @@ TAG_MAP = {
         "Case": "Abl",
         "Definite": "Ind",
         "NameType": "Geo",
-        "Number": "Coll",
     },
     "PROPN_Animacy=Nhum|Case=Abl|Definite=Ind|NameType=Geo|Number=Sing": {
         POS: PROPN,
@@ -1449,7 +1392,6 @@ TAG_MAP = {
         "Case": "Nom",
         "Definite": "Ind",
         "NameType": "Geo",
-        "Number": "Coll",
     },
     "PROPN_Animacy=Nhum|Case=Nom|Definite=Ind|NameType=Geo|Number=Sing": {
         POS: PROPN,
@@ -1471,41 +1413,31 @@ TAG_MAP = {
     "SCONJ_Style=Coll": {POS: SCONJ, "Style": "Coll"},
     "VERB_Aspect=Dur|Polarity=Neg|Subcat=Intr|VerbForm=Part|Voice=Pass": {
         POS: VERB,
-        "Aspect": "Dur",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
     "VERB_Aspect=Dur|Polarity=Pos|Subcat=Intr|VerbForm=Part|Voice=Mid": {
         POS: VERB,
-        "Aspect": "Dur",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
     "VERB_Aspect=Dur|Polarity=Pos|Subcat=Intr|VerbForm=Part|Voice=Pass": {
         POS: VERB,
-        "Aspect": "Dur",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
     "VERB_Aspect=Dur|Polarity=Pos|Subcat=Tran|VerbForm=Part|Voice=Act": {
         POS: VERB,
-        "Aspect": "Dur",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Act",
     },
     "VERB_Aspect=Dur|Polarity=Pos|Subcat=Tran|VerbForm=Part|Voice=Mid": {
         POS: VERB,
-        "Aspect": "Dur",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
@@ -1514,9 +1446,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1526,9 +1457,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1538,9 +1468,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1550,9 +1479,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1562,9 +1490,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Imp",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1574,9 +1501,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Neg",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1586,9 +1512,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1598,9 +1523,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1610,9 +1534,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1622,9 +1545,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Imp",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1634,9 +1556,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1645,7 +1566,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Imp",
         "Style": "Coll",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
@@ -1653,48 +1573,41 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Imp",
         "Style": "Vrnc",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
     "VERB_Aspect=Imp|Subcat=Intr|VerbForm=Part": {
         POS: VERB,
         "Aspect": "Imp",
-        "Subcat": "Intr",
         "VerbForm": "Part",
     },
     "VERB_Aspect=Imp|Subcat=Intr|VerbForm=Part|Voice=Act": {
         POS: VERB,
         "Aspect": "Imp",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Act",
     },
     "VERB_Aspect=Imp|Subcat=Intr|VerbForm=Part|Voice=Mid": {
         POS: VERB,
         "Aspect": "Imp",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
     "VERB_Aspect=Imp|Subcat=Intr|VerbForm=Part|Voice=Pass": {
         POS: VERB,
         "Aspect": "Imp",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
     "VERB_Aspect=Imp|Subcat=Tran|VerbForm=Part|Voice=Act": {
         POS: VERB,
         "Aspect": "Imp",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Act",
     },
     "VERB_Aspect=Imp|Subcat=Tran|VerbForm=Part|Voice=Cau": {
         POS: VERB,
         "Aspect": "Imp",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Cau",
     },
@@ -1703,9 +1616,7 @@ TAG_MAP = {
         "Aspect": "Iter",
         "Case": "Ins",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Mid",
     },
@@ -1714,9 +1625,7 @@ TAG_MAP = {
         "Aspect": "Iter",
         "Case": "Ins",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Gdv",
         "Voice": "Act",
     },
@@ -1726,9 +1635,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1738,9 +1646,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1750,9 +1657,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1762,9 +1668,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1774,10 +1679,9 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
         "Style": "Vrnc",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1787,9 +1691,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1799,9 +1702,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1811,9 +1713,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1823,10 +1724,9 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
         "Style": "Vrnc",
-        "Subcat": "Intr",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1836,9 +1736,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1848,9 +1747,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1860,9 +1758,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1872,9 +1769,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1883,7 +1779,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Perf",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
@@ -1891,7 +1786,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Perf",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
@@ -1899,7 +1793,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Perf",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
@@ -1907,7 +1800,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Perf",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Act",
     },
@@ -1915,7 +1807,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Perf",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
@@ -1929,35 +1820,30 @@ TAG_MAP = {
     "VERB_Aspect=Perf|Subcat=Intr|VerbForm=Part|Voice=Mid": {
         POS: VERB,
         "Aspect": "Perf",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
     "VERB_Aspect=Perf|Subcat=Intr|VerbForm=Part|Voice=Pass": {
         POS: VERB,
         "Aspect": "Perf",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
     "VERB_Aspect=Perf|Subcat=Tran|VerbForm=Part|Voice=Act": {
         POS: VERB,
         "Aspect": "Perf",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Act",
     },
     "VERB_Aspect=Perf|Subcat=Tran|VerbForm=Part|Voice=Cau": {
         POS: VERB,
         "Aspect": "Perf",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Cau",
     },
     "VERB_Aspect=Prog|Subcat=Intr|VerbForm=Conv|Voice=Mid": {
         POS: VERB,
         "Aspect": "Prog",
-        "Subcat": "Intr",
         "VerbForm": "Conv",
         "Voice": "Mid",
     },
@@ -1966,7 +1852,6 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Connegative": "Yes",
         "Mood": "Cnd",
-        "Subcat": "Tran",
         "VerbForm": "Fin",
         "Voice": "Act",
     },
@@ -1975,10 +1860,9 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
         "Style": "Vrnc",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1988,9 +1872,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2000,9 +1883,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2012,9 +1894,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2024,9 +1905,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2036,9 +1916,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Pass",
@@ -2048,9 +1927,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Imp",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2060,9 +1938,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2072,8 +1949,7 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Imp",
         "Number": "Sing",
-        "Person": "2",
-        "Subcat": "Intr",
+        "Person": "two",
         "VerbForm": "Fin",
         "Voice": "Mid",
     },
@@ -2082,8 +1958,7 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Imp",
         "Number": "Sing",
-        "Person": "2",
-        "Subcat": "Tran",
+        "Person": "two",
         "VerbForm": "Fin",
         "Voice": "Act",
     },
@@ -2092,9 +1967,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2104,9 +1978,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2116,9 +1989,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2128,9 +2000,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2140,9 +2011,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2152,9 +2022,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2164,9 +2033,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2176,9 +2044,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Imp",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2188,9 +2055,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2200,9 +2066,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Imp",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2212,9 +2077,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2224,9 +2088,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Fin",
         "Voice": "Pass",
     },
@@ -2235,9 +2098,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Imp",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2247,9 +2109,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2258,9 +2119,8 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Prosp",
         "Mood": "Sub",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2269,7 +2129,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Prosp",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
@@ -2277,28 +2136,24 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Prosp",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Act",
     },
     "VERB_Aspect=Prosp|Subcat=Intr|VerbForm=Part|Voice=Mid": {
         POS: VERB,
         "Aspect": "Prosp",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
     "VERB_Aspect=Prosp|Subcat=Intr|VerbForm=Part|Voice=Pass": {
         POS: VERB,
         "Aspect": "Prosp",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
     "VERB_Aspect=Prosp|Subcat=Tran|VerbForm=Part|Voice=Act": {
         POS: VERB,
         "Aspect": "Prosp",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Act",
     },
@@ -2306,9 +2161,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Abl",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Mid",
     },
@@ -2316,9 +2169,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Abl",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Pass",
     },
@@ -2326,9 +2177,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Abl",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Gdv",
         "Voice": "Act",
     },
@@ -2336,9 +2185,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Dat",
         "Definite": "Def",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Mid",
     },
@@ -2346,9 +2193,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Dat",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Pass",
     },
@@ -2356,9 +2201,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Dat",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Mid",
     },
@@ -2366,9 +2209,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Dat",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Gdv",
         "Voice": "Act",
     },
@@ -2376,9 +2217,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Ins",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Mid",
     },
@@ -2386,9 +2225,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Ins",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Gdv",
         "Voice": "Act",
     },
@@ -2396,9 +2233,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Nom",
         "Definite": "Def",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Mid",
     },
@@ -2406,9 +2241,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Nom",
         "Definite": "Def",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Gdv",
         "Voice": "Act",
     },
@@ -2416,15 +2249,13 @@ TAG_MAP = {
         POS: VERB,
         "Mood": "Imp",
         "Number": "Sing",
-        "Person": "2",
-        "Subcat": "Intr",
+        "Person": "two",
         "VerbForm": "Fin",
         "Voice": "Mid",
     },
     "VERB_Polarity=Neg|Subcat=Intr|VerbForm=Inf|Voice=Mid": {
         POS: VERB,
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "VerbForm": "Inf",
         "Voice": "Mid",
     },
@@ -2432,7 +2263,6 @@ TAG_MAP = {
         POS: VERB,
         "Polarity": "Pos",
         "Style": "Coll",
-        "Subcat": "Tran",
         "VerbForm": "Inf",
         "Voice": "Act",
     },
@@ -2440,28 +2270,24 @@ TAG_MAP = {
         POS: VERB,
         "Polarity": "Pos",
         "Style": "Vrnc",
-        "Subcat": "Tran",
         "VerbForm": "Inf",
         "Voice": "Act",
     },
     "VERB_Polarity=Pos|Subcat=Intr|VerbForm=Inf|Voice=Mid": {
         POS: VERB,
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Inf",
         "Voice": "Mid",
     },
     "VERB_Polarity=Pos|Subcat=Intr|VerbForm=Inf|Voice=Pass": {
         POS: VERB,
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Inf",
         "Voice": "Pass",
     },
     "VERB_Polarity=Pos|Subcat=Tran|Typo=Yes|VerbForm=Inf|Voice=Act": {
         POS: VERB,
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Typo": "Yes",
         "VerbForm": "Inf",
         "Voice": "Act",
@@ -2469,7 +2295,6 @@ TAG_MAP = {
     "VERB_Polarity=Pos|Subcat=Tran|VerbForm=Inf|Voice=Act": {
         POS: VERB,
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Inf",
         "Voice": "Act",
     },
diff --git a/spacy/lang/pl/tag_map.py b/spacy/lang/pl/tag_map.py
index 5356c26cb..ed7d6487e 100644
--- a/spacy/lang/pl/tag_map.py
+++ b/spacy/lang/pl/tag_map.py
@@ -104,79 +104,79 @@ TAG_MAP = {
     "adv:com": {POS: ADV, "Degree": "cmp"},
     "adv:pos": {POS: ADV, "Degree": "pos"},
     "adv:sup": {POS: ADV, "Degree": "sup"},
-    "aglt:pl:pri:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": 1, "Aspect": "imp", "Variant": "short"},
-    "aglt:pl:pri:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": 1, "Aspect": "imp", "Variant": "long"},
-    "aglt:pl:sec:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": 2, "Aspect": "imp", "Variant": "short"},
-    "aglt:pl:sec:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": 2, "Aspect": "imp", "Variant": "long"},
-    "aglt:sg:pri:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": 1, "Aspect": "imp", "Variant": "short"},
-    "aglt:sg:pri:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": 1, "Aspect": "imp", "Variant": "long"},
-    "aglt:sg:sec:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": 2, "Aspect": "imp", "Variant": "short"},
-    "aglt:sg:sec:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": 2, "Aspect": "imp", "Variant": "long"},
-    "bedzie:pl:pri:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "plur", "Person": 1, "Aspect": "imp"},
-    "bedzie:pl:sec:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "plur", "Person": 2, "Aspect": "imp"},
-    "bedzie:pl:ter:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "plur", "Person": 3, "Aspect": "imp"},
-    "bedzie:sg:pri:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "sing", "Person": 1, "Aspect": "imp"},
-    "bedzie:sg:sec:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "sing", "Person": 2, "Aspect": "imp"},
-    "bedzie:sg:ter:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "sing", "Person": 3, "Aspect": "imp"},
+    "aglt:pl:pri:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": "one", "Aspect": "imp", },
+    "aglt:pl:pri:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": "one", "Aspect": "imp", },
+    "aglt:pl:sec:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": "two", "Aspect": "imp", },
+    "aglt:pl:sec:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": "two", "Aspect": "imp", },
+    "aglt:sg:pri:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": "one", "Aspect": "imp", },
+    "aglt:sg:pri:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": "one", "Aspect": "imp", },
+    "aglt:sg:sec:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": "two", "Aspect": "imp", },
+    "aglt:sg:sec:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": "two", "Aspect": "imp", },
+    "bedzie:pl:pri:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "plur", "Person": "one", "Aspect": "imp"},
+    "bedzie:pl:sec:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "plur", "Person": "two", "Aspect": "imp"},
+    "bedzie:pl:ter:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "plur", "Person": "three", "Aspect": "imp"},
+    "bedzie:sg:pri:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "sing", "Person": "one", "Aspect": "imp"},
+    "bedzie:sg:sec:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "sing", "Person": "two", "Aspect": "imp"},
+    "bedzie:sg:ter:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "sing", "Person": "three", "Aspect": "imp"},
     "burk": {POS: X},
     "comp": {POS: SCONJ},
     "conj": {POS: CCONJ},
     "depr:pl:nom:m2": {POS: NOUN, "Animacy": "anim", "Number": "plur", "Case": "nom", "Gender": "masc", "Animacy": "anim"},
     "depr:pl:voc:m2": {POS: NOUN, "Animacy": "anim", "Number": "plur", "Case": "voc", "Gender": "masc", "Animacy": "anim"},
-    "fin:pl:pri:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 1, "Aspect": "imp"},
-    "fin:pl:pri:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 1, "Aspect": "imp|perf"},
-    "fin:pl:pri:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 1, "Aspect": "perf"},
-    "fin:pl:sec:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 2, "Aspect": "imp"},
-    "fin:pl:sec:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 2, "Aspect": "imp|perf"},
-    "fin:pl:sec:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 2, "Aspect": "perf"},
-    "fin:pl:ter:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 3, "Aspect": "imp"},
-    "fin:pl:ter:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 3, "Aspect": "imp|perf"},
-    "fin:pl:ter:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 3, "Aspect": "perf"},
-    "fin:sg:pri:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 1, "Aspect": "imp"},
-    "fin:sg:pri:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 1, "Aspect": "imp|perf"},
-    "fin:sg:pri:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 1, "Aspect": "perf"},
-    "fin:sg:sec:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 2, "Aspect": "imp"},
-    "fin:sg:sec:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 2, "Aspect": "imp|perf"},
-    "fin:sg:sec:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 2, "Aspect": "perf"},
-    "fin:sg:ter:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 3, "Aspect": "imp"},
-    "fin:sg:ter:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 3, "Aspect": "imp|perf"},
-    "fin:sg:ter:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 3, "Aspect": "perf"},
-    "ger:sg:dat.loc:n2:imperf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
-    "ger:sg:dat.loc:n2:imperf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
-    "ger:sg:dat.loc:n2:imperf.perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
-    "ger:sg:dat.loc:n2:imperf.perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
-    "ger:sg:dat.loc:n2:perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"},
-    "ger:sg:dat.loc:n2:perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"},
-    "ger:sg:gen:n2:imperf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
-    "ger:sg:gen:n2:imperf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
-    "ger:sg:gen:n2:imperf.perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
-    "ger:sg:gen:n2:imperf.perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
-    "ger:sg:gen:n2:perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"},
-    "ger:sg:gen:n2:perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"},
-    "ger:sg:inst:n2:imperf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
-    "ger:sg:inst:n2:imperf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
-    "ger:sg:inst:n2:imperf.perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
-    "ger:sg:inst:n2:imperf.perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
-    "ger:sg:inst:n2:perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"},
-    "ger:sg:inst:n2:perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"},
-    "ger:sg:nom.acc:n2:imperf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
-    "ger:sg:nom.acc:n2:imperf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
-    "ger:sg:nom.acc:n2:imperf.perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
-    "ger:sg:nom.acc:n2:imperf.perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
-    "ger:sg:nom.acc:n2:perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"},
-    "ger:sg:nom.acc:n2:perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"},
+    "fin:pl:pri:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "one", "Aspect": "imp"},
+    "fin:pl:pri:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "one", "Aspect": "imp|perf"},
+    "fin:pl:pri:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "one", "Aspect": "perf"},
+    "fin:pl:sec:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "two", "Aspect": "imp"},
+    "fin:pl:sec:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "two", "Aspect": "imp|perf"},
+    "fin:pl:sec:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "two", "Aspect": "perf"},
+    "fin:pl:ter:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "three", "Aspect": "imp"},
+    "fin:pl:ter:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "three", "Aspect": "imp|perf"},
+    "fin:pl:ter:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "three", "Aspect": "perf"},
+    "fin:sg:pri:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "one", "Aspect": "imp"},
+    "fin:sg:pri:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "one", "Aspect": "imp|perf"},
+    "fin:sg:pri:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "one", "Aspect": "perf"},
+    "fin:sg:sec:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "two", "Aspect": "imp"},
+    "fin:sg:sec:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "two", "Aspect": "imp|perf"},
+    "fin:sg:sec:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "two", "Aspect": "perf"},
+    "fin:sg:ter:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "three", "Aspect": "imp"},
+    "fin:sg:ter:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "three", "Aspect": "imp|perf"},
+    "fin:sg:ter:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "three", "Aspect": "perf"},
+    "ger:sg:dat.loc:n2:imperf:aff": {POS: VERB, "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
+    "ger:sg:dat.loc:n2:imperf:neg": {POS: VERB, "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
+    "ger:sg:dat.loc:n2:imperf.perf:aff": {POS: VERB, "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
+    "ger:sg:dat.loc:n2:imperf.perf:neg": {POS: VERB, "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
+    "ger:sg:dat.loc:n2:perf:aff": {POS: VERB, "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"},
+    "ger:sg:dat.loc:n2:perf:neg": {POS: VERB, "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"},
+    "ger:sg:gen:n2:imperf:aff": {POS: VERB, "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
+    "ger:sg:gen:n2:imperf:neg": {POS: VERB, "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
+    "ger:sg:gen:n2:imperf.perf:aff": {POS: VERB, "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
+    "ger:sg:gen:n2:imperf.perf:neg": {POS: VERB, "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
+    "ger:sg:gen:n2:perf:aff": {POS: VERB, "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"},
+    "ger:sg:gen:n2:perf:neg": {POS: VERB, "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"},
+    "ger:sg:inst:n2:imperf:aff": {POS: VERB, "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
+    "ger:sg:inst:n2:imperf:neg": {POS: VERB, "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
+    "ger:sg:inst:n2:imperf.perf:aff": {POS: VERB, "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
+    "ger:sg:inst:n2:imperf.perf:neg": {POS: VERB, "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
+    "ger:sg:inst:n2:perf:aff": {POS: VERB, "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"},
+    "ger:sg:inst:n2:perf:neg": {POS: VERB, "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"},
+    "ger:sg:nom.acc:n2:imperf:aff": {POS: VERB, "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
+    "ger:sg:nom.acc:n2:imperf:neg": {POS: VERB, "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
+    "ger:sg:nom.acc:n2:imperf.perf:aff": {POS: VERB, "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
+    "ger:sg:nom.acc:n2:imperf.perf:neg": {POS: VERB, "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
+    "ger:sg:nom.acc:n2:perf:aff": {POS: VERB, "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"},
+    "ger:sg:nom.acc:n2:perf:neg": {POS: VERB, "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"},
     "imps:imperf": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Aspect": "imp"},
     "imps:imperf.perf": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Aspect": "imp|perf"},
     "imps:perf": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Aspect": "perf"},
-    "impt:pl:pri:imperf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": 1, "Aspect": "imp"},
-    "impt:pl:pri:imperf.perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": 1, "Aspect": "imp|perf"},
-    "impt:pl:pri:perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": 1, "Aspect": "perf"},
-    "impt:pl:sec:imperf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": 2, "Aspect": "imp"},
-    "impt:pl:sec:imperf.perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": 2, "Aspect": "imp|perf"},
-    "impt:pl:sec:perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": 2, "Aspect": "perf"},
-    "impt:sg:sec:imperf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "sing", "Person": 2, "Aspect": "imp"},
-    "impt:sg:sec:imperf.perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "sing", "Person": 2, "Aspect": "imp|perf"},
-    "impt:sg:sec:perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "sing", "Person": 2, "Aspect": "perf"},
+    "impt:pl:pri:imperf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": "one", "Aspect": "imp"},
+    "impt:pl:pri:imperf.perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": "one", "Aspect": "imp|perf"},
+    "impt:pl:pri:perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": "one", "Aspect": "perf"},
+    "impt:pl:sec:imperf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": "two", "Aspect": "imp"},
+    "impt:pl:sec:imperf.perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": "two", "Aspect": "imp|perf"},
+    "impt:pl:sec:perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": "two", "Aspect": "perf"},
+    "impt:sg:sec:imperf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "sing", "Person": "two", "Aspect": "imp"},
+    "impt:sg:sec:imperf.perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "sing", "Person": "two", "Aspect": "imp|perf"},
+    "impt:sg:sec:perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "sing", "Person": "two", "Aspect": "perf"},
     "inf:imperf": {POS: VERB, "VerbForm": "inf", "Aspect": "imp"},
     "inf:imperf.perf": {POS: VERB, "VerbForm": "inf", "Aspect": "imp|perf"},
     "inf:perf": {POS: VERB, "VerbForm": "inf", "Aspect": "perf"},
@@ -372,86 +372,86 @@ TAG_MAP = {
     "ppas:sg:nom.voc:m1.m2.m3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp|perf", "Polarity": "neg"},
     "ppas:sg:nom.voc:m1.m2.m3:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "perf", "Polarity": "pos"},
     "ppas:sg:nom.voc:m1.m2.m3:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "perf", "Polarity": "neg"},
-    "ppron12:pl:acc:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "acc", "Person": 1},
-    "ppron12:pl:acc:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "acc", "Person": 2},
-    "ppron12:pl:dat:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "dat", "Person": 1},
-    "ppron12:pl:dat:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "dat", "Person": 2},
-    "ppron12:pl:gen:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "gen", "Person": 1},
-    "ppron12:pl:gen:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "gen", "Person": 2},
-    "ppron12:pl:inst:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "ins", "Person": 1},
-    "ppron12:pl:inst:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "ins", "Person": 2},
-    "ppron12:pl:loc:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "loc", "Person": 1},
-    "ppron12:pl:loc:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "loc", "Person": 2},
-    "ppron12:pl:nom:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "nom", "Person": 1},
-    "ppron12:pl:nom:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "nom", "Person": 2},
-    "ppron12:pl:voc:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "voc", "Person": 1},
-    "ppron12:pl:voc:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "voc", "Person": 2},
-    "ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": 1, "Variant": "long"},
-    "ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": 1, "Variant": "short"},
-    "ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": 2, "Variant": "long"},
-    "ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": 2, "Variant": "short"},
-    "ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": 1, "Variant": "long"},
-    "ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": 1, "Variant": "short"},
-    "ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": 2, "Variant": "long"},
-    "ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": 2, "Variant": "short"},
-    "ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": 1, "Variant": "long"},
-    "ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": 1, "Variant": "short"},
-    "ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": 2, "Variant": "long"},
-    "ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": 2, "Variant": "short"},
-    "ppron12:sg:inst:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "ins", "Gender": "masc|fem|neut", "Person": 1},
-    "ppron12:sg:inst:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "ins", "Gender": "masc|fem|neut", "Person": 2},
-    "ppron12:sg:loc:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "loc", "Gender": "masc|fem|neut", "Person": 1},
-    "ppron12:sg:loc:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "loc", "Gender": "masc|fem|neut", "Person": 2},
-    "ppron12:sg:nom:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "nom", "Gender": "masc|fem|neut", "Person": 1},
-    "ppron12:sg:nom:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "nom", "Gender": "masc|fem|neut", "Person": 2},
-    "ppron12:sg:voc:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "voc", "Gender": "masc|fem|neut", "Person": 1},
-    "ppron12:sg:voc:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "voc", "Gender": "masc|fem|neut", "Person": 2},
-    "ppron3:pl:acc:m1.p1:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "acc", "Gender": "masc", "Person": 3, "PrepCase": "npr"},
-    "ppron3:pl:acc:m1.p1:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "acc", "Gender": "masc", "Person": 3, "PrepCase": "pre"},
-    "ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Person": 3, "PrepCase": "npr"},
-    "ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Person": 3, "PrepCase": "pre"},
-    "ppron3:pl:dat:_:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "dat", "Person": 3, "PrepCase": "npr"},
-    "ppron3:pl:dat:_:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "dat", "Person": 3, "PrepCase": "pre"},
-    "ppron3:pl:gen:_:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "gen", "Person": 3, "PrepCase": "npr"},
-    "ppron3:pl:gen:_:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "gen", "Person": 3, "PrepCase": "pre"},
-    "ppron3:pl:inst:_:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "ins", "Person": 3},
-    "ppron3:pl:loc:_:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "loc", "Person": 3},
-    "ppron3:pl:nom:m1.p1:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "nom", "Gender": "masc", "Person": 3},
-    "ppron3:pl:nom:m2.m3.f.n1.n2.p2.p3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "nom", "Gender": "masc|fem|neut", "Person": 3},
-    "ppron3:sg:acc:f:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "fem", "Person": 3, "PrepCase": "npr"},
-    "ppron3:sg:acc:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "fem", "Person": 3, "PrepCase": "pre"},
-    "ppron3:sg:acc:m1.m2.m3:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": 3, "Variant": "long", "PrepCase": "npr"},
-    "ppron3:sg:acc:m1.m2.m3:ter:akc:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": 3, "Variant": "long", "PrepCase": "pre"},
-    "ppron3:sg:acc:m1.m2.m3:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": 3, "Variant": "short", "PrepCase": "npr"},
-    "ppron3:sg:acc:m1.m2.m3:ter:nakc:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": 3, "Variant": "short", "PrepCase": "pre"},
-    "ppron3:sg:acc:n1.n2:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "neut", "Person": 3, "PrepCase": "npr"},
-    "ppron3:sg:acc:n1.n2:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "neut", "Person": 3, "PrepCase": "pre"},
-    "ppron3:sg:dat:f:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "fem", "Person": 3, "PrepCase": "npr"},
-    "ppron3:sg:dat:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "fem", "Person": 3, "PrepCase": "pre"},
-    "ppron3:sg:dat:m1.m2.m3:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "Masc", "Person": 3, "Variant": "long", "PrepCase": "npr"},
-    "ppron3:sg:dat:m1.m2.m3:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "Masc", "Person": 3, "Variant": "short", "PrepCase": "npr"},
-    "ppron3:sg:dat:m1.m2.m3:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "Masc", "Person": 3, "PrepCase": "pre"},
-    "ppron3:sg:dat:n1.n2:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "neut", "Person": 3, "Variant": "long", "PrepCase": "npr"},
-    "ppron3:sg:dat:n1.n2:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "neut", "Person": 3, "Variant": "short", "PrepCase": "npr"},
-    "ppron3:sg:dat:n1.n2:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "neut", "Person": 3, "PrepCase": "pre"},
-    "ppron3:sg:gen:f:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "fem", "Person": 3, "PrepCase": "npr"},
-    "ppron3:sg:gen:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "fem", "Person": 3, "PrepCase": "pre"},
-    "ppron3:sg:gen:m1.m2.m3:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": 3, "Variant": "long", "PrepCase": "npr"},
-    "ppron3:sg:gen:m1.m2.m3:ter:akc:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": 3, "Variant": "long", "PrepCase": "pre"},
-    "ppron3:sg:gen:m1.m2.m3:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": 3, "Variant": "short", "PrepCase": "npr"},
-    "ppron3:sg:gen:m1.m2.m3:ter:nakc:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": 3, "Variant": "short", "PrepCase": "pre"},
-    "ppron3:sg:gen:n1.n2:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "neut", "Person": 3, "Variant": "long", "PrepCase": "npr"},
-    "ppron3:sg:gen:n1.n2:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "neut", "Person": 3, "Variant": "short", "PrepCase": "npr"},
-    "ppron3:sg:gen:n1.n2:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "neut", "Person": 3, "PrepCase": "pre"},
-    "ppron3:sg:inst:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "ins", "Gender": "fem", "Person": 3, "PrepCase": "pre"},
-    "ppron3:sg:inst:m1.m2.m3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "ins", "Gender": "Masc", "Person": 3},
-    "ppron3:sg:inst:n1.n2:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "ins", "Gender": "neut", "Person": 3},
-    "ppron3:sg:loc:f:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "loc", "Gender": "fem", "Person": 3},
-    "ppron3:sg:loc:m1.m2.m3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "loc", "Gender": "Masc", "Person": 3},
-    "ppron3:sg:loc:n1.n2:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "loc", "Gender": "neut", "Person": 3},
-    "ppron3:sg:nom:f:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "nom", "Gender": "fem", "Person": 3},
-    "ppron3:sg:nom:m1.m2.m3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "nom", "Gender": "Masc", "Person": 3},
-    "ppron3:sg:nom:n1.n2:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "nom", "Gender": "neut", "Person": 3},
+    "ppron12:pl:acc:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "acc", "Person": "one"},
+    "ppron12:pl:acc:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "acc", "Person": "two"},
+    "ppron12:pl:dat:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "dat", "Person": "one"},
+    "ppron12:pl:dat:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "dat", "Person": "two"},
+    "ppron12:pl:gen:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "gen", "Person": "one"},
+    "ppron12:pl:gen:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "gen", "Person": "two"},
+    "ppron12:pl:inst:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "ins", "Person": "one"},
+    "ppron12:pl:inst:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "ins", "Person": "two"},
+    "ppron12:pl:loc:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "loc", "Person": "one"},
+    "ppron12:pl:loc:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "loc", "Person": "two"},
+    "ppron12:pl:nom:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "nom", "Person": "one"},
+    "ppron12:pl:nom:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "nom", "Person": "two"},
+    "ppron12:pl:voc:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "voc", "Person": "one"},
+    "ppron12:pl:voc:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "voc", "Person": "two"},
+    "ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": "one", },
+    "ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": "one", },
+    "ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": "two", },
+    "ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": "two", },
+    "ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": "one", },
+    "ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": "one", },
+    "ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": "two", },
+    "ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": "two", },
+    "ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": "one", },
+    "ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": "one", },
+    "ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": "two", },
+    "ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": "two", },
+    "ppron12:sg:inst:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "ins", "Gender": "masc|fem|neut", "Person": "one"},
+    "ppron12:sg:inst:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "ins", "Gender": "masc|fem|neut", "Person": "two"},
+    "ppron12:sg:loc:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "loc", "Gender": "masc|fem|neut", "Person": "one"},
+    "ppron12:sg:loc:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "loc", "Gender": "masc|fem|neut", "Person": "two"},
+    "ppron12:sg:nom:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "nom", "Gender": "masc|fem|neut", "Person": "one"},
+    "ppron12:sg:nom:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "nom", "Gender": "masc|fem|neut", "Person": "two"},
+    "ppron12:sg:voc:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "voc", "Gender": "masc|fem|neut", "Person": "one"},
+    "ppron12:sg:voc:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "voc", "Gender": "masc|fem|neut", "Person": "two"},
+    "ppron3:pl:acc:m1.p1:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "acc", "Gender": "masc", "Person": "three", "PrepCase": "npr"},
+    "ppron3:pl:acc:m1.p1:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "acc", "Gender": "masc", "Person": "three", "PrepCase": "pre"},
+    "ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Person": "three", "PrepCase": "npr"},
+    "ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Person": "three", "PrepCase": "pre"},
+    "ppron3:pl:dat:_:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "dat", "Person": "three", "PrepCase": "npr"},
+    "ppron3:pl:dat:_:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "dat", "Person": "three", "PrepCase": "pre"},
+    "ppron3:pl:gen:_:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "gen", "Person": "three", "PrepCase": "npr"},
+    "ppron3:pl:gen:_:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "gen", "Person": "three", "PrepCase": "pre"},
+    "ppron3:pl:inst:_:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "ins", "Person": "three"},
+    "ppron3:pl:loc:_:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "loc", "Person": "three"},
+    "ppron3:pl:nom:m1.p1:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "nom", "Gender": "masc", "Person": "three"},
+    "ppron3:pl:nom:m2.m3.f.n1.n2.p2.p3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "nom", "Gender": "masc|fem|neut", "Person": "three"},
+    "ppron3:sg:acc:f:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "fem", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:acc:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "fem", "Person": "three", "PrepCase": "pre"},
+    "ppron3:sg:acc:m1.m2.m3:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:acc:m1.m2.m3:ter:akc:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": "three", "PrepCase": "pre"},
+    "ppron3:sg:acc:m1.m2.m3:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:acc:m1.m2.m3:ter:nakc:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": "three", "PrepCase": "pre"},
+    "ppron3:sg:acc:n1.n2:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "neut", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:acc:n1.n2:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "neut", "Person": "three", "PrepCase": "pre"},
+    "ppron3:sg:dat:f:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "fem", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:dat:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "fem", "Person": "three", "PrepCase": "pre"},
+    "ppron3:sg:dat:m1.m2.m3:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "Masc", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:dat:m1.m2.m3:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "Masc", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:dat:m1.m2.m3:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "Masc", "Person": "three", "PrepCase": "pre"},
+    "ppron3:sg:dat:n1.n2:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "neut", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:dat:n1.n2:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "neut", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:dat:n1.n2:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "neut", "Person": "three", "PrepCase": "pre"},
+    "ppron3:sg:gen:f:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "fem", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:gen:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "fem", "Person": "three", "PrepCase": "pre"},
+    "ppron3:sg:gen:m1.m2.m3:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:gen:m1.m2.m3:ter:akc:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": "three", "PrepCase": "pre"},
+    "ppron3:sg:gen:m1.m2.m3:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:gen:m1.m2.m3:ter:nakc:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": "three", "PrepCase": "pre"},
+    "ppron3:sg:gen:n1.n2:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "neut", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:gen:n1.n2:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "neut", "Person": "three", "PrepCase": "npr"},
+    "ppron3:sg:gen:n1.n2:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "neut", "Person": "three", "PrepCase": "pre"},
+    "ppron3:sg:inst:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "ins", "Gender": "fem", "Person": "three", "PrepCase": "pre"},
+    "ppron3:sg:inst:m1.m2.m3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "ins", "Gender": "Masc", "Person": "three"},
+    "ppron3:sg:inst:n1.n2:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "ins", "Gender": "neut", "Person": "three"},
+    "ppron3:sg:loc:f:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "loc", "Gender": "fem", "Person": "three"},
+    "ppron3:sg:loc:m1.m2.m3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "loc", "Gender": "Masc", "Person": "three"},
+    "ppron3:sg:loc:n1.n2:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "loc", "Gender": "neut", "Person": "three"},
+    "ppron3:sg:nom:f:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "nom", "Gender": "fem", "Person": "three"},
+    "ppron3:sg:nom:m1.m2.m3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "nom", "Gender": "Masc", "Person": "three"},
+    "ppron3:sg:nom:n1.n2:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "nom", "Gender": "neut", "Person": "three"},
     "praet:pl:m1.p1:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc", "Aspect": "imp"},
     "praet:pl:m1.p1:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc", "Aspect": "imp|perf"},
     "praet:pl:m1.p1:perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc", "Aspect": "perf"},
@@ -473,18 +473,18 @@ TAG_MAP = {
     "praet:sg:n1.n2:perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "neut", "Aspect": "perf"},
     "pred": {POS: VERB},
     "prep:acc": {POS: ADP, "AdpType": "prep", "Case": "acc"},
-    "prep:acc:nwok": {POS: ADP, "AdpType": "prep", "Case": "acc", "Variant": "short"},
-    "prep:acc:wok": {POS: ADP, "AdpType": "prep", "Case": "acc", "Variant": "long"},
+    "prep:acc:nwok": {POS: ADP, "AdpType": "prep", "Case": "acc", },
+    "prep:acc:wok": {POS: ADP, "AdpType": "prep", "Case": "acc", },
     "prep:dat": {POS: ADP, "AdpType": "prep", "Case": "dat"},
     "prep:gen": {POS: ADP, "AdpType": "prep", "Case": "gen"},
-    "prep:gen:nwok": {POS: ADP, "AdpType": "prep", "Case": "gen", "Variant": "short"},
-    "prep:gen:wok": {POS: ADP, "AdpType": "prep", "Case": "gen", "Variant": "long"},
+    "prep:gen:nwok": {POS: ADP, "AdpType": "prep", "Case": "gen", },
+    "prep:gen:wok": {POS: ADP, "AdpType": "prep", "Case": "gen", },
     "prep:inst": {POS: ADP, "AdpType": "prep", "Case": "ins"},
-    "prep:inst:nwok": {POS: ADP, "AdpType": "prep", "Case": "ins", "Variant": "short"},
-    "prep:inst:wok": {POS: ADP, "AdpType": "prep", "Case": "ins", "Variant": "long"},
+    "prep:inst:nwok": {POS: ADP, "AdpType": "prep", "Case": "ins", },
+    "prep:inst:wok": {POS: ADP, "AdpType": "prep", "Case": "ins", },
     "prep:loc": {POS: ADP, "AdpType": "prep", "Case": "loc"},
-    "prep:loc:nwok": {POS: ADP, "AdpType": "prep", "Case": "loc", "Variant": "short"},
-    "prep:loc:wok": {POS: ADP, "AdpType": "prep", "Case": "loc", "Variant": "long"},
+    "prep:loc:nwok": {POS: ADP, "AdpType": "prep", "Case": "loc", },
+    "prep:loc:wok": {POS: ADP, "AdpType": "prep", "Case": "loc", },
     "prep:nom": {POS: ADP, "AdpType": "prep", "Case": "nom"},
     "qub": {POS: PART},
     "subst:pl:acc:f": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "fem"},
@@ -493,63 +493,63 @@ TAG_MAP = {
     "subst:pl:acc:m3": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "masc", "Animacy": "inan"},
     "subst:pl:acc:n1": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "neut"},
     "subst:pl:acc:n2": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "neut"},
-    "subst:pl:acc:p1": {POS: NOUN, "Number": "plur", "Case": "acc", "Person": 1},
-    "subst:pl:acc:p2": {POS: NOUN, "Number": "plur", "Case": "acc", "Person": 2},
-    "subst:pl:acc:p3": {POS: NOUN, "Number": "plur", "Case": "acc", "Person": 3},
+    "subst:pl:acc:p1": {POS: NOUN, "Number": "plur", "Case": "acc", "Person": "one"},
+    "subst:pl:acc:p2": {POS: NOUN, "Number": "plur", "Case": "acc", "Person": "two"},
+    "subst:pl:acc:p3": {POS: NOUN, "Number": "plur", "Case": "acc", "Person": "three"},
     "subst:pl:dat:f": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "fem"},
     "subst:pl:dat:m1": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "Masc", "Animacy": "hum"},
     "subst:pl:dat:m2": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "masc", "Animacy": "anim"},
     "subst:pl:dat:m3": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "masc", "Animacy": "inan"},
     "subst:pl:dat:n1": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "neut"},
     "subst:pl:dat:n2": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "neut"},
-    "subst:pl:dat:p1": {POS: NOUN, "Number": "plur", "Case": "dat", "Person": 1},
-    "subst:pl:dat:p2": {POS: NOUN, "Number": "plur", "Case": "dat", "Person": 2},
-    "subst:pl:dat:p3": {POS: NOUN, "Number": "plur", "Case": "dat", "Person": 3},
+    "subst:pl:dat:p1": {POS: NOUN, "Number": "plur", "Case": "dat", "Person": "one"},
+    "subst:pl:dat:p2": {POS: NOUN, "Number": "plur", "Case": "dat", "Person": "two"},
+    "subst:pl:dat:p3": {POS: NOUN, "Number": "plur", "Case": "dat", "Person": "three"},
     "subst:pl:gen:f": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "fem"},
     "subst:pl:gen:m1": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "Masc", "Animacy": "hum"},
     "subst:pl:gen:m2": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "masc", "Animacy": "anim"},
     "subst:pl:gen:m3": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "masc", "Animacy": "inan"},
     "subst:pl:gen:n1": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "neut"},
     "subst:pl:gen:n2": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "neut"},
-    "subst:pl:gen:p1": {POS: NOUN, "Number": "plur", "Case": "gen", "Person": 1},
-    "subst:pl:gen:p2": {POS: NOUN, "Number": "plur", "Case": "gen", "Person": 2},
-    "subst:pl:gen:p3": {POS: NOUN, "Number": "plur", "Case": "gen", "Person": 3},
+    "subst:pl:gen:p1": {POS: NOUN, "Number": "plur", "Case": "gen", "Person": "one"},
+    "subst:pl:gen:p2": {POS: NOUN, "Number": "plur", "Case": "gen", "Person": "two"},
+    "subst:pl:gen:p3": {POS: NOUN, "Number": "plur", "Case": "gen", "Person": "three"},
     "subst:pl:inst:f": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "fem"},
     "subst:pl:inst:m1": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "Masc", "Animacy": "hum"},
     "subst:pl:inst:m2": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "masc", "Animacy": "anim"},
     "subst:pl:inst:m3": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "masc", "Animacy": "inan"},
     "subst:pl:inst:n1": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "neut"},
     "subst:pl:inst:n2": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "neut"},
-    "subst:pl:inst:p1": {POS: NOUN, "Number": "plur", "Case": "ins", "Person": 1},
-    "subst:pl:inst:p2": {POS: NOUN, "Number": "plur", "Case": "ins", "Person": 2},
-    "subst:pl:inst:p3": {POS: NOUN, "Number": "plur", "Case": "ins", "Person": 3},
+    "subst:pl:inst:p1": {POS: NOUN, "Number": "plur", "Case": "ins", "Person": "one"},
+    "subst:pl:inst:p2": {POS: NOUN, "Number": "plur", "Case": "ins", "Person": "two"},
+    "subst:pl:inst:p3": {POS: NOUN, "Number": "plur", "Case": "ins", "Person": "three"},
     "subst:pl:loc:f": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "fem"},
     "subst:pl:loc:m1": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "Masc", "Animacy": "hum"},
     "subst:pl:loc:m2": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "masc", "Animacy": "anim"},
     "subst:pl:loc:m3": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "masc", "Animacy": "inan"},
     "subst:pl:loc:n1": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "neut"},
     "subst:pl:loc:n2": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "neut"},
-    "subst:pl:loc:p1": {POS: NOUN, "Number": "plur", "Case": "loc", "Person": 1},
-    "subst:pl:loc:p2": {POS: NOUN, "Number": "plur", "Case": "loc", "Person": 2},
-    "subst:pl:loc:p3": {POS: NOUN, "Number": "plur", "Case": "loc", "Person": 3},
+    "subst:pl:loc:p1": {POS: NOUN, "Number": "plur", "Case": "loc", "Person": "one"},
+    "subst:pl:loc:p2": {POS: NOUN, "Number": "plur", "Case": "loc", "Person": "two"},
+    "subst:pl:loc:p3": {POS: NOUN, "Number": "plur", "Case": "loc", "Person": "three"},
     "subst:pl:nom:f": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "fem"},
     "subst:pl:nom:m1": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "Masc", "Animacy": "hum"},
     "subst:pl:nom:m2": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "masc", "Animacy": "anim"},
     "subst:pl:nom:m3": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "masc", "Animacy": "inan"},
     "subst:pl:nom:n1": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "neut"},
     "subst:pl:nom:n2": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "neut"},
-    "subst:pl:nom:p1": {POS: NOUN, "Number": "plur", "Case": "nom", "Person": 1},
-    "subst:pl:nom:p2": {POS: NOUN, "Number": "plur", "Case": "nom", "Person": 2},
-    "subst:pl:nom:p3": {POS: NOUN, "Number": "plur", "Case": "nom", "Person": 3},
+    "subst:pl:nom:p1": {POS: NOUN, "Number": "plur", "Case": "nom", "Person": "one"},
+    "subst:pl:nom:p2": {POS: NOUN, "Number": "plur", "Case": "nom", "Person": "two"},
+    "subst:pl:nom:p3": {POS: NOUN, "Number": "plur", "Case": "nom", "Person": "three"},
     "subst:pl:voc:f": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "fem"},
     "subst:pl:voc:m1": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "Masc", "Animacy": "hum"},
     "subst:pl:voc:m2": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "masc", "Animacy": "anim"},
     "subst:pl:voc:m3": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "masc", "Animacy": "inan"},
     "subst:pl:voc:n1": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "neut"},
     "subst:pl:voc:n2": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "neut"},
-    "subst:pl:voc:p1": {POS: NOUN, "Number": "plur", "Case": "voc", "Person": 1},
-    "subst:pl:voc:p2": {POS: NOUN, "Number": "plur", "Case": "voc", "Person": 2},
-    "subst:pl:voc:p3": {POS: NOUN, "Number": "plur", "Case": "voc", "Person": 3},
+    "subst:pl:voc:p1": {POS: NOUN, "Number": "plur", "Case": "voc", "Person": "one"},
+    "subst:pl:voc:p2": {POS: NOUN, "Number": "plur", "Case": "voc", "Person": "two"},
+    "subst:pl:voc:p3": {POS: NOUN, "Number": "plur", "Case": "voc", "Person": "three"},
     "subst:sg:acc:f": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "fem"},
     "subst:sg:acc:m1": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum"},
     "subst:sg:acc:m2": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "anim"},
diff --git a/spacy/lang/ro/tag_map.py b/spacy/lang/ro/tag_map.py
index cb5239809..5136793ef 100644
--- a/spacy/lang/ro/tag_map.py
+++ b/spacy/lang/ro/tag_map.py
@@ -5,14 +5,14 @@ from ...symbols import PRON, PROPN, PUNCT, SYM, VERB, X, CCONJ, SCONJ, DET, AUX
 
 TAG_MAP = {
     "Afcfson": {
-        "Case": "Dat,Gen",
+        
         "Degree": "Cmp",
         "Gender": "Fem",
         "Number": "Sing",
         POS: ADJ,
     },
     "Afcfsrn": {
-        "Case": "Acc,Nom",
+        
         "Degree": "Cmp",
         "Gender": "Fem",
         "Number": "Sing",
@@ -20,47 +20,47 @@ TAG_MAP = {
     },
     "Afp": {"Degree": "Pos", POS: ADJ},
     "Afp-p-n": {"Degree": "Pos", "Number": "Plur", POS: ADJ},
-    "Afp-p-ny": {"Degree": "Pos", "Number": "Plur", POS: ADJ, "Variant": "Short"},
-    "Afp-poy": {"Case": "Dat,Gen", "Degree": "Pos", "Number": "Plur", POS: ADJ},
+    "Afp-p-ny": {"Degree": "Pos", "Number": "Plur", POS: ADJ},
+    "Afp-poy": { "Degree": "Pos", "Number": "Plur", POS: ADJ},
     "Afpf--n": {"Degree": "Pos", "Gender": "Fem", POS: ADJ},
     "Afpfp-n": {"Degree": "Pos", "Gender": "Fem", "Number": "Plur", POS: ADJ},
     "Afpfpoy": {
-        "Case": "Dat,Gen",
+        
         "Degree": "Pos",
         "Gender": "Fem",
         "Number": "Plur",
         POS: ADJ,
     },
     "Afpfpry": {
-        "Case": "Acc,Nom",
+        
         "Degree": "Pos",
         "Gender": "Fem",
         "Number": "Plur",
         POS: ADJ,
     },
     "Afpfson": {
-        "Case": "Dat,Gen",
+        
         "Degree": "Pos",
         "Gender": "Fem",
         "Number": "Sing",
         POS: ADJ,
     },
     "Afpfsoy": {
-        "Case": "Dat,Gen",
+        
         "Degree": "Pos",
         "Gender": "Fem",
         "Number": "Sing",
         POS: ADJ,
     },
     "Afpfsrn": {
-        "Case": "Acc,Nom",
+        
         "Degree": "Pos",
         "Gender": "Fem",
         "Number": "Sing",
         POS: ADJ,
     },
     "Afpfsry": {
-        "Case": "Acc,Nom",
+        
         "Degree": "Pos",
         "Gender": "Fem",
         "Number": "Sing",
@@ -68,14 +68,14 @@ TAG_MAP = {
     },
     "Afpmp-n": {"Degree": "Pos", "Gender": "Masc", "Number": "Plur", POS: ADJ},
     "Afpmpoy": {
-        "Case": "Dat,Gen",
+        
         "Degree": "Pos",
         "Gender": "Masc",
         "Number": "Plur",
         POS: ADJ,
     },
     "Afpmpry": {
-        "Case": "Acc,Nom",
+        
         "Degree": "Pos",
         "Gender": "Masc",
         "Number": "Plur",
@@ -83,14 +83,14 @@ TAG_MAP = {
     },
     "Afpms-n": {"Degree": "Pos", "Gender": "Masc", "Number": "Sing", POS: ADJ},
     "Afpmsoy": {
-        "Case": "Dat,Gen",
+        
         "Degree": "Pos",
         "Gender": "Masc",
         "Number": "Sing",
         POS: ADJ,
     },
     "Afpmsry": {
-        "Case": "Acc,Nom",
+        
         "Degree": "Pos",
         "Gender": "Masc",
         "Number": "Sing",
@@ -101,304 +101,304 @@ TAG_MAP = {
     "Ccssp": {POS: CCONJ, "Polarity": "Pos"},
     "Crssp": {POS: CCONJ, "Polarity": "Pos"},
     "Csssp": {POS: SCONJ, "Polarity": "Pos"},
-    "Cssspy": {POS: SCONJ, "Polarity": "Pos", "Variant": "Short"},
+    "Cssspy": {POS: SCONJ, "Polarity": "Pos"},
     "DASH": {POS: PUNCT},
     "DBLQ": {POS: PUNCT},
     "Dd3-po---e": {
-        "Case": "Dat,Gen",
+        
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Dd3fpr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Dd3fpr---e": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Dd3fso---e": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Dd3fso---o": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Dd3fsr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Dd3fsr---e": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Dd3fsr---o": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Dd3mpo": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Masc",
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Dd3mpr---e": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Dd3mso---e": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Dd3msr---e": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Dd3msr---o": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Dh3fsr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
-        "PronType": "Emp",
+        "Person": "three",
+        
     },
     "Dh3mp": {
         "Gender": "Masc",
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
-        "PronType": "Emp",
+        "Person": "three",
+        
     },
     "Dh3ms": {
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
-        "PronType": "Emp",
+        "Person": "three",
+        
     },
-    "Di3": {POS: DET, "Person": "3", "PronType": "Ind"},
-    "Di3--r---e": {"Case": "Acc,Nom", POS: DET, "Person": "3", "PronType": "Ind"},
+    "Di3": {POS: DET, "Person": "three", "PronType": "Ind"},
+    "Di3--r---e": { POS: DET, "Person": "three", "PronType": "Ind"},
     "Di3-po": {
-        "Case": "Dat,Gen",
+        
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3-po---e": {
-        "Case": "Dat,Gen",
+        
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3-sr": {
-        "Case": "Acc,Nom",
+        
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3-sr---e": {
-        "Case": "Acc,Nom",
+        
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3fp": {
         "Gender": "Fem",
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3fpr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3fpr---e": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3fso---e": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3fsr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3fsr---e": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3mp": {
         "Gender": "Masc",
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3mpr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3mpr---e": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3ms": {
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3ms----e": {
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3mso---e": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3msr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Di3msr---e": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Ds1fp-s": {
         "Gender": "Fem",
         "Number": "Plur",
         POS: DET,
-        "Person": "1",
+        "Person": "one",
         "Poss": "Yes",
         "PronType": "Prs",
     },
     "Ds1fsos": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "1",
+        "Person": "one",
         "Poss": "Yes",
         "PronType": "Prs",
     },
     "Ds1fsrp": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "1",
+        "Person": "one",
         "Poss": "Yes",
         "PronType": "Prs",
     },
     "Ds1fsrs": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "1",
+        "Person": "one",
         "Poss": "Yes",
         "PronType": "Prs",
     },
@@ -406,7 +406,7 @@ TAG_MAP = {
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "1",
+        "Person": "one",
         "Poss": "Yes",
         "PronType": "Prs",
     },
@@ -414,45 +414,45 @@ TAG_MAP = {
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "1",
+        "Person": "one",
         "Poss": "Yes",
         "PronType": "Prs",
     },
-    "Ds2---s": {POS: DET, "Person": "2", "Poss": "Yes", "PronType": "Prs"},
+    "Ds2---s": {POS: DET, "Person": "two", "Poss": "Yes", "PronType": "Prs"},
     "Ds2fsrs": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "2",
+        "Person": "two",
         "Poss": "Yes",
         "PronType": "Prs",
     },
-    "Ds3---p": {POS: DET, "Person": "3", "Poss": "Yes", "PronType": "Prs"},
-    "Ds3---s": {POS: DET, "Person": "3", "Poss": "Yes", "PronType": "Prs"},
+    "Ds3---p": {POS: DET, "Person": "three", "Poss": "Yes", "PronType": "Prs"},
+    "Ds3---s": {POS: DET, "Person": "three", "Poss": "Yes", "PronType": "Prs"},
     "Ds3fp-s": {
         "Gender": "Fem",
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
         "PronType": "Prs",
     },
     "Ds3fsos": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
         "PronType": "Prs",
     },
     "Ds3fsrs": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
         "PronType": "Prs",
     },
@@ -460,41 +460,41 @@ TAG_MAP = {
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
         "PronType": "Prs",
     },
-    "Dw3--r---e": {"Case": "Acc,Nom", POS: DET, "Person": "3", "PronType": "Int,Rel"},
+    "Dw3--r---e": { POS: DET, "Person": "three"},
     "Dw3fpr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Plur",
         POS: DET,
-        "Person": "3",
-        "PronType": "Int,Rel",
+        "Person": "three",
+        
     },
     "Dw3mso---e": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
-        "PronType": "Int,Rel",
+        "Person": "three",
+        
     },
     "Dz3fsr---e": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Neg",
     },
     "Dz3msr---e": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Neg",
     },
     "EQUAL": {POS: SYM},
@@ -520,7 +520,7 @@ TAG_MAP = {
         POS: NUM,
     },
     "Mcfsrln": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "NumForm": "Word",
         "NumType": "Card",
@@ -535,7 +535,7 @@ TAG_MAP = {
         POS: NUM,
     },
     "Mcmsrl": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "NumForm": "Word",
         "NumType": "Card",
@@ -543,7 +543,7 @@ TAG_MAP = {
         POS: NUM,
     },
     "Mffprln": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "NumForm": "Word",
         "NumType": "Card",
@@ -551,7 +551,7 @@ TAG_MAP = {
         POS: NUM,
     },
     "Mlfpo": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Fem",
         "NumType": "Card",
         "Number": "Plur",
@@ -559,7 +559,7 @@ TAG_MAP = {
         "PronType": "Tot",
     },
     "Mlfpr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "NumType": "Card",
         "Number": "Plur",
@@ -567,7 +567,7 @@ TAG_MAP = {
         "PronType": "Tot",
     },
     "Mlmpr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "NumType": "Card",
         "Number": "Plur",
@@ -584,7 +584,7 @@ TAG_MAP = {
         POS: NUM,
     },
     "Mofprly": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "NumForm": "Word",
         "NumType": "Ord",
@@ -599,7 +599,7 @@ TAG_MAP = {
         POS: NUM,
     },
     "Mofsrln": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "NumForm": "Word",
         "NumType": "Ord",
@@ -607,7 +607,7 @@ TAG_MAP = {
         POS: NUM,
     },
     "Mofsrly": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "NumForm": "Word",
         "NumType": "Ord",
@@ -615,7 +615,7 @@ TAG_MAP = {
         POS: NUM,
     },
     "Momprly": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "NumForm": "Word",
         "NumType": "Ord",
@@ -637,7 +637,7 @@ TAG_MAP = {
         POS: NUM,
     },
     "Momsoly": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Masc",
         "NumForm": "Word",
         "NumType": "Ord",
@@ -645,7 +645,7 @@ TAG_MAP = {
         POS: NUM,
     },
     "Momsrly": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "NumForm": "Word",
         "NumType": "Ord",
@@ -655,338 +655,338 @@ TAG_MAP = {
     "Nc": {POS: NOUN},
     "Ncf--n": {"Gender": "Fem", POS: NOUN},
     "Ncfp-n": {"Gender": "Fem", "Number": "Plur", POS: NOUN},
-    "Ncfpoy": {"Case": "Dat,Gen", "Gender": "Fem", "Number": "Plur", POS: NOUN},
-    "Ncfpry": {"Case": "Acc,Nom", "Gender": "Fem", "Number": "Plur", POS: NOUN},
-    "Ncfson": {"Case": "Dat,Gen", "Gender": "Fem", "Number": "Sing", POS: NOUN},
-    "Ncfsoy": {"Case": "Dat,Gen", "Gender": "Fem", "Number": "Sing", POS: NOUN},
-    "Ncfsrn": {"Case": "Acc,Nom", "Gender": "Fem", "Number": "Sing", POS: NOUN},
-    "Ncfsry": {"Case": "Acc,Nom", "Gender": "Fem", "Number": "Sing", POS: NOUN},
+    "Ncfpoy": { "Gender": "Fem", "Number": "Plur", POS: NOUN},
+    "Ncfpry": { "Gender": "Fem", "Number": "Plur", POS: NOUN},
+    "Ncfson": { "Gender": "Fem", "Number": "Sing", POS: NOUN},
+    "Ncfsoy": { "Gender": "Fem", "Number": "Sing", POS: NOUN},
+    "Ncfsrn": { "Gender": "Fem", "Number": "Sing", POS: NOUN},
+    "Ncfsry": { "Gender": "Fem", "Number": "Sing", POS: NOUN},
     "Ncm--n": {"Gender": "Masc", POS: NOUN},
     "Ncmp-n": {"Gender": "Masc", "Number": "Plur", POS: NOUN},
-    "Ncmpoy": {"Case": "Dat,Gen", "Gender": "Masc", "Number": "Plur", POS: NOUN},
-    "Ncmpry": {"Case": "Acc,Nom", "Gender": "Masc", "Number": "Plur", POS: NOUN},
+    "Ncmpoy": { "Gender": "Masc", "Number": "Plur", POS: NOUN},
+    "Ncmpry": { "Gender": "Masc", "Number": "Plur", POS: NOUN},
     "Ncms-n": {"Gender": "Masc", "Number": "Sing", POS: NOUN},
-    "Ncms-ny": {"Gender": "Masc", "Number": "Sing", POS: NOUN, "Variant": "Short"},
-    "Ncmsoy": {"Case": "Dat,Gen", "Gender": "Masc", "Number": "Sing", POS: NOUN},
-    "Ncmsrn": {"Case": "Acc,Nom", "Gender": "Masc", "Number": "Sing", POS: NOUN},
-    "Ncmsry": {"Case": "Acc,Nom", "Gender": "Masc", "Number": "Sing", POS: NOUN},
+    "Ncms-ny": {"Gender": "Masc", "Number": "Sing", POS: NOUN},
+    "Ncmsoy": { "Gender": "Masc", "Number": "Sing", POS: NOUN},
+    "Ncmsrn": { "Gender": "Masc", "Number": "Sing", POS: NOUN},
+    "Ncmsry": { "Gender": "Masc", "Number": "Sing", POS: NOUN},
     "Np": {POS: PROPN},
-    "Npfsoy": {"Case": "Dat,Gen", "Gender": "Fem", "Number": "Sing", POS: PROPN},
-    "Npfsry": {"Case": "Acc,Nom", "Gender": "Fem", "Number": "Sing", POS: PROPN},
-    "Npmsoy": {"Case": "Dat,Gen", "Gender": "Masc", "Number": "Sing", POS: PROPN},
-    "Npmsry": {"Case": "Acc,Nom", "Gender": "Masc", "Number": "Sing", POS: PROPN},
+    "Npfsoy": { "Gender": "Fem", "Number": "Sing", POS: PROPN},
+    "Npfsry": { "Gender": "Fem", "Number": "Sing", POS: PROPN},
+    "Npmsoy": { "Gender": "Masc", "Number": "Sing", POS: PROPN},
+    "Npmsry": { "Gender": "Masc", "Number": "Sing", POS: PROPN},
     "PERCENT": {POS: SYM},
     "PERIOD": {POS: PUNCT},
     "PLUSMINUS": {POS: SYM},
     "Pd3-po": {
-        "Case": "Dat,Gen",
+        
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Pd3fpr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Pd3fso": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Pd3fsr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Pd3mpr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Pd3mso": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
     "Pd3msr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Dem",
     },
-    "Pi3--r": {"Case": "Acc,Nom", POS: PRON, "Person": "3", "PronType": "Ind"},
+    "Pi3--r": { POS: PRON, "Person": "three", "PronType": "Ind"},
     "Pi3-po": {
-        "Case": "Dat,Gen",
+        
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Pi3-so": {
-        "Case": "Dat,Gen",
+        
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Pi3-sr": {
-        "Case": "Acc,Nom",
+        
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Pi3fpr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Pi3fso": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Pi3fsr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Pi3mpr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Pi3msr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
     },
     "Pi3msr--y": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Ind",
-        "Variant": "Short",
+        
     },
     "Pp1-pa--------w": {
         "Case": "Acc",
         "Number": "Plur",
         POS: PRON,
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "Pp1-pa--y-----w": {
         "Case": "Acc",
         "Number": "Plur",
         POS: PRON,
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
-        "Variant": "Short",
+        
     },
     "Pp1-pd--------w": {
         "Case": "Dat",
         "Number": "Plur",
         POS: PRON,
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "Pp1-pr--------s": {
-        "Case": "Acc,Nom",
+        
         "Number": "Plur",
         POS: PRON,
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "Pp1-sa--------s": {
         "Case": "Acc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "Pp1-sa--------w": {
         "Case": "Acc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "Pp1-sa--y-----w": {
         "Case": "Acc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
-        "Variant": "Short",
+        
     },
     "Pp1-sd--------w": {
         "Case": "Dat",
         "Number": "Sing",
         POS: PRON,
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "Pp1-sd--y-----w": {
         "Case": "Dat",
         "Number": "Sing",
         POS: PRON,
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
-        "Variant": "Short",
+        
     },
     "Pp1-sn--------s": {
         "Case": "Nom",
         "Number": "Sing",
         POS: PRON,
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
-    "Pp2-----------s": {POS: PRON, "Person": "2", "PronType": "Prs"},
+    "Pp2-----------s": {POS: PRON, "Person": "two", "PronType": "Prs"},
     "Pp2-pa--------w": {
         "Case": "Acc",
         "Number": "Plur",
         POS: PRON,
-        "Person": "2",
+        "Person": "two",
         "PronType": "Prs",
     },
     "Pp2-pa--y-----w": {
         "Case": "Acc",
         "Number": "Plur",
         POS: PRON,
-        "Person": "2",
+        "Person": "two",
         "PronType": "Prs",
-        "Variant": "Short",
+        
     },
     "Pp2-pd--------w": {
         "Case": "Dat",
         "Number": "Plur",
         POS: PRON,
-        "Person": "2",
+        "Person": "two",
         "PronType": "Prs",
     },
     "Pp2-pr--------s": {
-        "Case": "Acc,Nom",
+        
         "Number": "Plur",
         POS: PRON,
-        "Person": "2",
+        "Person": "two",
         "PronType": "Prs",
     },
     "Pp2-sa--------s": {
         "Case": "Acc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "2",
+        "Person": "two",
         "PronType": "Prs",
     },
     "Pp2-sa--------w": {
         "Case": "Acc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "2",
+        "Person": "two",
         "PronType": "Prs",
     },
     "Pp2-sa--y-----w": {
         "Case": "Acc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "2",
+        "Person": "two",
         "PronType": "Prs",
-        "Variant": "Short",
+        
     },
     "Pp2-sd--y-----w": {
         "Case": "Dat",
         "Number": "Sing",
         POS: PRON,
-        "Person": "2",
+        "Person": "two",
         "PronType": "Prs",
-        "Variant": "Short",
+        
     },
     "Pp2-sn--------s": {
         "Case": "Nom",
         "Number": "Sing",
         POS: PRON,
-        "Person": "2",
+        "Person": "two",
         "PronType": "Prs",
     },
     "Pp3-pd--------w": {
         "Case": "Dat",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "Pp3-pd--y-----w": {
         "Case": "Dat",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
-        "Variant": "Short",
+        
     },
     "Pp3-po--------s": {
-        "Case": "Dat,Gen",
+        
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "Pp3-sd--------w": {
         "Case": "Dat",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "Pp3-sd--y-----w": {
         "Case": "Dat",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
-        "Variant": "Short",
+        
     },
     "Pp3fpa--------w": {
         "Case": "Acc",
         "Gender": "Fem",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "Pp3fpa--y-----w": {
@@ -994,16 +994,16 @@ TAG_MAP = {
         "Gender": "Fem",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
-        "Variant": "Short",
+        
     },
     "Pp3fpr--------s": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "Pp3fsa--------w": {
@@ -1011,7 +1011,7 @@ TAG_MAP = {
         "Gender": "Fem",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "Pp3fsa--y-----w": {
@@ -1019,16 +1019,16 @@ TAG_MAP = {
         "Gender": "Fem",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
-        "Variant": "Short",
+        
     },
     "Pp3fsr--------s": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "Pp3mpa--------w": {
@@ -1036,7 +1036,7 @@ TAG_MAP = {
         "Gender": "Masc",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "Pp3mpa--y-----w": {
@@ -1044,16 +1044,16 @@ TAG_MAP = {
         "Gender": "Masc",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
-        "Variant": "Short",
+        
     },
     "Pp3mpr--------s": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "Pp3msa--------w": {
@@ -1061,7 +1061,7 @@ TAG_MAP = {
         "Gender": "Masc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "Pp3msa--y-----w": {
@@ -1069,247 +1069,247 @@ TAG_MAP = {
         "Gender": "Masc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
-        "Variant": "Short",
+        
     },
     "Pp3mso--------s": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "Pp3msr--------s": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "Ps1mp-s": {
         "Gender": "Masc",
         "Number": "Plur",
         POS: PRON,
-        "Person": "1",
+        "Person": "one",
         "Poss": "Yes",
         "PronType": "Prs",
     },
-    "Ps3---p": {POS: PRON, "Person": "3", "Poss": "Yes", "PronType": "Prs"},
-    "Ps3---s": {POS: PRON, "Person": "3", "Poss": "Yes", "PronType": "Prs"},
+    "Ps3---p": {POS: PRON, "Person": "three", "Poss": "Yes", "PronType": "Prs"},
+    "Ps3---s": {POS: PRON, "Person": "three", "Poss": "Yes", "PronType": "Prs"},
     "Ps3fp-s": {
         "Gender": "Fem",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
         "PronType": "Prs",
     },
-    "Pw3--r": {"Case": "Acc,Nom", POS: PRON, "Person": "3", "PronType": "Int,Rel"},
+    "Pw3--r": { POS: PRON, "Person": "three"},
     "Pw3-po": {
-        "Case": "Dat,Gen",
+        
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
-        "PronType": "Int,Rel",
+        "Person": "three",
+        
     },
     "Pw3fso": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
-        "PronType": "Int,Rel",
+        "Person": "three",
+        
     },
     "Pw3mpr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Plur",
         POS: PRON,
-        "Person": "3",
-        "PronType": "Int,Rel",
+        "Person": "three",
+        
     },
     "Px3--a--------s": {
         "Case": "Acc",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
         "Reflex": "Yes",
     },
     "Px3--a--------w": {
         "Case": "Acc",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
         "Reflex": "Yes",
     },
     "Px3--a--y-----w": {
         "Case": "Acc",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
         "Reflex": "Yes",
-        "Variant": "Short",
+        
     },
     "Px3--d--------w": {
         "Case": "Dat",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
         "Reflex": "Yes",
     },
     "Px3--d--y-----w": {
         "Case": "Dat",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
         "Reflex": "Yes",
-        "Variant": "Short",
+        
     },
     "Pz3-sr": {
-        "Case": "Acc,Nom",
+        
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Neg",
     },
     "Pz3msr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: PRON,
-        "Person": "3",
+        "Person": "three",
         "PronType": "Neg",
     },
     "QUEST": {POS: PUNCT},
     "QUOT": {POS: PUNCT},
     "Qn": {POS: PART, "PartType": "Inf"},
     "Qs": {"Mood": "Sub", POS: PART},
-    "Qs-y": {"Mood": "Sub", POS: PART, "Variant": "Short"},
+    "Qs-y": {"Mood": "Sub", POS: PART},
     "Qz": {POS: PART, "Polarity": "Neg"},
-    "Qz-y": {POS: PART, "Polarity": "Neg", "Variant": "Short"},
+    "Qz-y": {POS: PART, "Polarity": "Neg"},
     "RPAR": {POS: PUNCT},
     "Rc": {POS: ADV},
     "Rgp": {"Degree": "Pos", POS: ADV},
-    "Rgpy": {"Degree": "Pos", POS: ADV, "Variant": "Short"},
+    "Rgpy": {"Degree": "Pos", POS: ADV},
     "Rgs": {"Degree": "Sup", POS: ADV},
     "Rp": {POS: ADV},
-    "Rw": {POS: ADV, "PronType": "Int,Rel"},
+    "Rw": {POS: ADV},
     "Rz": {POS: ADV, "PronType": "Neg"},
     "SCOLON": {"AdpType": "Prep", POS: PUNCT},
     "SLASH": {"AdpType": "Prep", POS: SYM},
     "Spsa": {"AdpType": "Prep", "Case": "Acc", POS: ADP},
-    "Spsay": {"AdpType": "Prep", "Case": "Acc", POS: ADP, "Variant": "Short"},
+    "Spsay": {"AdpType": "Prep", "Case": "Acc", POS: ADP},
     "Spsd": {"AdpType": "Prep", "Case": "Dat", POS: ADP},
     "Spsg": {"AdpType": "Prep", "Case": "Gen", POS: ADP},
-    "Spsgy": {"AdpType": "Prep", "Case": "Gen", POS: ADP, "Variant": "Short"},
-    "Td-po": {"Case": "Dat,Gen", "Number": "Plur", POS: DET, "PronType": "Dem"},
+    "Spsgy": {"AdpType": "Prep", "Case": "Gen", POS: ADP},
+    "Td-po": { "Number": "Plur", POS: DET, "PronType": "Dem"},
     "Tdfpr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Plur",
         POS: DET,
         "PronType": "Dem",
     },
     "Tdfso": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
         "PronType": "Dem",
     },
     "Tdfsr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
         "PronType": "Dem",
     },
     "Tdmpr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Plur",
         POS: DET,
         "PronType": "Dem",
     },
     "Tdmso": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
         "PronType": "Dem",
     },
     "Tdmsr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
         "PronType": "Dem",
     },
-    "Tf-so": {"Case": "Dat,Gen", "Number": "Sing", POS: DET, "PronType": "Art"},
+    "Tf-so": { "Number": "Sing", POS: DET, "PronType": "Art"},
     "Tffs-y": {
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
         "PronType": "Art",
-        "Variant": "Short",
+        
     },
     "Tfms-y": {
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
         "PronType": "Art",
-        "Variant": "Short",
+        
     },
     "Tfmsoy": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
         "PronType": "Art",
-        "Variant": "Short",
+        
     },
     "Tfmsry": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
         "PronType": "Art",
-        "Variant": "Short",
+        
     },
-    "Ti-po": {"Case": "Dat,Gen", "Number": "Plur", POS: DET, "PronType": "Ind"},
+    "Ti-po": { "Number": "Plur", POS: DET, "PronType": "Ind"},
     "Tifp-y": {
         "Gender": "Fem",
         "Number": "Plur",
         POS: DET,
         "PronType": "Ind",
-        "Variant": "Short",
+        
     },
     "Tifso": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
         "PronType": "Ind",
     },
     "Tifsr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Fem",
         "Number": "Sing",
         POS: DET,
         "PronType": "Ind",
     },
     "Timso": {
-        "Case": "Dat,Gen",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
         "PronType": "Ind",
     },
     "Timsr": {
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: DET,
@@ -1343,23 +1343,23 @@ TAG_MAP = {
         "Poss": "Yes",
         "PronType": "Prs",
     },
-    "Va--1": {POS: AUX, "Person": "1"},
-    "Va--1p": {"Number": "Plur", POS: AUX, "Person": "1"},
-    "Va--1s": {"Number": "Sing", POS: AUX, "Person": "1"},
-    "Va--2p": {"Number": "Plur", POS: AUX, "Person": "2"},
-    "Va--2s": {"Number": "Sing", POS: AUX, "Person": "2"},
-    "Va--3": {POS: AUX, "Person": "3"},
-    "Va--3-----y": {POS: AUX, "Person": "3", "Variant": "Short"},
-    "Va--3p": {"Number": "Plur", POS: AUX, "Person": "3"},
-    "Va--3p----y": {"Number": "Plur", POS: AUX, "Person": "3", "Variant": "Short"},
-    "Va--3s": {"Number": "Sing", POS: AUX, "Person": "3"},
-    "Va--3s----y": {"Number": "Sing", POS: AUX, "Person": "3", "Variant": "Short"},
+    "Va--1": {POS: AUX, "Person": "one"},
+    "Va--1p": {"Number": "Plur", POS: AUX, "Person": "one"},
+    "Va--1s": {"Number": "Sing", POS: AUX, "Person": "one"},
+    "Va--2p": {"Number": "Plur", POS: AUX, "Person": "two"},
+    "Va--2s": {"Number": "Sing", POS: AUX, "Person": "two"},
+    "Va--3": {POS: AUX, "Person": "three"},
+    "Va--3-----y": {POS: AUX, "Person": "three"},
+    "Va--3p": {"Number": "Plur", POS: AUX, "Person": "three"},
+    "Va--3p----y": {"Number": "Plur", POS: AUX, "Person": "three"},
+    "Va--3s": {"Number": "Sing", POS: AUX, "Person": "three"},
+    "Va--3s----y": {"Number": "Sing", POS: AUX, "Person": "three"},
     "Vag": {POS: AUX, "VerbForm": "Ger"},
     "Vaii3p": {
         "Mood": "Ind",
         "Number": "Plur",
         POS: AUX,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Imp",
         "VerbForm": "Fin",
     },
@@ -1367,7 +1367,7 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Sing",
         POS: AUX,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Imp",
         "VerbForm": "Fin",
     },
@@ -1375,15 +1375,15 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Sing",
         POS: AUX,
-        "Person": "3",
-        "Tense": "Pqp",
+        "Person": "three",
+        
         "VerbForm": "Fin",
     },
     "Vaip1s": {
         "Mood": "Ind",
         "Number": "Sing",
         POS: AUX,
-        "Person": "1",
+        "Person": "one",
         "Tense": "Pres",
         "VerbForm": "Fin",
     },
@@ -1391,7 +1391,7 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Sing",
         POS: AUX,
-        "Person": "2",
+        "Person": "two",
         "Tense": "Pres",
         "VerbForm": "Fin",
     },
@@ -1399,7 +1399,7 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Plur",
         POS: AUX,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Pres",
         "VerbForm": "Fin",
     },
@@ -1407,7 +1407,7 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Sing",
         POS: AUX,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Pres",
         "VerbForm": "Fin",
     },
@@ -1416,32 +1416,32 @@ TAG_MAP = {
     "Vasp3": {
         "Mood": "Sub",
         POS: AUX,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Pres",
         "VerbForm": "Fin",
     },
     "Vmg": {POS: VERB, "VerbForm": "Ger"},
-    "Vmg-------y": {POS: VERB, "Variant": "Short", "VerbForm": "Ger"},
+    "Vmg-------y": {POS: VERB,  "VerbForm": "Ger"},
     "Vmii1": {
         "Mood": "Ind",
         POS: VERB,
-        "Person": "1",
+        "Person": "one",
         "Tense": "Imp",
         "VerbForm": "Fin",
     },
     "Vmii1-----y": {
         "Mood": "Ind",
         POS: VERB,
-        "Person": "1",
+        "Person": "one",
         "Tense": "Imp",
-        "Variant": "Short",
+        
         "VerbForm": "Fin",
     },
     "Vmii2p": {
         "Mood": "Ind",
         "Number": "Plur",
         POS: VERB,
-        "Person": "2",
+        "Person": "two",
         "Tense": "Imp",
         "VerbForm": "Fin",
     },
@@ -1449,7 +1449,7 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Sing",
         POS: VERB,
-        "Person": "2",
+        "Person": "two",
         "Tense": "Imp",
         "VerbForm": "Fin",
     },
@@ -1457,7 +1457,7 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Plur",
         POS: VERB,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Imp",
         "VerbForm": "Fin",
     },
@@ -1465,16 +1465,16 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Plur",
         POS: VERB,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Imp",
-        "Variant": "Short",
+        
         "VerbForm": "Fin",
     },
     "Vmii3s": {
         "Mood": "Ind",
         "Number": "Sing",
         POS: VERB,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Imp",
         "VerbForm": "Fin",
     },
@@ -1482,23 +1482,23 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Plur",
         POS: VERB,
-        "Person": "3",
-        "Tense": "Pqp",
+        "Person": "three",
+        
         "VerbForm": "Fin",
     },
     "Vmil3s": {
         "Mood": "Ind",
         "Number": "Sing",
         POS: VERB,
-        "Person": "3",
-        "Tense": "Pqp",
+        "Person": "three",
+        
         "VerbForm": "Fin",
     },
     "Vmip1p": {
         "Mood": "Ind",
         "Number": "Plur",
         POS: VERB,
-        "Person": "1",
+        "Person": "one",
         "Tense": "Pres",
         "VerbForm": "Fin",
     },
@@ -1506,7 +1506,7 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Sing",
         POS: VERB,
-        "Person": "1",
+        "Person": "one",
         "Tense": "Pres",
         "VerbForm": "Fin",
     },
@@ -1514,16 +1514,16 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Sing",
         POS: VERB,
-        "Person": "1",
+        "Person": "one",
         "Tense": "Pres",
-        "Variant": "Short",
+        
         "VerbForm": "Fin",
     },
     "Vmip2p": {
         "Mood": "Ind",
         "Number": "Plur",
         POS: VERB,
-        "Person": "2",
+        "Person": "two",
         "Tense": "Pres",
         "VerbForm": "Fin",
     },
@@ -1531,30 +1531,30 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Sing",
         POS: VERB,
-        "Person": "2",
+        "Person": "two",
         "Tense": "Pres",
         "VerbForm": "Fin",
     },
     "Vmip3": {
         "Mood": "Ind",
         POS: VERB,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Pres",
         "VerbForm": "Fin",
     },
     "Vmip3-----y": {
         "Mood": "Ind",
         POS: VERB,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Pres",
-        "Variant": "Short",
+        
         "VerbForm": "Fin",
     },
     "Vmip3p": {
         "Mood": "Ind",
         "Number": "Plur",
         POS: AUX,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Pres",
         "VerbForm": "Fin",
     },
@@ -1562,7 +1562,7 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Sing",
         POS: VERB,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Pres",
         "VerbForm": "Fin",
     },
@@ -1570,16 +1570,16 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Sing",
         POS: AUX,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Pres",
-        "Variant": "Short",
+        
         "VerbForm": "Fin",
     },
     "Vmis1p": {
         "Mood": "Ind",
         "Number": "Plur",
         POS: VERB,
-        "Person": "1",
+        "Person": "one",
         "Tense": "Past",
         "VerbForm": "Fin",
     },
@@ -1587,7 +1587,7 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Sing",
         POS: VERB,
-        "Person": "1",
+        "Person": "one",
         "Tense": "Past",
         "VerbForm": "Fin",
     },
@@ -1595,7 +1595,7 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Plur",
         POS: VERB,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Past",
         "VerbForm": "Fin",
     },
@@ -1603,7 +1603,7 @@ TAG_MAP = {
         "Mood": "Ind",
         "Number": "Sing",
         POS: VERB,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Past",
         "VerbForm": "Fin",
     },
@@ -1611,14 +1611,14 @@ TAG_MAP = {
         "Mood": "Imp",
         "Number": "Plur",
         POS: VERB,
-        "Person": "2",
+        "Person": "two",
         "VerbForm": "Fin",
     },
     "Vmm-2s": {
         "Mood": "Imp",
         "Number": "Sing",
         POS: VERB,
-        "Person": "2",
+        "Person": "two",
         "VerbForm": "Fin",
     },
     "Vmnp": {POS: VERB, "Tense": "Pres", "VerbForm": "Inf"},
@@ -1629,16 +1629,16 @@ TAG_MAP = {
     "Vmsp3": {
         "Mood": "Sub",
         POS: VERB,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Pres",
         "VerbForm": "Fin",
     },
     "Vmsp3-----y": {
         "Mood": "Sub",
         POS: VERB,
-        "Person": "3",
+        "Person": "three",
         "Tense": "Pres",
-        "Variant": "Short",
+        
         "VerbForm": "Fin",
     },
     "X": {POS: X},
@@ -1646,7 +1646,7 @@ TAG_MAP = {
     "Yn": {"Abbr": "Yes", POS: NOUN},
     "Ynmsry": {
         "Abbr": "Yes",
-        "Case": "Acc,Nom",
+        
         "Gender": "Masc",
         "Number": "Sing",
         POS: NOUN,
diff --git a/spacy/lang/ru/tag_map.py b/spacy/lang/ru/tag_map.py
index baf065588..b6ca314b6 100644
--- a/spacy/lang/ru/tag_map.py
+++ b/spacy/lang/ru/tag_map.py
@@ -6,741 +6,741 @@ from ...symbols import PROPN, PART, INTJ, PRON, SCONJ, AUX, CCONJ
 
 # fmt: off
 TAG_MAP = {
-    'ADJ__Animacy=Anim|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing': {POS: ADJ, 'Animacy': 'Anim', 'Case': 'Acc', 'Degree': 'Pos', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Animacy=Anim|Case=Acc|Degree=Pos|Number=Plur': {POS: ADJ, 'Animacy': 'Anim', 'Case': 'Acc', 'Degree': 'Pos', 'Number': 'Plur'},
-    'ADJ__Animacy=Anim|Case=Acc|Degree=Sup|Gender=Masc|Number=Sing': {POS: ADJ, 'Animacy': 'Anim', 'Case': 'Acc', 'Degree': 'Sup', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Animacy=Anim|Case=Nom|Degree=Pos|Number=Plur': {POS: ADJ, 'Animacy': 'Anim', 'Case': 'Nom', 'Degree': 'Pos', 'Number': 'Plur'},
-    'ADJ__Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing': {POS: ADJ, 'Animacy': 'Inan', 'Case': 'Acc', 'Degree': 'Pos', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Animacy=Inan|Case=Acc|Degree=Pos|Gender=Neut|Number=Sing': {POS: ADJ, 'Animacy': 'Inan', 'Case': 'Acc', 'Degree': 'Pos', 'Gender': 'Neut', 'Number': 'Sing'},
-    'ADJ__Animacy=Inan|Case=Acc|Degree=Pos|Number=Plur': {POS: ADJ, 'Animacy': 'Inan', 'Case': 'Acc', 'Degree': 'Pos', 'Number': 'Plur'},
-    'ADJ__Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Sing': {POS: ADJ, 'Animacy': 'Inan', 'Case': 'Acc', 'Degree': 'Sup', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Animacy=Inan|Case=Acc|Degree=Sup|Number=Plur': {POS: ADJ, 'Animacy': 'Inan', 'Case': 'Acc', 'Degree': 'Sup', 'Number': 'Plur'},
-    'ADJ__Animacy=Inan|Case=Acc|Gender=Fem|Number=Sing': {POS: ADJ, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing'},
-    'ADJ__Animacy=Inan|Case=Nom|Degree=Pos|Gender=Fem|Number=Sing': {POS: ADJ, 'Animacy': 'Inan', 'Case': 'Nom', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'},
-    'ADJ__Case=Acc|Degree=Pos|Gender=Fem|Number=Sing': {POS: ADJ, 'Case': 'Acc', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'},
-    'ADJ__Case=Acc|Degree=Pos|Gender=Neut|Number=Sing': {POS: ADJ, 'Case': 'Acc', 'Degree': 'Pos', 'Gender': 'Neut', 'Number': 'Sing'},
-    'ADJ__Case=Acc|Degree=Sup|Gender=Fem|Number=Sing': {POS: ADJ, 'Case': 'Acc', 'Degree': 'Sup', 'Gender': 'Fem', 'Number': 'Sing'},
-    'ADJ__Case=Acc|Degree=Sup|Gender=Neut|Number=Sing': {POS: ADJ, 'Case': 'Acc', 'Degree': 'Sup', 'Gender': 'Neut', 'Number': 'Sing'},
-    'ADJ__Case=Dat|Degree=Pos|Gender=Fem|Number=Sing': {POS: ADJ, 'Case': 'Dat', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'},
-    'ADJ__Case=Dat|Degree=Pos|Gender=Masc|Number=Sing': {POS: ADJ, 'Case': 'Dat', 'Degree': 'Pos', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Case=Dat|Degree=Pos|Gender=Neut|Number=Sing': {POS: ADJ, 'Case': 'Dat', 'Degree': 'Pos', 'Gender': 'Neut', 'Number': 'Sing'},
-    'ADJ__Case=Dat|Degree=Pos|Number=Plur': {POS: ADJ, 'Case': 'Dat', 'Degree': 'Pos', 'Number': 'Plur'},
-    'ADJ__Case=Dat|Degree=Sup|Gender=Masc|Number=Sing': {POS: ADJ, 'Case': 'Dat', 'Degree': 'Sup', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Case=Dat|Degree=Sup|Gender=Neut|Number=Sing': {POS: ADJ, 'Case': 'Dat', 'Degree': 'Sup', 'Gender': 'Neut', 'Number': 'Sing'},
-    'ADJ__Case=Dat|Degree=Sup|Number=Plur': {POS: ADJ, 'Case': 'Dat', 'Degree': 'Sup', 'Number': 'Plur'},
-    'ADJ__Case=Gen|Degree=Pos|Gender=Fem|Number=Sing': {POS: ADJ, 'Case': 'Gen', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'},
-    'ADJ__Case=Gen|Degree=Pos|Gender=Fem|Number=Sing|Variant=Short': {POS: ADJ, 'Case': 'Gen', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing', 'Variant': 'Short'},
-    'ADJ__Case=Gen|Degree=Pos|Gender=Masc|Number=Sing': {POS: ADJ, 'Case': 'Gen', 'Degree': 'Pos', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Case=Gen|Degree=Pos|Gender=Neut|Number=Sing': {POS: ADJ, 'Case': 'Gen', 'Degree': 'Pos', 'Gender': 'Neut', 'Number': 'Sing'},
-    'ADJ__Case=Gen|Degree=Pos|Number=Plur': {POS: ADJ, 'Case': 'Gen', 'Degree': 'Pos', 'Number': 'Plur'},
-    'ADJ__Case=Gen|Degree=Sup|Gender=Fem|Number=Sing': {POS: ADJ, 'Case': 'Gen', 'Degree': 'Sup', 'Gender': 'Fem', 'Number': 'Sing'},
-    'ADJ__Case=Gen|Degree=Sup|Gender=Masc|Number=Sing': {POS: ADJ, 'Case': 'Gen', 'Degree': 'Sup', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Case=Gen|Degree=Sup|Gender=Neut|Number=Sing': {POS: ADJ, 'Case': 'Gen', 'Degree': 'Sup', 'Gender': 'Neut', 'Number': 'Sing'},
-    'ADJ__Case=Gen|Degree=Sup|Number=Plur': {POS: ADJ, 'Case': 'Gen', 'Degree': 'Sup', 'Number': 'Plur'},
-    'ADJ__Case=Ins|Degree=Pos|Gender=Fem|Number=Sing': {POS: ADJ, 'Case': 'Ins', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'},
-    'ADJ__Case=Ins|Degree=Pos|Gender=Masc|Number=Sing': {POS: ADJ, 'Case': 'Ins', 'Degree': 'Pos', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Case=Ins|Degree=Pos|Gender=Neut|Number=Sing': {POS: ADJ, 'Case': 'Ins', 'Degree': 'Pos', 'Gender': 'Neut', 'Number': 'Sing'},
-    'ADJ__Case=Ins|Degree=Pos|Number=Plur': {POS: ADJ, 'Case': 'Ins', 'Degree': 'Pos', 'Number': 'Plur'},
-    'ADJ__Case=Ins|Degree=Sup|Gender=Fem|Number=Sing': {POS: ADJ, 'Case': 'Ins', 'Degree': 'Sup', 'Gender': 'Fem', 'Number': 'Sing'},
-    'ADJ__Case=Ins|Degree=Sup|Gender=Masc|Number=Sing': {POS: ADJ, 'Case': 'Ins', 'Degree': 'Sup', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Case=Ins|Degree=Sup|Gender=Neut|Number=Sing': {POS: ADJ, 'Case': 'Ins', 'Degree': 'Sup', 'Gender': 'Neut', 'Number': 'Sing'},
-    'ADJ__Case=Ins|Degree=Sup|Number=Plur': {POS: ADJ, 'Case': 'Ins', 'Degree': 'Sup', 'Number': 'Plur'},
-    'ADJ__Case=Loc|Degree=Pos|Gender=Fem|Number=Sing': {POS: ADJ, 'Case': 'Loc', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'},
-    'ADJ__Case=Loc|Degree=Pos|Gender=Masc|Number=Sing': {POS: ADJ, 'Case': 'Loc', 'Degree': 'Pos', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Case=Loc|Degree=Pos|Gender=Neut|Number=Sing': {POS: ADJ, 'Case': 'Loc', 'Degree': 'Pos', 'Gender': 'Neut', 'Number': 'Sing'},
-    'ADJ__Case=Loc|Degree=Pos|Number=Plur': {POS: ADJ, 'Case': 'Loc', 'Degree': 'Pos', 'Number': 'Plur'},
-    'ADJ__Case=Loc|Degree=Sup|Gender=Fem|Number=Sing': {POS: ADJ, 'Case': 'Loc', 'Degree': 'Sup', 'Gender': 'Fem', 'Number': 'Sing'},
-    'ADJ__Case=Loc|Degree=Sup|Gender=Masc|Number=Sing': {POS: ADJ, 'Case': 'Loc', 'Degree': 'Sup', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Case=Loc|Degree=Sup|Gender=Neut|Number=Sing': {POS: ADJ, 'Case': 'Loc', 'Degree': 'Sup', 'Gender': 'Neut', 'Number': 'Sing'},
-    'ADJ__Case=Loc|Degree=Sup|Number=Plur': {POS: ADJ, 'Case': 'Loc', 'Degree': 'Sup', 'Number': 'Plur'},
-    'ADJ__Case=Nom|Degree=Pos|Gender=Fem|Number=Sing': {POS: ADJ, 'Case': 'Nom', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'},
-    'ADJ__Case=Nom|Degree=Pos|Gender=Masc|Number=Sing': {POS: ADJ, 'Case': 'Nom', 'Degree': 'Pos', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Case=Nom|Degree=Pos|Gender=Neut|Number=Sing': {POS: ADJ, 'Case': 'Nom', 'Degree': 'Pos', 'Gender': 'Neut', 'Number': 'Sing'},
-    'ADJ__Case=Nom|Degree=Pos|Number=Plur': {POS: ADJ, 'Case': 'Nom', 'Degree': 'Pos', 'Number': 'Plur'},
-    'ADJ__Case=Nom|Degree=Sup|Gender=Fem|Number=Sing': {POS: ADJ, 'Case': 'Nom', 'Degree': 'Sup', 'Gender': 'Fem', 'Number': 'Sing'},
-    'ADJ__Case=Nom|Degree=Sup|Gender=Masc|Number=Sing': {POS: ADJ, 'Case': 'Nom', 'Degree': 'Sup', 'Gender': 'Masc', 'Number': 'Sing'},
-    'ADJ__Case=Nom|Degree=Sup|Gender=Neut|Number=Sing': {POS: ADJ, 'Case': 'Nom', 'Degree': 'Sup', 'Gender': 'Neut', 'Number': 'Sing'},
-    'ADJ__Case=Nom|Degree=Sup|Number=Plur': {POS: ADJ, 'Case': 'Nom', 'Degree': 'Sup', 'Number': 'Plur'},
-    'ADJ__Degree=Cmp': {POS: ADJ, 'Degree': 'Cmp'},
-    'ADJ__Degree=Pos': {POS: ADJ, 'Degree': 'Pos'},
-    'ADJ__Degree=Pos|Gender=Fem|Number=Sing|Variant=Short': {POS: ADJ, 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing', 'Variant': 'Short'},
-    'ADJ__Degree=Pos|Gender=Masc|Number=Sing|Variant=Short': {POS: ADJ, 'Degree': 'Pos', 'Gender': 'Masc', 'Number': 'Sing', 'Variant': 'Short'},
-    'ADJ__Degree=Pos|Gender=Neut|Number=Sing|Variant=Short': {POS: ADJ, 'Degree': 'Pos', 'Gender': 'Neut', 'Number': 'Sing', 'Variant': 'Short'},
-    'ADJ__Degree=Pos|Number=Plur|Variant=Short': {POS: ADJ, 'Degree': 'Pos', 'Number': 'Plur', 'Variant': 'Short'},
-    'ADJ__Foreign=Yes': {POS: ADJ, 'Foreign': 'Yes'},
-    'ADJ___': {POS: ADJ},
-    'ADJ': {POS: ADJ},
-    'ADP___': {POS: ADP},
-    'ADP': {POS: ADP},
-    'ADV__Degree=Cmp': {POS: ADV, 'Degree': 'Cmp'},
-    'ADV__Degree=Pos': {POS: ADV, 'Degree': 'Pos'},
-    'ADV__Polarity=Neg': {POS: ADV, 'Polarity': 'Neg'},
-    'AUX__Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Case': 'Nom', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Gender': 'Fem', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Gender': 'Masc', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Gender=Neut|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Gender': 'Neut', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Mood': 'Imp', 'Number': 'Plur', 'Person': '2', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Mood': 'Imp', 'Number': 'Sing', 'Person': '2', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '1', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '2', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '3', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '1', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '2', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '3', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|Tense=Pres|VerbForm=Conv|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'Tense': 'Pres', 'VerbForm': 'Conv', 'Voice': 'Act'},
-    'AUX__Aspect=Imp|VerbForm=Inf|Voice=Act': {POS: AUX, 'Aspect': 'Imp', 'VerbForm': 'Inf', 'Voice': 'Act'},
-    'CCONJ___': {POS: CCONJ},
-    'CCONJ': {POS: CCONJ},
-    'DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing': {POS: DET, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'DET__Animacy=Inan|Case=Acc|Gender=Neut|Number=Sing': {POS: DET, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing'},
-    'DET__Animacy=Inan|Case=Gen|Gender=Fem|Number=Sing': {POS: DET, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing'},
-    'DET__Animacy=Inan|Case=Gen|Number=Plur': {POS: DET, 'Animacy': 'Inan', 'Case': 'Gen', 'Number': 'Plur'},
-    'DET__Case=Acc|Degree=Pos|Number=Plur': {POS: DET, 'Case': 'Acc', 'Degree': 'Pos', 'Number': 'Plur'},
-    'DET__Case=Acc|Gender=Fem|Number=Sing': {POS: DET, 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing'},
-    'DET__Case=Acc|Gender=Masc|Number=Sing': {POS: DET, 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'DET__Case=Acc|Gender=Neut|Number=Sing': {POS: DET, 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing'},
-    'DET__Case=Acc|Number=Plur': {POS: DET, 'Case': 'Acc', 'Number': 'Plur'},
-    'DET__Case=Dat|Gender=Fem|Number=Sing': {POS: DET, 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing'},
-    'DET__Case=Dat|Gender=Masc|Number=Plur': {POS: DET, 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Plur'},
-    'DET__Case=Dat|Gender=Masc|Number=Sing': {POS: DET, 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing'},
-    'DET__Case=Dat|Gender=Neut|Number=Sing': {POS: DET, 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Sing'},
-    'DET__Case=Dat|Number=Plur': {POS: DET, 'Case': 'Dat', 'Number': 'Plur'},
-    'DET__Case=Gen|Gender=Fem|Number=Sing': {POS: DET, 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing'},
-    'DET__Case=Gen|Gender=Masc|Number=Sing': {POS: DET, 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing'},
-    'DET__Case=Gen|Gender=Neut|Number=Sing': {POS: DET, 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing'},
-    'DET__Case=Gen|Number=Plur': {POS: DET, 'Case': 'Gen', 'Number': 'Plur'},
-    'DET__Case=Ins|Gender=Fem|Number=Sing': {POS: DET, 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing'},
-    'DET__Case=Ins|Gender=Masc|Number=Sing': {POS: DET, 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing'},
-    'DET__Case=Ins|Gender=Neut|Number=Sing': {POS: DET, 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing'},
-    'DET__Case=Ins|Number=Plur': {POS: DET, 'Case': 'Ins', 'Number': 'Plur'},
-    'DET__Case=Loc|Gender=Fem|Number=Sing': {POS: DET, 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing'},
-    'DET__Case=Loc|Gender=Masc|Number=Sing': {POS: DET, 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'DET__Case=Loc|Gender=Neut|Number=Sing': {POS: DET, 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing'},
-    'DET__Case=Loc|Number=Plur': {POS: DET, 'Case': 'Loc', 'Number': 'Plur'},
-    'DET__Case=Nom|Gender=Fem|Number=Sing': {POS: DET, 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing'},
-    'DET__Case=Nom|Gender=Masc|Number=Plur': {POS: DET, 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Plur'},
-    'DET__Case=Nom|Gender=Masc|Number=Sing': {POS: DET, 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing'},
-    'DET__Case=Nom|Gender=Neut|Number=Sing': {POS: DET, 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing'},
-    'DET__Case=Nom|Number=Plur': {POS: DET, 'Case': 'Nom', 'Number': 'Plur'},
-    'DET__Gender=Masc|Number=Sing': {POS: DET, 'Gender': 'Masc', 'Number': 'Sing'},
-    'INTJ___': {POS: INTJ},
-    'INTJ': {POS: INTJ},
-    'NOUN__Animacy=Anim|Case=Acc|Gender=Fem|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Acc|Gender=Masc|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Acc|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Acc|Gender=Neut|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Acc|Gender=Neut|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Acc|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Acc', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Dat|Gender=Fem|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Dat|Gender=Fem|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Dat|Gender=Masc|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Dat|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Dat|Gender=Neut|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Dat|Gender=Neut|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Dat|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Dat', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Gen|Gender=Fem|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Gen|Gender=Fem|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Gen|Gender=Masc|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Gen|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Gen|Gender=Neut|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Gen|Gender=Neut|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Gen|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Gen', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Ins|Gender=Fem|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Ins|Gender=Fem|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Ins|Gender=Masc|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Ins|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Ins|Gender=Neut|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Ins|Gender=Neut|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Ins|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Ins', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Loc|Gender=Fem|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Loc|Gender=Fem|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Loc|Gender=Masc|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Loc|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Loc|Gender=Neut|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Loc|Gender=Neut|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Loc|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Loc', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Nom|Gender=Fem|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Nom|Gender=Masc|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Nom|Gender=Neut|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Nom|Gender=Neut|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing'},
-    'NOUN__Animacy=Anim|Case=Nom|Number=Plur': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Nom', 'Number': 'Plur'},
-    'NOUN__Animacy=Anim|Case=Voc|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Anim', 'Case': 'Voc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Acc|Gender=Fem|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Acc|Gender=Fem|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Acc|Gender=Neut|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Acc|Gender=Neut|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Acc|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Acc', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Dat|Gender=Fem|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Dat|Gender=Fem|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Dat|Gender=Neut|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Dat|Gender=Neut|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Dat|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Dat', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Gen|Gender=Fem|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Gen|Gender=Fem|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Gen|Gender=Neut|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Gen|Gender=Neut|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Gen|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Gen', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Ins|Gender=Fem|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Ins|Gender=Fem|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Ins|Gender=Neut|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Ins|Gender=Neut|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Ins|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Ins', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Loc|Gender=Fem|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Loc|Gender=Fem|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Loc|Gender=Neut|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Loc|Gender=Neut|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Loc|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Loc', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Nom|Gender=Fem|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Nom|Gender=Fem|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Nom|Gender=Neut|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Nom|Gender=Neut|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Case=Nom|Number=Plur': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Nom', 'Number': 'Plur'},
-    'NOUN__Animacy=Inan|Case=Par|Gender=Masc|Number=Sing': {POS: NOUN, 'Animacy': 'Inan', 'Case': 'Par', 'Gender': 'Masc', 'Number': 'Sing'},
-    'NOUN__Animacy=Inan|Gender=Fem': {POS: NOUN, 'Animacy': 'Inan', 'Gender': 'Fem'},
-    'NOUN__Animacy=Inan|Gender=Masc': {POS: NOUN, 'Animacy': 'Inan', 'Gender': 'Masc'},
-    'NOUN__Animacy=Inan|Gender=Neut': {POS: NOUN, 'Animacy': 'Inan', 'Gender': 'Neut'},
-    'NOUN__Case=Gen|Degree=Pos|Gender=Fem|Number=Sing': {POS: NOUN, 'Case': 'Gen', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'},
-    'NOUN__Foreign=Yes': {POS: NOUN, 'Foreign': 'Yes'},
-    'NOUN___': {POS: NOUN},
-    'NOUN': {POS: NOUN},
-    'NUM__Animacy=Anim|Case=Acc': {POS: NUM, 'Animacy': 'Anim', 'Case': 'Acc'},
-    'NUM__Animacy=Anim|Case=Acc|Gender=Fem': {POS: NUM, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Fem'},
-    'NUM__Animacy=Anim|Case=Acc|Gender=Masc': {POS: NUM, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Masc'},
-    'NUM__Animacy=Inan|Case=Acc': {POS: NUM, 'Animacy': 'Inan', 'Case': 'Acc'},
-    'NUM__Animacy=Inan|Case=Acc|Gender=Fem': {POS: NUM, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Fem'},
-    'NUM__Animacy=Inan|Case=Acc|Gender=Masc': {POS: NUM, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Masc'},
-    'NUM__Case=Acc': {POS: NUM, 'Case': 'Acc'},
-    'NUM__Case=Acc|Gender=Fem': {POS: NUM, 'Case': 'Acc', 'Gender': 'Fem'},
-    'NUM__Case=Acc|Gender=Masc': {POS: NUM, 'Case': 'Acc', 'Gender': 'Masc'},
-    'NUM__Case=Acc|Gender=Neut': {POS: NUM, 'Case': 'Acc', 'Gender': 'Neut'},
-    'NUM__Case=Dat': {POS: NUM, 'Case': 'Dat'},
-    'NUM__Case=Dat|Gender=Fem': {POS: NUM, 'Case': 'Dat', 'Gender': 'Fem'},
-    'NUM__Case=Dat|Gender=Masc': {POS: NUM, 'Case': 'Dat', 'Gender': 'Masc'},
-    'NUM__Case=Dat|Gender=Neut': {POS: NUM, 'Case': 'Dat', 'Gender': 'Neut'},
-    'NUM__Case=Gen': {POS: NUM, 'Case': 'Gen'},
-    'NUM__Case=Gen|Gender=Fem': {POS: NUM, 'Case': 'Gen', 'Gender': 'Fem'},
-    'NUM__Case=Gen|Gender=Masc': {POS: NUM, 'Case': 'Gen', 'Gender': 'Masc'},
-    'NUM__Case=Gen|Gender=Neut': {POS: NUM, 'Case': 'Gen', 'Gender': 'Neut'},
-    'NUM__Case=Ins': {POS: NUM, 'Case': 'Ins'},
-    'NUM__Case=Ins|Gender=Fem': {POS: NUM, 'Case': 'Ins', 'Gender': 'Fem'},
-    'NUM__Case=Ins|Gender=Masc': {POS: NUM, 'Case': 'Ins', 'Gender': 'Masc'},
-    'NUM__Case=Ins|Gender=Neut': {POS: NUM, 'Case': 'Ins', 'Gender': 'Neut'},
-    'NUM__Case=Loc': {POS: NUM, 'Case': 'Loc'},
-    'NUM__Case=Loc|Gender=Fem': {POS: NUM, 'Case': 'Loc', 'Gender': 'Fem'},
-    'NUM__Case=Loc|Gender=Masc': {POS: NUM, 'Case': 'Loc', 'Gender': 'Masc'},
-    'NUM__Case=Loc|Gender=Neut': {POS: NUM, 'Case': 'Loc', 'Gender': 'Neut'},
-    'NUM__Case=Nom': {POS: NUM, 'Case': 'Nom'},
-    'NUM__Case=Nom|Gender=Fem': {POS: NUM, 'Case': 'Nom', 'Gender': 'Fem'},
-    'NUM__Case=Nom|Gender=Masc': {POS: NUM, 'Case': 'Nom', 'Gender': 'Masc'},
-    'NUM__Case=Nom|Gender=Neut': {POS: NUM, 'Case': 'Nom', 'Gender': 'Neut'},
-    'NUM___': {POS: NUM},
-    'NUM': {POS: NUM},
-    'PART__Mood=Cnd': {POS: PART, 'Mood': 'Cnd'},
-    'PART__Polarity=Neg': {POS: PART, 'Polarity': 'Neg'},
-    'PART___': {POS: PART},
-    'PART': {POS: PART},
-    'PRON__Animacy=Anim|Case=Acc|Gender=Masc|Number=Plur': {POS: PRON, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PRON__Animacy=Anim|Case=Acc|Number=Plur': {POS: PRON, 'Animacy': 'Anim', 'Case': 'Acc', 'Number': 'Plur'},
-    'PRON__Animacy=Anim|Case=Dat|Gender=Masc|Number=Sing': {POS: PRON, 'Animacy': 'Anim', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PRON__Animacy=Anim|Case=Dat|Number=Plur': {POS: PRON, 'Animacy': 'Anim', 'Case': 'Dat', 'Number': 'Plur'},
-    'PRON__Animacy=Anim|Case=Gen|Number=Plur': {POS: PRON, 'Animacy': 'Anim', 'Case': 'Gen', 'Number': 'Plur'},
-    'PRON__Animacy=Anim|Case=Ins|Gender=Masc|Number=Sing': {POS: PRON, 'Animacy': 'Anim', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PRON__Animacy=Anim|Case=Ins|Number=Plur': {POS: PRON, 'Animacy': 'Anim', 'Case': 'Ins', 'Number': 'Plur'},
-    'PRON__Animacy=Anim|Case=Loc|Number=Plur': {POS: PRON, 'Animacy': 'Anim', 'Case': 'Loc', 'Number': 'Plur'},
-    'PRON__Animacy=Anim|Case=Nom|Gender=Masc|Number=Plur': {POS: PRON, 'Animacy': 'Anim', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PRON__Animacy=Anim|Case=Nom|Number=Plur': {POS: PRON, 'Animacy': 'Anim', 'Case': 'Nom', 'Number': 'Plur'},
-    'PRON__Animacy=Anim|Gender=Masc|Number=Plur': {POS: PRON, 'Animacy': 'Anim', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PRON__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing': {POS: PRON, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PRON__Animacy=Inan|Case=Acc|Gender=Neut|Number=Sing': {POS: PRON, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PRON__Animacy=Inan|Case=Dat|Gender=Neut|Number=Sing': {POS: PRON, 'Animacy': 'Inan', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PRON__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing': {POS: PRON, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PRON__Animacy=Inan|Case=Gen|Gender=Neut|Number=Sing': {POS: PRON, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PRON__Animacy=Inan|Case=Ins|Gender=Fem|Number=Sing': {POS: PRON, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PRON__Animacy=Inan|Case=Ins|Gender=Neut|Number=Sing': {POS: PRON, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PRON__Animacy=Inan|Case=Loc|Gender=Neut|Number=Sing': {POS: PRON, 'Animacy': 'Inan', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PRON__Animacy=Inan|Case=Nom|Gender=Neut|Number=Sing': {POS: PRON, 'Animacy': 'Inan', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PRON__Animacy=Inan|Gender=Neut|Number=Sing': {POS: PRON, 'Animacy': 'Inan', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PRON__Case=Acc': {POS: PRON, 'Case': 'Acc'},
-    'PRON__Case=Acc|Gender=Fem|Number=Sing|Person=3': {POS: PRON, 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Acc|Gender=Masc|Number=Sing|Person=3': {POS: PRON, 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Acc|Gender=Neut|Number=Sing|Person=3': {POS: PRON, 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Acc|Number=Plur|Person=1': {POS: PRON, 'Case': 'Acc', 'Number': 'Plur', 'Person': '1'},
-    'PRON__Case=Acc|Number=Plur|Person=2': {POS: PRON, 'Case': 'Acc', 'Number': 'Plur', 'Person': '2'},
-    'PRON__Case=Acc|Number=Plur|Person=3': {POS: PRON, 'Case': 'Acc', 'Number': 'Plur', 'Person': '3'},
-    'PRON__Case=Acc|Number=Sing|Person=1': {POS: PRON, 'Case': 'Acc', 'Number': 'Sing', 'Person': '1'},
-    'PRON__Case=Acc|Number=Sing|Person=2': {POS: PRON, 'Case': 'Acc', 'Number': 'Sing', 'Person': '2'},
-    'PRON__Case=Dat': {POS: PRON, 'Case': 'Dat'},
-    'PRON__Case=Dat|Gender=Fem|Number=Sing|Person=3': {POS: PRON, 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Dat|Gender=Masc|Number=Sing|Person=3': {POS: PRON, 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Dat|Gender=Neut|Number=Sing|Person=3': {POS: PRON, 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Dat|Number=Plur|Person=1': {POS: PRON, 'Case': 'Dat', 'Number': 'Plur', 'Person': '1'},
-    'PRON__Case=Dat|Number=Plur|Person=2': {POS: PRON, 'Case': 'Dat', 'Number': 'Plur', 'Person': '2'},
-    'PRON__Case=Dat|Number=Plur|Person=3': {POS: PRON, 'Case': 'Dat', 'Number': 'Plur', 'Person': '3'},
-    'PRON__Case=Dat|Number=Sing|Person=1': {POS: PRON, 'Case': 'Dat', 'Number': 'Sing', 'Person': '1'},
-    'PRON__Case=Dat|Number=Sing|Person=2': {POS: PRON, 'Case': 'Dat', 'Number': 'Sing', 'Person': '2'},
-    'PRON__Case=Gen': {POS: PRON, 'Case': 'Gen'},
-    'PRON__Case=Gen|Gender=Fem|Number=Sing|Person=3': {POS: PRON, 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Gen|Gender=Masc|Number=Sing|Person=3': {POS: PRON, 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Gen|Gender=Neut|Number=Sing|Person=3': {POS: PRON, 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Gen|Number=Plur|Person=1': {POS: PRON, 'Case': 'Gen', 'Number': 'Plur', 'Person': '1'},
-    'PRON__Case=Gen|Number=Plur|Person=2': {POS: PRON, 'Case': 'Gen', 'Number': 'Plur', 'Person': '2'},
-    'PRON__Case=Gen|Number=Plur|Person=3': {POS: PRON, 'Case': 'Gen', 'Number': 'Plur', 'Person': '3'},
-    'PRON__Case=Gen|Number=Sing|Person=1': {POS: PRON, 'Case': 'Gen', 'Number': 'Sing', 'Person': '1'},
-    'PRON__Case=Gen|Number=Sing|Person=2': {POS: PRON, 'Case': 'Gen', 'Number': 'Sing', 'Person': '2'},
-    'PRON__Case=Ins': {POS: PRON, 'Case': 'Ins'},
-    'PRON__Case=Ins|Gender=Fem|Number=Sing|Person=3': {POS: PRON, 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Ins|Gender=Masc|Number=Sing|Person=3': {POS: PRON, 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Ins|Gender=Neut|Number=Sing|Person=3': {POS: PRON, 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Ins|Number=Plur|Person=1': {POS: PRON, 'Case': 'Ins', 'Number': 'Plur', 'Person': '1'},
-    'PRON__Case=Ins|Number=Plur|Person=2': {POS: PRON, 'Case': 'Ins', 'Number': 'Plur', 'Person': '2'},
-    'PRON__Case=Ins|Number=Plur|Person=3': {POS: PRON, 'Case': 'Ins', 'Number': 'Plur', 'Person': '3'},
-    'PRON__Case=Ins|Number=Sing|Person=1': {POS: PRON, 'Case': 'Ins', 'Number': 'Sing', 'Person': '1'},
-    'PRON__Case=Ins|Number=Sing|Person=2': {POS: PRON, 'Case': 'Ins', 'Number': 'Sing', 'Person': '2'},
-    'PRON__Case=Loc': {POS: PRON, 'Case': 'Loc'},
-    'PRON__Case=Loc|Gender=Fem|Number=Sing|Person=3': {POS: PRON, 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Loc|Gender=Masc|Number=Sing|Person=3': {POS: PRON, 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Loc|Gender=Neut|Number=Sing|Person=3': {POS: PRON, 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Loc|Number=Plur|Person=1': {POS: PRON, 'Case': 'Loc', 'Number': 'Plur', 'Person': '1'},
-    'PRON__Case=Loc|Number=Plur|Person=2': {POS: PRON, 'Case': 'Loc', 'Number': 'Plur', 'Person': '2'},
-    'PRON__Case=Loc|Number=Plur|Person=3': {POS: PRON, 'Case': 'Loc', 'Number': 'Plur', 'Person': '3'},
-    'PRON__Case=Loc|Number=Sing|Person=1': {POS: PRON, 'Case': 'Loc', 'Number': 'Sing', 'Person': '1'},
-    'PRON__Case=Loc|Number=Sing|Person=2': {POS: PRON, 'Case': 'Loc', 'Number': 'Sing', 'Person': '2'},
-    'PRON__Case=Nom': {POS: PRON, 'Case': 'Nom'},
-    'PRON__Case=Nom|Gender=Fem|Number=Sing|Person=3': {POS: PRON, 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Nom|Gender=Masc|Number=Sing|Person=3': {POS: PRON, 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Nom|Gender=Neut|Number=Sing|Person=3': {POS: PRON, 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing', 'Person': '3'},
-    'PRON__Case=Nom|Number=Plur|Person=1': {POS: PRON, 'Case': 'Nom', 'Number': 'Plur', 'Person': '1'},
-    'PRON__Case=Nom|Number=Plur|Person=2': {POS: PRON, 'Case': 'Nom', 'Number': 'Plur', 'Person': '2'},
-    'PRON__Case=Nom|Number=Plur|Person=3': {POS: PRON, 'Case': 'Nom', 'Number': 'Plur', 'Person': '3'},
-    'PRON__Case=Nom|Number=Sing|Person=1': {POS: PRON, 'Case': 'Nom', 'Number': 'Sing', 'Person': '1'},
-    'PRON__Case=Nom|Number=Sing|Person=2': {POS: PRON, 'Case': 'Nom', 'Number': 'Sing', 'Person': '2'},
-    'PRON__Number=Sing|Person=1': {POS: PRON, 'Number': 'Sing', 'Person': '1'},
-    'PRON___': {POS: PRON},
-    'PRON': {POS: PRON},
-    'PROPN__Animacy=Anim|Case=Acc|Gender=Fem|Number=Plur': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Plur'},
-    'PROPN__Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Acc|Gender=Masc|Number=Plur': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PROPN__Animacy=Anim|Case=Acc|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Acc|Gender=Neut|Number=Plur': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Plur'},
-    'PROPN__Animacy=Anim|Case=Dat|Gender=Fem|Number=Plur': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Plur'},
-    'PROPN__Animacy=Anim|Case=Dat|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Dat|Gender=Masc|Number=Plur': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PROPN__Animacy=Anim|Case=Dat|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Dat|Gender=Neut|Number=Plur': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Plur'},
-    'PROPN__Animacy=Anim|Case=Gen|Foreign=Yes|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Gen', 'Foreign': 'Yes', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Gen|Gender=Fem|Number=Plur': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Plur'},
-    'PROPN__Animacy=Anim|Case=Gen|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Gen|Gender=Masc|Number=Plur': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PROPN__Animacy=Anim|Case=Gen|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Ins|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Ins|Gender=Masc|Number=Plur': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PROPN__Animacy=Anim|Case=Ins|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Ins|Gender=Neut|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Loc|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Loc|Gender=Masc|Number=Plur': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PROPN__Animacy=Anim|Case=Loc|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Nom|Foreign=Yes|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Nom', 'Foreign': 'Yes', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Nom|Gender=Fem|Number=Plur': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Plur'},
-    'PROPN__Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Nom|Gender=Masc|Number=Plur': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PROPN__Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Case=Nom|Gender=Neut|Number=Plur': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Plur'},
-    'PROPN__Animacy=Anim|Case=Voc|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Case': 'Voc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Anim|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Anim', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Acc|Gender=Fem|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Acc|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Acc|Gender=Neut|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Acc|Gender=Neut|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Acc|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Acc', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Dat|Gender=Fem|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Dat|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Dat|Gender=Neut|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Dat|Gender=Neut|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Dat|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Dat', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Gen|Foreign=Yes|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Gen', 'Foreign': 'Yes', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Gen|Gender=Fem|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Gen|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Gen|Gender=Neut|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Gen|Gender=Neut|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Gen|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Gen', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Ins|Gender=Fem|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Ins|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Ins|Gender=Neut|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Ins|Gender=Neut|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Ins|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Ins', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Loc|Gender=Fem|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Loc|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Loc|Gender=Neut|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Loc|Gender=Neut|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Loc|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Loc', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Nom|Foreign=Yes|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Nom', 'Foreign': 'Yes', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Nom|Foreign=Yes|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Nom', 'Foreign': 'Yes', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Nom|Foreign=Yes|Gender=Neut|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Nom', 'Foreign': 'Yes', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Nom|Gender=Fem|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Nom|Gender=Fem|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Nom|Gender=Neut|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Nom|Gender=Neut|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Case=Nom|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Nom', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Case=Par|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Case': 'Par', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Gender=Fem': {POS: PROPN, 'Animacy': 'Inan', 'Gender': 'Fem'},
-    'PROPN__Animacy=Inan|Gender=Masc': {POS: PROPN, 'Animacy': 'Inan', 'Gender': 'Masc'},
-    'PROPN__Animacy=Inan|Gender=Masc|Number=Plur': {POS: PROPN, 'Animacy': 'Inan', 'Gender': 'Masc', 'Number': 'Plur'},
-    'PROPN__Animacy=Inan|Gender=Masc|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Animacy=Inan|Gender=Neut|Number=Sing': {POS: PROPN, 'Animacy': 'Inan', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PROPN__Case=Acc|Degree=Pos|Gender=Fem|Number=Sing': {POS: PROPN, 'Case': 'Acc', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Case=Dat|Degree=Pos|Gender=Masc|Number=Sing': {POS: PROPN, 'Case': 'Dat', 'Degree': 'Pos', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Case=Ins|Degree=Pos|Gender=Fem|Number=Sing': {POS: PROPN, 'Case': 'Ins', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Case=Ins|Degree=Pos|Number=Plur': {POS: PROPN, 'Case': 'Ins', 'Degree': 'Pos', 'Number': 'Plur'},
-    'PROPN__Case=Nom|Degree=Pos|Gender=Fem|Number=Sing': {POS: PROPN, 'Case': 'Nom', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'},
-    'PROPN__Case=Nom|Degree=Pos|Gender=Masc|Number=Sing': {POS: PROPN, 'Case': 'Nom', 'Degree': 'Pos', 'Gender': 'Masc', 'Number': 'Sing'},
-    'PROPN__Case=Nom|Degree=Pos|Gender=Neut|Number=Sing': {POS: PROPN, 'Case': 'Nom', 'Degree': 'Pos', 'Gender': 'Neut', 'Number': 'Sing'},
-    'PROPN__Case=Nom|Degree=Pos|Number=Plur': {POS: PROPN, 'Case': 'Nom', 'Degree': 'Pos', 'Number': 'Plur'},
-    'PROPN__Degree=Pos|Gender=Neut|Number=Sing|Variant=Short': {POS: PROPN, 'Degree': 'Pos', 'Gender': 'Neut', 'Number': 'Sing', 'Variant': 'Short'},
-    'PROPN__Degree=Pos|Number=Plur|Variant=Short': {POS: PROPN, 'Degree': 'Pos', 'Number': 'Plur', 'Variant': 'Short'},
-    'PROPN__Foreign=Yes': {POS: PROPN, 'Foreign': 'Yes'},
-    'PROPN__Number=Sing': {POS: PROPN, 'Number': 'Sing'},
-    'PROPN___': {POS: PROPN},
-    'PROPN': {POS: PROPN},
-    'PUNCT___': {POS: PUNCT},
-    'PUNCT': {POS: PUNCT},
-    'SCONJ__Mood=Cnd': {POS: SCONJ, 'Mood': 'Cnd'},
-    'SCONJ___': {POS: SCONJ},
-    'SCONJ': {POS: SCONJ},
-    'SYM___': {POS: SYM},
-    'SYM': {POS: SYM},
-    'VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Imp', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Imp', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Imp', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Imp', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Imp', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Animacy=Anim|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Perf', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Animacy=Anim|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Perf', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Animacy=Anim|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Perf', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Animacy=Anim|Aspect=Perf|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Perf', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Animacy=Anim|Aspect=Perf|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Perf', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Animacy=Anim|Aspect=Perf|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Animacy': 'Anim', 'Aspect': 'Perf', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Imp', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Imp', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Imp', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Imp', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Imp', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Imp', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Perf', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Perf', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Perf', 'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Animacy=Inan|Aspect=Perf|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Perf', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Animacy=Inan|Aspect=Perf|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Perf', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Animacy=Inan|Aspect=Perf|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Animacy': 'Inan', 'Aspect': 'Perf', 'Case': 'Acc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Dat|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Dat|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Dat|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Dat|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Dat|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Dat|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Dat|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Dat', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Gen|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Gen|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Gen|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Gen|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Gen|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Gen|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Gen', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Ins|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Ins|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Ins|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Ins|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Ins|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Ins', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Loc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Loc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Loc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Loc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Loc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Loc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Loc', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Case=Nom|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Case=Nom|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Case=Nom|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Case': 'Nom', 'Number': 'Plur', 'Tense': 'Pres', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Fem', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Fem', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Fem', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Gender=Fem|Number=Sing|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'Variant': 'Short', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Gender=Fem|Number=Sing|Tense=Pres|Variant=Short|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Pres', 'Variant': 'Short', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Masc', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Masc', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Masc', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'Variant': 'Short', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Gender=Masc|Number=Sing|Tense=Pres|Variant=Short|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Pres', 'Variant': 'Short', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Gender=Neut|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Neut', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Gender=Neut|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Neut', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Gender=Neut|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Neut', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Gender=Neut|Number=Sing|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'Variant': 'Short', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Gender=Neut|Number=Sing|Tense=Pres|Variant=Short|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Pres', 'Variant': 'Short', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Imp', 'Number': 'Plur', 'Person': '2', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Imp', 'Number': 'Plur', 'Person': '2', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Imp', 'Number': 'Sing', 'Person': '2', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Imp', 'Number': 'Sing', 'Person': '2', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '1', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '1', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '2', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '2', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '3', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '3', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '3', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '1', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '1', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '2', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '2', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '3', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '3', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '3', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Number=Plur|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Number': 'Plur', 'Tense': 'Past', 'Variant': 'Short', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Number=Plur|Tense=Pres|Variant=Short|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Number': 'Plur', 'Tense': 'Pres', 'Variant': 'Short', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|Tense=Past|VerbForm=Conv|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Tense': 'Past', 'VerbForm': 'Conv', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Tense=Pres|VerbForm=Conv|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'Tense': 'Pres', 'VerbForm': 'Conv', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|Tense=Pres|VerbForm=Conv|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'Tense': 'Pres', 'VerbForm': 'Conv', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|Tense=Pres|VerbForm=Conv|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'Tense': 'Pres', 'VerbForm': 'Conv', 'Voice': 'Pass'},
-    'VERB__Aspect=Imp|VerbForm=Inf|Voice=Act': {POS: VERB, 'Aspect': 'Imp', 'VerbForm': 'Inf', 'Voice': 'Act'},
-    'VERB__Aspect=Imp|VerbForm=Inf|Voice=Mid': {POS: VERB, 'Aspect': 'Imp', 'VerbForm': 'Inf', 'Voice': 'Mid'},
-    'VERB__Aspect=Imp|VerbForm=Inf|Voice=Pass': {POS: VERB, 'Aspect': 'Imp', 'VerbForm': 'Inf', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Acc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Acc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Acc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Acc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Acc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Dat|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Dat|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Dat|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Dat', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Dat|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Dat|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Dat|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Dat', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Dat', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Dat|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Dat', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Dat|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Dat', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Dat|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Dat', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Gen|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Gen|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Gen|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Gen', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Gen', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Gen|Number=Plur|Tense=Fut|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Gen', 'Number': 'Plur', 'Tense': 'Fut', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Gen|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Gen', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Gen|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Gen', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Gen|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Gen', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Ins|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Ins|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Ins|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Ins', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Ins', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Ins', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Ins|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Ins', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Ins|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Ins', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Ins|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Ins', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Loc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Loc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Loc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Loc', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Loc', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Loc', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Loc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Loc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Loc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Loc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Loc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Loc', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Nom|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Nom|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Nom|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Nom', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Nom', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Nom', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Nom', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Case': 'Nom', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Gender': 'Fem', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Gender': 'Fem', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Gender=Fem|Number=Sing|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'Variant': 'Short', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Gender': 'Masc', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Gender': 'Masc', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Gender': 'Masc', 'Number': 'Sing', 'Tense': 'Past', 'Variant': 'Short', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Gender=Neut|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Gender': 'Neut', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Gender=Neut|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Gender': 'Neut', 'Mood': 'Ind', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Gender=Neut|Number=Sing|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Gender': 'Neut', 'Number': 'Sing', 'Tense': 'Past', 'Variant': 'Short', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Mood=Imp|Number=Plur|Person=1|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Imp', 'Number': 'Plur', 'Person': '1', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Imp', 'Number': 'Plur', 'Person': '2', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Imp', 'Number': 'Plur', 'Person': '2', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Imp', 'Number': 'Sing', 'Person': '2', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Imp', 'Number': 'Sing', 'Person': '2', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '1', 'Tense': 'Fut', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '1', 'Tense': 'Fut', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=2|Tense=Fut|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '2', 'Tense': 'Fut', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=2|Tense=Fut|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '2', 'Tense': 'Fut', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '3', 'Tense': 'Fut', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '3', 'Tense': 'Fut', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Plur', 'Person': '3', 'Tense': 'Fut', 'VerbForm': 'Fin', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Plur', 'Tense': 'Past', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '1', 'Tense': 'Fut', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '1', 'Tense': 'Fut', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '2', 'Tense': 'Fut', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '2', 'Tense': 'Fut', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '3', 'Tense': 'Fut', 'VerbForm': 'Fin', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '3', 'Tense': 'Fut', 'VerbForm': 'Fin', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|Number=Plur|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass': {POS: VERB, 'Aspect': 'Perf', 'Number': 'Plur', 'Tense': 'Past', 'Variant': 'Short', 'VerbForm': 'Part', 'Voice': 'Pass'},
-    'VERB__Aspect=Perf|Tense=Past|VerbForm=Conv|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'Tense': 'Past', 'VerbForm': 'Conv', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|Tense=Past|VerbForm=Conv|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'Tense': 'Past', 'VerbForm': 'Conv', 'Voice': 'Mid'},
-    'VERB__Aspect=Perf|VerbForm=Inf|Voice=Act': {POS: VERB, 'Aspect': 'Perf', 'VerbForm': 'Inf', 'Voice': 'Act'},
-    'VERB__Aspect=Perf|VerbForm=Inf|Voice=Mid': {POS: VERB, 'Aspect': 'Perf', 'VerbForm': 'Inf', 'Voice': 'Mid'},
-    'VERB__Voice=Act': {POS: VERB, 'Voice': 'Act'},
-    'VERB___': {POS: VERB},
-    'VERB': {POS: VERB},
-    'X__Foreign=Yes': {POS: X, 'Foreign': 'Yes'},
-    'X___': {POS: X},
-    'X': {POS: X},
+    "ADJ__Animacy=Anim|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "Animacy": "Anim", "Case": "Acc", "Degree": "Pos", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Animacy=Anim|Case=Acc|Degree=Pos|Number=Plur": {POS: ADJ, "Animacy": "Anim", "Case": "Acc", "Degree": "Pos", "Number": "Plur"},
+    "ADJ__Animacy=Anim|Case=Acc|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "Animacy": "Anim", "Case": "Acc", "Degree": "Sup", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Animacy=Anim|Case=Nom|Degree=Pos|Number=Plur": {POS: ADJ, "Animacy": "Anim", "Case": "Nom", "Degree": "Pos", "Number": "Plur"},
+    "ADJ__Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "Animacy": "Inan", "Case": "Acc", "Degree": "Pos", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Animacy=Inan|Case=Acc|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "Animacy": "Inan", "Case": "Acc", "Degree": "Pos", "Gender": "Neut", "Number": "Sing"},
+    "ADJ__Animacy=Inan|Case=Acc|Degree=Pos|Number=Plur": {POS: ADJ, "Animacy": "Inan", "Case": "Acc", "Degree": "Pos", "Number": "Plur"},
+    "ADJ__Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "Animacy": "Inan", "Case": "Acc", "Degree": "Sup", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Animacy=Inan|Case=Acc|Degree=Sup|Number=Plur": {POS: ADJ, "Animacy": "Inan", "Case": "Acc", "Degree": "Sup", "Number": "Plur"},
+    "ADJ__Animacy=Inan|Case=Acc|Gender=Fem|Number=Sing": {POS: ADJ, "Animacy": "Inan", "Case": "Acc", "Gender": "Fem", "Number": "Sing"},
+    "ADJ__Animacy=Inan|Case=Nom|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "Animacy": "Inan", "Case": "Nom", "Degree": "Pos", "Gender": "Fem", "Number": "Sing"},
+    "ADJ__Case=Acc|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "Case": "Acc", "Degree": "Pos", "Gender": "Fem", "Number": "Sing"},
+    "ADJ__Case=Acc|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "Case": "Acc", "Degree": "Pos", "Gender": "Neut", "Number": "Sing"},
+    "ADJ__Case=Acc|Degree=Sup|Gender=Fem|Number=Sing": {POS: ADJ, "Case": "Acc", "Degree": "Sup", "Gender": "Fem", "Number": "Sing"},
+    "ADJ__Case=Acc|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "Case": "Acc", "Degree": "Sup", "Gender": "Neut", "Number": "Sing"},
+    "ADJ__Case=Dat|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "Case": "Dat", "Degree": "Pos", "Gender": "Fem", "Number": "Sing"},
+    "ADJ__Case=Dat|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "Case": "Dat", "Degree": "Pos", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Case=Dat|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "Case": "Dat", "Degree": "Pos", "Gender": "Neut", "Number": "Sing"},
+    "ADJ__Case=Dat|Degree=Pos|Number=Plur": {POS: ADJ, "Case": "Dat", "Degree": "Pos", "Number": "Plur"},
+    "ADJ__Case=Dat|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "Case": "Dat", "Degree": "Sup", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Case=Dat|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "Case": "Dat", "Degree": "Sup", "Gender": "Neut", "Number": "Sing"},
+    "ADJ__Case=Dat|Degree=Sup|Number=Plur": {POS: ADJ, "Case": "Dat", "Degree": "Sup", "Number": "Plur"},
+    "ADJ__Case=Gen|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "Case": "Gen", "Degree": "Pos", "Gender": "Fem", "Number": "Sing"},
+    "ADJ__Case=Gen|Degree=Pos|Gender=Fem|Number=Sing|Variant=Short": {POS: ADJ, "Case": "Gen", "Degree": "Pos", "Gender": "Fem", "Number": "Sing", },
+    "ADJ__Case=Gen|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "Case": "Gen", "Degree": "Pos", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Case=Gen|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "Case": "Gen", "Degree": "Pos", "Gender": "Neut", "Number": "Sing"},
+    "ADJ__Case=Gen|Degree=Pos|Number=Plur": {POS: ADJ, "Case": "Gen", "Degree": "Pos", "Number": "Plur"},
+    "ADJ__Case=Gen|Degree=Sup|Gender=Fem|Number=Sing": {POS: ADJ, "Case": "Gen", "Degree": "Sup", "Gender": "Fem", "Number": "Sing"},
+    "ADJ__Case=Gen|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "Case": "Gen", "Degree": "Sup", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Case=Gen|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "Case": "Gen", "Degree": "Sup", "Gender": "Neut", "Number": "Sing"},
+    "ADJ__Case=Gen|Degree=Sup|Number=Plur": {POS: ADJ, "Case": "Gen", "Degree": "Sup", "Number": "Plur"},
+    "ADJ__Case=Ins|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "Case": "Ins", "Degree": "Pos", "Gender": "Fem", "Number": "Sing"},
+    "ADJ__Case=Ins|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "Case": "Ins", "Degree": "Pos", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Case=Ins|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "Case": "Ins", "Degree": "Pos", "Gender": "Neut", "Number": "Sing"},
+    "ADJ__Case=Ins|Degree=Pos|Number=Plur": {POS: ADJ, "Case": "Ins", "Degree": "Pos", "Number": "Plur"},
+    "ADJ__Case=Ins|Degree=Sup|Gender=Fem|Number=Sing": {POS: ADJ, "Case": "Ins", "Degree": "Sup", "Gender": "Fem", "Number": "Sing"},
+    "ADJ__Case=Ins|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "Case": "Ins", "Degree": "Sup", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Case=Ins|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "Case": "Ins", "Degree": "Sup", "Gender": "Neut", "Number": "Sing"},
+    "ADJ__Case=Ins|Degree=Sup|Number=Plur": {POS: ADJ, "Case": "Ins", "Degree": "Sup", "Number": "Plur"},
+    "ADJ__Case=Loc|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "Case": "Loc", "Degree": "Pos", "Gender": "Fem", "Number": "Sing"},
+    "ADJ__Case=Loc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "Case": "Loc", "Degree": "Pos", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Case=Loc|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "Case": "Loc", "Degree": "Pos", "Gender": "Neut", "Number": "Sing"},
+    "ADJ__Case=Loc|Degree=Pos|Number=Plur": {POS: ADJ, "Case": "Loc", "Degree": "Pos", "Number": "Plur"},
+    "ADJ__Case=Loc|Degree=Sup|Gender=Fem|Number=Sing": {POS: ADJ, "Case": "Loc", "Degree": "Sup", "Gender": "Fem", "Number": "Sing"},
+    "ADJ__Case=Loc|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "Case": "Loc", "Degree": "Sup", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Case=Loc|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "Case": "Loc", "Degree": "Sup", "Gender": "Neut", "Number": "Sing"},
+    "ADJ__Case=Loc|Degree=Sup|Number=Plur": {POS: ADJ, "Case": "Loc", "Degree": "Sup", "Number": "Plur"},
+    "ADJ__Case=Nom|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "Case": "Nom", "Degree": "Pos", "Gender": "Fem", "Number": "Sing"},
+    "ADJ__Case=Nom|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "Case": "Nom", "Degree": "Pos", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Case=Nom|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "Case": "Nom", "Degree": "Pos", "Gender": "Neut", "Number": "Sing"},
+    "ADJ__Case=Nom|Degree=Pos|Number=Plur": {POS: ADJ, "Case": "Nom", "Degree": "Pos", "Number": "Plur"},
+    "ADJ__Case=Nom|Degree=Sup|Gender=Fem|Number=Sing": {POS: ADJ, "Case": "Nom", "Degree": "Sup", "Gender": "Fem", "Number": "Sing"},
+    "ADJ__Case=Nom|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "Case": "Nom", "Degree": "Sup", "Gender": "Masc", "Number": "Sing"},
+    "ADJ__Case=Nom|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "Case": "Nom", "Degree": "Sup", "Gender": "Neut", "Number": "Sing"},
+    "ADJ__Case=Nom|Degree=Sup|Number=Plur": {POS: ADJ, "Case": "Nom", "Degree": "Sup", "Number": "Plur"},
+    "ADJ__Degree=Cmp": {POS: ADJ, "Degree": "Cmp"},
+    "ADJ__Degree=Pos": {POS: ADJ, "Degree": "Pos"},
+    "ADJ__Degree=Pos|Gender=Fem|Number=Sing|Variant=Short": {POS: ADJ, "Degree": "Pos", "Gender": "Fem", "Number": "Sing", },
+    "ADJ__Degree=Pos|Gender=Masc|Number=Sing|Variant=Short": {POS: ADJ, "Degree": "Pos", "Gender": "Masc", "Number": "Sing", },
+    "ADJ__Degree=Pos|Gender=Neut|Number=Sing|Variant=Short": {POS: ADJ, "Degree": "Pos", "Gender": "Neut", "Number": "Sing", },
+    "ADJ__Degree=Pos|Number=Plur|Variant=Short": {POS: ADJ, "Degree": "Pos", "Number": "Plur", },
+    "ADJ__Foreign=Yes": {POS: ADJ, "Foreign": "Yes"},
+    "ADJ___": {POS: ADJ},
+    "ADJ": {POS: ADJ},
+    "ADP___": {POS: ADP},
+    "ADP": {POS: ADP},
+    "ADV__Degree=Cmp": {POS: ADV, "Degree": "Cmp"},
+    "ADV__Degree=Pos": {POS: ADV, "Degree": "Pos"},
+    "ADV__Polarity=Neg": {POS: ADV, "Polarity": "Neg"},
+    "AUX__Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "Aspect": "Imp", "Case": "Loc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "AUX__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "Aspect": "Imp", "Case": "Nom", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "AUX__Aspect=Imp|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "Aspect": "Imp", "Case": "Nom", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "AUX__Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act": {POS: AUX, "Aspect": "Imp", "Gender": "Fem", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Act"},
+    "AUX__Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act": {POS: AUX, "Aspect": "Imp", "Gender": "Masc", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Act"},
+    "AUX__Aspect=Imp|Gender=Neut|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act": {POS: AUX, "Aspect": "Imp", "Gender": "Neut", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Act"},
+    "AUX__Aspect=Imp|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin|Voice=Act": {POS: AUX, "Aspect": "Imp", "Mood": "Imp", "Number": "Plur", "Person": "two", "VerbForm": "Fin", "Voice": "Act"},
+    "AUX__Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin|Voice=Act": {POS: AUX, "Aspect": "Imp", "Mood": "Imp", "Number": "Sing", "Person": "two", "VerbForm": "Fin", "Voice": "Act"},
+    "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin|Voice=Act": {POS: AUX, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Person": "one", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Act"},
+    "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin|Voice=Act": {POS: AUX, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Person": "two", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Act"},
+    "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act": {POS: AUX, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Person": "three", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Act"},
+    "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Act": {POS: AUX, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Tense": "Past", "VerbForm": "Fin", "Voice": "Act"},
+    "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|Voice=Act": {POS: AUX, "Aspect": "Imp", "Mood": "Ind", "Number": "Sing", "Person": "one", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Act"},
+    "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin|Voice=Act": {POS: AUX, "Aspect": "Imp", "Mood": "Ind", "Number": "Sing", "Person": "two", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Act"},
+    "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act": {POS: AUX, "Aspect": "Imp", "Mood": "Ind", "Number": "Sing", "Person": "three", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Act"},
+    "AUX__Aspect=Imp|Tense=Pres|VerbForm=Conv|Voice=Act": {POS: AUX, "Aspect": "Imp", "Tense": "Pres", "VerbForm": "Conv", "Voice": "Act"},
+    "AUX__Aspect=Imp|VerbForm=Inf|Voice=Act": {POS: AUX, "Aspect": "Imp", "VerbForm": "Inf", "Voice": "Act"},
+    "CCONJ___": {POS: CCONJ},
+    "CCONJ": {POS: CCONJ},
+    "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing": {POS: DET, "Animacy": "Inan", "Case": "Acc", "Gender": "Masc", "Number": "Sing"},
+    "DET__Animacy=Inan|Case=Acc|Gender=Neut|Number=Sing": {POS: DET, "Animacy": "Inan", "Case": "Acc", "Gender": "Neut", "Number": "Sing"},
+    "DET__Animacy=Inan|Case=Gen|Gender=Fem|Number=Sing": {POS: DET, "Animacy": "Inan", "Case": "Gen", "Gender": "Fem", "Number": "Sing"},
+    "DET__Animacy=Inan|Case=Gen|Number=Plur": {POS: DET, "Animacy": "Inan", "Case": "Gen", "Number": "Plur"},
+    "DET__Case=Acc|Degree=Pos|Number=Plur": {POS: DET, "Case": "Acc", "Degree": "Pos", "Number": "Plur"},
+    "DET__Case=Acc|Gender=Fem|Number=Sing": {POS: DET, "Case": "Acc", "Gender": "Fem", "Number": "Sing"},
+    "DET__Case=Acc|Gender=Masc|Number=Sing": {POS: DET, "Case": "Acc", "Gender": "Masc", "Number": "Sing"},
+    "DET__Case=Acc|Gender=Neut|Number=Sing": {POS: DET, "Case": "Acc", "Gender": "Neut", "Number": "Sing"},
+    "DET__Case=Acc|Number=Plur": {POS: DET, "Case": "Acc", "Number": "Plur"},
+    "DET__Case=Dat|Gender=Fem|Number=Sing": {POS: DET, "Case": "Dat", "Gender": "Fem", "Number": "Sing"},
+    "DET__Case=Dat|Gender=Masc|Number=Plur": {POS: DET, "Case": "Dat", "Gender": "Masc", "Number": "Plur"},
+    "DET__Case=Dat|Gender=Masc|Number=Sing": {POS: DET, "Case": "Dat", "Gender": "Masc", "Number": "Sing"},
+    "DET__Case=Dat|Gender=Neut|Number=Sing": {POS: DET, "Case": "Dat", "Gender": "Neut", "Number": "Sing"},
+    "DET__Case=Dat|Number=Plur": {POS: DET, "Case": "Dat", "Number": "Plur"},
+    "DET__Case=Gen|Gender=Fem|Number=Sing": {POS: DET, "Case": "Gen", "Gender": "Fem", "Number": "Sing"},
+    "DET__Case=Gen|Gender=Masc|Number=Sing": {POS: DET, "Case": "Gen", "Gender": "Masc", "Number": "Sing"},
+    "DET__Case=Gen|Gender=Neut|Number=Sing": {POS: DET, "Case": "Gen", "Gender": "Neut", "Number": "Sing"},
+    "DET__Case=Gen|Number=Plur": {POS: DET, "Case": "Gen", "Number": "Plur"},
+    "DET__Case=Ins|Gender=Fem|Number=Sing": {POS: DET, "Case": "Ins", "Gender": "Fem", "Number": "Sing"},
+    "DET__Case=Ins|Gender=Masc|Number=Sing": {POS: DET, "Case": "Ins", "Gender": "Masc", "Number": "Sing"},
+    "DET__Case=Ins|Gender=Neut|Number=Sing": {POS: DET, "Case": "Ins", "Gender": "Neut", "Number": "Sing"},
+    "DET__Case=Ins|Number=Plur": {POS: DET, "Case": "Ins", "Number": "Plur"},
+    "DET__Case=Loc|Gender=Fem|Number=Sing": {POS: DET, "Case": "Loc", "Gender": "Fem", "Number": "Sing"},
+    "DET__Case=Loc|Gender=Masc|Number=Sing": {POS: DET, "Case": "Loc", "Gender": "Masc", "Number": "Sing"},
+    "DET__Case=Loc|Gender=Neut|Number=Sing": {POS: DET, "Case": "Loc", "Gender": "Neut", "Number": "Sing"},
+    "DET__Case=Loc|Number=Plur": {POS: DET, "Case": "Loc", "Number": "Plur"},
+    "DET__Case=Nom|Gender=Fem|Number=Sing": {POS: DET, "Case": "Nom", "Gender": "Fem", "Number": "Sing"},
+    "DET__Case=Nom|Gender=Masc|Number=Plur": {POS: DET, "Case": "Nom", "Gender": "Masc", "Number": "Plur"},
+    "DET__Case=Nom|Gender=Masc|Number=Sing": {POS: DET, "Case": "Nom", "Gender": "Masc", "Number": "Sing"},
+    "DET__Case=Nom|Gender=Neut|Number=Sing": {POS: DET, "Case": "Nom", "Gender": "Neut", "Number": "Sing"},
+    "DET__Case=Nom|Number=Plur": {POS: DET, "Case": "Nom", "Number": "Plur"},
+    "DET__Gender=Masc|Number=Sing": {POS: DET, "Gender": "Masc", "Number": "Sing"},
+    "INTJ___": {POS: INTJ},
+    "INTJ": {POS: INTJ},
+    "NOUN__Animacy=Anim|Case=Acc|Gender=Fem|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Acc", "Gender": "Fem", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Acc", "Gender": "Fem", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Acc|Gender=Masc|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Acc", "Gender": "Masc", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Acc|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Acc", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Acc|Gender=Neut|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Acc", "Gender": "Neut", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Acc|Gender=Neut|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Acc", "Gender": "Neut", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Acc|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Acc", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Dat|Gender=Fem|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Dat", "Gender": "Fem", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Dat|Gender=Fem|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Dat", "Gender": "Fem", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Dat|Gender=Masc|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Dat", "Gender": "Masc", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Dat|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Dat", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Dat|Gender=Neut|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Dat", "Gender": "Neut", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Dat|Gender=Neut|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Dat", "Gender": "Neut", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Dat|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Dat", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Gen|Gender=Fem|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Gen", "Gender": "Fem", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Gen|Gender=Fem|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Gen", "Gender": "Fem", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Gen|Gender=Masc|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Gen", "Gender": "Masc", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Gen|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Gen", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Gen|Gender=Neut|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Gen", "Gender": "Neut", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Gen|Gender=Neut|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Gen", "Gender": "Neut", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Gen|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Gen", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Ins|Gender=Fem|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Ins", "Gender": "Fem", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Ins|Gender=Fem|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Ins", "Gender": "Fem", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Ins|Gender=Masc|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Ins", "Gender": "Masc", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Ins|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Ins", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Ins|Gender=Neut|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Ins", "Gender": "Neut", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Ins|Gender=Neut|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Ins", "Gender": "Neut", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Ins|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Ins", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Loc|Gender=Fem|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Loc", "Gender": "Fem", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Loc|Gender=Fem|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Loc", "Gender": "Fem", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Loc|Gender=Masc|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Loc", "Gender": "Masc", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Loc|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Loc", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Loc|Gender=Neut|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Loc", "Gender": "Neut", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Loc|Gender=Neut|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Loc", "Gender": "Neut", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Loc|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Loc", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Nom|Gender=Fem|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Nom", "Gender": "Fem", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Nom", "Gender": "Fem", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Nom|Gender=Masc|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Nom", "Gender": "Masc", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Nom", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Nom|Gender=Neut|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Nom", "Gender": "Neut", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Nom|Gender=Neut|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Nom", "Gender": "Neut", "Number": "Sing"},
+    "NOUN__Animacy=Anim|Case=Nom|Number=Plur": {POS: NOUN, "Animacy": "Anim", "Case": "Nom", "Number": "Plur"},
+    "NOUN__Animacy=Anim|Case=Voc|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Anim", "Case": "Voc", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Acc|Gender=Fem|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Acc", "Gender": "Fem", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Acc|Gender=Fem|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Acc", "Gender": "Fem", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Acc", "Gender": "Masc", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Acc", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Acc|Gender=Neut|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Acc", "Gender": "Neut", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Acc|Gender=Neut|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Acc", "Gender": "Neut", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Acc|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Acc", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Dat|Gender=Fem|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Dat", "Gender": "Fem", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Dat|Gender=Fem|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Dat", "Gender": "Fem", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Dat", "Gender": "Masc", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Dat", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Dat|Gender=Neut|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Dat", "Gender": "Neut", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Dat|Gender=Neut|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Dat", "Gender": "Neut", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Dat|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Dat", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Gen|Gender=Fem|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Gen", "Gender": "Fem", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Gen|Gender=Fem|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Gen", "Gender": "Fem", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Gen", "Gender": "Masc", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Gen", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Gen|Gender=Neut|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Gen", "Gender": "Neut", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Gen|Gender=Neut|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Gen", "Gender": "Neut", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Gen|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Gen", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Ins|Gender=Fem|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Ins", "Gender": "Fem", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Ins|Gender=Fem|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Ins", "Gender": "Fem", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Ins", "Gender": "Masc", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Ins", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Ins|Gender=Neut|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Ins", "Gender": "Neut", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Ins|Gender=Neut|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Ins", "Gender": "Neut", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Ins|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Ins", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Loc|Gender=Fem|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Loc", "Gender": "Fem", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Loc|Gender=Fem|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Loc", "Gender": "Fem", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Loc", "Gender": "Masc", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Loc", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Loc|Gender=Neut|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Loc", "Gender": "Neut", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Loc|Gender=Neut|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Loc", "Gender": "Neut", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Loc|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Loc", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Nom|Gender=Fem|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Nom", "Gender": "Fem", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Nom|Gender=Fem|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Nom", "Gender": "Fem", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Nom", "Gender": "Masc", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Nom", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Nom|Gender=Neut|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Nom", "Gender": "Neut", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Nom|Gender=Neut|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Nom", "Gender": "Neut", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Case=Nom|Number=Plur": {POS: NOUN, "Animacy": "Inan", "Case": "Nom", "Number": "Plur"},
+    "NOUN__Animacy=Inan|Case=Par|Gender=Masc|Number=Sing": {POS: NOUN, "Animacy": "Inan", "Case": "Par", "Gender": "Masc", "Number": "Sing"},
+    "NOUN__Animacy=Inan|Gender=Fem": {POS: NOUN, "Animacy": "Inan", "Gender": "Fem"},
+    "NOUN__Animacy=Inan|Gender=Masc": {POS: NOUN, "Animacy": "Inan", "Gender": "Masc"},
+    "NOUN__Animacy=Inan|Gender=Neut": {POS: NOUN, "Animacy": "Inan", "Gender": "Neut"},
+    "NOUN__Case=Gen|Degree=Pos|Gender=Fem|Number=Sing": {POS: NOUN, "Case": "Gen", "Degree": "Pos", "Gender": "Fem", "Number": "Sing"},
+    "NOUN__Foreign=Yes": {POS: NOUN, "Foreign": "Yes"},
+    "NOUN___": {POS: NOUN},
+    "NOUN": {POS: NOUN},
+    "NUM__Animacy=Anim|Case=Acc": {POS: NUM, "Animacy": "Anim", "Case": "Acc"},
+    "NUM__Animacy=Anim|Case=Acc|Gender=Fem": {POS: NUM, "Animacy": "Anim", "Case": "Acc", "Gender": "Fem"},
+    "NUM__Animacy=Anim|Case=Acc|Gender=Masc": {POS: NUM, "Animacy": "Anim", "Case": "Acc", "Gender": "Masc"},
+    "NUM__Animacy=Inan|Case=Acc": {POS: NUM, "Animacy": "Inan", "Case": "Acc"},
+    "NUM__Animacy=Inan|Case=Acc|Gender=Fem": {POS: NUM, "Animacy": "Inan", "Case": "Acc", "Gender": "Fem"},
+    "NUM__Animacy=Inan|Case=Acc|Gender=Masc": {POS: NUM, "Animacy": "Inan", "Case": "Acc", "Gender": "Masc"},
+    "NUM__Case=Acc": {POS: NUM, "Case": "Acc"},
+    "NUM__Case=Acc|Gender=Fem": {POS: NUM, "Case": "Acc", "Gender": "Fem"},
+    "NUM__Case=Acc|Gender=Masc": {POS: NUM, "Case": "Acc", "Gender": "Masc"},
+    "NUM__Case=Acc|Gender=Neut": {POS: NUM, "Case": "Acc", "Gender": "Neut"},
+    "NUM__Case=Dat": {POS: NUM, "Case": "Dat"},
+    "NUM__Case=Dat|Gender=Fem": {POS: NUM, "Case": "Dat", "Gender": "Fem"},
+    "NUM__Case=Dat|Gender=Masc": {POS: NUM, "Case": "Dat", "Gender": "Masc"},
+    "NUM__Case=Dat|Gender=Neut": {POS: NUM, "Case": "Dat", "Gender": "Neut"},
+    "NUM__Case=Gen": {POS: NUM, "Case": "Gen"},
+    "NUM__Case=Gen|Gender=Fem": {POS: NUM, "Case": "Gen", "Gender": "Fem"},
+    "NUM__Case=Gen|Gender=Masc": {POS: NUM, "Case": "Gen", "Gender": "Masc"},
+    "NUM__Case=Gen|Gender=Neut": {POS: NUM, "Case": "Gen", "Gender": "Neut"},
+    "NUM__Case=Ins": {POS: NUM, "Case": "Ins"},
+    "NUM__Case=Ins|Gender=Fem": {POS: NUM, "Case": "Ins", "Gender": "Fem"},
+    "NUM__Case=Ins|Gender=Masc": {POS: NUM, "Case": "Ins", "Gender": "Masc"},
+    "NUM__Case=Ins|Gender=Neut": {POS: NUM, "Case": "Ins", "Gender": "Neut"},
+    "NUM__Case=Loc": {POS: NUM, "Case": "Loc"},
+    "NUM__Case=Loc|Gender=Fem": {POS: NUM, "Case": "Loc", "Gender": "Fem"},
+    "NUM__Case=Loc|Gender=Masc": {POS: NUM, "Case": "Loc", "Gender": "Masc"},
+    "NUM__Case=Loc|Gender=Neut": {POS: NUM, "Case": "Loc", "Gender": "Neut"},
+    "NUM__Case=Nom": {POS: NUM, "Case": "Nom"},
+    "NUM__Case=Nom|Gender=Fem": {POS: NUM, "Case": "Nom", "Gender": "Fem"},
+    "NUM__Case=Nom|Gender=Masc": {POS: NUM, "Case": "Nom", "Gender": "Masc"},
+    "NUM__Case=Nom|Gender=Neut": {POS: NUM, "Case": "Nom", "Gender": "Neut"},
+    "NUM___": {POS: NUM},
+    "NUM": {POS: NUM},
+    "PART__Mood=Cnd": {POS: PART, "Mood": "Cnd"},
+    "PART__Polarity=Neg": {POS: PART, "Polarity": "Neg"},
+    "PART___": {POS: PART},
+    "PART": {POS: PART},
+    "PRON__Animacy=Anim|Case=Acc|Gender=Masc|Number=Plur": {POS: PRON, "Animacy": "Anim", "Case": "Acc", "Gender": "Masc", "Number": "Plur"},
+    "PRON__Animacy=Anim|Case=Acc|Number=Plur": {POS: PRON, "Animacy": "Anim", "Case": "Acc", "Number": "Plur"},
+    "PRON__Animacy=Anim|Case=Dat|Gender=Masc|Number=Sing": {POS: PRON, "Animacy": "Anim", "Case": "Dat", "Gender": "Masc", "Number": "Sing"},
+    "PRON__Animacy=Anim|Case=Dat|Number=Plur": {POS: PRON, "Animacy": "Anim", "Case": "Dat", "Number": "Plur"},
+    "PRON__Animacy=Anim|Case=Gen|Number=Plur": {POS: PRON, "Animacy": "Anim", "Case": "Gen", "Number": "Plur"},
+    "PRON__Animacy=Anim|Case=Ins|Gender=Masc|Number=Sing": {POS: PRON, "Animacy": "Anim", "Case": "Ins", "Gender": "Masc", "Number": "Sing"},
+    "PRON__Animacy=Anim|Case=Ins|Number=Plur": {POS: PRON, "Animacy": "Anim", "Case": "Ins", "Number": "Plur"},
+    "PRON__Animacy=Anim|Case=Loc|Number=Plur": {POS: PRON, "Animacy": "Anim", "Case": "Loc", "Number": "Plur"},
+    "PRON__Animacy=Anim|Case=Nom|Gender=Masc|Number=Plur": {POS: PRON, "Animacy": "Anim", "Case": "Nom", "Gender": "Masc", "Number": "Plur"},
+    "PRON__Animacy=Anim|Case=Nom|Number=Plur": {POS: PRON, "Animacy": "Anim", "Case": "Nom", "Number": "Plur"},
+    "PRON__Animacy=Anim|Gender=Masc|Number=Plur": {POS: PRON, "Animacy": "Anim", "Gender": "Masc", "Number": "Plur"},
+    "PRON__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing": {POS: PRON, "Animacy": "Inan", "Case": "Acc", "Gender": "Masc", "Number": "Sing"},
+    "PRON__Animacy=Inan|Case=Acc|Gender=Neut|Number=Sing": {POS: PRON, "Animacy": "Inan", "Case": "Acc", "Gender": "Neut", "Number": "Sing"},
+    "PRON__Animacy=Inan|Case=Dat|Gender=Neut|Number=Sing": {POS: PRON, "Animacy": "Inan", "Case": "Dat", "Gender": "Neut", "Number": "Sing"},
+    "PRON__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing": {POS: PRON, "Animacy": "Inan", "Case": "Gen", "Gender": "Masc", "Number": "Sing"},
+    "PRON__Animacy=Inan|Case=Gen|Gender=Neut|Number=Sing": {POS: PRON, "Animacy": "Inan", "Case": "Gen", "Gender": "Neut", "Number": "Sing"},
+    "PRON__Animacy=Inan|Case=Ins|Gender=Fem|Number=Sing": {POS: PRON, "Animacy": "Inan", "Case": "Ins", "Gender": "Fem", "Number": "Sing"},
+    "PRON__Animacy=Inan|Case=Ins|Gender=Neut|Number=Sing": {POS: PRON, "Animacy": "Inan", "Case": "Ins", "Gender": "Neut", "Number": "Sing"},
+    "PRON__Animacy=Inan|Case=Loc|Gender=Neut|Number=Sing": {POS: PRON, "Animacy": "Inan", "Case": "Loc", "Gender": "Neut", "Number": "Sing"},
+    "PRON__Animacy=Inan|Case=Nom|Gender=Neut|Number=Sing": {POS: PRON, "Animacy": "Inan", "Case": "Nom", "Gender": "Neut", "Number": "Sing"},
+    "PRON__Animacy=Inan|Gender=Neut|Number=Sing": {POS: PRON, "Animacy": "Inan", "Gender": "Neut", "Number": "Sing"},
+    "PRON__Case=Acc": {POS: PRON, "Case": "Acc"},
+    "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=3": {POS: PRON, "Case": "Acc", "Gender": "Fem", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Acc|Gender=Masc|Number=Sing|Person=3": {POS: PRON, "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Acc|Gender=Neut|Number=Sing|Person=3": {POS: PRON, "Case": "Acc", "Gender": "Neut", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Acc|Number=Plur|Person=1": {POS: PRON, "Case": "Acc", "Number": "Plur", "Person": "one"},
+    "PRON__Case=Acc|Number=Plur|Person=2": {POS: PRON, "Case": "Acc", "Number": "Plur", "Person": "two"},
+    "PRON__Case=Acc|Number=Plur|Person=3": {POS: PRON, "Case": "Acc", "Number": "Plur", "Person": "three"},
+    "PRON__Case=Acc|Number=Sing|Person=1": {POS: PRON, "Case": "Acc", "Number": "Sing", "Person": "one"},
+    "PRON__Case=Acc|Number=Sing|Person=2": {POS: PRON, "Case": "Acc", "Number": "Sing", "Person": "two"},
+    "PRON__Case=Dat": {POS: PRON, "Case": "Dat"},
+    "PRON__Case=Dat|Gender=Fem|Number=Sing|Person=3": {POS: PRON, "Case": "Dat", "Gender": "Fem", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Dat|Gender=Masc|Number=Sing|Person=3": {POS: PRON, "Case": "Dat", "Gender": "Masc", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Dat|Gender=Neut|Number=Sing|Person=3": {POS: PRON, "Case": "Dat", "Gender": "Neut", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Dat|Number=Plur|Person=1": {POS: PRON, "Case": "Dat", "Number": "Plur", "Person": "one"},
+    "PRON__Case=Dat|Number=Plur|Person=2": {POS: PRON, "Case": "Dat", "Number": "Plur", "Person": "two"},
+    "PRON__Case=Dat|Number=Plur|Person=3": {POS: PRON, "Case": "Dat", "Number": "Plur", "Person": "three"},
+    "PRON__Case=Dat|Number=Sing|Person=1": {POS: PRON, "Case": "Dat", "Number": "Sing", "Person": "one"},
+    "PRON__Case=Dat|Number=Sing|Person=2": {POS: PRON, "Case": "Dat", "Number": "Sing", "Person": "two"},
+    "PRON__Case=Gen": {POS: PRON, "Case": "Gen"},
+    "PRON__Case=Gen|Gender=Fem|Number=Sing|Person=3": {POS: PRON, "Case": "Gen", "Gender": "Fem", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Gen|Gender=Masc|Number=Sing|Person=3": {POS: PRON, "Case": "Gen", "Gender": "Masc", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Gen|Gender=Neut|Number=Sing|Person=3": {POS: PRON, "Case": "Gen", "Gender": "Neut", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Gen|Number=Plur|Person=1": {POS: PRON, "Case": "Gen", "Number": "Plur", "Person": "one"},
+    "PRON__Case=Gen|Number=Plur|Person=2": {POS: PRON, "Case": "Gen", "Number": "Plur", "Person": "two"},
+    "PRON__Case=Gen|Number=Plur|Person=3": {POS: PRON, "Case": "Gen", "Number": "Plur", "Person": "three"},
+    "PRON__Case=Gen|Number=Sing|Person=1": {POS: PRON, "Case": "Gen", "Number": "Sing", "Person": "one"},
+    "PRON__Case=Gen|Number=Sing|Person=2": {POS: PRON, "Case": "Gen", "Number": "Sing", "Person": "two"},
+    "PRON__Case=Ins": {POS: PRON, "Case": "Ins"},
+    "PRON__Case=Ins|Gender=Fem|Number=Sing|Person=3": {POS: PRON, "Case": "Ins", "Gender": "Fem", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Ins|Gender=Masc|Number=Sing|Person=3": {POS: PRON, "Case": "Ins", "Gender": "Masc", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Ins|Gender=Neut|Number=Sing|Person=3": {POS: PRON, "Case": "Ins", "Gender": "Neut", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Ins|Number=Plur|Person=1": {POS: PRON, "Case": "Ins", "Number": "Plur", "Person": "one"},
+    "PRON__Case=Ins|Number=Plur|Person=2": {POS: PRON, "Case": "Ins", "Number": "Plur", "Person": "two"},
+    "PRON__Case=Ins|Number=Plur|Person=3": {POS: PRON, "Case": "Ins", "Number": "Plur", "Person": "three"},
+    "PRON__Case=Ins|Number=Sing|Person=1": {POS: PRON, "Case": "Ins", "Number": "Sing", "Person": "one"},
+    "PRON__Case=Ins|Number=Sing|Person=2": {POS: PRON, "Case": "Ins", "Number": "Sing", "Person": "two"},
+    "PRON__Case=Loc": {POS: PRON, "Case": "Loc"},
+    "PRON__Case=Loc|Gender=Fem|Number=Sing|Person=3": {POS: PRON, "Case": "Loc", "Gender": "Fem", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Loc|Gender=Masc|Number=Sing|Person=3": {POS: PRON, "Case": "Loc", "Gender": "Masc", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Loc|Gender=Neut|Number=Sing|Person=3": {POS: PRON, "Case": "Loc", "Gender": "Neut", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Loc|Number=Plur|Person=1": {POS: PRON, "Case": "Loc", "Number": "Plur", "Person": "one"},
+    "PRON__Case=Loc|Number=Plur|Person=2": {POS: PRON, "Case": "Loc", "Number": "Plur", "Person": "two"},
+    "PRON__Case=Loc|Number=Plur|Person=3": {POS: PRON, "Case": "Loc", "Number": "Plur", "Person": "three"},
+    "PRON__Case=Loc|Number=Sing|Person=1": {POS: PRON, "Case": "Loc", "Number": "Sing", "Person": "one"},
+    "PRON__Case=Loc|Number=Sing|Person=2": {POS: PRON, "Case": "Loc", "Number": "Sing", "Person": "two"},
+    "PRON__Case=Nom": {POS: PRON, "Case": "Nom"},
+    "PRON__Case=Nom|Gender=Fem|Number=Sing|Person=3": {POS: PRON, "Case": "Nom", "Gender": "Fem", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Nom|Gender=Masc|Number=Sing|Person=3": {POS: PRON, "Case": "Nom", "Gender": "Masc", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Nom|Gender=Neut|Number=Sing|Person=3": {POS: PRON, "Case": "Nom", "Gender": "Neut", "Number": "Sing", "Person": "three"},
+    "PRON__Case=Nom|Number=Plur|Person=1": {POS: PRON, "Case": "Nom", "Number": "Plur", "Person": "one"},
+    "PRON__Case=Nom|Number=Plur|Person=2": {POS: PRON, "Case": "Nom", "Number": "Plur", "Person": "two"},
+    "PRON__Case=Nom|Number=Plur|Person=3": {POS: PRON, "Case": "Nom", "Number": "Plur", "Person": "three"},
+    "PRON__Case=Nom|Number=Sing|Person=1": {POS: PRON, "Case": "Nom", "Number": "Sing", "Person": "one"},
+    "PRON__Case=Nom|Number=Sing|Person=2": {POS: PRON, "Case": "Nom", "Number": "Sing", "Person": "two"},
+    "PRON__Number=Sing|Person=1": {POS: PRON, "Number": "Sing", "Person": "one"},
+    "PRON___": {POS: PRON},
+    "PRON": {POS: PRON},
+    "PROPN__Animacy=Anim|Case=Acc|Gender=Fem|Number=Plur": {POS: PROPN, "Animacy": "Anim", "Case": "Acc", "Gender": "Fem", "Number": "Plur"},
+    "PROPN__Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Acc", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Acc|Gender=Masc|Number=Plur": {POS: PROPN, "Animacy": "Anim", "Case": "Acc", "Gender": "Masc", "Number": "Plur"},
+    "PROPN__Animacy=Anim|Case=Acc|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Acc", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Acc|Gender=Neut|Number=Plur": {POS: PROPN, "Animacy": "Anim", "Case": "Acc", "Gender": "Neut", "Number": "Plur"},
+    "PROPN__Animacy=Anim|Case=Dat|Gender=Fem|Number=Plur": {POS: PROPN, "Animacy": "Anim", "Case": "Dat", "Gender": "Fem", "Number": "Plur"},
+    "PROPN__Animacy=Anim|Case=Dat|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Dat", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Dat|Gender=Masc|Number=Plur": {POS: PROPN, "Animacy": "Anim", "Case": "Dat", "Gender": "Masc", "Number": "Plur"},
+    "PROPN__Animacy=Anim|Case=Dat|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Dat", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Dat|Gender=Neut|Number=Plur": {POS: PROPN, "Animacy": "Anim", "Case": "Dat", "Gender": "Neut", "Number": "Plur"},
+    "PROPN__Animacy=Anim|Case=Gen|Foreign=Yes|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Gen", "Foreign": "Yes", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Gen|Gender=Fem|Number=Plur": {POS: PROPN, "Animacy": "Anim", "Case": "Gen", "Gender": "Fem", "Number": "Plur"},
+    "PROPN__Animacy=Anim|Case=Gen|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Gen", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Gen|Gender=Masc|Number=Plur": {POS: PROPN, "Animacy": "Anim", "Case": "Gen", "Gender": "Masc", "Number": "Plur"},
+    "PROPN__Animacy=Anim|Case=Gen|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Gen", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Ins|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Ins", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Ins|Gender=Masc|Number=Plur": {POS: PROPN, "Animacy": "Anim", "Case": "Ins", "Gender": "Masc", "Number": "Plur"},
+    "PROPN__Animacy=Anim|Case=Ins|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Ins", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Ins|Gender=Neut|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Ins", "Gender": "Neut", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Loc|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Loc", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Loc|Gender=Masc|Number=Plur": {POS: PROPN, "Animacy": "Anim", "Case": "Loc", "Gender": "Masc", "Number": "Plur"},
+    "PROPN__Animacy=Anim|Case=Loc|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Loc", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Nom|Foreign=Yes|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Nom", "Foreign": "Yes", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Nom|Gender=Fem|Number=Plur": {POS: PROPN, "Animacy": "Anim", "Case": "Nom", "Gender": "Fem", "Number": "Plur"},
+    "PROPN__Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Nom", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Nom|Gender=Masc|Number=Plur": {POS: PROPN, "Animacy": "Anim", "Case": "Nom", "Gender": "Masc", "Number": "Plur"},
+    "PROPN__Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Nom", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Case=Nom|Gender=Neut|Number=Plur": {POS: PROPN, "Animacy": "Anim", "Case": "Nom", "Gender": "Neut", "Number": "Plur"},
+    "PROPN__Animacy=Anim|Case=Voc|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Case": "Voc", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Anim|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Anim", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Acc|Gender=Fem|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Acc", "Gender": "Fem", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Acc|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Acc", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Acc", "Gender": "Masc", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Acc", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Acc|Gender=Neut|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Acc", "Gender": "Neut", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Acc|Gender=Neut|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Acc", "Gender": "Neut", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Acc|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Acc", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Dat|Gender=Fem|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Dat", "Gender": "Fem", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Dat|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Dat", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Dat", "Gender": "Masc", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Dat", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Dat|Gender=Neut|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Dat", "Gender": "Neut", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Dat|Gender=Neut|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Dat", "Gender": "Neut", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Dat|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Dat", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Gen|Foreign=Yes|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Gen", "Foreign": "Yes", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Gen|Gender=Fem|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Gen", "Gender": "Fem", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Gen|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Gen", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Gen", "Gender": "Masc", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Gen", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Gen|Gender=Neut|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Gen", "Gender": "Neut", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Gen|Gender=Neut|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Gen", "Gender": "Neut", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Gen|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Gen", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Ins|Gender=Fem|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Ins", "Gender": "Fem", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Ins|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Ins", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Ins", "Gender": "Masc", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Ins", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Ins|Gender=Neut|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Ins", "Gender": "Neut", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Ins|Gender=Neut|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Ins", "Gender": "Neut", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Ins|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Ins", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Loc|Gender=Fem|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Loc", "Gender": "Fem", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Loc|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Loc", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Loc", "Gender": "Masc", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Loc", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Loc|Gender=Neut|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Loc", "Gender": "Neut", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Loc|Gender=Neut|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Loc", "Gender": "Neut", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Loc|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Loc", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Nom|Foreign=Yes|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Nom", "Foreign": "Yes", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Nom|Foreign=Yes|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Nom", "Foreign": "Yes", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Nom|Foreign=Yes|Gender=Neut|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Nom", "Foreign": "Yes", "Gender": "Neut", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Nom|Gender=Fem|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Nom", "Gender": "Fem", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Nom|Gender=Fem|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Nom", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Nom", "Gender": "Masc", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Nom", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Nom|Gender=Neut|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Nom", "Gender": "Neut", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Nom|Gender=Neut|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Nom", "Gender": "Neut", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Case=Nom|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Case": "Nom", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Case=Par|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Case": "Par", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Gender=Fem": {POS: PROPN, "Animacy": "Inan", "Gender": "Fem"},
+    "PROPN__Animacy=Inan|Gender=Masc": {POS: PROPN, "Animacy": "Inan", "Gender": "Masc"},
+    "PROPN__Animacy=Inan|Gender=Masc|Number=Plur": {POS: PROPN, "Animacy": "Inan", "Gender": "Masc", "Number": "Plur"},
+    "PROPN__Animacy=Inan|Gender=Masc|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Animacy=Inan|Gender=Neut|Number=Sing": {POS: PROPN, "Animacy": "Inan", "Gender": "Neut", "Number": "Sing"},
+    "PROPN__Case=Acc|Degree=Pos|Gender=Fem|Number=Sing": {POS: PROPN, "Case": "Acc", "Degree": "Pos", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Case=Dat|Degree=Pos|Gender=Masc|Number=Sing": {POS: PROPN, "Case": "Dat", "Degree": "Pos", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Case=Ins|Degree=Pos|Gender=Fem|Number=Sing": {POS: PROPN, "Case": "Ins", "Degree": "Pos", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Case=Ins|Degree=Pos|Number=Plur": {POS: PROPN, "Case": "Ins", "Degree": "Pos", "Number": "Plur"},
+    "PROPN__Case=Nom|Degree=Pos|Gender=Fem|Number=Sing": {POS: PROPN, "Case": "Nom", "Degree": "Pos", "Gender": "Fem", "Number": "Sing"},
+    "PROPN__Case=Nom|Degree=Pos|Gender=Masc|Number=Sing": {POS: PROPN, "Case": "Nom", "Degree": "Pos", "Gender": "Masc", "Number": "Sing"},
+    "PROPN__Case=Nom|Degree=Pos|Gender=Neut|Number=Sing": {POS: PROPN, "Case": "Nom", "Degree": "Pos", "Gender": "Neut", "Number": "Sing"},
+    "PROPN__Case=Nom|Degree=Pos|Number=Plur": {POS: PROPN, "Case": "Nom", "Degree": "Pos", "Number": "Plur"},
+    "PROPN__Degree=Pos|Gender=Neut|Number=Sing|Variant=Short": {POS: PROPN, "Degree": "Pos", "Gender": "Neut", "Number": "Sing", },
+    "PROPN__Degree=Pos|Number=Plur|Variant=Short": {POS: PROPN, "Degree": "Pos", "Number": "Plur", },
+    "PROPN__Foreign=Yes": {POS: PROPN, "Foreign": "Yes"},
+    "PROPN__Number=Sing": {POS: PROPN, "Number": "Sing"},
+    "PROPN___": {POS: PROPN},
+    "PROPN": {POS: PROPN},
+    "PUNCT___": {POS: PUNCT},
+    "PUNCT": {POS: PUNCT},
+    "SCONJ__Mood=Cnd": {POS: SCONJ, "Mood": "Cnd"},
+    "SCONJ___": {POS: SCONJ},
+    "SCONJ": {POS: SCONJ},
+    "SYM___": {POS: SYM},
+    "SYM": {POS: SYM},
+    "VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Animacy": "Anim", "Aspect": "Imp", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Animacy": "Anim", "Aspect": "Imp", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Animacy": "Anim", "Aspect": "Imp", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Animacy": "Anim", "Aspect": "Imp", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Animacy": "Anim", "Aspect": "Imp", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Animacy": "Anim", "Aspect": "Imp", "Case": "Acc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Animacy": "Anim", "Aspect": "Imp", "Case": "Acc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Animacy": "Anim", "Aspect": "Imp", "Case": "Acc", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Animacy": "Anim", "Aspect": "Imp", "Case": "Acc", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Animacy=Anim|Aspect=Imp|Case=Acc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Animacy": "Anim", "Aspect": "Imp", "Case": "Acc", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Animacy=Anim|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Animacy": "Anim", "Aspect": "Perf", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Animacy=Anim|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Animacy": "Anim", "Aspect": "Perf", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Animacy=Anim|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Animacy": "Anim", "Aspect": "Perf", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Animacy=Anim|Aspect=Perf|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Animacy": "Anim", "Aspect": "Perf", "Case": "Acc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Animacy=Anim|Aspect=Perf|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Animacy": "Anim", "Aspect": "Perf", "Case": "Acc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Animacy=Anim|Aspect=Perf|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Animacy": "Anim", "Aspect": "Perf", "Case": "Acc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Animacy": "Inan", "Aspect": "Imp", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Animacy": "Inan", "Aspect": "Imp", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Animacy": "Inan", "Aspect": "Imp", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Animacy": "Inan", "Aspect": "Imp", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Animacy": "Inan", "Aspect": "Imp", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Animacy": "Inan", "Aspect": "Imp", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Animacy": "Inan", "Aspect": "Imp", "Case": "Acc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Animacy": "Inan", "Aspect": "Imp", "Case": "Acc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Animacy": "Inan", "Aspect": "Imp", "Case": "Acc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Animacy": "Inan", "Aspect": "Imp", "Case": "Acc", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Animacy": "Inan", "Aspect": "Imp", "Case": "Acc", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Animacy=Inan|Aspect=Imp|Case=Acc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Animacy": "Inan", "Aspect": "Imp", "Case": "Acc", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Animacy": "Inan", "Aspect": "Perf", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Animacy": "Inan", "Aspect": "Perf", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Animacy": "Inan", "Aspect": "Perf", "Case": "Acc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Animacy=Inan|Aspect=Perf|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Animacy": "Inan", "Aspect": "Perf", "Case": "Acc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Animacy=Inan|Aspect=Perf|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Animacy": "Inan", "Aspect": "Perf", "Case": "Acc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Animacy=Inan|Aspect=Perf|Case=Acc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Animacy": "Inan", "Aspect": "Perf", "Case": "Acc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Acc", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Acc", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Acc", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Acc", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Acc", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Acc", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Acc", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Acc", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Acc", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Acc", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Acc", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Acc", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Dat|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Dat|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Dat|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Dat|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Dat|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Dat|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Dat|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Dat", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Gen|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Gen|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Gen|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Gen|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Gen|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Gen|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Gen", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Ins|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Ins|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Ins|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Ins|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Ins|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Ins", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Loc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Loc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Loc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Loc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Loc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Loc|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Loc", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Fem", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Masc", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Gender": "Neut", "Number": "Sing", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Case=Nom|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Imp|Case=Nom|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Case=Nom|Number=Plur|Tense=Pres|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Case": "Nom", "Number": "Plur", "Tense": "Pres", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Imp", "Gender": "Fem", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Gender": "Fem", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Gender": "Fem", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Gender=Fem|Number=Sing|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Gender": "Fem", "Number": "Sing", "Tense": "Past",  "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Gender=Fem|Number=Sing|Tense=Pres|Variant=Short|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Gender": "Fem", "Number": "Sing", "Tense": "Pres",  "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Imp", "Gender": "Masc", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Gender": "Masc", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Gender": "Masc", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Gender": "Masc", "Number": "Sing", "Tense": "Past",  "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Gender=Masc|Number=Sing|Tense=Pres|Variant=Short|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Gender": "Masc", "Number": "Sing", "Tense": "Pres",  "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Gender=Neut|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Imp", "Gender": "Neut", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Imp|Gender=Neut|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Gender": "Neut", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Gender=Neut|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Gender": "Neut", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Gender=Neut|Number=Sing|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Gender": "Neut", "Number": "Sing", "Tense": "Past",  "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Gender=Neut|Number=Sing|Tense=Pres|Variant=Short|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Gender": "Neut", "Number": "Sing", "Tense": "Pres",  "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Imp", "Mood": "Imp", "Number": "Plur", "Person": "two", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Imp|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Mood": "Imp", "Number": "Plur", "Person": "two", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Imp", "Mood": "Imp", "Number": "Sing", "Person": "two", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Mood": "Imp", "Number": "Sing", "Person": "two", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Person": "one", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Person": "one", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Person": "two", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Person": "two", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Person": "three", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Person": "three", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Person": "three", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Tense": "Past", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Tense": "Past", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Plur", "Tense": "Past", "VerbForm": "Fin", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Sing", "Person": "one", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Sing", "Person": "one", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Sing", "Person": "two", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Sing", "Person": "two", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Sing", "Person": "three", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Sing", "Person": "three", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Mood": "Ind", "Number": "Sing", "Person": "three", "Tense": "Pres", "VerbForm": "Fin", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Number=Plur|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Number": "Plur", "Tense": "Past",  "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Number=Plur|Tense=Pres|Variant=Short|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Number": "Plur", "Tense": "Pres",  "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Imp|Tense=Past|VerbForm=Conv|Voice=Act": {POS: VERB, "Aspect": "Imp", "Tense": "Past", "VerbForm": "Conv", "Voice": "Act"},
+    "VERB__Aspect=Imp|Tense=Pres|VerbForm=Conv|Voice=Act": {POS: VERB, "Aspect": "Imp", "Tense": "Pres", "VerbForm": "Conv", "Voice": "Act"},
+    "VERB__Aspect=Imp|Tense=Pres|VerbForm=Conv|Voice=Mid": {POS: VERB, "Aspect": "Imp", "Tense": "Pres", "VerbForm": "Conv", "Voice": "Mid"},
+    "VERB__Aspect=Imp|Tense=Pres|VerbForm=Conv|Voice=Pass": {POS: VERB, "Aspect": "Imp", "Tense": "Pres", "VerbForm": "Conv", "Voice": "Pass"},
+    "VERB__Aspect=Imp|VerbForm=Inf|Voice=Act": {POS: VERB, "Aspect": "Imp", "VerbForm": "Inf", "Voice": "Act"},
+    "VERB__Aspect=Imp|VerbForm=Inf|Voice=Mid": {POS: VERB, "Aspect": "Imp", "VerbForm": "Inf", "Voice": "Mid"},
+    "VERB__Aspect=Imp|VerbForm=Inf|Voice=Pass": {POS: VERB, "Aspect": "Imp", "VerbForm": "Inf", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Acc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Acc", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Acc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Acc", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Acc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Acc", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Acc", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Acc", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Acc", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Dat|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Dat", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Dat|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Dat", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Dat|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Dat", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Dat|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Dat", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Dat|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Dat", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Dat|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Dat", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Dat", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Dat", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Dat", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Dat|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Dat", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Dat|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Dat", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Dat|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Dat", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Gen|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Gen", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Gen|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Gen", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Gen|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Gen", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Gen", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Gen", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Gen", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Gen", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Gen", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Gen", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Gen|Number=Plur|Tense=Fut|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Gen", "Number": "Plur", "Tense": "Fut", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Gen|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Gen", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Gen|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Gen", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Gen|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Gen", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Ins|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Ins", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Ins|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Ins", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Ins|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Ins", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Ins", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Ins", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Ins", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Ins", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Ins", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Ins", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Ins|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Ins", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Ins|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Ins", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Ins|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Ins", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Loc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Loc", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Loc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Loc", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Loc|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Loc", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Loc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Loc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Loc", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Loc", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Loc", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Loc", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Loc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Loc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Loc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Loc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Loc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Loc", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Nom|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Nom", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Nom|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Nom", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Nom|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Nom", "Gender": "Fem", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Nom", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Nom", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Nom", "Gender": "Masc", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Nom", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Nom", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Nom", "Gender": "Neut", "Number": "Sing", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "Aspect": "Perf", "Case": "Nom", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Act"},
+    "VERB__Aspect=Perf|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Case": "Nom", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Case": "Nom", "Number": "Plur", "Tense": "Past", "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Perf", "Gender": "Fem", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Perf|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Gender": "Fem", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Gender=Fem|Number=Sing|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Gender": "Fem", "Number": "Sing", "Tense": "Past",  "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Perf", "Gender": "Masc", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Perf|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Gender": "Masc", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Gender": "Masc", "Number": "Sing", "Tense": "Past",  "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Gender=Neut|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Perf", "Gender": "Neut", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Perf|Gender=Neut|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Gender": "Neut", "Mood": "Ind", "Number": "Sing", "Tense": "Past", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Gender=Neut|Number=Sing|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Gender": "Neut", "Number": "Sing", "Tense": "Past",  "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Mood=Imp|Number=Plur|Person=1|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Perf", "Mood": "Imp", "Number": "Plur", "Person": "one", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Perf|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Perf", "Mood": "Imp", "Number": "Plur", "Person": "two", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Perf|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Mood": "Imp", "Number": "Plur", "Person": "two", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Perf", "Mood": "Imp", "Number": "Sing", "Person": "two", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Perf|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Mood": "Imp", "Number": "Sing", "Person": "two", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Plur", "Person": "one", "Tense": "Fut", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Plur", "Person": "one", "Tense": "Fut", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=2|Tense=Fut|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Plur", "Person": "two", "Tense": "Fut", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=2|Tense=Fut|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Plur", "Person": "two", "Tense": "Fut", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Plur", "Person": "three", "Tense": "Fut", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Plur", "Person": "three", "Tense": "Fut", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Plur", "Person": "three", "Tense": "Fut", "VerbForm": "Fin", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Plur", "Tense": "Past", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Plur", "Tense": "Past", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Sing", "Person": "one", "Tense": "Fut", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Sing", "Person": "one", "Tense": "Fut", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Sing", "Person": "two", "Tense": "Fut", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Sing", "Person": "two", "Tense": "Fut", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin|Voice=Act": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Sing", "Person": "three", "Tense": "Fut", "VerbForm": "Fin", "Voice": "Act"},
+    "VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Mood": "Ind", "Number": "Sing", "Person": "three", "Tense": "Fut", "VerbForm": "Fin", "Voice": "Mid"},
+    "VERB__Aspect=Perf|Number=Plur|Tense=Past|Variant=Short|VerbForm=Part|Voice=Pass": {POS: VERB, "Aspect": "Perf", "Number": "Plur", "Tense": "Past",  "VerbForm": "Part", "Voice": "Pass"},
+    "VERB__Aspect=Perf|Tense=Past|VerbForm=Conv|Voice=Act": {POS: VERB, "Aspect": "Perf", "Tense": "Past", "VerbForm": "Conv", "Voice": "Act"},
+    "VERB__Aspect=Perf|Tense=Past|VerbForm=Conv|Voice=Mid": {POS: VERB, "Aspect": "Perf", "Tense": "Past", "VerbForm": "Conv", "Voice": "Mid"},
+    "VERB__Aspect=Perf|VerbForm=Inf|Voice=Act": {POS: VERB, "Aspect": "Perf", "VerbForm": "Inf", "Voice": "Act"},
+    "VERB__Aspect=Perf|VerbForm=Inf|Voice=Mid": {POS: VERB, "Aspect": "Perf", "VerbForm": "Inf", "Voice": "Mid"},
+    "VERB__Voice=Act": {POS: VERB, "Voice": "Act"},
+    "VERB___": {POS: VERB},
+    "VERB": {POS: VERB},
+    "X__Foreign=Yes": {POS: X, "Foreign": "Yes"},
+    "X___": {POS: X},
+    "X": {POS: X},
 }
 # fmt: on

From 038ff1a811e11a6c0425557455ccc5adadadcaf7 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Wed, 22 Jul 2020 16:04:58 +0200
Subject: [PATCH 02/71] Improve warnings around normalization tables (#5794)

Provide more customized normalization table warnings when training a new
model. Only suggest installing `spacy-lookups-data` if it's not already
installed and it includes a table for this language (currently checked
in a hard-coded list).
---
 spacy/errors.py            | 13 ++++++-------
 spacy/pipeline/pipes.pyx   |  6 ++++++
 spacy/syntax/nn_parser.pyx |  6 ++++++
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index ff71b60eb..79ed5ecdb 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -127,13 +127,12 @@ class Warnings(object):
             "this, download a newer compatible model or retrain your custom "
             "model with the current spaCy version. For more details and "
             "available updates, run: python -m spacy validate")
-    W033 = ("Training a new {model} using a model with no lexeme normalization "
-            "table. This may degrade the performance of the model to some "
-            "degree. If this is intentional or the language you're using "
-            "doesn't have a normalization table, please ignore this warning. "
-            "If this is surprising, make sure you have the spacy-lookups-data "
-            "package installed. The languages with lexeme normalization tables "
-            "are currently: da, de, el, en, id, lb, pt, ru, sr, ta, th.")
+    W033 = ("Training a new {model} using a model with an empty lexeme "
+            "normalization table. This may degrade the performance to some "
+            "degree. If this is intentional or this language doesn't have a "
+            "normalization table, please ignore this warning.")
+    W034 = ("Please install the package spacy-lookups-data in order to include "
+            "the default lexeme normalization table for the language '{lang}'.")
 
 
 @add_codes
diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx
index b28f34a7a..ea40e3ae0 100644
--- a/spacy/pipeline/pipes.pyx
+++ b/spacy/pipeline/pipes.pyx
@@ -519,6 +519,12 @@ class Tagger(Pipe):
             warnings.warn(Warnings.W022)
         if len(self.vocab.lookups.get_table("lexeme_norm", {})) == 0:
             warnings.warn(Warnings.W033.format(model="part-of-speech tagger"))
+            try:
+                import spacy_lookups_data
+            except ImportError:
+                if self.vocab.lang in ("da", "de", "el", "en", "id", "lb", "pt",
+                        "ru", "sr", "ta", "th"):
+                    warnings.warn(Warnings.W034.format(lang=self.vocab.lang))
         orig_tag_map = dict(self.vocab.morphology.tag_map)
         new_tag_map = OrderedDict()
         for raw_text, annots_brackets in get_gold_tuples():
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 6944e9113..145c382a5 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -604,6 +604,12 @@ cdef class Parser:
     def begin_training(self, get_gold_tuples, pipeline=None, sgd=None, **cfg):
         if len(self.vocab.lookups.get_table("lexeme_norm", {})) == 0:
             warnings.warn(Warnings.W033.format(model="parser or NER"))
+            try:
+                import spacy_lookups_data
+            except ImportError:
+                if self.vocab.lang in ("da", "de", "el", "en", "id", "lb", "pt",
+                        "ru", "sr", "ta", "th"):
+                    warnings.warn(Warnings.W034.format(lang=self.vocab.lang))
         if 'model' in cfg:
             self.model = cfg['model']
         if not hasattr(get_gold_tuples, '__call__'):

From 6d4d5c074c4add57e7643c531109d8c34f7c8b5f Mon Sep 17 00:00:00 2001
From: Joshua Olson <joshua+github@solarmist.net>
Date: Wed, 22 Jul 2020 23:57:01 -0700
Subject: [PATCH 03/71] Mark Japanese documents as tagged. (#5803)

Mark the document as tagged before returning it to the user from the JapaneseTokenizer.
Fixes #5802
---
 .github/contributors/solarmist.md | 106 ++++++++++++++++++++++++++++++
 spacy/lang/ja/__init__.py         |   1 +
 2 files changed, 107 insertions(+)
 create mode 100644 .github/contributors/solarmist.md

diff --git a/.github/contributors/solarmist.md b/.github/contributors/solarmist.md
new file mode 100644
index 000000000..6bfb21696
--- /dev/null
+++ b/.github/contributors/solarmist.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI GmbH](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                     |
+|------------------------------- | ------------------------- |
+| Name                           | Joshua Olson              |
+| Company name (if applicable)   |                           |
+| Title or role (if applicable)  |                           |
+| Date                           | 2020-07-22                |
+| GitHub username                | solarmist                 |
+| Website (optional)             | http://blog.solarmist.net |
diff --git a/spacy/lang/ja/__init__.py b/spacy/lang/ja/__init__.py
index fb8b9d7fe..30e73fd84 100644
--- a/spacy/lang/ja/__init__.py
+++ b/spacy/lang/ja/__init__.py
@@ -166,6 +166,7 @@ class JapaneseTokenizer(DummyTokenizer):
         doc.user_data["inflections"] = inflections
         doc.user_data["reading_forms"] = readings
         doc.user_data["sub_tokens"] = sub_tokens_list
+        doc.is_tagged = True
 
         return doc
 

From 19dc42776af33ee209c256c739303f27aa458144 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Fri, 24 Jul 2020 09:26:26 +0200
Subject: [PATCH 04/71] Remove hard-coded GPU ID from pretrain (#5808)

---
 spacy/cli/pretrain.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 6d6c65161..e949f76cf 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -125,7 +125,7 @@ def pretrain(
             config[key] = str(config[key])
     util.fix_random_seed(seed)
 
-    has_gpu = prefer_gpu(gpu_id=1)
+    has_gpu = prefer_gpu()
     msg.info("Using GPU" if has_gpu else "Not using GPU")
 
     output_dir = Path(output_dir)

From a69eb445dcb54080af3fe8ec853adfa74364a11d Mon Sep 17 00:00:00 2001
From: Li Zhe <lizhe2004@163.com>
Date: Sat, 25 Jul 2020 19:13:38 +0800
Subject: [PATCH 05/71] fix the wrong hash url in adding-languages.md file
 (#5810)

* fix the wrong hash url in adding-languages.md file

change the #101 url hash path to #language-data

* filled in the spaCy Contributor Agreement

filled in the spaCy Contributor Agreement
---
 .github/contributors/lizhe2004.md      | 106 +++++++++++++++++++++++++
 website/docs/usage/adding-languages.md |   2 +-
 2 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 .github/contributors/lizhe2004.md

diff --git a/.github/contributors/lizhe2004.md b/.github/contributors/lizhe2004.md
new file mode 100644
index 000000000..6011506d6
--- /dev/null
+++ b/.github/contributors/lizhe2004.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your 
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                    |
+|------------------------------- | ------------------------ |
+| Name                           | Zhe li                   |
+| Company name (if applicable)   |                          |
+| Title or role (if applicable)  |                          |
+| Date                           | 2020-07-24               |
+| GitHub username                | lizhe2004                |
+| Website (optional)             | http://www.huahuaxia.net |
diff --git a/website/docs/usage/adding-languages.md b/website/docs/usage/adding-languages.md
index 29a9a1c27..96ffafe06 100644
--- a/website/docs/usage/adding-languages.md
+++ b/website/docs/usage/adding-languages.md
@@ -41,7 +41,7 @@ and morphological analysis.
 
 <Infobox title="Table of Contents" id="toc">
 
-- [Language data 101](#101)
+- [Language data 101](#language-data)
 - [The Language subclass](#language-subclass)
 - [Stop words](#stop-words)
 - [Tokenizer exceptions](#tokenizer-exceptions)

From a66ad89fcb51deed07d89b2ffafc800bb98d56d7 Mon Sep 17 00:00:00 2001
From: Nipun Sadvilkar <nipunsadvilkar@gmail.com>
Date: Mon, 27 Jul 2020 13:13:39 +0530
Subject: [PATCH 06/71] =?UTF-8?q?=E2=9C=8F=EF=B8=8F=20=20typo=20in=20pysbd?=
 =?UTF-8?q?=20code=20example=20(#5821)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 website/meta/universe.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/meta/universe.json b/website/meta/universe.json
index e832b511f..d6e55a2ef 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -2137,7 +2137,7 @@
             "category": ["scientific"],
             "tags": ["sentence segmentation"],
             "code_example": [
-                "from pysbd.util import PySBDFactory",
+                "from pysbd.utils import PySBDFactory",
                 "",
                 "nlp = spacy.blank('en')",
                 "nlp.add_pipe(PySBDFactory(nlp))",

From 2f6b8132ef81c8cb6863f378c8fdead2258fae03 Mon Sep 17 00:00:00 2001
From: Martino Mensio <martino.mensio@open.ac.uk>
Date: Mon, 27 Jul 2020 09:44:33 +0200
Subject: [PATCH 07/71] Sentence transformers added to spaCy universe (#5814)

* fix details for spacy-universal-sentence-encoder

* added sentence-transformers
---
 website/meta/universe.json | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/website/meta/universe.json b/website/meta/universe.json
index d6e55a2ef..8790cd6af 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -1,5 +1,30 @@
 {
     "resources": [
+        {
+            "id": "spacy-sentence-bert",
+            "title": "SpaCy - sentence-transformers",
+            "slogan": "Pipelines for pretrained sentence-transformers (BERT, RoBERTa, XLM-RoBERTa & Co.) directly within SpaCy",
+            "description": "This library lets you use the embeddings from [sentence-transformers](https://github.com/UKPLab/sentence-transformers) of Docs, Spans and Tokens directly from spaCy. Most models are for the english language but three of them are multilingual.",
+            "github": "MartinoMensio/spacy-sentence-bert",
+            "pip": "spacy-sentence-bert",
+            "code_example": [
+                "import spacy_sentence_bert",
+                "# load one of the models listed at https://github.com/MartinoMensio/spacy-sentence-bert/",
+                "nlp = spacy_sentence_bert.load_model('en_roberta_large_nli_stsb_mean_tokens')",
+                "# get two documents",
+                "doc_1 = nlp('Hi there, how are you?')",
+                "doc_2 = nlp('Hello there, how are you doing today?')",
+                "# use the similarity method that is based on the vectors, on Doc, Span or Token",
+                "print(doc_1.similarity(doc_2[0:7]))"
+            ],
+            "category": ["models", "pipeline"],
+            "author": "Martino Mensio",
+            "author_links": {
+                "twitter": "MartinoMensio",
+                "github": "MartinoMensio",
+                "website": "https://martinomensio.github.io"
+            }
+        },
         {
             "id": "spacy-streamlit",
             "title": "spacy-streamlit",
@@ -58,10 +83,11 @@
             "title": "SpaCy - Universal Sentence Encoder",
             "slogan": "Make use of Google's Universal Sentence Encoder directly within SpaCy",
             "description": "This library lets you use Universal Sentence Encoder embeddings of Docs, Spans and Tokens directly from TensorFlow Hub",
-            "github": "MartinoMensio/spacy-universal-sentence-encoder-tfhub",
+            "github": "MartinoMensio/spacy-universal-sentence-encoder",
+            "pip": "spacy-universal-sentence-encoder",
             "code_example": [
                 "import spacy_universal_sentence_encoder",
-                "load one of the models: ['en_use_md', 'en_use_lg', 'xx_use_md', 'xx_use_lg']",
+                "# load one of the models: ['en_use_md', 'en_use_lg', 'xx_use_md', 'xx_use_lg']",
                 "nlp = spacy_universal_sentence_encoder.load_model('en_use_lg')",
                 "# get two documents",
                 "doc_1 = nlp('Hi there, how are you?')",

From 2880d8a5559f60b7461bf290154c8753815b70fa Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 27 Jul 2020 10:09:33 +0200
Subject: [PATCH 08/71] Normalize spelling for spaCy (#5822)

---
 website/meta/universe.json | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/website/meta/universe.json b/website/meta/universe.json
index 8790cd6af..a9a402a66 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -2,8 +2,8 @@
     "resources": [
         {
             "id": "spacy-sentence-bert",
-            "title": "SpaCy - sentence-transformers",
-            "slogan": "Pipelines for pretrained sentence-transformers (BERT, RoBERTa, XLM-RoBERTa & Co.) directly within SpaCy",
+            "title": "spaCy - sentence-transformers",
+            "slogan": "Pipelines for pretrained sentence-transformers (BERT, RoBERTa, XLM-RoBERTa & Co.) directly within spaCy",
             "description": "This library lets you use the embeddings from [sentence-transformers](https://github.com/UKPLab/sentence-transformers) of Docs, Spans and Tokens directly from spaCy. Most models are for the english language but three of them are multilingual.",
             "github": "MartinoMensio/spacy-sentence-bert",
             "pip": "spacy-sentence-bert",
@@ -80,8 +80,8 @@
         },
         {
             "id": "spacy-universal-sentence-encoder",
-            "title": "SpaCy - Universal Sentence Encoder",
-            "slogan": "Make use of Google's Universal Sentence Encoder directly within SpaCy",
+            "title": "spaCy - Universal Sentence Encoder",
+            "slogan": "Make use of Google's Universal Sentence Encoder directly within spaCy",
             "description": "This library lets you use Universal Sentence Encoder embeddings of Docs, Spans and Tokens directly from TensorFlow Hub",
             "github": "MartinoMensio/spacy-universal-sentence-encoder",
             "pip": "spacy-universal-sentence-encoder",
@@ -1461,7 +1461,7 @@
             "id": "podcast-init",
             "title": "Podcast.__init__ #87: spaCy with Matthew Honnibal",
             "slogan": "December 2017",
-            "description": "As the amount of text available on the internet and in businesses continues to increase, the need for fast and accurate language analysis becomes more prominent. This week Matthew Honnibal, the creator of SpaCy, talks about his experiences researching natural language processing and creating a library to make his findings accessible to industry.",
+            "description": "As the amount of text available on the internet and in businesses continues to increase, the need for fast and accurate language analysis becomes more prominent. This week Matthew Honnibal, the creator of spaCy, talks about his experiences researching natural language processing and creating a library to make his findings accessible to industry.",
             "iframe": "https://www.pythonpodcast.com/wp-content/plugins/podlove-podcasting-plugin-for-wordpress/lib/modules/podlove_web_player/player_v4/dist/share.html?episode=https://www.pythonpodcast.com/?podlove_player4=176",
             "iframe_height": 200,
             "thumb": "https://i.imgur.com/rpo6BuY.png",
@@ -1477,7 +1477,7 @@
             "id": "podcast-init2",
             "title": "Podcast.__init__ #256: An Open Source Toolchain For NLP From Explosion AI",
             "slogan": "March 2020",
-            "description": "The state of the art in natural language processing is a constantly moving target. With the rise of deep learning, previously cutting edge techniques have given way to robust language models. Through it all the team at Explosion AI have built a strong presence with the trifecta of SpaCy, Thinc, and Prodigy to support fast and flexible data labeling to feed deep learning models and performant and scalable text processing. In this episode founder and open source author Matthew Honnibal shares his experience growing a business around cutting edge open source libraries for the machine learning developent process.",
+            "description": "The state of the art in natural language processing is a constantly moving target. With the rise of deep learning, previously cutting edge techniques have given way to robust language models. Through it all the team at Explosion AI have built a strong presence with the trifecta of spaCy, Thinc, and Prodigy to support fast and flexible data labeling to feed deep learning models and performant and scalable text processing. In this episode founder and open source author Matthew Honnibal shares his experience growing a business around cutting edge open source libraries for the machine learning developent process.",
             "iframe": "https://cdn.podlove.org/web-player/share.html?episode=https%3A%2F%2Fwww.pythonpodcast.com%2F%3Fpodlove_player4%3D614",
             "iframe_height": 200,
             "thumb": "https://i.imgur.com/rpo6BuY.png",
@@ -1508,7 +1508,7 @@
             "id": "twimlai-podcast",
             "title": "TWiML & AI: Practical NLP with spaCy and Prodigy",
             "slogan": "May 2019",
-            "description": "\"Ines and I caught up to discuss her various projects, including the aforementioned SpaCy, an open-source NLP library built with a focus on industry and production use cases. In our conversation, Ines gives us an overview of the SpaCy Library, a look at some of the use cases that excite her, and the Spacy community and contributors. We also discuss her work with Prodigy, an annotation service tool that uses continuous active learning to train models, and finally, what other exciting projects she is working on.\"",
+            "description": "\"Ines and I caught up to discuss her various projects, including the aforementioned spaCy, an open-source NLP library built with a focus on industry and production use cases. In our conversation, Ines gives us an overview of the spaCy Library, a look at some of the use cases that excite her, and the Spacy community and contributors. We also discuss her work with Prodigy, an annotation service tool that uses continuous active learning to train models, and finally, what other exciting projects she is working on.\"",
             "thumb": "https://i.imgur.com/ng2F5gK.png",
             "url": "https://twimlai.com/twiml-talk-262-practical-natural-language-processing-with-spacy-and-prodigy-w-ines-montani",
             "iframe": "https://html5-player.libsyn.com/embed/episode/id/9691514/height/90/theme/custom/thumbnail/no/preload/no/direction/backward/render-playlist/no/custom-color/3e85b1/",
@@ -1540,7 +1540,7 @@
             "id": "practical-ai-podcast",
             "title": "Practical AI: Modern NLP with spaCy",
             "slogan": "December 2019",
-            "description": "\"SpaCy is awesome for NLP! It’s easy to use, has widespread adoption, is open source, and integrates the latest language models. Ines Montani and Matthew Honnibal (core developers of spaCy and co-founders of Explosion) join us to discuss the history of the project, its capabilities, and the latest trends in NLP. We also dig into the practicalities of taking NLP workflows to production. You don’t want to miss this episode!\"",
+            "description": "\"spaCy is awesome for NLP! It’s easy to use, has widespread adoption, is open source, and integrates the latest language models. Ines Montani and Matthew Honnibal (core developers of spaCy and co-founders of Explosion) join us to discuss the history of the project, its capabilities, and the latest trends in NLP. We also dig into the practicalities of taking NLP workflows to production. You don’t want to miss this episode!\"",
             "thumb": "https://i.imgur.com/jn8Bcdw.png",
             "url": "https://changelog.com/practicalai/68",
             "author": "Daniel Whitenack & Chris Benson",

From b97dbab998640479e8ba0dfbe8fa1759908195df Mon Sep 17 00:00:00 2001
From: graue70 <23035329+graue70@users.noreply.github.com>
Date: Mon, 27 Jul 2020 20:18:48 +0200
Subject: [PATCH 09/71] Fix typo in unit tests (#5823)

---
 spacy/tests/tokenizer/test_whitespace.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/tests/tokenizer/test_whitespace.py b/spacy/tests/tokenizer/test_whitespace.py
index 74c9b369b..e32fa3efc 100644
--- a/spacy/tests/tokenizer/test_whitespace.py
+++ b/spacy/tests/tokenizer/test_whitespace.py
@@ -18,7 +18,7 @@ def test_tokenizer_splits_double_space(tokenizer, text):
 
 
 @pytest.mark.parametrize("text", ["lorem ipsum  "])
-def test_tokenizer_handles_double_trainling_ws(tokenizer, text):
+def test_tokenizer_handles_double_trailing_ws(tokenizer, text):
     tokens = tokenizer(text)
     assert repr(tokens.text_with_ws) == repr(text)
 

From 03ab518f285544cda22cffa8627255d405255c88 Mon Sep 17 00:00:00 2001
From: oculusrepairo <40419272+oculusrepairo@users.noreply.github.com>
Date: Wed, 29 Jul 2020 13:58:56 +0530
Subject: [PATCH 10/71] Update examples.py (#5820)

* Update examples.py

adding factual sentences to the list

* Add missing comma separators

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
---
 spacy/lang/hi/examples.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/spacy/lang/hi/examples.py b/spacy/lang/hi/examples.py
index 1dd182532..76b0e8bf8 100644
--- a/spacy/lang/hi/examples.py
+++ b/spacy/lang/hi/examples.py
@@ -19,4 +19,6 @@ sentences = [
     "फ्रांस के राष्ट्रपति कौन हैं?",
     "संयुक्त राज्यों की राजधानी क्या है?",
     "बराक ओबामा का जन्म कब हुआ था?",
+    "जवाहरलाल नेहरू भारत के पहले प्रधानमंत्री हैं।",
+    "राजेंद्र प्रसाद, भारत के पहले राष्ट्रपति, दो कार्यकाल के लिए कार्यालय रखने वाले एकमात्र व्यक्ति हैं।",
 ]

From 90b958fd0100f2528e080d3b803d1d6c60a08ef2 Mon Sep 17 00:00:00 2001
From: Gustavo Zadrozny Leyendecker <gustavo.leyendecker@gmail.com>
Date: Wed, 29 Jul 2020 13:48:39 -0300
Subject: [PATCH 11/71] Fix EntityRenderer to support line breaks (after
 last entity) (closes #5838)

---
 .github/contributors/leyendecker.md      | 106 +++++++++++++++++++++++
 spacy/displacy/render.py                 |   6 +-
 spacy/tests/regression/test_issue5838.py |  22 +++++
 3 files changed, 133 insertions(+), 1 deletion(-)
 create mode 100644 .github/contributors/leyendecker.md
 create mode 100644 spacy/tests/regression/test_issue5838.py

diff --git a/.github/contributors/leyendecker.md b/.github/contributors/leyendecker.md
new file mode 100644
index 000000000..74e6cdd80
--- /dev/null
+++ b/.github/contributors/leyendecker.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI GmbH](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statement below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                        |
+|------------------------------- | ---------------------------- |
+| Name                           | Gustavo Zadrozny Leyendecker |
+| Company name (if applicable)   |                              |
+| Title or role (if applicable)  |                              |
+| Date                           | July 29, 2020                |
+| GitHub username                | leyendecker                  |
+| Website (optional)             |                              |
diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py
index 57d67c96b..431e02841 100644
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -328,7 +328,11 @@ class EntityRenderer(object):
             else:
                 markup += entity
             offset = end
-        markup += escape_html(text[offset:])
+        fragments = text[offset:].split("\n")
+        for i, fragment in enumerate(fragments):
+            markup += escape_html(fragment)
+            if len(fragments) > 1 and i != len(fragments) - 1:
+                markup += "</br>"
         markup = TPL_ENTS.format(content=markup, dir=self.direction)
         if title:
             markup = TPL_TITLE.format(title=title) + markup
diff --git a/spacy/tests/regression/test_issue5838.py b/spacy/tests/regression/test_issue5838.py
new file mode 100644
index 000000000..558273101
--- /dev/null
+++ b/spacy/tests/regression/test_issue5838.py
@@ -0,0 +1,22 @@
+from spacy.lang.en import English
+from spacy.tokens import Span
+from spacy import displacy
+
+SAMPLE_TEXT = '''First line
+Second line, with ent
+Third line
+Fourth line
+'''
+
+
+def test_issue5838():
+    # Displacy's EntityRenderer break line
+    # not working after last entity
+
+    nlp = English()
+    doc = nlp(SAMPLE_TEXT)
+    doc.ents = [Span(doc, 7, 8, label='test')]
+
+    html = displacy.render(doc, style='ent')
+    found = html.count('</br>')
+    assert found == 4

From f76fae0e8ddc655034e235b390572cf79a670dfc Mon Sep 17 00:00:00 2001
From: Rahul Gupta <rahul1990gupta@gmail.com>
Date: Wed, 29 Jul 2020 23:52:47 +0530
Subject: [PATCH 12/71] English: adds ordinal numbers (#5830)

---
 spacy/lang/en/lex_attrs.py       | 50 +++++++++++++++++++++++++++++++-
 spacy/tests/lang/en/test_text.py | 13 +++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/spacy/lang/en/lex_attrs.py b/spacy/lang/en/lex_attrs.py
index f92d41139..bd60b057b 100644
--- a/spacy/lang/en/lex_attrs.py
+++ b/spacy/lang/en/lex_attrs.py
@@ -44,6 +44,44 @@ _num_words = [
 ]
 
 
+_ordinal_words = [
+    "first",
+    "second",
+    "third",
+    "fourth",
+    "fifth",
+    "sixth",
+    "seventh",
+    "eighth",
+    "ninth",
+    "tenth",
+    "eleventh",
+    "twelfth",
+    "thirteenth",
+    "fourteenth",
+    "fifteenth",
+    "sixteenth",
+    "seventeenth",
+    "eighteenth",
+    "nineteenth",
+    "twentieth",
+    "thirtieth",
+    "fortieth",
+    "fiftieth",
+    "sixtieth",
+    "seventieth",
+    "eightieth",
+    "ninetieth",
+    "hundredth",
+    "thousandth",
+    "millionth",
+    "billionth",
+    "trillionth",
+    "quadrillionth",
+    "gajillionth",
+    "bazillionth",
+]
+
 def like_num(text):
     if text.startswith(("+", "-", "±", "~")):
         text = text[1:]
@@ -54,8 +92,18 @@ def like_num(text):
         num, denom = text.split("/")
         if num.isdigit() and denom.isdigit():
             return True
-    if text.lower() in _num_words:
+    
+    text_lower = text.lower()
+    if text_lower in _num_words:
         return True
+
+    # CHeck ordinal number
+    if text_lower in _ordinal_words:
+        return True
+    if text_lower.endswith("th"):
+        if text_lower[:-2].isdigit():
+            return True 
+
     return False
 
 
diff --git a/spacy/tests/lang/en/test_text.py b/spacy/tests/lang/en/test_text.py
index a7ebde989..0db1a6419 100644
--- a/spacy/tests/lang/en/test_text.py
+++ b/spacy/tests/lang/en/test_text.py
@@ -61,6 +61,19 @@ def test_lex_attrs_like_number(en_tokenizer, text, match):
     assert tokens[0].like_num == match
 
 
+@pytest.mark.parametrize(
+    "word",
+    [
+        "third",
+        "Millionth",
+        "100th",
+        "Hundredth",
+    ]
+)
+def test_en_lex_attrs_like_number_for_ordinal(word):
+    assert like_num(word)
+
+
 @pytest.mark.parametrize("word", ["eleven"])
 def test_en_lex_attrs_capitals(word):
     assert like_num(word)

From d16c0f2c3a021fbefe67f02639944d9246eafa42 Mon Sep 17 00:00:00 2001
From: holubvl3 <47881982+holubvl3@users.noreply.github.com>
Date: Thu, 30 Jul 2020 17:40:31 +0200
Subject: [PATCH 13/71] Create holubvl3 (#5845)

* Create holubvl3

* Rename holubvl3 to holubvl3.md

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 .github/contributors/holubvl3.md | 106 +++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 .github/contributors/holubvl3.md

diff --git a/.github/contributors/holubvl3.md b/.github/contributors/holubvl3.md
new file mode 100644
index 000000000..f2047b103
--- /dev/null
+++ b/.github/contributors/holubvl3.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI GmbH](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statement below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           |  Vladimir Holubec    |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           |  30.07.2020          |
+| GitHub username                |  holubvl3            |
+| Website (optional)             |                      |

From ac14ce7c30c2f6da4a71dee7978f5b765af4d966 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Fri, 31 Jul 2020 16:09:32 +0200
Subject: [PATCH 14/71] Prefer earlier spans in EntityRuler (#5843)

Similar to #4414, update the sorting in EntityRuler to prefer the first
span in overlapping spans.
---
 spacy/pipeline/entityruler.py             |  2 +-
 spacy/tests/pipeline/test_entity_ruler.py | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py
index 1786dda87..2abff62f1 100644
--- a/spacy/pipeline/entityruler.py
+++ b/spacy/pipeline/entityruler.py
@@ -95,7 +95,7 @@ class EntityRuler(object):
         matches = set(
             [(m_id, start, end) for m_id, start, end in matches if start != end]
         )
-        get_sort_key = lambda m: (m[2] - m[1], m[1])
+        get_sort_key = lambda m: (m[2] - m[1], -m[1])
         matches = sorted(matches, key=get_sort_key, reverse=True)
         entities = list(doc.ents)
         new_entities = []
diff --git a/spacy/tests/pipeline/test_entity_ruler.py b/spacy/tests/pipeline/test_entity_ruler.py
index b6e3c40c9..9e22c9cc7 100644
--- a/spacy/tests/pipeline/test_entity_ruler.py
+++ b/spacy/tests/pipeline/test_entity_ruler.py
@@ -154,3 +154,15 @@ def test_entity_ruler_properties(nlp, patterns):
     ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True)
     assert sorted(ruler.labels) == sorted(["HELLO", "BYE", "COMPLEX", "TECH_ORG"])
     assert sorted(ruler.ent_ids) == ["a1", "a2"]
+
+
+def test_entity_ruler_overlapping_spans(nlp):
+    ruler = EntityRuler(nlp)
+    patterns = [
+        {"label": "FOOBAR", "pattern": "foo bar"},
+        {"label": "BARBAZ", "pattern": "bar baz"},
+    ]
+    ruler.add_patterns(patterns)
+    doc = ruler(nlp.make_doc("foo bar baz"))
+    assert len(doc.ents) == 1
+    assert doc.ents[0].label_ == "FOOBAR"

From cd59979ab446d7613ec7df5d5737539464918edf Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 3 Aug 2020 13:53:15 +0200
Subject: [PATCH 15/71] Fix span boundary handling in Spanish noun_chunks
 (#5860)

---
 spacy/lang/es/syntax_iterators.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/spacy/lang/es/syntax_iterators.py b/spacy/lang/es/syntax_iterators.py
index 5fda35211..d4572b682 100644
--- a/spacy/lang/es/syntax_iterators.py
+++ b/spacy/lang/es/syntax_iterators.py
@@ -20,8 +20,7 @@ def noun_chunks(doclike):
     np_left_deps = [doc.vocab.strings.add(label) for label in left_labels]
     np_right_deps = [doc.vocab.strings.add(label) for label in right_labels]
     stop_deps = [doc.vocab.strings.add(label) for label in stop_labels]
-    token = doc[0]
-    while token and token.i < len(doclike):
+    for token in doclike:
         if token.pos in [PROPN, NOUN, PRON]:
             left, right = noun_bounds(
                 doc, token, np_left_deps, np_right_deps, stop_deps

From b8412485897c4d2e1171014b4c863fc43ecf00db Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Tue, 4 Aug 2020 13:35:25 +0200
Subject: [PATCH 16/71] Add Span index boundary checks (#5861)

* Add Span index boundary checks

* Return Span-specific IndexError in all cases

* Simplify and fix if/else
---
 spacy/errors.py              |  1 +
 spacy/tests/doc/test_span.py | 12 ++++++++++++
 spacy/tokens/span.pyx        |  8 ++++++--
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 79ed5ecdb..fe59453c0 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -588,6 +588,7 @@ class Errors(object):
     E199 = ("Unable to merge 0-length span at doc[{start}:{end}].")
     E200 = ("Specifying a base model with a pretrained component '{component}' "
             "can not be combined with adding a pretrained Tok2Vec layer.")
+    E201 = ("Span index out of range.")
 
 
 @add_codes
diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py
index e76ca4697..25fa421b7 100644
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -287,3 +287,15 @@ def test_span_eq_hash(doc, doc_not_parsed):
     assert hash(doc[0:2]) == hash(doc[0:2])
     assert hash(doc[0:2]) != hash(doc[1:3])
     assert hash(doc[0:2]) != hash(doc_not_parsed[0:2])
+
+
+def test_span_boundaries(doc):
+    start = 1
+    end = 5
+    span = doc[start:end]
+    for i in range(start, end):
+        assert span[i - start] == doc[i]
+    with pytest.raises(IndexError):
+        _ = span[-5]
+    with pytest.raises(IndexError):
+        _ = span[5]
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index 2f1418a5b..29b87fa8d 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -181,9 +181,13 @@ cdef class Span:
             return Span(self.doc, start + self.start, end + self.start)
         else:
             if i < 0:
-                return self.doc[self.end + i]
+                token_i = self.end + i
             else:
-                return self.doc[self.start + i]
+                token_i = self.start + i
+            if self.start <= token_i < self.end:
+                return self.doc[token_i]
+            else:
+                raise IndexError(Errors.E201)
 
     def __iter__(self):
         """Iterate over `Token` objects.

From c62fd878a38fa0ce16243022b5dab5d043aaf31f Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Tue, 4 Aug 2020 13:36:32 +0200
Subject: [PATCH 17/71] Allow Doc.char_span to snap to token boundaries (#5849)

* Allow Doc.char_span to snap to token boundaries

Add a `mode` option to allow `Doc.char_span` to snap to token
boundaries. The `mode` options:

* `strict`: character offsets must match token boundaries (default, same as
before)
* `inside`: all tokens completely within the character span
* `outside`: all tokens at least partially covered by the character span

Add a new helper function `token_by_char` that returns the token
corresponding to a character position in the text. Update
`token_by_start` and `token_by_end` to use `token_by_char` for more
efficient searching.

* Remove unused import

* Rename mode to alignment_mode

Rename `mode` to `alignment_mode` with the options
`strict`/`contract`/`expand`. Any unrecognized modes are silently
converted to `strict`.
---
 spacy/tests/doc/test_span.py | 19 ++++++++++
 spacy/tokens/doc.pyx         | 71 +++++++++++++++++++++++++++---------
 website/docs/api/doc.md      | 62 ++++++++++++++++---------------
 3 files changed, 105 insertions(+), 47 deletions(-)

diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py
index 25fa421b7..107078df9 100644
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -167,11 +167,30 @@ def test_spans_are_hashable(en_tokenizer):
 
 def test_spans_by_character(doc):
     span1 = doc[1:-2]
+
+    # default and specified alignment mode "strict"
     span2 = doc.char_span(span1.start_char, span1.end_char, label="GPE")
     assert span1.start_char == span2.start_char
     assert span1.end_char == span2.end_char
     assert span2.label_ == "GPE"
 
+    span2 = doc.char_span(span1.start_char, span1.end_char, label="GPE", alignment_mode="strict")
+    assert span1.start_char == span2.start_char
+    assert span1.end_char == span2.end_char
+    assert span2.label_ == "GPE"
+
+    # alignment mode "contract"
+    span2 = doc.char_span(span1.start_char - 3, span1.end_char, label="GPE", alignment_mode="contract")
+    assert span1.start_char == span2.start_char
+    assert span1.end_char == span2.end_char
+    assert span2.label_ == "GPE"
+
+    # alignment mode "expand"
+    span2 = doc.char_span(span1.start_char + 1, span1.end_char, label="GPE", alignment_mode="expand")
+    assert span1.start_char == span2.start_char
+    assert span1.end_char == span2.end_char
+    assert span2.label_ == "GPE"
+
 
 def test_span_to_array(doc):
     span = doc[1:-2]
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 5b03dc5d2..89573ba09 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -352,17 +352,25 @@ cdef class Doc:
     def doc(self):
         return self
 
-    def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None):
-        """Create a `Span` object from the slice `doc.text[start : end]`.
+    def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, alignment_mode="strict"):
+        """Create a `Span` object from the slice
+        `doc.text[start_idx : end_idx]`. Returns None if no valid `Span` can be
+        created.
 
         doc (Doc): The parent document.
-        start (int): The index of the first character of the span.
-        end (int): The index of the first character after the span.
+        start_idx (int): The index of the first character of the span.
+        end_idx (int): The index of the first character after the span.
         label (uint64 or string): A label to attach to the Span, e.g. for
             named entities.
-        kb_id (uint64 or string):  An ID from a KB to capture the meaning of a named entity.
+        kb_id (uint64 or string):  An ID from a KB to capture the meaning of a
+            named entity.
         vector (ndarray[ndim=1, dtype='float32']): A meaning representation of
             the span.
+        alignment_mode (str): How character indices are aligned to token
+            boundaries. Options: "strict" (character indices must be aligned
+            with token boundaries), "contract" (span of all tokens completely
+            within the character span), "expand" (span of all tokens at least
+            partially covered by the character span). Defaults to "strict".
         RETURNS (Span): The newly constructed object.
 
         DOCS: https://spacy.io/api/doc#char_span
@@ -371,12 +379,29 @@ cdef class Doc:
             label = self.vocab.strings.add(label)
         if not isinstance(kb_id, int):
             kb_id = self.vocab.strings.add(kb_id)
-        cdef int start = token_by_start(self.c, self.length, start_idx)
-        if start == -1:
+        if alignment_mode not in ("strict", "contract", "expand"):
+            alignment_mode = "strict"
+        cdef int start = token_by_char(self.c, self.length, start_idx)
+        if start < 0 or (alignment_mode == "strict" and start_idx != self[start].idx):
             return None
-        cdef int end = token_by_end(self.c, self.length, end_idx)
-        if end == -1:
+        # end_idx is exclusive, so find the token at one char before
+        cdef int end = token_by_char(self.c, self.length, end_idx - 1)
+        if end < 0 or (alignment_mode == "strict" and end_idx != self[end].idx + len(self[end])):
             return None
+        # Adjust start and end by alignment_mode
+        if alignment_mode == "contract":
+            if self[start].idx < start_idx:
+                start += 1
+            if end_idx < self[end].idx + len(self[end]):
+                end -= 1
+            # if no tokens are completely within the span, return None
+            if end < start:
+                return None
+        elif alignment_mode == "expand":
+            # Don't consider the trailing whitespace to be part of the previous
+            # token
+            if start_idx == self[start].idx + len(self[start]):
+                start += 1
         # Currently we have the token index, we want the range-end index
         end += 1
         cdef Span span = Span(self, start, end, label=label, kb_id=kb_id, vector=vector)
@@ -1167,23 +1192,35 @@ cdef class Doc:
 
 
 cdef int token_by_start(const TokenC* tokens, int length, int start_char) except -2:
-    cdef int i
-    for i in range(length):
-        if tokens[i].idx == start_char:
-            return i
+    cdef int i = token_by_char(tokens, length, start_char)
+    if i >= 0 and tokens[i].idx == start_char:
+        return i
     else:
         return -1
 
 
 cdef int token_by_end(const TokenC* tokens, int length, int end_char) except -2:
-    cdef int i
-    for i in range(length):
-        if tokens[i].idx + tokens[i].lex.length == end_char:
-            return i
+    # end_char is exclusive, so find the token at one char before
+    cdef int i = token_by_char(tokens, length, end_char - 1)
+    if i >= 0 and tokens[i].idx + tokens[i].lex.length == end_char:
+        return i
     else:
         return -1
 
 
+cdef int token_by_char(const TokenC* tokens, int length, int char_idx) except -2:
+    cdef int start = 0, mid, end = length - 1
+    while start <= end:
+        mid = (start + end) / 2
+        if char_idx < tokens[mid].idx:
+            end = mid - 1
+        elif char_idx >= tokens[mid].idx + tokens[mid].lex.length + tokens[mid].spacy:
+            start = mid + 1
+        else:
+            return mid
+    return -1
+
+
 cdef int set_children_from_heads(TokenC* tokens, int length) except -1:
     cdef TokenC* head
     cdef TokenC* child
diff --git a/website/docs/api/doc.md b/website/docs/api/doc.md
index 7decc2278..420e12fcb 100644
--- a/website/docs/api/doc.md
+++ b/website/docs/api/doc.md
@@ -187,8 +187,9 @@ Remove a previously registered extension.
 
 ## Doc.char_span {#char_span tag="method" new="2"}
 
-Create a `Span` object from the slice `doc.text[start:end]`. Returns `None` if
-the character indices don't map to a valid span.
+Create a `Span` object from the slice `doc.text[start_idx:end_idx]`. Returns
+`None` if the character indices don't map to a valid span using the default mode
+`"strict"`.
 
 > #### Example
 >
@@ -198,14 +199,15 @@ the character indices don't map to a valid span.
 > assert span.text == "New York"
 > ```
 
-| Name                                 | Type                                     | Description                                                           |
-| ------------------------------------ | ---------------------------------------- | --------------------------------------------------------------------- |
-| `start`                              | int                                      | The index of the first character of the span.                         |
-| `end`                                | int                                      | The index of the last character after the span.                       |
-| `label`                              | uint64 / unicode                         | A label to attach to the span, e.g. for named entities.               |
-| `kb_id` <Tag variant="new">2.2</Tag> | uint64 / unicode                         | An ID from a knowledge base to capture the meaning of a named entity. |
-| `vector`                             | `numpy.ndarray[ndim=1, dtype='float32']` | A meaning representation of the span.                                 |
-| **RETURNS**                          | `Span`                                   | The newly constructed object or `None`.                               |
+| Name                                 | Type                                     | Description                                                                                                                                                                                                                                                 |
+| ------------------------------------ | ---------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `start_idx`                          | int                                      | The index of the first character of the span.                                                                                                                                                                                                               |
+| `end_idx`                            | int                                      | The index of the last character after the span.                                                                                                                                                                                                             |
+| `label`                              | uint64 / unicode                         | A label to attach to the span, e.g. for named entities.                                                                                                                                                                                                     |
+| `kb_id` <Tag variant="new">2.2</Tag> | uint64 / unicode                         | An ID from a knowledge base to capture the meaning of a named entity.                                                                                                                                                                                       |
+| `vector`                             | `numpy.ndarray[ndim=1, dtype='float32']` | A meaning representation of the span.                                                                                                                                                                                                                       |
+| `alignment_mode`                     | `str`                                    | How character indices snap to token boundaries. Options: "strict" (no snapping), "contract" (span of all tokens completely within the character span), "expand" (span of all tokens at least partially covered by the character span). Defaults to "strict". |
+| **RETURNS**                          | `Span`                                   | The newly constructed object or `None`.                                                                                                                                                                                                                     |
 
 ## Doc.similarity {#similarity tag="method" model="vectors"}
 
@@ -646,26 +648,26 @@ The L2 norm of the document's vector representation.
 
 ## Attributes {#attributes}
 
-| Name                                    | Type         | Description                                                                                                                                                                                                                                                                                |
-| --------------------------------------- | ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `text`                                  | unicode      | A unicode representation of the document text.                                                                                                                                                                                                                                             |
-| `text_with_ws`                          | unicode      | An alias of `Doc.text`, provided for duck-type compatibility with `Span` and `Token`.                                                                                                                                                                                                      |
-| `mem`                                   | `Pool`       | The document's local memory heap, for all C data it owns.                                                                                                                                                                                                                                  |
-| `vocab`                                 | `Vocab`      | The store of lexical types.                                                                                                                                                                                                                                                                |
-| `tensor` <Tag variant="new">2</Tag>     | `ndarray`    | Container for dense vector representations.                                                                                                                                                                                                                                                |
-| `cats` <Tag variant="new">2</Tag>       | dict         | Maps a label to a score for categories applied to the document. The label is a string and the score should be a float.                                                                                     |
-| `user_data`                             | -            | A generic storage area, for user custom data.                                                                                                                                                                                                                                              |
-| `lang` <Tag variant="new">2.1</Tag>     | int          | Language of the document's vocabulary.                                                                                                                                                                                                                                                     |
-| `lang_` <Tag variant="new">2.1</Tag>    | unicode      | Language of the document's vocabulary.                                                                                                                                                                                                                                                     |
-| `is_tagged`                             | bool         | A flag indicating that the document has been part-of-speech tagged. Returns `True` if the `Doc` is empty.                                                                                                                                                                                  |
-| `is_parsed`                             | bool         | A flag indicating that the document has been syntactically parsed. Returns `True` if the `Doc` is empty.                                                                                                                                                                                   |
-| `is_sentenced`                          | bool         | A flag indicating that sentence boundaries have been applied to the document. Returns `True` if the `Doc` is empty.                                                                                                                                                                        |
-| `is_nered` <Tag variant="new">2.1</Tag> | bool         | A flag indicating that named entities have been set. Will return `True` if the `Doc` is empty, or if _any_ of the tokens has an entity tag set, even if the others are unknown.                                                                                                            |
-| `sentiment`                             | float        | The document's positivity/negativity score, if available.                                                                                                                                                                                                                                  |
-| `user_hooks`                            | dict         | A dictionary that allows customization of the `Doc`'s properties.                                                                                                                                                                                                                          |
-| `user_token_hooks`                      | dict         | A dictionary that allows customization of properties of `Token` children.                                                                                                                                                                                                                  |
-| `user_span_hooks`                       | dict         | A dictionary that allows customization of properties of `Span` children.                                                                                                                                                                                                                   |
-| `_`                                     | `Underscore` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes).                                                                                                                                                                             |
+| Name                                    | Type         | Description                                                                                                                                                                     |
+| --------------------------------------- | ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `text`                                  | unicode      | A unicode representation of the document text.                                                                                                                                  |
+| `text_with_ws`                          | unicode      | An alias of `Doc.text`, provided for duck-type compatibility with `Span` and `Token`.                                                                                           |
+| `mem`                                   | `Pool`       | The document's local memory heap, for all C data it owns.                                                                                                                       |
+| `vocab`                                 | `Vocab`      | The store of lexical types.                                                                                                                                                     |
+| `tensor` <Tag variant="new">2</Tag>     | `ndarray`    | Container for dense vector representations.                                                                                                                                     |
+| `cats` <Tag variant="new">2</Tag>       | dict         | Maps a label to a score for categories applied to the document. The label is a string and the score should be a float.                                                          |
+| `user_data`                             | -            | A generic storage area, for user custom data.                                                                                                                                   |
+| `lang` <Tag variant="new">2.1</Tag>     | int          | Language of the document's vocabulary.                                                                                                                                          |
+| `lang_` <Tag variant="new">2.1</Tag>    | unicode      | Language of the document's vocabulary.                                                                                                                                          |
+| `is_tagged`                             | bool         | A flag indicating that the document has been part-of-speech tagged. Returns `True` if the `Doc` is empty.                                                                       |
+| `is_parsed`                             | bool         | A flag indicating that the document has been syntactically parsed. Returns `True` if the `Doc` is empty.                                                                        |
+| `is_sentenced`                          | bool         | A flag indicating that sentence boundaries have been applied to the document. Returns `True` if the `Doc` is empty.                                                             |
+| `is_nered` <Tag variant="new">2.1</Tag> | bool         | A flag indicating that named entities have been set. Will return `True` if the `Doc` is empty, or if _any_ of the tokens has an entity tag set, even if the others are unknown. |
+| `sentiment`                             | float        | The document's positivity/negativity score, if available.                                                                                                                       |
+| `user_hooks`                            | dict         | A dictionary that allows customization of the `Doc`'s properties.                                                                                                               |
+| `user_token_hooks`                      | dict         | A dictionary that allows customization of properties of `Token` children.                                                                                                       |
+| `user_span_hooks`                       | dict         | A dictionary that allows customization of properties of `Span` children.                                                                                                        |
+| `_`                                     | `Underscore` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes).                                                                  |
 
 ## Serialization fields {#serialization-fields}
 

From 9e45d064bbe81df303406e42fa81f169070f1ad8 Mon Sep 17 00:00:00 2001
From: Bram Vanroy <Bram.Vanroy@UGent.be>
Date: Wed, 5 Aug 2020 14:34:12 +0200
Subject: [PATCH 18/71] Update universe details spacy_conll (#5871)

---
 website/meta/universe.json | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/website/meta/universe.json b/website/meta/universe.json
index a9a402a66..cf361435f 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -1795,26 +1795,33 @@
         {
             "id": "spacy-conll",
             "title": "spacy_conll",
-            "slogan": "Parse text with spaCy and gets its output in CoNLL-U format",
-            "description": "This module allows you to parse a text to CoNLL-U format. It contains a pipeline component for spaCy that adds CoNLL-U properties to a Doc and its sentences. It can also be used as a command-line tool.",
+            "slogan": "Parsing to CoNLL with spaCy, spacy-stanza, and spacy-udpipe",
+            "description": "This module allows you to parse text into CoNLL-U format. You can use it as a command line tool, or embed it in your own scripts by adding it as a custom pipeline component to a spaCy, spacy-stanfordnlp, spacy-stanza, or spacy-udpipe pipeline. It also provides an easy-to-use function to quickly initialize a parser. CoNLL-related properties are added to Doc elements, sentence Spans, and Tokens.",
             "code_example": [
-                "import spacy",
-                "from spacy_conll import ConllFormatter",
+                "from spacy_conll import init_parser",
                 "",
-                "nlp = spacy.load('en')",
-                "conllformatter = ConllFormatter(nlp)",
-                "nlp.add_pipe(conllformatter, after='parser')",
-                "doc = nlp('I like cookies. Do you?')",
-                "conll = doc._.conll",
-                "print(doc._.conll_str_headers)",
-                "print(doc._.conll_str)"
+                "",
+                "# Initialise English parser, already including the ConllFormatter as a pipeline component.",
+                "# Indicate that we want to get the CoNLL headers in the string output.",
+                "# `use_gpu` and `verbose` are specific to stanza (and stanfordnlp). These keyword arguments",
+                "# are passed on to their Pipeline() initialisation",
+                "nlp = init_parser(\"stanza\",",
+                "                  \"en\",",
+                "                  parser_opts={\"use_gpu\": True, \"verbose\": False},",
+                "                  include_headers=True)",
+                "# Parse a given string",
+                "doc = nlp(\"A cookie is a baked or cooked food that is typically small, flat and sweet. It usually contains flour, sugar and some type of oil or fat.\")",
+                "",
+                "# Get the CoNLL representation of the whole document, including headers",
+                "conll = doc._.conll_str",
+                "print(conll)"
             ],
             "code_language": "python",
             "author": "Bram Vanroy",
             "author_links": {
                 "github": "BramVanroy",
                 "twitter": "BramVanroy",
-                "website": "https://bramvanroy.be"
+                "website": "http://bramvanroy.be"
             },
             "github": "BramVanroy/spacy_conll",
             "category": ["standalone", "pipeline"],

From 4193402c47fdf48da620e86b94f9f8d7f3878ab1 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Wed, 5 Aug 2020 14:56:14 +0200
Subject: [PATCH 19/71] Add warning when Matcher subpattern is discarded
 (#5873)

* Add a warning when a subpattern is not processed and discarded

* Normalize subpattern attribute/operator keys to upper case like
top-level attributes
---
 spacy/errors.py                                | 2 ++
 spacy/matcher/matcher.pyx                      | 9 +++++++--
 spacy/tests/matcher/test_pattern_validation.py | 9 +++++++++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index fe59453c0..7f9164694 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -133,6 +133,8 @@ class Warnings(object):
             "normalization table, please ignore this warning.")
     W034 = ("Please install the package spacy-lookups-data in order to include "
             "the default lexeme normalization table for the language '{lang}'.")
+    W035 = ('Discarding subpattern "{pattern}" due to an unrecognized '
+            "attribute or operator.")
 
 
 @add_codes
diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx
index 0c1a56187..8fbfe305a 100644
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@@ -811,9 +811,11 @@ def _get_extra_predicates(spec, extra_predicates):
                 attr = "ORTH"
             attr = IDS.get(attr.upper())
         if isinstance(value, dict):
+            processed = False
+            value_with_upper_keys = {k.upper(): v for k, v in value.items()}
             for type_, cls in predicate_types.items():
-                if type_ in value:
-                    predicate = cls(len(extra_predicates), attr, value[type_], type_)
+                if type_ in value_with_upper_keys:
+                    predicate = cls(len(extra_predicates), attr, value_with_upper_keys[type_], type_)
                     # Don't create a redundant predicates.
                     # This helps with efficiency, as we're caching the results.
                     if predicate.key in seen_predicates:
@@ -822,6 +824,9 @@ def _get_extra_predicates(spec, extra_predicates):
                         extra_predicates.append(predicate)
                         output.append(predicate.i)
                         seen_predicates[predicate.key] = predicate.i
+                    processed = True
+            if not processed:
+                warnings.warn(Warnings.W035.format(pattern=value))
     return output
 
 
diff --git a/spacy/tests/matcher/test_pattern_validation.py b/spacy/tests/matcher/test_pattern_validation.py
index c536698d0..ec2660ab4 100644
--- a/spacy/tests/matcher/test_pattern_validation.py
+++ b/spacy/tests/matcher/test_pattern_validation.py
@@ -76,3 +76,12 @@ def test_minimal_pattern_validation(en_vocab, pattern, n_errors, n_min_errors):
             matcher.add("TEST", [pattern])
     elif n_errors == 0:
         matcher.add("TEST", [pattern])
+
+
+def test_pattern_warnings(en_vocab):
+    matcher = Matcher(en_vocab)
+    # normalize "regex" to upper like "text"
+    matcher.add("TEST1", [[{"text": {"regex": "regex"}}]])
+    # warn if subpattern attribute isn't recognized and processed
+    with pytest.warns(UserWarning):
+        matcher.add("TEST2", [[{"TEXT": {"XX": "xx"}}]])

From 49e690bde17f0a1b23844ffd5cc8499ddb7ce515 Mon Sep 17 00:00:00 2001
From: graue70 <23035329+graue70@users.noreply.github.com>
Date: Wed, 12 Aug 2020 15:35:25 +0200
Subject: [PATCH 20/71] Fix typos in comments (#5904)

* Fix typo in comment

* Fix typo

* Add spaCy Contributor Agreement
---
 .github/contributors/graue70.md | 106 ++++++++++++++++++++++++++++++++
 spacy/pipeline/pipes.pyx        |   4 +-
 2 files changed, 108 insertions(+), 2 deletions(-)
 create mode 100644 .github/contributors/graue70.md

diff --git a/.github/contributors/graue70.md b/.github/contributors/graue70.md
new file mode 100644
index 000000000..7f9aa037b
--- /dev/null
+++ b/.github/contributors/graue70.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI GmbH](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Thomas               |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 2020-08-11           |
+| GitHub username                | graue70              |
+| Website (optional)             |                      |
diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx
index ea40e3ae0..4269dd3ea 100644
--- a/spacy/pipeline/pipes.pyx
+++ b/spacy/pipeline/pipes.pyx
@@ -1205,7 +1205,7 @@ class EntityLinker(Pipe):
         self.kb = None
         self.cfg = dict(cfg)
         
-        # how many neightbour sentences to take into account
+        # how many neighbour sentences to take into account
         self.n_sents = cfg.get("n_sents", 0)
 
     def set_kb(self, kb):
@@ -1375,7 +1375,7 @@ class EntityLinker(Pipe):
                 # This may go wrong if there are entities across sentences - which shouldn't happen normally.
                 for sent_index, sent in enumerate(sentences):
                     if sent.ents:
-                        # get n_neightbour sentences, clipped to the length of the document
+                        # get n_neighbour sentences, clipped to the length of the document
                         start_sentence = max(0, sent_index - self.n_sents)
                         end_sentence = min(len(sentences) -1, sent_index + self.n_sents)
 

From 7b33b2854f99ef531d72e19e6dda773f43a5fe13 Mon Sep 17 00:00:00 2001
From: Adam Bittlingmayer <bittlingmayer@users.noreply.github.com>
Date: Wed, 12 Aug 2020 17:36:14 +0400
Subject: [PATCH 21/71] Add Armenian sentence-final verchaket, Greek question
 mark and Arabic question mark to default punct (#5910)

* Add Armenian sentence-final verchaket

* Add Greek and Arabic question marks, and contributor agreement

* Check box
---
 .github/contributors/bittlingmayer.md | 107 ++++++++++++++++++++++++++
 spacy/pipeline/pipes.pyx              |   1 +
 2 files changed, 108 insertions(+)
 create mode 100644 .github/contributors/bittlingmayer.md

diff --git a/.github/contributors/bittlingmayer.md b/.github/contributors/bittlingmayer.md
new file mode 100644
index 000000000..69ec98a00
--- /dev/null
+++ b/.github/contributors/bittlingmayer.md
@@ -0,0 +1,107 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI GmbH](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Adam Bittlingmayer   |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 12 Aug 2020          |
+| GitHub username                | bittlingmayer        |
+| Website (optional)             |                      |
+
diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx
index 4269dd3ea..a5b891b54 100644
--- a/spacy/pipeline/pipes.pyx
+++ b/spacy/pipeline/pipes.pyx
@@ -1508,6 +1508,7 @@ class Sentencizer(object):
     """
 
     default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹',
+            ':', ';', '؟',
             '।', '॥', '၊', '။', '።', '፧', '፨', '᙮', '᜵', '᜶', '᠃', '᠉', '᥄',
             '᥅', '᪨', '᪩', '᪪', '᪫', '᭚', '᭛', '᭞', '᭟', '᰻', '᰼', '᱾', '᱿',
             '‼', '‽', '⁇', '⁈', '⁉', '⸮', '⸼', '꓿', '꘎', '꘏', '꛳', '꛷', '꡶',

From 669dc708229b5bb5b92c4d87f8a9d0344fca21ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Attila=20Sz=C3=A1sz?= <tilusnet@users.noreply.github.com>
Date: Wed, 12 Aug 2020 21:46:08 +0100
Subject: [PATCH 22/71] Create tilusnet.md (#5914)

---
 .github/contributors/tilusnet.md | 106 +++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 .github/contributors/tilusnet.md

diff --git a/.github/contributors/tilusnet.md b/.github/contributors/tilusnet.md
new file mode 100644
index 000000000..1618bac2e
--- /dev/null
+++ b/.github/contributors/tilusnet.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI GmbH](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           |  Attila Szász        |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           |  12 Aug 2020         |
+| GitHub username                |  tilusnet            |
+| Website (optional)             |                      |

From 071c09ff35dec169e801fe3c7cabc18a07975f74 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Thu, 20 Aug 2020 11:08:38 +0200
Subject: [PATCH 23/71] add coding (#5942)

---
 spacy/tests/regression/test_issue5838.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/spacy/tests/regression/test_issue5838.py b/spacy/tests/regression/test_issue5838.py
index 558273101..c008c5aec 100644
--- a/spacy/tests/regression/test_issue5838.py
+++ b/spacy/tests/regression/test_issue5838.py
@@ -1,3 +1,6 @@
+# coding: utf8
+from __future__ import unicode_literals
+
 from spacy.lang.en import English
 from spacy.tokens import Span
 from spacy import displacy

From 99d2a25687c7a788a20dfc6210c48045c95e4b6d Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 20 Aug 2020 16:30:11 +0200
Subject: [PATCH 24/71] Make sure sys.argv exists (#5943)

* Make sure sys.argv exists (resolves #5610)

* Fix typo
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 01e372e91..f78781918 100755
--- a/setup.py
+++ b/setup.py
@@ -193,7 +193,7 @@ def setup_package():
 
     root = os.path.abspath(os.path.dirname(__file__))
 
-    if len(sys.argv) > 1 and sys.argv[1] == "clean":
+    if hasattr(sys, "argv") and len(sys.argv) > 1 and sys.argv[1] == "clean":
         return clean(root)
 
     with chdir(root):

From a341b4ef09c99225421db2f21c1e4c18885ac779 Mon Sep 17 00:00:00 2001
From: holubvl3 <47881982+holubvl3@users.noreply.github.com>
Date: Fri, 21 Aug 2020 16:17:53 +0200
Subject: [PATCH 25/71] Adding support for Czech language (#5826)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Create lex_attrs.py

Hello,

Czech is currently missing from spaCy, so I would like to help push it forward a little. This file is based on the other languages' lex_attrs.py files, with the number words translated to Czech.

* Update __init__.py

Updated for use with new Czech Lex_attrs file

* Update stop_words.py

* Create test_text.py

Co-authored-by: Vladimír Holubec <vholubec@arcdata.cz>
---
 spacy/lang/cs/__init__.py   |   2 +
 spacy/lang/cs/lex_attrs.py  |  64 +++++++++++++++++++
 spacy/lang/cs/stop_words.py | 118 +++++++++++++++++++++++++++++++++---
 spacy/lang/cs/test_text.py  |   0
 4 files changed, 176 insertions(+), 8 deletions(-)
 create mode 100644 spacy/lang/cs/lex_attrs.py
 create mode 100644 spacy/lang/cs/test_text.py

diff --git a/spacy/lang/cs/__init__.py b/spacy/lang/cs/__init__.py
index 5b1397ba2..baaaa162b 100644
--- a/spacy/lang/cs/__init__.py
+++ b/spacy/lang/cs/__init__.py
@@ -4,10 +4,12 @@ from __future__ import unicode_literals
 from .stop_words import STOP_WORDS
 from ...language import Language
 from ...attrs import LANG
+from .lex_attrs import LEX_ATTRS
 
 
 class CzechDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
+    lex_attr_getters.update(LEX_ATTRS)
     lex_attr_getters[LANG] = lambda text: "cs"
     stop_words = STOP_WORDS
 
diff --git a/spacy/lang/cs/lex_attrs.py b/spacy/lang/cs/lex_attrs.py
new file mode 100644
index 000000000..368cab6c8
--- /dev/null
+++ b/spacy/lang/cs/lex_attrs.py
@@ -0,0 +1,64 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+from ...attrs import LIKE_NUM
+
+_num_words = [
+    "nula",
+    "jedna",
+    "dva",
+    "tři",
+    "čtyři",
+    "pět",
+    "šest",
+    "sedm",
+    "osm",
+    "devět",
+    "deset",
+    "jedenáct",
+    "dvanáct",
+    "třináct",
+    "čtrnáct",
+    "patnáct",
+    "šestnáct",
+    "sedmnáct",
+    "osmnáct",
+    "devatenáct",
+    "dvacet",
+    "třicet",
+    "čtyřicet",
+    "padesát",
+    "šedesát",
+    "sedmdesát",
+    "osmdesát",
+    "devadesát",
+    "sto",
+    "tisíc",
+    "milion",
+    "miliarda",
+    "bilion",
+    "biliarda",
+    "trilion",
+    "triliarda",
+    "kvadrilion",
+    "kvadriliarda",
+    "kvintilion",
+    ]
+
+
+def like_num(text):
+    if text.startswith(("+", "-", "±", "~")):
+        text = text[1:]
+    text = text.replace(",", "").replace(".", "")
+    if text.isdigit():
+        return True
+    if text.count("/") == 1:
+        num, denom = text.split("/")
+        if num.isdigit() and denom.isdigit():
+            return True
+    if text.lower() in _num_words:
+        return True
+    return False
+
+
+LEX_ATTRS = {LIKE_NUM: like_num}
diff --git a/spacy/lang/cs/stop_words.py b/spacy/lang/cs/stop_words.py
index 59d3c102e..9277772fb 100644
--- a/spacy/lang/cs/stop_words.py
+++ b/spacy/lang/cs/stop_words.py
@@ -1,18 +1,26 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-
 # Source: https://github.com/Alir3z4/stop-words
+# Source: https://github.com/stopwords-iso/stopwords-cs/blob/master/stopwords-cs.txt
 
 STOP_WORDS = set(
     """
-ačkoli
+a
+aby
 ahoj
+ačkoli
 ale
+alespoň
 anebo
+ani
+aniž
 ano
+atd.
+atp.
 asi
 aspoň
+až
 během
 bez
 beze
@@ -25,12 +33,14 @@ budeš
 budete
 budou
 budu
+by
 byl
 byla
 byli
 bylo
 byly
 bys
+být
 čau
 chce
 chceme
@@ -39,14 +49,21 @@ chcete
 chci
 chtějí
 chtít
-chut'
+chuť
 chuti
 co
+což
+cz
+či
+článek
+článku
+články
 čtrnáct
 čtyři
 dál
 dále
 daleko
+další
 děkovat
 děkujeme
 děkuji
@@ -54,6 +71,7 @@ den
 deset
 devatenáct
 devět
+dnes
 do
 dobrý
 docela
@@ -61,9 +79,15 @@ dva
 dvacet
 dvanáct
 dvě
+email
+ho
 hodně
+i
 já
 jak
+jakmile
+jako
+jakož
 jde
 je
 jeden
@@ -73,25 +97,39 @@ jedno
 jednou
 jedou
 jeho
+jehož
+jej
 její
 jejich
+jejichž
+jehož
+jelikož
 jemu
 jen
 jenom
+jenž
+jež
 ještě
 jestli
 jestliže
+ještě
+ji
 jí
 jich
 jím
+jim
 jimi
 jinak
-jsem
+jiné
+již
 jsi
 jsme
+jsem
 jsou
 jste
+k
 kam
+každý
 kde
 kdo
 kdy
@@ -100,10 +138,13 @@ ke
 kolik
 kromě
 která
+kterak
+kterou
 které
 kteří
 který
 kvůli
+ku
 má
 mají
 málo
@@ -114,8 +155,10 @@ máte
 mé
 mě
 mezi
+mi
 mí
 mít
+mne
 mně
 mnou
 moc
@@ -138,6 +181,7 @@ nás
 náš
 naše
 naši
+načež
 ne
 ně
 nebo
@@ -145,6 +189,7 @@ nebyl
 nebyla
 nebyli
 nebyly
+nechť
 něco
 nedělá
 nedělají
@@ -154,6 +199,7 @@ neděláš
 neděláte
 nějak
 nejsi
+nejsou
 někde
 někdo
 nemají
@@ -161,15 +207,22 @@ nemáme
 nemáte
 neměl
 němu
+němuž
 není
 nestačí
+ně
 nevadí
+nové
+nový
+noví
 než
 nic
 nich
+ní
 ním
 nimi
 nula
+o
 od
 ode
 on
@@ -183,22 +236,37 @@ pak
 patnáct
 pět
 po
+pod
+pokud
 pořád
+pouze
 potom
 pozdě
+pravé
 před
+přede
 přes
-přese
+přece
 pro
 proč
 prosím
 prostě
+proto
 proti
+první
+právě
 protože
+při
+přičemž
 rovně
+s
 se
 sedm
 sedmnáct
+si
+sice
+skoro
+sic
 šest
 šestnáct
 skoro
@@ -207,41 +275,69 @@ smí
 snad
 spolu
 sta
+svůj
+své
+svá
+svých
+svým
+svými
+svůj
 sté
 sto
+strana
 ta
 tady
 tak
 takhle
 taky
+také
+takže
 tam
-tamhle
-tamhleto
+támhle
+támhleto
 tamto
 tě
 tebe
 tebou
-ted'
+teď
 tedy
 ten
+tento
+této
 ti
+tím
+tímto
 tisíc
 tisíce
 to
 tobě
 tohle
+tohoto
+tom
+tomto
+tomu
+tomuto
 toto
 třeba
 tři
 třináct
 trošku
+trochu
+tu
+tuto
 tvá
 tvé
 tvoje
 tvůj
 ty
+tyto
+těm
+těma
+těmi
+u
 určitě
 už
+v
 vám
 vámi
 vás
@@ -251,13 +347,19 @@ vaši
 ve
 večer
 vedle
+více
 vlastně
+však
+všechen
 všechno
 všichni
 vůbec
 vy
 vždy
+z
+zda
 za
+zde
 zač
 zatímco
 ze
diff --git a/spacy/lang/cs/test_text.py b/spacy/lang/cs/test_text.py
new file mode 100644
index 000000000..e69de29bb

From 56eabcb2f2dd732e1c440468817a99350caf3e51 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Fri, 21 Aug 2020 17:06:33 +0200
Subject: [PATCH 26/71] Adding like_num test for Czech (#5946)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Create lex_attrs.py

Hello,

Czech is currently missing from spaCy, so I would like to help push it forward a little. This file is based on the other languages' lex_attrs.py files, with the number words translated to Czech.

* Update __init__.py

Updated for use with new Czech Lex_attrs file

* Update stop_words.py

* Create test_text.py

* add like_num testing for czech

Co-authored-by: holubvl3 <47881982+holubvl3@users.noreply.github.com>
Co-authored-by: holubvl3 <vilemrousi@gmail.com>
Co-authored-by: Vladimír Holubec <vholubec@arcdata.cz>
---
 spacy/tests/conftest.py          |  5 +++++
 spacy/tests/lang/cs/__init__.py  |  0
 spacy/tests/lang/cs/test_text.py | 26 ++++++++++++++++++++++++++
 3 files changed, 31 insertions(+)
 create mode 100644 spacy/tests/lang/cs/__init__.py
 create mode 100644 spacy/tests/lang/cs/test_text.py

diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index 91b7e4d9d..567bf901c 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -47,6 +47,11 @@ def ca_tokenizer():
     return get_lang_class("ca").Defaults.create_tokenizer()
 
 
+@pytest.fixture(scope="session")
+def cs_tokenizer():
+    return get_lang_class("cs").Defaults.create_tokenizer()
+
+
 @pytest.fixture(scope="session")
 def da_tokenizer():
     return get_lang_class("da").Defaults.create_tokenizer()
diff --git a/spacy/tests/lang/cs/__init__.py b/spacy/tests/lang/cs/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/spacy/tests/lang/cs/test_text.py b/spacy/tests/lang/cs/test_text.py
new file mode 100644
index 000000000..d98961738
--- /dev/null
+++ b/spacy/tests/lang/cs/test_text.py
@@ -0,0 +1,26 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import pytest
+
+
+@pytest.mark.parametrize(
+    "text,match",
+    [
+        ("10", True),
+        ("1", True),
+        ("10.000", True),
+        ("1000", True),
+        ("999,0", True),
+        ("devatenáct", True),
+        ("osmdesát", True),
+        ("kvadrilion", True),
+        ("Pes", False),
+        (",", False),
+        ("1/2", True),
+    ],
+)
+def test_lex_attrs_like_number(cs_tokenizer, text, match):
+    tokens = cs_tokenizer(text)
+    assert len(tokens) == 1
+    assert tokens[0].like_num == match

From b10c7bc56ed74ae205f5e136c6dae59d70b27925 Mon Sep 17 00:00:00 2001
From: idoshr <35264146+idoshr@users.noreply.github.com>
Date: Mon, 24 Aug 2020 15:30:05 +0300
Subject: [PATCH 27/71] Hebrew like num (#5952)

* Update stop_words.py

Hebrew STOP WORDS

* Update stop_words.py

* contributor

* contributor

* add some common domain extensions
support human number 1K/1M....

* support human number 1K/1M....

* hebrew number tokenize
1K/1M implement in EN

* test human tokenize fix

* test

* heb like num
revert human number change

* heb like num
---
 .github/contributors/idoshr.md        | 106 ++++++++++++++++++++++++++
 spacy/lang/en/lex_attrs.py            |   3 +-
 spacy/lang/he/__init__.py             |   2 +
 spacy/lang/he/lex_attrs.py            |  97 +++++++++++++++++++++++
 spacy/lang/he/stop_words.py           |  13 ++--
 spacy/lang/lex_attrs.py               |   2 +-
 spacy/lang/tokenizer_exceptions.py    |   4 +
 spacy/tests/lang/he/test_tokenizer.py |  39 ++++++++++
 8 files changed, 255 insertions(+), 11 deletions(-)
 create mode 100644 .github/contributors/idoshr.md
 create mode 100644 spacy/lang/he/lex_attrs.py

diff --git a/.github/contributors/idoshr.md b/.github/contributors/idoshr.md
new file mode 100644
index 000000000..26e901530
--- /dev/null
+++ b/.github/contributors/idoshr.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI GmbH](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statement below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Ido Shraga           |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 20-09-2020           |
+| GitHub username                | idoshr               |
+| Website (optional)             |                      |
diff --git a/spacy/lang/en/lex_attrs.py b/spacy/lang/en/lex_attrs.py
index bd60b057b..4f6988bd5 100644
--- a/spacy/lang/en/lex_attrs.py
+++ b/spacy/lang/en/lex_attrs.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 from ...attrs import LIKE_NUM
 
-
 _num_words = [
     "zero",
     "one",
@@ -92,7 +91,7 @@ def like_num(text):
         num, denom = text.split("/")
         if num.isdigit() and denom.isdigit():
             return True
-    
+
     text_lower = text.lower()
     if text_lower in _num_words:
         return True
diff --git a/spacy/lang/he/__init__.py b/spacy/lang/he/__init__.py
index 411cdf107..922f61462 100644
--- a/spacy/lang/he/__init__.py
+++ b/spacy/lang/he/__init__.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 from .stop_words import STOP_WORDS
 
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from .lex_attrs import LEX_ATTRS
 from ...language import Language
 from ...attrs import LANG
 from ...util import update_exc
@@ -11,6 +12,7 @@ from ...util import update_exc
 
 class HebrewDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
+    lex_attr_getters.update(LEX_ATTRS)
     lex_attr_getters[LANG] = lambda text: "he"
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)
     stop_words = STOP_WORDS
diff --git a/spacy/lang/he/lex_attrs.py b/spacy/lang/he/lex_attrs.py
new file mode 100644
index 000000000..9eab93ae4
--- /dev/null
+++ b/spacy/lang/he/lex_attrs.py
@@ -0,0 +1,97 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+from ...attrs import LIKE_NUM
+
+_num_words = [
+    "אפס",
+    "אחד",
+    "אחת",
+    "שתיים",
+    "שתים",
+    "שניים",
+    "שנים",
+    "שלוש",
+    "שלושה",
+    "ארבע",
+    "ארבעה",
+    "חמש",
+    "חמישה",
+    "שש",
+    "שישה",
+    "שבע",
+    "שבעה",
+    "שמונה",
+    "תשע",
+    "תשעה",
+    "עשר",
+    "עשרה",
+    "אחד עשר",
+    "אחת עשרה",
+    "שנים עשר",
+    "שתים עשרה",
+    "שלושה עשר",
+    "שלוש עשרה",
+    "ארבעה עשר",
+    "ארבע עשרה",
+    "חמישה עשר",
+    "חמש עשרה",
+    "ששה עשר",
+    "שש עשרה",
+    "שבעה עשר",
+    "שבע עשרה",
+    "שמונה עשר",
+    "שמונה עשרה",
+    "תשעה עשר",
+    "תשע עשרה",
+    "עשרים",
+    "שלושים",
+    "ארבעים",
+    "חמישים",
+    "שישים",
+    "שבעים",
+    "שמונים",
+    "תשעים",
+    "מאה",
+    "אלף",
+    "מליון",
+    "מליארד",
+    "טריליון",
+]
+
+
+_ordinal_words = [
+    "ראשון",
+    "שני",
+    "שלישי",
+    "רביעי",
+    "חמישי",
+    "שישי",
+    "שביעי",
+    "שמיני",
+    "תשיעי",
+    "עשירי",
+]
+
+def like_num(text):
+    if text.startswith(("+", "-", "±", "~")):
+        text = text[1:]
+    text = text.replace(",", "").replace(".", "")
+    if text.isdigit():
+        return True
+
+    if text.count("/") == 1:
+        num, denom = text.split("/")
+        if num.isdigit() and denom.isdigit():
+            return True
+    
+    if text in _num_words:
+        return True
+
+    # Check ordinal number
+    if text in _ordinal_words:
+        return True
+    return False
+
+
+LEX_ATTRS = {LIKE_NUM: like_num}
diff --git a/spacy/lang/he/stop_words.py b/spacy/lang/he/stop_words.py
index a01ec4246..d4ac5e846 100644
--- a/spacy/lang/he/stop_words.py
+++ b/spacy/lang/he/stop_words.py
@@ -43,7 +43,6 @@ STOP_WORDS = set(
 בין
 עם
 עד
-נגר
 על
 אל
 מול
@@ -62,7 +61,7 @@ STOP_WORDS = set(
 עליך
 עלינו
 עליכם
-לעיכן
+עליכן
 עליהם
 עליהן
 כל
@@ -71,8 +70,8 @@ STOP_WORDS = set(
 כך
 ככה
 כזה
+כזאת
 זה
-זות
 אותי
 אותה
 אותם
@@ -95,7 +94,7 @@ STOP_WORDS = set(
 איתכן
 יהיה
 תהיה
-היתי
+הייתי
 היתה
 היה
 להיות
@@ -105,8 +104,6 @@ STOP_WORDS = set(
 עצמם
 עצמן
 עצמנו
-עצמהם
-עצמהן
 מי
 מה
 איפה
@@ -157,6 +154,7 @@ STOP_WORDS = set(
 לאו
 אי
 כלל
+בעד
 נגד
 אם
 עם
@@ -200,7 +198,6 @@ STOP_WORDS = set(
 אשר
 ואילו
 למרות
-אס
 כמו
 כפי
 אז
@@ -208,8 +205,8 @@ STOP_WORDS = set(
 כן
 לכן
 לפיכך
-מאד
 עז
+מאוד
 מעט
 מעטים
 במידה
diff --git a/spacy/lang/lex_attrs.py b/spacy/lang/lex_attrs.py
index c9cd82d7b..254f8706d 100644
--- a/spacy/lang/lex_attrs.py
+++ b/spacy/lang/lex_attrs.py
@@ -10,7 +10,7 @@ from .. import attrs
 _like_email = re.compile(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)").match
 _tlds = set(
     "com|org|edu|gov|net|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|"
-    "name|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|"
+    "name|pro|tel|travel|xyz|icu|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|"
     "ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|"
     "cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|"
     "ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|"
diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py
index 67349916b..13140a230 100644
--- a/spacy/lang/tokenizer_exceptions.py
+++ b/spacy/lang/tokenizer_exceptions.py
@@ -133,6 +133,8 @@ emoticons = set(
 :-]
 [:
 [-:
+[=
+=]
 :o)
 (o:
 :}
@@ -164,6 +166,8 @@ emoticons = set(
 =|
 :|
 :-|
+]=
+=[
 :1
 :P
 :-P
diff --git a/spacy/tests/lang/he/test_tokenizer.py b/spacy/tests/lang/he/test_tokenizer.py
index f138ec6e7..67ad964d8 100644
--- a/spacy/tests/lang/he/test_tokenizer.py
+++ b/spacy/tests/lang/he/test_tokenizer.py
@@ -1,5 +1,6 @@
 # encoding: utf8
 from __future__ import unicode_literals
+from spacy.lang.he.lex_attrs import like_num
 
 import pytest
 
@@ -42,3 +43,41 @@ def test_he_tokenizer_handles_abbreviation(he_tokenizer, text, expected_tokens):
 def test_he_tokenizer_handles_punct(he_tokenizer, text, expected_tokens):
     tokens = he_tokenizer(text)
     assert expected_tokens == [token.text for token in tokens]
+
+
+
+@pytest.mark.parametrize(
+    "text,match",
+    [
+        ("10", True),
+        ("1", True),
+        ("10,000", True),
+        ("10,00", True),
+        ("999.0", True),
+        ("אחד", True),
+        ("שתיים", True),
+        ("מליון", True),
+        ("כלב", False),
+        (",", False),
+        ("1/2", True),
+    ],
+)
+def test_lex_attrs_like_number(he_tokenizer, text, match):
+    tokens = he_tokenizer(text)
+    assert len(tokens) == 1
+    assert tokens[0].like_num == match
+
+
+@pytest.mark.parametrize(
+    "word",
+    [
+        "שלישי",
+        "מליון",
+        "עשירי",
+        "מאה",
+        "עשר",
+        "אחד עשר",
+    ]
+)
+def test_he_lex_attrs_like_number_for_ordinal(word):
+    assert like_num(word)

From 450720aca2cfeb35a94c73b13e4336cdb2c366d3 Mon Sep 17 00:00:00 2001
From: Shashank <42868640+snsten@users.noreply.github.com>
Date: Tue, 25 Aug 2020 14:26:29 +0530
Subject: [PATCH 28/71] Added support for Sanskrit language (#5956)

* Added support for Sanskrit language

* Added tests for lexical attribute like_num
---
 .github/contributors/snsten.md   | 106 +++++++
 spacy/lang/sa/__init__.py        |  24 ++
 spacy/lang/sa/examples.py        |  19 ++
 spacy/lang/sa/lex_attrs.py       | 131 ++++++++
 spacy/lang/sa/stop_words.py      | 518 +++++++++++++++++++++++++++++++
 spacy/tests/conftest.py          |   5 +
 spacy/tests/lang/sa/__init__.py  |   0
 spacy/tests/lang/sa/test_text.py |  45 +++
 8 files changed, 848 insertions(+)
 create mode 100644 .github/contributors/snsten.md
 create mode 100644 spacy/lang/sa/__init__.py
 create mode 100644 spacy/lang/sa/examples.py
 create mode 100644 spacy/lang/sa/lex_attrs.py
 create mode 100644 spacy/lang/sa/stop_words.py
 create mode 100644 spacy/tests/lang/sa/__init__.py
 create mode 100644 spacy/tests/lang/sa/test_text.py

diff --git a/.github/contributors/snsten.md b/.github/contributors/snsten.md
new file mode 100644
index 000000000..0d7c28835
--- /dev/null
+++ b/.github/contributors/snsten.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI GmbH](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statement below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Shashank Shekhar     |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 2020-08-23           |
+| GitHub username                | snsten               |
+| Website (optional)             |                      |
diff --git a/spacy/lang/sa/__init__.py b/spacy/lang/sa/__init__.py
new file mode 100644
index 000000000..8a4533341
--- /dev/null
+++ b/spacy/lang/sa/__init__.py
@@ -0,0 +1,24 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+from .stop_words import STOP_WORDS
+from .lex_attrs import LEX_ATTRS
+
+from ...language import Language
+from ...attrs import LANG
+
+
+class SanskritDefaults(Language.Defaults):
+    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
+    lex_attr_getters.update(LEX_ATTRS)
+    lex_attr_getters[LANG] = lambda text: "sa"
+
+    stop_words = STOP_WORDS
+
+
+class Sanskrit(Language):
+    lang = "sa"
+    Defaults = SanskritDefaults
+
+
+__all__ = ["Sanskrit"]
diff --git a/spacy/lang/sa/examples.py b/spacy/lang/sa/examples.py
new file mode 100644
index 000000000..9d4fa1e49
--- /dev/null
+++ b/spacy/lang/sa/examples.py
@@ -0,0 +1,19 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.sa.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    "अभ्यावहति कल्याणं विविधं वाक् सुभाषिता ।",
+    "मनसि व्याकुले चक्षुः पश्यन्नपि न पश्यति ।",
+    "यस्य बुद्धिर्बलं तस्य निर्बुद्धेस्तु कुतो बलम्?",
+    "परो अपि हितवान् बन्धुः बन्धुः अपि अहितः परः ।",
+    "अहितः देहजः व्याधिः हितम् आरण्यं औषधम् ॥",
+]
diff --git a/spacy/lang/sa/lex_attrs.py b/spacy/lang/sa/lex_attrs.py
new file mode 100644
index 000000000..c33be2ce4
--- /dev/null
+++ b/spacy/lang/sa/lex_attrs.py
@@ -0,0 +1,131 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+from ...attrs import LIKE_NUM
+
+
+# reference 1: https://en.wikibooks.org/wiki/Sanskrit/Numbers
+
+_num_words = [
+    "एकः",
+    "द्वौ",
+    "त्रयः",
+    "चत्वारः",
+    "पञ्च",
+    "षट्",
+    "सप्त",
+    "अष्ट",
+    "नव",
+    "दश",
+    "एकादश",
+    "द्वादश",
+    "त्रयोदश",
+    "चतुर्दश",
+    "पञ्चदश",
+    "षोडश",
+    "सप्तदश",
+    "अष्टादश",
+    "एकान्नविंशति",
+    "विंशति",
+    "एकाविंशति",
+    "द्वाविंशति",
+    "त्रयोविंशति",
+    "चतुर्विंशति",
+    "पञ्चविंशति",
+    "षड्विंशति",
+    "सप्तविंशति",
+    "अष्टाविंशति",
+    "एकान्नत्रिंशत्",
+    "त्रिंशत्",
+    "एकत्रिंशत्",
+    "द्वात्रिंशत्",
+    "त्रयत्रिंशत्",
+    "चतुस्त्रिंशत्",
+    "पञ्चत्रिंशत्",
+    "षट्त्रिंशत्",
+    "सप्तत्रिंशत्",
+    "अष्टात्रिंशत्",
+    "एकोनचत्वारिंशत्",
+    "चत्वारिंशत्",
+    "एकचत्वारिंशत्",
+    "द्वाचत्वारिंशत्",
+    "त्रयश्चत्वारिंशत्",
+    "चतुश्चत्वारिंशत्",
+    "पञ्चचत्वारिंशत्",
+    "षट्चत्वारिंशत्",
+    "सप्तचत्वारिंशत्",
+    "अष्टाचत्वारिंशत्",
+    "एकोनपञ्चाशत्",
+    "पञ्चाशत्",
+    "एकपञ्चाशत्",
+    "द्विपञ्चाशत्",
+    "त्रिपञ्चाशत्",
+    "चतुःपञ्चाशत्",
+    "पञ्चपञ्चाशत्",
+    "षट्पञ्चाशत्",
+    "सप्तपञ्चाशत्",
+    "अष्टपञ्चाशत्",
+    "एकोनषष्ठिः",
+    "षष्ठिः",
+    "एकषष्ठिः",
+    "द्विषष्ठिः",
+    "त्रिषष्ठिः",
+    "चतुःषष्ठिः",
+    "पञ्चषष्ठिः",
+    "षट्षष्ठिः",
+    "सप्तषष्ठिः",
+    "अष्टषष्ठिः",
+    "एकोनसप्ततिः",
+    "सप्ततिः",
+    "एकसप्ततिः",
+    "द्विसप्ततिः",
+    "त्रिसप्ततिः",
+    "चतुःसप्ततिः",
+    "पञ्चसप्ततिः",
+    "षट्सप्ततिः",
+    "सप्तसप्ततिः",
+    "अष्टसप्ततिः",
+    "एकोनाशीतिः",
+    "अशीतिः",
+    "एकाशीतिः",
+    "द्वशीतिः",
+    "त्र्यशीतिः",
+    "चतुरशीतिः",
+    "पञ्चाशीतिः",
+    "षडशीतिः",
+    "सप्ताशीतिः",
+    "अष्टाशीतिः",
+    "एकोननवतिः",
+    "नवतिः",
+    "एकनवतिः",
+    "द्विनवतिः",
+    "त्रिनवतिः",
+    "चतुर्नवतिः",
+    "पञ्चनवतिः",
+    "षण्णवतिः",
+    "सप्तनवतिः",
+    "अष्टनवतिः",
+    "एकोनशतम्",
+    "शतम्"
+]
+
+
+def like_num(text):
+   """
+   Check if text resembles a number
+   """
+   if text.startswith(("+", "-", "±", "~")):
+       text = text[1:]
+   text = text.replace(",", "").replace(".", "")
+   if text.isdigit():
+       return True
+   if text.count("/") == 1:
+       num, denom = text.split("/")
+       if num.isdigit() and denom.isdigit():
+           return True
+   if text in _num_words:
+       return True
+   return False
+
+
+LEX_ATTRS = {LIKE_NUM: like_num}
diff --git a/spacy/lang/sa/stop_words.py b/spacy/lang/sa/stop_words.py
new file mode 100644
index 000000000..aa51ceae0
--- /dev/null
+++ b/spacy/lang/sa/stop_words.py
@@ -0,0 +1,518 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+# Source: https://gist.github.com/Akhilesh28/fe8b8e180f64b72e64751bc31cb6d323
+
+STOP_WORDS = set(
+    """
+अहम्
+आवाम्
+वयम्
+माम्  मा
+आवाम्
+अस्मान्  नः
+मया
+आवाभ्याम्
+अस्माभिस्
+मह्यम्  मे
+आवाभ्याम्  नौ
+अस्मभ्यम्  नः
+मत्
+आवाभ्याम्
+अस्मत्
+मम  मे
+आवयोः
+अस्माकम्  नः
+मयि
+आवयोः
+अस्मासु
+त्वम्
+युवाम्
+यूयम्
+त्वाम्  त्वा
+युवाम्  वाम्
+युष्मान्  वः
+त्वया
+युवाभ्याम्
+युष्माभिः
+तुभ्यम्  ते
+युवाभ्याम्  वाम्
+युष्मभ्यम्  वः
+त्वत्
+युवाभ्याम्
+युष्मत्
+तव  ते
+युवयोः  वाम्
+युष्माकम्  वः
+त्वयि
+युवयोः
+युष्मासु
+सः
+तौ
+ते
+तम्
+तौ
+तान्
+तेन
+ताभ्याम्
+तैः
+तस्मै
+ताभ्याम्
+तेभ्यः
+तस्मात्
+ताभ्याम्
+तेभ्यः
+तस्य
+तयोः
+तेषाम्
+तस्मिन्
+तयोः
+तेषु
+सा
+ते
+ताः
+ताम्
+ते
+ताः
+तया
+ताभ्याम्
+ताभिः
+तस्यै
+ताभ्याम्
+ताभ्यः
+तस्याः
+ताभ्याम्
+ताभ्यः
+तस्य
+तयोः
+तासाम्
+तस्याम्
+तयोः
+तासु
+तत्
+ते
+तानि
+तत्
+ते
+तानि
+तया
+ताभ्याम्
+ताभिः
+तस्यै
+ताभ्याम्
+ताभ्यः
+तस्याः
+ताभ्याम्
+ताभ्यः
+तस्य
+तयोः
+तासाम्
+तस्याम्
+तयोः
+तासु
+अयम्
+इमौ
+इमे
+इमम्
+इमौ
+इमान्
+अनेन
+आभ्याम्
+एभिः
+अस्मै
+आभ्याम्
+एभ्यः
+अस्मात्
+आभ्याम्
+एभ्यः
+अस्य
+अनयोः
+एषाम्
+अस्मिन्
+अनयोः
+एषु
+इयम्
+इमे
+इमाः
+इमाम्
+इमे
+इमाः
+अनया
+आभ्याम्
+आभिः
+अस्यै
+आभ्याम्
+आभ्यः
+अस्याः
+आभ्याम्
+आभ्यः
+अस्याः
+अनयोः
+आसाम्
+अस्याम्
+अनयोः
+आसु
+इदम्
+इमे
+इमानि
+इदम्
+इमे
+इमानि
+अनेन
+आभ्याम्
+एभिः
+अस्मै
+आभ्याम्
+एभ्यः
+अस्मात्
+आभ्याम्
+एभ्यः
+अस्य
+अनयोः
+एषाम्
+अस्मिन्
+अनयोः
+एषु
+एषः
+एतौ
+एते
+एतम्  एनम्
+एतौ  एनौ
+एतान्  एनान्
+एतेन
+एताभ्याम्
+एतैः
+एतस्मै
+एताभ्याम्
+एतेभ्यः
+एतस्मात्
+एताभ्याम्
+एतेभ्यः
+एतस्य
+एतस्मिन्
+एतेषाम्
+एतस्मिन्
+एतस्मिन्
+एतेषु
+एषा
+एते
+एताः
+एताम्  एनाम्
+एते  एने
+एताः  एनाः
+एतया  एनया
+एताभ्याम्
+एताभिः
+एतस्यै
+एताभ्याम्
+एताभ्यः
+एतस्याः
+एताभ्याम्
+एताभ्यः
+एतस्याः
+एतयोः  एनयोः
+एतासाम्
+एतस्याम्
+एतयोः  एनयोः
+एतासु
+एतत्  एतद्
+एते
+एतानि
+एतत्  एतद्  एनत्  एनद्
+एते  एने
+एतानि  एनानि
+एतेन  एनेन
+एताभ्याम्
+एतैः
+एतस्मै
+एताभ्याम्
+एतेभ्यः
+एतस्मात्
+एताभ्याम्
+एतेभ्यः
+एतस्य
+एतयोः  एनयोः
+एतेषाम्
+एतस्मिन्
+एतयोः  एनयोः
+एतेषु
+असौ
+अमू
+अमी
+अमूम्
+अमू
+अमून्
+अमुना
+अमूभ्याम्
+अमीभिः
+अमुष्मै
+अमूभ्याम्
+अमीभ्यः
+अमुष्मात्
+अमूभ्याम्
+अमीभ्यः
+अमुष्य
+अमुयोः
+अमीषाम्
+अमुष्मिन्
+अमुयोः
+अमीषु
+असौ
+अमू
+अमूः
+अमूम्
+अमू
+अमूः
+अमुया
+अमूभ्याम्
+अमूभिः
+अमुष्यै
+अमूभ्याम्
+अमूभ्यः
+अमुष्याः
+अमूभ्याम्
+अमूभ्यः
+अमुष्याः
+अमुयोः
+अमूषाम्
+अमुष्याम्
+अमुयोः
+अमूषु
+अमु
+अमुनी
+अमूनि
+अमु
+अमुनी
+अमूनि
+अमुना
+अमूभ्याम्
+अमीभिः
+अमुष्मै
+अमूभ्याम्
+अमीभ्यः
+अमुष्मात्
+अमूभ्याम्
+अमीभ्यः
+अमुष्य
+अमुयोः
+अमीषाम्
+अमुष्मिन्
+अमुयोः
+अमीषु
+कः
+कौ
+के
+कम्
+कौ
+कान्
+केन
+काभ्याम्
+कैः
+कस्मै
+काभ्याम्
+केभ्य
+कस्मात्
+काभ्याम्
+केभ्य
+कस्य
+कयोः
+केषाम्
+कस्मिन्
+कयोः
+केषु
+का
+के
+काः
+काम्
+के
+काः
+कया
+काभ्याम्
+काभिः
+कस्यै
+काभ्याम्
+काभ्यः
+कस्याः
+काभ्याम्
+काभ्यः
+कस्याः
+कयोः
+कासाम्
+कस्याम्
+कयोः
+कासु
+किम्
+के
+कानि
+किम्
+के
+कानि
+केन
+काभ्याम्
+कैः
+कस्मै
+काभ्याम्
+केभ्य
+कस्मात्
+काभ्याम्
+केभ्य
+कस्य
+कयोः
+केषाम्
+कस्मिन्
+कयोः
+केषु
+भवान्
+भवन्तौ
+भवन्तः
+भवन्तम्
+भवन्तौ
+भवतः
+भवता
+भवद्भ्याम्
+भवद्भिः
+भवते
+भवद्भ्याम्
+भवद्भ्यः
+भवतः
+भवद्भ्याम्
+भवद्भ्यः
+भवतः
+भवतोः
+भवताम्
+भवति
+भवतोः
+भवत्सु
+भवती
+भवत्यौ
+भवत्यः
+भवतीम्
+भवत्यौ
+भवतीः
+भवत्या
+भवतीभ्याम्
+भवतीभिः
+भवत्यै
+भवतीभ्याम्
+भवतीभिः
+भवत्याः
+भवतीभ्याम्
+भवतीभिः
+भवत्याः
+भवत्योः
+भवतीनाम्
+भवत्याम्
+भवत्योः
+भवतीषु
+भवत्
+भवती
+भवन्ति
+भवत्
+भवती
+भवन्ति
+भवता
+भवद्भ्याम्
+भवद्भिः
+भवते
+भवद्भ्याम्
+भवद्भ्यः
+भवतः
+भवद्भ्याम्
+भवद्भ्यः
+भवतः
+भवतोः
+भवताम्
+भवति
+भवतोः
+भवत्सु
+अये
+अरे
+अरेरे
+अविधा
+असाधुना
+अस्तोभ
+अहह
+अहावस्
+आम्
+आर्यहलम्
+आह
+आहो
+इस्
+उम्
+उवे
+काम्
+कुम्
+चमत्
+टसत्
+दृन्
+धिक्
+पाट्
+फत्
+फाट्
+फुडुत्
+बत
+बाल्
+वट्
+व्यवस्तोभति व्यवस्तुभ्
+षाट्
+स्तोभ
+हुम्मा
+हूम्
+अति
+अधि
+अनु
+अप
+अपि
+अभि
+अव
+आ
+उद्
+उप
+नि
+निर्
+परा
+परि
+प्र
+प्रति
+वि
+सम्
+अथवा उत
+अन्यथा
+इव
+च
+चेत् यदि
+तु परन्तु
+यतः करणेन हि यतस् यदर्थम् यदर्थे यर्हि यथा यत्कारणम् येन ही हिन
+यथा यतस्
+यद्यपि
+यात् अवधेस् यावति
+येन प्रकारेण
+स्थाने
+अह
+एव
+एवम्
+कच्चित्
+कु
+कुवित्
+कूपत्
+च
+चण्
+चेत्
+तत्र
+नकिम्
+नह
+नुनम्
+नेत्
+भूयस्
+मकिम्
+मकिर्
+यत्र
+युगपत्
+वा
+शश्वत्
+सूपत्
+ह
+हन्त
+हि
+""".split()
+)
diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index 567bf901c..bf9851178 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -212,6 +212,11 @@ def ru_lemmatizer():
     return get_lang_class("ru").Defaults.create_lemmatizer()
 
 
+@pytest.fixture(scope="session")
+def sa_tokenizer():
+    return get_lang_class("sa").Defaults.create_tokenizer()
+
+
 @pytest.fixture(scope="session")
 def sr_tokenizer():
     return get_lang_class("sr").Defaults.create_tokenizer()
diff --git a/spacy/tests/lang/sa/__init__.py b/spacy/tests/lang/sa/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/spacy/tests/lang/sa/test_text.py b/spacy/tests/lang/sa/test_text.py
new file mode 100644
index 000000000..7c961bdae
--- /dev/null
+++ b/spacy/tests/lang/sa/test_text.py
@@ -0,0 +1,45 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import pytest
+
+
+def test_sa_tokenizer_handles_long_text(sa_tokenizer):
+    text = """नानाविधानि दिव्यानि नानावर्णाकृतीनि च।।"""
+    tokens = sa_tokenizer(text)
+    assert len(tokens) == 6
+
+
+@pytest.mark.parametrize(
+    "text,length",
+    [
+        ("श्री भगवानुवाच पश्य मे पार्थ रूपाणि शतशोऽथ सहस्रशः।", 9,),
+        ("गुणान् सर्वान् स्वभावो मूर्ध्नि वर्तते ।", 6),
+    ],
+)
+def test_sa_tokenizer_handles_cnts(sa_tokenizer, text, length):
+    tokens = sa_tokenizer(text)
+    assert len(tokens) == length
+
+
+@pytest.mark.parametrize(
+    "text,match",
+    [
+        ("10", True),
+        ("1", True),
+        ("10.000", True),
+        ("1000", True),
+        ("999,0", True),
+        ("एकः ", True),
+        ("दश", True),
+        ("पञ्चदश", True),
+        ("चत्वारिंशत् ", True),
+        ("कूपे", False),
+        (",", False),
+        ("1/2", True),
+    ],
+)
+def test_lex_attrs_like_number(sa_tokenizer, text, match):
+    tokens = sa_tokenizer(text)
+    assert len(tokens) == 1
+    assert tokens[0].like_num == match

From 332803eda9e9999434d4da41e56d1689f353bbd8 Mon Sep 17 00:00:00 2001
From: Hiroshi Matsuda <40782025+hiroshi-matsuda-rit@users.noreply.github.com>
Date: Tue, 25 Aug 2020 21:16:24 +0900
Subject: [PATCH 29/71] fix ja leading spaces (#5969)

* change condition for space after

* add NAUGHTY_STRINGS test example
---
 spacy/lang/ja/__init__.py                     | 4 ++--
 spacy/tests/tokenizer/test_naughty_strings.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/spacy/lang/ja/__init__.py b/spacy/lang/ja/__init__.py
index 30e73fd84..80cb7a837 100644
--- a/spacy/lang/ja/__init__.py
+++ b/spacy/lang/ja/__init__.py
@@ -98,7 +98,7 @@ def get_dtokens_and_spaces(dtokens, text, gap_tag="空白"):
         return text_dtokens, text_spaces
 
     # align words and dtokens by referring text, and insert gap tokens for the space char spans
-    for word, dtoken in zip(words, dtokens):
+    for i, (word, dtoken) in enumerate(zip(words, dtokens)):
         # skip all space tokens
         if word.isspace():
             continue
@@ -119,7 +119,7 @@ def get_dtokens_and_spaces(dtokens, text, gap_tag="空白"):
         text_spaces.append(False)
         text_pos += len(word)
         # poll a space char after the word
-        if text_pos < len(text) and text[text_pos] == " ":
+        if i + 1 < len(dtokens) and dtokens[i + 1].surface == " ":
             text_spaces[-1] = True
             text_pos += 1
 
diff --git a/spacy/tests/tokenizer/test_naughty_strings.py b/spacy/tests/tokenizer/test_naughty_strings.py
index 36c69611e..9737b15cf 100644
--- a/spacy/tests/tokenizer/test_naughty_strings.py
+++ b/spacy/tests/tokenizer/test_naughty_strings.py
@@ -32,6 +32,7 @@ NAUGHTY_STRINGS = [
     r"₀₁₂",
     r"⁰⁴⁵₀₁₂",
     r"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็",
+    r" ̄  ̄",
     # Two-Byte Characters
     r"田中さんにあげて下さい",
     r"パーティーへ行かないか",

From 7d7b65ffd42c56ce3a0aa73b18196eb20a1dcc24 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Wed, 26 Aug 2020 04:00:49 +0200
Subject: [PATCH 30/71] Fix raw strings in URL pattern (#5972)

Add missing raw string specifiers.
---
 spacy/lang/tokenizer_exceptions.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py
index 13140a230..c903448b0 100644
--- a/spacy/lang/tokenizer_exceptions.py
+++ b/spacy/lang/tokenizer_exceptions.py
@@ -37,13 +37,13 @@ URL_PATTERN = (
     r"|"
     # host & domain names
     # mods: match is case-sensitive, so include [A-Z]
-      "(?:"  # noqa
-        "(?:"
-          "[A-Za-z0-9\u00a1-\uffff]"
-          "[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
-        ")?"
-        "[A-Za-z0-9\u00a1-\uffff]\."
-      ")+"
+      r"(?:"  # noqa
+        r"(?:"
+          r"[A-Za-z0-9\u00a1-\uffff]"
+          r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
+        r")?"
+        r"[A-Za-z0-9\u00a1-\uffff]\."
+      r")+"
     # TLD identifier
     # mods: use ALPHA_LOWER instead of a wider range so that this doesn't match
     # strings like "lower.Upper", which can be split on "." by infixes in some

From caf23462eb3bc0adb05300565b06697dcf7d1b1a Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Wed, 26 Aug 2020 15:23:59 +0200
Subject: [PATCH 31/71] Add 3rd party licenses (#5959)

---
 licenses/3rd_party_licenses.txt | 38 +++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 licenses/3rd_party_licenses.txt

diff --git a/licenses/3rd_party_licenses.txt b/licenses/3rd_party_licenses.txt
new file mode 100644
index 000000000..0aeef5507
--- /dev/null
+++ b/licenses/3rd_party_licenses.txt
@@ -0,0 +1,38 @@
+Third Party Licenses for spaCy
+==============================
+
+NumPy
+-----
+
+* Files: setup.py
+
+Copyright (c) 2005-2020, NumPy Developers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+       copyright notice, this list of conditions and the following
+       disclaimer in the documentation and/or other materials provided
+       with the distribution.
+
+    * Neither the name of the NumPy Developers nor the names of any
+       contributors may be used to endorse or promote products derived
+       from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

From 9002bea29f30438ed85cc61a11e5547de8318acb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Guti=C3=A9rrez?= <whan.kharlos@gmail.com>
Date: Mon, 31 Aug 2020 04:44:56 -0600
Subject: [PATCH 32/71] Update suffixes example (#5989)

* Update suffixes example

The current example will throw `TypeError: can only concatenate list (not "tuple") to list`

* Signing Contributor Agreement
---
 .github/contributors/jgutix.md            | 106 ++++++++++++++++++++++
 website/docs/usage/linguistic-features.md |   2 +-
 2 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 .github/contributors/jgutix.md

diff --git a/.github/contributors/jgutix.md b/.github/contributors/jgutix.md
new file mode 100644
index 000000000..4bda9486b
--- /dev/null
+++ b/.github/contributors/jgutix.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI GmbH](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statement below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Juan Gutiérrez       |
+| Company name (if applicable)   | Ojtli                |
+| Title or role (if applicable)  |                      |
+| Date                           | 2020-08-28           |
+| GitHub username                | jgutix               |
+| Website (optional)             | ojtli.app            |
diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md
index 9031a356f..53ea2dfa6 100644
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@@ -915,7 +915,7 @@ expressions – for example,
 [`compile_suffix_regex`](/api/top-level#util.compile_suffix_regex):
 
 ```python
-suffixes = nlp.Defaults.suffixes + (r'''-+$''',)
+suffixes = nlp.Defaults.suffixes + [r'''-+$''',]
 suffix_regex = spacy.util.compile_suffix_regex(suffixes)
 nlp.tokenizer.suffix_search = suffix_regex.search
 ```

From f7a25d69f798841fcf54d924a6c84b784b2bc882 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Tue, 1 Sep 2020 21:57:52 +0200
Subject: [PATCH 33/71] Bugfix in merge_entities (#6005)

* failing test

* bugfix
---
 spacy/tests/regression/test_issue5918.py | 31 ++++++++++++++++++++++++
 spacy/tokens/_retokenize.pyx             |  6 +++--
 2 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 spacy/tests/regression/test_issue5918.py

diff --git a/spacy/tests/regression/test_issue5918.py b/spacy/tests/regression/test_issue5918.py
new file mode 100644
index 000000000..2dee26d82
--- /dev/null
+++ b/spacy/tests/regression/test_issue5918.py
@@ -0,0 +1,31 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+from spacy.lang.en import English
+from spacy.pipeline import merge_entities, EntityRuler
+
+
+def test_issue5918():
+    # Test edge case when merging entities.
+    nlp = English()
+    patterns = [
+        {"label": "ORG", "pattern": "Digicon Inc"},
+        {"label": "ORG", "pattern": "Rotan Mosle Inc's"},
+        {"label": "ORG", "pattern": "Rotan Mosle Technology Partners Ltd"},
+    ]
+    ruler = EntityRuler(nlp)
+    ruler.add_patterns(patterns)
+    nlp.add_pipe(ruler)
+
+    text = """
+        Digicon Inc said it has completed the previously-announced disposition
+        of its computer systems division to an investment group led by
+        Rotan Mosle Inc's Rotan Mosle Technology Partners Ltd affiliate.
+        """
+    doc = nlp(text)
+    assert len(doc.ents) == 3
+    # make it so that the third span's head is within the entity (ent_iob=I)
+    # bug #5918 would wrongly transfer that I to the full entity, resulting in 2 instead of 3 final ents.
+    doc[29].head = doc[33]
+    doc = merge_entities(doc)
+    assert len(doc.ents) == 3
diff --git a/spacy/tokens/_retokenize.pyx b/spacy/tokens/_retokenize.pyx
index ce8e510d6..abc9b731b 100644
--- a/spacy/tokens/_retokenize.pyx
+++ b/spacy/tokens/_retokenize.pyx
@@ -175,6 +175,8 @@ def _merge(Doc doc, merges):
         spans.append(span)
         # House the new merged token where it starts
         token = &doc.c[start]
+        start_ent_iob = doc.c[start].ent_iob
+        start_ent_type = doc.c[start].ent_type
         # Initially set attributes to attributes of span root
         token.tag = doc.c[span.root.i].tag
         token.pos = doc.c[span.root.i].pos
@@ -187,8 +189,8 @@ def _merge(Doc doc, merges):
             merged_iob = 3
             # If start token is I-ENT and previous token is of the same
             # type, then I-ENT (could check I-ENT from start to span root)
-            if doc.c[start].ent_iob == 1 and start > 0 \
-                    and doc.c[start].ent_type == token.ent_type \
+            if start_ent_iob == 1 and start > 0 \
+                    and start_ent_type == token.ent_type \
                     and doc.c[start - 1].ent_type == token.ent_type:
                 merged_iob = 1
         token.ent_iob = merged_iob

From 92d7832a86b9bb525cfc02f97378712ff4770cfb Mon Sep 17 00:00:00 2001
From: Marek Grzenkowicz <chopeen@gmail.com>
Date: Wed, 2 Sep 2020 15:15:45 +0200
Subject: [PATCH 34/71] Fix off-by-one error for best iteration calculation
 (closes #6014) (#6016)

---
 spacy/cli/train.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index e24aa8a95..0614c7519 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -554,9 +554,10 @@ def train(
                         iter_since_best = 0
                         best_score = current_score
                     if iter_since_best >= n_early_stopping:
+                        iter_current = i + 1
                         msg.text(
                             "Early stopping, best iteration "
-                            "is: {}".format(i - iter_since_best)
+                            "is: {}".format(iter_current - iter_since_best)
                         )
                         msg.text(
                             "Best score = {}; Final iteration "

From 960d9cfadcd6449db15c05838f3c7c1d0c56fed5 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 31 Aug 2020 20:04:26 +0200
Subject: [PATCH 35/71] Officially support DependencyMatcher

Add official support for the `DependencyMatcher`. Redesign the pattern
specification. Fix and extend operator implementations. Update API docs
and add usage docs.

Patterns
--------

Refactor pattern structure to:

```
{
  "LEFT_ID": str,
  "REL_OP": str,
  "RIGHT_ID": str,
  "RIGHT_ATTRS": dict,
}
```

The first node contains only `RIGHT_ID` and `RIGHT_ATTRS` and all
subsequent nodes contain all four keys.

New operators
-------------

Because of the way patterns are constructed from left to right, it's
helpful to have `follows` operators along with `precedes` operators. Add
operators for simple precedes / follows alongside immediate precedes /
follows.

* `.*`: precedes
* `;`: immediately follows
* `;*`: follows

Operator fixes
--------------

* `<` and `<<` do not include the node itself
* Fix reversed order for all operators involving linear precedence (`.`,
  all sibling operators)
* Linear precedence operators do not match nodes outside the same parse

Additional fixes
----------------

* Use v3 Matcher API
* Support `get` and `remove`
* Support pickling
---
 spacy/errors.py                               |  15 +-
 spacy/matcher/dependencymatcher.pyx           | 198 +++++----
 .../tests/matcher/test_dependency_matcher.py  | 386 ++++++++++++++++++
 spacy/tests/matcher/test_matcher_api.py       |  81 +---
 spacy/tests/regression/test_issue4501-5000.py |  26 --
 website/docs/api/dependencymatcher.md         | 244 +++++++----
 website/docs/images/dep-match-diagram.svg     |  64 +++
 website/docs/images/displacy-dep-founded.html |  58 +++
 website/docs/usage/rule-based-matching.md     | 271 +++++++++++-
 website/docs/usage/v3.md                      |   4 +-
 10 files changed, 1069 insertions(+), 278 deletions(-)
 create mode 100644 spacy/tests/matcher/test_dependency_matcher.py
 create mode 100644 website/docs/images/dep-match-diagram.svg
 create mode 100644 website/docs/images/displacy-dep-founded.html

diff --git a/spacy/errors.py b/spacy/errors.py
index be71de820..4ab4e5421 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -284,12 +284,12 @@ class Errors:
             "Span objects, or dicts if set to manual=True.")
     E097 = ("Invalid pattern: expected token pattern (list of dicts) or "
             "phrase pattern (string) but got:\n{pattern}")
-    E098 = ("Invalid pattern specified: expected both SPEC and PATTERN.")
-    E099 = ("First node of pattern should be a root node. The root should "
-            "only contain NODE_NAME.")
-    E100 = ("Nodes apart from the root should contain NODE_NAME, NBOR_NAME and "
-            "NBOR_RELOP.")
-    E101 = ("NODE_NAME should be a new node and NBOR_NAME should already have "
+    E098 = ("Invalid pattern: expected both RIGHT_ID and RIGHT_ATTRS.")
+    E099 = ("Invalid pattern: the first node of pattern should be an anchor "
+            "node. The node should only contain RIGHT_ID and RIGHT_ATTRS.")
+    E100 = ("Nodes other than the anchor node should all contain LEFT_ID, "
+            "REL_OP and RIGHT_ID.")
+    E101 = ("RIGHT_ID should be a new node and LEFT_ID should already "
             "have been declared in previous edges.")
     E102 = ("Can't merge non-disjoint spans. '{token}' is already part of "
             "tokens to merge. If you want to find the longest non-overlapping "
@@ -652,6 +652,9 @@ class Errors:
              "'{chunk}'. Tokenizer exceptions are only allowed to specify "
              "`ORTH` and `NORM`.")
     E1006 = ("Unable to initialize {name} model with 0 labels.")
+    E1007 = ("Unsupported DependencyMatcher operator '{op}'.")
+    E1008 = ("Invalid pattern: each pattern should be a list of dicts. Check "
+             "that you are providing a list of patterns as `List[List[dict]]`.")
 
 
 @add_codes
diff --git a/spacy/matcher/dependencymatcher.pyx b/spacy/matcher/dependencymatcher.pyx
index e0a54e6f1..067b2167c 100644
--- a/spacy/matcher/dependencymatcher.pyx
+++ b/spacy/matcher/dependencymatcher.pyx
@@ -1,16 +1,16 @@
 # cython: infer_types=True, profile=True
-from cymem.cymem cimport Pool
-from preshed.maps cimport PreshMap
-from libcpp cimport bool
+from typing import List
 
 import numpy
 
+from cymem.cymem cimport Pool
+
 from .matcher cimport Matcher
 from ..vocab cimport Vocab
 from ..tokens.doc cimport Doc
 
-from .matcher import unpickle_matcher
 from ..errors import Errors
+from ..tokens import Span
 
 
 DELIMITER = "||"
@@ -22,36 +22,52 @@ cdef class DependencyMatcher:
     """Match dependency parse tree based on pattern rules."""
     cdef Pool mem
     cdef readonly Vocab vocab
-    cdef readonly Matcher token_matcher
+    cdef readonly Matcher matcher
     cdef public object _patterns
+    cdef public object _raw_patterns
     cdef public object _keys_to_token
     cdef public object _root
-    cdef public object _entities
     cdef public object _callbacks
     cdef public object _nodes
     cdef public object _tree
+    cdef public object _ops
 
-    def __init__(self, vocab):
+    def __init__(self, vocab, *, validate=False):
         """Create the DependencyMatcher.
 
         vocab (Vocab): The vocabulary object, which must be shared with the
             documents the matcher will operate on.
+        validate (bool): Whether patterns should be validated, passed to
+            Matcher as `validate`.
         """
         size = 20
-        # TODO: make matcher work with validation
-        self.token_matcher = Matcher(vocab, validate=False)
+        self.matcher = Matcher(vocab, validate=validate)
         self._keys_to_token = {}
         self._patterns = {}
+        self._raw_patterns = {}
         self._root = {}
         self._nodes = {}
         self._tree = {}
-        self._entities = {}
         self._callbacks = {}
         self.vocab = vocab
         self.mem = Pool()
+        self._ops = {
+            "<": self.dep,
+            ">": self.gov,
+            "<<": self.dep_chain,
+            ">>": self.gov_chain,
+            ".": self.imm_precede,
+            ".*": self.precede,
+            ";": self.imm_follow,
+            ";*": self.follow,
+            "$+": self.imm_right_sib,
+            "$-": self.imm_left_sib,
+            "$++": self.right_sib,
+            "$--": self.left_sib,
+        }
 
     def __reduce__(self):
-        data = (self.vocab, self._patterns,self._tree, self._callbacks)
+        data = (self.vocab, self._raw_patterns, self._callbacks)
         return (unpickle_matcher, data, None, None)
 
     def __len__(self):
@@ -74,54 +90,61 @@ cdef class DependencyMatcher:
         idx = 0
         visited_nodes = {}
         for relation in pattern:
-            if "PATTERN" not in relation or "SPEC" not in relation:
+            if not isinstance(relation, dict):
+                raise ValueError(Errors.E1008)
+            if "RIGHT_ATTRS" not in relation and "RIGHT_ID" not in relation:
                 raise ValueError(Errors.E098.format(key=key))
             if idx == 0:
                 if not(
-                    "NODE_NAME" in relation["SPEC"]
-                    and "NBOR_RELOP" not in relation["SPEC"]
-                    and "NBOR_NAME" not in relation["SPEC"]
+                    "RIGHT_ID" in relation
+                    and "REL_OP" not in relation
+                    and "LEFT_ID" not in relation
                 ):
                     raise ValueError(Errors.E099.format(key=key))
-                visited_nodes[relation["SPEC"]["NODE_NAME"]] = True
+                visited_nodes[relation["RIGHT_ID"]] = True
             else:
                 if not(
-                    "NODE_NAME" in relation["SPEC"]
-                    and "NBOR_RELOP" in relation["SPEC"]
-                    and "NBOR_NAME" in relation["SPEC"]
+                    "RIGHT_ID" in relation
+                    and "RIGHT_ATTRS" in relation
+                    and "REL_OP" in relation
+                    and "LEFT_ID" in relation
                 ):
                     raise ValueError(Errors.E100.format(key=key))
                 if (
-                    relation["SPEC"]["NODE_NAME"] in visited_nodes
-                    or relation["SPEC"]["NBOR_NAME"] not in visited_nodes
+                    relation["RIGHT_ID"] in visited_nodes
+                    or relation["LEFT_ID"] not in visited_nodes
                 ):
                     raise ValueError(Errors.E101.format(key=key))
-                visited_nodes[relation["SPEC"]["NODE_NAME"]] = True
-                visited_nodes[relation["SPEC"]["NBOR_NAME"]] = True
+                if relation["REL_OP"] not in self._ops:
+                    raise ValueError(Errors.E1007.format(op=relation["REL_OP"]))
+                visited_nodes[relation["RIGHT_ID"]] = True
+                visited_nodes[relation["LEFT_ID"]] = True
             idx = idx + 1
 
-    def add(self, key, patterns, *_patterns, on_match=None):
+    def add(self, key, patterns, *, on_match=None):
         """Add a new matcher rule to the matcher.
 
         key (str): The match ID.
         patterns (list): The patterns to add for the given key.
         on_match (callable): Optional callback executed on match.
         """
-        if patterns is None or hasattr(patterns, "__call__"):  # old API
-            on_match = patterns
-            patterns = _patterns
+        if on_match is not None and not hasattr(on_match, "__call__"):
+            raise ValueError(Errors.E171.format(arg_type=type(on_match)))
+        if patterns is None or not isinstance(patterns, List):  # old API
+            raise ValueError(Errors.E948.format(arg_type=type(patterns)))
         for pattern in patterns:
             if len(pattern) == 0:
                 raise ValueError(Errors.E012.format(key=key))
-            self.validate_input(pattern,key)
+            self.validate_input(pattern, key)
         key = self._normalize_key(key)
+        self._raw_patterns.setdefault(key, [])
+        self._raw_patterns[key].extend(patterns)
         _patterns = []
         for pattern in patterns:
             token_patterns = []
             for i in range(len(pattern)):
-                token_pattern = [pattern[i]["PATTERN"]]
+                token_pattern = [pattern[i]["RIGHT_ATTRS"]]
                 token_patterns.append(token_pattern)
-            # self.patterns.append(token_patterns)
             _patterns.append(token_patterns)
         self._patterns.setdefault(key, [])
         self._callbacks[key] = on_match
@@ -135,7 +158,7 @@ cdef class DependencyMatcher:
             # TODO: Better ways to hash edges in pattern?
             for j in range(len(_patterns[i])):
                 k = self._normalize_key(unicode(key) + DELIMITER + unicode(i) + DELIMITER + unicode(j))
-                self.token_matcher.add(k, [_patterns[i][j]])
+                self.matcher.add(k, [_patterns[i][j]])
                 _keys_to_token[k] = j
             _keys_to_token_list.append(_keys_to_token)
         self._keys_to_token.setdefault(key, [])
@@ -144,14 +167,14 @@ cdef class DependencyMatcher:
         for pattern in patterns:
             nodes = {}
             for i in range(len(pattern)):
-                nodes[pattern[i]["SPEC"]["NODE_NAME"]] = i
+                nodes[pattern[i]["RIGHT_ID"]] = i
             _nodes_list.append(nodes)
         self._nodes.setdefault(key, [])
         self._nodes[key].extend(_nodes_list)
         # Create an object tree to traverse later on. This data structure
         # enables easy tree pattern match. Doc-Token based tree cannot be
         # reused since it is memory-heavy and tightly coupled with the Doc.
-        self.retrieve_tree(patterns, _nodes_list,key)
+        self.retrieve_tree(patterns, _nodes_list, key)
 
     def retrieve_tree(self, patterns, _nodes_list, key):
         _heads_list = []
@@ -161,13 +184,13 @@ cdef class DependencyMatcher:
             root = -1
             for j in range(len(patterns[i])):
                 token_pattern = patterns[i][j]
-                if ("NBOR_RELOP" not in token_pattern["SPEC"]):
+                if ("REL_OP" not in token_pattern):
                     heads[j] = ('root', j)
                     root = j
                 else:
                     heads[j] = (
-                        token_pattern["SPEC"]["NBOR_RELOP"],
-                        _nodes_list[i][token_pattern["SPEC"]["NBOR_NAME"]]
+                        token_pattern["REL_OP"],
+                        _nodes_list[i][token_pattern["LEFT_ID"]]
                     )
             _heads_list.append(heads)
             _root_list.append(root)
@@ -202,11 +225,21 @@ cdef class DependencyMatcher:
         RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
         """
         key = self._normalize_key(key)
-        if key not in self._patterns:
+        if key not in self._raw_patterns:
             return default
-        return (self._callbacks[key], self._patterns[key])
+        return (self._callbacks[key], self._raw_patterns[key])
 
-    def __call__(self, Doc doc):
+    def remove(self, key):
+        key = self._normalize_key(key)
+        if not key in self._patterns:
+            raise ValueError(Errors.E175.format(key=key))
+        self._patterns.pop(key)
+        self._raw_patterns.pop(key)
+        self._nodes.pop(key)
+        self._tree.pop(key)
+        self._root.pop(key)
+
+    def __call__(self, object doclike):
         """Find all token sequences matching the supplied pattern.
 
         doclike (Doc or Span): The document to match over.
@@ -214,8 +247,14 @@ cdef class DependencyMatcher:
             describing the matches. A match tuple describes a span
             `doc[start:end]`. The `label_id` and `key` are both integers.
         """
+        if isinstance(doclike, Doc):
+            doc = doclike
+        elif isinstance(doclike, Span):
+            doc = doclike.as_doc()
+        else:
+            raise ValueError(Errors.E195.format(good="Doc or Span", got=type(doclike).__name__))
         matched_key_trees = []
-        matches = self.token_matcher(doc)
+        matches = self.matcher(doc)
         for key in list(self._patterns.keys()):
             _patterns_list = self._patterns[key]
             _keys_to_token_list = self._keys_to_token[key]
@@ -244,26 +283,26 @@ cdef class DependencyMatcher:
                 length = len(_nodes)
 
                 matched_trees = []
-                self.recurse(_tree,id_to_position,_node_operator_map,0,[],matched_trees)
-                matched_key_trees.append((key,matched_trees))
-
-            for i, (ent_id, nodes) in enumerate(matched_key_trees):
-                on_match = self._callbacks.get(ent_id)
+                self.recurse(_tree, id_to_position, _node_operator_map, 0, [], matched_trees)
+                for matched_tree in matched_trees:
+                    matched_key_trees.append((key, matched_tree))
+            for i, (match_id, nodes) in enumerate(matched_key_trees):
+                on_match = self._callbacks.get(match_id)
                 if on_match is not None:
                     on_match(self, doc, i, matched_key_trees)
         return matched_key_trees
 
-    def recurse(self,tree,id_to_position,_node_operator_map,int patternLength,visited_nodes,matched_trees):
-        cdef bool isValid;
-        if(patternLength == len(id_to_position.keys())):
+    def recurse(self, tree, id_to_position, _node_operator_map, int patternLength, visited_nodes, matched_trees):
+        cdef bint isValid;
+        if patternLength == len(id_to_position.keys()):
             isValid = True
             for node in range(patternLength):
-                if(node in tree):
+                if node in tree:
                     for idx, (relop,nbor) in enumerate(tree[node]):
                         computed_nbors = numpy.asarray(_node_operator_map[visited_nodes[node]][relop])
                         isNbor = False
                         for computed_nbor in computed_nbors:
-                            if(computed_nbor.i == visited_nodes[nbor]):
+                            if computed_nbor.i == visited_nodes[nbor]:
                                 isNbor = True
                         isValid = isValid & isNbor
             if(isValid):
@@ -271,14 +310,14 @@ cdef class DependencyMatcher:
             return
         allPatternNodes = numpy.asarray(id_to_position[patternLength])
         for patternNode in allPatternNodes:
-            self.recurse(tree,id_to_position,_node_operator_map,patternLength+1,visited_nodes+[patternNode],matched_trees)
+            self.recurse(tree, id_to_position, _node_operator_map, patternLength+1, visited_nodes+[patternNode], matched_trees)
 
     # Given a node and an edge operator, to return the list of nodes
     # from the doc that belong to node+operator. This is used to store
     # all the results beforehand to prevent unnecessary computation while
     # pattern matching
     # _node_operator_map[node][operator] = [...]
-    def get_node_operator_map(self,doc,tree,id_to_position,nodes,root):
+    def get_node_operator_map(self, doc, tree, id_to_position, nodes, root):
         _node_operator_map = {}
         all_node_indices = nodes.values()
         all_operators = []
@@ -295,24 +334,14 @@ cdef class DependencyMatcher:
             _node_operator_map[node] = {}
             for operator in all_operators:
                 _node_operator_map[node][operator] = []
-        # Used to invoke methods for each operator
-        switcher = {
-            "<": self.dep,
-            ">": self.gov,
-            "<<": self.dep_chain,
-            ">>": self.gov_chain,
-            ".": self.imm_precede,
-            "$+": self.imm_right_sib,
-            "$-": self.imm_left_sib,
-            "$++": self.right_sib,
-            "$--": self.left_sib
-        }
         for operator in all_operators:
             for node in all_nodes:
-                _node_operator_map[node][operator] = switcher.get(operator)(doc,node)
+                _node_operator_map[node][operator] = self._ops.get(operator)(doc, node)
         return _node_operator_map
 
     def dep(self, doc, node):
+        if doc[node].head == doc[node]:
+            return []
         return [doc[node].head]
 
     def gov(self,doc,node):
@@ -322,36 +351,51 @@ cdef class DependencyMatcher:
         return list(doc[node].ancestors)
 
     def gov_chain(self, doc, node):
-        return list(doc[node].subtree)
+        return [t for t in doc[node].subtree if t != doc[node]]
 
     def imm_precede(self, doc, node):
-        if node > 0:
+        sent = self._get_sent(doc[node])
+        if node < len(doc) - 1 and doc[node + 1] in sent:
+            return [doc[node + 1]]
+        return []
+
+    def precede(self, doc, node):
+        sent = self._get_sent(doc[node])
+        return [doc[i] for i in range(node + 1, sent.end)]
+
+    def imm_follow(self, doc, node):
+        sent = self._get_sent(doc[node])
+        if node > 0 and doc[node - 1] in sent:
             return [doc[node - 1]]
         return []
 
+    def follow(self, doc, node):
+        sent = self._get_sent(doc[node])
+        return [doc[i] for i in range(sent.start, node)]
+
     def imm_right_sib(self, doc, node):
         for child in list(doc[node].head.children):
-            if child.i == node - 1:
+            if child.i == node + 1:
                 return [doc[child.i]]
         return []
 
     def imm_left_sib(self, doc, node):
         for child in list(doc[node].head.children):
-            if child.i == node + 1:
+            if child.i == node - 1:
                 return [doc[child.i]]
         return []
 
     def right_sib(self, doc, node):
         candidate_children = []
         for child in list(doc[node].head.children):
-            if child.i < node:
+            if child.i > node:
                 candidate_children.append(doc[child.i])
         return candidate_children
 
     def left_sib(self, doc, node):
         candidate_children = []
         for child in list(doc[node].head.children):
-            if child.i > node:
+            if child.i < node:
                 candidate_children.append(doc[child.i])
         return candidate_children
 
@@ -360,3 +404,15 @@ cdef class DependencyMatcher:
             return self.vocab.strings.add(key)
         else:
             return key
+
+    def _get_sent(self, token):
+        root = (list(token.ancestors) or [token])[-1]
+        return token.doc[root.left_edge.i:root.right_edge.i + 1]
+
+
+def unpickle_matcher(vocab, patterns, callbacks):
+    matcher = DependencyMatcher(vocab)
+    for key, pattern in patterns.items():
+        callback = callbacks.get(key, None)
+        matcher.add(key, pattern, on_match=callback)
+    return matcher
diff --git a/spacy/tests/matcher/test_dependency_matcher.py b/spacy/tests/matcher/test_dependency_matcher.py
new file mode 100644
index 000000000..69e85140d
--- /dev/null
+++ b/spacy/tests/matcher/test_dependency_matcher.py
@@ -0,0 +1,386 @@
+import pytest
+import pickle
+import re
+import copy
+from mock import Mock
+from spacy.matcher import DependencyMatcher
+from ..util import get_doc
+
+
+@pytest.fixture
+def doc(en_vocab):
+    text = "The quick brown fox jumped over the lazy fox"
+    heads = [3, 2, 1, 1, 0, -1, 2, 1, -3]
+    deps = ["det", "amod", "amod", "nsubj", "ROOT", "prep", "pobj", "det", "amod"]
+    doc = get_doc(en_vocab, text.split(), heads=heads, deps=deps)
+    return doc
+
+
+@pytest.fixture
+def patterns(en_vocab):
+    def is_brown_yellow(text):
+        return bool(re.compile(r"brown|yellow").match(text))
+
+    IS_BROWN_YELLOW = en_vocab.add_flag(is_brown_yellow)
+
+    pattern1 = [
+        {"RIGHT_ID": "fox", "RIGHT_ATTRS": {"ORTH": "fox"}},
+        {
+            "LEFT_ID": "fox",
+            "REL_OP": ">",
+            "RIGHT_ID": "q",
+            "RIGHT_ATTRS": {"ORTH": "quick", "DEP": "amod"},
+        },
+        {
+            "LEFT_ID": "fox",
+            "REL_OP": ">",
+            "RIGHT_ID": "r",
+            "RIGHT_ATTRS": {IS_BROWN_YELLOW: True},
+        },
+    ]
+
+    pattern2 = [
+        {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
+        {
+            "LEFT_ID": "jumped",
+            "REL_OP": ">",
+            "RIGHT_ID": "fox1",
+            "RIGHT_ATTRS": {"ORTH": "fox"},
+        },
+        {
+            "LEFT_ID": "jumped",
+            "REL_OP": ".",
+            "RIGHT_ID": "over",
+            "RIGHT_ATTRS": {"ORTH": "over"},
+        },
+    ]
+
+    pattern3 = [
+        {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
+        {
+            "LEFT_ID": "jumped",
+            "REL_OP": ">",
+            "RIGHT_ID": "fox",
+            "RIGHT_ATTRS": {"ORTH": "fox"},
+        },
+        {
+            "LEFT_ID": "fox",
+            "REL_OP": ">>",
+            "RIGHT_ID": "r",
+            "RIGHT_ATTRS": {"ORTH": "brown"},
+        },
+    ]
+
+    pattern4 = [
+        {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
+        {
+            "LEFT_ID": "jumped",
+            "REL_OP": ">",
+            "RIGHT_ID": "fox",
+            "RIGHT_ATTRS": {"ORTH": "fox"},
+        }
+    ]
+
+    pattern5 = [
+        {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
+        {
+            "LEFT_ID": "jumped",
+            "REL_OP": ">>",
+            "RIGHT_ID": "fox",
+            "RIGHT_ATTRS": {"ORTH": "fox"},
+        },
+    ]
+
+    return [pattern1, pattern2, pattern3, pattern4, pattern5]
+
+
+@pytest.fixture
+def dependency_matcher(en_vocab, patterns, doc):
+    matcher = DependencyMatcher(en_vocab)
+    mock = Mock()
+    for i in range(1, len(patterns) + 1):
+        if i == 1:
+            matcher.add("pattern1", [patterns[0]], on_match=mock)
+        else:
+            matcher.add("pattern" + str(i), [patterns[i - 1]])
+
+    return matcher
+
+
+def test_dependency_matcher(dependency_matcher, doc, patterns):
+    assert len(dependency_matcher) == 5
+    assert "pattern3" in dependency_matcher
+    assert dependency_matcher.get("pattern3") == (None, [patterns[2]])
+    matches = dependency_matcher(doc)
+    assert len(matches) == 6
+    assert matches[0][1] == [3, 1, 2]
+    assert matches[1][1] == [4, 3, 5]
+    assert matches[2][1] == [4, 3, 2]
+    assert matches[3][1] == [4, 3]
+    assert matches[4][1] == [4, 3]
+    assert matches[5][1] == [4, 8]
+
+    span = doc[0:6]
+    matches = dependency_matcher(span)
+    assert len(matches) == 5
+    assert matches[0][1] == [3, 1, 2]
+    assert matches[1][1] == [4, 3, 5]
+    assert matches[2][1] == [4, 3, 2]
+    assert matches[3][1] == [4, 3]
+    assert matches[4][1] == [4, 3]
+
+
+def test_dependency_matcher_pickle(en_vocab, patterns, doc):
+    matcher = DependencyMatcher(en_vocab)
+    for i in range(1, len(patterns) + 1):
+        matcher.add("pattern" + str(i), [patterns[i - 1]])
+
+    matches = matcher(doc)
+    assert matches[0][1] == [3, 1, 2]
+    assert matches[1][1] == [4, 3, 5]
+    assert matches[2][1] == [4, 3, 2]
+    assert matches[3][1] == [4, 3]
+    assert matches[4][1] == [4, 3]
+    assert matches[5][1] == [4, 8]
+
+    b = pickle.dumps(matcher)
+    matcher_r = pickle.loads(b)
+
+    assert len(matcher) == len(matcher_r)
+    matches = matcher_r(doc)
+    assert matches[0][1] == [3, 1, 2]
+    assert matches[1][1] == [4, 3, 5]
+    assert matches[2][1] == [4, 3, 2]
+    assert matches[3][1] == [4, 3]
+    assert matches[4][1] == [4, 3]
+    assert matches[5][1] == [4, 8]
+
+
+def test_dependency_matcher_pattern_validation(en_vocab):
+    pattern = [
+        {"RIGHT_ID": "fox", "RIGHT_ATTRS": {"ORTH": "fox"}},
+        {
+            "LEFT_ID": "fox",
+            "REL_OP": ">",
+            "RIGHT_ID": "q",
+            "RIGHT_ATTRS": {"ORTH": "quick", "DEP": "amod"},
+        },
+        {
+            "LEFT_ID": "fox",
+            "REL_OP": ">",
+            "RIGHT_ID": "r",
+            "RIGHT_ATTRS": {"ORTH": "brown"},
+        },
+    ]
+
+    matcher = DependencyMatcher(en_vocab)
+    # original pattern is valid
+    matcher.add("FOUNDED", [pattern])
+    # individual pattern not wrapped in a list
+    with pytest.raises(ValueError):
+        matcher.add("FOUNDED", pattern)
+    # no anchor node
+    with pytest.raises(ValueError):
+        matcher.add("FOUNDED", [pattern[1:]])
+    # required keys missing
+    with pytest.raises(ValueError):
+        pattern2 = copy.deepcopy(pattern)
+        del pattern2[0]["RIGHT_ID"]
+        matcher.add("FOUNDED", [pattern2])
+    with pytest.raises(ValueError):
+        pattern2 = copy.deepcopy(pattern)
+        del pattern2[1]["RIGHT_ID"]
+        matcher.add("FOUNDED", [pattern2])
+    with pytest.raises(ValueError):
+        pattern2 = copy.deepcopy(pattern)
+        del pattern2[1]["RIGHT_ATTRS"]
+        matcher.add("FOUNDED", [pattern2])
+    with pytest.raises(ValueError):
+        pattern2 = copy.deepcopy(pattern)
+        del pattern2[1]["LEFT_ID"]
+        matcher.add("FOUNDED", [pattern2])
+    with pytest.raises(ValueError):
+        pattern2 = copy.deepcopy(pattern)
+        del pattern2[1]["REL_OP"]
+        matcher.add("FOUNDED", [pattern2])
+    # invalid operator
+    with pytest.raises(ValueError):
+        pattern2 = copy.deepcopy(pattern)
+        pattern2[1]["REL_OP"] = "!!!"
+        matcher.add("FOUNDED", [pattern2])
+    # duplicate node name
+    with pytest.raises(ValueError):
+        pattern2 = copy.deepcopy(pattern)
+        pattern2[1]["RIGHT_ID"] = "fox"
+        matcher.add("FOUNDED", [pattern2])
+
+
+def test_dependency_matcher_callback(en_vocab, doc):
+    pattern = [
+        {"RIGHT_ID": "quick", "RIGHT_ATTRS": {"ORTH": "quick"}},
+    ]
+
+    matcher = DependencyMatcher(en_vocab)
+    mock = Mock()
+    matcher.add("pattern", [pattern], on_match=mock)
+    matches = matcher(doc)
+    mock.assert_called_once_with(matcher, doc, 0, matches)
+
+    # check that matches with and without callback are the same (#4590)
+    matcher2 = DependencyMatcher(en_vocab)
+    matcher2.add("pattern", [pattern])
+    matches2 = matcher2(doc)
+    assert matches == matches2
+
+
+@pytest.mark.parametrize(
+    "op,num_matches", [(".", 8), (".*", 20), (";", 8), (";*", 20),]
+)
+def test_dependency_matcher_precedence_ops(en_vocab, op, num_matches):
+    # two sentences to test that all matches are within the same sentence
+    doc = get_doc(
+        en_vocab,
+        words=["a", "b", "c", "d", "e"] * 2,
+        heads=[0, -1, -2, -3, -4] * 2,
+        deps=["dep"] * 10,
+    )
+    match_count = 0
+    for text in ["a", "b", "c", "d", "e"]:
+        pattern = [
+            {"RIGHT_ID": "1", "RIGHT_ATTRS": {"ORTH": text}},
+            {"LEFT_ID": "1", "REL_OP": op, "RIGHT_ID": "2", "RIGHT_ATTRS": {},},
+        ]
+        matcher = DependencyMatcher(en_vocab)
+        matcher.add("A", [pattern])
+        matches = matcher(doc)
+        match_count += len(matches)
+        for match in matches:
+            match_id, token_ids = match
+            # token_ids[0] op token_ids[1]
+            if op == ".":
+                assert token_ids[0] == token_ids[1] - 1
+            elif op == ";":
+                assert token_ids[0] == token_ids[1] + 1
+            elif op == ".*":
+                assert token_ids[0] < token_ids[1]
+            elif op == ";*":
+                assert token_ids[0] > token_ids[1]
+            # all tokens are within the same sentence
+            assert doc[token_ids[0]].sent == doc[token_ids[1]].sent
+    assert match_count == num_matches
+
+
+@pytest.mark.parametrize(
+    "left,right,op,num_matches",
+    [
+        ("fox", "jumped", "<", 1),
+        ("the", "lazy", "<", 0),
+        ("jumped", "jumped", "<", 0),
+        ("fox", "jumped", ">", 0),
+        ("fox", "lazy", ">", 1),
+        ("lazy", "lazy", ">", 0),
+        ("fox", "jumped", "<<", 2),
+        ("jumped", "fox", "<<", 0),
+        ("the", "fox", "<<", 2),
+        ("fox", "jumped", ">>", 0),
+        ("over", "the", ">>", 1),
+        ("fox", "the", ">>", 2),
+        ("fox", "jumped", ".", 1),
+        ("lazy", "fox", ".", 1),
+        ("the", "fox", ".", 0),
+        ("the", "the", ".", 0),
+        ("fox", "jumped", ";", 0),
+        ("lazy", "fox", ";", 0),
+        ("the", "fox", ";", 0),
+        ("the", "the", ";", 0),
+        ("quick", "fox", ".*", 2),
+        ("the", "fox", ".*", 3),
+        ("the", "the", ".*", 1),
+        ("fox", "jumped", ";*", 1),
+        ("quick", "fox", ";*", 0),
+        ("the", "fox", ";*", 1),
+        ("the", "the", ";*", 1),
+        ("quick", "brown", "$+", 1),
+        ("brown", "quick", "$+", 0),
+        ("brown", "brown", "$+", 0),
+        ("quick", "brown", "$-", 0),
+        ("brown", "quick", "$-", 1),
+        ("brown", "brown", "$-", 0),
+        ("the", "brown", "$++", 1),
+        ("brown", "the", "$++", 0),
+        ("brown", "brown", "$++", 0),
+        ("the", "brown", "$--", 0),
+        ("brown", "the", "$--", 1),
+        ("brown", "brown", "$--", 0),
+    ],
+)
+def test_dependency_matcher_ops(en_vocab, doc, left, right, op, num_matches):
+    right_id = right
+    if left == right:
+        right_id = right + "2"
+    pattern = [
+        {"RIGHT_ID": left, "RIGHT_ATTRS": {"LOWER": left}},
+        {
+            "LEFT_ID": left,
+            "REL_OP": op,
+            "RIGHT_ID": right_id,
+            "RIGHT_ATTRS": {"LOWER": right},
+        },
+    ]
+
+    matcher = DependencyMatcher(en_vocab)
+    matcher.add("pattern", [pattern])
+    matches = matcher(doc)
+    assert len(matches) == num_matches
+
+
+@pytest.mark.parametrize(
+    "pattern",
+    [
+        # empty
+        [],
+        # unsupported op
+        [
+            {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
+            {
+                "LEFT_ID": "jumped",
+                "REL_OP": "==",
+                "RIGHT_ID": "fox",
+                "RIGHT_ATTRS": {"ORTH": "fox"},
+            },
+        ],
+        # first dict isn't just a node
+        [
+            {
+                "LEFT_ID": "jumped",
+                "REL_OP": "==",
+                "RIGHT_ID": "fox",
+                "RIGHT_ATTRS": {"ORTH": "fox"},
+            },
+        ],
+        # missing op
+        [
+            {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
+            {"LEFT_ID": "jumped", "RIGHT_ID": "fox", "RIGHT_ATTRS": {"ORTH": "fox"},},
+        ],
+        # missing left-hand ID
+        [
+            {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
+            {"REL_OP": ">", "RIGHT_ID": "fox", "RIGHT_ATTRS": {"ORTH": "fox"},},
+        ],
+        # missing right-hand ID
+        [
+            {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
+            {"LEFT_ID": "jumped", "REL_OP": ">", "RIGHT_ATTRS": {"ORTH": "fox"},},
+        ],
+        # missing right-hand attrs
+        [
+            {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
+            {"LEFT_ID": "jumped", "REL_OP": ">", "RIGHT_ID": "fox",},
+        ],
+    ],
+)
+def test_dependency_matcher_invalid_patterns(en_vocab, pattern):
+    matcher = DependencyMatcher(en_vocab)
+    with pytest.raises(ValueError):
+        matcher.add("pattern", [pattern])
diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py
index 8310c4466..e0f335a19 100644
--- a/spacy/tests/matcher/test_matcher_api.py
+++ b/spacy/tests/matcher/test_matcher_api.py
@@ -1,7 +1,6 @@
 import pytest
-import re
 from mock import Mock
-from spacy.matcher import Matcher, DependencyMatcher
+from spacy.matcher import Matcher
 from spacy.tokens import Doc, Token, Span
 
 from ..doc.test_underscore import clean_underscore  # noqa: F401
@@ -292,84 +291,6 @@ def test_matcher_extension_set_membership(en_vocab):
     assert len(matches) == 0
 
 
-@pytest.fixture
-def text():
-    return "The quick brown fox jumped over the lazy fox"
-
-
-@pytest.fixture
-def heads():
-    return [3, 2, 1, 1, 0, -1, 2, 1, -3]
-
-
-@pytest.fixture
-def deps():
-    return ["det", "amod", "amod", "nsubj", "prep", "pobj", "det", "amod"]
-
-
-@pytest.fixture
-def dependency_matcher(en_vocab):
-    def is_brown_yellow(text):
-        return bool(re.compile(r"brown|yellow|over").match(text))
-
-    IS_BROWN_YELLOW = en_vocab.add_flag(is_brown_yellow)
-
-    pattern1 = [
-        {"SPEC": {"NODE_NAME": "fox"}, "PATTERN": {"ORTH": "fox"}},
-        {
-            "SPEC": {"NODE_NAME": "q", "NBOR_RELOP": ">", "NBOR_NAME": "fox"},
-            "PATTERN": {"ORTH": "quick", "DEP": "amod"},
-        },
-        {
-            "SPEC": {"NODE_NAME": "r", "NBOR_RELOP": ">", "NBOR_NAME": "fox"},
-            "PATTERN": {IS_BROWN_YELLOW: True},
-        },
-    ]
-
-    pattern2 = [
-        {"SPEC": {"NODE_NAME": "jumped"}, "PATTERN": {"ORTH": "jumped"}},
-        {
-            "SPEC": {"NODE_NAME": "fox", "NBOR_RELOP": ">", "NBOR_NAME": "jumped"},
-            "PATTERN": {"ORTH": "fox"},
-        },
-        {
-            "SPEC": {"NODE_NAME": "quick", "NBOR_RELOP": ".", "NBOR_NAME": "jumped"},
-            "PATTERN": {"ORTH": "fox"},
-        },
-    ]
-
-    pattern3 = [
-        {"SPEC": {"NODE_NAME": "jumped"}, "PATTERN": {"ORTH": "jumped"}},
-        {
-            "SPEC": {"NODE_NAME": "fox", "NBOR_RELOP": ">", "NBOR_NAME": "jumped"},
-            "PATTERN": {"ORTH": "fox"},
-        },
-        {
-            "SPEC": {"NODE_NAME": "r", "NBOR_RELOP": ">>", "NBOR_NAME": "fox"},
-            "PATTERN": {"ORTH": "brown"},
-        },
-    ]
-
-    matcher = DependencyMatcher(en_vocab)
-    matcher.add("pattern1", [pattern1])
-    matcher.add("pattern2", [pattern2])
-    matcher.add("pattern3", [pattern3])
-
-    return matcher
-
-
-def test_dependency_matcher_compile(dependency_matcher):
-    assert len(dependency_matcher) == 3
-
-
-# def test_dependency_matcher(dependency_matcher, text, heads, deps):
-#     doc = get_doc(dependency_matcher.vocab, text.split(), heads=heads, deps=deps)
-#     matches = dependency_matcher(doc)
-#     assert matches[0][1] == [[3, 1, 2]]
-#     assert matches[1][1] == [[4, 3, 3]]
-#     assert matches[2][1] == [[4, 3, 2]]
-
-
 def test_matcher_basic_check(en_vocab):
     matcher = Matcher(en_vocab)
     # Potential mistake: pass in pattern instead of list of patterns
diff --git a/spacy/tests/regression/test_issue4501-5000.py b/spacy/tests/regression/test_issue4501-5000.py
index 39533f70a..d83a2c718 100644
--- a/spacy/tests/regression/test_issue4501-5000.py
+++ b/spacy/tests/regression/test_issue4501-5000.py
@@ -38,32 +38,6 @@ def test_gold_misaligned(en_tokenizer, text, words):
     Example.from_dict(doc, {"words": words})
 
 
-def test_issue4590(en_vocab):
-    """Test that matches param in on_match method are the same as matches run with no on_match method"""
-    pattern = [
-        {"SPEC": {"NODE_NAME": "jumped"}, "PATTERN": {"ORTH": "jumped"}},
-        {
-            "SPEC": {"NODE_NAME": "fox", "NBOR_RELOP": ">", "NBOR_NAME": "jumped"},
-            "PATTERN": {"ORTH": "fox"},
-        },
-        {
-            "SPEC": {"NODE_NAME": "quick", "NBOR_RELOP": ".", "NBOR_NAME": "jumped"},
-            "PATTERN": {"ORTH": "fox"},
-        },
-    ]
-
-    on_match = Mock()
-    matcher = DependencyMatcher(en_vocab)
-    matcher.add("pattern", on_match, pattern)
-    text = "The quick brown fox jumped over the lazy fox"
-    heads = [3, 2, 1, 1, 0, -1, 2, 1, -3]
-    deps = ["det", "amod", "amod", "nsubj", "ROOT", "prep", "det", "amod", "pobj"]
-    doc = get_doc(en_vocab, text.split(), heads=heads, deps=deps)
-    matches = matcher(doc)
-    on_match_args = on_match.call_args
-    assert on_match_args[0][3] == matches
-
-
 def test_issue4651_with_phrase_matcher_attr():
     """Test that the EntityRuler PhraseMatcher is deserialized correctly using
     the method from_disk when the EntityRuler argument phrase_matcher_attr is
diff --git a/website/docs/api/dependencymatcher.md b/website/docs/api/dependencymatcher.md
index 2fb903100..333f82043 100644
--- a/website/docs/api/dependencymatcher.md
+++ b/website/docs/api/dependencymatcher.md
@@ -1,65 +1,135 @@
 ---
 title: DependencyMatcher
-teaser: Match sequences of tokens, based on the dependency parse
+teaser: Match subtrees within a dependency parse
 tag: class
+new: 3
 source: spacy/matcher/dependencymatcher.pyx
 ---
 
 The `DependencyMatcher` follows the same API as the [`Matcher`](/api/matcher)
 and [`PhraseMatcher`](/api/phrasematcher) and lets you match on dependency trees
-using the
-[Semgrex syntax](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html).
+using
+[Semgrex operators](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html).
 It requires a pretrained [`DependencyParser`](/api/parser) or other component
-that sets the `Token.dep` attribute.
+that sets the `Token.dep` and `Token.head` attributes.
 
 ## Pattern format {#patterns}
 
-> ```json
+> ```python
 > ### Example
+> # pattern: "[subject] ... initially founded"
 > [
+>   # anchor token: founded
 >   {
->     "SPEC": {"NODE_NAME": "founded"},
->     "PATTERN": {"ORTH": "founded"}
+>     "RIGHT_ID": "founded",
+>     "RIGHT_ATTRS": {"ORTH": "founded"}
 >   },
+>   # founded -> subject
 >   {
->     "SPEC": {
->       "NODE_NAME": "founder",
->       "NBOR_RELOP": ">",
->       "NBOR_NAME": "founded"
->   },
->     "PATTERN": {"DEP": "nsubj"}
+>     "LEFT_ID": "founded",
+>     "REL_OP": ">",
+>     "RIGHT_ID": "subject",
+>     "RIGHT_ATTRS": {"DEP": "nsubj"}
 >   },
+>   # "founded" follows "initially"
 >   {
->     "SPEC": {
->       "NODE_NAME": "object",
->       "NBOR_RELOP": ">",
->       "NBOR_NAME": "founded"
->   },
->     "PATTERN": {"DEP": "dobj"}
+>     "LEFT_ID": "founded",
+>     "REL_OP": ";",
+>     "RIGHT_ID": "initially",
+>     "RIGHT_ATTRS": {"ORTH": "initially"}
 >   }
 > ]
 > ```
 
 A pattern added to the `DependencyMatcher` consists of a list of dictionaries,
-with each dictionary describing a node to match. Each pattern should have the
-following top-level keys:
+with each dictionary describing a token to match. Except for the first
+dictionary, which defines an anchor token using only `RIGHT_ID` and
+`RIGHT_ATTRS`, each dictionary should have the following keys:
 
-| Name      | Description                                                                                                                                    |
-| --------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
-| `PATTERN` | The token attributes to match in the same format as patterns provided to the regular token-based [`Matcher`](/api/matcher). ~~Dict[str, Any]~~ |
-| `SPEC`    | The relationships of the nodes in the subtree that should be matched. ~~Dict[str, str]~~                                                       |
+| Name          | Description                                                                                                                                                            |
+| ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `LEFT_ID`     | The name of the left-hand node in the relation, which has been defined in an earlier node. ~~str~~                                                                     |
+| `REL_OP`      | An operator that describes how the two nodes are related. ~~str~~                                                                                                      |
+| `RIGHT_ID`    | A unique name for the right-hand node in the relation. ~~str~~                                                                                                         |
+| `RIGHT_ATTRS` | The token attributes to match for the right-hand node in the same format as patterns provided to the regular token-based [`Matcher`](/api/matcher). ~~Dict[str, Any]~~ |
 
-The `SPEC` includes the following fields:
+The first dictionary defines an anchor token and each additional token added to
+the pattern is linked to an existing token `LEFT_ID` by the relation `REL_OP`
+and is described by the name `RIGHT_ID` and the attributes `RIGHT_ATTRS`.
 
-| Name         | Description                                                                                                                                                                    |
-| ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `NODE_NAME`  | A unique name for this node to refer to it in other specs. ~~str~~                                                                                                             |
-| `NBOR_RELOP` | A [Semgrex](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html) operator that describes how the two nodes are related. ~~str~~ |
-| `NBOR_NAME`  | The unique name of the node that this node is connected to. ~~str~~                                                                                                            |
+Let's say we want to find sentences describing who founded what kind of company:
+
+- `Smith founded a healthcare company in 2005.`
+- `Williams initially founded an insurance company in 1987.`
+- `Lee, an established CEO, founded yet another AI startup.`
+
+Since it's the root of the dependency parse, `founded` is a good choice for the
+anchor token in our pattern:
+
+```python
+pattern = [
+    {"RIGHT_ID": "anchor_founded", "RIGHT_ATTRS": {"ORTH": "founded"}}
+]
+```
+
+We can add the subject as the token with the dependency label `nsubj` that is a
+direct child `>` of the anchor token named `anchor_founded`:
+
+```python
+pattern = [
+    {"RIGHT_ID": "anchor_founded", "RIGHT_ATTRS": {"ORTH": "founded"}},
+    {
+        "LEFT_ID": "anchor_founded",
+        "REL_OP": ">",
+        "RIGHT_ID": "subject",
+        "RIGHT_ATTRS": {"DEP": "nsubj"},
+    }
+]
+```
+
+And the direct object along with its modifier:
+
+```python
+pattern = [  # ... (anchor and subject entries as above)
+    {
+        "LEFT_ID": "anchor_founded",
+        "REL_OP": ">",
+        "RIGHT_ID": "founded_object",
+        "RIGHT_ATTRS": {"DEP": "dobj"},
+    },
+    {
+        "LEFT_ID": "founded_object",
+        "REL_OP": ">",
+        "RIGHT_ID": "founded_object_modifier",
+        "RIGHT_ATTRS": {"DEP": {"IN": ["amod", "compound"]}},
+    }
+]
+```
+
+### Operators
+
+The following operators are supported by the `DependencyMatcher`, most of which
+come directly from
+[Semgrex](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html):
+
+| Symbol    | Description                                                                                                         |
+| --------- | ------------------------------------------------------------------------------------------------------------------- |
+| `A < B`   | `A` is the immediate dependent of `B`                                                                               |
+| `A > B`   | `A` is the immediate head of `B`                                                                                    |
+| `A << B`  | `A` is the dependent in a chain to `B` following dep->head paths                                                    |
+| `A >> B`  | `A` is the head in a chain to `B` following head->dep paths                                                         |
+| `A . B`   | `A` immediately precedes `B`, i.e. `A.i == B.i - 1`, and both are within the same dependency tree                   |
+| `A .* B`  | `A` precedes `B`, i.e. `A.i < B.i`, and both are within the same dependency tree _(not in Semgrex)_                 |
+| `A ; B`   | `A` immediately follows `B`, i.e. `A.i == B.i + 1`, and both are within the same dependency tree _(not in Semgrex)_ |
+| `A ;* B`  | `A` follows `B`, i.e. `A.i > B.i`, and both are within the same dependency tree _(not in Semgrex)_                  |
+| `A $+ B`  | `B` is a right immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i - 1`                 |
+| `A $- B`  | `B` is a left immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i + 1`                  |
+| `A $++ B` | `B` is a right sibling of `A`, i.e. `A` and `B` have the same parent and `A.i < B.i`                                |
+| `A $-- B` | `B` is a left sibling of `A`, i.e. `A` and `B` have the same parent and `A.i > B.i`                                 |
 
 ## DependencyMatcher.\_\_init\_\_ {#init tag="method"}
 
-Create a rule-based `DependencyMatcher`.
+Create a `DependencyMatcher`.
 
 > #### Example
 >
@@ -68,13 +138,15 @@ Create a rule-based `DependencyMatcher`.
 > matcher = DependencyMatcher(nlp.vocab)
 > ```
 
-| Name    | Description                                                                                           |
-| ------- | ----------------------------------------------------------------------------------------------------- |
-| `vocab` | The vocabulary object, which must be shared with the documents the matcher will operate on. ~~Vocab~~ |
+| Name           | Description                                                                                           |
+| -------------- | ----------------------------------------------------------------------------------------------------- |
+| `vocab`        | The vocabulary object, which must be shared with the documents the matcher will operate on. ~~Vocab~~ |
+| _keyword-only_ |                                                                                                       |
+| `validate`     | Validate all patterns added to this matcher. ~~bool~~                                                 |
 
 ## DependencyMatcher.\_\call\_\_ {#call tag="method"}
 
-Find all token sequences matching the supplied patterns on the `Doc` or `Span`.
+Find all tokens matching the supplied patterns on the `Doc` or `Span`.
 
 > #### Example
 >
@@ -82,36 +154,32 @@ Find all token sequences matching the supplied patterns on the `Doc` or `Span`.
 > from spacy.matcher import DependencyMatcher
 >
 > matcher = DependencyMatcher(nlp.vocab)
-> pattern = [
->     {"SPEC": {"NODE_NAME": "founded"}, "PATTERN": {"ORTH": "founded"}},
->     {"SPEC": {"NODE_NAME": "founder", "NBOR_RELOP": ">", "NBOR_NAME": "founded"}, "PATTERN": {"DEP": "nsubj"}},
-> ]
-> matcher.add("Founder", [pattern])
+> pattern = [{"RIGHT_ID": "founded_id",
+>   "RIGHT_ATTRS": {"ORTH": "founded"}}]
+> matcher.add("FOUNDED", [pattern])
 > doc = nlp("Bill Gates founded Microsoft.")
 > matches = matcher(doc)
 > ```
 
-| Name        | Description                                                                                                                                                                                             |
-| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `doclike`   | The `Doc` or `Span` to match over. ~~Union[Doc, Span]~~                                                                                                                                                 |
-| **RETURNS** | A list of `(match_id, start, end)` tuples, describing the matches. A match tuple describes a span `doc[start:end`]. The `match_id` is the ID of the added match pattern. ~~List[Tuple[int, int, int]]~~ |
+| Name        | Description                                                                                                                                                                                                                                                                                                                           |
+| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `doclike`   | The `Doc` or `Span` to match over. ~~Union[Doc, Span]~~                                                                                                                                                                                                                                                                               |
+| **RETURNS** | A list of `(match_id, token_ids)` tuples, describing the matches. The `match_id` is the ID of the match pattern and `token_ids` is a list of token indices matched by the pattern, where the position of each token in the list corresponds to the position of the node specification in the pattern. ~~List[Tuple[int, List[int]]]~~ |
 
 ## DependencyMatcher.\_\_len\_\_ {#len tag="method"}
 
-Get the number of rules (edges) added to the dependency matcher. Note that this
-only returns the number of rules (identical with the number of IDs), not the
-number of individual patterns.
+Get the number of rules added to the dependency matcher. Note that this only
+returns the number of rules (identical with the number of IDs), not the number
+of individual patterns.
 
 > #### Example
 >
 > ```python
 > matcher = DependencyMatcher(nlp.vocab)
 > assert len(matcher) == 0
-> pattern = [
->     {"SPEC": {"NODE_NAME": "founded"}, "PATTERN": {"ORTH": "founded"}},
->     {"SPEC": {"NODE_NAME": "START_ENTITY", "NBOR_RELOP": ">", "NBOR_NAME": "founded"}, "PATTERN": {"DEP": "nsubj"}},
-> ]
-> matcher.add("Rule", [pattern])
+> pattern = [{"RIGHT_ID": "founded_id",
+>   "RIGHT_ATTRS": {"ORTH": "founded"}}]
+> matcher.add("FOUNDED", [pattern])
 > assert len(matcher) == 1
 > ```
 
@@ -126,10 +194,10 @@ Check whether the matcher contains rules for a match ID.
 > #### Example
 >
 > ```python
-> matcher = Matcher(nlp.vocab)
-> assert "Rule" not in matcher
-> matcher.add("Rule", [pattern])
-> assert "Rule" in matcher
+> matcher = DependencyMatcher(nlp.vocab)
+> assert "FOUNDED" not in matcher
+> matcher.add("FOUNDED", [pattern])
+> assert "FOUNDED" in matcher
 > ```
 
 | Name        | Description                                                    |
@@ -152,33 +220,15 @@ will be overwritten.
 >     print('Matched!', matches)
 >
 > matcher = DependencyMatcher(nlp.vocab)
-> matcher.add("TEST_PATTERNS", patterns)
+> matcher.add("FOUNDED", patterns, on_match=on_match)
 > ```
 
-| Name           | Description                                                                                                                                                |
-| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `match_id`     | An ID for the thing you're matching. ~~str~~                                                                                                               |
-| `patterns`     | list                                                                                                                                                       | Match pattern. A pattern consists of a list of dicts, where each dict describes a `"PATTERN"` and `"SPEC"`. ~~List[List[Dict[str, dict]]]~~ |
-| _keyword-only_ |                                                                                                                                                            |  |
-| `on_match`     | Callback function to act on matches. Takes the arguments `matcher`, `doc`, `i` and `matches`. ~~Optional[Callable[[Matcher, Doc, int, List[tuple], Any]]~~ |
-
-## DependencyMatcher.remove {#remove tag="method"}
-
-Remove a rule from the matcher. A `KeyError` is raised if the match ID does not
-exist.
-
-> #### Example
->
-> ```python
-> matcher.add("Rule", [pattern]])
-> assert "Rule" in matcher
-> matcher.remove("Rule")
-> assert "Rule" not in matcher
-> ```
-
-| Name  | Description                       |
-| ----- | --------------------------------- |
-| `key` | The ID of the match rule. ~~str~~ |
+| Name           | Description                                                                                                                                                          |
+| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `match_id`     | An ID for the patterns. ~~str~~                                                                                                                                      |
+| `patterns`     | A list of match patterns. A pattern consists of a list of dicts, where each dict describes a token in the tree. ~~List[List[Dict[str, Union[str, Dict]]]]~~          |
+| _keyword-only_ |                                                                                                                                                                      |
+| `on_match`     | Callback function to act on matches. Takes the arguments `matcher`, `doc`, `i` and `matches`. ~~Optional[Callable[[DependencyMatcher, Doc, int, List[Tuple]], Any]]~~ |
 
 ## DependencyMatcher.get {#get tag="method"}
 
@@ -188,11 +238,29 @@ Retrieve the pattern stored for a key. Returns the rule as an
 > #### Example
 >
 > ```python
-> matcher.add("Rule", [pattern], on_match=on_match)
-> on_match, patterns = matcher.get("Rule")
+> matcher.add("FOUNDED", patterns, on_match=on_match)
+> on_match, patterns = matcher.get("FOUNDED")
 > ```
 
-| Name        | Description                                                                                   |
-| ----------- | --------------------------------------------------------------------------------------------- |
-| `key`       | The ID of the match rule. ~~str~~                                                             |
-| **RETURNS** | The rule, as an `(on_match, patterns)` tuple. ~~Tuple[Optional[Callable], List[List[dict]]]~~ |
+| Name        | Description                                                                                                 |
+| ----------- | ----------------------------------------------------------------------------------------------------------- |
+| `key`       | The ID of the match rule. ~~str~~                                                                           |
+| **RETURNS** | The rule, as an `(on_match, patterns)` tuple. ~~Tuple[Optional[Callable], List[List[Union[Dict, Tuple]]]]~~ |
+
+## DependencyMatcher.remove {#remove tag="method"}
+
+Remove a rule from the dependency matcher. A `KeyError` is raised if the match
+ID does not exist.
+
+> #### Example
+>
+> ```python
+> matcher.add("FOUNDED", patterns)
+> assert "FOUNDED" in matcher
+> matcher.remove("FOUNDED")
+> assert "FOUNDED" not in matcher
+> ```
+
+| Name  | Description                       |
+| ----- | --------------------------------- |
+| `key` | The ID of the match rule. ~~str~~ |
diff --git a/website/docs/images/dep-match-diagram.svg b/website/docs/images/dep-match-diagram.svg
new file mode 100644
index 000000000..f23c573e2
--- /dev/null
+++ b/website/docs/images/dep-match-diagram.svg
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/PR-SVG-20010719/DTD/svg10.dtd">
+<svg width="40cm" height="9cm" viewBox="78 215 793 171" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+  <g id="Background">
+    <g>
+      <rect style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x="79.225" y="263.65" width="169.55" height="54" rx="0" ry="0"/>
+      <text font-size="12.7998" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="164" y="286.55">
+        <tspan x="164" y="286.55">ID: founded</tspan>
+        <tspan x="164" y="302.55">ORTH: founded</tspan>
+      </text>
+    </g>
+    <g>
+      <rect style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x="426.238" y="216" width="139.1" height="54" rx="0" ry="0"/>
+      <text font-size="12.7998" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="495.788" y="238.9">
+        <tspan x="495.788" y="238.9">ID: subject</tspan>
+        <tspan x="495.788" y="254.9">DEP: nsubj</tspan>
+      </text>
+    </g>
+    <g>
+      <rect style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x="429.662" y="330.65" width="132.25" height="54" rx="0" ry="0"/>
+      <text font-size="12.7998" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="495.787" y="353.55">
+        <tspan x="495.787" y="353.55">ID: object</tspan>
+        <tspan x="495.787" y="369.55">DEP: dobj</tspan>
+      </text>
+    </g>
+    <g>
+      <line style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x1="248.775" y1="290.65" x2="416.834" y2="245.525"/>
+      <polygon style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" fill-rule="evenodd" points="424.078,243.58 415.717,251.002 416.834,245.525 413.123,241.344 "/>
+    </g>
+    <g>
+      <line style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x1="248.775" y1="290.65" x2="420.533" y2="354.268"/>
+      <polygon style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" fill-rule="evenodd" points="427.566,356.873 416.452,358.089 420.533,354.268 419.925,348.711 "/>
+    </g>
+    <g>
+      <ellipse style="fill: #f3e815; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" cx="342.393" cy="265.102" rx="20.1432" ry="22.8019"/>
+      <text font-size="12.8" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="342.393" y="269.002">
+        <tspan x="342.393" y="269.002">&gt;</tspan>
+      </text>
+    </g>
+    <g>
+      <ellipse style="fill: #f3e815; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" cx="342.393" cy="326.65" rx="20.1432" ry="22.8019"/>
+      <text font-size="12.8" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="342.393" y="330.55">
+        <tspan x="342.393" y="330.55">&gt;</tspan>
+      </text>
+    </g>
+    <g>
+      <rect style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x="697.337" y="330.65" width="172.4" height="54" rx="0" ry="0"/>
+      <text font-size="12.7998" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="783.537" y="353.55">
+        <tspan x="783.537" y="353.55">ID: modifier</tspan>
+        <tspan x="783.537" y="369.55">DEP: amod | compound</tspan>
+      </text>
+    </g>
+    <g>
+      <line style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x1="561.912" y1="357.65" x2="687.601" y2="357.65"/>
+      <polygon style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" fill-rule="evenodd" points="695.101,357.65 685.101,362.65 687.601,357.65 685.101,352.65 "/>
+    </g>
+    <g>
+      <ellipse style="fill: #f3e815; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" cx="629.625" cy="357.65" rx="20.1432" ry="22.8019"/>
+      <text font-size="12.8" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="629.625" y="361.55">
+        <tspan x="629.625" y="361.55">&gt;</tspan>
+      </text>
+    </g>
+  </g>
+</svg>
diff --git a/website/docs/images/displacy-dep-founded.html b/website/docs/images/displacy-dep-founded.html
new file mode 100644
index 000000000..3f89ffd4a
--- /dev/null
+++ b/website/docs/images/displacy-dep-founded.html
@@ -0,0 +1,58 @@
+<svg xmlns="http://www.w3.org/2000/svg" xlink="http://www.w3.org/1999/xlink" xml:lang="en" id="c3124cc3e661444cb9d4175a5b7c09d1-0" class="displacy" width="925" height="399.5" direction="ltr" style="max-width: none; height: 399.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr">
+<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
+    <tspan class="displacy-word" fill="currentColor" x="50">Smith</tspan>
+    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="50"></tspan>
+</text>
+
+<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
+    <tspan class="displacy-word" fill="currentColor" x="225">founded</tspan>
+    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="225"></tspan>
+</text>
+
+<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
+    <tspan class="displacy-word" fill="currentColor" x="400">a</tspan>
+    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="400"></tspan>
+</text>
+
+<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
+    <tspan class="displacy-word" fill="currentColor" x="575">healthcare</tspan>
+    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="575"></tspan>
+</text>
+
+<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
+    <tspan class="displacy-word" fill="currentColor" x="750">company.</tspan>
+    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="750"></tspan>
+</text>
+
+<g class="displacy-arrow">
+    <path class="displacy-arc" id="arrow-c3124cc3e661444cb9d4175a5b7c09d1-0-0" stroke-width="2px" d="M70,264.5 C70,177.0 215.0,177.0 215.0,264.5" fill="none" stroke="currentColor"></path>
+    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
+        <textPath xlink:href="#arrow-c3124cc3e661444cb9d4175a5b7c09d1-0-0" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">nsubj</textPath>
+    </text>
+    <path class="displacy-arrowhead" d="M70,266.5 L62,254.5 78,254.5" fill="currentColor"></path>
+</g>
+
+<g class="displacy-arrow">
+    <path class="displacy-arc" id="arrow-c3124cc3e661444cb9d4175a5b7c09d1-0-1" stroke-width="2px" d="M420,264.5 C420,89.5 745.0,89.5 745.0,264.5" fill="none" stroke="currentColor"></path>
+    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
+        <textPath xlink:href="#arrow-c3124cc3e661444cb9d4175a5b7c09d1-0-1" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">det</textPath>
+    </text>
+    <path class="displacy-arrowhead" d="M420,266.5 L412,254.5 428,254.5" fill="currentColor"></path>
+</g>
+
+<g class="displacy-arrow">
+    <path class="displacy-arc" id="arrow-c3124cc3e661444cb9d4175a5b7c09d1-0-2" stroke-width="2px" d="M595,264.5 C595,177.0 740.0,177.0 740.0,264.5" fill="none" stroke="currentColor"></path>
+    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
+        <textPath xlink:href="#arrow-c3124cc3e661444cb9d4175a5b7c09d1-0-2" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">compound</textPath>
+    </text>
+    <path class="displacy-arrowhead" d="M595,266.5 L587,254.5 603,254.5" fill="currentColor"></path>
+</g>
+
+<g class="displacy-arrow">
+    <path class="displacy-arc" id="arrow-c3124cc3e661444cb9d4175a5b7c09d1-0-3" stroke-width="2px" d="M245,264.5 C245,2.0 750.0,2.0 750.0,264.5" fill="none" stroke="currentColor"></path>
+    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
+        <textPath xlink:href="#arrow-c3124cc3e661444cb9d4175a5b7c09d1-0-3" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">dobj</textPath>
+    </text>
+    <path class="displacy-arrowhead" d="M750.0,266.5 L758.0,254.5 742.0,254.5" fill="currentColor"></path>
+</g>
+</svg>
diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md
index a589c556e..ca409f9fb 100644
--- a/website/docs/usage/rule-based-matching.md
+++ b/website/docs/usage/rule-based-matching.md
@@ -4,6 +4,7 @@ teaser: Find phrases and tokens, and match entities
 menu:
   - ['Token Matcher', 'matcher']
   - ['Phrase Matcher', 'phrasematcher']
+  - ['Dependency Matcher', 'dependencymatcher']
   - ['Entity Ruler', 'entityruler']
   - ['Models & Rules', 'models-rules']
 ---
@@ -938,10 +939,10 @@ object patterns as efficiently as possible and without running any of the other
 pipeline components. If the token attribute you want to match on are set by a
 pipeline component, **make sure that the pipeline component runs** when you
 create the pattern. For example, to match on `POS` or `LEMMA`, the pattern `Doc`
-objects need to have part-of-speech tags set by the `tagger`. You can either
-call the `nlp` object on your pattern texts instead of `nlp.make_doc`, or use
-[`nlp.select_pipes`](/api/language#select_pipes) to disable components
-selectively.
+objects need to have part-of-speech tags set by the `tagger` or `morphologizer`.
+You can either call the `nlp` object on your pattern texts instead of
+`nlp.make_doc`, or use [`nlp.select_pipes`](/api/language#select_pipes) to
+disable components selectively.
 
 </Infobox>
 
@@ -972,10 +973,268 @@ to match phrases with the same sequence of punctuation and non-punctuation
 tokens as the pattern. But this can easily get confusing and doesn't have much
 of an advantage over writing one or two token patterns.
 
+## Dependency Matcher {#dependencymatcher new="3"}
+
+The [`DependencyMatcher`](/api/dependencymatcher) lets you match patterns within
+the dependency parse. It requires a model containing a parser such as the
+[`DependencyParser`](/api/dependencyparser). Instead of defining a list of
+adjacent tokens as in `Matcher` patterns, the `DependencyMatcher` patterns match
+tokens in the dependency parse and specify the relations between them.
+
+> ```python
+> ### Example
+> from spacy.matcher import DependencyMatcher
+>
+> # "[subject] ... initially founded"
+> pattern = [
+>   # anchor token: founded
+>   {
+>     "RIGHT_ID": "founded",
+>     "RIGHT_ATTRS": {"ORTH": "founded"}
+>   },
+>   # founded -> subject
+>   {
+>     "LEFT_ID": "founded",
+>     "REL_OP": ">",
+>     "RIGHT_ID": "subject",
+>     "RIGHT_ATTRS": {"DEP": "nsubj"}
+>   },
+>   # "founded" immediately follows "initially"
+>   {
+>     "LEFT_ID": "founded",
+>     "REL_OP": ";",
+>     "RIGHT_ID": "initially",
+>     "RIGHT_ATTRS": {"ORTH": "initially"}
+>   }
+> ]
+>
+> matcher = DependencyMatcher(nlp.vocab)
+> matcher.add("FOUNDED", [pattern])
+> matches = matcher(doc)
+> ```
+
+A pattern added to the `DependencyMatcher` consists of a list of dictionaries,
+with each dictionary describing a token to match and its relation to an existing
+token in the pattern. Except for the first dictionary, which defines an anchor
+token using only `RIGHT_ID` and `RIGHT_ATTRS`, each dictionary should have the
+following keys:
+
+| Name          | Description                                                                                                                                                            |
+| ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `LEFT_ID`     | The name of the left-hand node in the relation, which has been defined in an earlier node. ~~str~~                                                                     |
+| `REL_OP`      | An operator that describes how the two nodes are related. ~~str~~                                                                                                      |
+| `RIGHT_ID`    | A unique name for the right-hand node in the relation. ~~str~~                                                                                                         |
+| `RIGHT_ATTRS` | The token attributes to match for the right-hand node in the same format as patterns provided to the regular token-based [`Matcher`](/api/matcher). ~~Dict[str, Any]~~ |
+
+Each additional token added to the pattern is linked to an existing token
+`LEFT_ID` by the relation `REL_OP`. The new token is given the name `RIGHT_ID`
+and described by the attributes `RIGHT_ATTRS`.
+
+<Infobox title="Important note" variant="warning">
+
+Because the unique token **names** in `LEFT_ID` and `RIGHT_ID` are used to
+identify tokens, the order of the dicts in the patterns is important: a token
+name needs to be defined as `RIGHT_ID` in one dict in the pattern **before** it
+can be used as `LEFT_ID` in another dict.
+
+</Infobox>
+
+### Dependency matcher operators
+
+The following operators are supported by the `DependencyMatcher`, most of which
+come directly from
+[Semgrex](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html):
+
+| Symbol    | Description                                                                                                         |
+| --------- | ------------------------------------------------------------------------------------------------------------------- |
+| `A < B`   | `A` is the immediate dependent of `B`                                                                               |
+| `A > B`   | `A` is the immediate head of `B`                                                                                    |
+| `A << B`  | `A` is the dependent in a chain to `B` following dep->head paths                                                    |
+| `A >> B`  | `A` is the head in a chain to `B` following head->dep paths                                                         |
+| `A . B`   | `A` immediately precedes `B`, i.e. `A.i == B.i - 1`, and both are within the same dependency tree                   |
+| `A .* B`  | `A` precedes `B`, i.e. `A.i < B.i`, and both are within the same dependency tree _(not in Semgrex)_                 |
+| `A ; B`   | `A` immediately follows `B`, i.e. `A.i == B.i + 1`, and both are within the same dependency tree _(not in Semgrex)_ |
+| `A ;* B`  | `A` follows `B`, i.e. `A.i > B.i`, and both are within the same dependency tree _(not in Semgrex)_                  |
+| `A $+ B`  | `B` is a right immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i - 1`                 |
+| `A $- B`  | `B` is a left immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i + 1`                  |
+| `A $++ B` | `B` is a right sibling of `A`, i.e. `A` and `B` have the same parent and `A.i < B.i`                                |
+| `A $-- B` | `B` is a left sibling of `A`, i.e. `A` and `B` have the same parent and `A.i > B.i`                                 |
+
+### Designing dependency matcher patterns
+
+Let's say we want to find sentences describing who founded what kind of company:
+
+- `Smith founded a healthcare company in 2005.`
+- `Williams initially founded an insurance company in 1987.`
+- `Lee, an experienced CEO, has founded two AI startups.`
+
+The dependency parse for `Smith founded a healthcare company` shows types of
+relations and tokens we want to match:
+
+import DisplaCyDepFoundedHtml from 'images/displacy-dep-founded.html'
+
+<Iframe title="displaCy visualization of dependencies" html={DisplaCyDepFoundedHtml} height={450} />
+
+The relations we're interested in are:
+
+- the founder is the subject (`nsubj`) of the token with the text `founded`
+- the company is the object (`dobj`) of `founded`
+- the kind of company may be an adjective (`amod`, not shown above) or a
+  compound (`compound`)
+
+The first step is to pick an anchor token for the pattern. Since it's the root
+of the dependency parse, `founded` is a good choice here. It is often easier to
+construct patterns when all dependency relation operators point from the head to
+the children. In this example, we'll only use `>`, which connects a head to an
+immediate dependent as `head > child`.
+
+The simplest dependency matcher pattern will identify and name a single token in
+the tree:
+
+```python
+### {executable="true"}
+import spacy
+from spacy.matcher import DependencyMatcher
+
+nlp = spacy.load("en_core_web_sm")
+matcher = DependencyMatcher(nlp.vocab)
+
+pattern = [
+  {
+    "RIGHT_ID": "anchor_founded",      # unique name
+    "RIGHT_ATTRS": {"ORTH": "founded"} # token pattern for "founded"
+  }
+]
+matcher.add("FOUNDED", [pattern])
+doc = nlp("Smith founded two companies.")
+matches = matcher(doc)
+print(matches) # [(4851363122962674176, [1])]
+```
+
+Now that we have a named anchor token (`anchor_founded`), we can add the founder
+as the immediate dependent (`>`) of `founded` with the dependency label `nsubj`:
+
+```python
+pattern = [
+    {
+        "RIGHT_ID": "anchor_founded",
+        "RIGHT_ATTRS": {"ORTH": "founded"}
+    },
+    {
+        "LEFT_ID": "anchor_founded",
+        "REL_OP": ">",
+        "RIGHT_ID": "subject",
+        "RIGHT_ATTRS": {"DEP": "nsubj"},
+    }
+]
+```
+
+The direct object (`dobj`) is added in the same way:
+
+```python
+pattern = [ ...
+    {
+        "LEFT_ID": "anchor_founded",
+        "REL_OP": ">",
+        "RIGHT_ID": "founded_object",
+        "RIGHT_ATTRS": {"DEP": "dobj"},
+    }
+]
+```
+
+When the subject and object tokens are added, they are required to have names
+under the key `RIGHT_ID`, which are allowed to be any unique string, e.g.
+`founded_subject`. These names can then be used as `LEFT_ID` to link new tokens
+into the pattern. For the final part of our pattern, we'll specify that the
+token `founded_object` should have a modifier with the dependency relation
+`amod` or `compound`:
+
+```python
+pattern = [ ...
+    {
+        "LEFT_ID": "founded_object",
+        "REL_OP": ">",
+        "RIGHT_ID": "founded_object_modifier",
+        "RIGHT_ATTRS": {"DEP": {"IN": ["amod", "compound"]}},
+    }
+]
+```
+
+You can picture the process of creating a dependency matcher pattern as defining
+an anchor token on the left and building up the pattern by linking tokens
+one-by-one on the right using relation operators. To create a valid pattern,
+each new token needs to be linked to an existing token on its left. As for
+`founded` in this example, a token may be linked to more than one token on its
+right:
+
+<!-- TODO: adjust for final example, prettify -->
+
+![Dependency matcher pattern](../images/dep-match-diagram.svg)
+
+The full pattern comes together as shown in the example below:
+
+```python
+### {executable="true"}
+import spacy
+from spacy.matcher import DependencyMatcher
+
+nlp = spacy.load("en_core_web_sm")
+matcher = DependencyMatcher(nlp.vocab)
+
+pattern = [
+    {
+        "RIGHT_ID": "anchor_founded",
+        "RIGHT_ATTRS": {"ORTH": "founded"}
+    },
+    {
+        "LEFT_ID": "anchor_founded",
+        "REL_OP": ">",
+        "RIGHT_ID": "subject",
+        "RIGHT_ATTRS": {"DEP": "nsubj"},
+    },
+    {
+        "LEFT_ID": "anchor_founded",
+        "REL_OP": ">",
+        "RIGHT_ID": "founded_object",
+        "RIGHT_ATTRS": {"DEP": "dobj"},
+    },
+    {
+        "LEFT_ID": "founded_object",
+        "REL_OP": ">",
+        "RIGHT_ID": "founded_object_modifier",
+        "RIGHT_ATTRS": {"DEP": {"IN": ["amod", "compound"]}},
+    }
+]
+
+matcher.add("FOUNDED", [pattern])
+doc = nlp("Lee, an experienced CEO, has founded two AI startups.")
+
+matches = matcher(doc)
+print(matches) # [(4851363122962674176, [7, 0, 10, 9])]
+
+# each token_id corresponds to one pattern dict
+match_id, token_ids = matches[0]
+for i in range(len(token_ids)):
+    print(pattern[i]["RIGHT_ID"] + ":", doc[token_ids[i]].text)
+```
+
+<Infobox title="Important note on speed" variant="warning">
+
+The dependency matcher may be slow when token patterns can potentially match
+many tokens in the sentence or when relation operators allow longer paths in the
+dependency parse, e.g. `<<`, `>>`, `.*` and `;*`.
+
+To improve the matcher speed, try to make your token patterns and operators as
+specific as possible. For example, use `>` instead of `>>` if possible and use
+token patterns that include dependency labels and other token attributes instead
+of patterns such as `{}` that match any token in the sentence.
+
+</Infobox>
+
 ## Rule-based entity recognition {#entityruler new="2.1"}
 
-The [`EntityRuler`](/api/entityruler) is an exciting new component that lets you
-add named entities based on pattern dictionaries, and makes it easy to combine
+The [`EntityRuler`](/api/entityruler) is a component that lets you add named
+entities based on pattern dictionaries, which makes it easy to combine
 rule-based and statistical named entity recognition for even more powerful
 models.
 
diff --git a/website/docs/usage/v3.md b/website/docs/usage/v3.md
index 6a1499bdf..e40297b58 100644
--- a/website/docs/usage/v3.md
+++ b/website/docs/usage/v3.md
@@ -153,6 +153,7 @@ add to your pipeline and customize for your use case:
 | [`Morphologizer`](/api/morphologizer)           | Trainable component to predict morphological features.                                                                                                                                                                  |
 | [`Lemmatizer`](/api/lemmatizer)                 | Standalone component for rule-based and lookup lemmatization.                                                                                                                                                           |
 | [`AttributeRuler`](/api/attributeruler)         | Component for setting token attributes using match patterns.                                                                                                                                                            |
+| [`DependencyMatcher`](/api/dependencymatcher)   | Component for matching subtrees within a dependency parse.                                                                                                                                                              |
 | [`Transformer`](/api/transformer)               | Component for using [transformer models](/usage/embeddings-transformers) in your pipeline, accessing outputs and aligning tokens. Provided via [`spacy-transformers`](https://github.com/explosion/spacy-transformers). |
 
 <Infobox title="Details & Documentation" emoji="📖" list>
@@ -314,7 +315,8 @@ format for documenting argument and return types.
   [`Transformer`](/api/transformer), [`Lemmatizer`](/api/lemmatizer),
   [`Morphologizer`](/api/morphologizer),
   [`AttributeRuler`](/api/attributeruler),
-  [`SentenceRecognizer`](/api/sentencerecognizer), [`Pipe`](/api/pipe),
+  [`SentenceRecognizer`](/api/sentencerecognizer),
+  [`DependencyMatcher`](/api/dependencymatcher), [`Pipe`](/api/pipe),
   [`Corpus`](/api/corpus)
 
 </Infobox>

From 8b5594df869db5e573a90a8150380a13b4ae543f Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Wed, 2 Sep 2020 20:32:01 +0200
Subject: [PATCH 36/71] Remove near-duplicate test

---
 .../tests/matcher/test_dependency_matcher.py  | 52 -------------------
 1 file changed, 52 deletions(-)

diff --git a/spacy/tests/matcher/test_dependency_matcher.py b/spacy/tests/matcher/test_dependency_matcher.py
index 69e85140d..72005cc82 100644
--- a/spacy/tests/matcher/test_dependency_matcher.py
+++ b/spacy/tests/matcher/test_dependency_matcher.py
@@ -332,55 +332,3 @@ def test_dependency_matcher_ops(en_vocab, doc, left, right, op, num_matches):
     matcher.add("pattern", [pattern])
     matches = matcher(doc)
     assert len(matches) == num_matches
-
-
-@pytest.mark.parametrize(
-    "pattern",
-    [
-        # empty
-        [],
-        # unsupported op
-        [
-            {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
-            {
-                "LEFT_ID": "jumped",
-                "REL_OP": "==",
-                "RIGHT_ID": "fox",
-                "RIGHT_ATTRS": {"ORTH": "fox"},
-            },
-        ],
-        # first dict isn't just a node
-        [
-            {
-                "LEFT_ID": "jumped",
-                "REL_OP": "==",
-                "RIGHT_ID": "fox",
-                "RIGHT_ATTRS": {"ORTH": "fox"},
-            },
-        ],
-        # missing op
-        [
-            {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
-            {"LEFT_ID": "jumped", "RIGHT_ID": "fox", "RIGHT_ATTRS": {"ORTH": "fox"},},
-        ],
-        # missing left-hand ID
-        [
-            {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
-            {"REL_OP": ">", "RIGHT_ID": "fox", "RIGHT_ATTRS": {"ORTH": "fox"},},
-        ],
-        # missing right-hand ID
-        [
-            {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
-            {"LEFT_ID": "jumped", "REL_OP": ">", "RIGHT_ATTRS": {"ORTH": "fox"},},
-        ],
-        # missing right-hand attrs
-        [
-            {"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
-            {"LEFT_ID": "jumped", "REL_OP": ">", "RIGHT_ID": "fox",},
-        ],
-    ],
-)
-def test_dependency_matcher_pattern_validation(en_vocab, pattern):
-    matcher = DependencyMatcher(en_vocab)
-    with pytest.raises(ValueError):
-        matcher.add("pattern", [pattern])

From 1815c613c90d29d3d18ed377d166cf1dec3813ad Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 3 Sep 2020 10:07:45 +0200
Subject: [PATCH 37/71] Update docs [ci skip]

---
 website/docs/usage/layers-architectures.md | 13 ++++----
 website/docs/usage/training.md             | 39 ++++++++++------------
 2 files changed, 25 insertions(+), 27 deletions(-)

diff --git a/website/docs/usage/layers-architectures.md b/website/docs/usage/layers-architectures.md
index 419048f65..e24b776c8 100644
--- a/website/docs/usage/layers-architectures.md
+++ b/website/docs/usage/layers-architectures.md
@@ -103,7 +103,7 @@ bit of validation goes a long way, especially if you
 tools to highlight these errors early. The config file is also validated at the
 beginning of training, to verify that all the types match correctly.
 
-<Accordion title="Tip: Static type checking in your editor" emoji="💡">
+<Accordion title="Tip: Static type checking in your editor">
 
 If you're using a modern editor like Visual Studio Code, you can
 [set up `mypy`](https://thinc.ai/docs/usage-type-checking#install) with the
@@ -143,11 +143,11 @@ nO = null
 
 spaCy has two additional built-in `textcat` architectures, and you can easily
 use those by swapping out the definition of the textcat's model. For instance,
-to use the simpel and fast [bag-of-words model](/api/architectures#TextCatBOW),
-you can change the config to:
+to use the simple and fast bag-of-words model
+[TextCatBOW](/api/architectures#TextCatBOW), you can change the config to:
 
 ```ini
-### config.cfg (excerpt)
+### config.cfg (excerpt) {highlight="6-10"}
 [components.textcat]
 factory = "textcat"
 labels = []
@@ -160,8 +160,9 @@ no_output_layer = false
 nO = null
 ```
 
-The details of all prebuilt architectures and their parameters, can be consulted
-on the [API page for model architectures](/api/architectures).
+For details on all pre-defined architectures shipped with spaCy and how to
+configure them, check out the [model architectures](/api/architectures)
+documentation.
 
 ### Defining sublayers {#sublayers}
 
diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md
index 2967a0353..43e1193ab 100644
--- a/website/docs/usage/training.md
+++ b/website/docs/usage/training.md
@@ -669,10 +669,9 @@ def custom_logger(log_path):
 
 #### Example: Custom batch size schedule {#custom-code-schedule}
 
-You can also implement your own batch size schedule to use
-during training. The `@spacy.registry.schedules` decorator lets you register
-that function in the `schedules` [registry](/api/top-level#registry) and assign
-it a string name:
+You can also implement your own batch size schedule to use during training. The
+`@spacy.registry.schedules` decorator lets you register that function in the
+`schedules` [registry](/api/top-level#registry) and assign it a string name:
 
 > #### Why the version in the name?
 >
@@ -806,14 +805,22 @@ def filter_batch(size: int) -> Callable[[Iterable[Example]], Iterator[List[Examp
 
 ### Defining custom architectures {#custom-architectures}
 
-Built-in pipeline components such as the tagger or named entity recognizer are 
-constructed with default neural network [models](/api/architectures). 
-You can change the model architecture 
-entirely by implementing your own custom models and providing those in the config 
-when creating the pipeline component. See the
-documentation on
-[layers and model architectures](/usage/layers-architectures) for more details.
+Built-in pipeline components such as the tagger or named entity recognizer are
+constructed with default neural network [models](/api/architectures). You can
+change the model architecture entirely by implementing your own custom models
+and providing those in the config when creating the pipeline component. See the
+documentation on [layers and model architectures](/usage/layers-architectures)
+for more details.
 
+> ```ini
+> ### config.cfg
+> [components.tagger]
+> factory = "tagger"
+>
+> [components.tagger.model]
+> @architectures = "custom_neural_network.v1"
+> output_width = 512
+> ```
 
 ```python
 ### functions.py
@@ -828,16 +835,6 @@ def MyModel(output_width: int) -> Model[List[Doc], List[Floats2d]]:
     return create_model(output_width)
 ```
 
-```ini
-### config.cfg (excerpt)
-[components.tagger]
-factory = "tagger"
-
-[components.tagger.model]
-@architectures = "custom_neural_network.v1"
-output_width = 512
-```
-
 ## Internal training API {#api}
 
 <Infobox variant="warning">

From 5af432e0f2db1d6aeba7a031a8a707fb90b6332a Mon Sep 17 00:00:00 2001
From: Yohei Tamura <tamuhey@gmail.com>
Date: Thu, 3 Sep 2020 17:09:03 +0900
Subject: [PATCH 38/71] fix for empty string (#5936)

---
 spacy/tests/doc/test_doc_api.py | 19 ++++++++++---------
 spacy/tokens/doc.pyx            |  6 ++++--
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py
index 954181df5..b37a31e43 100644
--- a/spacy/tests/doc/test_doc_api.py
+++ b/spacy/tests/doc/test_doc_api.py
@@ -317,7 +317,8 @@ def test_doc_from_array_morph(en_vocab):
 
 
 def test_doc_api_from_docs(en_tokenizer, de_tokenizer):
-    en_texts = ["Merging the docs is fun.", "They don't think alike."]
+    en_texts = ["Merging the docs is fun.", "", "They don't think alike."]
+    en_texts_without_empty = [t for t in en_texts if len(t)]
     de_text = "Wie war die Frage?"
     en_docs = [en_tokenizer(text) for text in en_texts]
     docs_idx = en_texts[0].index("docs")
@@ -338,14 +339,14 @@ def test_doc_api_from_docs(en_tokenizer, de_tokenizer):
         Doc.from_docs(en_docs + [de_doc])
 
     m_doc = Doc.from_docs(en_docs)
-    assert len(en_docs) == len(list(m_doc.sents))
+    assert len(en_texts_without_empty) == len(list(m_doc.sents))
     assert len(str(m_doc)) > len(en_texts[0]) + len(en_texts[1])
-    assert str(m_doc) == " ".join(en_texts)
+    assert str(m_doc) == " ".join(en_texts_without_empty)
     p_token = m_doc[len(en_docs[0]) - 1]
     assert p_token.text == "." and bool(p_token.whitespace_)
     en_docs_tokens = [t for doc in en_docs for t in doc]
     assert len(m_doc) == len(en_docs_tokens)
-    think_idx = len(en_texts[0]) + 1 + en_texts[1].index("think")
+    think_idx = len(en_texts[0]) + 1 + en_texts[2].index("think")
     assert m_doc[9].idx == think_idx
     with pytest.raises(AttributeError):
         # not callable, because it was not set via set_extension
@@ -353,14 +354,14 @@ def test_doc_api_from_docs(en_tokenizer, de_tokenizer):
     assert len(m_doc.user_data) == len(en_docs[0].user_data)  # but it's there
 
     m_doc = Doc.from_docs(en_docs, ensure_whitespace=False)
-    assert len(en_docs) == len(list(m_doc.sents))
-    assert len(str(m_doc)) == len(en_texts[0]) + len(en_texts[1])
+    assert len(en_texts_without_empty) == len(list(m_doc.sents))
+    assert len(str(m_doc)) == sum(len(t) for t in en_texts)
     assert str(m_doc) == "".join(en_texts)
     p_token = m_doc[len(en_docs[0]) - 1]
     assert p_token.text == "." and not bool(p_token.whitespace_)
     en_docs_tokens = [t for doc in en_docs for t in doc]
     assert len(m_doc) == len(en_docs_tokens)
-    think_idx = len(en_texts[0]) + 0 + en_texts[1].index("think")
+    think_idx = len(en_texts[0]) + 0 + en_texts[2].index("think")
     assert m_doc[9].idx == think_idx
 
     m_doc = Doc.from_docs(en_docs, attrs=["lemma", "length", "pos"])
@@ -369,12 +370,12 @@ def test_doc_api_from_docs(en_tokenizer, de_tokenizer):
         assert list(m_doc.sents)
     assert len(str(m_doc)) > len(en_texts[0]) + len(en_texts[1])
     # space delimiter considered, although spacy attribute was missing
-    assert str(m_doc) == " ".join(en_texts)
+    assert str(m_doc) == " ".join(en_texts_without_empty)
     p_token = m_doc[len(en_docs[0]) - 1]
     assert p_token.text == "." and bool(p_token.whitespace_)
     en_docs_tokens = [t for doc in en_docs for t in doc]
     assert len(m_doc) == len(en_docs_tokens)
-    think_idx = len(en_texts[0]) + 1 + en_texts[1].index("think")
+    think_idx = len(en_texts[0]) + 1 + en_texts[2].index("think")
     assert m_doc[9].idx == think_idx
 
 
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index cd080bf35..3c7b4f8b3 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -920,7 +920,9 @@ cdef class Doc:
                         warnings.warn(Warnings.W101.format(name=name))
                 else:
                     warnings.warn(Warnings.W102.format(key=key, value=value))
-            char_offset += len(doc.text) if not ensure_whitespace or doc[-1].is_space else len(doc.text) + 1
+            char_offset += len(doc.text)
+            if ensure_whitespace and not (len(doc) > 0 and doc[-1].is_space):
+                char_offset += 1
 
         arrays = [doc.to_array(attrs) for doc in docs]
 
@@ -932,7 +934,7 @@ cdef class Doc:
             token_offset = -1
             for doc in docs[:-1]:
                 token_offset += len(doc)
-                if not doc[-1].is_space:
+                if not (len(doc) > 0 and doc[-1].is_space):
                     concat_spaces[token_offset] = True
 
         concat_array = numpy.concatenate(arrays)

From b02ad8045bcec91ac8c234e3cb6c42f93e3a115e Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 3 Sep 2020 10:10:13 +0200
Subject: [PATCH 39/71] Update docs [ci skip]

---
 website/docs/usage/training.md | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md
index 43e1193ab..2fabd3f7d 100644
--- a/website/docs/usage/training.md
+++ b/website/docs/usage/training.md
@@ -377,7 +377,8 @@ A **model architecture** is a function that wires up a Thinc
 component or as a layer of a larger network. You can use Thinc as a thin
 [wrapper around frameworks](https://thinc.ai/docs/usage-frameworks) such as
 PyTorch, TensorFlow or MXNet, or you can implement your logic in Thinc
-[directly](https://thinc.ai/docs/usage-models).
+[directly](https://thinc.ai/docs/usage-models). For more details and examples,
+see the usage guide on [layers and architectures](/usage/layers-architectures).
 
 spaCy's built-in components will never construct their `Model` instances
 themselves, so you won't have to subclass the component to change its model
@@ -395,8 +396,6 @@ different tasks. For example:
 | [TransitionBasedParser](/api/architectures#TransitionBasedParser) | Build a [transition-based parser](https://explosion.ai/blog/parsing-english-in-python) model used in the default [`EntityRecognizer`](/api/entityrecognizer) and [`DependencyParser`](/api/dependencyparser). ~~Model[List[Docs], List[List[Floats2d]]]~~ |
 | [TextCatEnsemble](/api/architectures#TextCatEnsemble)             | Stacked ensemble of a bag-of-words model and a neural network model with an internal CNN embedding layer. Used in the default [`TextCategorizer`](/api/textcategorizer). ~~Model[List[Doc], Floats2d]~~                                                   |
 
-<!-- TODO: link to not yet existing usage page on custom architectures etc. -->
-
 ### Metrics, training output and weighted scores {#metrics}
 
 When you train a model using the [`spacy train`](/api/cli#train) command, you'll
@@ -474,11 +473,9 @@ Each custom function can have any numbers of arguments that are passed in via
 the [config](#config), just the built-in functions. If your function defines
 **default argument values**, spaCy is able to auto-fill your config when you run
 [`init fill-config`](/api/cli#init-fill-config). If you want to make sure that a
-given parameter is always explicitely set in the config, avoid setting a default
+given parameter is always explicitly set in the config, avoid setting a default
 value for it.
 
-<!-- TODO: possibly link to new (not yet created) page on creating models ? -->
-
 ### Training with custom code {#custom-code}
 
 > #### Example

From ef0d0630a4fa5af2acfd71187a03b98784d80fed Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Thu, 3 Sep 2020 12:51:04 +0200
Subject: [PATCH 40/71] Let Language.use_params work with falsey inputs

The Language.use_params method was failing if you passed in None, which
meant we had to use awkward conditionals for the parameter averaging.
This solves the problem.
---
 spacy/language.py | 43 +++++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/spacy/language.py b/spacy/language.py
index 8e7c39b90..7a354ee3d 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1,5 +1,5 @@
 from typing import Optional, Any, Dict, Callable, Iterable, Union, List, Pattern
-from typing import Tuple, Iterator
+from typing import Tuple, Iterator, Optional
 from dataclasses import dataclass
 import random
 import itertools
@@ -1275,7 +1275,7 @@ class Language:
         return results
 
     @contextmanager
-    def use_params(self, params: dict):
+    def use_params(self, params: Optional[dict]):
         """Replace weights of models in the pipeline with those provided in the
         params dictionary. Can be used as a contextmanager, in which case,
         models go back to their original weights after the block.
@@ -1288,24 +1288,27 @@ class Language:
 
         DOCS: https://spacy.io/api/language#use_params
         """
-        contexts = [
-            pipe.use_params(params)
-            for name, pipe in self.pipeline
-            if hasattr(pipe, "use_params") and hasattr(pipe, "model")
-        ]
-        # TODO: Having trouble with contextlib
-        # Workaround: these aren't actually context managers atm.
-        for context in contexts:
-            try:
-                next(context)
-            except StopIteration:
-                pass
-        yield
-        for context in contexts:
-            try:
-                next(context)
-            except StopIteration:
-                pass
+        if not params:
+            yield
+        else:
+            contexts = [
+                pipe.use_params(params)
+                for name, pipe in self.pipeline
+                if hasattr(pipe, "use_params") and hasattr(pipe, "model")
+            ]
+            # TODO: Having trouble with contextlib
+            # Workaround: these aren't actually context managers atm.
+            for context in contexts:
+                try:
+                    next(context)
+                except StopIteration:
+                    pass
+            yield
+            for context in contexts:
+                try:
+                    next(context)
+                except StopIteration:
+                    pass
 
     def pipe(
         self,

From b5a0657fd6a104ff61c7c18a0fbdd1c251df5d31 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 3 Sep 2020 13:13:03 +0200
Subject: [PATCH 41/71] "model" terminology consistency in docs

---
 netlify.toml                                  |   2 +-
 spacy/cli/__init__.py                         |   8 +-
 spacy/cli/_util.py                            |   2 +-
 spacy/cli/convert.py                          |   2 +-
 spacy/cli/debug_data.py                       |  10 +-
 spacy/cli/download.py                         |  29 ++-
 spacy/cli/evaluate.py                         |   4 +-
 spacy/cli/info.py                             |  18 +-
 spacy/cli/init_config.py                      |   4 +-
 spacy/cli/init_model.py                       |  28 ++-
 spacy/cli/package.py                          |  30 +--
 spacy/cli/pretrain.py                         |   9 +-
 spacy/cli/profile.py                          |   6 +-
 spacy/cli/train.py                            |  10 +-
 spacy/cli/validate.py                         |  16 +-
 spacy/language.py                             |   2 +-
 website/docs/api/cli.md                       | 152 +++++------
 website/docs/api/data-formats.md              | 104 ++++----
 website/docs/api/dependencymatcher.md         |   4 +-
 website/docs/api/entitylinker.md              |   4 +-
 website/docs/api/language.md                  |  81 +++---
 website/docs/api/pipe.md                      |   2 +-
 website/docs/api/top-level.md                 | 133 +++++-----
 website/docs/models/index.md                  |  65 +++--
 website/docs/usage/101/_pipelines.md          |  19 +-
 website/docs/usage/101/_pos-deps.md           |  13 +-
 website/docs/usage/101/_serialization.md      |  10 +-
 website/docs/usage/101/_training.md           |  20 +-
 website/docs/usage/101/_vectors-similarity.md |  35 ++-
 website/docs/usage/index.md                   |  64 +++--
 website/docs/usage/linguistic-features.md     | 136 +++++-----
 website/docs/usage/models.md                  | 235 +++++++++---------
 website/docs/usage/processing-pipelines.md    | 137 +++++-----
 website/docs/usage/projects.md                |  86 +++----
 website/docs/usage/rule-based-matching.md     |  55 ++--
 website/docs/usage/saving-loading.md          | 193 +++++++-------
 website/docs/usage/spacy-101.md               |  56 +++--
 website/docs/usage/training.md                |  85 +++----
 website/docs/usage/v3.md                      |  90 +++----
 website/docs/usage/visualizers.md             |  14 +-
 website/meta/sidebars.json                    |   4 +-
 website/src/components/tag.js                 |   2 +-
 website/src/templates/models.js               |  10 +-
 website/src/widgets/quickstart-install.js     |   4 +-
 website/src/widgets/quickstart-models.js      |   4 +-
 45 files changed, 1006 insertions(+), 991 deletions(-)

diff --git a/netlify.toml b/netlify.toml
index 2f3e350e6..3c17b876c 100644
--- a/netlify.toml
+++ b/netlify.toml
@@ -24,7 +24,7 @@ redirects = [
     {from = "/docs/usage/customizing-tokenizer", to = "/usage/linguistic-features#tokenization", force = true},
     {from = "/docs/usage/language-processing-pipeline", to = "/usage/processing-pipelines", force = true},
     {from = "/docs/usage/customizing-pipeline", to = "/usage/processing-pipelines", force = true},
-    {from = "/docs/usage/training-ner", to = "/usage/training#ner", force = true},
+    {from = "/docs/usage/training-ner", to = "/usage/training", force = true},
     {from = "/docs/usage/tutorials", to = "/usage/examples", force = true},
     {from = "/docs/usage/data-model", to = "/api", force = true},
     {from = "/docs/usage/cli", to = "/api/cli", force = true},
diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
index b47c1c16b..92cb76971 100644
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -29,9 +29,9 @@ from .project.document import project_document  # noqa: F401
 
 @app.command("link", no_args_is_help=True, deprecated=True, hidden=True)
 def link(*args, **kwargs):
-    """As of spaCy v3.0, model symlinks are deprecated. You can load models
-    using their full names or from a directory path."""
+    """As of spaCy v3.0, symlinks like "en" are deprecated. You can load trained
+    pipeline packages using their full names or from a directory path."""
     msg.warn(
-        "As of spaCy v3.0, model symlinks are deprecated. You can load models "
-        "using their full names or from a directory path."
+        "As of spaCy v3.0, model symlinks are deprecated. You can load trained "
+        "pipeline packages using their full names or from a directory path."
     )
diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index cfa126cc4..6a24a4ba4 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -36,7 +36,7 @@ DEBUG_HELP = """Suite of helpful commands for debugging and profiling. Includes
 commands to check and validate your config files, training and evaluation data,
 and custom model implementations.
 """
-INIT_HELP = """Commands for initializing configs and models."""
+INIT_HELP = """Commands for initializing configs and pipeline packages."""
 
 # Wrappers for Typer's annotations. Initially created to set defaults and to
 # keep the names short, but not needed at the moment.
diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py
index f73c2f2c0..2a24bd145 100644
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -44,7 +44,7 @@ def convert_cli(
     file_type: FileTypes = Opt("spacy", "--file-type", "-t", help="Type of data to produce"),
     n_sents: int = Opt(1, "--n-sents", "-n", help="Number of sentences per doc (0 to disable)"),
     seg_sents: bool = Opt(False, "--seg-sents", "-s", help="Segment sentences (for -c ner)"),
-    model: Optional[str] = Opt(None, "--model", "-b", help="Model for sentence segmentation (for -s)"),
+    model: Optional[str] = Opt(None, "--model", "-b", help="Trained spaCy pipeline for sentence segmentation (for -s)"),
     morphology: bool = Opt(False, "--morphology", "-m", help="Enable appending morphology to tags"),
     merge_subtokens: bool = Opt(False, "--merge-subtokens", "-T", help="Merge CoNLL-U subtokens"),
     converter: str = Opt("auto", "--converter", "-c", help=f"Converter: {tuple(CONVERTERS.keys())}"),
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index 2f48a29cd..a4269796f 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -18,7 +18,7 @@ from .. import util
 NEW_LABEL_THRESHOLD = 50
 # Minimum number of expected occurrences of dependency labels
 DEP_LABEL_THRESHOLD = 20
-# Minimum number of expected examples to train a blank model
+# Minimum number of expected examples to train a new pipeline
 BLANK_MODEL_MIN_THRESHOLD = 100
 BLANK_MODEL_THRESHOLD = 2000
 
@@ -148,7 +148,7 @@ def debug_data(
     msg.text(f"Language: {config['nlp']['lang']}")
     msg.text(f"Training pipeline: {', '.join(pipeline)}")
     if resume_components:
-        msg.text(f"Components from other models: {', '.join(resume_components)}")
+        msg.text(f"Components from other pipelines: {', '.join(resume_components)}")
     if frozen_components:
         msg.text(f"Frozen components: {', '.join(frozen_components)}")
     msg.text(f"{len(train_dataset)} training docs")
@@ -164,9 +164,7 @@ def debug_data(
     # TODO: make this feedback more fine-grained and report on updated
     # components vs. blank components
     if not resume_components and len(train_dataset) < BLANK_MODEL_THRESHOLD:
-        text = (
-            f"Low number of examples to train from a blank model ({len(train_dataset)})"
-        )
+        text = f"Low number of examples to train a new pipeline ({len(train_dataset)})"
         if len(train_dataset) < BLANK_MODEL_MIN_THRESHOLD:
             msg.fail(text)
         else:
@@ -214,7 +212,7 @@ def debug_data(
             show=verbose,
         )
     else:
-        msg.info("No word vectors present in the model")
+        msg.info("No word vectors present in the package")
 
     if "ner" in factory_names:
         # Get all unique NER labels present in the data
diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index e55e6e40e..3d5e0a765 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -17,16 +17,19 @@ from ..errors import OLD_MODEL_SHORTCUTS
 def download_cli(
     # fmt: off
     ctx: typer.Context,
-    model: str = Arg(..., help="Name of model to download"),
+    model: str = Arg(..., help="Name of pipeline package to download"),
     direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
     # fmt: on
 ):
     """
-    Download compatible model from default download path using pip. If --direct
-    flag is set, the command expects the full model name with version.
-    For direct downloads, the compatibility check will be skipped. All
+    Download compatible trained pipeline from the default download path using
+    pip. If --direct flag is set, the command expects the full package name with
+    version. For direct downloads, the compatibility check will be skipped. All
     additional arguments provided to this command will be passed to `pip install`
-    on model installation.
+    on package installation.
+
+    DOCS: https://spacy.io/api/cli#download
+    AVAILABLE PACKAGES: https://spacy.io/models
     """
     download(model, direct, *ctx.args)
 
@@ -34,11 +37,11 @@ def download_cli(
 def download(model: str, direct: bool = False, *pip_args) -> None:
     if not is_package("spacy") and "--no-deps" not in pip_args:
         msg.warn(
-            "Skipping model package dependencies and setting `--no-deps`. "
+            "Skipping pipeline package dependencies and setting `--no-deps`. "
             "You don't seem to have the spaCy package itself installed "
             "(maybe because you've built from source?), so installing the "
-            "model dependencies would cause spaCy to be downloaded, which "
-            "probably isn't what you want. If the model package has other "
+            "package dependencies would cause spaCy to be downloaded, which "
+            "probably isn't what you want. If the pipeline package has other "
             "dependencies, you'll have to install them manually."
         )
         pip_args = pip_args + ("--no-deps",)
@@ -53,7 +56,7 @@ def download(model: str, direct: bool = False, *pip_args) -> None:
         if model in OLD_MODEL_SHORTCUTS:
             msg.warn(
                 f"As of spaCy v3.0, shortcuts like '{model}' are deprecated. Please"
-                f"use the full model name '{OLD_MODEL_SHORTCUTS[model]}' instead."
+                f"use the full pipeline package name '{OLD_MODEL_SHORTCUTS[model]}' instead."
             )
             model_name = OLD_MODEL_SHORTCUTS[model]
         compatibility = get_compatibility()
@@ -61,7 +64,7 @@ def download(model: str, direct: bool = False, *pip_args) -> None:
         download_model(dl_tpl.format(m=model_name, v=version), pip_args)
     msg.good(
         "Download and installation successful",
-        f"You can now load the model via spacy.load('{model_name}')",
+        f"You can now load the package via spacy.load('{model_name}')",
     )
 
 
@@ -71,7 +74,7 @@ def get_compatibility() -> dict:
     if r.status_code != 200:
         msg.fail(
             f"Server error ({r.status_code})",
-            f"Couldn't fetch compatibility table. Please find a model for your spaCy "
+            f"Couldn't fetch compatibility table. Please find a package for your spaCy "
             f"installation (v{about.__version__}), and download it manually. "
             f"For more details, see the documentation: "
             f"https://spacy.io/usage/models",
@@ -80,7 +83,7 @@ def get_compatibility() -> dict:
     comp_table = r.json()
     comp = comp_table["spacy"]
     if version not in comp:
-        msg.fail(f"No compatible models found for v{version} of spaCy", exits=1)
+        msg.fail(f"No compatible packages found for v{version} of spaCy", exits=1)
     return comp[version]
 
 
@@ -88,7 +91,7 @@ def get_version(model: str, comp: dict) -> str:
     model = get_base_version(model)
     if model not in comp:
         msg.fail(
-            f"No compatible model found for '{model}' (spaCy v{about.__version__})",
+            f"No compatible package found for '{model}' (spaCy v{about.__version__})",
             exits=1,
         )
     return comp[model][0]
diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index 3847c74f3..3898c89a1 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -26,8 +26,8 @@ def evaluate_cli(
     # fmt: on
 ):
     """
-    Evaluate a model. Expects a loadable spaCy model and evaluation data in the
-    binary .spacy format. The --gold-preproc option sets up the evaluation
+    Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation
+    data in the binary .spacy format. The --gold-preproc option sets up the evaluation
     examples with gold-standard sentences and tokens for the predictions. Gold
     preprocessing helps the annotations align to the tokenization, and may
     result in sequences of more consistent length. However, it may reduce
diff --git a/spacy/cli/info.py b/spacy/cli/info.py
index ca082b939..98cd042a8 100644
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@@ -12,14 +12,14 @@ from .. import about
 @app.command("info")
 def info_cli(
     # fmt: off
-    model: Optional[str] = Arg(None, help="Optional model name"),
+    model: Optional[str] = Arg(None, help="Optional loadable spaCy pipeline"),
     markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
     silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
     # fmt: on
 ):
     """
-    Print info about spaCy installation. If a model is speficied as an argument,
-    print model information. Flag --markdown prints details in Markdown for easy
+    Print info about spaCy installation. If a pipeline is specified as an argument,
+    print its meta information. Flag --markdown prints details in Markdown for easy
     copy-pasting to GitHub issues.
     """
     info(model, markdown=markdown, silent=silent)
@@ -30,14 +30,16 @@ def info(
 ) -> Union[str, dict]:
     msg = Printer(no_print=silent, pretty=not silent)
     if model:
-        title = f"Info about model '{model}'"
+        title = f"Info about pipeline '{model}'"
         data = info_model(model, silent=silent)
     else:
         title = "Info about spaCy"
         data = info_spacy()
     raw_data = {k.lower().replace(" ", "_"): v for k, v in data.items()}
-    if "Models" in data and isinstance(data["Models"], dict):
-        data["Models"] = ", ".join(f"{n} ({v})" for n, v in data["Models"].items())
+    if "Pipelines" in data and isinstance(data["Pipelines"], dict):
+        data["Pipelines"] = ", ".join(
+            f"{n} ({v})" for n, v in data["Pipelines"].items()
+        )
     markdown_data = get_markdown(data, title=title)
     if markdown:
         if not silent:
@@ -63,7 +65,7 @@ def info_spacy() -> Dict[str, any]:
         "Location": str(Path(__file__).parent.parent),
         "Platform": platform.platform(),
         "Python version": platform.python_version(),
-        "Models": all_models,
+        "Pipelines": all_models,
     }
 
 
@@ -81,7 +83,7 @@ def info_model(model: str, *, silent: bool = True) -> Dict[str, Any]:
         model_path = model
     meta_path = model_path / "meta.json"
     if not meta_path.is_file():
-        msg.fail("Can't find model meta.json", meta_path, exits=1)
+        msg.fail("Can't find pipeline meta.json", meta_path, exits=1)
     meta = srsly.read_json(meta_path)
     if model_path.resolve() != model_path:
         meta["source"] = str(model_path.resolve())
diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
index 1e1e55e06..b75718a2e 100644
--- a/spacy/cli/init_config.py
+++ b/spacy/cli/init_config.py
@@ -27,7 +27,7 @@ def init_config_cli(
     # fmt: off
     output_file: Path = Arg(..., help="File to save config.cfg to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
     lang: Optional[str] = Opt("en", "--lang", "-l", help="Two-letter code of the language to use"),
-    pipeline: Optional[str] = Opt("tagger,parser,ner", "--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include in the model (without 'tok2vec' or 'transformer')"),
+    pipeline: Optional[str] = Opt("tagger,parser,ner", "--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')"),
     optimize: Optimizations = Opt(Optimizations.efficiency.value, "--optimize", "-o", help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters."),
     cpu: bool = Opt(False, "--cpu", "-C", help="Whether the model needs to run on CPU. This will impact the choice of architecture, pretrained weights and related hyperparameters."),
     # fmt: on
@@ -168,7 +168,7 @@ def save_config(
             output_file.parent.mkdir(parents=True)
         config.to_disk(output_file, interpolate=False)
         msg.good("Saved config", output_file)
-        msg.text("You can now add your data and train your model:")
+        msg.text("You can now add your data and train your pipeline:")
         variables = ["--paths.train ./train.spacy", "--paths.dev ./dev.spacy"]
         if not no_print:
             print(f"{COMMAND} train {output_file.parts[-1]} {' '.join(variables)}")
diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py
index 4fdd2bbbc..071d5f659 100644
--- a/spacy/cli/init_model.py
+++ b/spacy/cli/init_model.py
@@ -28,7 +28,7 @@ except ImportError:
 DEFAULT_OOV_PROB = -20
 
 
-@init_cli.command("model")
+@init_cli.command("vectors")
 @app.command(
     "init-model",
     context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
@@ -37,8 +37,8 @@ DEFAULT_OOV_PROB = -20
 def init_model_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    lang: str = Arg(..., help="Model language"),
-    output_dir: Path = Arg(..., help="Model output directory"),
+    lang: str = Arg(..., help="Pipeline language"),
+    output_dir: Path = Arg(..., help="Pipeline output directory"),
     freqs_loc: Optional[Path] = Arg(None, help="Location of words frequencies file", exists=True),
     clusters_loc: Optional[Path] = Opt(None, "--clusters-loc", "-c", help="Optional location of brown clusters data", exists=True),
     jsonl_loc: Optional[Path] = Opt(None, "--jsonl-loc", "-j", help="Location of JSONL-formatted attributes file", exists=True),
@@ -46,19 +46,20 @@ def init_model_cli(
     prune_vectors: int = Opt(-1, "--prune-vectors", "-V", help="Optional number of vectors to prune to"),
     truncate_vectors: int = Opt(0, "--truncate-vectors", "-t", help="Optional number of vectors to truncate to when reading in vectors file"),
     vectors_name: Optional[str] = Opt(None, "--vectors-name", "-vn", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
-    model_name: Optional[str] = Opt(None, "--model-name", "-mn", help="Optional name for the model meta"),
-    base_model: Optional[str] = Opt(None, "--base-model", "-b", help="Base model (for languages with custom tokenizers)")
+    model_name: Optional[str] = Opt(None, "--model-name", "-mn", help="Optional name for the pipeline meta"),
+    base_model: Optional[str] = Opt(None, "--base-model", "-b", help="Base pipeline (for languages with custom tokenizers)")
     # fmt: on
 ):
     """
-    Create a new model from raw data. If vectors are provided in Word2Vec format,
-    they can be either a .txt or zipped as a .zip or .tar.gz.
+    Create a new blank pipeline directory with vocab and vectors from raw data.
+    If vectors are provided in Word2Vec format, they can be either a .txt or
+    zipped as a .zip or .tar.gz.
     """
     if ctx.command.name == "init-model":
         msg.warn(
-            "The init-model command is now available via the 'init model' "
-            "subcommand (without the hyphen). You can run python -m spacy init "
-            "--help for an overview of the other available initialization commands."
+            "The init-model command is now called 'init vocab'. You can run "
+            "'python -m spacy init --help' for an overview of the other "
+            "available initialization commands."
         )
     init_model(
         lang,
@@ -115,10 +116,10 @@ def init_model(
             msg.fail("Can't find words frequencies file", freqs_loc, exits=1)
         lex_attrs = read_attrs_from_deprecated(msg, freqs_loc, clusters_loc)
 
-    with msg.loading("Creating model..."):
+    with msg.loading("Creating blank pipeline..."):
         nlp = create_model(lang, lex_attrs, name=model_name, base_model=base_model)
 
-    msg.good("Successfully created model")
+    msg.good("Successfully created blank pipeline")
     if vectors_loc is not None:
         add_vectors(
             msg, nlp, vectors_loc, truncate_vectors, prune_vectors, vectors_name
@@ -242,7 +243,8 @@ def add_vectors(
         if vectors_data is not None:
             nlp.vocab.vectors = Vectors(data=vectors_data, keys=vector_keys)
     if name is None:
-        nlp.vocab.vectors.name = f"{nlp.meta['lang']}_model.vectors"
+        # TODO: Is this correct? Does this matter?
+        nlp.vocab.vectors.name = f"{nlp.meta['lang']}_{nlp.meta['name']}.vectors"
     else:
         nlp.vocab.vectors.name = name
     nlp.meta["vectors"]["name"] = nlp.vocab.vectors.name
diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 4e5038951..f464c97e8 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -14,19 +14,19 @@ from .. import about
 @app.command("package")
 def package_cli(
     # fmt: off
-    input_dir: Path = Arg(..., help="Directory with model data", exists=True, file_okay=False),
+    input_dir: Path = Arg(..., help="Directory with pipeline data", exists=True, file_okay=False),
     output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False),
     meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False),
     create_meta: bool = Opt(False, "--create-meta", "-c", "-C", help="Create meta.json, even if one exists"),
     version: Optional[str] = Opt(None, "--version", "-v", help="Package version to override meta"),
     no_sdist: bool = Opt(False, "--no-sdist", "-NS", help="Don't build .tar.gz sdist, can be set if you want to run this step manually"),
-    force: bool = Opt(False, "--force", "-f", "-F", help="Force overwriting existing model in output directory"),
+    force: bool = Opt(False, "--force", "-f", "-F", help="Force overwriting existing data in output directory"),
     # fmt: on
 ):
     """
-    Generate an installable Python package for a model. Includes model data,
+    Generate an installable Python package for a pipeline. Includes binary data,
     meta and required installation files. A new directory will be created in the
-    specified output directory, and model data will be copied over. If
+    specified output directory, and the data will be copied over. If
     --create-meta is set and a meta.json already exists in the output directory,
     the existing values will be used as the defaults in the command-line prompt.
     After packaging, "python setup.py sdist" is run in the package directory,
@@ -59,14 +59,14 @@ def package(
     output_path = util.ensure_path(output_dir)
     meta_path = util.ensure_path(meta_path)
     if not input_path or not input_path.exists():
-        msg.fail("Can't locate model data", input_path, exits=1)
+        msg.fail("Can't locate pipeline data", input_path, exits=1)
     if not output_path or not output_path.exists():
         msg.fail("Output directory not found", output_path, exits=1)
     if meta_path and not meta_path.exists():
-        msg.fail("Can't find model meta.json", meta_path, exits=1)
+        msg.fail("Can't find pipeline meta.json", meta_path, exits=1)
     meta_path = meta_path or input_dir / "meta.json"
     if not meta_path.exists() or not meta_path.is_file():
-        msg.fail("Can't load model meta.json", meta_path, exits=1)
+        msg.fail("Can't load pipeline meta.json", meta_path, exits=1)
     meta = srsly.read_json(meta_path)
     meta = get_meta(input_dir, meta)
     if version is not None:
@@ -77,7 +77,7 @@ def package(
         meta = generate_meta(meta, msg)
     errors = validate(ModelMetaSchema, meta)
     if errors:
-        msg.fail("Invalid model meta.json")
+        msg.fail("Invalid pipeline meta.json")
         print("\n".join(errors))
         sys.exit(1)
     model_name = meta["lang"] + "_" + meta["name"]
@@ -118,7 +118,7 @@ def get_meta(
 ) -> Dict[str, Any]:
     meta = {
         "lang": "en",
-        "name": "model",
+        "name": "pipeline",
         "version": "0.0.0",
         "description": "",
         "author": "",
@@ -143,10 +143,10 @@ def get_meta(
 def generate_meta(existing_meta: Dict[str, Any], msg: Printer) -> Dict[str, Any]:
     meta = existing_meta or {}
     settings = [
-        ("lang", "Model language", meta.get("lang", "en")),
-        ("name", "Model name", meta.get("name", "model")),
-        ("version", "Model version", meta.get("version", "0.0.0")),
-        ("description", "Model description", meta.get("description", None)),
+        ("lang", "Pipeline language", meta.get("lang", "en")),
+        ("name", "Pipeline name", meta.get("name", "pipeline")),
+        ("version", "Package version", meta.get("version", "0.0.0")),
+        ("description", "Package description", meta.get("description", None)),
         ("author", "Author", meta.get("author", None)),
         ("email", "Author email", meta.get("email", None)),
         ("url", "Author website", meta.get("url", None)),
@@ -154,8 +154,8 @@ def generate_meta(existing_meta: Dict[str, Any], msg: Printer) -> Dict[str, Any]
     ]
     msg.divider("Generating meta.json")
     msg.text(
-        "Enter the package settings for your model. The following information "
-        "will be read from your model data: pipeline, vectors."
+        "Enter the package settings for your pipeline. The following information "
+        "will be read from your pipeline data: pipeline, vectors."
     )
     for setting, desc, default in settings:
         response = get_raw_input(desc, default)
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 5f20773e1..fe6bfa92e 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -31,7 +31,7 @@ def pretrain_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
     texts_loc: Path = Arg(..., help="Path to JSONL file with raw texts to learn from, with text provided as the key 'text' or tokens as the key 'tokens'", exists=True),
-    output_dir: Path = Arg(..., help="Directory to write models to on each epoch"),
+    output_dir: Path = Arg(..., help="Directory to write weights to on each epoch"),
     config_path: Path = Arg(..., help="Path to config file", exists=True, dir_okay=False),
     code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
     resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"),
@@ -376,10 +376,9 @@ def verify_cli_args(texts_loc, output_dir, config_path, resume_path, epoch_resum
     if output_dir.exists() and [p for p in output_dir.iterdir()]:
         if resume_path:
             msg.warn(
-                "Output directory is not empty. ",
-                "If you're resuming a run from a previous model in this directory, "
-                "the old models for the consecutive epochs will be overwritten "
-                "with the new ones.",
+                "Output directory is not empty.",
+                "If you're resuming a run in this directory, the old weights "
+                "for the consecutive epochs will be overwritten with the new ones.",
             )
         else:
             msg.warn(
diff --git a/spacy/cli/profile.py b/spacy/cli/profile.py
index 14d8435fe..1b995f4bc 100644
--- a/spacy/cli/profile.py
+++ b/spacy/cli/profile.py
@@ -19,7 +19,7 @@ from ..util import load_model
 def profile_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read current calling context
-    model: str = Arg(..., help="Model to load"),
+    model: str = Arg(..., help="Trained pipeline to load"),
     inputs: Optional[Path] = Arg(None, help="Location of input file. '-' for stdin.", exists=True, allow_dash=True),
     n_texts: int = Opt(10000, "--n-texts", "-n", help="Maximum number of texts to use if available"),
     # fmt: on
@@ -60,9 +60,9 @@ def profile(model: str, inputs: Optional[Path] = None, n_texts: int = 10000) ->
             inputs, _ = zip(*imdb_train)
         msg.info(f"Loaded IMDB dataset and using {n_inputs} examples")
         inputs = inputs[:n_inputs]
-    with msg.loading(f"Loading model '{model}'..."):
+    with msg.loading(f"Loading pipeline '{model}'..."):
         nlp = load_model(model)
-    msg.good(f"Loaded model '{model}'")
+    msg.good(f"Loaded pipeline '{model}'")
     texts = list(itertools.islice(inputs, n_texts))
     cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(), "Profile.prof")
     s = pstats.Stats("Profile.prof")
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 4ce02286a..5377f7f8f 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -26,7 +26,7 @@ def train_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
     config_path: Path = Arg(..., help="Path to config file", exists=True),
-    output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store model in"),
+    output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store trained pipeline in"),
     code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
     verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
@@ -34,7 +34,7 @@ def train_cli(
     # fmt: on
 ):
     """
-    Train or update a spaCy model. Requires data in spaCy's binary format. To
+    Train or update a spaCy pipeline. Requires data in spaCy's binary format. To
     convert data from other formats, use the `spacy convert` command. The
     config file includes all settings and hyperparameters used during traing.
     To override settings in the config, e.g. settings that point to local
@@ -113,12 +113,12 @@ def train(
         # Load morph rules
         nlp.vocab.morphology.load_morph_exceptions(morph_rules)
 
-    # Load a pretrained tok2vec model - cf. CLI command 'pretrain'
+    # Load pretrained tok2vec weights - cf. CLI command 'pretrain'
     if weights_data is not None:
         tok2vec_path = config["pretraining"].get("tok2vec_model", None)
         if tok2vec_path is None:
             msg.fail(
-                f"To use a pretrained tok2vec model, the config needs to specify which "
+                f"To use pretrained tok2vec weights, the config needs to specify which "
                 f"tok2vec layer to load in the setting [pretraining.tok2vec_model].",
                 exits=1,
             )
@@ -183,7 +183,7 @@ def train(
                     nlp.to_disk(final_model_path)
             else:
                 nlp.to_disk(final_model_path)
-            msg.good(f"Saved model to output directory {final_model_path}")
+            msg.good(f"Saved pipeline to output directory {final_model_path}")
 
 
 def create_train_batches(iterator, batcher, max_epochs: int):
diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py
index e6ba284df..a1e05fdcd 100644
--- a/spacy/cli/validate.py
+++ b/spacy/cli/validate.py
@@ -13,9 +13,9 @@ from ..util import get_package_path, get_model_meta, is_compatible_version
 @app.command("validate")
 def validate_cli():
     """
-    Validate the currently installed models and spaCy version. Checks if the
-    installed models are compatible and shows upgrade instructions if available.
-    Should be run after `pip install -U spacy`.
+    Validate the currently installed pipeline packages and spaCy version. Checks
+    if the installed packages are compatible and shows upgrade instructions if
+    available. Should be run after `pip install -U spacy`.
     """
     validate()
 
@@ -25,13 +25,13 @@ def validate() -> None:
     spacy_version = get_base_version(about.__version__)
     current_compat = compat.get(spacy_version, {})
     if not current_compat:
-        msg.warn(f"No compatible models found for v{spacy_version} of spaCy")
+        msg.warn(f"No compatible packages found for v{spacy_version} of spaCy")
     incompat_models = {d["name"] for _, d in model_pkgs.items() if not d["compat"]}
     na_models = [m for m in incompat_models if m not in current_compat]
     update_models = [m for m in incompat_models if m in current_compat]
     spacy_dir = Path(__file__).parent.parent
 
-    msg.divider(f"Installed models (spaCy v{about.__version__})")
+    msg.divider(f"Installed pipeline packages (spaCy v{about.__version__})")
     msg.info(f"spaCy installation: {spacy_dir}")
 
     if model_pkgs:
@@ -47,15 +47,15 @@ def validate() -> None:
             rows.append((data["name"], data["spacy"], version, comp))
         msg.table(rows, header=header)
     else:
-        msg.text("No models found in your current environment.", exits=0)
+        msg.text("No pipeline packages found in your current environment.", exits=0)
     if update_models:
         msg.divider("Install updates")
-        msg.text("Use the following commands to update the model packages:")
+        msg.text("Use the following commands to update the packages:")
         cmd = "python -m spacy download {}"
         print("\n".join([cmd.format(pkg) for pkg in update_models]) + "\n")
     if na_models:
         msg.info(
-            f"The following models are custom spaCy models or not "
+            f"The following packages are custom spaCy pipelines or not "
             f"available for spaCy v{about.__version__}:",
             ", ".join(na_models),
         )
diff --git a/spacy/language.py b/spacy/language.py
index 8e7c39b90..211e6c547 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -192,7 +192,7 @@ class Language:
             self._meta.setdefault("lang", self.vocab.lang)
         else:
             self._meta.setdefault("lang", self.lang)
-        self._meta.setdefault("name", "model")
+        self._meta.setdefault("name", "pipeline")
         self._meta.setdefault("version", "0.0.0")
         self._meta.setdefault("spacy_version", spacy_version)
         self._meta.setdefault("description", "")
diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md
index 9070855fa..98da62eb3 100644
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@@ -1,6 +1,6 @@
 ---
 title: Command Line Interface
-teaser: Download, train and package models, and debug spaCy
+teaser: Download, train and package pipelines, and debug spaCy
 source: spacy/cli
 menu:
   - ['download', 'download']
@@ -17,45 +17,47 @@ menu:
 ---
 
 spaCy's CLI provides a range of helpful commands for downloading and training
-models, converting data and debugging your config, data and installation. For a
-list of available commands, you can type `python -m spacy --help`. You can also
-add the `--help` flag to any command or subcommand to see the description,
+pipelines, converting data and debugging your config, data and installation. For
+a list of available commands, you can type `python -m spacy --help`. You can
+also add the `--help` flag to any command or subcommand to see the description,
 available arguments and usage.
 
 ## download {#download tag="command"}
 
-Download [models](/usage/models) for spaCy. The downloader finds the
-best-matching compatible version and uses `pip install` to download the model as
-a package. Direct downloads don't perform any compatibility checks and require
-the model name to be specified with its version (e.g. `en_core_web_sm-2.2.0`).
+Download [trained pipelines](/usage/models) for spaCy. The downloader finds the
+best-matching compatible version and uses `pip install` to download the Python
+package. Direct downloads don't perform any compatibility checks and require the
+pipeline name to be specified with its version (e.g. `en_core_web_sm-2.2.0`).
 
 > #### Downloading best practices
 >
 > The `download` command is mostly intended as a convenient, interactive wrapper
 > – it performs compatibility checks and prints detailed messages in case things
 > go wrong. It's **not recommended** to use this command as part of an automated
-> process. If you know which model your project needs, you should consider a
-> [direct download via pip](/usage/models#download-pip), or uploading the model
-> to a local PyPi installation and fetching it straight from there. This will
-> also allow you to add it as a versioned package dependency to your project.
+> process. If you know which package your project needs, you should consider a
+> [direct download via pip](/usage/models#download-pip), or uploading the
+> package to a local PyPI installation and fetching it straight from there. This
+> will also allow you to add it as a versioned package dependency to your
+> project.
 
 ```cli
 $ python -m spacy download [model] [--direct] [pip_args]
 ```
 
-| Name                                  | Description                                                                                                                                                                                                                          |
-| ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model`                               | Model name, e.g. [`en_core_web_sm`](/models/en#en_core_web_sm). ~~str (positional)~~                                                                                                                                                 |
-| `--direct`, `-d`                      | Force direct download of exact model version. ~~bool (flag)~~                                                                                                                                                                        |
-| `--help`, `-h`                        | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                                           |
-| pip args <Tag variant="new">2.1</Tag> | Additional installation options to be passed to `pip install` when installing the model package. For example, `--user` to install to the user home directory or `--no-deps` to not install model dependencies. ~~Any (option/flag)~~ |
-| **CREATES**                           | The installed model package in your `site-packages` directory.                                                                                                                                                                       |
+| Name                                  | Description                                                                                                                                                                                                                               |
+| ------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `model`                               | Pipeline package name, e.g. [`en_core_web_sm`](/models/en#en_core_web_sm). ~~str (positional)~~                                                                                                                                           |
+| `--direct`, `-d`                      | Force direct download of exact package version. ~~bool (flag)~~                                                                                                                                                                           |
+| `--help`, `-h`                        | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                                                |
+| pip args <Tag variant="new">2.1</Tag> | Additional installation options to be passed to `pip install` when installing the pipeline package. For example, `--user` to install to the user home directory or `--no-deps` to not install package dependencies. ~~Any (option/flag)~~ |
+| **CREATES**                           | The installed pipeline package in your `site-packages` directory.                                                                                                                                                                         |
 
 ## info {#info tag="command"}
 
-Print information about your spaCy installation, models and local setup, and
-generate [Markdown](https://en.wikipedia.org/wiki/Markdown)-formatted markup to
-copy-paste into [GitHub issues](https://github.com/explosion/spaCy/issues).
+Print information about your spaCy installation, trained pipelines and local
+setup, and generate [Markdown](https://en.wikipedia.org/wiki/Markdown)-formatted
+markup to copy-paste into
+[GitHub issues](https://github.com/explosion/spaCy/issues).
 
 ```cli
 $ python -m spacy info [--markdown] [--silent]
@@ -65,41 +67,41 @@ $ python -m spacy info [--markdown] [--silent]
 $ python -m spacy info [model] [--markdown] [--silent]
 ```
 
-| Name                                             | Description                                                                    |
-| ------------------------------------------------ | ------------------------------------------------------------------------------ |
-| `model`                                          | A model, i.e. package name or path (optional). ~~Optional[str] \(positional)~~ |
-| `--markdown`, `-md`                              | Print information as Markdown. ~~bool (flag)~~                                 |
-| `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | Don't print anything, just return the values. ~~bool (flag)~~                  |
-| `--help`, `-h`                                   | Show help message and available arguments. ~~bool (flag)~~                     |
-| **PRINTS**                                       | Information about your spaCy installation.                                     |
+| Name                                             | Description                                                                               |
+| ------------------------------------------------ | ----------------------------------------------------------------------------------------- |
+| `model`                                          | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(positional)~~ |
+| `--markdown`, `-md`                              | Print information as Markdown. ~~bool (flag)~~                                            |
+| `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | Don't print anything, just return the values. ~~bool (flag)~~                             |
+| `--help`, `-h`                                   | Show help message and available arguments. ~~bool (flag)~~                                |
+| **PRINTS**                                       | Information about your spaCy installation.                                                |
 
 ## validate {#validate new="2" tag="command"}
 
-Find all models installed in the current environment and check whether they are
-compatible with the currently installed version of spaCy. Should be run after
-upgrading spaCy via `pip install -U spacy` to ensure that all installed models
-are can be used with the new version. It will show a list of models and their
-installed versions. If any model is out of date, the latest compatible versions
-and command for updating are shown.
+Find all trained pipeline packages installed in the current environment and
+check whether they are compatible with the currently installed version of spaCy.
+Should be run after upgrading spaCy via `pip install -U spacy` to ensure that
+all installed packages can be used with the new version. It will show a list
+of packages and their installed versions. If any package is out of date, the
+latest compatible versions and command for updating are shown.
 
 > #### Automated validation
 >
 > You can also use the `validate` command as part of your build process or test
-> suite, to ensure all models are up to date before proceeding. If incompatible
-> models are found, it will return `1`.
+> suite, to ensure all packages are up to date before proceeding. If
+> incompatible packages are found, it will return `1`.
 
 ```cli
 $ python -m spacy validate
 ```
 
-| Name       | Description                                               |
-| ---------- | --------------------------------------------------------- |
-| **PRINTS** | Details about the compatibility of your installed models. |
+| Name       | Description                                                          |
+| ---------- | -------------------------------------------------------------------- |
+| **PRINTS** | Details about the compatibility of your installed pipeline packages. |
 
 ## init {#init new="3"}
 
 The `spacy init` CLI includes helpful commands for initializing training config
-files and model directories.
+files and pipeline directories.
 
 ### init config {#init-config new="3" tag="command"}
 
@@ -125,7 +127,7 @@ $ python -m spacy init config [output_file] [--lang] [--pipeline] [--optimize] [
 | ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `output_file`      | Path to output `.cfg` file or `-` to write the config to stdout (so you can pipe it forward to a file). Note that if you're writing to stdout, no additional logging info is printed. ~~Path (positional)~~                                                                                                                        |
 | `--lang`, `-l`     | Optional code of the [language](/usage/models#languages) to use. Defaults to `"en"`. ~~str (option)~~                                                                                                                                                                                                                              |
-| `--pipeline`, `-p` | Comma-separated list of trainable [pipeline components](/usage/processing-pipelines#built-in) to include in the model. Defaults to `"tagger,parser,ner"`. ~~str (option)~~                                                                                                                                                         |
+| `--pipeline`, `-p` | Comma-separated list of trainable [pipeline components](/usage/processing-pipelines#built-in) to include. Defaults to `"tagger,parser,ner"`. ~~str (option)~~                                                                                                                                                                      |
 | `--optimize`, `-o` | `"efficiency"` or `"accuracy"`. Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters. Defaults to `"efficiency"`. ~~str (option)~~ |
 | `--cpu`, `-C`      | Whether the model needs to run on CPU. This will impact the choice of architecture, pretrained weights and related hyperparameters. ~~bool (flag)~~                                                                                                                                                                                |
 | `--help`, `-h`     | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                                                                                                                                         |
@@ -165,36 +167,36 @@ $ python -m spacy init fill-config [base_path] [output_file] [--diff]
 | `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~                                                                          |
 | **CREATES**    | Complete and auto-filled config file for training.                                                                                  |
 
-### init model {#init-model new="2" tag="command"}
+### init vocab {#init-vocab new="3" tag="command"}
 
-Create a new model directory from raw data, like word frequencies, Brown
-clusters and word vectors. Note that in order to populate the model's vocab, you
+Create a blank pipeline directory from raw data, like word frequencies, Brown
+clusters and word vectors. Note that in order to populate the vocabulary, you
 need to pass in a JSONL-formatted
 [vocabulary file](/api/data-formats#vocab-jsonl) as `--jsonl-loc` with optional
 `id` values that correspond to the vectors table. Just loading in vectors will
 not automatically populate the vocab.
 
-<Infobox title="New in v3.0" variant="warning">
+<Infobox title="New in v3.0" variant="warning" id="init-model">
 
-The `init-model` command is now available as a subcommand of `spacy init`.
+This command was previously called `init-model`.
 
 </Infobox>
 
 ```cli
-$ python -m spacy init model [lang] [output_dir] [--jsonl-loc] [--vectors-loc] [--prune-vectors]
+$ python -m spacy init vocab [lang] [output_dir] [--jsonl-loc] [--vectors-loc] [--prune-vectors]
 ```
 
 | Name                                                    | Description                                                                                                                                                                                                                                                                         |
 | ------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `lang`                                                  | Model language [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes), e.g. `en`. ~~str (positional)~~                                                                                                                                                                   |
-| `output_dir`                                            | Model output directory. Will be created if it doesn't exist. ~~Path (positional)~~                                                                                                                                                                                                  |
+| `lang`                                                  | Pipeline language [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes), e.g. `en`. ~~str (positional)~~                                                                                                                                                                |
+| `output_dir`                                            | Pipeline output directory. Will be created if it doesn't exist. ~~Path (positional)~~                                                                                                                                                                                               |
 | `--jsonl-loc`, `-j`                                     | Optional location of JSONL-formatted [vocabulary file](/api/data-formats#vocab-jsonl) with lexical attributes. ~~Optional[Path] \(option)~~                                                                                                                                         |
 | `--vectors-loc`, `-v`                                   | Optional location of vectors. Should be a file where the first row contains the dimensions of the vectors, followed by a space-separated Word2Vec table. File can be provided in `.txt` format or as a zipped text file in `.zip` or `.tar.gz` format. ~~Optional[Path] \(option)~~ |
 | `--truncate-vectors`, `-t` <Tag variant="new">2.3</Tag> | Number of vectors to truncate to when reading in vectors file. Defaults to `0` for no truncation. ~~int (option)~~                                                                                                                                                                  |
 | `--prune-vectors`, `-V`                                 | Number of vectors to prune the vocabulary to. Defaults to `-1` for no pruning. ~~int (option)~~                                                                                                                                                                                     |
 | `--vectors-name`, `-vn`                                 | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`. ~~str (option)~~                                                                                                                                                                              |
 | `--help`, `-h`                                          | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                                                                                          |
-| **CREATES**                                             | A spaCy model containing the vocab and vectors.                                                                                                                                                                                                                                     |
+| **CREATES**                                             | A spaCy pipeline directory containing the vocab and vectors.                                                                                                                                                                                                                        |
 
 ## convert {#convert tag="command"}
 
@@ -594,11 +596,11 @@ $ python -m spacy debug profile [model] [inputs] [--n-texts]
 
 | Name              | Description                                                                        |
 | ----------------- | ---------------------------------------------------------------------------------- |
-| `model`           | A loadable spaCy model. ~~str (positional)~~                                       |
+| `model`           | A loadable spaCy pipeline (package name or path). ~~str (positional)~~             |
 | `inputs`          | Optional path to input file, or `-` for standard input. ~~Path (positional)~~      |
 | `--n-texts`, `-n` | Maximum number of texts to use if available. Defaults to `10000`. ~~int (option)~~ |
 | `--help`, `-h`    | Show help message and available arguments. ~~bool (flag)~~                         |
-| **PRINTS**        | Profiling information for the model.                                               |
+| **PRINTS**        | Profiling information for the pipeline.                                            |
 
 ### debug model {#debug-model new="3" tag="command"}
 
@@ -724,10 +726,10 @@ $ python -m spacy debug model ./config.cfg tagger -l "5,15" -DIM -PAR -P0 -P1 -P
 
 ## train {#train tag="command"}
 
-Train a model. Expects data in spaCy's
+Train a pipeline. Expects data in spaCy's
 [binary format](/api/data-formats#training) and a
 [config file](/api/data-formats#config) with all settings and hyperparameters.
-Will save out the best model from all epochs, as well as the final model. The
+Will save out the best pipeline from all epochs, as well as the final pipeline. The
 `--code` argument can be used to provide a Python file that's imported before
 the training process starts. This lets you register
 [custom functions](/usage/training#custom-functions) and architectures and refer
@@ -753,12 +755,12 @@ $ python -m spacy train [config_path] [--output] [--code] [--verbose] [overrides
 | Name              | Description                                                                                                                                                                                |
 | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `config_path`     | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. ~~Path (positional)~~                                                                |
-| `--output`, `-o`  | Directory to store model in. Will be created if it doesn't exist. ~~Optional[Path] \(positional)~~                                                                                         |
+| `--output`, `-o`  | Directory to store trained pipeline in. Will be created if it doesn't exist. ~~Optional[Path] \(option)~~                                                                                  |
 | `--code`, `-c`    | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~       |
 | `--verbose`, `-V` | Show more detailed messages during training. ~~bool (flag)~~                                                                                                                               |
 | `--help`, `-h`    | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                 |
 | overrides         | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. ~~Any (option/flag)~~ |
-| **CREATES**       | The final model and the best model.                                                                                                                                                        |
+| **CREATES**       | The final trained pipeline and the best trained pipeline.                                                                                                                                  |
 
 ## pretrain {#pretrain new="2.1" tag="command,experimental"}
 
@@ -769,7 +771,7 @@ a component like a CNN, BiLSTM, etc to predict vectors which match the
 pretrained ones. The weights are saved to a directory after each epoch. You can
 then include a **path to one of these pretrained weights files** in your
 [training config](/usage/training#config) as the `init_tok2vec` setting when you
-train your model. This technique may be especially helpful if you have little
+train your pipeline. This technique may be especially helpful if you have little
 labelled data. See the usage docs on [pretraining](/usage/training#pretraining)
 for more info.
 
@@ -792,7 +794,7 @@ $ python -m spacy pretrain [texts_loc] [output_dir] [config_path] [--code] [--re
 | Name                    | Description                                                                                                                                                                                        |
 | ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `texts_loc`             | Path to JSONL file with raw texts to learn from, with text provided as the key `"text"` or tokens as the key `"tokens"`. [See here](/api/data-formats#pretrain) for details. ~~Path (positional)~~ |
-| `output_dir`            | Directory to write models to on each epoch. ~~Path (positional)~~                                                                                                                                  |
+| `output_dir`            | Directory to save binary weights to on each epoch. ~~Path (positional)~~                                                                                                                           |
 | `config_path`           | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. ~~Path (positional)~~                                                                        |
 | `--code`, `-c`          | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~               |
 | `--resume-path`, `-r`   | Path to pretrained weights from which to resume pretraining. ~~Optional[Path] \(option)~~                                                                                                          |
@@ -803,7 +805,8 @@ $ python -m spacy pretrain [texts_loc] [output_dir] [config_path] [--code] [--re
 
 ## evaluate {#evaluate new="2" tag="command"}
 
-Evaluate a model. Expects a loadable spaCy model and evaluation data in the
+Evaluate a trained pipeline. Expects a loadable spaCy pipeline (package name or
+path) and evaluation data in the
 [binary `.spacy` format](/api/data-formats#binary-training). The
 `--gold-preproc` option sets up the evaluation examples with gold-standard
 sentences and tokens for the predictions. Gold preprocessing helps the
@@ -819,7 +822,7 @@ $ python -m spacy evaluate [model] [data_path] [--output] [--gold-preproc] [--gp
 
 | Name                      | Description                                                                                                                                                               |
 | ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `model`                   | Model to evaluate. Can be a package or a path to a model data directory. ~~str (positional)~~                                                                             |
+| `model`                   | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                |
 | `data_path`               | Location of evaluation data in spaCy's [binary format](/api/data-formats#training). ~~Path (positional)~~                                                                 |
 | `--output`, `-o`          | Output JSON file for metrics. If not set, no metrics will be exported. ~~Optional[Path] \(option)~~                                                                       |
 | `--gold-preproc`, `-G`    | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                   |
@@ -831,13 +834,12 @@ $ python -m spacy evaluate [model] [data_path] [--output] [--gold-preproc] [--gp
 
 ## package {#package tag="command"}
 
-Generate an installable
-[model Python package](/usage/training#models-generating) from an existing model
-data directory. All data files are copied over. If the path to a
-[`meta.json`](/api/data-formats#meta) is supplied, or a `meta.json` is found in
-the input directory, this file is used. Otherwise, the data can be entered
-directly from the command line. spaCy will then create a `.tar.gz` archive file
-that you can distribute and install with `pip install`.
+Generate an installable [Python package](/usage/training#models-generating) from
+an existing pipeline data directory. All data files are copied over. If the path
+to a [`meta.json`](/api/data-formats#meta) is supplied, or a `meta.json` is
+found in the input directory, this file is used. Otherwise, the data can be
+entered directly from the command line. spaCy will then create a `.tar.gz`
+archive file that you can distribute and install with `pip install`.
 
 <Infobox title="New in v3.0" variant="warning">
 
@@ -855,13 +857,13 @@ $ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta]
 >
 > ```cli
 > $ python -m spacy package /input /output
-> $ cd /output/en_model-0.0.0
-> $ pip install dist/en_model-0.0.0.tar.gz
+> $ cd /output/en_pipeline-0.0.0
+> $ pip install dist/en_pipeline-0.0.0.tar.gz
 > ```
 
 | Name                                             | Description                                                                                                                                                                                                     |
 | ------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `input_dir`                                      | Path to directory containing model data. ~~Path (positional)~~                                                                                                                                                  |
+| `input_dir`                                      | Path to directory containing pipeline data. ~~Path (positional)~~                                                                                                                                               |
 | `output_dir`                                     | Directory to create package folder in. ~~Path (positional)~~                                                                                                                                                    |
 | `--meta-path`, `-m` <Tag variant="new">2</Tag>   | Path to [`meta.json`](/api/data-formats#meta) file (optional). ~~Optional[Path] \(option)~~                                                                                                                     |
 | `--create-meta`, `-C` <Tag variant="new">2</Tag> | Create a `meta.json` file on the command line, even if one already exists in the directory. If an existing file is found, its entries will be shown as the defaults in the command line prompt. ~~bool (flag)~~ |
@@ -869,13 +871,13 @@ $ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta]
 | `--version`, `-v` <Tag variant="new">3</Tag>     | Package version to override in meta. Useful when training new versions, as it doesn't require editing the meta template. ~~Optional[str] \(option)~~                                                            |
 | `--force`, `-f`                                  | Force overwriting of existing folder in output directory. ~~bool (flag)~~                                                                                                                                       |
 | `--help`, `-h`                                   | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                      |
-| **CREATES**                                      | A Python package containing the spaCy model.                                                                                                                                                                    |
+| **CREATES**                                      | A Python package containing the spaCy pipeline.                                                                                                                                                                 |
 
 ## project {#project new="3"}
 
 The `spacy project` CLI includes subcommands for working with
 [spaCy projects](/usage/projects), end-to-end workflows for building and
-deploying custom spaCy models.
+deploying custom spaCy pipelines.
 
 ### project clone {#project-clone tag="command"}
 
@@ -1015,9 +1017,9 @@ Download all files or directories listed as `outputs` for commands, unless they
 are not already present locally. When searching for files in the remote, `pull`
 won't just look at the output path, but will also consider the **command
 string** and the **hashes of the dependencies**. For instance, let's say you've
-previously pushed a model checkpoint to the remote, but now you've changed some
+previously pushed a checkpoint to the remote, but now you've changed some
 hyper-parameters. Because you've changed the inputs to the command, if you run
-`pull`, you won't retrieve the stale result. If you train your model and push
+`pull`, you won't retrieve the stale result. If you train your pipeline and push
 the outputs to the remote, the outputs will be saved alongside the prior
 outputs, so if you change the config back, you'll be able to fetch back the
 result.
diff --git a/website/docs/api/data-formats.md b/website/docs/api/data-formats.md
index 8ef8041ee..3fd2818f4 100644
--- a/website/docs/api/data-formats.md
+++ b/website/docs/api/data-formats.md
@@ -6,18 +6,18 @@ menu:
   - ['Training Data', 'training']
   - ['Pretraining Data', 'pretraining']
   - ['Vocabulary', 'vocab-jsonl']
-  - ['Model Meta', 'meta']
+  - ['Pipeline Meta', 'meta']
 ---
 
 This section documents input and output formats of data used by spaCy, including
 the [training config](/usage/training#config), training data and lexical
 vocabulary data. For an overview of label schemes used by the models, see the
-[models directory](/models). Each model documents the label schemes used in its
-components, depending on the data it was trained on.
+[models directory](/models). Each trained pipeline documents the label schemes
+used in its components, depending on the data it was trained on.
 
 ## Training config {#config new="3"}
 
-Config files define the training process and model pipeline and can be passed to
+Config files define the training process and pipeline and can be passed to
 [`spacy train`](/api/cli#train). They use
 [Thinc's configuration system](https://thinc.ai/docs/usage-config) under the
 hood. For details on how to use training configs, see the
@@ -74,16 +74,16 @@ your config and check that it's valid, you can run the
 Defines the `nlp` object, its tokenizer and
 [processing pipeline](/usage/processing-pipelines) component names.
 
-| Name                      | Description                                                                                                                                                                                                                                                                                          |
-| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `lang`                    | Model language [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). Defaults to `null`. ~~str~~                                                                                                                                                                                        |
-| `pipeline`                | Names of pipeline components in order. Should correspond to sections in the `[components]` block, e.g. `[components.ner]`. See docs on [defining components](/usage/training#config-components). Defaults to `[]`. ~~List[str]~~                                                                     |
-| `disabled`                | Names of pipeline components that are loaded but disabled by default and not run as part of the pipeline. Should correspond to components listed in `pipeline`. After a model is loaded, disabled components can be enabled using [`Language.enable_pipe`](/api/language#enable_pipe). ~~List[str]~~ |
-| `load_vocab_data`         | Whether to load additional lexeme and vocab data from [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) if available. Defaults to `true`. ~~bool~~                                                                                                                             |
-| `before_creation`         | Optional [callback](/usage/training#custom-code-nlp-callbacks) to modify `Language` subclass before it's initialized. Defaults to `null`. ~~Optional[Callable[[Type[Language]], Type[Language]]]~~                                                                                                   |
-| `after_creation`          | Optional [callback](/usage/training#custom-code-nlp-callbacks) to modify `nlp` object right after it's initialized. Defaults to `null`. ~~Optional[Callable[[Language], Language]]~~                                                                                                                 |
-| `after_pipeline_creation` | Optional [callback](/usage/training#custom-code-nlp-callbacks) to modify `nlp` object after the pipeline components have been added. Defaults to `null`. ~~Optional[Callable[[Language], Language]]~~                                                                                                |
-| `tokenizer`               | The tokenizer to use. Defaults to [`Tokenizer`](/api/tokenizer). ~~Callable[[str], Doc]~~                                                                                                                                                                                                            |
+| Name                      | Description                                                                                                                                                                                                                                                                                             |
+| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `lang`                    | Pipeline language [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). Defaults to `null`. ~~str~~                                                                                                                                                                                        |
+| `pipeline`                | Names of pipeline components in order. Should correspond to sections in the `[components]` block, e.g. `[components.ner]`. See docs on [defining components](/usage/training#config-components). Defaults to `[]`. ~~List[str]~~                                                                        |
+| `disabled`                | Names of pipeline components that are loaded but disabled by default and not run as part of the pipeline. Should correspond to components listed in `pipeline`. After a pipeline is loaded, disabled components can be enabled using [`Language.enable_pipe`](/api/language#enable_pipe). ~~List[str]~~ |
+| `load_vocab_data`         | Whether to load additional lexeme and vocab data from [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) if available. Defaults to `true`. ~~bool~~                                                                                                                                |
+| `before_creation`         | Optional [callback](/usage/training#custom-code-nlp-callbacks) to modify `Language` subclass before it's initialized. Defaults to `null`. ~~Optional[Callable[[Type[Language]], Type[Language]]]~~                                                                                                      |
+| `after_creation`          | Optional [callback](/usage/training#custom-code-nlp-callbacks) to modify `nlp` object right after it's initialized. Defaults to `null`. ~~Optional[Callable[[Language], Language]]~~                                                                                                                    |
+| `after_pipeline_creation` | Optional [callback](/usage/training#custom-code-nlp-callbacks) to modify `nlp` object after the pipeline components have been added. Defaults to `null`. ~~Optional[Callable[[Language], Language]]~~                                                                                                   |
+| `tokenizer`               | The tokenizer to use. Defaults to [`Tokenizer`](/api/tokenizer). ~~Callable[[str], Doc]~~                                                                                                                                                                                                               |
 
 ### components {#config-components tag="section"}
 
@@ -105,8 +105,8 @@ This section includes definitions of the
 [pipeline components](/usage/processing-pipelines) and their models, if
 available. Components in this section can be referenced in the `pipeline` of the
 `[nlp]` block. Component blocks need to specify either a `factory` (named
-function to use to create component) or a `source` (name of path of pretrained
-model to copy components from). See the docs on
+function to use to create component) or a `source` (name or path of trained
+pipeline to copy components from). See the docs on
 [defining pipeline components](/usage/training#config-components) for details.
 
 ### paths, system {#config-variables tag="variables"}
@@ -145,7 +145,7 @@ process that are used when you run [`spacy train`](/api/cli#train).
 | `score_weights`       | Score names shown in metrics mapped to their weight towards the final weighted score. See [here](/usage/training#metrics) for details. Defaults to `{}`. ~~Dict[str, float]~~                                |
 | `seed`                | The random seed. Defaults to variable `${system.seed}`. ~~int~~                                                                                                                                              |
 | `train_corpus`        | Callable that takes the current `nlp` object and yields [`Example`](/api/example) objects. Defaults to [`Corpus`](/api/corpus). ~~Callable[[Language], Iterator[Example]]~~                                  |
-| `vectors`             | Model name or path to model containing pretrained word vectors to use, e.g. created with [`init model`](/api/cli#init-model). Defaults to `null`. ~~Optional[str]~~                                          |
+| `vectors`             | Name or path of pipeline containing pretrained word vectors to use, e.g. created with [`init vocab`](/api/cli#init-vocab). Defaults to `null`. ~~Optional[str]~~                                             |
 
 ### pretraining {#config-pretraining tag="section,optional"}
 
@@ -184,7 +184,7 @@ run [`spacy pretrain`](/api/cli#pretrain).
 
 The main data format used in spaCy v3.0 is a **binary format** created by
 serializing a [`DocBin`](/api/docbin), which represents a collection of `Doc`
-objects. This means that you can train spaCy models using the same format it
+objects. This means that you can train spaCy pipelines using the same format it
 outputs: annotated `Doc` objects. The binary format is extremely **efficient in
 storage**, especially when packing multiple documents together.
 
@@ -286,8 +286,8 @@ a dictionary of gold-standard annotations.
 [internal training API](/usage/training#api) and they're expected when you call
 [`nlp.update`](/api/language#update). However, for most use cases, you
 **shouldn't** have to write your own training scripts. It's recommended to train
-your models via the [`spacy train`](/api/cli#train) command with a config file
-to keep track of your settings and hyperparameters and your own
+your pipelines via the [`spacy train`](/api/cli#train) command with a config
+file to keep track of your settings and hyperparameters and your own
 [registered functions](/usage/training/#custom-code) to customize the setup.
 
 </Infobox>
@@ -406,15 +406,15 @@ in line-by-line, while still making it easy to represent newlines in the data.
 
 ## Lexical data for vocabulary {#vocab-jsonl new="2"}
 
-To populate a model's vocabulary, you can use the
-[`spacy init model`](/api/cli#init-model) command and load in a
+To populate a pipeline's vocabulary, you can use the
+[`spacy init vocab`](/api/cli#init-vocab) command and load in a
 [newline-delimited JSON](http://jsonlines.org/) (JSONL) file containing one
 lexical entry per line via the `--jsonl-loc` option. The first line defines the
 language and vocabulary settings. All other lines are expected to be JSON
 objects describing an individual lexeme. The lexical attributes will be then set
 as attributes on spaCy's [`Lexeme`](/api/lexeme#attributes) object. The `vocab`
-command outputs a ready-to-use spaCy model with a `Vocab` containing the lexical
-data.
+command outputs a ready-to-use spaCy pipeline with a `Vocab` containing the
+lexical data.
 
 ```python
 ### First line
@@ -459,11 +459,11 @@ Here's an example of the 20 most frequent lexemes in the English training data:
 https://github.com/explosion/spaCy/tree/master/examples/training/vocab-data.jsonl
 ```
 
-## Model meta {#meta}
+## Pipeline meta {#meta}
 
-The model meta is available as the file `meta.json` and exported automatically
-when you save an `nlp` object to disk. Its contents are available as
-[`nlp.meta`](/api/language#meta).
+The pipeline meta is available as the file `meta.json` and exported
+automatically when you save an `nlp` object to disk. Its contents are available
+as [`nlp.meta`](/api/language#meta).
 
 <Infobox variant="warning" title="Changed in v3.0">
 
@@ -473,8 +473,8 @@ creating a Python package with [`spacy package`](/api/cli#package). How to set
 up the `nlp` object is now defined in the
 [`config.cfg`](/api/data-formats#config), which includes detailed information
 about the pipeline components and their model architectures, and all other
-settings and hyperparameters used to train the model. It's the **single source
-of truth** used for loading a model.
+settings and hyperparameters used to train the pipeline. It's the **single
+source of truth** used for loading a pipeline.
 
 </Infobox>
 
@@ -482,12 +482,12 @@ of truth** used for loading a model.
 >
 > ```json
 > {
->   "name": "example_model",
+>   "name": "example_pipeline",
 >   "lang": "en",
 >   "version": "1.0.0",
 >   "spacy_version": ">=3.0.0,<3.1.0",
 >   "parent_package": "spacy",
->   "description": "Example model for spaCy",
+>   "description": "Example pipeline for spaCy",
 >   "author": "You",
 >   "email": "you@example.com",
 >   "url": "https://example.com",
@@ -510,23 +510,23 @@ of truth** used for loading a model.
 > }
 > ```
 
-| Name                                           | Description                                                                                                                                                                                                                                                                                                              |
-| ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `lang`                                         | Model language [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). Defaults to `"en"`. ~~str~~                                                                                                                                                                                                            |
-| `name`                                         | Model name, e.g. `"core_web_sm"`. The final model package name will be `{lang}_{name}`. Defaults to `"model"`. ~~str~~                                                                                                                                                                                                   |
-| `version`                                      | Model version. Will be used to version a Python package created with [`spacy package`](/api/cli#package). Defaults to `"0.0.0"`. ~~str~~                                                                                                                                                                                 |
-| `spacy_version`                                | spaCy version range the model is compatible with. Defaults to the spaCy version used to create the model, up to next minor version, which is the default compatibility for the available [pretrained models](/models). For instance, a model trained with v3.0.0 will have the version range `">=3.0.0,<3.1.0"`. ~~str~~ |
-| `parent_package`                               | Name of the spaCy package. Typically `"spacy"` or `"spacy_nightly"`. Defaults to `"spacy"`. ~~str~~                                                                                                                                                                                                                      |
-| `description`                                  | Model description. Also used for Python package. Defaults to `""`. ~~str~~                                                                                                                                                                                                                                               |
-| `author`                                       | Model author name. Also used for Python package. Defaults to `""`. ~~str~~                                                                                                                                                                                                                                               |
-| `email`                                        | Model author email. Also used for Python package. Defaults to `""`. ~~str~~                                                                                                                                                                                                                                              |
-| `url`                                          | Model author URL. Also used for Python package. Defaults to `""`. ~~str~~                                                                                                                                                                                                                                                |
-| `license`                                      | Model license. Also used for Python package. Defaults to `""`. ~~str~~                                                                                                                                                                                                                                                   |
-| `sources`                                      | Data sources used to train the model. Typically a list of dicts with the keys `"name"`, `"url"`, `"author"` and `"license"`. [See here](https://github.com/explosion/spacy-models/tree/master/meta) for examples. Defaults to `None`. ~~Optional[List[Dict[str, str]]]~~                                                 |
-| `vectors`                                      | Information about the word vectors included with the model. Typically a dict with the keys `"width"`, `"vectors"` (number of vectors), `"keys"` and `"name"`. ~~Dict[str, Any]~~                                                                                                                                         |
-| `pipeline`                                     | Names of pipeline component names in the model, in order. Corresponds to [`nlp.pipe_names`](/api/language#pipe_names). Only exists for reference and is not used to create the components. This information is defined in the [`config.cfg`](/api/data-formats#config). Defaults to `[]`. ~~List[str]~~                  |
-| `labels`                                       | Label schemes of the trained pipeline components, keyed by component name. Corresponds to [`nlp.pipe_labels`](/api/language#pipe_labels). [See here](https://github.com/explosion/spacy-models/tree/master/meta) for examples. Defaults to `{}`. ~~Dict[str, Dict[str, List[str]]]~~                                     |
-| `accuracy`                                     | Training accuracy, added automatically by [`spacy train`](/api/cli#train). Dictionary of [score names](/usage/training#metrics) mapped to scores. Defaults to `{}`. ~~Dict[str, Union[float, Dict[str, float]]]~~                                                                                                        |
-| `speed`                                        | Model speed, added automatically by [`spacy train`](/api/cli#train). Typically a dictionary with the keys `"cpu"`, `"gpu"` and `"nwords"` (words per second). Defaults to `{}`. ~~Dict[str, Optional[Union[float, str]]]~~                                                                                               |
-| `spacy_git_version` <Tag variant="new">3</Tag> | Git commit of [`spacy`](https://github.com/explosion/spaCy) used to create model. ~~str~~                                                                                                                                                                                                                                |
-| other                                          | Any other custom meta information you want to add. The data is preserved in [`nlp.meta`](/api/language#meta). ~~Any~~                                                                                                                                                                                                    |
+| Name                                           | Description                                                                                                                                                                                                                                                                                                                      |
+| ---------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `lang`                                         | Pipeline language [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). Defaults to `"en"`. ~~str~~                                                                                                                                                                                                                 |
+| `name`                                         | Pipeline name, e.g. `"core_web_sm"`. The final package name will be `{lang}_{name}`. Defaults to `"pipeline"`. ~~str~~                                                                                                                                                                                                           |
+| `version`                                      | Pipeline version. Will be used to version a Python package created with [`spacy package`](/api/cli#package). Defaults to `"0.0.0"`. ~~str~~                                                                                                                                                                                      |
+| `spacy_version`                                | spaCy version range the package is compatible with. Defaults to the spaCy version used to create the pipeline, up to next minor version, which is the default compatibility for the available [trained pipelines](/models). For instance, a pipeline trained with v3.0.0 will have the version range `">=3.0.0,<3.1.0"`. ~~str~~ |
+| `parent_package`                               | Name of the spaCy package. Typically `"spacy"` or `"spacy_nightly"`. Defaults to `"spacy"`. ~~str~~                                                                                                                                                                                                                              |
+| `description`                                  | Pipeline description. Also used for Python package. Defaults to `""`. ~~str~~                                                                                                                                                                                                                                                    |
+| `author`                                       | Pipeline author name. Also used for Python package. Defaults to `""`. ~~str~~                                                                                                                                                                                                                                                    |
+| `email`                                        | Pipeline author email. Also used for Python package. Defaults to `""`. ~~str~~                                                                                                                                                                                                                                                   |
+| `url`                                          | Pipeline author URL. Also used for Python package. Defaults to `""`. ~~str~~                                                                                                                                                                                                                                                     |
+| `license`                                      | Pipeline license. Also used for Python package. Defaults to `""`. ~~str~~                                                                                                                                                                                                                                                        |
+| `sources`                                      | Data sources used to train the pipeline. Typically a list of dicts with the keys `"name"`, `"url"`, `"author"` and `"license"`. [See here](https://github.com/explosion/spacy-models/tree/master/meta) for examples. Defaults to `None`. ~~Optional[List[Dict[str, str]]]~~                                                      |
+| `vectors`                                      | Information about the word vectors included with the pipeline. Typically a dict with the keys `"width"`, `"vectors"` (number of vectors), `"keys"` and `"name"`. ~~Dict[str, Any]~~                                                                                                                                              |
+| `pipeline`                                     | Names of pipeline components, in order. Corresponds to [`nlp.pipe_names`](/api/language#pipe_names). Only exists for reference and is not used to create the components. This information is defined in the [`config.cfg`](/api/data-formats#config). Defaults to `[]`. ~~List[str]~~                                            |
+| `labels`                                       | Label schemes of the trained pipeline components, keyed by component name. Corresponds to [`nlp.pipe_labels`](/api/language#pipe_labels). [See here](https://github.com/explosion/spacy-models/tree/master/meta) for examples. Defaults to `{}`. ~~Dict[str, Dict[str, List[str]]]~~                                             |
+| `accuracy`                                     | Training accuracy, added automatically by [`spacy train`](/api/cli#train). Dictionary of [score names](/usage/training#metrics) mapped to scores. Defaults to `{}`. ~~Dict[str, Union[float, Dict[str, float]]]~~                                                                                                                |
+| `speed`                                        | Inference speed, added automatically by [`spacy train`](/api/cli#train). Typically a dictionary with the keys `"cpu"`, `"gpu"` and `"nwords"` (words per second). Defaults to `{}`. ~~Dict[str, Optional[Union[float, str]]]~~                                                                                                   |
+| `spacy_git_version` <Tag variant="new">3</Tag> | Git commit of [`spacy`](https://github.com/explosion/spaCy) used to create pipeline. ~~str~~                                                                                                                                                                                                                                     |
+| other                                          | Any other custom meta information you want to add. The data is preserved in [`nlp.meta`](/api/language#meta). ~~Any~~                                                                                                                                                                                                            |
diff --git a/website/docs/api/dependencymatcher.md b/website/docs/api/dependencymatcher.md
index 2fb903100..b0395cc42 100644
--- a/website/docs/api/dependencymatcher.md
+++ b/website/docs/api/dependencymatcher.md
@@ -9,8 +9,8 @@ The `DependencyMatcher` follows the same API as the [`Matcher`](/api/matcher)
 and [`PhraseMatcher`](/api/phrasematcher) and lets you match on dependency trees
 using the
 [Semgrex syntax](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html).
-It requires a pretrained [`DependencyParser`](/api/parser) or other component
-that sets the `Token.dep` attribute.
+It requires a trained [`DependencyParser`](/api/parser) or other component that
+sets the `Token.dep` attribute.
 
 ## Pattern format {#patterns}
 
diff --git a/website/docs/api/entitylinker.md b/website/docs/api/entitylinker.md
index 679c3c0c2..637bd3c68 100644
--- a/website/docs/api/entitylinker.md
+++ b/website/docs/api/entitylinker.md
@@ -13,8 +13,8 @@ An `EntityLinker` component disambiguates textual mentions (tagged as named
 entities) to unique identifiers, grounding the named entities into the "real
 world". It requires a `KnowledgeBase`, as well as a function to generate
 plausible candidates from that `KnowledgeBase` given a certain textual mention,
-and a ML model to pick the right candidate, given the local context of the
-mention.
+and a machine learning model to pick the right candidate, given the local
+context of the mention.
 
 ## Config and implementation {#config}
 
diff --git a/website/docs/api/language.md b/website/docs/api/language.md
index e2668c522..d65b217a4 100644
--- a/website/docs/api/language.md
+++ b/website/docs/api/language.md
@@ -7,9 +7,9 @@ source: spacy/language.py
 
 Usually you'll load this once per process as `nlp` and pass the instance around
 your application. The `Language` class is created when you call
-[`spacy.load()`](/api/top-level#spacy.load) and contains the shared vocabulary
-and [language data](/usage/adding-languages), optional model data loaded from a
-[model package](/models) or a path, and a
+[`spacy.load`](/api/top-level#spacy.load) and contains the shared vocabulary and
+[language data](/usage/adding-languages), optional binary weights, e.g. provided
+by a [trained pipeline](/models), and the
 [processing pipeline](/usage/processing-pipelines) containing components like
 the tagger or parser that are called on a document in order. You can also add
 your own processing pipeline components that take a `Doc` object, modify it and
@@ -37,7 +37,7 @@ Initialize a `Language` object.
 | `vocab`            | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~                      |
 | _keyword-only_     |                                                                                                                          |
 | `max_length`       | Maximum number of characters allowed in a single text. Defaults to `10 ** 6`. ~~int~~                                    |
-| `meta`             | Custom meta data for the `Language` class. Is written to by models to add model meta data. ~~dict~~                      |
+| `meta`             | Custom meta data for the `Language` class. Is written to by pipelines to add meta data. ~~dict~~                         |
 | `create_tokenizer` | Optional function that receives the `nlp` object and returns a tokenizer. ~~Callable[[Language], Callable[[str], Doc]]~~ |
 
 ## Language.from_config {#from_config tag="classmethod" new="3"}
@@ -232,7 +232,7 @@ tuples of `Doc` and `GoldParse` objects.
 
 ## Language.resume_training {#resume_training tag="method,experimental" new="3"}
 
-Continue training a pretrained model. Create and return an optimizer, and
+Continue training a trained pipeline. Create and return an optimizer, and
 initialize "rehearsal" for any pipeline component that has a `rehearse` method.
 Rehearsal is used to prevent models from "forgetting" their initialized
 "knowledge". To perform rehearsal, collect samples of text you want the models
@@ -314,7 +314,7 @@ the "catastrophic forgetting" problem. This feature is experimental.
 
 ## Language.evaluate {#evaluate tag="method"}
 
-Evaluate a model's pipeline components.
+Evaluate a pipeline's components.
 
 <Infobox variant="warning" title="Changed in v3.0">
 
@@ -386,24 +386,24 @@ component, adds it to the pipeline and returns it.
 > nlp.add_pipe("component", before="ner")
 > component = nlp.add_pipe("component", name="custom_name", last=True)
 >
-> # Add component from source model
+> # Add component from source pipeline
 > source_nlp = spacy.load("en_core_web_sm")
 > nlp.add_pipe("ner", source=source_nlp)
 > ```
 
-| Name                                  | Description                                                                                                                                                                                                                                                                     |
-| ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `factory_name`                        | Name of the registered component factory. ~~str~~                                                                                                                                                                                                                               |
-| `name`                                | Optional unique name of pipeline component instance. If not set, the factory name is used. An error is raised if the name already exists in the pipeline. ~~Optional[str]~~                                                                                                     |
-| _keyword-only_                        |                                                                                                                                                                                                                                                                                 |
-| `before`                              | Component name or index to insert component directly before. ~~Optional[Union[str, int]]~~                                                                                                                                                                                      |
-| `after`                               | Component name or index to insert component directly after. ~~Optional[Union[str, int]]~~                                                                                                                                                                                       |
-| `first`                               | Insert component first / not first in the pipeline. ~~Optional[bool]~~                                                                                                                                                                                                          |
-| `last`                                | Insert component last / not last in the pipeline. ~~Optional[bool]~~                                                                                                                                                                                                            |
-| `config` <Tag variant="new">3</Tag>   | Optional config parameters to use for this component. Will be merged with the `default_config` specified by the component factory. ~~Optional[Dict[str, Any]]~~                                                                                                                 |
-| `source` <Tag variant="new">3</Tag>   | Optional source model to copy component from. If a source is provided, the `factory_name` is interpreted as the name of the component in the source pipeline. Make sure that the vocab, vectors and settings of the source model match the target model. ~~Optional[Language]~~ |
-| `validate` <Tag variant="new">3</Tag> | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~                                                                                                                                                  |
-| **RETURNS**                           | The pipeline component. ~~Callable[[Doc], Doc]~~                                                                                                                                                                                                                                |
+| Name                                  | Description                                                                                                                                                                                                                                                                              |
+| ------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `factory_name`                        | Name of the registered component factory. ~~str~~                                                                                                                                                                                                                                        |
+| `name`                                | Optional unique name of pipeline component instance. If not set, the factory name is used. An error is raised if the name already exists in the pipeline. ~~Optional[str]~~                                                                                                              |
+| _keyword-only_                        |                                                                                                                                                                                                                                                                                          |
+| `before`                              | Component name or index to insert component directly before. ~~Optional[Union[str, int]]~~                                                                                                                                                                                               |
+| `after`                               | Component name or index to insert component directly after. ~~Optional[Union[str, int]]~~                                                                                                                                                                                                |
+| `first`                               | Insert component first / not first in the pipeline. ~~Optional[bool]~~                                                                                                                                                                                                                   |
+| `last`                                | Insert component last / not last in the pipeline. ~~Optional[bool]~~                                                                                                                                                                                                                     |
+| `config` <Tag variant="new">3</Tag>   | Optional config parameters to use for this component. Will be merged with the `default_config` specified by the component factory. ~~Optional[Dict[str, Any]]~~                                                                                                                          |
+| `source` <Tag variant="new">3</Tag>   | Optional source pipeline to copy component from. If a source is provided, the `factory_name` is interpreted as the name of the component in the source pipeline. Make sure that the vocab, vectors and settings of the source pipeline match the target pipeline. ~~Optional[Language]~~ |
+| `validate` <Tag variant="new">3</Tag> | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~                                                                                                                                                           |
+| **RETURNS**                           | The pipeline component. ~~Callable[[Doc], Doc]~~                                                                                                                                                                                                                                         |
 
 ## Language.create_pipe {#create_pipe tag="method" new="2"}
 
@@ -790,9 +790,10 @@ token.ent_iob, token.ent_type
 
 ## Language.meta {#meta tag="property"}
 
-Custom meta data for the Language class. If a model is loaded, contains meta
-data of the model. The `Language.meta` is also what's serialized as the
-[`meta.json`](/api/data-formats#meta) when you save an `nlp` object to disk.
+Custom meta data for the Language class. If a trained pipeline is loaded, this
+contains meta data of the pipeline. The `Language.meta` is also what's
+serialized as the [`meta.json`](/api/data-formats#meta) when you save an `nlp`
+object to disk.
 
 > #### Example
 >
@@ -827,13 +828,13 @@ subclass of the built-in `dict`. It supports the additional methods `to_disk`
 
 ## Language.to_disk {#to_disk tag="method" new="2"}
 
-Save the current state to a directory. If a model is loaded, this will **include
-the model**.
+Save the current state to a directory. If a trained pipeline is loaded, this
+will **include all model data**.
 
 > #### Example
 >
 > ```python
-> nlp.to_disk("/path/to/models")
+> nlp.to_disk("/path/to/pipeline")
 > ```
 
 | Name           | Description                                                                                                                                |
@@ -844,22 +845,28 @@ the model**.
 
 ## Language.from_disk {#from_disk tag="method" new="2"}
 
-Loads state from a directory. Modifies the object in place and returns it. If
-the saved `Language` object contains a model, the model will be loaded. Note
-that this method is commonly used via the subclasses like `English` or `German`
-to make language-specific functionality like the
-[lexical attribute getters](/usage/adding-languages#lex-attrs) available to the
-loaded object.
+Loads state from a directory, including all data that was saved with the
+`Language` object. Modifies the object in place and returns it.
+
+<Infobox variant="warning" title="Important note">
+
+Keep in mind that this method **only loads serialized state** and doesn't set up
+the `nlp` object. This means that it requires the correct language class to be
+initialized and all pipeline components to be added to the pipeline. If you want
+to load a serialized pipeline from a directory, you should use
+[`spacy.load`](/api/top-level#spacy.load), which will set everything up for you.
+
+</Infobox>
 
 > #### Example
 >
 > ```python
 > from spacy.language import Language
-> nlp = Language().from_disk("/path/to/model")
+> nlp = Language().from_disk("/path/to/pipeline")
 >
-> # using language-specific subclass
+> # Using language-specific subclass
 > from spacy.lang.en import English
-> nlp = English().from_disk("/path/to/en_model")
+> nlp = English().from_disk("/path/to/pipeline")
 > ```
 
 | Name           | Description                                                                                                 |
@@ -924,7 +931,7 @@ available to the loaded object.
 | `components` <Tag variant="new">3</Tag>       | List of all available `(name, component)` tuples, including components that are currently disabled. ~~List[Tuple[str, Callable[[Doc], Doc]]]~~ |
 | `component_names` <Tag variant="new">3</Tag>  | List of all available component names, including components that are currently disabled. ~~List[str]~~                                         |
 | `disabled` <Tag variant="new">3</Tag>         | Names of components that are currently disabled and don't run as part of the pipeline. ~~List[str]~~                                           |
-| `path` <Tag variant="new">2</Tag>             | Path to the model data directory, if a model is loaded. Otherwise `None`. ~~Optional[Path]~~                                                   |
+| `path` <Tag variant="new">2</Tag>             | Path to the pipeline data directory, if a pipeline is loaded from a path or package. Otherwise `None`. ~~Optional[Path]~~                      |
 
 ## Class attributes {#class-attributes}
 
@@ -1004,7 +1011,7 @@ serialization by passing in the string names via the `exclude` argument.
 >
 > ```python
 > data = nlp.to_bytes(exclude=["tokenizer", "vocab"])
-> nlp.from_disk("./model-data", exclude=["ner"])
+> nlp.from_disk("/path/to/pipeline", exclude=["ner"])
 > ```
 
 | Name        | Description                                                        |
diff --git a/website/docs/api/pipe.md b/website/docs/api/pipe.md
index 9c3a4104e..57b2af44d 100644
--- a/website/docs/api/pipe.md
+++ b/website/docs/api/pipe.md
@@ -286,7 +286,7 @@ context, the original parameters are restored.
 
 ## Pipe.add_label {#add_label tag="method"}
 
-Add a new label to the pipe. It's possible to extend pretrained models with new
+Add a new label to the pipe. It's possible to extend trained models with new
 labels, but care should be taken to avoid the "catastrophic forgetting" problem.
 
 > #### Example
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index d437ecc07..6e52585ee 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -12,14 +12,14 @@ menu:
 
 ## spaCy {#spacy hidden="true"}
 
-### spacy.load {#spacy.load tag="function" model="any"}
+### spacy.load {#spacy.load tag="function"}
 
-Load a model using the name of an installed
-[model package](/usage/training#models-generating), a string path or a
-`Path`-like object. spaCy will try resolving the load argument in this order. If
-a model is loaded from a model name, spaCy will assume it's a Python package and
-import it and call the model's own `load()` method. If a model is loaded from a
-path, spaCy will assume it's a data directory, load its
+Load a pipeline using the name of an installed
+[package](/usage/saving-loading#models), a string path or a `Path`-like object.
+spaCy will try resolving the load argument in this order. If a pipeline is
+loaded from a string name, spaCy will assume it's a Python package and import it
+and call the package's own `load()` method. If a pipeline is loaded from a path,
+spaCy will assume it's a data directory, load its
 [`config.cfg`](/api/data-formats#config) and use the language and pipeline
 information to construct the `Language` class. The data will be loaded in via
 [`Language.from_disk`](/api/language#from_disk).
@@ -36,38 +36,38 @@ specified separately using the new `exclude` keyword argument.
 >
 > ```python
 > nlp = spacy.load("en_core_web_sm") # package
-> nlp = spacy.load("/path/to/en") # string path
-> nlp = spacy.load(Path("/path/to/en")) # pathlib Path
+> nlp = spacy.load("/path/to/pipeline") # string path
+> nlp = spacy.load(Path("/path/to/pipeline")) # pathlib Path
 >
 > nlp = spacy.load("en_core_web_sm", exclude=["parser", "tagger"])
 > ```
 
 | Name                                 | Description                                                                                                                                                                                                                                    |
 | ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `name`                               | Model to load, i.e. package name or path. ~~Union[str, Path]~~                                                                                                                                                                                 |
+| `name`                               | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~                                                                                                                                                                              |
 | _keyword-only_                       |                                                                                                                                                                                                                                                |
 | `disable`                            | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ |
 | `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~                                                                                                           |
 | `config` <Tag variant="new">3</Tag>  | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~                                                                             |
-| **RETURNS**                          | A `Language` object with the loaded model. ~~Language~~                                                                                                                                                                                        |
+| **RETURNS**                          | A `Language` object with the loaded pipeline. ~~Language~~                                                                                                                                                                                     |
 
-Essentially, `spacy.load()` is a convenience wrapper that reads the model's
+Essentially, `spacy.load()` is a convenience wrapper that reads the pipeline's
 [`config.cfg`](/api/data-formats#config), uses the language and pipeline
 information to construct a `Language` object, loads in the model data and
-returns it.
+weights, and returns it.
 
 ```python
 ### Abstract example
-cls = util.get_lang_class(lang)         #  get language for ID, e.g. "en"
-nlp = cls()                             #  initialize the language
+cls = spacy.util.get_lang_class(lang)  # 1. Get Language class, e.g. English
+nlp = cls()                            # 2. Initialize it
 for name in pipeline:
-    nlp.add_pipe(name)                  #  add component to pipeline
-nlp.from_disk(model_data_path)          #  load in model data
+    nlp.add_pipe(name)                 # 3. Add the component to the pipeline
+nlp.from_disk(data_path)               # 4. Load in the binary data
 ```
 
 ### spacy.blank {#spacy.blank tag="function" new="2"}
 
-Create a blank model of a given language class. This function is the twin of
+Create a blank pipeline of a given language class. This function is the twin of
 `spacy.load()`.
 
 > #### Example
@@ -85,9 +85,7 @@ Create a blank model of a given language class. This function is the twin of
 ### spacy.info {#spacy.info tag="function"}
 
 The same as the [`info` command](/api/cli#info). Pretty-print information about
-your installation, models and local setup from within spaCy. To get the model
-meta data as a dictionary instead, you can use the `meta` attribute on your
-`nlp` object with a loaded model, e.g. `nlp.meta`.
+your installation, installed pipelines and local setup from within spaCy.
 
 > #### Example
 >
@@ -97,12 +95,12 @@ meta data as a dictionary instead, you can use the `meta` attribute on your
 > markdown = spacy.info(markdown=True, silent=True)
 > ```
 
-| Name           | Description                                                        |
-| -------------- | ------------------------------------------------------------------ |
-| `model`        | A model, i.e. a package name or path (optional). ~~Optional[str]~~ |
-| _keyword-only_ |                                                                    |
-| `markdown`     | Print information as Markdown. ~~bool~~                            |
-| `silent`       | Don't print anything, just return. ~~bool~~                        |
+| Name           | Description                                                                  |
+| -------------- | ---------------------------------------------------------------------------- |
+| `model`        | Optional pipeline, i.e. a package name or path. ~~Optional[str]~~            |
+| _keyword-only_ |                                                                              |
+| `markdown`     | Print information as Markdown. ~~bool~~                                      |
+| `silent`       | Don't print anything, just return. ~~bool~~                                  |
 
 ### spacy.explain {#spacy.explain tag="function"}
 
@@ -133,7 +131,7 @@ list of available terms, see
 Allocate data and perform operations on [GPU](/usage/#gpu), if available. If
 data has already been allocated on CPU, it will not be moved. Ideally, this
 function should be called right after importing spaCy and _before_ loading any
-models.
+pipelines.
 
 > #### Example
 >
@@ -152,7 +150,7 @@ models.
 Allocate data and perform operations on [GPU](/usage/#gpu). Will raise an error
 if no GPU is available. If data has already been allocated on CPU, it will not
 be moved. Ideally, this function should be called right after importing spaCy
-and _before_ loading any models.
+and _before_ loading any pipelines.
 
 > #### Example
 >
@@ -271,9 +269,9 @@ If a setting is not present in the options, the default value will be used.
 | `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](https://github.com/explosion/spaCy/blob/master/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
 
 By default, displaCy comes with colors for all entity types used by
-[spaCy models](/models). If you're using custom entity types, you can use the
-`colors` setting to add your own colors for them. Your application or model
-package can also expose a
+[spaCy's trained pipelines](/models). If you're using custom entity types, you
+can use the `colors` setting to add your own colors for them. Your application
+or pipeline package can also expose a
 [`spacy_displacy_colors` entry point](/usage/saving-loading#entry-points-displacy)
 to add custom labels and their colors automatically.
 
@@ -666,8 +664,8 @@ loaded lazily, to avoid expensive setup code associated with the language data.
 
 ### util.load_model {#util.load_model tag="function" new="2"}
 
-Load a model from a package or data path. If called with a package name, spaCy
-will assume the model is a Python package and import and call its `load()`
+Load a pipeline from a package or data path. If called with a string name, spaCy
+will assume the pipeline is a Python package and import and call its `load()`
 method. If called with a path, spaCy will assume it's a data directory, read the
 language and pipeline settings from the [`config.cfg`](/api/data-formats#config)
 and create a `Language` object. The model data will then be loaded in via
@@ -683,16 +681,16 @@ and create a `Language` object. The model data will then be loaded in via
 
 | Name                                 | Description                                                                                                                                                                                                                                    |
 | ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `name`                               | Package name or model path. ~~str~~                                                                                                                                                                                                            |
+| `name`                               | Package name or path. ~~str~~                                                                                                                                                                                                                  |
 | `vocab` <Tag variant="new">3</Tag>   | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~.                                                                                                         |
 | `disable`                            | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ |
 | `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~                                                                                                           |
 | `config` <Tag variant="new">3</Tag>  | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~                                                                                                 |
-| **RETURNS**                          | `Language` class with the loaded model. ~~Language~~                                                                                                                                                                                           |
+| **RETURNS**                          | `Language` object with the loaded pipeline. ~~Language~~                                                                                                                                                                                       |
 
 ### util.load_model_from_init_py {#util.load_model_from_init_py tag="function" new="2"}
 
-A helper function to use in the `load()` method of a model package's
+A helper function to use in the `load()` method of a pipeline package's
 [`__init__.py`](https://github.com/explosion/spacy-models/tree/master/template/model/xx_model_name/__init__.py).
 
 > #### Example
@@ -706,70 +704,72 @@ A helper function to use in the `load()` method of a model package's
 
 | Name                                 | Description                                                                                                                                                                                                                                    |
 | ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `init_file`                          | Path to model's `__init__.py`, i.e. `__file__`. ~~Union[str, Path]~~                                                                                                                                                                           |
+| `init_file`                          | Path to package's `__init__.py`, i.e. `__file__`. ~~Union[str, Path]~~                                                                                                                                                                         |
 | `vocab` <Tag variant="new">3</Tag>   | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~.                                                                                                         |
 | `disable`                            | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ |
 | `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~                                                                                                           |
 | `config` <Tag variant="new">3</Tag>  | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~                                                                                                 |
-| **RETURNS**                          | `Language` class with the loaded model. ~~Language~~                                                                                                                                                                                           |
+| **RETURNS**                          | `Language` class with the loaded pipeline. ~~Language~~                                                                                                                                                                                        |
 
 ### util.load_config {#util.load_config tag="function" new="3"}
 
-Load a model's [`config.cfg`](/api/data-formats#config) from a file path. The
-config typically includes details about the model pipeline and how its
-components are created, as well as all training settings and hyperparameters.
+Load a pipeline's [`config.cfg`](/api/data-formats#config) from a file path. The
+config typically includes details about the components and how they're created,
+as well as all training settings and hyperparameters.
 
 > #### Example
 >
 > ```python
-> config = util.load_config("/path/to/model/config.cfg")
+> config = util.load_config("/path/to/config.cfg")
 > print(config.to_str())
 > ```
 
 | Name          | Description                                                                                                                                                                 |
 | ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `path`        | Path to the model's `config.cfg`. ~~Union[str, Path]~~                                                                                                                      |
+| `path`        | Path to the pipeline's `config.cfg`. ~~Union[str, Path]~~                                                                                                                   |
 | `overrides`   | Optional config overrides to replace in loaded config. Can be provided as nested dict, or as flat dict with keys in dot notation, e.g. `"nlp.pipeline"`. ~~Dict[str, Any]~~ |
 | `interpolate` | Whether to interpolate the config and replace variables like `${paths.train}` with their values. Defaults to `False`. ~~bool~~                                              |
-| **RETURNS**   | The model's config. ~~Config~~                                                                                                                                              |
+| **RETURNS**   | The pipeline's config. ~~Config~~                                                                                                                                           |
 
 ### util.load_meta {#util.load_meta tag="function" new="3"}
 
-Get a model's [`meta.json`](/api/data-formats#meta) from a file path and
-validate its contents.
+Get a pipeline's [`meta.json`](/api/data-formats#meta) from a file path and
+validate its contents. The meta typically includes details about author,
+licensing, data sources and version.
 
 > #### Example
 >
 > ```python
-> meta = util.load_meta("/path/to/model/meta.json")
+> meta = util.load_meta("/path/to/meta.json")
 > ```
 
-| Name        | Description                                           |
-| ----------- | ----------------------------------------------------- |
-| `path`      | Path to the model's `meta.json`. ~~Union[str, Path]~~ |
-| **RETURNS** | The model's meta data. ~~Dict[str, Any]~~             |
+| Name        | Description                                              |
+| ----------- | -------------------------------------------------------- |
+| `path`      | Path to the pipeline's `meta.json`. ~~Union[str, Path]~~ |
+| **RETURNS** | The pipeline's meta data. ~~Dict[str, Any]~~             |
 
 ### util.get_installed_models {#util.get_installed_models tag="function" new="3"}
 
-List all model packages installed in the current environment. This will include
-any spaCy model that was packaged with [`spacy package`](/api/cli#package).
-Under the hood, model packages expose a Python entry point that spaCy can check,
-without having to load the model.
+List all pipeline packages installed in the current environment. This will
+include any spaCy pipeline that was packaged with
+[`spacy package`](/api/cli#package). Under the hood, pipeline packages expose a
+Python entry point that spaCy can check, without having to load the `nlp`
+object.
 
 > #### Example
 >
 > ```python
-> model_names = util.get_installed_models()
+> names = util.get_installed_models()
 > ```
 
-| Name        | Description                                                                        |
-| ----------- | ---------------------------------------------------------------------------------- |
-| **RETURNS** | The string names of the models installed in the current environment. ~~List[str]~~ |
+| Name        | Description                                                                           |
+| ----------- | ------------------------------------------------------------------------------------- |
+| **RETURNS** | The string names of the pipelines installed in the current environment. ~~List[str]~~ |
 
 ### util.is_package {#util.is_package tag="function"}
 
 Check if string maps to a package installed via pip. Mainly used to validate
-[model packages](/usage/models).
+[pipeline packages](/usage/models).
 
 > #### Example
 >
@@ -786,7 +786,8 @@ Check if string maps to a package installed via pip. Mainly used to validate
 ### util.get_package_path {#util.get_package_path tag="function" new="2"}
 
 Get path to an installed package. Mainly used to resolve the location of
-[model packages](/usage/models). Currently imports the package to find its path.
+[pipeline packages](/usage/models). Currently imports the package to find its
+path.
 
 > #### Example
 >
@@ -795,10 +796,10 @@ Get path to an installed package. Mainly used to resolve the location of
 > # /usr/lib/python3.6/site-packages/en_core_web_sm
 > ```
 
-| Name           | Description                               |
-| -------------- | ----------------------------------------- |
-| `package_name` | Name of installed package. ~~str~~        |
-| **RETURNS**    | Path to model package directory. ~~Path~~ |
+| Name           | Description                                  |
+| -------------- | -------------------------------------------- |
+| `package_name` | Name of installed package. ~~str~~           |
+| **RETURNS**    | Path to pipeline package directory. ~~Path~~ |
 
 ### util.is_in_jupyter {#util.is_in_jupyter tag="function" new="2"}
 
diff --git a/website/docs/models/index.md b/website/docs/models/index.md
index d5f87d3b5..64e719f37 100644
--- a/website/docs/models/index.md
+++ b/website/docs/models/index.md
@@ -1,6 +1,6 @@
 ---
-title: Models
-teaser: Downloadable pretrained models for spaCy
+title: Trained Models & Pipelines
+teaser: Downloadable trained pipelines and weights for spaCy
 menu:
   - ['Quickstart', 'quickstart']
   - ['Conventions', 'conventions']
@@ -8,15 +8,15 @@ menu:
 
 <!-- Update page, refer to new /api/architectures and training docs -->
 
-The models directory includes two types of pretrained models:
+This directory includes two types of packages:
 
-1. **Core models:** General-purpose pretrained models to predict named entities,
-   part-of-speech tags and syntactic dependencies. Can be used out-of-the-box
-   and fine-tuned on more specific data.
-2. **Starter models:** Transfer learning starter packs with pretrained weights
-   you can initialize your models with to achieve better accuracy. They can
+1. **Trained pipelines:** General-purpose spaCy pipelines to predict named
+   entities, part-of-speech tags and syntactic dependencies. Can be used
+   out-of-the-box and fine-tuned on more specific data.
+2. **Starters:** Transfer learning starter packs with pretrained weights you can
+   initialize your pipeline models with to achieve better accuracy. They can
    include word vectors (which will be used as features during training) or
-   other pretrained representations like BERT. These models don't include
+   other pretrained representations like BERT. These packages don't include
    components for specific tasks like NER or text classification and are
    intended to be used as base models when training your own models.
 
@@ -28,43 +28,42 @@ import QuickstartModels from 'widgets/quickstart-models.js'
 
 <Infobox title="Installation and usage" emoji="📖">
 
-For more details on how to use models with spaCy, see the
-[usage guide on models](/usage/models).
+For more details on how to use trained pipelines with spaCy, see the
+[usage guide](/usage/models).
 
 </Infobox>
 
-## Model naming conventions {#conventions}
+## Package naming conventions {#conventions}
 
-In general, spaCy expects all model packages to follow the naming convention of
-`[lang`\_[name]]. For spaCy's models, we also chose to divide the name into
-three components:
+In general, spaCy expects all pipeline packages to follow the naming convention
+of `[lang]_[name]`. For spaCy's pipelines, we also chose to divide the name
+into three components:
 
-1. **Type:** Model capabilities (e.g. `core` for general-purpose model with
+1. **Type:** Capabilities (e.g. `core` for general-purpose pipeline with
    vocabulary, syntax, entities and word vectors, or `depent` for only vocab,
    syntax and entities).
-2. **Genre:** Type of text the model is trained on, e.g. `web` or `news`.
-3. **Size:** Model size indicator, `sm`, `md` or `lg`.
+2. **Genre:** Type of text the pipeline is trained on, e.g. `web` or `news`.
+3. **Size:** Package size indicator, `sm`, `md` or `lg`.
 
 For example, [`en_core_web_sm`](/models/en#en_core_web_sm) is a small English
-model trained on written web text (blogs, news, comments), that includes
+pipeline trained on written web text (blogs, news, comments), that includes
 vocabulary, vectors, syntax and entities.
 
-### Model versioning {#model-versioning}
+### Package versioning {#model-versioning}
 
-Additionally, the model versioning reflects both the compatibility with spaCy,
-as well as the major and minor model version. A model version `a.b.c` translates
-to:
+Additionally, the pipeline package versioning reflects both the compatibility
+with spaCy and the major and minor version. A package version `a.b.c`
+translates to:
 
 - `a`: **spaCy major version**. For example, `2` for spaCy v2.x.
-- `b`: **Model major version**. Models with a different major version can't be
-  loaded by the same code. For example, changing the width of the model, adding
-  hidden layers or changing the activation changes the model major version.
-- `c`: **Model minor version**. Same model structure, but different parameter
-  values, e.g. from being trained on different data, for different numbers of
-  iterations, etc.
+- `b`: **Package major version**. Pipelines with a different major version can't
+  be loaded by the same code. For example, changing the width of the model,
+  adding hidden layers or changing the activation changes the major version.
+- `c`: **Package minor version**. Same pipeline structure, but different
+  parameter values, e.g. from being trained on different data, for different
+  numbers of iterations, etc.
 
 For a detailed compatibility overview, see the
-[`compatibility.json`](https://github.com/explosion/spacy-models/tree/master/compatibility.json)
-in the models repository. This is also the source of spaCy's internal
-compatibility check, performed when you run the [`download`](/api/cli#download)
-command.
+[`compatibility.json`](https://github.com/explosion/spacy-models/tree/master/compatibility.json).
+This is also the source of spaCy's internal compatibility check, performed when
+you run the [`download`](/api/cli#download) command.
diff --git a/website/docs/usage/101/_pipelines.md b/website/docs/usage/101/_pipelines.md
index 0aa821223..9a63ee42d 100644
--- a/website/docs/usage/101/_pipelines.md
+++ b/website/docs/usage/101/_pipelines.md
@@ -1,9 +1,9 @@
 When you call `nlp` on a text, spaCy first tokenizes the text to produce a `Doc`
 object. The `Doc` is then processed in several different steps – this is also
 referred to as the **processing pipeline**. The pipeline used by the
-[default models](/models) typically include a tagger, a lemmatizer, a parser and
-an entity recognizer. Each pipeline component returns the processed `Doc`, which
-is then passed on to the next component.
+[trained pipelines](/models) typically includes a tagger, a lemmatizer, a parser
+and an entity recognizer. Each pipeline component returns the processed `Doc`,
+which is then passed on to the next component.
 
 ![The processing pipeline](../../images/pipeline.svg)
 
@@ -23,14 +23,15 @@ is then passed on to the next component.
 | **textcat**           | [`TextCategorizer`](/api/textcategorizer)                          | `Doc.cats`                                                | Assign document labels.                          |
 | **custom**            | [custom components](/usage/processing-pipelines#custom-components) | `Doc._.xxx`, `Token._.xxx`, `Span._.xxx`                  | Assign custom attributes, methods or properties. |
 
-The processing pipeline always **depends on the statistical model** and its
-capabilities. For example, a pipeline can only include an entity recognizer
-component if the model includes data to make predictions of entity labels. This
-is why each model will specify the pipeline to use in its meta data and
-[config](/usage/training#config), as a simple list containing the component
-names:
+The capabilities of a processing pipeline always depend on the components, their
+models and how they were trained. For example, a pipeline for named entity
+recognition needs to include a trained named entity recognizer component with a
+statistical model and weights that enable it to **make predictions** of entity
+labels. This is why each pipeline specifies its components and their settings in
+the [config](/usage/training#config):
 
 ```ini
+[nlp]
 pipeline = ["tagger", "parser", "ner"]
 ```
 
diff --git a/website/docs/usage/101/_pos-deps.md b/website/docs/usage/101/_pos-deps.md
index 1e8960edf..a531b245e 100644
--- a/website/docs/usage/101/_pos-deps.md
+++ b/website/docs/usage/101/_pos-deps.md
@@ -1,9 +1,9 @@
 After tokenization, spaCy can **parse** and **tag** a given `Doc`. This is where
-the statistical model comes in, which enables spaCy to **make a prediction** of
-which tag or label most likely applies in this context. A model consists of
-binary data and is produced by showing a system enough examples for it to make
-predictions that generalize across the language – for example, a word following
-"the" in English is most likely a noun.
+the trained pipeline and its statistical models come in, which enable spaCy to
+**make predictions** of which tag or label most likely applies in this context.
+A trained component includes binary data that is produced by showing a system
+enough examples for it to make predictions that generalize across the language –
+for example, a word following "the" in English is most likely a noun.
 
 Linguistic annotations are available as
 [`Token` attributes](/api/token#attributes). Like many NLP libraries, spaCy
@@ -25,7 +25,8 @@ for token in doc:
 
 > - **Text:** The original word text.
 > - **Lemma:** The base form of the word.
-> - **POS:** The simple [UPOS](https://universaldependencies.org/docs/u/pos/) part-of-speech tag.
+> - **POS:** The simple [UPOS](https://universaldependencies.org/docs/u/pos/)
+>   part-of-speech tag.
 > - **Tag:** The detailed part-of-speech tag.
 > - **Dep:** Syntactic dependency, i.e. the relation between tokens.
 > - **Shape:** The word shape – capitalization, punctuation, digits.
diff --git a/website/docs/usage/101/_serialization.md b/website/docs/usage/101/_serialization.md
index 01a9c39d1..ce34ea6e9 100644
--- a/website/docs/usage/101/_serialization.md
+++ b/website/docs/usage/101/_serialization.md
@@ -1,9 +1,9 @@
 If you've been modifying the pipeline, vocabulary, vectors and entities, or made
-updates to the model, you'll eventually want to **save your progress** – for
-example, everything that's in your `nlp` object. This means you'll have to
-translate its contents and structure into a format that can be saved, like a
-file or a byte string. This process is called serialization. spaCy comes with
-**built-in serialization methods** and supports the
+updates to the component models, you'll eventually want to **save your
+progress** – for example, everything that's in your `nlp` object. This means
+you'll have to translate its contents and structure into a format that can be
+saved, like a file or a byte string. This process is called serialization. spaCy
+comes with **built-in serialization methods** and supports the
 [Pickle protocol](https://www.diveinto.org/python3/serializing.html#dump).
 
 > #### What's pickle?
diff --git a/website/docs/usage/101/_training.md b/website/docs/usage/101/_training.md
index 4573f5ea3..b73a83d6a 100644
--- a/website/docs/usage/101/_training.md
+++ b/website/docs/usage/101/_training.md
@@ -1,25 +1,25 @@
 spaCy's tagger, parser, text categorizer and many other components are powered
 by **statistical models**. Every "decision" these components make – for example,
 which part-of-speech tag to assign, or whether a word is a named entity – is a
-**prediction** based on the model's current **weight values**. The weight
-values are estimated based on examples the model has seen
-during **training**. To train a model, you first need training data – examples
-of text, and the labels you want the model to predict. This could be a
-part-of-speech tag, a named entity or any other information.
+**prediction** based on the model's current **weight values**. The weight values
+are estimated based on examples the model has seen during **training**. To train
+a model, you first need training data – examples of text, and the labels you
+want the model to predict. This could be a part-of-speech tag, a named entity or
+any other information.
 
-Training is an iterative process in which the model's predictions are compared 
+Training is an iterative process in which the model's predictions are compared
 against the reference annotations in order to estimate the **gradient of the
 loss**. The gradient of the loss is then used to calculate the gradient of the
 weights through [backpropagation](https://thinc.ai/backprop101). The gradients
-indicate how the weight values should be changed so that the model's
-predictions become more similar to the reference labels over time. 
+indicate how the weight values should be changed so that the model's predictions
+become more similar to the reference labels over time.
 
 > - **Training data:** Examples and their annotations.
 > - **Text:** The input text the model should predict a label for.
 > - **Label:** The label the model should predict.
 > - **Gradient:** The direction and rate of change for a numeric value.
->   Minimising the gradient of the weights should result in predictions that
->   are closer to the reference labels on the training data.
+>   Minimising the gradient of the weights should result in predictions that are
+>   closer to the reference labels on the training data.
 
 ![The training process](../../images/training.svg)
 
diff --git a/website/docs/usage/101/_vectors-similarity.md b/website/docs/usage/101/_vectors-similarity.md
index 92df1b331..cf5b70af2 100644
--- a/website/docs/usage/101/_vectors-similarity.md
+++ b/website/docs/usage/101/_vectors-similarity.md
@@ -24,12 +24,12 @@ array([2.02280000e-01,  -7.66180009e-02,   3.70319992e-01,
 
 <Infobox title="Important note" variant="warning">
 
-To make them compact and fast, spaCy's small [models](/models) (all packages
-that end in `sm`) **don't ship with word vectors**, and only include
+To make them compact and fast, spaCy's small [pipeline packages](/models) (all
+packages that end in `sm`) **don't ship with word vectors**, and only include
 context-sensitive **tensors**. This means you can still use the `similarity()`
 methods to compare documents, spans and tokens – but the result won't be as
 good, and individual tokens won't have any vectors assigned. So in order to use
-_real_ word vectors, you need to download a larger model:
+_real_ word vectors, you need to download a larger pipeline package:
 
 ```diff
 - python -m spacy download en_core_web_sm
@@ -38,11 +38,11 @@ _real_ word vectors, you need to download a larger model:
 
 </Infobox>
 
-Models that come with built-in word vectors make them available as the
-[`Token.vector`](/api/token#vector) attribute. [`Doc.vector`](/api/doc#vector)
-and [`Span.vector`](/api/span#vector) will default to an average of their token
-vectors. You can also check if a token has a vector assigned, and get the L2
-norm, which can be used to normalize vectors.
+Pipeline packages that come with built-in word vectors make them available as
+the [`Token.vector`](/api/token#vector) attribute.
+[`Doc.vector`](/api/doc#vector) and [`Span.vector`](/api/span#vector) will
+default to an average of their token vectors. You can also check if a token has
+a vector assigned, and get the L2 norm, which can be used to normalize vectors.
 
 ```python
 ### {executable="true"}
@@ -62,12 +62,12 @@ for token in tokens:
 > - **OOV**: Out-of-vocabulary
 
 The words "dog", "cat" and "banana" are all pretty common in English, so they're
-part of the model's vocabulary, and come with a vector. The word "afskfsd" on
+part of the pipeline's vocabulary, and come with a vector. The word "afskfsd" on
 the other hand is a lot less common and out-of-vocabulary – so its vector
 representation consists of 300 dimensions of `0`, which means it's practically
 nonexistent. If your application will benefit from a **large vocabulary** with
-more vectors, you should consider using one of the larger models or loading in a
-full vector package, for example,
+more vectors, you should consider using one of the larger pipeline packages or
+loading in a full vector package, for example,
 [`en_vectors_web_lg`](/models/en-starters#en_vectors_web_lg), which includes
 over **1 million unique vectors**.
 
@@ -82,7 +82,7 @@ Each [`Doc`](/api/doc), [`Span`](/api/span), [`Token`](/api/token) and
 method that lets you compare it with another object, and determine the
 similarity. Of course similarity is always subjective – whether two words, spans
 or documents are similar really depends on how you're looking at it. spaCy's
-similarity model usually assumes a pretty general-purpose definition of
+similarity implementation usually assumes a pretty general-purpose definition of
 similarity.
 
 > #### 📝 Things to try
@@ -99,7 +99,7 @@ similarity.
 ### {executable="true"}
 import spacy
 
-nlp = spacy.load("en_core_web_md")  # make sure to use larger model!
+nlp = spacy.load("en_core_web_md")  # make sure to use larger package!
 doc1 = nlp("I like salty fries and hamburgers.")
 doc2 = nlp("Fast food tastes very good.")
 
@@ -143,10 +143,9 @@ us that builds on top of spaCy and lets you train and query more interesting and
 detailed word vectors. It combines noun phrases like "fast food" or "fair game"
 and includes the part-of-speech tags and entity labels. The library also
 includes annotation recipes for our annotation tool [Prodigy](https://prodi.gy)
-that let you evaluate vector models and create terminology lists. For more
-details, check out
-[our blog post](https://explosion.ai/blog/sense2vec-reloaded). To explore the
-semantic similarities across all Reddit comments of 2015 and 2019, see the
-[interactive demo](https://explosion.ai/demos/sense2vec).
+that let you evaluate vectors and create terminology lists. For more details,
+check out [our blog post](https://explosion.ai/blog/sense2vec-reloaded). To
+explore the semantic similarities across all Reddit comments of 2015 and 2019,
+see the [interactive demo](https://explosion.ai/demos/sense2vec).
 
 </Infobox>
diff --git a/website/docs/usage/index.md b/website/docs/usage/index.md
index 76858213c..ee5fd0a3b 100644
--- a/website/docs/usage/index.md
+++ b/website/docs/usage/index.md
@@ -35,10 +35,10 @@ Using pip, spaCy releases are available as source packages and binary wheels.
 $ pip install -U spacy
 ```
 
-> #### Download models
+> #### Download pipelines
 >
-> After installation you need to download a language model. For more info and
-> available models, see the [docs on models](/models).
+> After installation you typically want to download a trained pipeline. For more
+> info and available packages, see the [models directory](/models).
 >
 > ```cli
 > $ python -m spacy download en_core_web_sm
@@ -54,7 +54,7 @@ To install additional data tables for lemmatization you can run
 [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data)
 separately. The lookups package is needed to provide normalization and
 lemmatization data for new models and to lemmatize in languages that don't yet
-come with pretrained models and aren't powered by third-party libraries.
+come with trained pipelines and aren't powered by third-party libraries.
 
 </Infobox>
 
@@ -88,23 +88,21 @@ and pull requests to the recipe and setup are always appreciated.
 > spaCy v2.x to v3.x may still require some changes to your code base. For
 > details see the sections on [backwards incompatibilities](/usage/v3#incompat)
 > and [migrating](/usage/v3#migrating). Also remember to download the new
-> models, and retrain your own models.
+> trained pipelines, and retrain your own pipelines.
 
 When updating to a newer version of spaCy, it's generally recommended to start
 with a clean virtual environment. If you're upgrading to a new major version,
-make sure you have the latest **compatible models** installed, and that there
-are no old and incompatible model packages left over in your environment, as
-this can often lead to unexpected results and errors. If you've trained your own
-models, keep in mind that your train and runtime inputs must match. This means
-you'll have to **retrain your models** with the new version.
+make sure you have the latest **compatible trained pipelines** installed, and
+that there are no old and incompatible packages left over in your environment,
+as this can often lead to unexpected results and errors. If you've trained your
+own models, keep in mind that your train and runtime inputs must match. This
+means you'll have to **retrain your pipelines** with the new version.
 
 spaCy also provides a [`validate`](/api/cli#validate) command, which lets you
-verify that all installed models are compatible with your spaCy version. If
-incompatible models are found, tips and installation instructions are printed.
-The command is also useful to detect out-of-sync model links resulting from
-links created in different virtual environments. It's recommended to run the
-command with `python -m` to make sure you're executing the correct version of
-spaCy.
+verify that all installed pipeline packages are compatible with your spaCy
+version. If incompatible packages are found, tips and installation instructions
+are printed. It's recommended to run the command with `python -m` to make sure
+you're executing the correct version of spaCy.
 
 ```cli
 $ pip install -U spacy
@@ -132,8 +130,8 @@ $ pip install -U spacy[cuda92]
 Once you have a GPU-enabled installation, the best way to activate it is to call
 [`spacy.prefer_gpu`](/api/top-level#spacy.prefer_gpu) or
 [`spacy.require_gpu()`](/api/top-level#spacy.require_gpu) somewhere in your
-script before any models have been loaded. `require_gpu` will raise an error if
-no GPU is available.
+script before any pipelines have been loaded. `require_gpu` will raise an error
+if no GPU is available.
 
 ```python
 import spacy
@@ -238,16 +236,16 @@ installing, loading and using spaCy, as well as their solutions.
 <Accordion title="No compatible model found" id="compatible-model">
 
 ```
-No compatible model found for [lang] (spaCy vX.X.X).
+No compatible package found for [lang] (spaCy vX.X.X).
 ```
 
-This usually means that the model you're trying to download does not exist, or
-isn't available for your version of spaCy. Check the
+This usually means that the trained pipeline you're trying to download does not
+exist, or isn't available for your version of spaCy. Check the
 [compatibility table](https://github.com/explosion/spacy-models/tree/master/compatibility.json)
-to see which models are available for your spaCy version. If you're using an old
-version, consider upgrading to the latest release. Note that while spaCy
+to see which packages are available for your spaCy version. If you're using an
+old version, consider upgrading to the latest release. Note that while spaCy
 supports tokenization for [a variety of languages](/usage/models#languages), not
-all of them come with statistical models. To only use the tokenizer, import the
+all of them come with trained pipelines. To only use the tokenizer, import the
 language's `Language` class instead, for example
 `from spacy.lang.fr import French`.
 
@@ -259,7 +257,7 @@ language's `Language` class instead, for example
 no such option: --no-cache-dir
 ```
 
-The `download` command uses pip to install the models and sets the
+The `download` command uses pip to install the pipeline packages and sets the
 `--no-cache-dir` flag to prevent it from requiring too much memory.
 [This setting](https://pip.pypa.io/en/stable/reference/pip_install/#caching)
 requires pip v6.0 or newer. Run `pip install -U pip` to upgrade to the latest
@@ -323,19 +321,19 @@ also run `which python` to find out where your Python executable is located.
 
 </Accordion>
 
-<Accordion title="Import error: No module named [model]" id="import-error-models">
+<Accordion title="Import error: No module named [name]" id="import-error-models">
 
 ```
 ImportError: No module named 'en_core_web_sm'
 ```
 
-As of spaCy v1.7, all models can be installed as Python packages. This means
-that they'll become importable modules of your application. If this fails, it's
-usually a sign that the package is not installed in the current environment. Run
-`pip list` or `pip freeze` to check which model packages you have installed, and
-install the [correct models](/models) if necessary. If you're importing a model
-manually at the top of a file, make sure to use the name of the package, not the
-shortcut link you've created.
+As of spaCy v1.7, all trained pipelines can be installed as Python packages.
+This means that they'll become importable modules of your application. If this
+fails, it's usually a sign that the package is not installed in the current
+environment. Run `pip list` or `pip freeze` to check which pipeline packages you
+have installed, and install the [correct package](/models) if necessary. If
+you're importing a package manually at the top of a file, make sure to use the
+full name of the package.
 
 </Accordion>
 
diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md
index 726cf0521..7d3613cf5 100644
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@@ -132,7 +132,7 @@ language can extend the `Lemmatizer` as part of its
 ### {executable="true"}
 import spacy
 
-# English models include a rule-based lemmatizer
+# English pipelines include a rule-based lemmatizer
 nlp = spacy.load("en_core_web_sm")
 lemmatizer = nlp.get_pipe("lemmatizer")
 print(lemmatizer.mode)  # 'rule'
@@ -156,14 +156,14 @@ component.
 
 The data for spaCy's lemmatizers is distributed in the package
 [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data). The
-provided models already include all the required tables, but if you are creating
-new models, you'll probably want to install `spacy-lookups-data` to provide the
-data when the lemmatizer is initialized.
+provided trained pipelines already include all the required tables, but if you
+are creating new pipelines, you'll probably want to install `spacy-lookups-data`
+to provide the data when the lemmatizer is initialized.
 
 ### Lookup lemmatizer {#lemmatizer-lookup}
 
-For models without a tagger or morphologizer, a lookup lemmatizer can be added
-to the pipeline as long as a lookup table is provided, typically through
+For pipelines without a tagger or morphologizer, a lookup lemmatizer can be
+added to the pipeline as long as a lookup table is provided, typically through
 [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data). The
 lookup lemmatizer looks up the token surface form in the lookup table without
 reference to the token's part-of-speech or context.
@@ -178,9 +178,9 @@ nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
 
 ### Rule-based lemmatizer {#lemmatizer-rule}
 
-When training models that include a component that assigns POS (a morphologizer
-or a tagger with a [POS mapping](#mappings-exceptions)), a rule-based lemmatizer
-can be added using rule tables from
+When training pipelines that include a component that assigns part-of-speech
+tags (a morphologizer or a tagger with a [POS mapping](#mappings-exceptions)), a
+rule-based lemmatizer can be added using rule tables from
 [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data):
 
 ```python
@@ -366,10 +366,10 @@ sequence of tokens. You can walk up the tree with the
 
 > #### Projective vs. non-projective
 >
-> For the [default English model](/models/en), the parse tree is **projective**,
-> which means that there are no crossing brackets. The tokens returned by
-> `.subtree` are therefore guaranteed to be contiguous. This is not true for the
-> German model, which has many
+> For the [default English pipelines](/models/en), the parse tree is
+> **projective**, which means that there are no crossing brackets. The tokens
+> returned by `.subtree` are therefore guaranteed to be contiguous. This is not
+> true for the German pipelines, which have many
 > [non-projective dependencies](https://explosion.ai/blog/german-model#word-order).
 
 ```python
@@ -497,26 +497,27 @@ displaCy in our [online demo](https://explosion.ai/demos/displacy)..
 
 ### Disabling the parser {#disabling}
 
-In the [default models](/models), the parser is loaded and enabled as part of
-the [standard processing pipeline](/usage/processing-pipelines). If you don't
-need any of the syntactic information, you should disable the parser. Disabling
-the parser will make spaCy load and run much faster. If you want to load the
-parser, but need to disable it for specific documents, you can also control its
-use on the `nlp` object.
+In the [trained pipelines](/models) provided by spaCy, the parser is loaded and
+enabled by default as part of the
+[standard processing pipeline](/usage/processing-pipelines). If you don't need
+any of the syntactic information, you should disable the parser. Disabling the
+parser will make spaCy load and run much faster. If you want to load the parser,
+but need to disable it for specific documents, you can also control its use on
+the `nlp` object. For more details, see the usage guide on
+[disabling pipeline components](/usage/processing-pipelines/#disabling).
 
 ```python
 nlp = spacy.load("en_core_web_sm", disable=["parser"])
-nlp = English().from_disk("/model", disable=["parser"])
-doc = nlp("I don't want parsed", disable=["parser"])
 ```
 
 ## Named Entity Recognition {#named-entities}
 
 spaCy features an extremely fast statistical entity recognition system, that
-assigns labels to contiguous spans of tokens. The default model identifies a
-variety of named and numeric entities, including companies, locations,
-organizations and products. You can add arbitrary classes to the entity
-recognition system, and update the model with new examples.
+assigns labels to contiguous spans of tokens. The default
+[trained pipelines](/models) can identify a variety of named and numeric
+entities, including companies, locations, organizations and products. You can
+add arbitrary classes to the entity recognition system, and update the model
+with new examples.
 
 ### Named Entity Recognition 101 {#named-entities-101}
 
@@ -669,7 +670,7 @@ responsibility for ensuring that the data is left in a consistent state.
 
 <Infobox title="Annotation scheme">
 
-For details on the entity types available in spaCy's pretrained models, see the
+For details on the entity types available in spaCy's trained pipelines, see the
 "label scheme" sections of the individual models in the
 [models directory](/models).
 
@@ -710,9 +711,8 @@ import DisplacyEntHtml from 'images/displacy-ent2.html'
 To ground the named entities into the "real world", spaCy provides functionality
 to perform entity linking, which resolves a textual entity to a unique
 identifier from a knowledge base (KB). You can create your own
-[`KnowledgeBase`](/api/kb) and
-[train a new Entity Linking model](/usage/training#entity-linker) using that
-custom-made KB.
+[`KnowledgeBase`](/api/kb) and [train](/usage/training) a new
+[`EntityLinker`](/api/entitylinker) using that custom knowledge base.
 
 ### Accessing entity identifiers {#entity-linking-accessing model="entity linking"}
 
@@ -724,7 +724,7 @@ object, or the `ent_kb_id` and `ent_kb_id_` attributes of a
 ```python
 import spacy
 
-nlp = spacy.load("my_custom_el_model")
+nlp = spacy.load("my_custom_el_pipeline")
 doc = nlp("Ada Lovelace was born in London")
 
 # Document level
@@ -1042,13 +1042,15 @@ function that behaves the same way.
 
 <Infobox title="Important note" variant="warning">
 
-If you're using a statistical model, writing to the
+If you've loaded a trained pipeline, writing to the
 [`nlp.Defaults`](/api/language#defaults) or `English.Defaults` directly won't
-work, since the regular expressions are read from the model and will be compiled
-when you load it. If you modify `nlp.Defaults`, you'll only see the effect if
-you call [`spacy.blank`](/api/top-level#spacy.blank). If you want to modify the
-tokenizer loaded from a statistical model, you should modify `nlp.tokenizer`
-directly.
+work, since the regular expressions are read from the pipeline data and will be
+compiled when you load it. If you modify `nlp.Defaults`, you'll only see the
+effect if you call [`spacy.blank`](/api/top-level#spacy.blank). If you want to
+modify the tokenizer loaded from a trained pipeline, you should modify
+`nlp.tokenizer` directly. If you're training your own pipeline, you can register
+[callbacks](/usage/training/#custom-code-nlp-callbacks) to modify the `nlp`
+object before training.
 
 </Infobox>
 
@@ -1218,11 +1220,11 @@ print(doc.text, [token.text for token in doc])
 
 <Infobox title="Important note on tokenization and models" variant="warning">
 
-Keep in mind that your model's result may be less accurate if the tokenization
+Keep in mind that your models' results may be less accurate if the tokenization
 during training differs from the tokenization at runtime. So if you modify a
-pretrained model's tokenization afterwards, it may produce very different
-predictions. You should therefore train your model with the **same tokenizer**
-it will be using at runtime. See the docs on
+trained pipeline's tokenization afterwards, it may produce very different
+predictions. You should therefore train your pipeline with the **same
+tokenizer** it will be using at runtime. See the docs on
 [training with custom tokenization](#custom-tokenizer-training) for details.
 
 </Infobox>
@@ -1231,7 +1233,7 @@ it will be using at runtime. See the docs on
 
 spaCy's [training config](/usage/training#config) describe the settings,
 hyperparameters, pipeline and tokenizer used for constructing and training the
-model. The `[nlp.tokenizer]` block refers to a **registered function** that
+pipeline. The `[nlp.tokenizer]` block refers to a **registered function** that
 takes the `nlp` object and returns a tokenizer. Here, we're registering a
 function called `whitespace_tokenizer` in the
 [`@tokenizers` registry](/api/registry). To make sure spaCy knows how to
@@ -1626,11 +1628,11 @@ spaCy provides four alternatives for sentence segmentation:
 
 Unlike other libraries, spaCy uses the dependency parse to determine sentence
 boundaries. This is usually the most accurate approach, but it requires a
-**statistical model** that provides accurate predictions. If your texts are
+**trained pipeline** that provides accurate predictions. If your texts are
 closer to general-purpose news or web text, this should work well out-of-the-box
-with spaCy's provided models. For social media or conversational text that
-doesn't follow the same rules, your application may benefit from a custom model
-or rule-based component.
+with spaCy's provided trained pipelines. For social media or conversational text
+that doesn't follow the same rules, your application may benefit from a custom
+trained or rule-based component.
 
 ```python
 ### {executable="true"}
@@ -1652,8 +1654,8 @@ parses consistent with the sentence boundaries.
 The [`SentenceRecognizer`](/api/sentencerecognizer) is a simple statistical
 component that only provides sentence boundaries. Along with being faster and
 smaller than the parser, its primary advantage is that it's easier to train
-custom models because it only requires annotated sentence boundaries rather than
-full dependency parses.
+because it only requires annotated sentence boundaries rather than full
+dependency parses.
 
 <!-- TODO: update/confirm usage once we have final models trained -->
 
@@ -1685,7 +1687,7 @@ need sentence boundaries without dependency parses.
 import spacy
 from spacy.lang.en import English
 
-nlp = English()  # just the language with no model
+nlp = English()  # just the language with no pipeline
 nlp.add_pipe("sentencizer")
 doc = nlp("This is a sentence. This is another sentence.")
 for sent in doc.sents:
@@ -1827,11 +1829,11 @@ or Tomas Mikolov's original
 [Word2vec implementation](https://code.google.com/archive/p/word2vec/). Most
 word vector libraries output an easy-to-read text-based format, where each line
 consists of the word followed by its vector. For everyday use, we want to
-convert the vectors model into a binary format that loads faster and takes up
-less space on disk. The easiest way to do this is the
-[`init model`](/api/cli#init-model) command-line utility. This will output a
-spaCy model in the directory `/tmp/la_vectors_wiki_lg`, giving you access to
-some nice Latin vectors. You can then pass the directory path to
+convert the vectors into a binary format that loads faster and takes up less
+space on disk. The easiest way to do this is the
+[`init vocab`](/api/cli#init-vocab) command-line utility. This will output a
+blank spaCy pipeline in the directory `/tmp/la_vectors_wiki_lg`, giving you
+access to some nice Latin vectors. You can then pass the directory path to
 [`spacy.load`](/api/top-level#spacy.load).
 
 > #### Usage example
@@ -1845,7 +1847,7 @@ some nice Latin vectors. You can then pass the directory path to
 
 ```cli
 $ wget https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.la.300.vec.gz
-$ python -m spacy init model en /tmp/la_vectors_wiki_lg --vectors-loc cc.la.300.vec.gz
+$ python -m spacy init vocab en /tmp/la_vectors_wiki_lg --vectors-loc cc.la.300.vec.gz
 ```
 
 <Accordion title="How to optimize vector coverage" id="custom-vectors-coverage" spaced>
@@ -1853,13 +1855,13 @@ $ python -m spacy init model en /tmp/la_vectors_wiki_lg --vectors-loc cc.la.300.
 To help you strike a good balance between coverage and memory usage, spaCy's
 [`Vectors`](/api/vectors) class lets you map **multiple keys** to the **same
 row** of the table. If you're using the
-[`spacy init model`](/api/cli#init-model) command to create a vocabulary,
+[`spacy init vocab`](/api/cli#init-vocab) command to create a vocabulary,
 pruning the vectors will be taken care of automatically if you set the
 `--prune-vectors` flag. You can also do it manually in the following steps:
 
-1. Start with a **word vectors model** that covers a huge vocabulary. For
+1. Start with a **word vectors package** that covers a huge vocabulary. For
    instance, the [`en_vectors_web_lg`](/models/en-starters#en_vectors_web_lg)
-   model provides 300-dimensional GloVe vectors for over 1 million terms of
+   starter provides 300-dimensional GloVe vectors for over 1 million terms of
    English.
 2. If your vocabulary has values set for the `Lexeme.prob` attribute, the
    lexemes will be sorted by descending probability to determine which vectors
@@ -1900,17 +1902,17 @@ the two words.
 In the example above, the vector for "Shore" was removed and remapped to the
 vector of "coast", which is deemed about 73% similar. "Leaving" was remapped to
 the vector of "leaving", which is identical. If you're using the
-[`init model`](/api/cli#init-model) command, you can set the `--prune-vectors`
+[`init vocab`](/api/cli#init-vocab) command, you can set the `--prune-vectors`
 option to easily reduce the size of the vectors as you add them to a spaCy
-model:
+pipeline:
 
 ```cli
-$ python -m spacy init model en /tmp/la_vectors_web_md --vectors-loc la.300d.vec.tgz --prune-vectors 10000
+$ python -m spacy init vocab en /tmp/la_vectors_web_md --vectors-loc la.300d.vec.tgz --prune-vectors 10000
 ```
 
-This will create a spaCy model with vectors for the first 10,000 words in the
-vectors model. All other words in the vectors model are mapped to the closest
-vector among those retained.
+This will create a blank spaCy pipeline with vectors for the first 10,000 words
+in the vectors. All other words in the vectors are mapped to the closest vector
+among those retained.
 
 </Accordion>
 
@@ -1925,8 +1927,8 @@ possible. You can modify the vectors via the [`Vocab`](/api/vocab) or
 if you have vectors in an arbitrary format, as you can read in the vectors with
 your own logic, and just set them with a simple loop. This method is likely to
 be slower than approaches that work with the whole vectors table at once, but
-it's a great approach for once-off conversions before you save out your model to
-disk.
+it's a great approach for once-off conversions before you save out your `nlp`
+object to disk.
 
 ```python
 ### Adding vectors
@@ -1978,14 +1980,14 @@ print(nlp2.lang, [token.is_stop for token in nlp2("custom stop")])
 The [`@spacy.registry.languages`](/api/top-level#registry) decorator lets you
 register a custom language class and assign it a string name. This means that
 you can call [`spacy.blank`](/api/top-level#spacy.blank) with your custom
-language name, and even train models with it and refer to it in your
+language name, and even train pipelines with it and refer to it in your
 [training config](/usage/training#config).
 
 > #### Config usage
 >
 > After registering your custom language class using the `languages` registry,
 > you can refer to it in your [training config](/usage/training#config). This
-> means spaCy will train your model using the custom subclass.
+> means spaCy will train your pipeline using the custom subclass.
 >
 > ```ini
 > [nlp]
diff --git a/website/docs/usage/models.md b/website/docs/usage/models.md
index ec0e02297..9b1e96e4e 100644
--- a/website/docs/usage/models.md
+++ b/website/docs/usage/models.md
@@ -8,25 +8,24 @@ menu:
   - ['Production Use', 'production']
 ---
 
-spaCy's models can be installed as **Python packages**. This means that they're
-a component of your application, just like any other module. They're versioned
-and can be defined as a dependency in your `requirements.txt`. Models can be
-installed from a download URL or a local directory, manually or via
-[pip](https://pypi.python.org/pypi/pip). Their data can be located anywhere on
-your file system.
+spaCy's trained pipelines can be installed as **Python packages**. This means
+that they're a component of your application, just like any other module.
+They're versioned and can be defined as a dependency in your `requirements.txt`.
+Trained pipelines can be installed from a download URL or a local directory,
+manually or via [pip](https://pypi.python.org/pypi/pip). Their data can be
+located anywhere on your file system.
 
 > #### Important note
 >
-> If you're upgrading to spaCy v3.x, you need to **download the new models**. If
-> you've trained statistical models that use spaCy's annotations, you should
-> **retrain your models** after updating spaCy. If you don't retrain, you may
-> suffer train/test skew, which might decrease your accuracy.
+> If you're upgrading to spaCy v3.x, you need to **download the new pipeline
+> packages**. If you've trained your own pipelines, you need to **retrain** them
+> after updating spaCy.
 
 ## Quickstart {hidden="true"}
 
 import QuickstartModels from 'widgets/quickstart-models.js'
 
-<QuickstartModels title="Quickstart" id="quickstart" description="Install a default model, get the code to load it from within spaCy and an example to test it. For more options, see the section on available models below." />
+<QuickstartModels title="Quickstart" id="quickstart" description="Install a default trained pipeline package, get the code to load it from within spaCy and an example to test it. For more options, see the section on available packages below." />
 
 ## Language support {#languages}
 
@@ -34,14 +33,14 @@ spaCy currently provides support for the following languages. You can help by
 [improving the existing language data](/usage/adding-languages#language-data)
 and extending the tokenization patterns.
 [See here](https://github.com/explosion/spaCy/issues/3056) for details on how to
-contribute to model development.
+contribute to development.
 
 > #### Usage note
 >
-> If a model is available for a language, you can download it using the
-> [`spacy download`](/api/cli#download) command. In order to use languages that
-> don't yet come with a model, you have to import them directly, or use
-> [`spacy.blank`](/api/top-level#spacy.blank):
+> If a trained pipeline is available for a language, you can download it using
+> the [`spacy download`](/api/cli#download) command. In order to use languages
+> that don't yet come with a trained pipeline, you have to import them directly,
+> or use [`spacy.blank`](/api/top-level#spacy.blank):
 >
 > ```python
 > from spacy.lang.fi import Finnish
@@ -73,13 +72,13 @@ import Languages from 'widgets/languages.js'
 > nlp = spacy.blank("xx")
 > ```
 
-spaCy also supports models trained on more than one language. This is especially
-useful for named entity recognition. The language ID used for multi-language or
-language-neutral models is `xx`. The language class, a generic subclass
-containing only the base language data, can be found in
+spaCy also supports pipelines trained on more than one language. This is
+especially useful for named entity recognition. The language ID used for
+multi-language or language-neutral pipelines is `xx`. The language class, a
+generic subclass containing only the base language data, can be found in
 [`lang/xx`](https://github.com/explosion/spaCy/tree/master/spacy/lang/xx).
 
-To train a model using the neutral multi-language class, you can set
+To train a pipeline using the neutral multi-language class, you can set
 `lang = "xx"` in your [training config](/usage/training#config). You can also
 import the `MultiLanguage` class directly, or call
 [`spacy.blank("xx")`](/api/top-level#spacy.blank) for lazy-loading.
@@ -111,7 +110,7 @@ The Chinese language class supports three word segmentation options:
 3. **PKUSeg**: As of spaCy v2.3.0, support for
    [PKUSeg](https://github.com/lancopku/PKUSeg-python) has been added to support
    better segmentation for Chinese OntoNotes and the provided
-   [Chinese models](/models/zh). Enable PKUSeg with the tokenizer option
+   [Chinese pipelines](/models/zh). Enable PKUSeg with the tokenizer option
    `{"segmenter": "pkuseg"}`.
 
 <Infobox variant="warning">
@@ -169,9 +168,9 @@ nlp.tokenizer.pkuseg_update_user_dict([], reset=True)
 
 </Accordion>
 
-<Accordion title="Details on pretrained and custom Chinese models" spaced>
+<Accordion title="Details on trained and custom Chinese pipelines" spaced>
 
-The [Chinese models](/models/zh) provided by spaCy include a custom `pkuseg`
+The [Chinese pipelines](/models/zh) provided by spaCy include a custom `pkuseg`
 model trained only on
 [Chinese OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19), since the
 models provided by `pkuseg` include data restricted to research use. For
@@ -208,29 +207,29 @@ nlp = Chinese(meta={"tokenizer": {"config": {"pkuseg_model": "/path/to/pkuseg_mo
 The Japanese language class uses
 [SudachiPy](https://github.com/WorksApplications/SudachiPy) for word
 segmentation and part-of-speech tagging. The default Japanese language class and
-the provided Japanese models use SudachiPy split mode `A`. The `meta` argument
-of the `Japanese` language class can be used to configure the split mode to `A`,
-`B` or `C`.
+the provided Japanese pipelines use SudachiPy split mode `A`. The `meta`
+argument of the `Japanese` language class can be used to configure the split
+mode to `A`, `B` or `C`.
 
 <Infobox variant="warning">
 
 If you run into errors related to `sudachipy`, which is currently under active
 development, we suggest downgrading to `sudachipy==0.4.5`, which is the version
-used for training the current [Japanese models](/models/ja).
+used for training the current [Japanese pipelines](/models/ja).
 
 </Infobox>
 
-## Installing and using models {#download}
+## Installing and using trained pipelines {#download}
 
-The easiest way to download a model is via spaCy's
+The easiest way to download a trained pipeline is via spaCy's
 [`download`](/api/cli#download) command. It takes care of finding the
-best-matching model compatible with your spaCy installation.
+best-matching package compatible with your spaCy installation.
 
 > #### Important note for v3.0
 >
-> Note that as of spaCy v3.0, model shortcut links that create (potentially
+> Note that as of spaCy v3.0, shortcut links like `en` that create (potentially
 > brittle) symlinks in your spaCy installation are **deprecated**. To download
-> and load an installed model, use its full name:
+> and load an installed pipeline package, use its full name:
 >
 > ```diff
 > - python -m spacy download en
@@ -243,14 +242,14 @@ best-matching model compatible with your spaCy installation.
 > ```
 
 ```cli
-# Download best-matching version of a model for your spaCy installation
+# Download best-matching version of a package for your spaCy installation
 $ python -m spacy download en_core_web_sm
 
-# Download exact model version
+# Download exact package version
 $ python -m spacy download en_core_web_sm-3.0.0 --direct
 ```
 
-The download command will [install the model](/usage/models#download-pip) via
+The download command will [install the package](/usage/models#download-pip) via
 pip and place the package in your `site-packages` directory.
 
 ```cli
@@ -266,11 +265,11 @@ doc = nlp("This is a sentence.")
 
 ### Installation via pip {#download-pip}
 
-To download a model directly using [pip](https://pypi.python.org/pypi/pip),
-point `pip install` to the URL or local path of the archive file. To find the
-direct link to a model, head over to the
-[model releases](https://github.com/explosion/spacy-models/releases), right
-click on the archive link and copy it to your clipboard.
+To download a trained pipeline directly using
+[pip](https://pypi.python.org/pypi/pip), point `pip install` to the URL or local
+path of the archive file. To find the direct link to a package, head over to the
+[releases](https://github.com/explosion/spacy-models/releases), right click on
+the archive link and copy it to your clipboard.
 
 ```bash
 # With external URL
@@ -280,60 +279,61 @@ $ pip install https://github.com/explosion/spacy-models/releases/download/en_cor
 $ pip install /Users/you/en_core_web_sm-3.0.0.tar.gz
 ```
 
-By default, this will install the model into your `site-packages` directory. You
-can then use `spacy.load()` to load it via its package name or
+By default, this will install the pipeline package into your `site-packages`
+directory. You can then use `spacy.load` to load it via its package name or
 [import it](#usage-import) explicitly as a module. If you need to download
-models as part of an automated process, we recommend using pip with a direct
-link, instead of relying on spaCy's [`download`](/api/cli#download) command.
+pipeline packages as part of an automated process, we recommend using pip with a
+direct link, instead of relying on spaCy's [`download`](/api/cli#download)
+command.
 
 You can also add the direct download link to your application's
 `requirements.txt`. For more details, see the section on
-[working with models in production](#production).
+[working with pipeline packages in production](#production).
 
 ### Manual download and installation {#download-manual}
 
 In some cases, you might prefer downloading the data manually, for example to
-place it into a custom directory. You can download the model via your browser
+place it into a custom directory. You can download the package via your browser
 from the [latest releases](https://github.com/explosion/spacy-models/releases),
 or configure your own download script using the URL of the archive file. The
-archive consists of a model directory that contains another directory with the
-model data.
+archive consists of a package directory that contains another directory with the
+pipeline data.
 
 ```yaml
 ### Directory structure {highlight="6"}
 └── en_core_web_md-3.0.0.tar.gz       # downloaded archive
     ├── setup.py                      # setup file for pip installation
-    ├── meta.json                     # copy of model meta
-    └── en_core_web_md                # 📦 model package
+    ├── meta.json                     # copy of pipeline meta
+    └── en_core_web_md                # 📦 pipeline package
         ├── __init__.py               # init for pip installation
-        └── en_core_web_md-3.0.0      # model data
-            ├── config.cfg            # model config
-            ├── meta.json             # model meta
+        └── en_core_web_md-3.0.0      # pipeline data
+            ├── config.cfg            # pipeline config
+            ├── meta.json             # pipeline meta
             └── ...                   # directories with component data
 ```
 
-You can place the **model package directory** anywhere on your local file
+You can place the **pipeline package directory** anywhere on your local file
 system.
 
-### Using models with spaCy {#usage}
+### Using trained pipelines with spaCy {#usage}
 
-To load a model, use [`spacy.load`](/api/top-level#spacy.load) with the model's
-package name or a path to the data directory:
+To load a pipeline package, use [`spacy.load`](/api/top-level#spacy.load) with
+the package name or a path to the data directory:
 
 > #### Important note for v3.0
 >
-> Note that as of spaCy v3.0, model shortcut links that create (potentially
-> brittle) symlinks in your spaCy installation are **deprecated**. To load an
-> installed model, use its full name:
+> Note that as of spaCy v3.0, shortcut links like `en` that create (potentially
+> brittle) symlinks in your spaCy installation are **deprecated**. To download
+> and load an installed pipeline package, use its full name:
 >
 > ```diff
-> - nlp = spacy.load("en")
-> + nlp = spacy.load("en_core_web_sm")
+> - python -m spacy download en
+> + python -m spacy download en_core_web_sm
 > ```
 
 ```python
 import spacy
-nlp = spacy.load("en_core_web_sm")           # load model package "en_core_web_sm"
+nlp = spacy.load("en_core_web_sm")           # load package "en_core_web_sm"
 nlp = spacy.load("/path/to/en_core_web_sm")  # load package from a directory
 
 doc = nlp("This is a sentence.")
@@ -342,17 +342,18 @@ doc = nlp("This is a sentence.")
 <Infobox title="Tip: Preview model info" emoji="💡">
 
 You can use the [`info`](/api/cli#info) command or
-[`spacy.info()`](/api/top-level#spacy.info) method to print a model's meta data
-before loading it. Each `Language` object with a loaded model also exposes the
-model's meta data as the attribute `meta`. For example, `nlp.meta['version']`
-will return the model's version.
+[`spacy.info()`](/api/top-level#spacy.info) method to print a pipeline
+package's meta data before loading it. Each `Language` object with a loaded
+pipeline also exposes the pipeline's meta data as the attribute `meta`. For
+example, `nlp.meta['version']` will return the package version.
 
 </Infobox>
 
-### Importing models as modules {#usage-import}
+### Importing pipeline packages as modules {#usage-import}
 
-If you've installed a model via spaCy's downloader, or directly via pip, you can
-also `import` it and then call its `load()` method with no arguments:
+If you've installed a trained pipeline via [`spacy download`](/api/cli#download)
+or directly via pip, you can also `import` it and then call its `load()` method
+with no arguments:
 
 ```python
 ### {executable="true"}
@@ -362,51 +363,38 @@ nlp = en_core_web_sm.load()
 doc = nlp("This is a sentence.")
 ```
 
-How you choose to load your models ultimately depends on personal preference.
-However, **for larger code bases**, we usually recommend native imports, as this
-will make it easier to integrate models with your existing build process,
-continuous integration workflow and testing framework. It'll also prevent you
-from ever trying to load a model that is not installed, as your code will raise
-an `ImportError` immediately, instead of failing somewhere down the line when
-calling `spacy.load()`.
+How you choose to load your trained pipelines ultimately depends on personal
+preference. However, **for larger code bases**, we usually recommend native
+imports, as this will make it easier to integrate pipeline packages with your
+existing build process, continuous integration workflow and testing framework.
+It'll also prevent you from ever trying to load a package that is not installed,
+as your code will raise an `ImportError` immediately, instead of failing
+somewhere down the line when calling `spacy.load()`. For more details, see the
+section on [working with pipeline packages in production](#production).
 
-For more details, see the section on
-[working with models in production](#production).
+## Using trained pipelines in production {#production}
 
-### Using your own models {#own-models}
-
-If you've trained your own model, for example for
-[additional languages](/usage/adding-languages) or
-[custom named entities](/usage/training#ner), you can save its state using the
-[`Language.to_disk()`](/api/language#to_disk) method. To make the model more
-convenient to deploy, we recommend wrapping it as a Python package.
-
-For more information and a detailed guide on how to package your model, see the
-documentation on [saving and loading models](/usage/saving-loading#models).
-
-## Using models in production {#production}
-
-If your application depends on one or more models, you'll usually want to
-integrate them into your continuous integration workflow and build process.
-While spaCy provides a range of useful helpers for downloading, linking and
-loading models, the underlying functionality is entirely based on native Python
-packages. This allows your application to handle a model like any other package
-dependency.
+If your application depends on one or more trained pipeline packages, you'll
+usually want to integrate them into your continuous integration workflow and
+build process. While spaCy provides a range of useful helpers for downloading
+and loading pipeline packages, the underlying functionality is entirely based on
+native Python packaging. This allows your application to handle a spaCy pipeline
+like any other package dependency.
 
 <!-- TODO: reference relevant spaCy project -->
 
-### Downloading and requiring model dependencies {#models-download}
+### Downloading and requiring package dependencies {#models-download}
 
 spaCy's built-in [`download`](/api/cli#download) command is mostly intended as a
 convenient, interactive wrapper. It performs compatibility checks and prints
-detailed error messages and warnings. However, if you're downloading models as
-part of an automated build process, this only adds an unnecessary layer of
-complexity. If you know which models your application needs, you should be
-specifying them directly.
+detailed error messages and warnings. However, if you're downloading pipeline
+packages as part of an automated build process, this only adds an unnecessary
+layer of complexity. If you know which packages your application needs, you
+should be specifying them directly.
 
-Because all models are valid Python packages, you can add them to your
+Because pipeline packages are valid Python packages, you can add them to your
 application's `requirements.txt`. If you're running your own internal PyPi
-installation, you can upload the models there. pip's
+installation, you can upload the pipeline packages there. pip's
 [requirements file format](https://pip.pypa.io/en/latest/reference/pip_install/#requirements-file-format)
 supports both package names to download via a PyPi server, as well as direct
 URLs.
@@ -422,17 +410,17 @@ the download URL. This way, the package won't be re-downloaded and overwritten
 if it's already installed - just like when you're downloading a package from
 PyPi.
 
-All models are versioned and specify their spaCy dependency. This ensures
-cross-compatibility and lets you specify exact version requirements for each
-model. If you've trained your own model, you can use the
-[`package`](/api/cli#package) command to generate the required meta data and
-turn it into a loadable package.
+All pipeline packages are versioned and specify their spaCy dependency. This
+ensures cross-compatibility and lets you specify exact version requirements for
+each pipeline. If you've [trained](/usage/training) your own pipeline, you can
+use the [`spacy package`](/api/cli#package) command to generate the required
+meta data and turn it into a loadable package.
 
-### Loading and testing models {#models-loading}
+### Loading and testing pipeline packages {#models-loading}
 
-Models are regular Python packages, so you can also import them as a package
-using Python's native `import` syntax, and then call the `load` method to load
-the model data and return an `nlp` object:
+Pipeline packages are regular Python packages, so you can also import them as a
+package using Python's native `import` syntax, and then call the `load` method
+to load the data and return an `nlp` object:
 
 ```python
 import en_core_web_sm
@@ -440,16 +428,17 @@ nlp = en_core_web_sm.load()
 ```
 
 In general, this approach is recommended for larger code bases, as it's more
-"native", and doesn't depend on symlinks or rely on spaCy's loader to resolve
-string names to model packages. If a model can't be imported, Python will raise
-an `ImportError` immediately. And if a model is imported but not used, any
-linter will catch that.
+"native", and doesn't rely on spaCy's loader to resolve string names to
+packages. If a package can't be imported, Python will raise an `ImportError`
+immediately. And if a package is imported but not used, any linter will catch
+that.
 
 Similarly, it'll give you more flexibility when writing tests that require
-loading models. For example, instead of writing your own `try` and `except`
+loading pipelines. For example, instead of writing your own `try` and `except`
 logic around spaCy's loader, you can use
 [pytest](http://pytest.readthedocs.io/en/latest/)'s
 [`importorskip()`](https://docs.pytest.org/en/latest/builtin.html#_pytest.outcomes.importorskip)
-method to only run a test if a specific model or model version is installed.
-Each model package exposes a `__version__` attribute which you can also use to
-perform your own version compatibility checks before loading a model.
+method to only run a test if a specific pipeline package or version is
+installed. Each pipeline package exposes a `__version__` attribute which
+you can also use to perform your own version compatibility checks before loading
+it.
diff --git a/website/docs/usage/processing-pipelines.md b/website/docs/usage/processing-pipelines.md
index 3636aa3c2..c8702a147 100644
--- a/website/docs/usage/processing-pipelines.md
+++ b/website/docs/usage/processing-pipelines.md
@@ -42,8 +42,8 @@ texts = ["This is a text", "These are lots of texts", "..."]
 - Only apply the **pipeline components you need**. Getting predictions from the
   model that you don't actually need adds up and becomes very inefficient at
   scale. To prevent this, use the `disable` keyword argument to disable
-  components you don't need – either when loading a model, or during processing
-  with `nlp.pipe`. See the section on
+  components you don't need – either when loading a pipeline, or during
+  processing with `nlp.pipe`. See the section on
   [disabling pipeline components](#disabling) for more details and examples.
 
 </Infobox>
@@ -95,7 +95,7 @@ spaCy makes it very easy to create your own pipelines consisting of reusable
 components – this includes spaCy's default tagger, parser and entity recognizer,
 but also your own custom processing functions. A pipeline component can be added
 to an already existing `nlp` object, specified when initializing a `Language`
-class, or defined within a [model package](/usage/saving-loading#models).
+class, or defined within a [pipeline package](/usage/saving-loading#models).
 
 > #### config.cfg (excerpt)
 >
@@ -115,7 +115,7 @@ class, or defined within a [model package](/usage/saving-loading#models).
 > # Settings for the parser component
 > ```
 
-When you load a model, spaCy first consults the model's
+When you load a pipeline, spaCy first consults the
 [`meta.json`](/usage/saving-loading#models) and
 [`config.cfg`](/usage/training#config). The config tells spaCy what language
 class to use, which components are in the pipeline, and how those components
@@ -131,8 +131,7 @@ should be created. spaCy will then do the following:
    component with with [`add_pipe`](/api/language#add_pipe). The settings are
    passed into the factory.
 3. Make the **model data** available to the `Language` class by calling
-   [`from_disk`](/api/language#from_disk) with the path to the model data
-   directory.
+   [`from_disk`](/api/language#from_disk) with the path to the data directory.
 
 So when you call this...
 
@@ -140,27 +139,27 @@ So when you call this...
 nlp = spacy.load("en_core_web_sm")
 ```
 
-... the model's `config.cfg` tells spaCy to use the language `"en"` and the
+... the pipeline's `config.cfg` tells spaCy to use the language `"en"` and the
 pipeline `["tagger", "parser", "ner"]`. spaCy will then initialize
 `spacy.lang.en.English`, and create each pipeline component and add it to the
-processing pipeline. It'll then load in the model's data from its data directory
+processing pipeline. It'll then load in the model data from the data directory
 and return the modified `Language` class for you to use as the `nlp` object.
 
 <Infobox title="Changed in v3.0" variant="warning">
 
 spaCy v3.0 introduces a `config.cfg`, which includes more detailed settings for
-the model pipeline, its components and the
-[training process](/usage/training#config). You can export the config of your
-current `nlp` object by calling [`nlp.config.to_disk`](/api/language#config).
+the pipeline, its components and the [training process](/usage/training#config).
+You can export the config of your current `nlp` object by calling
+[`nlp.config.to_disk`](/api/language#config).
 
 </Infobox>
 
-Fundamentally, a [spaCy model](/models) consists of three components: **the
-weights**, i.e. binary data loaded in from a directory, a **pipeline** of
+Fundamentally, a [spaCy pipeline package](/models) consists of three components:
+**the weights**, i.e. binary data loaded in from a directory, a **pipeline** of
 functions called in order, and **language data** like the tokenization rules and
-language-specific settings. For example, a Spanish NER model requires different
-weights, language data and pipeline components than an English parsing and
-tagging model. This is also why the pipeline state is always held by the
+language-specific settings. For example, a Spanish NER pipeline requires
+different weights, language data and components than an English parsing and
+tagging pipeline. This is also why the pipeline state is always held by the
 `Language` class. [`spacy.load`](/api/top-level#spacy.load) puts this all
 together and returns an instance of `Language` with a pipeline set and access to
 the binary data:
@@ -175,7 +174,7 @@ cls = spacy.util.get_lang_class(lang)  # 1. Get Language class, e.g. English
 nlp = cls()                            # 2. Initialize it
 for name in pipeline:
     nlp.add_pipe(name)                 # 3. Add the component to the pipeline
-nlp.from_disk(model_data_path)         # 4. Load in the binary data
+nlp.from_disk(data_path)               # 4. Load in the binary data
 ```
 
 When you call `nlp` on a text, spaCy will **tokenize** it and then **call each
@@ -243,28 +242,29 @@ tagger or the parser, you can **disable or exclude** it. This can sometimes make
 a big difference and improve loading and inference speed. There are two
 different mechanisms you can use:
 
-1. **Disable:** The component and its data will be loaded with the model, but it
-   will be disabled by default and not run as part of the processing pipeline.
-   To run it, you can explicitly enable it by calling
+1. **Disable:** The component and its data will be loaded with the pipeline, but
+   it will be disabled by default and not run as part of the processing
+   pipeline. To run it, you can explicitly enable it by calling
    [`nlp.enable_pipe`](/api/language#enable_pipe). When you save out the `nlp`
    object, the disabled component will be included but disabled by default.
-2. **Exclude:** Don't load the component and its data with the model. Once the
-   model is loaded, there will be no reference to the excluded component.
+2. **Exclude:** Don't load the component and its data with the pipeline. Once
+   the pipeline is loaded, there will be no reference to the excluded component.
 
 Disabled and excluded component names can be provided to
 [`spacy.load`](/api/top-level#spacy.load) as a list.
 
 <!-- TODO: update with info on our models shipped with optional components -->
 
-> #### 💡 Models with optional components
+> #### 💡 Optional pipeline components
 >
-> The `disable` mechanism makes it easy to distribute models with optional
-> components that you can enable or disable at runtime. For instance, your model
-> may include a statistical _and_ a rule-based component for sentence
-> segmentation, and you can choose which one to run depending on your use case.
+> The `disable` mechanism makes it easy to distribute pipeline packages with
+> optional components that you can enable or disable at runtime. For instance,
+> your pipeline may include a statistical _and_ a rule-based component for
+> sentence segmentation, and you can choose which one to run depending on your
+> use case.
 
 ```python
-# Load the model without the entity recognizer
+# Load the pipeline without the entity recognizer
 nlp = spacy.load("en_core_web_sm", exclude=["ner"])
 
 # Load the tagger and parser but don't enable them
@@ -358,25 +358,25 @@ run as part of the pipeline.
 | `nlp.component_names` | All component names, including disabled components.              |
 | `nlp.disabled`        | Names of components that are currently disabled.                 |
 
-### Sourcing pipeline components from existing models {#sourced-components new="3"}
+### Sourcing components from existing pipelines {#sourced-components new="3"}
 
-Pipeline components that are independent can also be reused across models.
-Instead of adding a new blank component to a pipeline, you can also copy an
-existing component from a pretrained model by setting the `source` argument on
+Pipeline components that are independent can also be reused across pipelines.
+Instead of adding a new blank component, you can also copy an existing component
+from a trained pipeline by setting the `source` argument on
 [`nlp.add_pipe`](/api/language#add_pipe). The first argument will then be
 interpreted as the name of the component in the source pipeline – for instance,
 `"ner"`. This is especially useful for
-[training a model](/usage/training#config-components) because it lets you mix
-and match components and create fully custom model packages with updated
-pretrained components and new components trained on your data.
+[training a pipeline](/usage/training#config-components) because it lets you mix
+and match components and create fully custom pipeline packages with updated
+trained components and new components trained on your data.
 
-<Infobox variant="warning" title="Important note for pretrained components">
+<Infobox variant="warning" title="Important note for trained components">
 
-When reusing components across models, keep in mind that the **vocabulary**,
-**vectors** and model settings **must match**. If a pretrained model includes
+When reusing components across pipelines, keep in mind that the **vocabulary**,
+**vectors** and model settings **must match**. If a trained pipeline includes
 [word vectors](/usage/linguistic-features#vectors-similarity) and the component
-uses them as features, the model you copy it to needs to have the _same_ vectors
-available – otherwise, it won't be able to make the same predictions.
+uses them as features, the pipeline you copy it to needs to have the _same_
+vectors available – otherwise, it won't be able to make the same predictions.
 
 </Infobox>
 
@@ -384,7 +384,7 @@ available – otherwise, it won't be able to make the same predictions.
 >
 > Instead of providing a `factory`, component blocks in the training
 > [config](/usage/training#config) can also define a `source`. The string needs
-> to be a loadable spaCy model package or path. The
+> to be a loadable spaCy pipeline package or path. The
 >
 > ```ini
 > [components.ner]
@@ -404,11 +404,11 @@ available – otherwise, it won't be able to make the same predictions.
 ### {executable="true"}
 import spacy
 
-# The source model with different components
+# The source pipeline with different components
 source_nlp = spacy.load("en_core_web_sm")
 print(source_nlp.pipe_names)
 
-# Add only the entity recognizer to the new blank model
+# Add only the entity recognizer to the new blank pipeline
 nlp = spacy.blank("en")
 nlp.add_pipe("ner", source=source_nlp)
 print(nlp.pipe_names)
@@ -535,8 +535,8 @@ only being able to modify it afterwards.
 The [`@Language.component`](/api/language#component) decorator lets you turn a
 simple function into a pipeline component. It takes at least one argument, the
 **name** of the component factory. You can use this name to add an instance of
-your component to the pipeline. It can also be listed in your model config, so
-you can save, load and train models using your component.
+your component to the pipeline. It can also be listed in your pipeline config,
+so you can save, load and train pipelines using your component.
 
 Custom components can be added to the pipeline using the
 [`add_pipe`](/api/language#add_pipe) method. Optionally, you can either specify
@@ -838,7 +838,7 @@ If what you're passing in isn't JSON-serializable – e.g. a custom object like
 [model](#trainable-components) – saving out the component config becomes
 impossible because there's no way for spaCy to know _how_ that object was
 created, and what to do to create it again. This makes it much harder to save,
-load and train custom models with custom components. A simple solution is to
+load and train custom pipelines with custom components. A simple solution is to
 **register a function** that returns your resources. The
 [registry](/api/top-level#registry) lets you **map string names to functions**
 that create objects, so given a name and optional arguments, spaCy will know how
@@ -876,13 +876,13 @@ the result of the registered function is passed in as the key `"dictionary"`.
 ```
 
 Using a registered function also means that you can easily include your custom
-components in models that you [train](/usage/training). To make sure spaCy knows
-where to find your custom `@assets` function, you can pass in a Python file via
-the argument `--code`. If someone else is using your component, all they have to
-do to customize the data is to register their own function and swap out the
-name. Registered functions can also take **arguments** by the way that can be
-defined in the config as well – you can read more about this in the docs on
-[training with custom code](/usage/training#custom-code).
+components in pipelines that you [train](/usage/training). To make sure spaCy
+knows where to find your custom `@assets` function, you can pass in a Python
+file via the argument `--code`. If someone else is using your component, all
+they have to do to customize the data is to register their own function and swap
+out the name. Registered functions can also take **arguments** by the way that
+can be defined in the config as well – you can read more about this in the docs
+on [training with custom code](/usage/training#custom-code).
 
 ### Python type hints and pydantic validation {#type-hints new="3"}
 
@@ -1121,7 +1121,14 @@ loss is calculated and to add evaluation scores to the training output.
 | [`get_loss`](/api/pipe#get_loss)             | Return a tuple of the loss and the gradient for a batch of [`Example`](/api/example) objects.                                                                                                                                                                                                                      |
 | [`score`](/api/pipe#score)                   | Score a batch of [`Example`](/api/example) objects and return a dictionary of scores. The [`@Language.factory`](/api/language#factory) decorator can define the `default_socre_weights` of the component to decide which keys of the scores to display during training and how they count towards the final score. |
 
-<!-- TODO: link to (not yet created) page for defining models for trainable components -->
+<Infobox title="Custom trainable components and models" emoji="📖">
+
+For more details on how to implement your own trainable components and model
+architectures, and plug existing models implemented in PyTorch or TensorFlow
+into your spaCy pipeline, see the usage guide on
+[layers and model architectures](/usage/layers-architectures#components).
+
+</Infobox>
 
 ## Extension attributes {#custom-components-attributes new="2"}
 
@@ -1322,9 +1329,9 @@ While it's generally recommended to use the `Doc._`, `Span._` and `Token._`
 proxies to add your own custom attributes, spaCy offers a few exceptions to
 allow **customizing the built-in methods** like
 [`Doc.similarity`](/api/doc#similarity) or [`Doc.vector`](/api/doc#vector) with
-your own hooks, which can rely on statistical models you train yourself. For
-instance, you can provide your own on-the-fly sentence segmentation algorithm or
-document similarity method.
+your own hooks, which can rely on components you train yourself. For instance,
+you can provide your own on-the-fly sentence segmentation algorithm or document
+similarity method.
 
 Hooks let you customize some of the behaviors of the `Doc`, `Span` or `Token`
 objects by adding a component to the pipeline. For instance, to customize the
@@ -1456,13 +1463,13 @@ function that takes a `Doc`, modifies it and returns it.
   method. However, a third-party extension should **never silently overwrite
   built-ins**, or attributes set by other extensions.
 
-- If you're looking to publish a model that depends on a custom pipeline
-  component, you can either **require it** in the model package's dependencies,
-  or – if the component is specific and lightweight – choose to **ship it with
-  your model package**. Just make sure the
+- If you're looking to publish a pipeline package that depends on a custom
+  pipeline component, you can either **require it** in the package's
+  dependencies, or – if the component is specific and lightweight – choose to
+  **ship it with your pipeline package**. Just make sure the
   [`@Language.component`](/api/language#component) or
   [`@Language.factory`](/api/language#factory) decorator that registers the
-  custom component runs in your model's `__init__.py` or is exposed via an
+  custom component runs in your package's `__init__.py` or is exposed via an
   [entry point](/usage/saving-loading#entry-points).
 
 - Once you're ready to share your extension with others, make sure to **add docs
@@ -1511,9 +1518,9 @@ def custom_ner_wrapper(doc):
     return doc
 ```
 
-The `custom_ner_wrapper` can then be added to the pipeline of a blank model
-using [`nlp.add_pipe`](/api/language#add_pipe). You can also replace the
-existing entity recognizer of a pretrained model with
+The `custom_ner_wrapper` can then be added to a blank pipeline using
+[`nlp.add_pipe`](/api/language#add_pipe). You can also replace the existing
+entity recognizer of a trained pipeline with
 [`nlp.replace_pipe`](/api/language#replace_pipe).
 
 Here's another example of a custom model, `your_custom_model`, that takes a list
diff --git a/website/docs/usage/projects.md b/website/docs/usage/projects.md
index 97a0caed8..97e3abb6e 100644
--- a/website/docs/usage/projects.md
+++ b/website/docs/usage/projects.md
@@ -20,10 +20,10 @@ menu:
 
 spaCy projects let you manage and share **end-to-end spaCy workflows** for
 different **use cases and domains**, and orchestrate training, packaging and
-serving your custom models. You can start off by cloning a pre-defined project
-template, adjust it to fit your needs, load in your data, train a model, export
-it as a Python package, upload your outputs to a remote storage and share your
-results with your team. spaCy projects can be used via the new
+serving your custom pipelines. You can start off by cloning a pre-defined
+project template, adjust it to fit your needs, load in your data, train a
+pipeline, export it as a Python package, upload your outputs to a remote storage
+and share your results with your team. spaCy projects can be used via the new
 [`spacy project`](/api/cli#project) command and we provide templates in our
 [`projects`](https://github.com/explosion/projects) repo.
 
@@ -51,7 +51,7 @@ production.
 <Grid narrow cols={3}>
 <Integration title="DVC" logo="dvc" url="#dvc">Manage and version your data</Integration>
 <Integration title="Prodigy" logo="prodigy" url="#prodigy">Create labelled training data</Integration>
-<Integration title="Streamlit" logo="streamlit" url="#streamlit">Visualize and demo your models</Integration>
+<Integration title="Streamlit" logo="streamlit" url="#streamlit">Visualize and demo your pipelines</Integration>
 <Integration title="FastAPI" logo="fastapi" url="#fastapi">Serve your models and host APIs</Integration>
 <Integration title="Ray" logo="ray" url="#ray">Distributed and parallel training</Integration>
 <Integration title="Weights &amp; Biases" logo="wandb" url="#wandb">Track your experiments and results</Integration>
@@ -66,8 +66,8 @@ production.
 
 The [`spacy project clone`](/api/cli#project-clone) command clones an existing
 project template and copies the files to a local directory. You can then run the
-project, e.g. to train a model and edit the commands and scripts to build fully
-custom workflows.
+project, e.g. to train a pipeline and edit the commands and scripts to build
+fully custom workflows.
 
 ```cli
 python -m spacy project clone some_example_project
@@ -162,12 +162,12 @@ script).
 > ```
 
 Workflows are series of commands that are run in order and often depend on each
-other. For instance, to generate a packaged model, you might start by converting
-your data, then run [`spacy train`](/api/cli#train) to train your model on the
-converted data and if that's successful, run [`spacy package`](/api/cli#package)
-to turn the best model artifact into an installable Python package. The
-following command runs the workflow named `all` defined in the `project.yml`,
-and executes the commands it specifies, in order:
+other. For instance, to generate a pipeline package, you might start by
+converting your data, then run [`spacy train`](/api/cli#train) to train your
+pipeline on the converted data and if that's successful, run
+[`spacy package`](/api/cli#package) to turn the best trained artifact into an
+installable Python package. The following command runs the workflow named `all`
+defined in the `project.yml`, and executes the commands it specifies, in order:
 
 ```cli
 $ python -m spacy project run all
@@ -191,11 +191,11 @@ project as a DVC repo.
 >   local: '/mnt/scratch/cache'
 > ```
 
-After training a model, you can optionally use the
+After training a pipeline, you can optionally use the
 [`spacy project push`](/api/cli#project-push) command to upload your outputs to
 a remote storage, using protocols like [S3](https://aws.amazon.com/s3/),
 [Google Cloud Storage](https://cloud.google.com/storage) or SSH. This can help
-you **export** your model packages, **share** work with your team, or **cache
+you **export** your pipeline packages, **share** work with your team, or **cache
 results** to avoid repeating work.
 
 ```cli
@@ -214,8 +214,8 @@ docs on [remote storage](#remote).
 The `project.yml` defines the assets a project depends on, like datasets and
 pretrained weights, as well as a series of commands that can be run separately
 or as a workflow – for instance, to preprocess the data, convert it to spaCy's
-format, train a model, evaluate it and export metrics, package it and spin up a
-quick web demo. It looks pretty similar to a config file used to define CI
+format, train a pipeline, evaluate it and export metrics, package it and spin up
+a quick web demo. It looks pretty similar to a config file used to define CI
 pipelines.
 
 <!-- TODO: update with better (final) example -->
@@ -324,17 +324,17 @@ others are running your project with the same data.
 
 Each command defined in the `project.yml` can optionally define a list of
 dependencies and outputs. These are the files the command requires and creates.
-For example, a command for training a model may depend on a
+For example, a command for training a pipeline may depend on a
 [`config.cfg`](/usage/training#config) and the training and evaluation data, and
-it will export a directory `model-best`, containing the best model, which you
-can then re-use in other commands.
+it will export a directory `model-best`, which you can then re-use in other
+commands.
 
 <!-- prettier-ignore -->
 ```yaml
 ### project.yml
 commands:
   - name: train
-    help: 'Train a spaCy model using the specified corpus and config'
+    help: 'Train a spaCy pipeline using the specified corpus and config'
     script:
       - 'python -m spacy train ./configs/config.cfg -o training/ --paths.train ./corpus/training.spacy --paths.dev ./corpus/evaluation.spacy'
     deps:
@@ -392,14 +392,14 @@ directory:
 ├── project.yml          # the project settings
 ├── project.lock         # lockfile that tracks inputs/outputs
 ├── assets/              # downloaded data assets
-├── configs/             # model config.cfg files used for training
+├── configs/             # pipeline config.cfg files used for training
 ├── corpus/              # output directory for training corpus
-├── metas/               # model meta.json templates used for packaging
+├── metas/               # pipeline meta.json templates used for packaging
 ├── metrics/             # output directory for evaluation metrics
 ├── notebooks/           # directory for Jupyter notebooks
-├── packages/            # output directory for model Python packages
+├── packages/            # output directory for pipeline Python packages
 ├── scripts/             # directory for scripts, e.g. referenced in commands
-├── training/            # output directory for trained models
+├── training/            # output directory for trained pipelines
 └── ...                  # any other files, like a requirements.txt etc.
 ```
 
@@ -426,7 +426,7 @@ report:
 ### project.yml
 commands:
   - name: test
-    help: 'Test the trained model'
+    help: 'Test the trained pipeline'
     script:
       - 'pip install pytest pytest-html'
       - 'python -m pytest ./scripts/tests --html=metrics/test-report.html'
@@ -440,8 +440,8 @@ commands:
 Adding `training/model-best` to the command's `deps` lets you ensure that the
 file is available. If not, spaCy will show an error and the command won't run.
 Setting `no_skip: true` means that the command will always run, even if the
-dependencies (the trained model) hasn't changed. This makes sense here, because
-you typically don't want to skip your tests.
+dependencies (the trained pipeline) haven't changed. This makes sense here,
+because you typically don't want to skip your tests.
 
 ### Writing custom scripts {#custom-scripts}
 
@@ -554,7 +554,7 @@ notebooks with usage examples.
 
 <Infobox title="Important note about assets" variant="warning">
 
-It's typically not a good idea to check large data assets, trained models or
+It's typically not a good idea to check large data assets, trained pipelines or
 other artifacts into a Git repo and you should exclude them from your project
 template by adding a `.gitignore`. If you want to version your data and models,
 check out [Data Version Control](#dvc) (DVC), which integrates with spaCy
@@ -566,7 +566,7 @@ projects.
 
 You can persist your project outputs to a remote storage using the
 [`project push`](/api/cli#project-push) command. This can help you **export**
-your model packages, **share** work with your team, or **cache results** to
+your pipeline packages, **share** work with your team, or **cache results** to
 avoid repeating work. The [`project pull`](/api/cli#project-pull) command will
 download any outputs that are in the remote storage and aren't available
 locally.
@@ -622,7 +622,7 @@ For instance, let's say you had the following command in your `project.yml`:
 ```yaml
 ### project.yml
 - name: train
-  help: 'Train a spaCy model using the specified corpus and config'
+  help: 'Train a spaCy pipeline using the specified corpus and config'
   script:
     - 'spacy train ./config.cfg --output training/'
   deps:
@@ -814,8 +814,8 @@ mattis pretium.
 [Streamlit](https://streamlit.io) is a Python framework for building interactive
 data apps. The [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit)
 package helps you integrate spaCy visualizations into your Streamlit apps and
-quickly spin up demos to explore your models interactively. It includes a full
-embedded visualizer, as well as individual components.
+quickly spin up demos to explore your pipelines interactively. It includes a
+full embedded visualizer, as well as individual components.
 
 ```bash
 $ pip install spacy_streamlit
@@ -829,11 +829,11 @@ $ pip install spacy_streamlit
 
 Using [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit), your
 projects can easily define their own scripts that spin up an interactive
-visualizer, using the latest model you trained, or a selection of models so you
-can compare their results. The following script starts an
+visualizer, using the latest pipeline you trained, or a selection of pipelines
+so you can compare their results. The following script starts an
 [NER visualizer](/usage/visualizers#ent) and takes two positional command-line
-argument you can pass in from your `config.yml`: a comma-separated list of model
-paths and an example text to use as the default text.
+arguments you can pass in from your `project.yml`: a comma-separated list of
+paths to load the pipelines from and an example text to use as the default text.
 
 ```python
 ### scripts/visualize.py
@@ -841,8 +841,8 @@ import spacy_streamlit
 import sys
 
 DEFAULT_TEXT = sys.argv[2] if len(sys.argv) >= 3 else ""
-MODELS = [name.strip() for name in sys.argv[1].split(",")]
-spacy_streamlit.visualize(MODELS, DEFAULT_TEXT, visualizers=["ner"])
+PIPELINES = [name.strip() for name in sys.argv[1].split(",")]
+spacy_streamlit.visualize(PIPELINES, DEFAULT_TEXT, visualizers=["ner"])
 ```
 
 > #### Example usage
@@ -856,7 +856,7 @@ spacy_streamlit.visualize(MODELS, DEFAULT_TEXT, visualizers=["ner"])
 ### project.yml
 commands:
   - name: visualize
-    help: "Visualize the model's output interactively using Streamlit"
+    help: "Visualize the pipeline's output interactively using Streamlit"
     script:
       - 'streamlit run ./scripts/visualize.py ./training/model-best "I like Adidas shoes."'
     deps:
@@ -879,8 +879,8 @@ mattis pretium.
 for building REST APIs with Python, based on Python
 [type hints](https://fastapi.tiangolo.com/python-types/). It's become a popular
 library for serving machine learning models and you can use it in your spaCy
-projects to quickly serve up a trained model and make it available behind a REST
-API.
+projects to quickly serve up a trained pipeline and make it available behind a
+REST API.
 
 ```python
 # TODO: show an example that addresses some of the main concerns for serving ML (workers etc.)
@@ -897,7 +897,7 @@ API.
 ### project.yml
 commands:
   - name: serve
-    help: "Serve the trained model with FastAPI"
+    help: "Serve the trained pipeline with FastAPI"
     script:
       - 'python ./scripts/serve.py ./training/model-best'
     deps:
diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md
index a589c556e..fb54c9936 100644
--- a/website/docs/usage/rule-based-matching.md
+++ b/website/docs/usage/rule-based-matching.md
@@ -759,7 +759,7 @@ whitespace, making them easy to match as well.
 from spacy.lang.en import English
 from spacy.matcher import Matcher
 
-nlp = English()  # We only want the tokenizer, so no need to load a model
+nlp = English()  # We only want the tokenizer, so no need to load a pipeline
 matcher = Matcher(nlp.vocab)
 
 pos_emoji = ["😀", "😃", "😂", "🤣", "😊", "😍"]  # Positive emoji
@@ -893,12 +893,13 @@ pattern covering the exact tokenization of the term.
 <Infobox title="Important note on creating patterns" variant="warning">
 
 To create the patterns, each phrase has to be processed with the `nlp` object.
-If you have a model loaded, doing this in a loop or list comprehension can
-easily become inefficient and slow. If you **only need the tokenization and
-lexical attributes**, you can run [`nlp.make_doc`](/api/language#make_doc)
-instead, which will only run the tokenizer. For an additional speed boost, you
-can also use the [`nlp.tokenizer.pipe`](/api/tokenizer#pipe) method, which will
-process the texts as a stream.
+If you have a trained pipeline loaded, doing this in a loop or list
+comprehension can easily become inefficient and slow. If you **only need the
+tokenization and lexical attributes**, you can run
+[`nlp.make_doc`](/api/language#make_doc) instead, which will only run the
+tokenizer. For an additional speed boost, you can also use the
+[`nlp.tokenizer.pipe`](/api/tokenizer#pipe) method, which will process the texts
+as a stream.
 
 ```diff
 - patterns = [nlp(term) for term in LOTS_OF_TERMS]
@@ -977,7 +978,7 @@ of an advantage over writing one or two token patterns.
 The [`EntityRuler`](/api/entityruler) is an exciting new component that lets you
 add named entities based on pattern dictionaries, and makes it easy to combine
 rule-based and statistical named entity recognition for even more powerful
-models.
+pipelines.
 
 ### Entity Patterns {#entityruler-patterns}
 
@@ -1021,8 +1022,8 @@ doc = nlp("Apple is opening its first big office in San Francisco.")
 print([(ent.text, ent.label_) for ent in doc.ents])
 ```
 
-The entity ruler is designed to integrate with spaCy's existing statistical
-models and enhance the named entity recognizer. If it's added **before the
+The entity ruler is designed to integrate with spaCy's existing pipeline
+components and enhance the named entity recognizer. If it's added **before the
 `"ner"` component**, the entity recognizer will respect the existing entity
 spans and adjust its predictions around it. This can significantly improve
 accuracy in some cases. If it's added **after the `"ner"` component**, the
@@ -1111,20 +1112,20 @@ versa.
 </Infobox>
 
 When you save out an `nlp` object that has an `EntityRuler` added to its
-pipeline, its patterns are automatically exported to the model directory:
+pipeline, its patterns are automatically exported to the pipeline directory:
 
 ```python
 nlp = spacy.load("en_core_web_sm")
 ruler = nlp.add_pipe("entity_ruler")
 ruler.add_patterns([{"label": "ORG", "pattern": "Apple"}])
-nlp.to_disk("/path/to/model")
+nlp.to_disk("/path/to/pipeline")
 ```
 
-The saved model now includes the `"entity_ruler"` in its
-[`config.cfg`](/api/data-formats#config) and the model directory contains a file
-`entityruler.jsonl` with the patterns. When you load the model back in, all
-pipeline components will be restored and deserialized – including the entity
-ruler. This lets you ship powerful model packages with binary weights _and_
+The saved pipeline now includes the `"entity_ruler"` in its
+[`config.cfg`](/api/data-formats#config) and the pipeline directory contains a
+file `entityruler.jsonl` with the patterns. When you load the pipeline back in,
+all pipeline components will be restored and deserialized – including the entity
+ruler. This lets you ship powerful pipeline packages with binary weights _and_
 rules included!
 
 ### Using a large number of phrase patterns {#entityruler-large-phrase-patterns new="2.2.4"}
@@ -1141,7 +1142,7 @@ of `"phrase_matcher_attr": "POS"` for the entity ruler.
 
 Running the full language pipeline across every pattern in a large list scales
 linearly and can therefore take a long time on large amounts of phrase patterns.
-As of spaCy 2.2.4 the `add_patterns` function has been refactored to use
+As of spaCy v2.2.4 the `add_patterns` function has been refactored to use
 nlp.pipe on all phrase patterns resulting in about a 10x-20x speed up with
 5,000-100,000 phrase patterns respectively. Even with this speedup (but
 especially if you're using an older version) the `add_patterns` function can
@@ -1168,7 +1169,7 @@ order to implement more abstract logic.
 
 ### Example: Expanding named entities {#models-rules-ner}
 
-When using the a pretrained
+When using a trained
 [named entity recognition](/usage/linguistic-features/#named-entities) model to
 extract information from your texts, you may find that the predicted span only
 includes parts of the entity you're looking for. Sometimes, this happens if
@@ -1178,15 +1179,15 @@ what you need for your application.
 
 > #### Where corpora come from
 >
-> Corpora used to train models from scratch are often produced in academia. They
-> contain text from various sources with linguistic features labeled manually by
-> human annotators (following a set of specific guidelines). The corpora are
-> then distributed with evaluation data, so other researchers can benchmark
-> their algorithms and everyone can report numbers on the same data. However,
-> most applications need to learn information that isn't contained in any
-> available corpus.
+> Corpora used to train pipelines from scratch are often produced in academia.
+> They contain text from various sources with linguistic features labeled
+> manually by human annotators (following a set of specific guidelines). The
+> corpora are then distributed with evaluation data, so other researchers can
+> benchmark their algorithms and everyone can report numbers on the same data.
+> However, most applications need to learn information that isn't contained in
+> any available corpus.
 
-For example, the corpus spaCy's [English models](/models/en) were trained on
+For example, the corpus spaCy's [English pipelines](/models/en) were trained on
 defines a `PERSON` entity as just the **person name**, without titles like "Mr."
 or "Dr.". This makes sense, because it makes it easier to resolve the entity
 type back to a knowledge base. But what if your application needs the full
diff --git a/website/docs/usage/saving-loading.md b/website/docs/usage/saving-loading.md
index 3f9435f5e..9955e7d84 100644
--- a/website/docs/usage/saving-loading.md
+++ b/website/docs/usage/saving-loading.md
@@ -4,7 +4,7 @@ menu:
   - ['Basics', 'basics']
   - ['Serialization Methods', 'serialization-methods']
   - ['Entry Points', 'entry-points']
-  - ['Models', 'models']
+  - ['Trained Pipelines', 'models']
 ---
 
 ## Basics {#basics hidden="true"}
@@ -25,10 +25,10 @@ can load in the data.
 > #### Saving the meta and config
 >
 > The [`nlp.meta`](/api/language#meta) attribute is a JSON-serializable
-> dictionary and contains all model meta information like the author and license
-> information. The [`nlp.config`](/api/language#config) attribute is a
+> dictionary and contains all pipeline meta information like the author and
+> license information. The [`nlp.config`](/api/language#config) attribute is a
 > dictionary containing the training configuration, pipeline component factories
-> and other settings. It is saved out with a model as the `config.cfg`.
+> and other settings. It is saved out with a pipeline as the `config.cfg`.
 
 ```python
 ### Serialize
@@ -45,12 +45,11 @@ for pipe_name in pipeline:
 nlp.from_bytes(bytes_data)
 ```
 
-This is also how spaCy does it under the hood when loading a model: it loads the
-model's `config.cfg` containing the language and pipeline information,
-initializes the language class, creates and adds the pipeline components based
-on the defined
-[factories](/usage/processing-pipeline#custom-components-factories) and _then_
-loads in the binary data. You can read more about this process
+This is also how spaCy does it under the hood when loading a pipeline: it loads
+the `config.cfg` containing the language and pipeline information, initializes
+the language class, creates and adds the pipeline components based on the
+defined [factories](/usage/processing-pipeline#custom-components-factories) and
+_then_ loads in the binary data. You can read more about this process
 [here](/usage/processing-pipelines#pipelines).
 
 ### Serializing Doc objects efficiently {#docs new="2.2"}
@@ -168,10 +167,10 @@ data = pickle.dumps(span_doc)
 ## Implementing serialization methods {#serialization-methods}
 
 When you call [`nlp.to_disk`](/api/language#to_disk),
-[`nlp.from_disk`](/api/language#from_disk) or load a model package, spaCy will
-iterate over the components in the pipeline, check if they expose a `to_disk` or
-`from_disk` method and if so, call it with the path to the model directory plus
-the string name of the component. For example, if you're calling
+[`nlp.from_disk`](/api/language#from_disk) or load a pipeline package, spaCy
+will iterate over the components in the pipeline, check if they expose a
+`to_disk` or `from_disk` method and if so, call it with the path to the pipeline
+directory plus the string name of the component. For example, if you're calling
 `nlp.to_disk("/path")`, the data for the named entity recognizer will be saved
 in `/path/ner`.
 
@@ -191,8 +190,8 @@ add to that data and saves and loads the data to and from a JSON file.
 > [source](https://github.com/explosion/spaCy/tree/master/spacy/pipeline/entityruler.py).
 > Patterns added to the component will be saved to a `.jsonl` file if the
 > pipeline is serialized to disk, and to a bytestring if the pipeline is
-> serialized to bytes. This allows saving out a model with a rule-based entity
-> recognizer and including all rules _with_ the model data.
+> serialized to bytes. This allows saving out a pipeline with a rule-based
+> entity recognizer and including all rules _with_ the component data.
 
 ```python
 ### {highlight="14-18,20-25"}
@@ -232,7 +231,7 @@ component's `to_disk` method.
 nlp = spacy.load("en_core_web_sm")
 my_component = nlp.add_pipe("my_component")
 my_component.add({"hello": "world"})
-nlp.to_disk("/path/to/model")
+nlp.to_disk("/path/to/pipeline")
 ```
 
 The contents of the directory would then look like this.
@@ -241,15 +240,15 @@ file `data.json` in its subdirectory:
 
 ```yaml
 ### Directory structure {highlight="2-3"}
-└── /path/to/model
+└── /path/to/pipeline
     ├── my_component     # data serialized by "my_component"
     │   └── data.json
     ├── ner              # data for "ner" component
     ├── parser           # data for "parser" component
     ├── tagger           # data for "tagger" component
-    ├── vocab            # model vocabulary
-    ├── meta.json        # model meta.json
-    ├── config.cfg       # model config
+    ├── vocab            # pipeline vocabulary
+    ├── meta.json        # pipeline meta.json
+    ├── config.cfg       # pipeline config
     └── tokenizer        # tokenization rules
 ```
 
@@ -258,18 +257,19 @@ When you load the data back in, spaCy will call the custom component's
 contents of `data.json`, convert them to a Python object and restore the
 component state. The same works for other types of data, of course – for
 instance, you could add a
-[wrapper for a model](/usage/processing-pipelines#wrapping-models-libraries)
-trained with a different library like TensorFlow or PyTorch and make spaCy load
-its weights automatically when you load the model package.
+[wrapper for a model](/usage/layers-architectures#frameworks) trained with a
+different library like TensorFlow or PyTorch and make spaCy load its weights
+automatically when you load the pipeline package.
 
 <Infobox title="Important note on loading custom components" variant="warning">
 
-When you load back a model with custom components, make sure that the components
-are **available** and that the [`@Language.component`](/api/language#component)
-or [`@Language.factory`](/api/language#factory) decorators are executed _before_
-your model is loaded back. Otherwise, spaCy won't know how to resolve the string
-name of a component factory like `"my_component"` back to a function. For more
-details, see the documentation on
+When you load back a pipeline with custom components, make sure that the
+components are **available** and that the
+[`@Language.component`](/api/language#component) or
+[`@Language.factory`](/api/language#factory) decorators are executed _before_
+your pipeline is loaded back. Otherwise, spaCy won't know how to resolve the
+string name of a component factory like `"my_component"` back to a function. For
+more details, see the documentation on
 [adding factories](/usage/processing-pipelines#custom-components-factories) or
 use [entry points](#entry-points) to make your extension package expose your
 custom components to spaCy automatically.
@@ -297,18 +297,19 @@ installed in the same environment – that's it.
 
 ### Custom components via entry points {#entry-points-components}
 
-When you load a model, spaCy will generally use the model's `config.cfg` to set
-up the language class and construct the pipeline. The pipeline is specified as a
+When you load a pipeline, spaCy will generally use its `config.cfg` to set up
+the language class and construct the pipeline. The pipeline is specified as a
 list of strings, e.g. `pipeline = ["tagger", "paser", "ner"]`. For each of those
 strings, spaCy will call `nlp.add_pipe` and look up the name in all factories
 defined by the decorators [`@Language.component`](/api/language#component) and
 [`@Language.factory`](/api/language#factory). This means that you have to import
-your custom components _before_ loading the model.
+your custom components _before_ loading the pipeline.
 
-Using entry points, model packages and extension packages can define their own
-`"spacy_factories"`, which will be loaded automatically in the background when
-the `Language` class is initialized. So if a user has your package installed,
-they'll be able to use your components – even if they **don't import them**!
+Using entry points, pipeline packages and extension packages can define their
+own `"spacy_factories"`, which will be loaded automatically in the background
+when the `Language` class is initialized. So if a user has your package
+installed, they'll be able to use your components – even if they **don't import
+them**!
 
 To stick with the theme of
 [this entry points blog post](https://amir.rachum.com/blog/2017/07/28/python-entry-points/),
@@ -343,10 +344,10 @@ def snek_component(doc):
 
 Since it's a very complex and sophisticated module, you want to split it off
 into its own package so you can version it and upload it to PyPi. You also want
-your custom model to be able to define `pipeline = ["snek"]` in its
+your custom package to be able to define `pipeline = ["snek"]` in its
 `config.cfg`. For that, you need to be able to tell spaCy where to find the
 component `"snek"`. If you don't do this, spaCy will raise an error when you try
-to load the model because there's no built-in `"snek"` component. To add an
+to load the pipeline because there's no built-in `"snek"` component. To add an
 entry to the factories, you can now expose it in your `setup.py` via the
 `entry_points` dictionary:
 
@@ -380,7 +381,7 @@ $ python setup.py develop
 spaCy is now able to create the pipeline component `"snek"` – even though you
 never imported `snek_component`. When you save the
 [`nlp.config`](/api/language#config) to disk, it includes an entry for your
-`"snek"` component and any model you train with this config will include the
+`"snek"` component and any pipeline you train with this config will include the
 component and know how to load it – if your `snek` package is installed.
 
 > #### config.cfg (excerpt)
@@ -449,9 +450,9 @@ entry_points={
 
 The factory can also implement other pipeline component like `to_disk` and
 `from_disk` for serialization, or even `update` to make the component trainable.
-If a component exposes a `from_disk` method and is included in a model's
-pipeline, spaCy will call it on load. This lets you ship custom data with your
-model. When you save out a model using `nlp.to_disk` and the component exposes a
+If a component exposes a `from_disk` method and is included in a pipeline, spaCy
+will call it on load. This lets you ship custom data with your pipeline package.
+When you save out a pipeline using `nlp.to_disk` and the component exposes a
 `to_disk` method, it will be called with the disk path.
 
 ```python
@@ -467,8 +468,8 @@ def from_disk(self, path, exclude=tuple()):
     return self
 ```
 
-The above example will serialize the current snake in a `snek.txt` in the model
-data directory. When a model using the `snek` component is loaded, it will open
+The above example will serialize the current snake in a `snek.txt` in the data
+directory. When a pipeline using the `snek` component is loaded, it will open
 the `snek.txt` and make it available to the component.
 
 ### Custom language classes via entry points {#entry-points-languages}
@@ -476,7 +477,7 @@ the `snek.txt` and make it available to the component.
 To stay with the theme of the previous example and
 [this blog post on entry points](https://amir.rachum.com/blog/2017/07/28/python-entry-points/),
 let's imagine you wanted to implement your own `SnekLanguage` class for your
-custom model – but you don't necessarily want to modify spaCy's code to add a
+custom pipeline – but you don't necessarily want to modify spaCy's code to add a
 language. In your package, you could then implement the following
 [custom language subclass](/usage/linguistic-features#language-subclass):
 
@@ -510,10 +511,10 @@ setup(
 ```
 
 In spaCy, you can then load the custom `snk` language and it will be resolved to
-`SnekLanguage` via the custom entry point. This is especially relevant for model
-packages you train, which could then specify `lang = snk` in their `config.cfg`
-without spaCy raising an error because the language is not available in the core
-library.
+`SnekLanguage` via the custom entry point. This is especially relevant for
+pipeline packages you [train](/usage/training), which could then specify
+`lang = snk` in their `config.cfg` without spaCy raising an error because the
+language is not available in the core library.
 
 ### Custom displaCy colors via entry points {#entry-points-displacy new="2.2"}
 
@@ -526,7 +527,7 @@ values.
 
 > #### Domain-specific NER labels
 >
-> Good examples of models with domain-specific label schemes are
+> Good examples of pipelines with domain-specific label schemes are
 > [scispaCy](/universe/project/scispacy) and
 > [Blackstone](/universe/project/blackstone).
 
@@ -559,24 +560,23 @@ import DisplaCyEntSnekHtml from 'images/displacy-ent-snek.html'
 
 <Iframe title="displaCy visualization of entities" html={DisplaCyEntSnekHtml} height={100} />
 
-## Saving, loading and distributing models {#models}
+## Saving, loading and distributing trained pipelines {#models}
 
-After training your model, you'll usually want to save its state, and load it
+After training your pipeline, you'll usually want to save its state, and load it
 back later. You can do this with the [`Language.to_disk`](/api/language#to_disk)
 method:
 
 ```python
-nlp.to_disk("./en_example_model")
+nlp.to_disk("./en_example_pipeline")
 ```
 
 The directory will be created if it doesn't exist, and the whole pipeline data,
-model meta and model configuration will be written out. To make the model more
-convenient to deploy, we recommend wrapping it as a
-[Python package](/api/cli#package).
+meta and configuration will be written out. To make the pipeline more convenient
+to deploy, we recommend wrapping it as a [Python package](/api/cli#package).
 
 <Accordion title="What’s the difference between the config.cfg and meta.json?" spaced id="models-meta-vs-config">
 
-When you save a model in spaCy v3.0+, two files will be exported: a
+When you save a pipeline in spaCy v3.0+, two files will be exported: a
 [`config.cfg`](/api/data-formats#config) based on
 [`nlp.config`](/api/language#config) and a [`meta.json`](/api/data-formats#meta)
 based on [`nlp.meta`](/api/language#meta).
@@ -587,42 +587,42 @@ based on [`nlp.meta`](/api/language#meta).
   [pipeline components](/usage/processing-pipelines#custom-components) or
   [model architectures](/api/architectures). Given a config, spaCy is able
   reconstruct the whole tree of objects and the `nlp` object. An exported config
-  can also be used to [train a model](/usage/training#conig) with the same
+  can also be used to [train a pipeline](/usage/training#config) with the same
   settings.
-- **meta**: Meta information about the model and the Python package, such as the
-  author information, license, version, data sources and label scheme. This is
-  mostly used for documentation purposes and for packaging models. It has no
-  impact on the functionality of the `nlp` object.
+- **meta**: Meta information about the pipeline and the Python package, such as
+  the author information, license, version, data sources and label scheme. This
+  is mostly used for documentation purposes and for packaging pipelines. It has
+  no impact on the functionality of the `nlp` object.
 
 </Accordion>
 
-### Generating a model package {#models-generating}
+### Generating a pipeline package {#models-generating}
 
 <Infobox title="Important note" variant="warning">
 
-The model packages are **not suitable** for the public
+Pipeline packages are typically **not suitable** for the public
 [pypi.python.org](https://pypi.python.org) directory, which is not designed for
 binary data and files over 50 MB. However, if your company is running an
-**internal installation** of PyPi, publishing your models on there can be a
-convenient way to share them with your team.
+**internal installation** of PyPi, publishing your pipeline packages on there
+can be a convenient way to share them with your team.
 
 </Infobox>
 
 spaCy comes with a handy CLI command that will create all required files, and
-walk you through generating the meta data. You can also create the meta.json
-manually and place it in the model data directory, or supply a path to it using
-the `--meta` flag. For more info on this, see the [`package`](/api/cli#package)
+walk you through generating the meta data. You can also create the `meta.json`
+manually and place it in the data directory, or supply a path to it using the
+`--meta` flag. For more info on this, see the [`package`](/api/cli#package)
 docs.
 
 > #### meta.json (example)
 >
 > ```json
 > {
->   "name": "example_model",
+>   "name": "example_pipeline",
 >   "lang": "en",
 >   "version": "1.0.0",
 >   "spacy_version": ">=2.0.0,<3.0.0",
->   "description": "Example model for spaCy",
+>   "description": "Example pipeline for spaCy",
 >   "author": "You",
 >   "email": "you@example.com",
 >   "license": "CC BY-SA 3.0"
@@ -630,27 +630,27 @@ docs.
 > ```
 
 ```cli
-$ python -m spacy package ./en_example_model ./my_models
+$ python -m spacy package ./en_example_pipeline ./my_pipelines
 ```
 
-This command will create a model package directory and will run
+This command will create a pipeline package directory and will run
 `python setup.py sdist` in that directory to create `.tar.gz` archive of your
-model package that can be installed using `pip install`.
+package that can be installed using `pip install`.
 
 ```yaml
 ### Directory structure
 └── /
-    ├── MANIFEST.in                        # to include meta.json
-    ├── meta.json                          # model meta data
-    ├── setup.py                           # setup file for pip installation
-    ├── en_example_model                   # model directory
-    │    ├── __init__.py                   # init for pip installation
-    │    └── en_example_model-1.0.0        # model data
-    │        ├── config.cfg                # model config
-    │        ├── meta.json                 # model meta
-    │        └── ...                       # directories with component data
+    ├── MANIFEST.in                           # to include meta.json
+    ├── meta.json                             # pipeline meta data
+    ├── setup.py                              # setup file for pip installation
+    ├── en_example_pipeline                   # pipeline directory
+    │    ├── __init__.py                      # init for pip installation
+    │    └── en_example_pipeline-1.0.0        # pipeline data
+    │        ├── config.cfg                   # pipeline config
+    │        ├── meta.json                    # pipeline meta
+    │        └── ...                          # directories with component data
     └── dist
-        └── en_example_model-1.0.0.tar.gz  # installable package
+        └── en_example_pipeline-1.0.0.tar.gz  # installable package
 ```
 
 You can also find templates for all files in the
@@ -659,16 +659,15 @@ If you're creating the package manually, keep in mind that the directories need
 to be named according to the naming conventions of `lang_name` and
 `lang_name-version`.
 
-### Customizing the model setup {#models-custom}
+### Customizing the package setup {#models-custom}
 
-The `load()` method that comes with our model package templates will take care
-of putting all this together and returning a `Language` object with the loaded
-pipeline and data. If your model requires custom
-[pipeline components](/usage/processing-pipelines) or a custom language class,
-you can also **ship the code with your model** and include it in the
-`__init__.py` – for example, to register custom
-[pipeline components](/usage/processing-pipelines#custom-components) before the
-`nlp` object is created.
+The `load()` method that comes with our pipeline package templates will take
+care of putting all this together and returning a `Language` object with the
+loaded pipeline and data. If your pipeline requires
+[custom components](/usage/processing-pipelines#custom-components) or a custom
+language class, you can also **ship the code with your package** and include it
+in the `__init__.py` – for example, to register components before the `nlp`
+object is created.
 
 <Infobox variant="warning" title="Important note on making manual edits">
 
@@ -682,16 +681,16 @@ spaCy to export the current state of its `nlp` objects via
 
 </Infobox>
 
-### Loading a custom model package {#loading}
+### Loading a custom pipeline package {#loading}
 
-To load a model from a data directory, you can use
+To load a pipeline from a data directory, you can use
 [`spacy.load()`](/api/top-level#spacy.load) with the local path. This will look
 for a `config.cfg` in the directory and use the `lang` and `pipeline` settings
 to initialize a `Language` class with a processing pipeline and load in the
 model data.
 
 ```python
-nlp = spacy.load("/path/to/model")
+nlp = spacy.load("/path/to/pipeline")
 ```
 
 If you want to **load only the binary data**, you'll have to create a `Language`
diff --git a/website/docs/usage/spacy-101.md b/website/docs/usage/spacy-101.md
index 8ea6a6ca0..82fec4b6a 100644
--- a/website/docs/usage/spacy-101.md
+++ b/website/docs/usage/spacy-101.md
@@ -130,14 +130,15 @@ related to more general machine learning functionality.
 ### Statistical models {#statistical-models}
 
 While some of spaCy's features work independently, others require
-[ statistical models](/models) to be loaded, which enable spaCy to **predict**
-linguistic annotations – for example, whether a word is a verb or a noun. spaCy
-currently offers statistical models for a variety of languages, which can be
-installed as individual Python modules. Models can differ in size, speed, memory
-usage, accuracy and the data they include. The model you choose always depends
-on your use case and the texts you're working with. For a general-purpose use
-case, the small, default models are always a good start. They typically include
-the following components:
+[trained pipelines](/models) to be loaded, which enable spaCy to **predict**
+linguistic annotations – for example, whether a word is a verb or a noun. A
+trained pipeline can consist of multiple components that use a statistical model
+trained on labeled data. spaCy currently offers trained pipelines for a variety
+of languages, which can be installed as individual Python modules. Pipeline
+packages can differ in size, speed, memory usage, accuracy and the data they
+include. The package you choose always depends on your use case and the texts
+you're working with. For a general-purpose use case, the small, default packages
+are always a good start. They typically include the following components:
 
 - **Binary weights** for the part-of-speech tagger, dependency parser and named
   entity recognizer to predict those annotations in context.
@@ -146,8 +147,9 @@ the following components:
 - **Data files** like lemmatization rules and lookup tables.
 - **Word vectors**, i.e. multi-dimensional meaning representations of words that
   let you determine how similar they are to each other.
-- **Configuration** options, like the language and processing pipeline settings,
-  to put spaCy in the correct state when you load in the model.
+- **Configuration** options, like the language and processing pipeline settings
+  and model implementations to use, to put spaCy in the correct state when you
+  load the pipeline.
 
 ## Linguistic annotations {#annotations}
 
@@ -158,7 +160,7 @@ analyzing text, it makes a huge difference whether a noun is the subject of a
 sentence, or the object – or whether "google" is used as a verb, or refers to
 the website or company in a specific context.
 
-> #### Loading models
+> #### Loading pipelines
 >
 > ```cli
 > $ python -m spacy download en_core_web_sm
@@ -167,11 +169,11 @@ the website or company in a specific context.
 > >>> nlp = spacy.load("en_core_web_sm")
 > ```
 
-Once you've [downloaded and installed](/usage/models) a model, you can load it
-via [`spacy.load()`](/api/top-level#spacy.load). This will return a `Language`
-object containing all components and data needed to process text. We usually
-call it `nlp`. Calling the `nlp` object on a string of text will return a
-processed `Doc`:
+Once you've [downloaded and installed](/usage/models) a trained pipeline, you
+can load it via [`spacy.load`](/api/top-level#spacy.load). This will return a
+`Language` object containing all components and data needed to process text. We
+usually call it `nlp`. Calling the `nlp` object on a string of text will return
+a processed `Doc`:
 
 ```python
 ### {executable="true"}
@@ -233,7 +235,7 @@ To learn more about entity recognition in spaCy, how to **add your own
 entities** to a document and how to **train and update** the entity predictions
 of a model, see the usage guides on
 [named entity recognition](/usage/linguistic-features#named-entities) and
-[training the named entity recognizer](/usage/training#ner).
+[training pipelines](/usage/training).
 
 </Infobox>
 
@@ -346,7 +348,7 @@ The mapping of words to hashes doesn't depend on any state. To make sure each
 value is unique, spaCy uses a
 [hash function](https://en.wikipedia.org/wiki/Hash_function) to calculate the
 hash **based on the word string**. This also means that the hash for "coffee"
-will always be the same, no matter which model you're using or how you've
+will always be the same, no matter which pipeline you're using or how you've
 configured spaCy.
 
 However, hashes **cannot be reversed** and there's no way to resolve
@@ -391,7 +393,7 @@ import Serialization101 from 'usage/101/\_serialization.md'
 
 <Infobox title="Saving and loading" emoji="📖">
 
-To learn more about how to **save and load your own models**, see the usage
+To learn more about how to **save and load your own pipelines**, see the usage
 guide on [saving and loading](/usage/saving-loading#models).
 
 </Infobox>
@@ -402,9 +404,9 @@ import Training101 from 'usage/101/\_training.md'
 
 <Training101 />
 
-<Infobox title="Training statistical models" emoji="📖">
+<Infobox title="Training pipelines and models" emoji="📖">
 
-To learn more about **training and updating** models, how to create training
+To learn more about **training and updating** pipelines, how to create training
 data and how to improve spaCy's named entity recognition models, see the usage
 guides on [training](/usage/training).
 
@@ -454,8 +456,8 @@ via the following platforms:
   practices**.
 - [GitHub issue tracker](https://github.com/explosion/spaCy/issues): **Bug
   reports** and **improvement suggestions**, i.e. everything that's likely
-  spaCy's fault. This also includes problems with the models beyond statistical
-  imprecisions, like patterns that point to a bug.
+  spaCy's fault. This also includes problems with the trained pipelines beyond
+  statistical imprecisions, like patterns that point to a bug.
 
 <Infobox title="Important note" variant="warning">
 
@@ -484,10 +486,10 @@ Another way of getting involved is to help us improve the
 happen to speak one of the languages currently in
 [alpha support](/usage/models#languages). Even adding simple tokenizer
 exceptions, stop words or lemmatizer data can make a big difference. It will
-also make it easier for us to provide a statistical model for the language in
-the future. Submitting a test that documents a bug or performance issue, or
-covers functionality that's especially important for your application is also
-very helpful. This way, you'll also make sure we never accidentally introduce
+also make it easier for us to provide a trained pipeline for the language in the
+future. Submitting a test that documents a bug or performance issue, or covers
+functionality that's especially important for your application is also very
+helpful. This way, you'll also make sure we never accidentally introduce
 regressions to the parts of the library that you care about the most.
 
 **For more details on the types of contributions we're looking for, the code
diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md
index 2fabd3f7d..9c18e4606 100644
--- a/website/docs/usage/training.md
+++ b/website/docs/usage/training.md
@@ -1,5 +1,6 @@
 ---
-title: Training Models
+title: Training Pipelines & Models
+teaser: Train and update components on your own data and integrate custom models
 next: /usage/layers-architectures
 menu:
   - ['Introduction', 'basics']
@@ -10,7 +11,7 @@ menu:
   - ['Internal API', 'api']
 ---
 
-## Introduction to training models {#basics hidden="true"}
+## Introduction to training {#basics hidden="true"}
 
 import Training101 from 'usage/101/\_training.md'
 
@@ -25,13 +26,13 @@ new, active learning-powered annotation tool we've developed. Prodigy is fast
 and extensible, and comes with a modern **web application** that helps you
 collect training data faster. It integrates seamlessly with spaCy, pre-selects
 the **most relevant examples** for annotation, and lets you train and evaluate
-ready-to-use spaCy models.
+ready-to-use spaCy pipelines.
 
 </Infobox>
 
 ## Quickstart {#quickstart tag="new"}
 
-The recommended way to train your spaCy models is via the
+The recommended way to train your spaCy pipelines is via the
 [`spacy train`](/api/cli#train) command on the command line. It only needs a
 single [`config.cfg`](#config) **configuration file** that includes all settings
 and hyperparameters. You can optionally [overwrite](#config-overrides) settings
@@ -94,9 +95,9 @@ $ python -m spacy train config.cfg --output ./output --paths.train ./train.spacy
 ## Training config {#config}
 
 Training config files include all **settings and hyperparameters** for training
-your model. Instead of providing lots of arguments on the command line, you only
-need to pass your `config.cfg` file to [`spacy train`](/api/cli#train). Under
-the hood, the training config uses the
+your pipeline. Instead of providing lots of arguments on the command line, you
+only need to pass your `config.cfg` file to [`spacy train`](/api/cli#train).
+Under the hood, the training config uses the
 [configuration system](https://thinc.ai/docs/usage-config) provided by our
 machine learning library [Thinc](https://thinc.ai). This also makes it easy to
 integrate custom models and architectures, written in your framework of choice.
@@ -178,27 +179,26 @@ $ python -m spacy train config.cfg --paths.train ./corpus/train.spacy --paths.de
 ```
 
 Only existing sections and values in the config can be overwritten. At the end
-of the training, the final filled `config.cfg` is exported with your model, so
-you'll always have a record of the settings that were used, including your
+of the training, the final filled `config.cfg` is exported with your pipeline,
+so you'll always have a record of the settings that were used, including your
 overrides. Overrides are added before [variables](#config-interpolation) are
 resolved, by the way – so if you need to use a value in multiple places,
 reference it across your config and override it on the CLI once.
 
 ### Defining pipeline components {#config-components}
 
-When you train a model, you typically train a
-[pipeline](/usage/processing-pipelines) of **one or more components**. The
-`[components]` block in the config defines the available pipeline components and
-how they should be created – either by a built-in or custom
-[factory](/usage/processing-pipelines#built-in), or
+You typically train a [pipeline](/usage/processing-pipelines) of **one or more
+components**. The `[components]` block in the config defines the available
+pipeline components and how they should be created – either by a built-in or
+custom [factory](/usage/processing-pipelines#built-in), or
 [sourced](/usage/processing-pipelines#sourced-components) from an existing
-pretrained model. For example, `[components.parser]` defines the component named
+trained pipeline. For example, `[components.parser]` defines the component named
 `"parser"` in the pipeline. There are different ways you might want to treat
 your components during training, and the most common scenarios are:
 
 1. Train a **new component** from scratch on your data.
-2. Update an existing **pretrained component** with more examples.
-3. Include an existing pretrained component without updating it.
+2. Update an existing **trained component** with more examples.
+3. Include an existing trained component without updating it.
 4. Include a non-trainable component, like a rule-based
    [`EntityRuler`](/api/entityruler) or [`Sentencizer`](/api/sentencizer), or a
    fully [custom component](/usage/processing-pipelines#custom-components).
@@ -209,16 +209,16 @@ If a component block defines a `factory`, spaCy will look it up in the
 new component from scratch. All settings defined in the config block will be
 passed to the component factory as arguments. This lets you configure the model
 settings and hyperparameters. If a component block defines a `source`, the
-component will be copied over from an existing pretrained model, with its
+component will be copied over from an existing trained pipeline, with its
 existing weights. This lets you include an already trained component in your
-model pipeline, or update a pretrained component with more data specific to your
-use case.
+pipeline, or update a trained component with more data specific to your use
+case.
 
 ```ini
 ### config.cfg (excerpt)
 [components]
 
-# "parser" and "ner" are sourced from a pretrained model
+# "parser" and "ner" are sourced from a trained pipeline
 [components.parser]
 source = "en_core_web_sm"
 
@@ -243,7 +243,7 @@ weights and [resume training](/api/language#resume_training).
 
 If you don't want a component to be updated, you can **freeze** it by adding it
 to the `frozen_components` list in the `[training]` block. Frozen components are
-**not updated** during training and are included in the final trained model
+**not updated** during training and are included in the final trained pipeline
 as-is.
 
 > #### Note on frozen components
@@ -252,8 +252,8 @@ as-is.
 > still **run** during training and evaluation. This is very important, because
 > they may still impact your model's performance – for instance, a sentence
 > boundary detector can impact what the parser or entity recognizer considers a
-> valid parse. So the evaluation results should always reflect what your model
-> will produce at runtime.
+> valid parse. So the evaluation results should always reflect what your
+> pipeline will produce at runtime.
 
 ```ini
 [nlp]
@@ -398,11 +398,11 @@ different tasks. For example:
 
 ### Metrics, training output and weighted scores {#metrics}
 
-When you train a model using the [`spacy train`](/api/cli#train) command, you'll
-see a table showing the metrics after each pass over the data. The available
-metrics **depend on the pipeline components**. Pipeline components also define
-which scores are shown and how they should be **weighted in the final score**
-that decides about the best model.
+When you train a pipeline using the [`spacy train`](/api/cli#train) command,
+you'll see a table showing the metrics after each pass over the data. The
+available metrics **depend on the pipeline components**. Pipeline components
+also define which scores are shown and how they should be **weighted in the
+final score** that decides about the best model.
 
 The `training.score_weights` setting in your `config.cfg` lets you customize the
 scores shown in the table and how they should be weighted. In this example, the
@@ -415,8 +415,8 @@ score.
 >
 > At the end of your training process, you typically want to select the **best
 > model** – but what "best" means depends on the available components and your
-> specific use case. For instance, you may prefer a model with higher NER and
-> lower POS tagging accuracy over a model with lower NER and higher POS
+> specific use case. For instance, you may prefer a pipeline with higher NER and
+> lower POS tagging accuracy over a pipeline with lower NER and higher POS
 > accuracy. You can express this preference in the score weights, e.g. by
 > assigning `ents_f` (NER F-score) a higher weight.
 
@@ -488,8 +488,8 @@ The [`spacy train`](/api/cli#train) recipe lets you specify an optional argument
 `--code` that points to a Python file. The file is imported before training and
 allows you to add custom functions and architectures to the function registry
 that can then be referenced from your `config.cfg`. This lets you train spaCy
-models with custom components, without having to re-implement the whole training
-workflow.
+pipelines with custom components, without having to re-implement the whole
+training workflow.
 
 #### Example: Modifying the nlp object {#custom-code-nlp-callbacks}
 
@@ -837,11 +837,11 @@ def MyModel(output_width: int) -> Model[List[Doc], List[Floats2d]]:
 <Infobox variant="warning">
 
 spaCy gives you full control over the training loop. However, for most use
-cases, it's recommended to train your models via the
+cases, it's recommended to train your pipelines via the
 [`spacy train`](/api/cli#train) command with a [`config.cfg`](#config) to keep
 track of your settings and hyperparameters, instead of writing your own training
 scripts from scratch. [Custom registered functions](#custom-code) should
-typically give you everything you need to train fully custom models with
+typically give you everything you need to train fully custom pipelines with
 [`spacy train`](/api/cli#train).
 
 </Infobox>
@@ -874,8 +874,8 @@ their assigned part-of-speech tags.
 > #### About the tag map
 >
 > The tag map is part of the vocabulary and defines the annotation scheme. If
-> you're training a new language model, this will let you map the tags present
-> in the treebank you train on to spaCy's tag scheme:
+> you're training a new pipeline, this will let you map the tags present in the
+> treebank you train on to spaCy's tag scheme:
 >
 > ```python
 > tag_map = {"N": {"pos": "NOUN"}, "V": {"pos": "VERB"}}
@@ -924,15 +924,16 @@ it harder for the model to memorize the training data. For example, a `0.25`
 dropout means that each feature or internal representation has a 1/4 likelihood
 of being dropped.
 
-> - [`nlp`](/api/language): The `nlp` object with the model.
+> - [`nlp`](/api/language): The `nlp` object with the pipeline components and
+>   their models.
 > - [`nlp.begin_training`](/api/language#begin_training): Start the training and
->   return an optimizer to update the model's weights.
+>   return an optimizer to update the component model weights.
 > - [`Optimizer`](https://thinc.ai/docs/api-optimizers): Function that holds
 >   state between updates.
-> - [`nlp.update`](/api/language#update): Update model with examples.
+> - [`nlp.update`](/api/language#update): Update component models with examples.
 > - [`Example`](/api/example): object holding predictions and gold-standard
 >   annotations.
-> - [`nlp.to_disk`](/api/language#to_disk): Save the updated model to a
+> - [`nlp.to_disk`](/api/language#to_disk): Save the updated pipeline to a
 >   directory.
 
 ```python
@@ -944,7 +945,7 @@ for itn in range(100):
         doc = nlp.make_doc(raw_text)
         example = Example.from_dict(doc, {"entities": entity_offsets})
         nlp.update([example], sgd=optimizer)
-nlp.to_disk("/model")
+nlp.to_disk("/output")
 ```
 
 The [`nlp.update`](/api/language#update) method takes the following arguments:
diff --git a/website/docs/usage/v3.md b/website/docs/usage/v3.md
index 6a1499bdf..8ddcfd9af 100644
--- a/website/docs/usage/v3.md
+++ b/website/docs/usage/v3.md
@@ -42,7 +42,7 @@ menu:
 
 <Infobox title="Details & Documentation" emoji="📖" list>
 
-- **Usage:** [Training models](/usage/training)
+- **Usage:** [Training pipelines and models](/usage/training)
 - **Thinc:** [Thinc's config system](https://thinc.ai/docs/usage-config),
   [`Config`](https://thinc.ai/docs/api-config#config)
 - **CLI:** [`train`](/api/cli#train), [`pretrain`](/api/cli#pretrain),
@@ -59,14 +59,14 @@ menu:
 <Infobox title="Details & Documentation" emoji="📖" list>
 
 - **Usage:** [Embeddings & Transformers](/usage/embeddings-transformers),
-  [Training models](/usage/training)
+  [Training pipelines and models](/usage/training)
 - **API:** [`Transformer`](/api/transformer),
   [`TransformerData`](/api/transformer#transformerdata),
   [`FullTransformerBatch`](/api/transformer#fulltransformerbatch)
 - **Architectures: ** [TransformerModel](/api/architectures#TransformerModel),
   [TransformerListener](/api/architectures#TransformerListener),
   [Tok2VecTransformer](/api/architectures#Tok2VecTransformer)
-- **Models:** [`en_core_trf_lg_sm`](/models/en)
+- **Trained Pipelines:** [`en_core_trf_lg_sm`](/models/en)
 - **Implementation:**
   [`spacy-transformers`](https://github.com/explosion/spacy-transformers)
 
@@ -76,8 +76,7 @@ menu:
 
 <Infobox title="Details & Documentation" emoji="📖" list>
 
-<!-- TODO: link to new custom models page -->
-
+- **Usage:** [Layers and architectures](/usage/layers-architectures)
 - **Thinc: **
   [Wrapping PyTorch, TensorFlow & MXNet](https://thinc.ai/docs/usage-frameworks)
 - **API:** [Model architectures](/api/architectures), [`Pipe`](/api/pipe)
@@ -102,10 +101,10 @@ menu:
 
 spaCy projects let you manage and share **end-to-end spaCy workflows** for
 different **use cases and domains**, and orchestrate training, packaging and
-serving your custom models. You can start off by cloning a pre-defined project
-template, adjust it to fit your needs, load in your data, train a model, export
-it as a Python package, upload your outputs to a remote storage and share your
-results with your team.
+serving your custom pipelines. You can start off by cloning a pre-defined
+project template, adjust it to fit your needs, load in your data, train a
+pipeline, export it as a Python package, upload your outputs to a remote storage
+and share your results with your team.
 
 ![Illustration of project workflow and commands](../images/projects.svg)
 
@@ -121,14 +120,14 @@ data, [Streamlit](/usage/projects#streamlit) for building interactive apps,
 
 The easiest way to get started with an end-to-end training process is to clone a
 [project](/usage/projects) template. Projects let you manage multi-step
-workflows, from data preprocessing to training and packaging your model.
+workflows, from data preprocessing to training and packaging your pipeline.
 
 </Project>-->
 
 <Infobox title="Details & Documentation" emoji="📖" list>
 
 - **Usage:** [spaCy projects](/usage/projects),
-  [Training models](/usage/training)
+  [Training pipelines and models](/usage/training)
 - **CLI:** [`project`](/api/cli#project), [`train`](/api/cli#train)
 - **Templates:** [`projects`](https://github.com/explosion/projects)
 
@@ -183,7 +182,7 @@ now easier and more convenient. The `@Language.component` and
 `@Language.factory` decorators let you register your component, define its
 default configuration and meta data, like the attribute values it assigns and
 requires. Any custom component can be included during training, and sourcing
-components from existing pretrained models lets you **mix and match custom
+components from existing trained pipelines lets you **mix and match custom
 pipelines**. The `nlp.analyze_pipes` method outputs structured information about
 the current pipeline and its components, including the attributes they assign,
 the scores they compute during training and whether any required attributes
@@ -257,7 +256,7 @@ The following methods, attributes and commands are new in spaCy v3.0.
 | [`Language.select_pipes`](/api/language#select_pipes)                                                                           | Context manager for enabling or disabling specific pipeline components for a block.                                                                                                              |
 | [`Language.disable_pipe`](/api/language#disable_pipe), [`Language.enable_pipe`](/api/language#enable_pipe)                      | Disable or enable a loaded pipeline component (but don't remove it).                                                                                                                             |
 | [`Language.analyze_pipes`](/api/language#analyze_pipes)                                                                         | [Analyze](/usage/processing-pipelines#analysis) components and their interdependencies.                                                                                                          |
-| [`Language.resume_training`](/api/language#resume_training)                                                                     | Experimental: continue training a pretrained model and initialize "rehearsal" for components that implement a `rehearse` method to prevent catastrophic forgetting.                              |
+| [`Language.resume_training`](/api/language#resume_training)                                                                     | Experimental: continue training a trained pipeline and initialize "rehearsal" for components that implement a `rehearse` method to prevent catastrophic forgetting.                              |
 | [`@Language.factory`](/api/language#factory), [`@Language.component`](/api/language#component)                                  | Decorators for [registering](/usage/processing-pipelines#custom-components) pipeline component factories and simple stateless component functions.                                               |
 | [`Language.has_factory`](/api/language#has_factory)                                                                             | Check whether a component factory is registered on a language class.s                                                                                                                            |
 | [`Language.get_factory_meta`](/api/language#get_factory_meta), [`Language.get_pipe_meta`](/api/language#get_factory_meta)       | Get the [`FactoryMeta`](/api/language#factorymeta) with component metadata for a factory or instance name.                                                                                       |
@@ -266,8 +265,8 @@ The following methods, attributes and commands are new in spaCy v3.0.
 | [`Language.disabled`](/api/language#attributes)                                                                                 | Names of disabled components that are not run as part of the pipeline.                                                                                                                           |
 | [`Pipe.score`](/api/pipe#score)                                                                                                 | Method on pipeline components that returns a dictionary of evaluation scores.                                                                                                                    |
 | [`registry`](/api/top-level#registry)                                                                                           | Function registry to map functions to string names that can be referenced in [configs](/usage/training#config).                                                                                  |
-| [`util.load_meta`](/api/top-level#util.load_meta), [`util.load_config`](/api/top-level#util.load_config)                        | Updated helpers for loading a model's [`meta.json`](/api/data-formats#meta) and [`config.cfg`](/api/data-formats#config).                                                                        |
-| [`util.get_installed_models`](/api/top-level#util.get_installed_models)                                                         | Names of all models installed in the environment.                                                                                                                                                |
+| [`util.load_meta`](/api/top-level#util.load_meta), [`util.load_config`](/api/top-level#util.load_config)                        | Updated helpers for loading a pipeline's [`meta.json`](/api/data-formats#meta) and [`config.cfg`](/api/data-formats#config).                                                                     |
+| [`util.get_installed_models`](/api/top-level#util.get_installed_models)                                                         | Names of all pipeline packages installed in the environment.                                                                                                                                     |
 | [`init config`](/api/cli#init-config), [`init fill-config`](/api/cli#init-fill-config), [`debug config`](/api/cli#debug-config) | CLI commands for initializing, auto-filling and debugging [training configs](/usage/training).                                                                                                   |
 | [`project`](/api/cli#project)                                                                                                   | Suite of CLI commands for cloning, running and managing [spaCy projects](/usage/projects).                                                                                                       |
 
@@ -280,9 +279,9 @@ The following methods, attributes and commands are new in spaCy v3.0.
 To help you get started with spaCy v3.0 and the new features, we've added
 several new or rewritten documentation pages, including a new usage guide on
 [embeddings, transformers and transfer learning](/usage/embeddings-transformers),
-a guide on [training models](/usage/training) rewritten from scratch, a page
-explaining the new [spaCy projects](/usage/projects) and updated usage
-documentation on
+a guide on [training pipelines and models](/usage/training) rewritten from
+scratch, a page explaining the new [spaCy projects](/usage/projects) and updated
+usage documentation on
 [custom pipeline components](/usage/processing-pipelines#custom-components).
 We've also added a bunch of new illustrations and new API reference pages
 documenting spaCy's machine learning [model architectures](/api/architectures)
@@ -335,15 +334,15 @@ Note that spaCy v3.0 now requires **Python 3.6+**.
 
 ### API changes {#incompat-api}
 
-- Model symlinks, the `link` command and shortcut names are now deprecated.
-  There can be many [different models](/models) and not just one "English
-  model", so you should always use the full model name like
+- Pipeline package symlinks, the `link` command and shortcut names are now
+  deprecated. There can be many [different trained pipelines](/models) and not
+  just one "English model", so you should always use the full package name like
   [`en_core_web_sm`](/models/en) explicitly.
-- A model's [`meta.json`](/api/data-formats#meta) is now only used to provide
-  meta information like the model name, author, license and labels. It's **not**
-  used to construct the processing pipeline anymore. This is all defined in the
-  [`config.cfg`](/api/data-formats#config), which also includes all settings
-  used to train the model.
+- A pipeline's [`meta.json`](/api/data-formats#meta) is now only used to provide
+  meta information like the package name, author, license and labels. It's
+  **not** used to construct the processing pipeline anymore. This is all defined
+  in the [`config.cfg`](/api/data-formats#config), which also includes all
+  settings used to train the pipeline.
 - The [`train`](/api/cli#train) and [`pretrain`](/api/cli#pretrain) commands now
   only take a `config.cfg` file containing the full
   [training config](/usage/training#config).
@@ -390,10 +389,10 @@ Note that spaCy v3.0 now requires **Python 3.6+**.
 | `GoldCorpus`                                             | [`Corpus`](/api/corpus)                                                                                      |
 | `KnowledgeBase.load_bulk`, `KnowledgeBase.dump`          | [`KnowledgeBase.from_disk`](/api/kb#from_disk), [`KnowledgeBase.to_disk`](/api/kb#to_disk)                   |
 | `Matcher.pipe`, `PhraseMatcher.pipe`                     | not needed                                                                                                   |
-| `spacy init-model`                                       | [`spacy init model`](/api/cli#init-model)                                                                    |
+| `spacy init-model`                                       | [`spacy init vocab`](/api/cli#init.vocab)                                                                    |
 | `spacy debug-data`                                       | [`spacy debug data`](/api/cli#debug-data)                                                                    |
 | `spacy profile`                                          | [`spacy debug profile`](/api/cli#debug-profile)                                                              |
-| `spacy link`, `util.set_data_path`, `util.get_data_path` | not needed, model symlinks are deprecated                                                                    |
+| `spacy link`, `util.set_data_path`, `util.get_data_path` | not needed, symlinks are deprecated                                                                          |
 
 The following deprecated methods, attributes and arguments were removed in v3.0.
 Most of them have been **deprecated for a while** and many would previously
@@ -414,12 +413,13 @@ on them.
 
 ## Migrating from v2.x {#migrating}
 
-### Downloading and loading models {#migrating-downloading-models}
+### Downloading and loading trained pipelines {#migrating-downloading-models}
 
-Model symlinks and shortcuts like `en` are now officially deprecated. There are
-[many different models](/models) with different capabilities and not just one
-"English model". In order to download and load a model, you should always use
-its full name – for instance, [`en_core_web_sm`](/models/en#en_core_web_sm).
+Symlinks and shortcuts like `en` are now officially deprecated. There are
+[many different trained pipelines](/models) with different capabilities and not
+just one "English model". In order to download and load a package, you should
+always use its full name – for instance,
+[`en_core_web_sm`](/models/en#en_core_web_sm).
 
 ```diff
 - python -m spacy download en
@@ -522,12 +522,12 @@ and you typically shouldn't have to use it in your code.
 + parser = nlp.add_pipe("parser")
 ```
 
-If you need to add a component from an existing pretrained model, you can now
+If you need to add a component from an existing trained pipeline, you can now
 use the `source` argument on [`nlp.add_pipe`](/api/language#add_pipe). This will
 check that the component is compatible, and take care of porting over all
-config. During training, you can also reference existing pretrained components
-in your [config](/usage/training#config-components) and decide whether or not
-they should be updated with more data.
+config. During training, you can also reference existing trained components in
+your [config](/usage/training#config-components) and decide whether or not they
+should be updated with more data.
 
 > #### config.cfg (excerpt)
 >
@@ -599,13 +599,13 @@ nlp = spacy.blank("en")
 + ruler.load_from_tag_map(YOUR_TAG_MAP)
 ```
 
-### Training models {#migrating-training}
+### Training pipelines and models {#migrating-training}
 
-To train your models, you should now pretty much always use the
+To train your pipelines, you should now pretty much always use the
 [`spacy train`](/api/cli#train) CLI. You shouldn't have to put together your own
 training scripts anymore, unless you _really_ want to. The training commands now
 use a [flexible config file](/usage/training#config) that describes all training
-settings and hyperparameters, as well as your pipeline, model components and
+settings and hyperparameters, as well as your pipeline, components and
 architectures to use. The `--code` argument lets you pass in code containing
 [custom registered functions](/usage/training#custom-code) that you can
 reference in your config. To get started, check out the
@@ -616,7 +616,7 @@ reference in your config. To get started, check out the
 spaCy v3.0 uses a new
 [binary training data format](/api/data-formats#binary-training) created by
 serializing a [`DocBin`](/api/docbin), which represents a collection of `Doc`
-objects. This means that you can train spaCy models using the same format it
+objects. This means that you can train spaCy pipelines using the same format it
 outputs: annotated `Doc` objects. The binary format is extremely **efficient in
 storage**, especially when packing multiple documents together. You can convert
 your existing JSON-formatted data using the [`spacy convert`](/api/cli#convert)
@@ -655,7 +655,7 @@ values. You can then use the auto-generated `config.cfg` for training:
 
 The easiest way to get started with an end-to-end training process is to clone a
 [project](/usage/projects) template. Projects let you manage multi-step
-workflows, from data preprocessing to training and packaging your model.
+workflows, from data preprocessing to training and packaging your pipeline.
 
 </Project>
 
@@ -728,7 +728,7 @@ setting up the label scheme.
 + nlp.begin_training(lambda: examples)
 ```
 
-#### Packaging models {#migrating-training-packaging}
+#### Packaging trained pipelines {#migrating-training-packaging}
 
 The [`spacy package`](/api/cli#package) command now automatically builds the
 installable `.tar.gz` sdist of the Python package, so you don't have to run this
@@ -736,8 +736,8 @@ step manually anymore. You can disable the behavior by setting the `--no-sdist`
 flag.
 
 ```diff
-python -m spacy package ./model ./packages
-- cd /output/en_model-0.0.0
+python -m spacy package ./output ./packages
+- cd /output/en_pipeline-0.0.0
 - python setup.py sdist
 ```
 
diff --git a/website/docs/usage/visualizers.md b/website/docs/usage/visualizers.md
index 4ba0112b6..6754cabab 100644
--- a/website/docs/usage/visualizers.md
+++ b/website/docs/usage/visualizers.md
@@ -23,10 +23,10 @@ The quickest way to visualize `Doc` is to use
 [`displacy.serve`](/api/top-level#displacy.serve). This will spin up a simple
 web server and let you view the result straight from your browser. displaCy can
 either take a single `Doc` or a list of `Doc` objects as its first argument.
-This lets you construct them however you like – using any model or modifications
-you like. If you're using [Streamlit](https://streamlit.io), check out the
-[`spacy-streamlit`](https://github.com/explosion/spacy-streamlit) package that
-helps you integrate spaCy visualizations into your apps!
+This lets you construct them however you like – using any pipeline or
+modifications you like. If you're using [Streamlit](https://streamlit.io), check
+out the [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit)
+package that helps you integrate spaCy visualizations into your apps!
 
 ## Visualizing the dependency parse {#dep}
 
@@ -131,8 +131,8 @@ example, you can choose to display `PERSON` entities. Internally, the visualizer
 knows nothing about available entity types and will render whichever spans and
 labels it receives. This makes it especially easy to work with custom entity
 types. By default, displaCy comes with colors for all entity types used by
-[spaCy models](/models). If you're using custom entity types, you can use the
-`colors` setting to add your own colors for them.
+[trained spaCy pipelines](/models). If you're using custom entity types, you can
+use the `colors` setting to add your own colors for them.
 
 > #### Options example
 >
@@ -176,7 +176,7 @@ visualizations will be included as HTML.
 
 ```python
 ### Jupyter example
-# Don't forget to install a model, e.g.: python -m spacy download en
+# Don't forget to install a trained pipeline, e.g.: python -m spacy download en_core_web_sm
 
 # In[1]:
 import spacy
diff --git a/website/meta/sidebars.json b/website/meta/sidebars.json
index 94fbc2492..76d5e63d6 100644
--- a/website/meta/sidebars.json
+++ b/website/meta/sidebars.json
@@ -50,11 +50,11 @@
                 "items": [{ "text": "Overview", "url": "/models" }]
             },
             {
-                "label": "Core Models",
+                "label": "Trained Pipelines",
                 "items": []
             },
             {
-                "label": "Starter Models",
+                "label": "Starter Packages",
                 "items": []
             }
         ]
diff --git a/website/src/components/tag.js b/website/src/components/tag.js
index c7ef20919..3f2b4e994 100644
--- a/website/src/components/tag.js
+++ b/website/src/components/tag.js
@@ -23,7 +23,7 @@ export default function Tag({ spaced = false, variant, tooltip, children }) {
         )
     }
     if (variant === 'model') {
-        const tooltipText = `To use this functionality, spaCy needs a model to be installed that supports the following capabilities: ${children}`
+        const tooltipText = `To use this functionality, spaCy needs a trained pipeline that supports the following capabilities: ${children}`
         return (
             <TagTemplate spaced={spaced} tooltip={tooltipText}>
                 Needs model
diff --git a/website/src/templates/models.js b/website/src/templates/models.js
index 3c5e9d2a4..1bcdd4859 100644
--- a/website/src/templates/models.js
+++ b/website/src/templates/models.js
@@ -37,14 +37,15 @@ const MODEL_META = {
     ents_r: 'Entities (recall)',
     cpu: 'words per second on CPU',
     gpu: 'words per second on GPU',
-    pipeline: 'Processing pipeline components in order',
+    pipeline: 'Active processing pipeline components in order',
+    components: 'All processing pipeline components (including disabled components)',
     sources: 'Sources of training data',
     vecs:
-        'Word vectors included in the model. Models that only support context vectors compute similarity via the tensors shared with the pipeline.',
+        'Word vectors included in the package. Packages that only support context vectors compute similarity via the tensors shared with the pipeline.',
     benchmark_parser: 'Syntax accuracy',
     benchmark_ner: 'NER accuracy',
     benchmark_speed: 'Speed',
-    compat: 'Latest compatible model version for your spaCy installation',
+    compat: 'Latest compatible package version for your spaCy installation',
 }
 
 const LABEL_SCHEME_META = {
@@ -178,6 +179,7 @@ const Model = ({ name, langId, langName, baseUrl, repo, compatibility, hasExampl
         { label: 'Type', tag: type, content: MODEL_META[type] },
         { label: 'Genre', tag: genre, content: MODEL_META[genre] },
         { label: 'Size', tag: size, content: meta.sizeFull },
+        { label: 'Components', content: components, help: MODEL_META.components },
         { label: 'Pipeline', content: pipeline, help: MODEL_META.pipeline },
         { label: 'Vectors', content: meta.vectors, help: MODEL_META.vecs },
         { label: 'Sources', content: sources, help: MODEL_META.sources },
@@ -355,7 +357,7 @@ const Models = ({ pageContext, repo, children }) => {
     }, [initialized, baseUrl])
 
     const modelTitle = title
-    const modelTeaser = `Available pretrained statistical models for ${title}`
+    const modelTeaser = `Available trained pipelines for ${title}`
 
     const starterTitle = `${title} starters`
     const starterTeaser = `Available transfer learning starter packs for ${title}`
diff --git a/website/src/widgets/quickstart-install.js b/website/src/widgets/quickstart-install.js
index 9d993c969..d6f7960d0 100644
--- a/website/src/widgets/quickstart-install.js
+++ b/website/src/widgets/quickstart-install.js
@@ -43,7 +43,7 @@ const DATA = [
             {
                 id: 'transformers',
                 title: 'Transformers',
-                help: 'Use transformers like BERT to train your spaCy models',
+                help: 'Use transformers like BERT to train your spaCy pipelines',
             },
             {
                 id: 'lookups',
@@ -63,7 +63,7 @@ const QuickstartInstall = ({ id, title }) => (
                 ...DATA,
                 {
                     id: 'models',
-                    title: 'Models',
+                    title: 'Trained Pipelines',
                     multiple: true,
                     options: models.map(({ code, name }) => ({ id: code, title: name })),
                 },
diff --git a/website/src/widgets/quickstart-models.js b/website/src/widgets/quickstart-models.js
index f7505cefa..d19ff9e52 100644
--- a/website/src/widgets/quickstart-models.js
+++ b/website/src/widgets/quickstart-models.js
@@ -15,13 +15,13 @@ const data = [
             {
                 id: 'spacy',
                 title: 'Use spacy.load()',
-                help: "Use spaCy's built-in loader to load the model by name.",
+                help: "Use spaCy's built-in loader to load the package by name",
                 checked: true,
             },
             {
                 id: 'module',
                 title: 'Import as module',
-                help: 'Import the model explicitly as a Python module.',
+                help: 'Import the package explicitly as a Python module',
             },
         ],
     },

From 25a595dc10ef60ee5d20b6f06bf123119e2a0238 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 3 Sep 2020 16:37:45 +0200
Subject: [PATCH 42/71] Fix typos and wording [ci skip]

---
 website/docs/api/language.md              | 6 ++++--
 website/docs/usage/linguistic-features.md | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/website/docs/api/language.md b/website/docs/api/language.md
index d65b217a4..7799f103b 100644
--- a/website/docs/api/language.md
+++ b/website/docs/api/language.md
@@ -828,8 +828,10 @@ subclass of the built-in `dict`. It supports the additional methods `to_disk`
 
 ## Language.to_disk {#to_disk tag="method" new="2"}
 
-Save the current state to a directory. If a trained pipeline is loaded, this
-will **include all model data**.
+Save the current state to a directory. Under the hood, this method delegates to
+the `to_disk` methods of the individual pipeline components, if available. This
+means that if a trained pipeline is loaded, all components and their weights
+will be saved to disk.
 
 > #### Example
 >
diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md
index 7d3613cf5..ff08d547c 100644
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@@ -1222,7 +1222,7 @@ print(doc.text, [token.text for token in doc])
 
 Keep in mind that your models' results may be less accurate if the tokenization
 during training differs from the tokenization at runtime. So if you modify a
-trained pipeline' tokenization afterwards, it may produce very different
+trained pipeline's tokenization afterwards, it may produce very different
 predictions. You should therefore train your pipeline with the **same
 tokenizer** it will be using at runtime. See the docs on
 [training with custom tokenization](#custom-tokenizer-training) for details.

From 121809dd1ec7f310b0385fa67a12edd2eb16eb19 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 3 Sep 2020 16:49:56 +0200
Subject: [PATCH 43/71] Fix anchor [ci skip]

---
 website/docs/usage/v3.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/usage/v3.md b/website/docs/usage/v3.md
index 8ddcfd9af..45ed7b0c8 100644
--- a/website/docs/usage/v3.md
+++ b/website/docs/usage/v3.md
@@ -389,7 +389,7 @@ Note that spaCy v3.0 now requires **Python 3.6+**.
 | `GoldCorpus`                                             | [`Corpus`](/api/corpus)                                                                                      |
 | `KnowledgeBase.load_bulk`, `KnowledgeBase.dump`          | [`KnowledgeBase.from_disk`](/api/kb#from_disk), [`KnowledgeBase.to_disk`](/api/kb#to_disk)                   |
 | `Matcher.pipe`, `PhraseMatcher.pipe`                     | not needed                                                                                                   |
-| `spacy init-model`                                       | [`spacy init vocab`](/api/cli#init.vocab)                                                                    |
+| `spacy init-model`                                       | [`spacy init vocab`](/api/cli#init-vocab)                                                                    |
 | `spacy debug-data`                                       | [`spacy debug data`](/api/cli#debug-data)                                                                    |
 | `spacy profile`                                          | [`spacy debug profile`](/api/cli#debug-profile)                                                              |
 | `spacy link`, `util.set_data_path`, `util.get_data_path` | not needed, symlinks are deprecated                                                                          |

From c53b1433b9f54192dedc41919877e9e672641e31 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 3 Sep 2020 17:12:24 +0200
Subject: [PATCH 44/71] Adjust more arguments [ci skip]

---
 spacy/cli/convert.py    |  2 +-
 spacy/cli/init_model.py |  4 ++--
 website/docs/api/cli.md | 10 ++++++----
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py
index 2a24bd145..84040a712 100644
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -44,7 +44,7 @@ def convert_cli(
     file_type: FileTypes = Opt("spacy", "--file-type", "-t", help="Type of data to produce"),
     n_sents: int = Opt(1, "--n-sents", "-n", help="Number of sentences per doc (0 to disable)"),
     seg_sents: bool = Opt(False, "--seg-sents", "-s", help="Segment sentences (for -c ner)"),
-    model: Optional[str] = Opt(None, "--model", "-b", help="Trained spaCy pipeline for sentence segmentation (for -s)"),
+    model: Optional[str] = Opt(None, "--model", "--base", "-b", help="Trained spaCy pipeline for sentence segmentation to use as base (for --seg-sents)"),
     morphology: bool = Opt(False, "--morphology", "-m", help="Enable appending morphology to tags"),
     merge_subtokens: bool = Opt(False, "--merge-subtokens", "-T", help="Merge CoNLL-U subtokens"),
     converter: str = Opt("auto", "--converter", "-c", help=f"Converter: {tuple(CONVERTERS.keys())}"),
diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py
index 071d5f659..4fd3025fd 100644
--- a/spacy/cli/init_model.py
+++ b/spacy/cli/init_model.py
@@ -46,8 +46,8 @@ def init_model_cli(
     prune_vectors: int = Opt(-1, "--prune-vectors", "-V", help="Optional number of vectors to prune to"),
     truncate_vectors: int = Opt(0, "--truncate-vectors", "-t", help="Optional number of vectors to truncate to when reading in vectors file"),
     vectors_name: Optional[str] = Opt(None, "--vectors-name", "-vn", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
-    model_name: Optional[str] = Opt(None, "--model-name", "-mn", help="Optional name for the pipeline meta"),
-    base_model: Optional[str] = Opt(None, "--base-model", "-b", help="Base pipeline (for languages with custom tokenizers)")
+    model_name: Optional[str] = Opt(None, "--meta-name", "-mn", help="Optional name of the package for the pipeline meta"),
+    base_model: Optional[str] = Opt(None, "--base", "-b", help="Name of or path to base pipeline to start with (mostly relevant for pipelines with custom tokenizers)")
     # fmt: on
 ):
     """
diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md
index 98da62eb3..aee285763 100644
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@@ -183,7 +183,7 @@ This command was previously called `init-model`.
 </Infobox>
 
 ```cli
-$ python -m spacy init vocab [lang] [output_dir] [--jsonl-loc] [--vectors-loc] [--prune-vectors]
+$ python -m spacy init vocab [lang] [output_dir] [--jsonl-loc] [--vectors-loc] [--prune-vectors] [--vectors-name] [--meta-name] [--base]
 ```
 
 | Name                                                    | Description                                                                                                                                                                                                                                                                         |
@@ -194,7 +194,9 @@ $ python -m spacy init vocab [lang] [output_dir] [--jsonl-loc] [--vectors-loc] [
 | `--vectors-loc`, `-v`                                   | Optional location of vectors. Should be a file where the first row contains the dimensions of the vectors, followed by a space-separated Word2Vec table. File can be provided in `.txt` format or as a zipped text file in `.zip` or `.tar.gz` format. ~~Optional[Path] \(option)~~ |
 | `--truncate-vectors`, `-t` <Tag variant="new">2.3</Tag> | Number of vectors to truncate to when reading in vectors file. Defaults to `0` for no truncation. ~~int (option)~~                                                                                                                                                                  |
 | `--prune-vectors`, `-V`                                 | Number of vectors to prune the vocabulary to. Defaults to `-1` for no pruning. ~~int (option)~~                                                                                                                                                                                     |
-| `--vectors-name`, `-vn`                                 | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`. ~~str (option)~~                                                                                                                                                                              |
+| `--vectors-name`, `-vn`                                 | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`. ~~Optional[str] \(option)~~                                                                                                                                                                   |
+| `--meta-name`, `-mn`                                    | Optional name of the package for the pipeline meta. ~~Optional[str] \(option)~~                                                                                                                                                                                                     |
+| `--base`, `-b`                                          | Optional name of or path to base pipeline to start with (mostly relevant for pipelines with custom tokenizers). ~~Optional[str] \(option)~~                                                                                                                                         |
 | `--help`, `-h`                                          | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                                                                                          |
 | **CREATES**                                             | A spaCy pipeline directory containing the vocab and vectors.                                                                                                                                                                                                                        |
 
@@ -207,7 +209,7 @@ management functions. The converter can be specified on the command line, or
 chosen based on the file extension of the input file.
 
 ```cli
-$ python -m spacy convert [input_file] [output_dir] [--converter] [--file-type] [--n-sents] [--seg-sents] [--model] [--morphology] [--merge-subtokens] [--ner-map] [--lang]
+$ python -m spacy convert [input_file] [output_dir] [--converter] [--file-type] [--n-sents] [--seg-sents] [--base] [--morphology] [--merge-subtokens] [--ner-map] [--lang]
 ```
 
 | Name                                             | Description                                                                                                                               |
@@ -218,7 +220,7 @@ $ python -m spacy convert [input_file] [output_dir] [--converter] [--file-type]
 | `--file-type`, `-t` <Tag variant="new">2.1</Tag> | Type of file to create. Either `spacy` (default) for binary [`DocBin`](/api/docbin) data or `json` for v2.x JSON format. ~~str (option)~~ |
 | `--n-sents`, `-n`                                | Number of sentences per document. ~~int (option)~~                                                                                        |
 | `--seg-sents`, `-s` <Tag variant="new">2.2</Tag> | Segment sentences (for `--converter ner`). ~~bool (flag)~~                                                                                |
-| `--model`, `-b` <Tag variant="new">2.2</Tag>     | Model for parser-based sentence segmentation (for `--seg-sents`). ~~Optional[str](option)~~                                               |
+| `--base`, `-b`                                   | Trained spaCy pipeline for sentence segmentation to use as base (for `--seg-sents`). ~~Optional[str](option)~~                            |
 | `--morphology`, `-m`                             | Enable appending morphology to tags. ~~bool (flag)~~                                                                                      |
 | `--ner-map`, `-nm`                               | NER tag mapping (as JSON-encoded dict of entity types). ~~Optional[Path](option)~~                                                        |
 | `--lang`, `-l` <Tag variant="new">2.1</Tag>      | Language code (if tokenizer required). ~~Optional[str] \(option)~~                                                                        |

From 804f1203612452efa7c7673318f4880b33424836 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 3 Sep 2020 17:29:47 +0200
Subject: [PATCH 45/71] Don't use registered function version in title

---
 website/docs/api/top-level.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index 6e52585ee..64282c275 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -364,7 +364,7 @@ results to a [Weights & Biases](https://www.wandb.com/) dashboard. Instead of
 using one of the built-in loggers listed here, you can also
 [implement your own](/usage/training#custom-logging).
 
-#### spacy.ConsoleLogger.v1 {#ConsoleLogger tag="registered function"}
+#### spacy.ConsoleLogger {#ConsoleLogger tag="registered function"}
 
 > #### Example config
 >
@@ -410,7 +410,7 @@ start decreasing across epochs.
 
  </Accordion>
 
-#### spacy.WandbLogger.v1 {#WandbLogger tag="registered function"}
+#### spacy.WandbLogger {#WandbLogger tag="registered function"}
 
 > #### Installation
 >
@@ -466,7 +466,7 @@ Instead of using one of the built-in batchers listed here, you can also
 [implement your own](/usage/training#custom-code-readers-batchers), which may or
 may not use a custom schedule.
 
-#### batch_by_words.v1 {#batch_by_words tag="registered function"}
+#### batch_by_words {#batch_by_words tag="registered function"}
 
 Create minibatches of roughly a given number of words. If any examples are
 longer than the specified batch length, they will appear in a batch by
@@ -493,7 +493,7 @@ themselves, or be discarded if `discard_oversize` is set to `True`. The argument
 | `discard_oversize` | Whether to discard sequences that by themselves exceed the tolerated size. ~~bool~~                                                                                                     |
 | `get_length`       | Optional function that receives a sequence item and returns its length. Defaults to the built-in `len()` if not set. ~~Optional[Callable[[Any], int]]~~                                 |
 
-#### batch_by_sequence.v1 {#batch_by_sequence tag="registered function"}
+#### batch_by_sequence {#batch_by_sequence tag="registered function"}
 
 > #### Example config
 >
@@ -511,7 +511,7 @@ Create a batcher that creates batches of the specified size.
 | `size`       | The target number of items per batch. Can also be a block referencing a schedule, e.g. [`compounding`](https://thinc.ai/docs/api-schedules/#compounding). ~~Union[int, Sequence[int]]~~ |
 | `get_length` | Optional function that receives a sequence item and returns its length. Defaults to the built-in `len()` if not set. ~~Optional[Callable[[Any], int]]~~                                 |
 
-#### batch_by_padded.v1 {#batch_by_padded tag="registered function"}
+#### batch_by_padded {#batch_by_padded tag="registered function"}
 
 > #### Example config
 >

From c063e55eb7ab673c52600890cc5ae997d5362c83 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 3 Sep 2020 17:30:41 +0200
Subject: [PATCH 46/71] Add prefix to batchers

---
 extra/experiments/onto-joint/defaults.cfg        | 2 +-
 extra/experiments/ptb-joint-pos-dep/defaults.cfg | 2 +-
 spacy/cli/templates/quickstart_training.jinja    | 4 ++--
 spacy/default_config.cfg                         | 2 +-
 spacy/gold/batchers.py                           | 6 +++---
 spacy/tests/serialize/test_serialize_config.py   | 2 +-
 website/docs/api/cli.md                          | 4 ++--
 website/docs/api/top-level.md                    | 6 +++---
 8 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/extra/experiments/onto-joint/defaults.cfg b/extra/experiments/onto-joint/defaults.cfg
index 3ab3ddaba..7954b57b5 100644
--- a/extra/experiments/onto-joint/defaults.cfg
+++ b/extra/experiments/onto-joint/defaults.cfg
@@ -36,7 +36,7 @@ max_length = 0
 limit = 0
 
 [training.batcher]
-@batchers = "batch_by_words.v1"
+@batchers = "spacy.batch_by_words.v1"
 discard_oversize = false
 tolerance = 0.2
 
diff --git a/extra/experiments/ptb-joint-pos-dep/defaults.cfg b/extra/experiments/ptb-joint-pos-dep/defaults.cfg
index fc471ac43..8f9c5666e 100644
--- a/extra/experiments/ptb-joint-pos-dep/defaults.cfg
+++ b/extra/experiments/ptb-joint-pos-dep/defaults.cfg
@@ -35,7 +35,7 @@ max_length = 0
 limit = 0
 
 [training.batcher]
-@batchers = "batch_by_words.v1"
+@batchers = "spacy.batch_by_words.v1"
 discard_oversize = false
 tolerance = 0.2
 
diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index fa9bb6d76..4b06abc0f 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -204,13 +204,13 @@ max_length = 0
 
 {% if use_transformer %}
 [training.batcher]
-@batchers = "batch_by_padded.v1"
+@batchers = "spacy.batch_by_padded.v1"
 discard_oversize = true
 size = 2000
 buffer = 256
 {%- else %}
 [training.batcher]
-@batchers = "batch_by_words.v1"
+@batchers = "spacy.batch_by_words.v1"
 discard_oversize = false
 tolerance = 0.2
 
diff --git a/spacy/default_config.cfg b/spacy/default_config.cfg
index d76ef630d..9507f0f0a 100644
--- a/spacy/default_config.cfg
+++ b/spacy/default_config.cfg
@@ -69,7 +69,7 @@ max_length = 2000
 limit = 0
 
 [training.batcher]
-@batchers = "batch_by_words.v1"
+@batchers = "spacy.batch_by_words.v1"
 discard_oversize = false
 tolerance = 0.2
 
diff --git a/spacy/gold/batchers.py b/spacy/gold/batchers.py
index ec1f35815..c54242eae 100644
--- a/spacy/gold/batchers.py
+++ b/spacy/gold/batchers.py
@@ -11,7 +11,7 @@ ItemT = TypeVar("ItemT")
 BatcherT = Callable[[Iterable[ItemT]], Iterable[List[ItemT]]]
 
 
-@registry.batchers("batch_by_padded.v1")
+@registry.batchers("spacy.batch_by_padded.v1")
 def configure_minibatch_by_padded_size(
     *,
     size: Sizing,
@@ -46,7 +46,7 @@ def configure_minibatch_by_padded_size(
     )
 
 
-@registry.batchers("batch_by_words.v1")
+@registry.batchers("spacy.batch_by_words.v1")
 def configure_minibatch_by_words(
     *,
     size: Sizing,
@@ -70,7 +70,7 @@ def configure_minibatch_by_words(
     )
 
 
-@registry.batchers("batch_by_sequence.v1")
+@registry.batchers("spacy.batch_by_sequence.v1")
 def configure_minibatch(
     size: Sizing, get_length: Optional[Callable[[ItemT], int]] = None
 ) -> BatcherT:
diff --git a/spacy/tests/serialize/test_serialize_config.py b/spacy/tests/serialize/test_serialize_config.py
index fde92b0af..0ab212fda 100644
--- a/spacy/tests/serialize/test_serialize_config.py
+++ b/spacy/tests/serialize/test_serialize_config.py
@@ -28,7 +28,7 @@ path = ${paths.train}
 path = ${paths.dev}
 
 [training.batcher]
-@batchers = "batch_by_words.v1"
+@batchers = "spacy.batch_by_words.v1"
 size = 666
 
 [nlp]
diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md
index aee285763..7852d0482 100644
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@@ -271,7 +271,7 @@ training -> dropout     field required
 training -> optimizer   field required
 training -> optimize    extra fields not permitted
 
-{'vectors': 'en_vectors_web_lg', 'seed': 0, 'accumulate_gradient': 1, 'init_tok2vec': None, 'raw_text': None, 'patience': 1600, 'max_epochs': 0, 'max_steps': 20000, 'eval_frequency': 200, 'frozen_components': [], 'optimize': None, 'batcher': {'@batchers': 'batch_by_words.v1', 'discard_oversize': False, 'tolerance': 0.2, 'get_length': None, 'size': {'@schedules': 'compounding.v1', 'start': 100, 'stop': 1000, 'compound': 1.001, 't': 0.0}}, 'dev_corpus': {'@readers': 'spacy.Corpus.v1', 'path': '', 'max_length': 0, 'gold_preproc': False, 'limit': 0}, 'score_weights': {'tag_acc': 0.5, 'dep_uas': 0.25, 'dep_las': 0.25, 'sents_f': 0.0}, 'train_corpus': {'@readers': 'spacy.Corpus.v1', 'path': '', 'max_length': 0, 'gold_preproc': False, 'limit': 0}}
+{'vectors': 'en_vectors_web_lg', 'seed': 0, 'accumulate_gradient': 1, 'init_tok2vec': None, 'raw_text': None, 'patience': 1600, 'max_epochs': 0, 'max_steps': 20000, 'eval_frequency': 200, 'frozen_components': [], 'optimize': None, 'batcher': {'@batchers': 'spacy.batch_by_words.v1', 'discard_oversize': False, 'tolerance': 0.2, 'get_length': None, 'size': {'@schedules': 'compounding.v1', 'start': 100, 'stop': 1000, 'compound': 1.001, 't': 0.0}}, 'dev_corpus': {'@readers': 'spacy.Corpus.v1', 'path': '', 'max_length': 0, 'gold_preproc': False, 'limit': 0}, 'score_weights': {'tag_acc': 0.5, 'dep_uas': 0.25, 'dep_las': 0.25, 'sents_f': 0.0}, 'train_corpus': {'@readers': 'spacy.Corpus.v1', 'path': '', 'max_length': 0, 'gold_preproc': False, 'limit': 0}}
 
 If your config contains missing values, you can run the 'init fill-config'
 command to fill in all the defaults, if possible:
@@ -361,7 +361,7 @@ Module     spacy.gold.loggers
 File       /path/to/spacy/gold/loggers.py (line 8)
 ℹ [training.batcher]
 Registry   @batchers
-Name       batch_by_words.v1
+Name       spacy.batch_by_words.v1
 Module     spacy.gold.batchers
 File       /path/to/spacy/gold/batchers.py (line 49)
 ℹ [training.batcher.size]
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index 64282c275..0fe48e736 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -478,7 +478,7 @@ themselves, or be discarded if `discard_oversize` is set to `True`. The argument
 >
 > ```ini
 > [training.batcher]
-> @batchers = "batch_by_words.v1"
+> @batchers = "spacy.batch_by_words.v1"
 > size = 100
 > tolerance = 0.2
 > discard_oversize = false
@@ -499,7 +499,7 @@ themselves, or be discarded if `discard_oversize` is set to `True`. The argument
 >
 > ```ini
 > [training.batcher]
-> @batchers = "batch_by_sequence.v1"
+> @batchers = "spacy.batch_by_sequence.v1"
 > size = 32
 > get_length = null
 > ```
@@ -517,7 +517,7 @@ Create a batcher that creates batches of the specified size.
 >
 > ```ini
 > [training.batcher]
-> @batchers = "batch_by_padded.v1"
+> @batchers = "spacy.batch_by_padded.v1"
 > size = 100
 > buffer = 256
 > discard_oversize = false

From 5afe6447cd835bb6ce4e21adb37340e2e2c34019 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 3 Sep 2020 17:31:14 +0200
Subject: [PATCH 47/71] registry.assets -> registry.misc

---
 spacy/ml/models/entity_linker.py            |  6 ++--
 spacy/pipeline/entity_linker.py             |  4 +--
 spacy/tests/lang/test_lemmatizers.py        |  6 ++--
 spacy/tests/pipeline/test_attributeruler.py |  4 +--
 spacy/tests/pipeline/test_entity_linker.py  | 20 ++++++------
 spacy/tests/pipeline/test_lemmatizer.py     | 16 ++++-----
 spacy/tests/regression/test_issue5230.py    |  4 +--
 spacy/tests/serialize/test_serialize_kb.py  |  4 +--
 spacy/util.py                               |  2 +-
 website/docs/api/architectures.md           |  4 +--
 website/docs/api/entitylinker.md            |  6 ++--
 website/docs/api/top-level.md               |  2 +-
 website/docs/usage/processing-pipelines.md  | 36 +++++++++++++--------
 13 files changed, 60 insertions(+), 54 deletions(-)

diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py
index 6792f3e59..d945e5fba 100644
--- a/spacy/ml/models/entity_linker.py
+++ b/spacy/ml/models/entity_linker.py
@@ -24,7 +24,7 @@ def build_nel_encoder(tok2vec: Model, nO: Optional[int] = None) -> Model:
     return model
 
 
-@registry.assets.register("spacy.KBFromFile.v1")
+@registry.misc.register("spacy.KBFromFile.v1")
 def load_kb(kb_path: str) -> Callable[[Vocab], KnowledgeBase]:
     def kb_from_file(vocab):
         kb = KnowledgeBase(vocab, entity_vector_length=1)
@@ -34,7 +34,7 @@ def load_kb(kb_path: str) -> Callable[[Vocab], KnowledgeBase]:
     return kb_from_file
 
 
-@registry.assets.register("spacy.EmptyKB.v1")
+@registry.misc.register("spacy.EmptyKB.v1")
 def empty_kb(entity_vector_length: int) -> Callable[[Vocab], KnowledgeBase]:
     def empty_kb_factory(vocab):
         return KnowledgeBase(vocab=vocab, entity_vector_length=entity_vector_length)
@@ -42,6 +42,6 @@ def empty_kb(entity_vector_length: int) -> Callable[[Vocab], KnowledgeBase]:
     return empty_kb_factory
 
 
-@registry.assets.register("spacy.CandidateGenerator.v1")
+@registry.misc.register("spacy.CandidateGenerator.v1")
 def create_candidates() -> Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]:
     return get_candidates
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index c45cdce75..78cf274ab 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -39,12 +39,12 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]
     requires=["doc.ents", "doc.sents", "token.ent_iob", "token.ent_type"],
     assigns=["token.ent_kb_id"],
     default_config={
-        "kb_loader": {"@assets": "spacy.EmptyKB.v1", "entity_vector_length": 64},
+        "kb_loader": {"@misc": "spacy.EmptyKB.v1", "entity_vector_length": 64},
         "model": DEFAULT_NEL_MODEL,
         "labels_discard": [],
         "incl_prior": True,
         "incl_context": True,
-        "get_candidates": {"@assets": "spacy.CandidateGenerator.v1"},
+        "get_candidates": {"@misc": "spacy.CandidateGenerator.v1"},
     },
 )
 def make_entity_linker(
diff --git a/spacy/tests/lang/test_lemmatizers.py b/spacy/tests/lang/test_lemmatizers.py
index 8c235c86e..14c59659a 100644
--- a/spacy/tests/lang/test_lemmatizers.py
+++ b/spacy/tests/lang/test_lemmatizers.py
@@ -14,7 +14,7 @@ LANGUAGES = ["el", "en", "fr", "nl"]
 
 @pytest.mark.parametrize("lang", LANGUAGES)
 def test_lemmatizer_initialize(lang, capfd):
-    @registry.assets("lemmatizer_init_lookups")
+    @registry.misc("lemmatizer_init_lookups")
     def lemmatizer_init_lookups():
         lookups = Lookups()
         lookups.add_table("lemma_lookup", {"cope": "cope"})
@@ -25,9 +25,7 @@ def test_lemmatizer_initialize(lang, capfd):
 
     """Test that languages can be initialized."""
     nlp = get_lang_class(lang)()
-    nlp.add_pipe(
-        "lemmatizer", config={"lookups": {"@assets": "lemmatizer_init_lookups"}}
-    )
+    nlp.add_pipe("lemmatizer", config={"lookups": {"@misc": "lemmatizer_init_lookups"}})
     # Check for stray print statements (see #3342)
     doc = nlp("test")  # noqa: F841
     captured = capfd.readouterr()
diff --git a/spacy/tests/pipeline/test_attributeruler.py b/spacy/tests/pipeline/test_attributeruler.py
index 96361a693..c12a2b650 100644
--- a/spacy/tests/pipeline/test_attributeruler.py
+++ b/spacy/tests/pipeline/test_attributeruler.py
@@ -31,7 +31,7 @@ def pattern_dicts():
     ]
 
 
-@registry.assets("attribute_ruler_patterns")
+@registry.misc("attribute_ruler_patterns")
 def attribute_ruler_patterns():
     return [
         {
@@ -86,7 +86,7 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
     # initialize with patterns from asset
     nlp.add_pipe(
         "attribute_ruler",
-        config={"pattern_dicts": {"@assets": "attribute_ruler_patterns"}},
+        config={"pattern_dicts": {"@misc": "attribute_ruler_patterns"}},
     )
     doc = nlp("This is a test.")
     assert doc[2].lemma_ == "the"
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index 4385d2bf9..4eaa71272 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -137,7 +137,7 @@ def test_kb_undefined(nlp):
 
 def test_kb_empty(nlp):
     """Test that the EL can't train with an empty KB"""
-    config = {"kb_loader": {"@assets": "spacy.EmptyKB.v1", "entity_vector_length": 342}}
+    config = {"kb_loader": {"@misc": "spacy.EmptyKB.v1", "entity_vector_length": 342}}
     entity_linker = nlp.add_pipe("entity_linker", config=config)
     assert len(entity_linker.kb) == 0
     with pytest.raises(ValueError):
@@ -183,7 +183,7 @@ def test_el_pipe_configuration(nlp):
     ruler = nlp.add_pipe("entity_ruler")
     ruler.add_patterns([pattern])
 
-    @registry.assets.register("myAdamKB.v1")
+    @registry.misc.register("myAdamKB.v1")
     def mykb() -> Callable[["Vocab"], KnowledgeBase]:
         def create_kb(vocab):
             kb = KnowledgeBase(vocab, entity_vector_length=1)
@@ -199,7 +199,7 @@ def test_el_pipe_configuration(nlp):
     # run an EL pipe without a trained context encoder, to check the candidate generation step only
     nlp.add_pipe(
         "entity_linker",
-        config={"kb_loader": {"@assets": "myAdamKB.v1"}, "incl_context": False},
+        config={"kb_loader": {"@misc": "myAdamKB.v1"}, "incl_context": False},
     )
     # With the default get_candidates function, matching is case-sensitive
     text = "Douglas and douglas are not the same."
@@ -211,7 +211,7 @@ def test_el_pipe_configuration(nlp):
     def get_lowercased_candidates(kb, span):
         return kb.get_alias_candidates(span.text.lower())
 
-    @registry.assets.register("spacy.LowercaseCandidateGenerator.v1")
+    @registry.misc.register("spacy.LowercaseCandidateGenerator.v1")
     def create_candidates() -> Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]:
         return get_lowercased_candidates
 
@@ -220,9 +220,9 @@ def test_el_pipe_configuration(nlp):
         "entity_linker",
         "entity_linker",
         config={
-            "kb_loader": {"@assets": "myAdamKB.v1"},
+            "kb_loader": {"@misc": "myAdamKB.v1"},
             "incl_context": False,
-            "get_candidates": {"@assets": "spacy.LowercaseCandidateGenerator.v1"},
+            "get_candidates": {"@misc": "spacy.LowercaseCandidateGenerator.v1"},
         },
     )
     doc = nlp(text)
@@ -282,7 +282,7 @@ def test_append_invalid_alias(nlp):
 def test_preserving_links_asdoc(nlp):
     """Test that Span.as_doc preserves the existing entity links"""
 
-    @registry.assets.register("myLocationsKB.v1")
+    @registry.misc.register("myLocationsKB.v1")
     def dummy_kb() -> Callable[["Vocab"], KnowledgeBase]:
         def create_kb(vocab):
             mykb = KnowledgeBase(vocab, entity_vector_length=1)
@@ -304,7 +304,7 @@ def test_preserving_links_asdoc(nlp):
     ]
     ruler = nlp.add_pipe("entity_ruler")
     ruler.add_patterns(patterns)
-    el_config = {"kb_loader": {"@assets": "myLocationsKB.v1"}, "incl_prior": False}
+    el_config = {"kb_loader": {"@misc": "myLocationsKB.v1"}, "incl_prior": False}
     el_pipe = nlp.add_pipe("entity_linker", config=el_config, last=True)
     el_pipe.begin_training(lambda: [])
     el_pipe.incl_context = False
@@ -387,7 +387,7 @@ def test_overfitting_IO():
         doc = nlp(text)
         train_examples.append(Example.from_dict(doc, annotation))
 
-    @registry.assets.register("myOverfittingKB.v1")
+    @registry.misc.register("myOverfittingKB.v1")
     def dummy_kb() -> Callable[["Vocab"], KnowledgeBase]:
         def create_kb(vocab):
             # create artificial KB - assign same prior weight to the two russ cochran's
@@ -408,7 +408,7 @@ def test_overfitting_IO():
     # Create the Entity Linker component and add it to the pipeline
     nlp.add_pipe(
         "entity_linker",
-        config={"kb_loader": {"@assets": "myOverfittingKB.v1"}},
+        config={"kb_loader": {"@misc": "myOverfittingKB.v1"}},
         last=True,
     )
 
diff --git a/spacy/tests/pipeline/test_lemmatizer.py b/spacy/tests/pipeline/test_lemmatizer.py
index 8a70fdeeb..05e15bc16 100644
--- a/spacy/tests/pipeline/test_lemmatizer.py
+++ b/spacy/tests/pipeline/test_lemmatizer.py
@@ -13,7 +13,7 @@ def nlp():
 
 @pytest.fixture
 def lemmatizer(nlp):
-    @registry.assets("cope_lookups")
+    @registry.misc("cope_lookups")
     def cope_lookups():
         lookups = Lookups()
         lookups.add_table("lemma_lookup", {"cope": "cope"})
@@ -23,13 +23,13 @@ def lemmatizer(nlp):
         return lookups
 
     lemmatizer = nlp.add_pipe(
-        "lemmatizer", config={"mode": "rule", "lookups": {"@assets": "cope_lookups"}}
+        "lemmatizer", config={"mode": "rule", "lookups": {"@misc": "cope_lookups"}}
     )
     return lemmatizer
 
 
 def test_lemmatizer_init(nlp):
-    @registry.assets("cope_lookups")
+    @registry.misc("cope_lookups")
     def cope_lookups():
         lookups = Lookups()
         lookups.add_table("lemma_lookup", {"cope": "cope"})
@@ -39,7 +39,7 @@ def test_lemmatizer_init(nlp):
         return lookups
 
     lemmatizer = nlp.add_pipe(
-        "lemmatizer", config={"mode": "lookup", "lookups": {"@assets": "cope_lookups"}}
+        "lemmatizer", config={"mode": "lookup", "lookups": {"@misc": "cope_lookups"}}
     )
     assert isinstance(lemmatizer.lookups, Lookups)
     assert lemmatizer.mode == "lookup"
@@ -51,14 +51,14 @@ def test_lemmatizer_init(nlp):
 
     nlp.remove_pipe("lemmatizer")
 
-    @registry.assets("empty_lookups")
+    @registry.misc("empty_lookups")
     def empty_lookups():
         return Lookups()
 
     with pytest.raises(ValueError):
         nlp.add_pipe(
             "lemmatizer",
-            config={"mode": "lookup", "lookups": {"@assets": "empty_lookups"}},
+            config={"mode": "lookup", "lookups": {"@misc": "empty_lookups"}},
         )
 
 
@@ -79,7 +79,7 @@ def test_lemmatizer_config(nlp, lemmatizer):
 
 
 def test_lemmatizer_serialize(nlp, lemmatizer):
-    @registry.assets("cope_lookups")
+    @registry.misc("cope_lookups")
     def cope_lookups():
         lookups = Lookups()
         lookups.add_table("lemma_lookup", {"cope": "cope"})
@@ -90,7 +90,7 @@ def test_lemmatizer_serialize(nlp, lemmatizer):
 
     nlp2 = English()
     lemmatizer2 = nlp2.add_pipe(
-        "lemmatizer", config={"mode": "rule", "lookups": {"@assets": "cope_lookups"}}
+        "lemmatizer", config={"mode": "rule", "lookups": {"@misc": "cope_lookups"}}
     )
     lemmatizer2.from_bytes(lemmatizer.to_bytes())
     assert lemmatizer.to_bytes() == lemmatizer2.to_bytes()
diff --git a/spacy/tests/regression/test_issue5230.py b/spacy/tests/regression/test_issue5230.py
index 78ae04bbb..af643aadc 100644
--- a/spacy/tests/regression/test_issue5230.py
+++ b/spacy/tests/regression/test_issue5230.py
@@ -71,7 +71,7 @@ def tagger():
 def entity_linker():
     nlp = Language()
 
-    @registry.assets.register("TestIssue5230KB.v1")
+    @registry.misc.register("TestIssue5230KB.v1")
     def dummy_kb() -> Callable[["Vocab"], KnowledgeBase]:
         def create_kb(vocab):
             kb = KnowledgeBase(vocab, entity_vector_length=1)
@@ -80,7 +80,7 @@ def entity_linker():
 
         return create_kb
 
-    config = {"kb_loader": {"@assets": "TestIssue5230KB.v1"}}
+    config = {"kb_loader": {"@misc": "TestIssue5230KB.v1"}}
     entity_linker = nlp.add_pipe("entity_linker", config=config)
     # need to add model for two reasons:
     # 1. no model leads to error in serialization,
diff --git a/spacy/tests/serialize/test_serialize_kb.py b/spacy/tests/serialize/test_serialize_kb.py
index 3cf5485d7..63736418b 100644
--- a/spacy/tests/serialize/test_serialize_kb.py
+++ b/spacy/tests/serialize/test_serialize_kb.py
@@ -85,7 +85,7 @@ def test_serialize_subclassed_kb():
             super().__init__(vocab, entity_vector_length)
             self.custom_field = custom_field
 
-    @registry.assets.register("spacy.CustomKB.v1")
+    @registry.misc.register("spacy.CustomKB.v1")
     def custom_kb(
         entity_vector_length: int, custom_field: int
     ) -> Callable[["Vocab"], KnowledgeBase]:
@@ -101,7 +101,7 @@ def test_serialize_subclassed_kb():
     nlp = English()
     config = {
         "kb_loader": {
-            "@assets": "spacy.CustomKB.v1",
+            "@misc": "spacy.CustomKB.v1",
             "entity_vector_length": 342,
             "custom_field": 666,
         }
diff --git a/spacy/util.py b/spacy/util.py
index 0eb76c3d1..fa4815df8 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -76,7 +76,7 @@ class registry(thinc.registry):
     lemmatizers = catalogue.create("spacy", "lemmatizers", entry_points=True)
     lookups = catalogue.create("spacy", "lookups", entry_points=True)
     displacy_colors = catalogue.create("spacy", "displacy_colors", entry_points=True)
-    assets = catalogue.create("spacy", "assets", entry_points=True)
+    misc = catalogue.create("spacy", "misc", entry_points=True)
     # Callback functions used to manipulate nlp object etc.
     callbacks = catalogue.create("spacy", "callbacks")
     batchers = catalogue.create("spacy", "batchers", entry_points=True)
diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md
index 93e50bfb3..35816a9a2 100644
--- a/website/docs/api/architectures.md
+++ b/website/docs/api/architectures.md
@@ -673,11 +673,11 @@ into the "real world". This requires 3 main components:
 > subword_features = true
 >
 > [kb_loader]
-> @assets = "spacy.EmptyKB.v1"
+> @misc = "spacy.EmptyKB.v1"
 > entity_vector_length = 64
 >
 > [get_candidates]
-> @assets = "spacy.CandidateGenerator.v1"
+> @misc = "spacy.CandidateGenerator.v1"
 > ```
 
 The `EntityLinker` model architecture is a Thinc `Model` with a
diff --git a/website/docs/api/entitylinker.md b/website/docs/api/entitylinker.md
index 637bd3c68..8cde6c490 100644
--- a/website/docs/api/entitylinker.md
+++ b/website/docs/api/entitylinker.md
@@ -34,8 +34,8 @@ architectures and their arguments and hyperparameters.
 >    "incl_prior": True,
 >    "incl_context": True,
 >    "model": DEFAULT_NEL_MODEL,
->    "kb_loader": {'@assets': 'spacy.EmptyKB.v1', 'entity_vector_length': 64},
->    "get_candidates": {'@assets': 'spacy.CandidateGenerator.v1'},
+>    "kb_loader": {'@misc': 'spacy.EmptyKB.v1', 'entity_vector_length': 64},
+>    "get_candidates": {'@misc': 'spacy.CandidateGenerator.v1'},
 > }
 > nlp.add_pipe("entity_linker", config=config)
 > ```
@@ -66,7 +66,7 @@ https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/entity_linker.py
 > entity_linker = nlp.add_pipe("entity_linker", config=config)
 >
 > # Construction via add_pipe with custom KB and candidate generation
-> config = {"kb": {"@assets": "my_kb.v1"}}
+> config = {"kb": {"@misc": "my_kb.v1"}}
 > entity_linker = nlp.add_pipe("entity_linker", config=config)
 >
 > # Construction from class
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index 0fe48e736..b9201ca39 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -307,7 +307,6 @@ factories.
 | Registry name     | Description                                                                                                                                                                                                                                        |
 | ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `architectures`   | Registry for functions that create [model architectures](/api/architectures). Can be used to register custom model architectures and reference them in the `config.cfg`.                                                                           |
-| `assets`          | Registry for data assets, knowledge bases etc.                                                                                                                                                                                                     |
 | `batchers`        | Registry for training and evaluation [data batchers](#batchers).                                                                                                                                                                                   |
 | `callbacks`       | Registry for custom callbacks to [modify the `nlp` object](/usage/training#custom-code-nlp-callbacks) before training.                                                                                                                             |
 | `displacy_colors` | Registry for custom color scheme for the [`displacy` NER visualizer](/usage/visualizers). Automatically reads from [entry points](/usage/saving-loading#entry-points).                                                                             |
@@ -322,6 +321,7 @@ factories.
 | `readers`         | Registry for training and evaluation data readers like [`Corpus`](/api/corpus).                                                                                                                                                                    |
 | `schedules`       | Registry for functions that create [schedules](https://thinc.ai/docs/api-schedules).                                                                                                                                                               |
 | `tokenizers`      | Registry for tokenizer factories. Registered functions should return a callback that receives the `nlp` object and returns a [`Tokenizer`](/api/tokenizer) or a custom callable.                                                                   |
+| `misc`            | Registry for miscellaneous functions that return data assets, knowledge bases or anything else you may need.                                                                                                                                       |
 
 ### spacy-transformers registry {#registry-transformers}
 
diff --git a/website/docs/usage/processing-pipelines.md b/website/docs/usage/processing-pipelines.md
index c8702a147..2885d9f50 100644
--- a/website/docs/usage/processing-pipelines.md
+++ b/website/docs/usage/processing-pipelines.md
@@ -842,12 +842,20 @@ load and train custom pipelines with custom components. A simple solution is to
 **register a function** that returns your resources. The
 [registry](/api/top-level#registry) lets you **map string names to functions**
 that create objects, so given a name and optional arguments, spaCy will know how
-to recreate the object. To register a function that returns a custom asset, you
-can use the `@spacy.registry.assets` decorator with a single argument, the name:
+to recreate the object. To register a function that returns your custom
+dictionary, you can use the `@spacy.registry.misc` decorator with a single
+argument, the name:
+
+> #### What's the misc registry?
+>
+> The [`registry`](/api/top-level#registry) provides different categories for
+> different types of functions – for example, model architectures, tokenizers or
+> batchers. `misc` is intended for miscellaneous functions that don't fit
+> anywhere else.
 
 ```python
 ### Registered function for assets {highlight="1"}
-@spacy.registry.assets("acronyms.slang_dict.v1")
+@spacy.registry.misc("acronyms.slang_dict.v1")
 def create_acronyms_slang_dict():
     dictionary = {"lol": "laughing out loud", "brb": "be right back"}
     dictionary.update({value: key for key, value in dictionary.items()})
@@ -856,9 +864,9 @@ def create_acronyms_slang_dict():
 
 In your `default_config` (and later in your
 [training config](/usage/training#config)), you can now refer to the function
-registered under the name `"acronyms.slang_dict.v1"` using the `@assets` key.
-This tells spaCy how to create the value, and when your component is created,
-the result of the registered function is passed in as the key `"dictionary"`.
+registered under the name `"acronyms.slang_dict.v1"` using the `@misc` key. This
+tells spaCy how to create the value, and when your component is created, the
+result of the registered function is passed in as the key `"dictionary"`.
 
 > #### config.cfg
 >
@@ -867,22 +875,22 @@ the result of the registered function is passed in as the key `"dictionary"`.
 > factory = "acronyms"
 >
 > [components.acronyms.dictionary]
-> @assets = "acronyms.slang_dict.v1"
+> @misc = "acronyms.slang_dict.v1"
 > ```
 
 ```diff
 - default_config = {"dictionary:" DICTIONARY}
-+ default_config = {"dictionary": {"@assets": "acronyms.slang_dict.v1"}}
++ default_config = {"dictionary": {"@misc": "acronyms.slang_dict.v1"}}
 ```
 
 Using a registered function also means that you can easily include your custom
 components in pipelines that you [train](/usage/training). To make sure spaCy
-knows where to find your custom `@assets` function, you can pass in a Python
-file via the argument `--code`. If someone else is using your component, all
-they have to do to customize the data is to register their own function and swap
-out the name. Registered functions can also take **arguments** by the way that
-can be defined in the config as well – you can read more about this in the docs
-on [training with custom code](/usage/training#custom-code).
+knows where to find your custom `@misc` function, you can pass in a Python file
+via the argument `--code`. If someone else is using your component, all they
+have to do to customize the data is to register their own function and swap out
+the name. Registered functions can also take **arguments**, which can be
+defined in the config as well – you can read more about this in the docs on
+[training with custom code](/usage/training#custom-code).
 
 ### Python type hints and pydantic validation {#type-hints new="3"}
 

From 23b7d9cfa3c061fa986c0077b07669759b6d37ba Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 3 Sep 2020 17:37:06 +0200
Subject: [PATCH 48/71] Prefix span getters

---
 spacy/cli/templates/quickstart_training.jinja | 2 +-
 website/docs/api/architectures.md             | 2 +-
 website/docs/api/transformer.md               | 8 ++++----
 website/docs/usage/embeddings-transformers.md | 7 ++++---
 4 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index 4b06abc0f..43c852d13 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -29,7 +29,7 @@ name = "{{ transformer["name"] }}"
 tokenizer_config = {"use_fast": true}
 
 [components.transformer.model.get_spans]
-@span_getters = "strided_spans.v1"
+@span_getters = "spacy-transformers.strided_spans.v1"
 window = 128
 stride = 96
 
diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md
index 35816a9a2..ee844d961 100644
--- a/website/docs/api/architectures.md
+++ b/website/docs/api/architectures.md
@@ -320,7 +320,7 @@ for details and system requirements.
 > tokenizer_config = {"use_fast": true}
 >
 > [model.get_spans]
-> @span_getters = "strided_spans.v1"
+> @span_getters = "spacy-transformers.strided_spans.v1"
 > window = 128
 > stride = 96
 > ```
diff --git a/website/docs/api/transformer.md b/website/docs/api/transformer.md
index 5ac95cb29..b41a18890 100644
--- a/website/docs/api/transformer.md
+++ b/website/docs/api/transformer.md
@@ -453,7 +453,7 @@ using the `@spacy.registry.span_getters` decorator.
 > #### Example
 >
 > ```python
-> @spacy.registry.span_getters("sent_spans.v1")
+> @spacy.registry.span_getters("custom_sent_spans")
 > def configure_get_sent_spans() -> Callable:
 >     def get_sent_spans(docs: Iterable[Doc]) -> List[List[Span]]:
 >         return [list(doc.sents) for doc in docs]
@@ -472,7 +472,7 @@ using the `@spacy.registry.span_getters` decorator.
 >
 > ```ini
 > [transformer.model.get_spans]
-> @span_getters = "doc_spans.v1"
+> @span_getters = "spacy-transformers.doc_spans.v1"
 > ```
 
 Create a span getter that uses the whole document as its spans. This is the best
@@ -485,7 +485,7 @@ texts.
 >
 > ```ini
 > [transformer.model.get_spans]
-> @span_getters = "sent_spans.v1"
+> @span_getters = "spacy-transformers.sent_spans.v1"
 > ```
 
 Create a span getter that uses sentence boundary markers to extract the spans.
@@ -500,7 +500,7 @@ more meaningful windows to attend over.
 >
 > ```ini
 > [transformer.model.get_spans]
-> @span_getters = "strided_spans.v1"
+> @span_getters = "spacy-transformers.strided_spans.v1"
 > window = 128
 > stride = 96
 > ```
diff --git a/website/docs/usage/embeddings-transformers.md b/website/docs/usage/embeddings-transformers.md
index 7792ce124..abd92a8ac 100644
--- a/website/docs/usage/embeddings-transformers.md
+++ b/website/docs/usage/embeddings-transformers.md
@@ -331,7 +331,7 @@ name = "bert-base-cased"
 tokenizer_config = {"use_fast": true}
 
 [components.transformer.model.get_spans]
-@span_getters = "doc_spans.v1"
+@span_getters = "spacy-transformers.doc_spans.v1"
 
 [components.transformer.annotation_setter]
 @annotation_setters = "spacy-transformers.null_annotation_setter.v1"
@@ -369,8 +369,9 @@ all defaults.
 
 To change any of the settings, you can edit the `config.cfg` and re-run the
 training. To change any of the functions, like the span getter, you can replace
-the name of the referenced function – e.g. `@span_getters = "sent_spans.v1"` to
-process sentences. You can also register your own functions using the
+the name of the referenced function – e.g.
+`@span_getters = "spacy-transformers.sent_spans.v1"` to process sentences. You
+can also register your own functions using the
 [`span_getters` registry](/api/top-level#registry). For instance, the following
 custom function returns [`Span`](/api/span) objects following sentence
 boundaries, unless a sentence succeeds a certain amount of tokens, in which case

From b1eb98b15c2bbca812def86226a58cd5d5eaec79 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 3 Sep 2020 17:43:58 +0200
Subject: [PATCH 49/71] Remove todos [ci skip]

---
 website/docs/usage/projects.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/website/docs/usage/projects.md b/website/docs/usage/projects.md
index 97e3abb6e..b6688cd5d 100644
--- a/website/docs/usage/projects.md
+++ b/website/docs/usage/projects.md
@@ -27,10 +27,6 @@ and share your results with your team. spaCy projects can be used via the new
 [`spacy project`](/api/cli#project) command and we provide templates in our
 [`projects`](https://github.com/explosion/projects) repo.
 
-<!-- TODO: mention integrations -->
-
-<!-- TODO: decide how to introduce concept -->
-
 ![Illustration of project workflow and commands](../images/projects.svg)
 
 <!-- TODO:

From 7be8a0516a667ec098adffa306cc93cec2b20083 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Thu, 3 Sep 2020 18:54:03 +0200
Subject: [PATCH 50/71] Fix project pull

---
 spacy/cli/project/pull.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/project/pull.py b/spacy/cli/project/pull.py
index 6c0f32171..7b597141f 100644
--- a/spacy/cli/project/pull.py
+++ b/spacy/cli/project/pull.py
@@ -38,5 +38,5 @@ def project_pull(project_dir: Path, remote: str, *, verbose: bool = False):
             url = storage.pull(output_path, command_hash=cmd_hash)
             yield url, output_path
 
-        if cmd.get("outptus") and all(loc.exists() for loc in cmd["outputs"]):
+        if cmd.get("outputs") and all(loc.exists() for loc in cmd["outputs"]):
             update_lockfile(project_dir, cmd)

From 4daf1381365cd52958d6d53e9dcc7022eb9f7509 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 3 Sep 2020 23:01:50 +0200
Subject: [PATCH 51/71] Fix alphabetic ordering [ci skip]

---
 website/docs/api/top-level.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index b9201ca39..7f2eb2e66 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -317,11 +317,11 @@ factories.
 | `loggers`         | Registry for functions that log [training results](/usage/training).                                                                                                                                                                               |
 | `lookups`         | Registry for large lookup tables available via `vocab.lookups`.                                                                                                                                                                                    |
 | `losses`          | Registry for functions that create [losses](https://thinc.ai/docs/api-loss).                                                                                                                                                                       |
+| `misc`            | Registry for miscellaneous functions that return data assets, knowledge bases or anything else you may need.                                                                                                                                       |
 | `optimizers`      | Registry for functions that create [optimizers](https://thinc.ai/docs/api-optimizers).                                                                                                                                                             |
 | `readers`         | Registry for training and evaluation data readers like [`Corpus`](/api/corpus).                                                                                                                                                                    |
 | `schedules`       | Registry for functions that create [schedules](https://thinc.ai/docs/api-schedules).                                                                                                                                                               |
 | `tokenizers`      | Registry for tokenizer factories. Registered functions should return a callback that receives the `nlp` object and returns a [`Tokenizer`](/api/tokenizer) or a custom callable.                                                                   |
-| `misc`            | Registry for miscellaneous functions that return data assets, knowledge bases or anything else you may need.                                                                                                                                       |
 
 ### spacy-transformers registry {#registry-transformers}
 

From 595f9dc2e4e29d73ef3fe8f991bbd0324d2faa31 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 3 Sep 2020 23:05:41 +0200
Subject: [PATCH 52/71] Make displacy color registry consistent with others

This was the only registry that expected the registered objects to be dictionaries instead of functions that return something. We can still support plain dicts, but we should also support functions for consistency.
---
 spacy/displacy/render.py | 6 ++++++
 spacy/errors.py          | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py
index 07550f9aa..984971812 100644
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -249,6 +249,12 @@ class EntityRenderer:
         colors = dict(DEFAULT_LABEL_COLORS)
         user_colors = registry.displacy_colors.get_all()
         for user_color in user_colors.values():
+            if callable(user_color):
+                # Since this comes from the function registry, we want to make
+                # sure we support functions that *return* a dict of colors
+                user_color = user_color()
+            if not isinstance(user_color, dict):
+                raise ValueError(Errors.E925.format(obj=type(user_color)))
             colors.update(user_color)
         colors.update(options.get("colors", {}))
         self.default_color = DEFAULT_ENTITY_COLOR
diff --git a/spacy/errors.py b/spacy/errors.py
index be71de820..165714d9e 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -476,6 +476,8 @@ class Errors:
     E199 = ("Unable to merge 0-length span at doc[{start}:{end}].")
 
     # TODO: fix numbering after merging develop into master
+    E925 = ("Invalid color values for displaCy visualizer: expected dictionary "
+            "mapping label names to colors but got: {obj}")
     E926 = ("It looks like you're trying to modify nlp.{attr} directly. This "
             "doesn't work because it's an immutable computed property. If you "
             "need to modify the pipeline, use the built-in methods like "

From 2160aafec689b72ccbb4c69cf75e132bfa86862a Mon Sep 17 00:00:00 2001
From: Brad Jascob <bjascob@msn.com>
Date: Fri, 4 Sep 2020 02:03:35 -0600
Subject: [PATCH 53/71] Updates spaCy Universe for amrlib (#6020)

* Updates spaCy Universe for amrlib

* Updates to doc based on feedback
---
 website/meta/universe.json | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/website/meta/universe.json b/website/meta/universe.json
index cf361435f..bd2cff65a 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -1967,6 +1967,28 @@
             "category": ["pipeline"],
             "tags": ["inflection", "lemmatizer"]
         },
+        {
+            "id": "amrlib",
+            "slogan": "A python library that makes AMR parsing, generation and visualization simple.",
+            "description": "amrlib is a python module and spaCy add-in for Abstract Meaning Representation (AMR).  The system can parse sentences to AMR graphs or generate text from existing graphs.  It includes a GUI for visualization and experimentation.",
+            "github": "bjascob/amrlib",
+            "pip": "amrlib",
+            "code_example": [
+                "import spacy",
+                "import amrlib",
+                "amrlib.setup_spacy_extension()",
+                "nlp = spacy.load('en_core_web_sm')",
+                "doc = nlp('This is a test of the spaCy extension. The test has multiple sentences.')",
+                "graphs = doc._.to_amr()",
+                "for graph in graphs:",
+                "    print(graph)"
+            ],
+            "author": "Brad Jascob",
+            "author_links": {
+                "github": "bjascob"
+            },
+            "category": ["pipeline"]
+        },
         {
             "id": "blackstone",
             "title": "Blackstone",

From 0a27fca55709e166775a51a14fd2645eab3a5ee6 Mon Sep 17 00:00:00 2001
From: holubvl3 <47881982+holubvl3@users.noreply.github.com>
Date: Fri, 4 Sep 2020 11:00:14 +0200
Subject: [PATCH 54/71] Create examples.py (#5985)

* Create examples.py

* Create tag_map.py

* Delete tag_map.py

* Update examples.py

formatting: add empty line

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 spacy/lang/cs/examples.py | 42 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 spacy/lang/cs/examples.py

diff --git a/spacy/lang/cs/examples.py b/spacy/lang/cs/examples.py
new file mode 100644
index 000000000..fe8a9f6d1
--- /dev/null
+++ b/spacy/lang/cs/examples.py
@@ -0,0 +1,42 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+>>> from spacy.lang.cs.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+   "Máma mele maso.",
+    "Příliš žluťoučký kůň úpěl ďábelské ódy.",
+    "ArcGIS je geografický informační systém určený pro práci s prostorovými daty." ,
+    "Může data vytvářet a spravovat, ale především je dokáže analyzovat, najít v nich nové vztahy a vše přehledně vizualizovat.",
+    "Dnes je krásné počasí.",
+    "Nestihl autobus, protože pozdě vstal z postele.",
+    "Než budeš jíst, jdi si umýt ruce.",
+    "Dnes je neděle.",
+    "Škola začíná v 8:00.",
+    "Poslední autobus jede v jedenáct hodin večer.",
+    "V roce 2020 se téměř zastavila světová ekonomika.",
+    "Praha je hlavní město České republiky.",
+    "Kdy půjdeš ven?",
+    "Kam pojedete na dovolenou?",
+    "Kolik stojí iPhone 12?",
+    "Průměrná mzda je 30000 Kč.",
+    "1. ledna 1993 byla založena Česká republika.",
+    "Co se stalo 21.8.1968?",
+    "Moje telefonní číslo je 712 345 678.",
+    "Můj pes má blechy.",
+    "Když bude přes noc více než 20°, tak nás čeká tropická noc.",
+    "Kolik bylo letos tropických nocí?",
+    "Jak to mám udělat?",
+    "Bydlíme ve čtvrtém patře.",
+    "Vysílají 30. sezonu seriálu Simpsonovi.",
+    "Adresa ČVUT je Thákurova 7, 166 29, Praha 6.",
+    "Jaké PSČ má Praha 1?",
+    "PSČ Prahy 1 je 110 00.",
+    "Za 20 minut jede vlak.",
+    ]

From ab1bb421edcedfcbad884fa410f891883882d88a Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Fri, 4 Sep 2020 12:58:50 +0200
Subject: [PATCH 55/71] Update docs links in codebase

---
 spacy/cli/_util.py                      |  2 +-
 spacy/cli/convert.py                    |  4 +-
 spacy/cli/debug_config.py               |  2 +
 spacy/cli/debug_data.py                 |  2 +
 spacy/cli/debug_model.py                |  2 +
 spacy/cli/download.py                   |  4 +-
 spacy/cli/evaluate.py                   | 15 +++---
 spacy/cli/info.py                       |  2 +
 spacy/cli/init_config.py                |  4 ++
 spacy/cli/init_model.py                 |  4 +-
 spacy/cli/package.py                    |  2 +
 spacy/cli/pretrain.py                   |  2 +
 spacy/cli/profile.py                    |  2 +
 spacy/cli/project/assets.py             |  2 +
 spacy/cli/project/clone.py              |  2 +
 spacy/cli/project/document.py           |  2 +
 spacy/cli/project/dvc.py                |  5 +-
 spacy/cli/project/pull.py               |  4 +-
 spacy/cli/project/push.py               |  9 ++--
 spacy/cli/project/run.py                |  2 +
 spacy/cli/train.py                      |  2 +
 spacy/cli/validate.py                   |  2 +
 spacy/displacy/__init__.py              | 12 ++---
 spacy/errors.py                         | 12 ++---
 spacy/gold/converters/conll_ner2docs.py |  2 +-
 spacy/gold/converters/iob2docs.py       |  2 +-
 spacy/gold/corpus.py                    |  4 +-
 spacy/kb.pyx                            |  4 +-
 spacy/language.py                       | 54 ++++++++++-----------
 spacy/lexeme.pyx                        |  2 +-
 spacy/lookups.py                        | 28 +++++------
 spacy/matcher/matcher.pyx               |  4 +-
 spacy/matcher/phrasematcher.pyx         | 16 +++----
 spacy/pipeline/attributeruler.py        | 24 +++++-----
 spacy/pipeline/dep_parser.pyx           |  4 +-
 spacy/pipeline/entity_linker.py         | 20 ++++----
 spacy/pipeline/entityruler.py           | 24 +++++-----
 spacy/pipeline/functions.py             |  6 +--
 spacy/pipeline/lemmatizer.py            | 28 +++++------
 spacy/pipeline/morphologizer.pyx        | 20 ++++----
 spacy/pipeline/ner.pyx                  |  4 +-
 spacy/pipeline/pipe.pyx                 | 36 +++++++-------
 spacy/pipeline/sentencizer.pyx          | 18 +++----
 spacy/pipeline/senter.pyx               | 20 ++++----
 spacy/pipeline/simple_ner.py            |  2 +-
 spacy/pipeline/tagger.pyx               | 34 +++++++-------
 spacy/pipeline/textcat.py               | 24 +++++-----
 spacy/pipeline/tok2vec.py               | 14 +++---
 spacy/scorer.py                         | 14 +++---
 spacy/strings.pyx                       |  2 +-
 spacy/tokenizer.pyx                     | 26 +++++------
 spacy/tokens/_retokenize.pyx            |  8 ++--
 spacy/tokens/_serialize.py              | 16 +++----
 spacy/tokens/doc.pyx                    | 62 ++++++++++++-------------
 spacy/tokens/span.pyx                   | 50 ++++++++++----------
 spacy/tokens/token.pyx                  | 50 ++++++++++----------
 spacy/vectors.pyx                       | 38 +++++++--------
 spacy/vocab.pyx                         | 26 +++++------
 58 files changed, 416 insertions(+), 371 deletions(-)

diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index 6a24a4ba4..0ecb5ad8f 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -25,7 +25,7 @@ COMMAND = "python -m spacy"
 NAME = "spacy"
 HELP = """spaCy Command-line Interface
 
-DOCS: https://spacy.io/api/cli
+DOCS: https://nightly.spacy.io/api/cli
 """
 PROJECT_HELP = f"""Command-line interface for spaCy projects and templates.
 You'd typically start by cloning a project template to a local directory and
diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py
index 84040a712..ade5a3ad4 100644
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -61,6 +61,8 @@ def convert_cli(
     If no output_dir is specified and the output format is JSON, the data
     is written to stdout, so you can pipe them forward to a JSON file:
     $ spacy convert some_file.conllu --file-type json > some_file.json
+
+    DOCS: https://nightly.spacy.io/api/cli#convert
     """
     if isinstance(file_type, FileTypes):
         # We get an instance of the FileTypes from the CLI so we need its string value
@@ -261,6 +263,6 @@ def _get_converter(msg, converter, input_path):
             msg.warn(
                 "Can't automatically detect NER format. "
                 "Conversion may not succeed. "
-                "See https://spacy.io/api/cli#convert"
+                "See https://nightly.spacy.io/api/cli#convert"
             )
     return converter
diff --git a/spacy/cli/debug_config.py b/spacy/cli/debug_config.py
index 2944cd364..7930d0674 100644
--- a/spacy/cli/debug_config.py
+++ b/spacy/cli/debug_config.py
@@ -31,6 +31,8 @@ def debug_config_cli(
     Similar as with the 'train' command, you can override settings from the config
     as command line options. For instance, --training.batch_size 128 overrides
     the value of "batch_size" in the block "[training]".
+
+    DOCS: https://nightly.spacy.io/api/cli#debug-config
     """
     overrides = parse_config_overrides(ctx.args)
     import_code(code_path)
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index a4269796f..75a81e6f5 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -47,6 +47,8 @@ def debug_data_cli(
     Analyze, debug and validate your training and development data. Outputs
     useful stats, and can help you find problems like invalid entity annotations,
     cyclic dependencies, low data labels and more.
+
+    DOCS: https://nightly.spacy.io/api/cli#debug-data
     """
     if ctx.command.name == "debug-data":
         msg.warn(
diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index ed8d54655..5bd4e008f 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -30,6 +30,8 @@ def debug_model_cli(
     """
     Analyze a Thinc model implementation. Includes checks for internal structure
     and activations during training.
+
+    DOCS: https://nightly.spacy.io/api/cli#debug-model
     """
     if use_gpu >= 0:
         msg.info("Using GPU")
diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index 3d5e0a765..036aeab17 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -28,7 +28,7 @@ def download_cli(
     additional arguments provided to this command will be passed to `pip install`
     on package installation.
 
-    DOCS: https://spacy.io/api/cli#download
+    DOCS: https://nightly.spacy.io/api/cli#download
     AVAILABLE PACKAGES: https://spacy.io/models
     """
     download(model, direct, *ctx.args)
@@ -77,7 +77,7 @@ def get_compatibility() -> dict:
             f"Couldn't fetch compatibility table. Please find a package for your spaCy "
             f"installation (v{about.__version__}), and download it manually. "
             f"For more details, see the documentation: "
-            f"https://spacy.io/usage/models",
+            f"https://nightly.spacy.io/usage/models",
             exits=1,
         )
     comp_table = r.json()
diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index 3898c89a1..c5cbab09a 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -27,12 +27,15 @@ def evaluate_cli(
 ):
     """
     Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation
-    data in the binary .spacy format. The --gold-preproc option sets up the evaluation
-    examples with gold-standard sentences and tokens for the predictions. Gold
-    preprocessing helps the annotations align to the tokenization, and may
-    result in sequences of more consistent length. However, it may reduce
-    runtime accuracy due to train/test skew. To render a sample of dependency
-    parses in a HTML file, set as output directory as the displacy_path argument.
+    data in the binary .spacy format. The --gold-preproc option sets up the
+    evaluation examples with gold-standard sentences and tokens for the
+    predictions. Gold preprocessing helps the annotations align to the
+    tokenization, and may result in sequences of more consistent length. However,
+    it may reduce runtime accuracy due to train/test skew. To render a sample of
+    dependency parses in an HTML file, set an output directory as the
+    displacy_path argument.
+
+    DOCS: https://nightly.spacy.io/api/cli#evaluate
     """
     evaluate(
         model,
diff --git a/spacy/cli/info.py b/spacy/cli/info.py
index 98cd042a8..2b87163c2 100644
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@@ -21,6 +21,8 @@ def info_cli(
     Print info about spaCy installation. If a pipeline is speficied as an argument,
     print its meta information. Flag --markdown prints details in Markdown for easy
     copy-pasting to GitHub issues.
+
+    DOCS: https://nightly.spacy.io/api/cli#info
     """
     info(model, markdown=markdown, silent=silent)
 
diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
index b75718a2e..584ca7f64 100644
--- a/spacy/cli/init_config.py
+++ b/spacy/cli/init_config.py
@@ -37,6 +37,8 @@ def init_config_cli(
     specified via the CLI arguments, this command generates a config with the
     optimal settings for you use case. This includes the choice of architecture,
     pretrained weights and related hyperparameters.
+
+    DOCS: https://nightly.spacy.io/api/cli#init-config
     """
     if isinstance(optimize, Optimizations):  # instance of enum from the CLI
         optimize = optimize.value
@@ -59,6 +61,8 @@ def init_fill_config_cli(
     functions for their default values and update the base config. This command
     can be used with a config generated via the training quickstart widget:
     https://nightly.spacy.io/usage/training#quickstart
+
+    DOCS: https://nightly.spacy.io/api/cli#init-fill-config
     """
     fill_config(output_file, base_path, pretraining=pretraining, diff=diff)
 
diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py
index 4fd3025fd..5f06fd895 100644
--- a/spacy/cli/init_model.py
+++ b/spacy/cli/init_model.py
@@ -28,7 +28,7 @@ except ImportError:
 DEFAULT_OOV_PROB = -20
 
 
-@init_cli.command("vectors")
+@init_cli.command("vocab")
 @app.command(
     "init-model",
     context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
@@ -54,6 +54,8 @@ def init_model_cli(
     Create a new blank pipeline directory with vocab and vectors from raw data.
     If vectors are provided in Word2Vec format, they can be either a .txt or
     zipped as a .zip or .tar.gz.
+
+    DOCS: https://nightly.spacy.io/api/cli#init-vocab
     """
     if ctx.command.name == "init-model":
         msg.warn(
diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index f464c97e8..c457b3e17 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -31,6 +31,8 @@ def package_cli(
     the existing values will be used as the defaults in the command-line prompt.
     After packaging, "python setup.py sdist" is run in the package directory,
     which will create a .tar.gz archive that can be installed via "pip install".
+
+    DOCS: https://nightly.spacy.io/api/cli#package
     """
     package(
         input_dir,
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index fe6bfa92e..828e5f08e 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -57,6 +57,8 @@ def pretrain_cli(
     To load the weights back in during 'spacy train', you need to ensure
     all settings are the same between pretraining and training. Ideally,
     this is done by using the same config file for both commands.
+
+    DOCS: https://nightly.spacy.io/api/cli#pretrain
     """
     overrides = parse_config_overrides(ctx.args)
     import_code(code_path)
diff --git a/spacy/cli/profile.py b/spacy/cli/profile.py
index 1b995f4bc..43226730d 100644
--- a/spacy/cli/profile.py
+++ b/spacy/cli/profile.py
@@ -29,6 +29,8 @@ def profile_cli(
     Input should be formatted as one JSON object per line with a key "text".
     It can either be provided as a JSONL file, or be read from sys.sytdin.
     If no input file is specified, the IMDB dataset is loaded via Thinc.
+
+    DOCS: https://nightly.spacy.io/api/cli#debug-profile
     """
     if ctx.parent.command.name == NAME:  # called as top-level command
         msg.warn(
diff --git a/spacy/cli/project/assets.py b/spacy/cli/project/assets.py
index e33a82acc..2b623675d 100644
--- a/spacy/cli/project/assets.py
+++ b/spacy/cli/project/assets.py
@@ -20,6 +20,8 @@ def project_assets_cli(
     defined in the "assets" section of the project.yml. If a checksum is
     provided in the project.yml, the file is only downloaded if no local file
     with the same checksum exists.
+
+    DOCS: https://nightly.spacy.io/api/cli#project-assets
     """
     project_assets(project_dir)
 
diff --git a/spacy/cli/project/clone.py b/spacy/cli/project/clone.py
index 751c389bc..a419feb0f 100644
--- a/spacy/cli/project/clone.py
+++ b/spacy/cli/project/clone.py
@@ -22,6 +22,8 @@ def project_clone_cli(
     only download the files from the given subdirectory. The GitHub repo
     defaults to the official spaCy template repo, but can be customized
     (including using a private repo).
+
+    DOCS: https://nightly.spacy.io/api/cli#project-clone
     """
     if dest is None:
         dest = Path.cwd() / name
diff --git a/spacy/cli/project/document.py b/spacy/cli/project/document.py
index ab345ecd8..d0265029a 100644
--- a/spacy/cli/project/document.py
+++ b/spacy/cli/project/document.py
@@ -43,6 +43,8 @@ def project_document_cli(
     hidden markers are added so you can add custom content before or after the
     auto-generated section and only the auto-generated docs will be replaced
     when you re-run the command.
+
+    DOCS: https://nightly.spacy.io/api/cli#project-document
     """
     project_document(project_dir, output_file, no_emoji=no_emoji)
 
diff --git a/spacy/cli/project/dvc.py b/spacy/cli/project/dvc.py
index de0480bad..541253234 100644
--- a/spacy/cli/project/dvc.py
+++ b/spacy/cli/project/dvc.py
@@ -31,7 +31,10 @@ def project_update_dvc_cli(
     """Auto-generate Data Version Control (DVC) config. A DVC
     project can only define one pipeline, so you need to specify one workflow
     defined in the project.yml. If no workflow is specified, the first defined
-    workflow is used. The DVC config will only be updated if the project.yml changed.
+    workflow is used. The DVC config will only be updated if the project.yml
+    changed.
+
+    DOCS: https://nightly.spacy.io/api/cli#project-dvc
     """
     project_update_dvc(project_dir, workflow, verbose=verbose, force=force)
 
diff --git a/spacy/cli/project/pull.py b/spacy/cli/project/pull.py
index 7b597141f..655e2f459 100644
--- a/spacy/cli/project/pull.py
+++ b/spacy/cli/project/pull.py
@@ -17,7 +17,9 @@ def project_pull_cli(
     """Retrieve available precomputed outputs from a remote storage.
     You can alias remotes in your project.yml by mapping them to storage paths.
     A storage can be anything that the smart-open library can upload to, e.g.
-    gcs, aws, ssh, local directories etc
+    AWS, Google Cloud Storage, SSH, local directories etc.
+
+    DOCS: https://nightly.spacy.io/api/cli#project-pull
     """
     for url, output_path in project_pull(project_dir, remote):
         if url is not None:
diff --git a/spacy/cli/project/push.py b/spacy/cli/project/push.py
index e09ee6e1a..fcee2231a 100644
--- a/spacy/cli/project/push.py
+++ b/spacy/cli/project/push.py
@@ -13,9 +13,12 @@ def project_push_cli(
     project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
     # fmt: on
 ):
-    """Persist outputs to a remote storage. You can alias remotes in your project.yml
-    by mapping them to storage paths. A storage can be anything that the smart-open
-    library can upload to, e.g. gcs, aws, ssh, local directories etc
+    """Persist outputs to a remote storage. You can alias remotes in your
+    project.yml by mapping them to storage paths. A storage can be anything that
+    the smart-open library can upload to, e.g. AWS, Google Cloud Storage, SSH,
+    local directories etc.
+
+    DOCS: https://nightly.spacy.io/api/cli#project-push
     """
     for output_path, url in project_push(project_dir, remote):
         if url is None:
diff --git a/spacy/cli/project/run.py b/spacy/cli/project/run.py
index bacd7f04b..eb7b8cc5b 100644
--- a/spacy/cli/project/run.py
+++ b/spacy/cli/project/run.py
@@ -24,6 +24,8 @@ def project_run_cli(
     name is specified, all commands in the workflow are run, in order. If
     commands define dependencies and/or outputs, they will only be re-run if
     state has changed.
+
+    DOCS: https://nightly.spacy.io/api/cli#project-run
     """
     if show_help or not subcommand:
         print_run_help(project_dir, subcommand)
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 5377f7f8f..6be47fa39 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -44,6 +44,8 @@ def train_cli(
     lets you pass in a Python file that's imported before training. It can be
     used to register custom functions and architectures that can then be
     referenced in the config.
+
+    DOCS: https://nightly.spacy.io/api/cli#train
     """
     util.logger.setLevel(logging.DEBUG if verbose else logging.ERROR)
     verify_cli_args(config_path, output_path)
diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py
index a1e05fdcd..9a75ed6f3 100644
--- a/spacy/cli/validate.py
+++ b/spacy/cli/validate.py
@@ -16,6 +16,8 @@ def validate_cli():
     Validate the currently installed pipeline packages and spaCy version. Checks
     if the installed packages are compatible and shows upgrade instructions if
     available. Should be run after `pip install -U spacy`.
+
+    DOCS: https://nightly.spacy.io/api/cli#validate
     """
     validate()
 
diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py
index 2df2bd61c..0e80c3b5f 100644
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -1,8 +1,8 @@
 """
 spaCy's built in visualization suite for dependencies and named entities.
 
-DOCS: https://spacy.io/api/top-level#displacy
-USAGE: https://spacy.io/usage/visualizers
+DOCS: https://nightly.spacy.io/api/top-level#displacy
+USAGE: https://nightly.spacy.io/usage/visualizers
 """
 from typing import Union, Iterable, Optional, Dict, Any, Callable
 import warnings
@@ -37,8 +37,8 @@ def render(
     manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
     RETURNS (str): Rendered HTML markup.
 
-    DOCS: https://spacy.io/api/top-level#displacy.render
-    USAGE: https://spacy.io/usage/visualizers
+    DOCS: https://nightly.spacy.io/api/top-level#displacy.render
+    USAGE: https://nightly.spacy.io/usage/visualizers
     """
     factories = {
         "dep": (DependencyRenderer, parse_deps),
@@ -88,8 +88,8 @@ def serve(
     port (int): Port to serve visualisation.
     host (str): Host to serve visualisation.
 
-    DOCS: https://spacy.io/api/top-level#displacy.serve
-    USAGE: https://spacy.io/usage/visualizers
+    DOCS: https://nightly.spacy.io/api/top-level#displacy.serve
+    USAGE: https://nightly.spacy.io/usage/visualizers
     """
     from wsgiref import simple_server
 
diff --git a/spacy/errors.py b/spacy/errors.py
index 165714d9e..f3058d2b4 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -22,7 +22,7 @@ class Warnings:
             "generate a dependency visualization for it. Make sure the Doc "
             "was processed with a model that supports dependency parsing, and "
             "not just a language class like `English()`. For more info, see "
-            "the docs:\nhttps://spacy.io/usage/models")
+            "the docs:\nhttps://nightly.spacy.io/usage/models")
     W006 = ("No entities to visualize found in Doc object. If this is "
             "surprising to you, make sure the Doc was processed using a model "
             "that supports named entity recognition, and check the `doc.ents` "
@@ -147,7 +147,7 @@ class Errors:
     E010 = ("Word vectors set to length 0. This may be because you don't have "
             "a model installed or loaded, or because your model doesn't "
             "include word vectors. For more info, see the docs:\n"
-            "https://spacy.io/usage/models")
+            "https://nightly.spacy.io/usage/models")
     E011 = ("Unknown operator: '{op}'. Options: {opts}")
     E012 = ("Cannot add pattern for zero tokens to matcher.\nKey: {key}")
     E014 = ("Unknown tag ID: {tag}")
@@ -181,7 +181,7 @@ class Errors:
             "list of (unicode, bool) tuples. Got bytes instance: {value}")
     E029 = ("noun_chunks requires the dependency parse, which requires a "
             "statistical model to be installed and loaded. For more info, see "
-            "the documentation:\nhttps://spacy.io/usage/models")
+            "the documentation:\nhttps://nightly.spacy.io/usage/models")
     E030 = ("Sentence boundaries unset. You can add the 'sentencizer' "
             "component to the pipeline with: "
             "nlp.add_pipe('sentencizer'). "
@@ -294,7 +294,7 @@ class Errors:
     E102 = ("Can't merge non-disjoint spans. '{token}' is already part of "
             "tokens to merge. If you want to find the longest non-overlapping "
             "spans, you can use the util.filter_spans helper:\n"
-            "https://spacy.io/api/top-level#util.filter_spans")
+            "https://nightly.spacy.io/api/top-level#util.filter_spans")
     E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A "
             "token can only be part of one entity, so make sure the entities "
             "you're setting don't overlap.")
@@ -364,10 +364,10 @@ class Errors:
     E137 = ("Expected 'dict' type, but got '{type}' from '{line}'. Make sure "
             "to provide a valid JSON object as input with either the `text` "
             "or `tokens` key. For more info, see the docs:\n"
-            "https://spacy.io/api/cli#pretrain-jsonl")
+            "https://nightly.spacy.io/api/cli#pretrain-jsonl")
     E138 = ("Invalid JSONL format for raw text '{text}'. Make sure the input "
             "includes either the `text` or `tokens` key. For more info, see "
-            "the docs:\nhttps://spacy.io/api/cli#pretrain-jsonl")
+            "the docs:\nhttps://nightly.spacy.io/api/cli#pretrain-jsonl")
     E139 = ("Knowledge Base for component '{name}' is empty. Use the methods "
             "kb.add_entity and kb.add_alias to add entries.")
     E140 = ("The list of entities, prior probabilities and entity vectors "
diff --git a/spacy/gold/converters/conll_ner2docs.py b/spacy/gold/converters/conll_ner2docs.py
index 0b348142a..c04a77f07 100644
--- a/spacy/gold/converters/conll_ner2docs.py
+++ b/spacy/gold/converters/conll_ner2docs.py
@@ -106,7 +106,7 @@ def conll_ner2docs(
                 raise ValueError(
                     "The token-per-line NER file is not formatted correctly. "
                     "Try checking whitespace and delimiters. See "
-                    "https://spacy.io/api/cli#convert"
+                    "https://nightly.spacy.io/api/cli#convert"
                 )
             length = len(cols[0])
             words.extend(cols[0])
diff --git a/spacy/gold/converters/iob2docs.py b/spacy/gold/converters/iob2docs.py
index c7e243397..eebf1266b 100644
--- a/spacy/gold/converters/iob2docs.py
+++ b/spacy/gold/converters/iob2docs.py
@@ -44,7 +44,7 @@ def read_iob(raw_sents, vocab, n_sents):
                 sent_tags = ["-"] * len(sent_words)
             else:
                 raise ValueError(
-                    "The sentence-per-line IOB/IOB2 file is not formatted correctly. Try checking whitespace and delimiters. See https://spacy.io/api/cli#convert"
+                    "The sentence-per-line IOB/IOB2 file is not formatted correctly. Try checking whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert"
                 )
             words.extend(sent_words)
             tags.extend(sent_tags)
diff --git a/spacy/gold/corpus.py b/spacy/gold/corpus.py
index 1046da1e6..545f01eaa 100644
--- a/spacy/gold/corpus.py
+++ b/spacy/gold/corpus.py
@@ -38,7 +38,7 @@ class Corpus:
     limit (int): Limit corpus to a subset of examples, e.g. for debugging.
         Defaults to 0, which indicates no limit.
 
-    DOCS: https://spacy.io/api/corpus
+    DOCS: https://nightly.spacy.io/api/corpus
     """
 
     def __init__(
@@ -83,7 +83,7 @@ class Corpus:
         nlp (Language): The current nlp object.
         YIELDS (Example): The examples.
 
-        DOCS: https://spacy.io/api/corpus#call
+        DOCS: https://nightly.spacy.io/api/corpus#call
         """
         ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.path))
         if self.gold_preproc:
diff --git a/spacy/kb.pyx b/spacy/kb.pyx
index 3b8017a0c..b24ed3a20 100644
--- a/spacy/kb.pyx
+++ b/spacy/kb.pyx
@@ -21,7 +21,7 @@ cdef class Candidate:
     algorithm which will disambiguate the various candidates to the correct one.
     Each candidate (alias, entity) pair is assigned to a certain prior probability.
 
-    DOCS: https://spacy.io/api/kb/#candidate_init
+    DOCS: https://nightly.spacy.io/api/kb/#candidate_init
     """
 
     def __init__(self, KnowledgeBase kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob):
@@ -79,7 +79,7 @@ cdef class KnowledgeBase:
     """A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
     to support entity linking of named entities to real-world concepts.
 
-    DOCS: https://spacy.io/api/kb
+    DOCS: https://nightly.spacy.io/api/kb
     """
 
     def __init__(self, Vocab vocab, entity_vector_length):
diff --git a/spacy/language.py b/spacy/language.py
index 1a40bec61..17ca020ca 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -95,7 +95,7 @@ class Language:
         object and processing pipeline.
     lang (str): Two-letter language ID, i.e. ISO code.
 
-    DOCS: https://spacy.io/api/language
+    DOCS: https://nightly.spacy.io/api/language
     """
 
     Defaults = BaseDefaults
@@ -130,7 +130,7 @@ class Language:
         create_tokenizer (Callable): Function that takes the nlp object and
             returns a tokenizer.
 
-        DOCS: https://spacy.io/api/language#init
+        DOCS: https://nightly.spacy.io/api/language#init
         """
         # We're only calling this to import all factories provided via entry
         # points. The factory decorator applied to these functions takes care
@@ -185,7 +185,7 @@ class Language:
 
         RETURNS (Dict[str, Any]): The meta.
 
-        DOCS: https://spacy.io/api/language#meta
+        DOCS: https://nightly.spacy.io/api/language#meta
         """
         spacy_version = util.get_model_version_range(about.__version__)
         if self.vocab.lang:
@@ -225,7 +225,7 @@ class Language:
 
         RETURNS (thinc.api.Config): The config.
 
-        DOCS: https://spacy.io/api/language#config
+        DOCS: https://nightly.spacy.io/api/language#config
         """
         self._config.setdefault("nlp", {})
         self._config.setdefault("training", {})
@@ -433,7 +433,7 @@ class Language:
             will be combined and normalized for the whole pipeline.
         func (Optional[Callable]): Factory function if not used as a decorator.
 
-        DOCS: https://spacy.io/api/language#factory
+        DOCS: https://nightly.spacy.io/api/language#factory
         """
         if not isinstance(name, str):
             raise ValueError(Errors.E963.format(decorator="factory"))
@@ -513,7 +513,7 @@ class Language:
             Used for pipeline analysis.
         func (Optional[Callable]): Factory function if not used as a decorator.
 
-        DOCS: https://spacy.io/api/language#component
+        DOCS: https://nightly.spacy.io/api/language#component
         """
         if name is not None and not isinstance(name, str):
             raise ValueError(Errors.E963.format(decorator="component"))
@@ -579,7 +579,7 @@ class Language:
         name (str): Name of pipeline component to get.
         RETURNS (callable): The pipeline component.
 
-        DOCS: https://spacy.io/api/language#get_pipe
+        DOCS: https://nightly.spacy.io/api/language#get_pipe
         """
         for pipe_name, component in self._components:
             if pipe_name == name:
@@ -608,7 +608,7 @@ class Language:
             arguments and types expected by the factory.
         RETURNS (Callable[[Doc], Doc]): The pipeline component.
 
-        DOCS: https://spacy.io/api/language#create_pipe
+        DOCS: https://nightly.spacy.io/api/language#create_pipe
         """
         name = name if name is not None else factory_name
         if not isinstance(config, dict):
@@ -722,7 +722,7 @@ class Language:
             arguments and types expected by the factory.
         RETURNS (Callable[[Doc], Doc]): The pipeline component.
 
-        DOCS: https://spacy.io/api/language#add_pipe
+        DOCS: https://nightly.spacy.io/api/language#add_pipe
         """
         if not isinstance(factory_name, str):
             bad_val = repr(factory_name)
@@ -820,7 +820,7 @@ class Language:
         name (str): Name of the component.
         RETURNS (bool): Whether a component of the name exists in the pipeline.
 
-        DOCS: https://spacy.io/api/language#has_pipe
+        DOCS: https://nightly.spacy.io/api/language#has_pipe
         """
         return name in self.pipe_names
 
@@ -841,7 +841,7 @@ class Language:
         validate (bool): Whether to validate the component config against the
             arguments and types expected by the factory.
 
-        DOCS: https://spacy.io/api/language#replace_pipe
+        DOCS: https://nightly.spacy.io/api/language#replace_pipe
         """
         if name not in self.pipe_names:
             raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names))
@@ -870,7 +870,7 @@ class Language:
         old_name (str): Name of the component to rename.
         new_name (str): New name of the component.
 
-        DOCS: https://spacy.io/api/language#rename_pipe
+        DOCS: https://nightly.spacy.io/api/language#rename_pipe
         """
         if old_name not in self.component_names:
             raise ValueError(
@@ -891,7 +891,7 @@ class Language:
         name (str): Name of the component to remove.
         RETURNS (tuple): A `(name, component)` tuple of the removed component.
 
-        DOCS: https://spacy.io/api/language#remove_pipe
+        DOCS: https://nightly.spacy.io/api/language#remove_pipe
         """
         if name not in self.component_names:
             raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
@@ -944,7 +944,7 @@ class Language:
             keyword arguments for specific components.
         RETURNS (Doc): A container for accessing the annotations.
 
-        DOCS: https://spacy.io/api/language#call
+        DOCS: https://nightly.spacy.io/api/language#call
         """
         if len(text) > self.max_length:
             raise ValueError(
@@ -993,7 +993,7 @@ class Language:
         disable (str or iterable): The name(s) of the pipes to disable
         enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled
 
-        DOCS: https://spacy.io/api/language#select_pipes
+        DOCS: https://nightly.spacy.io/api/language#select_pipes
         """
         if enable is None and disable is None:
             raise ValueError(Errors.E991)
@@ -1044,7 +1044,7 @@ class Language:
         exclude (Iterable[str]): Names of components that shouldn't be updated.
         RETURNS (Dict[str, float]): The updated losses dictionary
 
-        DOCS: https://spacy.io/api/language#update
+        DOCS: https://nightly.spacy.io/api/language#update
         """
         if _ is not None:
             raise ValueError(Errors.E989)
@@ -1106,7 +1106,7 @@ class Language:
             >>>     raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)]
             >>>     nlp.rehearse(raw_batch)
 
-        DOCS: https://spacy.io/api/language#rehearse
+        DOCS: https://nightly.spacy.io/api/language#rehearse
         """
         if len(examples) == 0:
             return
@@ -1153,7 +1153,7 @@ class Language:
             create_optimizer if it doesn't exist.
         RETURNS (thinc.api.Optimizer): The optimizer.
 
-        DOCS: https://spacy.io/api/language#begin_training
+        DOCS: https://nightly.spacy.io/api/language#begin_training
         """
         # TODO: throw warning when get_gold_tuples is provided instead of get_examples
         if get_examples is None:
@@ -1200,7 +1200,7 @@ class Language:
         sgd (Optional[Optimizer]): An optimizer.
         RETURNS (Optimizer): The optimizer.
 
-        DOCS: https://spacy.io/api/language#resume_training
+        DOCS: https://nightly.spacy.io/api/language#resume_training
         """
         if device >= 0:  # TODO: do we need this here?
             require_gpu(device)
@@ -1236,7 +1236,7 @@ class Language:
             for the scorer.
         RETURNS (Scorer): The scorer containing the evaluation results.
 
-        DOCS: https://spacy.io/api/language#evaluate
+        DOCS: https://nightly.spacy.io/api/language#evaluate
         """
         validate_examples(examples, "Language.evaluate")
         if component_cfg is None:
@@ -1286,7 +1286,7 @@ class Language:
             >>> with nlp.use_params(optimizer.averages):
             >>>     nlp.to_disk("/tmp/checkpoint")
 
-        DOCS: https://spacy.io/api/language#use_params
+        DOCS: https://nightly.spacy.io/api/language#use_params
         """
         if not params:
             yield
@@ -1333,7 +1333,7 @@ class Language:
         n_process (int): Number of processors to process texts. If -1, set `multiprocessing.cpu_count()`.
         YIELDS (Doc): Documents in the order of the original text.
 
-        DOCS: https://spacy.io/api/language#pipe
+        DOCS: https://nightly.spacy.io/api/language#pipe
         """
         if n_process == -1:
             n_process = mp.cpu_count()
@@ -1469,7 +1469,7 @@ class Language:
             the types expected by the factory.
         RETURNS (Language): The initialized Language class.
 
-        DOCS: https://spacy.io/api/language#from_config
+        DOCS: https://nightly.spacy.io/api/language#from_config
         """
         if auto_fill:
             config = Config(
@@ -1582,7 +1582,7 @@ class Language:
             it doesn't exist.
         exclude (list): Names of components or serialization fields to exclude.
 
-        DOCS: https://spacy.io/api/language#to_disk
+        DOCS: https://nightly.spacy.io/api/language#to_disk
         """
         path = util.ensure_path(path)
         serializers = {}
@@ -1611,7 +1611,7 @@ class Language:
         exclude (list): Names of components or serialization fields to exclude.
         RETURNS (Language): The modified `Language` object.
 
-        DOCS: https://spacy.io/api/language#from_disk
+        DOCS: https://nightly.spacy.io/api/language#from_disk
         """
 
         def deserialize_meta(path: Path) -> None:
@@ -1659,7 +1659,7 @@ class Language:
         exclude (list): Names of components or serialization fields to exclude.
         RETURNS (bytes): The serialized form of the `Language` object.
 
-        DOCS: https://spacy.io/api/language#to_bytes
+        DOCS: https://nightly.spacy.io/api/language#to_bytes
         """
         serializers = {}
         serializers["vocab"] = lambda: self.vocab.to_bytes()
@@ -1683,7 +1683,7 @@ class Language:
         exclude (list): Names of components or serialization fields to exclude.
         RETURNS (Language): The `Language` object.
 
-        DOCS: https://spacy.io/api/language#from_bytes
+        DOCS: https://nightly.spacy.io/api/language#from_bytes
         """
 
         def deserialize_meta(b):
diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx
index 25461b4b7..17ce574ce 100644
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@@ -30,7 +30,7 @@ cdef class Lexeme:
     tag, dependency parse, or lemma (lemmatization depends on the
     part-of-speech tag).
 
-    DOCS: https://spacy.io/api/lexeme
+    DOCS: https://nightly.spacy.io/api/lexeme
     """
     def __init__(self, Vocab vocab, attr_t orth):
         """Create a Lexeme object.
diff --git a/spacy/lookups.py b/spacy/lookups.py
index d79a5b950..fb5e3d748 100644
--- a/spacy/lookups.py
+++ b/spacy/lookups.py
@@ -57,7 +57,7 @@ class Table(OrderedDict):
         data (dict): The dictionary.
         name (str): Optional table name for reference.
 
-        DOCS: https://spacy.io/api/lookups#table.from_dict
+        DOCS: https://nightly.spacy.io/api/lookups#table.from_dict
         """
         self = cls(name=name)
         self.update(data)
@@ -69,7 +69,7 @@ class Table(OrderedDict):
         name (str): Optional table name for reference.
         data (dict): Initial data, used to hint Bloom Filter.
 
-        DOCS: https://spacy.io/api/lookups#table.init
+        DOCS: https://nightly.spacy.io/api/lookups#table.init
         """
         OrderedDict.__init__(self)
         self.name = name
@@ -135,7 +135,7 @@ class Table(OrderedDict):
 
         RETURNS (bytes): The serialized table.
 
-        DOCS: https://spacy.io/api/lookups#table.to_bytes
+        DOCS: https://nightly.spacy.io/api/lookups#table.to_bytes
         """
         data = {
             "name": self.name,
@@ -150,7 +150,7 @@ class Table(OrderedDict):
         bytes_data (bytes): The data to load.
         RETURNS (Table): The loaded table.
 
-        DOCS: https://spacy.io/api/lookups#table.from_bytes
+        DOCS: https://nightly.spacy.io/api/lookups#table.from_bytes
         """
         loaded = srsly.msgpack_loads(bytes_data)
         data = loaded.get("dict", {})
@@ -172,7 +172,7 @@ class Lookups:
     def __init__(self) -> None:
         """Initialize the Lookups object.
 
-        DOCS: https://spacy.io/api/lookups#init
+        DOCS: https://nightly.spacy.io/api/lookups#init
         """
         self._tables = {}
 
@@ -201,7 +201,7 @@ class Lookups:
         data (dict): Optional data to add to the table.
         RETURNS (Table): The newly added table.
 
-        DOCS: https://spacy.io/api/lookups#add_table
+        DOCS: https://nightly.spacy.io/api/lookups#add_table
         """
         if name in self.tables:
             raise ValueError(Errors.E158.format(name=name))
@@ -215,7 +215,7 @@ class Lookups:
         name (str): Name of the table to set.
         table (Table): The Table to set.
 
-        DOCS: https://spacy.io/api/lookups#set_table
+        DOCS: https://nightly.spacy.io/api/lookups#set_table
         """
         self._tables[name] = table
 
@@ -227,7 +227,7 @@ class Lookups:
         default (Any): Optional default value to return if table doesn't exist.
         RETURNS (Table): The table.
 
-        DOCS: https://spacy.io/api/lookups#get_table
+        DOCS: https://nightly.spacy.io/api/lookups#get_table
         """
         if name not in self._tables:
             if default == UNSET:
@@ -241,7 +241,7 @@ class Lookups:
         name (str): Name of the table to remove.
         RETURNS (Table): The removed table.
 
-        DOCS: https://spacy.io/api/lookups#remove_table
+        DOCS: https://nightly.spacy.io/api/lookups#remove_table
         """
         if name not in self._tables:
             raise KeyError(Errors.E159.format(name=name, tables=self.tables))
@@ -253,7 +253,7 @@ class Lookups:
         name (str): Name of the table.
         RETURNS (bool): Whether a table of that name exists.
 
-        DOCS: https://spacy.io/api/lookups#has_table
+        DOCS: https://nightly.spacy.io/api/lookups#has_table
         """
         return name in self._tables
 
@@ -262,7 +262,7 @@ class Lookups:
 
         RETURNS (bytes): The serialized Lookups.
 
-        DOCS: https://spacy.io/api/lookups#to_bytes
+        DOCS: https://nightly.spacy.io/api/lookups#to_bytes
         """
         return srsly.msgpack_dumps(self._tables)
 
@@ -272,7 +272,7 @@ class Lookups:
         bytes_data (bytes): The data to load.
         RETURNS (Lookups): The loaded Lookups.
 
-        DOCS: https://spacy.io/api/lookups#from_bytes
+        DOCS: https://nightly.spacy.io/api/lookups#from_bytes
         """
         self._tables = {}
         for key, value in srsly.msgpack_loads(bytes_data).items():
@@ -287,7 +287,7 @@ class Lookups:
 
         path (str / Path): The file path.
 
-        DOCS: https://spacy.io/api/lookups#to_disk
+        DOCS: https://nightly.spacy.io/api/lookups#to_disk
         """
         if len(self._tables):
             path = ensure_path(path)
@@ -306,7 +306,7 @@ class Lookups:
         path (str / Path): The directory path.
         RETURNS (Lookups): The loaded lookups.
 
-        DOCS: https://spacy.io/api/lookups#from_disk
+        DOCS: https://nightly.spacy.io/api/lookups#from_disk
         """
         path = ensure_path(path)
         filepath = path / filename
diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx
index d3a8fa539..a170c7a6b 100644
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@@ -31,8 +31,8 @@ DEF PADDING = 5
 cdef class Matcher:
     """Match sequences of tokens, based on pattern rules.
 
-    DOCS: https://spacy.io/api/matcher
-    USAGE: https://spacy.io/usage/rule-based-matching
+    DOCS: https://nightly.spacy.io/api/matcher
+    USAGE: https://nightly.spacy.io/usage/rule-based-matching
     """
 
     def __init__(self, vocab, validate=True):
diff --git a/spacy/matcher/phrasematcher.pyx b/spacy/matcher/phrasematcher.pyx
index ba0f515b5..fae513367 100644
--- a/spacy/matcher/phrasematcher.pyx
+++ b/spacy/matcher/phrasematcher.pyx
@@ -19,8 +19,8 @@ cdef class PhraseMatcher:
     sequences based on lists of token descriptions, the `PhraseMatcher` accepts
     match patterns in the form of `Doc` objects.
 
-    DOCS: https://spacy.io/api/phrasematcher
-    USAGE: https://spacy.io/usage/rule-based-matching#phrasematcher
+    DOCS: https://nightly.spacy.io/api/phrasematcher
+    USAGE: https://nightly.spacy.io/usage/rule-based-matching#phrasematcher
 
     Adapted from FlashText: https://github.com/vi3k6i5/flashtext
     MIT License (see `LICENSE`)
@@ -34,7 +34,7 @@ cdef class PhraseMatcher:
         attr (int / str): Token attribute to match on.
         validate (bool): Perform additional validation when patterns are added.
 
-        DOCS: https://spacy.io/api/phrasematcher#init
+        DOCS: https://nightly.spacy.io/api/phrasematcher#init
         """
         self.vocab = vocab
         self._callbacks = {}
@@ -61,7 +61,7 @@ cdef class PhraseMatcher:
 
         RETURNS (int): The number of rules.
 
-        DOCS: https://spacy.io/api/phrasematcher#len
+        DOCS: https://nightly.spacy.io/api/phrasematcher#len
         """
         return len(self._callbacks)
 
@@ -71,7 +71,7 @@ cdef class PhraseMatcher:
         key (str): The match ID.
         RETURNS (bool): Whether the matcher contains rules for this match ID.
 
-        DOCS: https://spacy.io/api/phrasematcher#contains
+        DOCS: https://nightly.spacy.io/api/phrasematcher#contains
         """
         return key in self._callbacks
 
@@ -85,7 +85,7 @@ cdef class PhraseMatcher:
 
         key (str): The match ID.
 
-        DOCS: https://spacy.io/api/phrasematcher#remove
+        DOCS: https://nightly.spacy.io/api/phrasematcher#remove
         """
         if key not in self._docs:
             raise KeyError(key)
@@ -164,7 +164,7 @@ cdef class PhraseMatcher:
             as variable arguments. Will be ignored if a list of patterns is
             provided as the second argument.
 
-        DOCS: https://spacy.io/api/phrasematcher#add
+        DOCS: https://nightly.spacy.io/api/phrasematcher#add
         """
         if docs is None or hasattr(docs, "__call__"):  # old API
             on_match = docs
@@ -228,7 +228,7 @@ cdef class PhraseMatcher:
             `doc[start:end]`. The `match_id` is an integer. If as_spans is set
             to True, a list of Span objects is returned.
 
-        DOCS: https://spacy.io/api/phrasematcher#call
+        DOCS: https://nightly.spacy.io/api/phrasematcher#call
         """
         matches = []
         if doc is None or len(doc) == 0:
diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py
index 85a425e29..406112681 100644
--- a/spacy/pipeline/attributeruler.py
+++ b/spacy/pipeline/attributeruler.py
@@ -38,7 +38,7 @@ class AttributeRuler(Pipe):
     """Set token-level attributes for tokens matched by Matcher patterns.
     Additionally supports importing patterns from tag maps and morph rules.
 
-    DOCS: https://spacy.io/api/attributeruler
+    DOCS: https://nightly.spacy.io/api/attributeruler
     """
 
     def __init__(
@@ -59,7 +59,7 @@ class AttributeRuler(Pipe):
 
         RETURNS (AttributeRuler): The AttributeRuler component.
 
-        DOCS: https://spacy.io/api/attributeruler#init
+        DOCS: https://nightly.spacy.io/api/attributeruler#init
         """
         self.name = name
         self.vocab = vocab
@@ -77,7 +77,7 @@ class AttributeRuler(Pipe):
         doc (Doc): The document to process.
         RETURNS (Doc): The processed Doc.
 
-        DOCS: https://spacy.io/api/attributeruler#call
+        DOCS: https://nightly.spacy.io/api/attributeruler#call
         """
         matches = sorted(self.matcher(doc))
 
@@ -121,7 +121,7 @@ class AttributeRuler(Pipe):
         tag_map (dict): The tag map that maps fine-grained tags to
             coarse-grained tags and morphological features.
 
-        DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
+        DOCS: https://nightly.spacy.io/api/attributeruler#load_from_tag_map
         """
         for tag, attrs in tag_map.items():
             pattern = [{"TAG": tag}]
@@ -139,7 +139,7 @@ class AttributeRuler(Pipe):
             fine-grained tags to coarse-grained tags, lemmas and morphological
             features.
 
-        DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
+        DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules
         """
         for tag in morph_rules:
             for word in morph_rules[tag]:
@@ -163,7 +163,7 @@ class AttributeRuler(Pipe):
         index (int): The index of the token in the matched span to modify. May
             be negative to index from the end of the span. Defaults to 0.
 
-        DOCS: https://spacy.io/api/attributeruler#add
+        DOCS: https://nightly.spacy.io/api/attributeruler#add
         """
         self.matcher.add(len(self.attrs), patterns)
         self._attrs_unnormed.append(attrs)
@@ -178,7 +178,7 @@ class AttributeRuler(Pipe):
             as the arguments to AttributeRuler.add (patterns/attrs/index) to
             add as patterns.
 
-        DOCS: https://spacy.io/api/attributeruler#add_patterns
+        DOCS: https://nightly.spacy.io/api/attributeruler#add_patterns
         """
         for p in pattern_dicts:
             self.add(**p)
@@ -203,7 +203,7 @@ class AttributeRuler(Pipe):
             Scorer.score_token_attr for the attributes "tag", "pos", "morph"
             and "lemma" for the target token attributes.
 
-        DOCS: https://spacy.io/api/tagger#score
+        DOCS: https://nightly.spacy.io/api/attributeruler#score
         """
         validate_examples(examples, "AttributeRuler.score")
         results = {}
@@ -227,7 +227,7 @@ class AttributeRuler(Pipe):
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (bytes): The serialized object.
 
-        DOCS: https://spacy.io/api/attributeruler#to_bytes
+        DOCS: https://nightly.spacy.io/api/attributeruler#to_bytes
         """
         serialize = {}
         serialize["vocab"] = self.vocab.to_bytes
@@ -243,7 +243,7 @@ class AttributeRuler(Pipe):
         exclude (Iterable[str]): String names of serialization fields to exclude.
         returns (AttributeRuler): The loaded object.
 
-        DOCS: https://spacy.io/api/attributeruler#from_bytes
+        DOCS: https://nightly.spacy.io/api/attributeruler#from_bytes
         """
 
         def load_patterns(b):
@@ -264,7 +264,7 @@ class AttributeRuler(Pipe):
 
         path (Union[Path, str]): A path to a directory.
         exclude (Iterable[str]): String names of serialization fields to exclude.
-        DOCS: https://spacy.io/api/attributeruler#to_disk
+        DOCS: https://nightly.spacy.io/api/attributeruler#to_disk
         """
         serialize = {
             "vocab": lambda p: self.vocab.to_disk(p),
@@ -279,7 +279,7 @@ class AttributeRuler(Pipe):
 
         path (Union[Path, str]): A path to a directory.
         exclude (Iterable[str]): String names of serialization fields to exclude.
-        DOCS: https://spacy.io/api/attributeruler#from_disk
+        DOCS: https://nightly.spacy.io/api/attributeruler#from_disk
         """
 
         def load_patterns(p):
diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx
index 76f58df58..e001920a6 100644
--- a/spacy/pipeline/dep_parser.pyx
+++ b/spacy/pipeline/dep_parser.pyx
@@ -105,7 +105,7 @@ def make_parser(
 cdef class DependencyParser(Parser):
     """Pipeline component for dependency parsing.
 
-    DOCS: https://spacy.io/api/dependencyparser
+    DOCS: https://nightly.spacy.io/api/dependencyparser
     """
     TransitionSystem = ArcEager
 
@@ -146,7 +146,7 @@ cdef class DependencyParser(Parser):
         RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans
             and Scorer.score_deps.
 
-        DOCS: https://spacy.io/api/dependencyparser#score
+        DOCS: https://nightly.spacy.io/api/dependencyparser#score
         """
         validate_examples(examples, "DependencyParser.score")
         def dep_getter(token, attr):
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index 78cf274ab..d4f1e6b56 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -83,7 +83,7 @@ def make_entity_linker(
 class EntityLinker(Pipe):
     """Pipeline component for named entity linking.
 
-    DOCS: https://spacy.io/api/entitylinker
+    DOCS: https://nightly.spacy.io/api/entitylinker
     """
 
     NIL = "NIL"  # string used to refer to a non-existing link
@@ -111,7 +111,7 @@ class EntityLinker(Pipe):
         incl_prior (bool): Whether or not to include prior probabilities from the KB in the model.
         incl_context (bool): Whether or not to include the local context in the model.
 
-        DOCS: https://spacy.io/api/entitylinker#init
+        DOCS: https://nightly.spacy.io/api/entitylinker#init
         """
         self.vocab = vocab
         self.model = model
@@ -151,7 +151,7 @@ class EntityLinker(Pipe):
             create_optimizer if it doesn't exist.
         RETURNS (thinc.api.Optimizer): The optimizer.
 
-        DOCS: https://spacy.io/api/entitylinker#begin_training
+        DOCS: https://nightly.spacy.io/api/entitylinker#begin_training
         """
         self.require_kb()
         nO = self.kb.entity_vector_length
@@ -182,7 +182,7 @@ class EntityLinker(Pipe):
             Updated using the component name as the key.
         RETURNS (Dict[str, float]): The updated losses dictionary.
 
-        DOCS: https://spacy.io/api/entitylinker#update
+        DOCS: https://nightly.spacy.io/api/entitylinker#update
         """
         self.require_kb()
         if losses is None:
@@ -264,7 +264,7 @@ class EntityLinker(Pipe):
         doc (Doc): The document to process.
         RETURNS (Doc): The processed Doc.
 
-        DOCS: https://spacy.io/api/entitylinker#call
+        DOCS: https://nightly.spacy.io/api/entitylinker#call
         """
         kb_ids = self.predict([doc])
         self.set_annotations([doc], kb_ids)
@@ -279,7 +279,7 @@ class EntityLinker(Pipe):
         batch_size (int): The number of documents to buffer.
         YIELDS (Doc): Processed documents in order.
 
-        DOCS: https://spacy.io/api/entitylinker#pipe
+        DOCS: https://nightly.spacy.io/api/entitylinker#pipe
         """
         for docs in util.minibatch(stream, size=batch_size):
             kb_ids = self.predict(docs)
@@ -294,7 +294,7 @@ class EntityLinker(Pipe):
         docs (Iterable[Doc]): The documents to predict.
         RETURNS (List[int]): The models prediction for each document.
 
-        DOCS: https://spacy.io/api/entitylinker#predict
+        DOCS: https://nightly.spacy.io/api/entitylinker#predict
         """
         self.require_kb()
         entity_count = 0
@@ -391,7 +391,7 @@ class EntityLinker(Pipe):
         docs (Iterable[Doc]): The documents to modify.
         kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict.
 
-        DOCS: https://spacy.io/api/entitylinker#set_annotations
+        DOCS: https://nightly.spacy.io/api/entitylinker#set_annotations
         """
         count_ents = len([ent for doc in docs for ent in doc.ents])
         if count_ents != len(kb_ids):
@@ -412,7 +412,7 @@ class EntityLinker(Pipe):
         path (str / Path): Path to a directory.
         exclude (Iterable[str]): String names of serialization fields to exclude.
 
-        DOCS: https://spacy.io/api/entitylinker#to_disk
+        DOCS: https://nightly.spacy.io/api/entitylinker#to_disk
         """
         serialize = {}
         serialize["cfg"] = lambda p: srsly.write_json(p, self.cfg)
@@ -430,7 +430,7 @@ class EntityLinker(Pipe):
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (EntityLinker): The modified EntityLinker object.
 
-        DOCS: https://spacy.io/api/entitylinker#from_disk
+        DOCS: https://nightly.spacy.io/api/entitylinker#from_disk
         """
 
         def load_model(p):
diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py
index 5137dfec2..9a87c8589 100644
--- a/spacy/pipeline/entityruler.py
+++ b/spacy/pipeline/entityruler.py
@@ -53,8 +53,8 @@ class EntityRuler:
     purely rule-based entity recognition system. After initialization, the
     component is typically added to the pipeline using `nlp.add_pipe`.
 
-    DOCS: https://spacy.io/api/entityruler
-    USAGE: https://spacy.io/usage/rule-based-matching#entityruler
+    DOCS: https://nightly.spacy.io/api/entityruler
+    USAGE: https://nightly.spacy.io/usage/rule-based-matching#entityruler
     """
 
     def __init__(
@@ -88,7 +88,7 @@ class EntityRuler:
             added by the model, overwrite them by matches if necessary.
         ent_id_sep (str): Separator used internally for entity IDs.
 
-        DOCS: https://spacy.io/api/entityruler#init
+        DOCS: https://nightly.spacy.io/api/entityruler#init
         """
         self.nlp = nlp
         self.name = name
@@ -127,7 +127,7 @@ class EntityRuler:
         doc (Doc): The Doc object in the pipeline.
         RETURNS (Doc): The Doc with added entities, if available.
 
-        DOCS: https://spacy.io/api/entityruler#call
+        DOCS: https://nightly.spacy.io/api/entityruler#call
         """
         matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
         matches = set(
@@ -165,7 +165,7 @@ class EntityRuler:
 
         RETURNS (set): The string labels.
 
-        DOCS: https://spacy.io/api/entityruler#labels
+        DOCS: https://nightly.spacy.io/api/entityruler#labels
         """
         keys = set(self.token_patterns.keys())
         keys.update(self.phrase_patterns.keys())
@@ -185,7 +185,7 @@ class EntityRuler:
 
         RETURNS (set): The string entity ids.
 
-        DOCS: https://spacy.io/api/entityruler#ent_ids
+        DOCS: https://nightly.spacy.io/api/entityruler#ent_ids
         """
         keys = set(self.token_patterns.keys())
         keys.update(self.phrase_patterns.keys())
@@ -203,7 +203,7 @@ class EntityRuler:
 
         RETURNS (list): The original patterns, one dictionary per pattern.
 
-        DOCS: https://spacy.io/api/entityruler#patterns
+        DOCS: https://nightly.spacy.io/api/entityruler#patterns
         """
         all_patterns = []
         for label, patterns in self.token_patterns.items():
@@ -230,7 +230,7 @@ class EntityRuler:
 
         patterns (list): The patterns to add.
 
-        DOCS: https://spacy.io/api/entityruler#add_patterns
+        DOCS: https://nightly.spacy.io/api/entityruler#add_patterns
         """
 
         # disable the nlp components after this one in case they hadn't been initialized / deserialised yet
@@ -324,7 +324,7 @@ class EntityRuler:
         patterns_bytes (bytes): The bytestring to load.
         RETURNS (EntityRuler): The loaded entity ruler.
 
-        DOCS: https://spacy.io/api/entityruler#from_bytes
+        DOCS: https://nightly.spacy.io/api/entityruler#from_bytes
         """
         cfg = srsly.msgpack_loads(patterns_bytes)
         self.clear()
@@ -346,7 +346,7 @@ class EntityRuler:
 
         RETURNS (bytes): The serialized patterns.
 
-        DOCS: https://spacy.io/api/entityruler#to_bytes
+        DOCS: https://nightly.spacy.io/api/entityruler#to_bytes
         """
         serial = {
             "overwrite": self.overwrite,
@@ -365,7 +365,7 @@ class EntityRuler:
         path (str / Path): The JSONL file to load.
         RETURNS (EntityRuler): The loaded entity ruler.
 
-        DOCS: https://spacy.io/api/entityruler#from_disk
+        DOCS: https://nightly.spacy.io/api/entityruler#from_disk
         """
         path = ensure_path(path)
         self.clear()
@@ -401,7 +401,7 @@ class EntityRuler:
 
         path (str / Path): The JSONL file to save.
 
-        DOCS: https://spacy.io/api/entityruler#to_disk
+        DOCS: https://nightly.spacy.io/api/entityruler#to_disk
         """
         path = ensure_path(path)
         cfg = {
diff --git a/spacy/pipeline/functions.py b/spacy/pipeline/functions.py
index 501884873..7e68ea369 100644
--- a/spacy/pipeline/functions.py
+++ b/spacy/pipeline/functions.py
@@ -15,7 +15,7 @@ def merge_noun_chunks(doc: Doc) -> Doc:
     doc (Doc): The Doc object.
     RETURNS (Doc): The Doc object with merged noun chunks.
 
-    DOCS: https://spacy.io/api/pipeline-functions#merge_noun_chunks
+    DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_noun_chunks
     """
     if not doc.is_parsed:
         return doc
@@ -37,7 +37,7 @@ def merge_entities(doc: Doc):
     doc (Doc): The Doc object.
     RETURNS (Doc): The Doc object with merged entities.
 
-    DOCS: https://spacy.io/api/pipeline-functions#merge_entities
+    DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_entities
     """
     with doc.retokenize() as retokenizer:
         for ent in doc.ents:
@@ -54,7 +54,7 @@ def merge_subtokens(doc: Doc, label: str = "subtok") -> Doc:
     label (str): The subtoken dependency label.
     RETURNS (Doc): The Doc object with merged subtokens.
 
-    DOCS: https://spacy.io/api/pipeline-functions#merge_subtokens
+    DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_subtokens
     """
     # TODO: make stateful component with "label" config
     merger = Matcher(doc.vocab)
diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py
index 6cea65fec..3f3e387b7 100644
--- a/spacy/pipeline/lemmatizer.py
+++ b/spacy/pipeline/lemmatizer.py
@@ -43,7 +43,7 @@ class Lemmatizer(Pipe):
     The Lemmatizer supports simple part-of-speech-sensitive suffix rules and
     lookup tables.
 
-    DOCS: https://spacy.io/api/lemmatizer
+    DOCS: https://nightly.spacy.io/api/lemmatizer
     """
 
     @classmethod
@@ -54,7 +54,7 @@ class Lemmatizer(Pipe):
         mode (str): The lemmatizer mode.
         RETURNS (dict): The lookups configuration settings for this mode.
 
-        DOCS: https://spacy.io/api/lemmatizer#get_lookups_config
+        DOCS: https://nightly.spacy.io/api/lemmatizer#get_lookups_config
         """
         if mode == "lookup":
             return {
@@ -80,7 +80,7 @@ class Lemmatizer(Pipe):
             lookups should be loaded.
         RETURNS (Lookups): The Lookups object.
 
-        DOCS: https://spacy.io/api/lemmatizer#get_lookups_config
+        DOCS: https://nightly.spacy.io/api/lemmatizer#get_lookups_config
         """
         config = cls.get_lookups_config(mode)
         required_tables = config.get("required_tables", [])
@@ -123,7 +123,7 @@ class Lemmatizer(Pipe):
         overwrite (bool): Whether to overwrite existing lemmas. Defaults to
             `False`.
 
-        DOCS: https://spacy.io/api/lemmatizer#init
+        DOCS: https://nightly.spacy.io/api/lemmatizer#init
         """
         self.vocab = vocab
         self.model = model
@@ -152,7 +152,7 @@ class Lemmatizer(Pipe):
         doc (Doc): The Doc to process.
         RETURNS (Doc): The processed Doc.
 
-        DOCS: https://spacy.io/api/lemmatizer#call
+        DOCS: https://nightly.spacy.io/api/lemmatizer#call
         """
         for token in doc:
             if self.overwrite or token.lemma == 0:
@@ -168,7 +168,7 @@ class Lemmatizer(Pipe):
         batch_size (int): The number of documents to buffer.
         YIELDS (Doc): Processed documents in order.
 
-        DOCS: https://spacy.io/api/lemmatizer#pipe
+        DOCS: https://nightly.spacy.io/api/lemmatizer#pipe
         """
         for doc in stream:
             doc = self(doc)
@@ -180,7 +180,7 @@ class Lemmatizer(Pipe):
         token (Token): The token to lemmatize.
         RETURNS (list): The available lemmas for the string.
 
-        DOCS: https://spacy.io/api/lemmatizer#lookup_lemmatize
+        DOCS: https://nightly.spacy.io/api/lemmatizer#lookup_lemmatize
         """
         lookup_table = self.lookups.get_table("lemma_lookup", {})
         result = lookup_table.get(token.text, token.text)
@@ -194,7 +194,7 @@ class Lemmatizer(Pipe):
         token (Token): The token to lemmatize.
         RETURNS (list): The available lemmas for the string.
 
-        DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
+        DOCS: https://nightly.spacy.io/api/lemmatizer#rule_lemmatize
         """
         cache_key = (token.orth, token.pos, token.morph)
         if cache_key in self.cache:
@@ -260,7 +260,7 @@ class Lemmatizer(Pipe):
         token (Token): The token.
         RETURNS (bool): Whether the token is a base form.
 
-        DOCS: https://spacy.io/api/lemmatizer#is_base_form
+        DOCS: https://nightly.spacy.io/api/lemmatizer#is_base_form
         """
         return False
 
@@ -270,7 +270,7 @@ class Lemmatizer(Pipe):
         examples (Iterable[Example]): The examples to score.
         RETURNS (Dict[str, Any]): The scores.
 
-        DOCS: https://spacy.io/api/lemmatizer#score
+        DOCS: https://nightly.spacy.io/api/lemmatizer#score
         """
         validate_examples(examples, "Lemmatizer.score")
         return Scorer.score_token_attr(examples, "lemma", **kwargs)
@@ -282,7 +282,7 @@ class Lemmatizer(Pipe):
             it doesn't exist.
         exclude (list): String names of serialization fields to exclude.
 
-        DOCS: https://spacy.io/api/vocab#to_disk
+        DOCS: https://nightly.spacy.io/api/vocab#to_disk
         """
         serialize = {}
         serialize["vocab"] = lambda p: self.vocab.to_disk(p)
@@ -297,7 +297,7 @@ class Lemmatizer(Pipe):
         exclude (list): String names of serialization fields to exclude.
         RETURNS (Vocab): The modified `Vocab` object.
 
-        DOCS: https://spacy.io/api/vocab#to_disk
+        DOCS: https://nightly.spacy.io/api/vocab#from_disk
         """
         deserialize = {}
         deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
@@ -310,7 +310,7 @@ class Lemmatizer(Pipe):
         exclude (list): String names of serialization fields to exclude.
         RETURNS (bytes): The serialized form of the `Vocab` object.
 
-        DOCS: https://spacy.io/api/vocab#to_bytes
+        DOCS: https://nightly.spacy.io/api/vocab#to_bytes
         """
         serialize = {}
         serialize["vocab"] = self.vocab.to_bytes
@@ -324,7 +324,7 @@ class Lemmatizer(Pipe):
         exclude (list): String names of serialization fields to exclude.
         RETURNS (Vocab): The `Vocab` object.
 
-        DOCS: https://spacy.io/api/vocab#from_bytes
+        DOCS: https://nightly.spacy.io/api/vocab#from_bytes
         """
         deserialize = {}
         deserialize["vocab"] = lambda b: self.vocab.from_bytes(b)
diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx
index 329a05f90..bcb555b90 100644
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -79,7 +79,7 @@ class Morphologizer(Tagger):
         labels_morph (dict): Mapping of morph + POS tags to morph labels.
         labels_pos (dict): Mapping of morph + POS tags to POS tags.
 
-        DOCS: https://spacy.io/api/morphologizer#init
+        DOCS: https://nightly.spacy.io/api/morphologizer#init
         """
         self.vocab = vocab
         self.model = model
@@ -106,7 +106,7 @@ class Morphologizer(Tagger):
         label (str): The label to add.
         RETURNS (int): 0 if label is already present, otherwise 1.
 
-        DOCS: https://spacy.io/api/morphologizer#add_label
+        DOCS: https://nightly.spacy.io/api/morphologizer#add_label
         """
         if not isinstance(label, str):
             raise ValueError(Errors.E187)
@@ -139,7 +139,7 @@ class Morphologizer(Tagger):
             create_optimizer if it doesn't exist.
         RETURNS (thinc.api.Optimizer): The optimizer.
 
-        DOCS: https://spacy.io/api/morphologizer#begin_training
+        DOCS: https://nightly.spacy.io/api/morphologizer#begin_training
         """
         if not hasattr(get_examples, "__call__"):
             err = Errors.E930.format(name="Morphologizer", obj=type(get_examples))
@@ -169,7 +169,7 @@ class Morphologizer(Tagger):
         docs (Iterable[Doc]): The documents to modify.
         batch_tag_ids: The IDs to set, produced by Morphologizer.predict.
 
-        DOCS: https://spacy.io/api/morphologizer#set_annotations
+        DOCS: https://nightly.spacy.io/api/morphologizer#set_annotations
         """
         if isinstance(docs, Doc):
             docs = [docs]
@@ -194,7 +194,7 @@ class Morphologizer(Tagger):
         scores: Scores representing the model's predictions.
         RETUTNRS (Tuple[float, float]): The loss and the gradient.
 
-        DOCS: https://spacy.io/api/morphologizer#get_loss
+        DOCS: https://nightly.spacy.io/api/morphologizer#get_loss
         """
         validate_examples(examples, "Morphologizer.get_loss")
         loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
@@ -231,7 +231,7 @@ class Morphologizer(Tagger):
             Scorer.score_token_attr for the attributes "pos" and "morph" and
             Scorer.score_token_attr_per_feat for the attribute "morph".
 
-        DOCS: https://spacy.io/api/morphologizer#score
+        DOCS: https://nightly.spacy.io/api/morphologizer#score
         """
         validate_examples(examples, "Morphologizer.score")
         results = {}
@@ -247,7 +247,7 @@ class Morphologizer(Tagger):
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (bytes): The serialized object.
 
-        DOCS: https://spacy.io/api/morphologizer#to_bytes
+        DOCS: https://nightly.spacy.io/api/morphologizer#to_bytes
         """
         serialize = {}
         serialize["model"] = self.model.to_bytes
@@ -262,7 +262,7 @@ class Morphologizer(Tagger):
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (Morphologizer): The loaded Morphologizer.
 
-        DOCS: https://spacy.io/api/morphologizer#from_bytes
+        DOCS: https://nightly.spacy.io/api/morphologizer#from_bytes
         """
         def load_model(b):
             try:
@@ -284,7 +284,7 @@ class Morphologizer(Tagger):
         path (str / Path): Path to a directory.
         exclude (Iterable[str]): String names of serialization fields to exclude.
 
-        DOCS: https://spacy.io/api/morphologizer#to_disk
+        DOCS: https://nightly.spacy.io/api/morphologizer#to_disk
         """
         serialize = {
             "vocab": lambda p: self.vocab.to_disk(p),
@@ -300,7 +300,7 @@ class Morphologizer(Tagger):
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (Morphologizer): The modified Morphologizer object.
 
-        DOCS: https://spacy.io/api/morphologizer#from_disk
+        DOCS: https://nightly.spacy.io/api/morphologizer#from_disk
         """
         def load_model(p):
             with p.open("rb") as file_:
diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx
index 631b5ae72..d9f33ccb4 100644
--- a/spacy/pipeline/ner.pyx
+++ b/spacy/pipeline/ner.pyx
@@ -88,7 +88,7 @@ def make_ner(
 cdef class EntityRecognizer(Parser):
     """Pipeline component for named entity recognition.
 
-    DOCS: https://spacy.io/api/entityrecognizer
+    DOCS: https://nightly.spacy.io/api/entityrecognizer
     """
     TransitionSystem = BiluoPushDown
 
@@ -119,7 +119,7 @@ cdef class EntityRecognizer(Parser):
         examples (Iterable[Example]): The examples to score.
         RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
 
-        DOCS: https://spacy.io/api/entityrecognizer#score
+        DOCS: https://nightly.spacy.io/api/entityrecognizer#score
         """
         validate_examples(examples, "EntityRecognizer.score")
         return Scorer.score_spans(examples, "ents", **kwargs)
diff --git a/spacy/pipeline/pipe.pyx b/spacy/pipeline/pipe.pyx
index a3f379a97..2518ebad3 100644
--- a/spacy/pipeline/pipe.pyx
+++ b/spacy/pipeline/pipe.pyx
@@ -15,7 +15,7 @@ cdef class Pipe:
     from it and it defines the interface that components should follow to
     function as trainable components in a spaCy pipeline.
 
-    DOCS: https://spacy.io/api/pipe
+    DOCS: https://nightly.spacy.io/api/pipe
     """
     def __init__(self, vocab, model, name, **cfg):
         """Initialize a pipeline component.
@@ -25,7 +25,7 @@ cdef class Pipe:
         name (str): The component instance name.
         **cfg: Additonal settings and config parameters.
 
-        DOCS: https://spacy.io/api/pipe#init
+        DOCS: https://nightly.spacy.io/api/pipe#init
         """
         self.vocab = vocab
         self.model = model
@@ -40,7 +40,7 @@ cdef class Pipe:
         docs (Doc): The Doc to process.
         RETURNS (Doc): The processed Doc.
 
-        DOCS: https://spacy.io/api/pipe#call
+        DOCS: https://nightly.spacy.io/api/pipe#call
         """
         scores = self.predict([doc])
         self.set_annotations([doc], scores)
@@ -55,7 +55,7 @@ cdef class Pipe:
         batch_size (int): The number of documents to buffer.
         YIELDS (Doc): Processed documents in order.
 
-        DOCS: https://spacy.io/api/pipe#pipe
+        DOCS: https://nightly.spacy.io/api/pipe#pipe
         """
         for docs in util.minibatch(stream, size=batch_size):
             scores = self.predict(docs)
@@ -69,7 +69,7 @@ cdef class Pipe:
         docs (Iterable[Doc]): The documents to predict.
         RETURNS: Vector representations for each token in the documents.
 
-        DOCS: https://spacy.io/api/pipe#predict
+        DOCS: https://nightly.spacy.io/api/pipe#predict
         """
         raise NotImplementedError(Errors.E931.format(method="predict", name=self.name))
 
@@ -79,7 +79,7 @@ cdef class Pipe:
         docs (Iterable[Doc]): The documents to modify.
         scores: The scores to assign.
 
-        DOCS: https://spacy.io/api/pipe#set_annotations
+        DOCS: https://nightly.spacy.io/api/pipe#set_annotations
         """
         raise NotImplementedError(Errors.E931.format(method="set_annotations", name=self.name))
 
@@ -96,7 +96,7 @@ cdef class Pipe:
             Updated using the component name as the key.
         RETURNS (Dict[str, float]): The updated losses dictionary.
 
-        DOCS: https://spacy.io/api/pipe#update
+        DOCS: https://nightly.spacy.io/api/pipe#update
         """
         if losses is None:
             losses = {}
@@ -132,7 +132,7 @@ cdef class Pipe:
             Updated using the component name as the key.
         RETURNS (Dict[str, float]): The updated losses dictionary.
 
-        DOCS: https://spacy.io/api/pipe#rehearse
+        DOCS: https://nightly.spacy.io/api/pipe#rehearse
         """
         pass
 
@@ -144,7 +144,7 @@ cdef class Pipe:
         scores: Scores representing the model's predictions.
         RETUTNRS (Tuple[float, float]): The loss and the gradient.
 
-        DOCS: https://spacy.io/api/pipe#get_loss
+        DOCS: https://nightly.spacy.io/api/pipe#get_loss
         """
         raise NotImplementedError(Errors.E931.format(method="get_loss", name=self.name))
 
@@ -156,7 +156,7 @@ cdef class Pipe:
         label (str): The label to add.
         RETURNS (int): 0 if label is already present, otherwise 1.
 
-        DOCS: https://spacy.io/api/pipe#add_label
+        DOCS: https://nightly.spacy.io/api/pipe#add_label
         """
         raise NotImplementedError(Errors.E931.format(method="add_label", name=self.name))
 
@@ -165,7 +165,7 @@ cdef class Pipe:
 
         RETURNS (thinc.api.Optimizer): The optimizer.
 
-        DOCS: https://spacy.io/api/pipe#create_optimizer
+        DOCS: https://nightly.spacy.io/api/pipe#create_optimizer
         """
         return util.create_default_optimizer()
 
@@ -181,7 +181,7 @@ cdef class Pipe:
             create_optimizer if it doesn't exist.
         RETURNS (thinc.api.Optimizer): The optimizer.
 
-        DOCS: https://spacy.io/api/pipe#begin_training
+        DOCS: https://nightly.spacy.io/api/pipe#begin_training
         """
         self.model.initialize()
         if sgd is None:
@@ -200,7 +200,7 @@ cdef class Pipe:
 
         params (dict): The parameter values to use in the model.
 
-        DOCS: https://spacy.io/api/pipe#use_params
+        DOCS: https://nightly.spacy.io/api/pipe#use_params
         """
         with self.model.use_params(params):
             yield
@@ -211,7 +211,7 @@ cdef class Pipe:
         examples (Iterable[Example]): The examples to score.
         RETURNS (Dict[str, Any]): The scores.
 
-        DOCS: https://spacy.io/api/pipe#score
+        DOCS: https://nightly.spacy.io/api/pipe#score
         """
         return {}
 
@@ -221,7 +221,7 @@ cdef class Pipe:
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (bytes): The serialized object.
 
-        DOCS: https://spacy.io/api/pipe#to_bytes
+        DOCS: https://nightly.spacy.io/api/pipe#to_bytes
         """
         serialize = {}
         serialize["cfg"] = lambda: srsly.json_dumps(self.cfg)
@@ -236,7 +236,7 @@ cdef class Pipe:
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (Pipe): The loaded object.
 
-        DOCS: https://spacy.io/api/pipe#from_bytes
+        DOCS: https://nightly.spacy.io/api/pipe#from_bytes
         """
 
         def load_model(b):
@@ -259,7 +259,7 @@ cdef class Pipe:
         path (str / Path): Path to a directory.
         exclude (Iterable[str]): String names of serialization fields to exclude.
 
-        DOCS: https://spacy.io/api/pipe#to_disk
+        DOCS: https://nightly.spacy.io/api/pipe#to_disk
         """
         serialize = {}
         serialize["cfg"] = lambda p: srsly.write_json(p, self.cfg)
@@ -274,7 +274,7 @@ cdef class Pipe:
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (Pipe): The loaded object.
 
-        DOCS: https://spacy.io/api/pipe#from_disk
+        DOCS: https://nightly.spacy.io/api/pipe#from_disk
         """
 
         def load_model(p):
diff --git a/spacy/pipeline/sentencizer.pyx b/spacy/pipeline/sentencizer.pyx
index 46d599497..aaf08d594 100644
--- a/spacy/pipeline/sentencizer.pyx
+++ b/spacy/pipeline/sentencizer.pyx
@@ -29,7 +29,7 @@ def make_sentencizer(
 class Sentencizer(Pipe):
     """Segment the Doc into sentences using a rule-based strategy.
 
-    DOCS: https://spacy.io/api/sentencizer
+    DOCS: https://nightly.spacy.io/api/sentencizer
     """
 
     default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹',
@@ -51,7 +51,7 @@ class Sentencizer(Pipe):
             serialized with the nlp object.
         RETURNS (Sentencizer): The sentencizer component.
 
-        DOCS: https://spacy.io/api/sentencizer#init
+        DOCS: https://nightly.spacy.io/api/sentencizer#init
         """
         self.name = name
         if punct_chars:
@@ -68,7 +68,7 @@ class Sentencizer(Pipe):
         doc (Doc): The document to process.
         RETURNS (Doc): The processed Doc.
 
-        DOCS: https://spacy.io/api/sentencizer#call
+        DOCS: https://nightly.spacy.io/api/sentencizer#call
         """
         start = 0
         seen_period = False
@@ -94,7 +94,7 @@ class Sentencizer(Pipe):
         batch_size (int): The number of documents to buffer.
         YIELDS (Doc): Processed documents in order.
 
-        DOCS: https://spacy.io/api/sentencizer#pipe
+        DOCS: https://nightly.spacy.io/api/sentencizer#pipe
         """
         for docs in util.minibatch(stream, size=batch_size):
             predictions = self.predict(docs)
@@ -157,7 +157,7 @@ class Sentencizer(Pipe):
         examples (Iterable[Example]): The examples to score.
         RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
 
-        DOCS: https://spacy.io/api/sentencizer#score
+        DOCS: https://nightly.spacy.io/api/sentencizer#score
         """
         validate_examples(examples, "Sentencizer.score")
         results = Scorer.score_spans(examples, "sents", **kwargs)
@@ -169,7 +169,7 @@ class Sentencizer(Pipe):
 
         RETURNS (bytes): The serialized object.
 
-        DOCS: https://spacy.io/api/sentencizer#to_bytes
+        DOCS: https://nightly.spacy.io/api/sentencizer#to_bytes
         """
         return srsly.msgpack_dumps({"punct_chars": list(self.punct_chars)})
 
@@ -179,7 +179,7 @@ class Sentencizer(Pipe):
         bytes_data (bytes): The data to load.
         returns (Sentencizer): The loaded object.
 
-        DOCS: https://spacy.io/api/sentencizer#from_bytes
+        DOCS: https://nightly.spacy.io/api/sentencizer#from_bytes
         """
         cfg = srsly.msgpack_loads(bytes_data)
         self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars))
@@ -188,7 +188,7 @@ class Sentencizer(Pipe):
     def to_disk(self, path, *, exclude=tuple()):
         """Serialize the sentencizer to disk.
 
-        DOCS: https://spacy.io/api/sentencizer#to_disk
+        DOCS: https://nightly.spacy.io/api/sentencizer#to_disk
         """
         path = util.ensure_path(path)
         path = path.with_suffix(".json")
@@ -198,7 +198,7 @@ class Sentencizer(Pipe):
     def from_disk(self, path, *, exclude=tuple()):
         """Load the sentencizer from disk.
 
-        DOCS: https://spacy.io/api/sentencizer#from_disk
+        DOCS: https://nightly.spacy.io/api/sentencizer#from_disk
         """
         path = util.ensure_path(path)
         path = path.with_suffix(".json")
diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx
index e82225d27..b78be44f8 100644
--- a/spacy/pipeline/senter.pyx
+++ b/spacy/pipeline/senter.pyx
@@ -44,7 +44,7 @@ def make_senter(nlp: Language, name: str, model: Model):
 class SentenceRecognizer(Tagger):
     """Pipeline component for sentence segmentation.
 
-    DOCS: https://spacy.io/api/sentencerecognizer
+    DOCS: https://nightly.spacy.io/api/sentencerecognizer
     """
     def __init__(self, vocab, model, name="senter"):
         """Initialize a sentence recognizer.
@@ -54,7 +54,7 @@ class SentenceRecognizer(Tagger):
         name (str): The component instance name, used to add entries to the
             losses during training.
 
-        DOCS: https://spacy.io/api/sentencerecognizer#init
+        DOCS: https://nightly.spacy.io/api/sentencerecognizer#init
         """
         self.vocab = vocab
         self.model = model
@@ -76,7 +76,7 @@ class SentenceRecognizer(Tagger):
         docs (Iterable[Doc]): The documents to modify.
         batch_tag_ids: The IDs to set, produced by SentenceRecognizer.predict.
 
-        DOCS: https://spacy.io/api/sentencerecognizer#set_annotations
+        DOCS: https://nightly.spacy.io/api/sentencerecognizer#set_annotations
         """
         if isinstance(docs, Doc):
             docs = [docs]
@@ -101,7 +101,7 @@ class SentenceRecognizer(Tagger):
         scores: Scores representing the model's predictions.
         RETUTNRS (Tuple[float, float]): The loss and the gradient.
 
-        DOCS: https://spacy.io/api/sentencerecognizer#get_loss
+        DOCS: https://nightly.spacy.io/api/sentencerecognizer#get_loss
         """
         validate_examples(examples, "SentenceRecognizer.get_loss")
         labels = self.labels
@@ -135,7 +135,7 @@ class SentenceRecognizer(Tagger):
             create_optimizer if it doesn't exist.
         RETURNS (thinc.api.Optimizer): The optimizer.
 
-        DOCS: https://spacy.io/api/sentencerecognizer#begin_training
+        DOCS: https://nightly.spacy.io/api/sentencerecognizer#begin_training
         """
         self.set_output(len(self.labels))
         self.model.initialize()
@@ -151,7 +151,7 @@ class SentenceRecognizer(Tagger):
 
         examples (Iterable[Example]): The examples to score.
         RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
-        DOCS: https://spacy.io/api/sentencerecognizer#score
+        DOCS: https://nightly.spacy.io/api/sentencerecognizer#score
         """
         validate_examples(examples, "SentenceRecognizer.score")
         results = Scorer.score_spans(examples, "sents", **kwargs)
@@ -164,7 +164,7 @@ class SentenceRecognizer(Tagger):
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (bytes): The serialized object.
 
-        DOCS: https://spacy.io/api/sentencerecognizer#to_bytes
+        DOCS: https://nightly.spacy.io/api/sentencerecognizer#to_bytes
         """
         serialize = {}
         serialize["model"] = self.model.to_bytes
@@ -179,7 +179,7 @@ class SentenceRecognizer(Tagger):
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (Tagger): The loaded SentenceRecognizer.
 
-        DOCS: https://spacy.io/api/sentencerecognizer#from_bytes
+        DOCS: https://nightly.spacy.io/api/sentencerecognizer#from_bytes
         """
         def load_model(b):
             try:
@@ -201,7 +201,7 @@ class SentenceRecognizer(Tagger):
         path (str / Path): Path to a directory.
         exclude (Iterable[str]): String names of serialization fields to exclude.
 
-        DOCS: https://spacy.io/api/sentencerecognizer#to_disk
+        DOCS: https://nightly.spacy.io/api/sentencerecognizer#to_disk
         """
         serialize = {
             "vocab": lambda p: self.vocab.to_disk(p),
@@ -217,7 +217,7 @@ class SentenceRecognizer(Tagger):
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (Tagger): The modified SentenceRecognizer object.
 
-        DOCS: https://spacy.io/api/sentencerecognizer#from_disk
+        DOCS: https://nightly.spacy.io/api/sentencerecognizer#from_disk
         """
         def load_model(p):
             with p.open("rb") as file_:
diff --git a/spacy/pipeline/simple_ner.py b/spacy/pipeline/simple_ner.py
index 5f3addbd7..c55edb067 100644
--- a/spacy/pipeline/simple_ner.py
+++ b/spacy/pipeline/simple_ner.py
@@ -78,7 +78,7 @@ class SimpleNER(Pipe):
     def add_label(self, label: str) -> None:
         """Add a new label to the pipe.
         label (str): The label to add.
-        DOCS: https://spacy.io/api/simplener#add_label
+        DOCS: https://nightly.spacy.io/api/simplener#add_label
         """
         if not isinstance(label, str):
             raise ValueError(Errors.E187)
diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index f831caefe..2b760c878 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -58,7 +58,7 @@ def make_tagger(nlp: Language, name: str, model: Model):
 class Tagger(Pipe):
     """Pipeline component for part-of-speech tagging.
 
-    DOCS: https://spacy.io/api/tagger
+    DOCS: https://nightly.spacy.io/api/tagger
     """
     def __init__(self, vocab, model, name="tagger", *, labels=None):
         """Initialize a part-of-speech tagger.
@@ -69,7 +69,7 @@ class Tagger(Pipe):
             losses during training.
         labels (List): The set of labels. Defaults to None.
 
-        DOCS: https://spacy.io/api/tagger#init
+        DOCS: https://nightly.spacy.io/api/tagger#init
         """
         self.vocab = vocab
         self.model = model
@@ -86,7 +86,7 @@ class Tagger(Pipe):
 
         RETURNS (Tuple[str]): The labels.
 
-        DOCS: https://spacy.io/api/tagger#labels
+        DOCS: https://nightly.spacy.io/api/tagger#labels
         """
         return tuple(self.cfg["labels"])
 
@@ -96,7 +96,7 @@ class Tagger(Pipe):
         doc (Doc): The document to process.
         RETURNS (Doc): The processed Doc.
 
-        DOCS: https://spacy.io/api/tagger#call
+        DOCS: https://nightly.spacy.io/api/tagger#call
         """
         tags = self.predict([doc])
         self.set_annotations([doc], tags)
@@ -111,7 +111,7 @@ class Tagger(Pipe):
         batch_size (int): The number of documents to buffer.
         YIELDS (Doc): Processed documents in order.
 
-        DOCS: https://spacy.io/api/tagger#pipe
+        DOCS: https://nightly.spacy.io/api/tagger#pipe
         """
         for docs in util.minibatch(stream, size=batch_size):
             tag_ids = self.predict(docs)
@@ -124,7 +124,7 @@ class Tagger(Pipe):
         docs (Iterable[Doc]): The documents to predict.
         RETURNS: The models prediction for each document.
 
-        DOCS: https://spacy.io/api/tagger#predict
+        DOCS: https://nightly.spacy.io/api/tagger#predict
         """
         if not any(len(doc) for doc in docs):
             # Handle cases where there are no tokens in any docs.
@@ -153,7 +153,7 @@ class Tagger(Pipe):
         docs (Iterable[Doc]): The documents to modify.
         batch_tag_ids: The IDs to set, produced by Tagger.predict.
 
-        DOCS: https://spacy.io/api/tagger#set_annotations
+        DOCS: https://nightly.spacy.io/api/tagger#set_annotations
         """
         if isinstance(docs, Doc):
             docs = [docs]
@@ -182,7 +182,7 @@ class Tagger(Pipe):
             Updated using the component name as the key.
         RETURNS (Dict[str, float]): The updated losses dictionary.
 
-        DOCS: https://spacy.io/api/tagger#update
+        DOCS: https://nightly.spacy.io/api/tagger#update
         """
         if losses is None:
             losses = {}
@@ -220,7 +220,7 @@ class Tagger(Pipe):
             Updated using the component name as the key.
         RETURNS (Dict[str, float]): The updated losses dictionary.
 
-        DOCS: https://spacy.io/api/tagger#rehearse
+        DOCS: https://nightly.spacy.io/api/tagger#rehearse
         """
         validate_examples(examples, "Tagger.rehearse")
         docs = [eg.predicted for eg in examples]
@@ -247,7 +247,7 @@ class Tagger(Pipe):
         scores: Scores representing the model's predictions.
         RETUTNRS (Tuple[float, float]): The loss and the gradient.
 
-        DOCS: https://spacy.io/api/tagger#get_loss
+        DOCS: https://nightly.spacy.io/api/tagger#get_loss
         """
         validate_examples(examples, "Tagger.get_loss")
         loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
@@ -269,7 +269,7 @@ class Tagger(Pipe):
             create_optimizer if it doesn't exist.
         RETURNS (thinc.api.Optimizer): The optimizer.
 
-        DOCS: https://spacy.io/api/tagger#begin_training
+        DOCS: https://nightly.spacy.io/api/tagger#begin_training
         """
         if not hasattr(get_examples, "__call__"):
             err = Errors.E930.format(name="Tagger", obj=type(get_examples))
@@ -307,7 +307,7 @@ class Tagger(Pipe):
         label (str): The label to add.
         RETURNS (int): 0 if label is already present, otherwise 1.
 
-        DOCS: https://spacy.io/api/tagger#add_label
+        DOCS: https://nightly.spacy.io/api/tagger#add_label
         """
         if not isinstance(label, str):
             raise ValueError(Errors.E187)
@@ -324,7 +324,7 @@ class Tagger(Pipe):
         RETURNS (Dict[str, Any]): The scores, produced by
             Scorer.score_token_attr for the attributes "tag".
 
-        DOCS: https://spacy.io/api/tagger#score
+        DOCS: https://nightly.spacy.io/api/tagger#score
         """
         validate_examples(examples, "Tagger.score")
         return Scorer.score_token_attr(examples, "tag", **kwargs)
@@ -335,7 +335,7 @@ class Tagger(Pipe):
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (bytes): The serialized object.
 
-        DOCS: https://spacy.io/api/tagger#to_bytes
+        DOCS: https://nightly.spacy.io/api/tagger#to_bytes
         """
         serialize = {}
         serialize["model"] = self.model.to_bytes
@@ -350,7 +350,7 @@ class Tagger(Pipe):
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (Tagger): The loaded Tagger.
 
-        DOCS: https://spacy.io/api/tagger#from_bytes
+        DOCS: https://nightly.spacy.io/api/tagger#from_bytes
         """
         def load_model(b):
             try:
@@ -372,7 +372,7 @@ class Tagger(Pipe):
         path (str / Path): Path to a directory.
         exclude (Iterable[str]): String names of serialization fields to exclude.
 
-        DOCS: https://spacy.io/api/tagger#to_disk
+        DOCS: https://nightly.spacy.io/api/tagger#to_disk
         """
         serialize = {
             "vocab": lambda p: self.vocab.to_disk(p),
@@ -388,7 +388,7 @@ class Tagger(Pipe):
         exclude (Iterable[str]): String names of serialization fields to exclude.
         RETURNS (Tagger): The modified Tagger object.
 
-        DOCS: https://spacy.io/api/tagger#from_disk
+        DOCS: https://nightly.spacy.io/api/tagger#from_disk
         """
         def load_model(p):
             with p.open("rb") as file_:
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index ce4f286e5..d6efb4348 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -92,7 +92,7 @@ def make_textcat(
 class TextCategorizer(Pipe):
     """Pipeline component for text classification.
 
-    DOCS: https://spacy.io/api/textcategorizer
+    DOCS: https://nightly.spacy.io/api/textcategorizer
     """
 
     def __init__(
@@ -111,7 +111,7 @@ class TextCategorizer(Pipe):
             losses during training.
         labels (Iterable[str]): The labels to use.
 
-        DOCS: https://spacy.io/api/textcategorizer#init
+        DOCS: https://nightly.spacy.io/api/textcategorizer#init
         """
         self.vocab = vocab
         self.model = model
@@ -124,7 +124,7 @@ class TextCategorizer(Pipe):
     def labels(self) -> Tuple[str]:
         """RETURNS (Tuple[str]): The labels currently added to the component.
 
-        DOCS: https://spacy.io/api/textcategorizer#labels
+        DOCS: https://nightly.spacy.io/api/textcategorizer#labels
         """
         return tuple(self.cfg.setdefault("labels", []))
 
@@ -146,7 +146,7 @@ class TextCategorizer(Pipe):
         batch_size (int): The number of documents to buffer.
         YIELDS (Doc): Processed documents in order.
 
-        DOCS: https://spacy.io/api/textcategorizer#pipe
+        DOCS: https://nightly.spacy.io/api/textcategorizer#pipe
         """
         for docs in util.minibatch(stream, size=batch_size):
             scores = self.predict(docs)
@@ -159,7 +159,7 @@ class TextCategorizer(Pipe):
         docs (Iterable[Doc]): The documents to predict.
         RETURNS: The models prediction for each document.
 
-        DOCS: https://spacy.io/api/textcategorizer#predict
+        DOCS: https://nightly.spacy.io/api/textcategorizer#predict
         """
         tensors = [doc.tensor for doc in docs]
         if not any(len(doc) for doc in docs):
@@ -177,7 +177,7 @@ class TextCategorizer(Pipe):
         docs (Iterable[Doc]): The documents to modify.
         scores: The scores to set, produced by TextCategorizer.predict.
 
-        DOCS: https://spacy.io/api/textcategorizer#set_annotations
+        DOCS: https://nightly.spacy.io/api/textcategorizer#set_annotations
         """
         for i, doc in enumerate(docs):
             for j, label in enumerate(self.labels):
@@ -204,7 +204,7 @@ class TextCategorizer(Pipe):
             Updated using the component name as the key.
         RETURNS (Dict[str, float]): The updated losses dictionary.
 
-        DOCS: https://spacy.io/api/textcategorizer#update
+        DOCS: https://nightly.spacy.io/api/textcategorizer#update
         """
         if losses is None:
             losses = {}
@@ -245,7 +245,7 @@ class TextCategorizer(Pipe):
             Updated using the component name as the key.
         RETURNS (Dict[str, float]): The updated losses dictionary.
 
-        DOCS: https://spacy.io/api/textcategorizer#rehearse
+        DOCS: https://nightly.spacy.io/api/textcategorizer#rehearse
         """
         if losses is not None:
             losses.setdefault(self.name, 0.0)
@@ -289,7 +289,7 @@ class TextCategorizer(Pipe):
         scores: Scores representing the model's predictions.
         RETUTNRS (Tuple[float, float]): The loss and the gradient.
 
-        DOCS: https://spacy.io/api/textcategorizer#get_loss
+        DOCS: https://nightly.spacy.io/api/textcategorizer#get_loss
         """
         validate_examples(examples, "TextCategorizer.get_loss")
         truths, not_missing = self._examples_to_truth(examples)
@@ -305,7 +305,7 @@ class TextCategorizer(Pipe):
         label (str): The label to add.
         RETURNS (int): 0 if label is already present, otherwise 1.
 
-        DOCS: https://spacy.io/api/textcategorizer#add_label
+        DOCS: https://nightly.spacy.io/api/textcategorizer#add_label
         """
         if not isinstance(label, str):
             raise ValueError(Errors.E187)
@@ -343,7 +343,7 @@ class TextCategorizer(Pipe):
             create_optimizer if it doesn't exist.
         RETURNS (thinc.api.Optimizer): The optimizer.
 
-        DOCS: https://spacy.io/api/textcategorizer#begin_training
+        DOCS: https://nightly.spacy.io/api/textcategorizer#begin_training
         """
         if not hasattr(get_examples, "__call__"):
             err = Errors.E930.format(name="TextCategorizer", obj=type(get_examples))
@@ -378,7 +378,7 @@ class TextCategorizer(Pipe):
         positive_label (str): Optional positive label.
         RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats.
 
-        DOCS: https://spacy.io/api/textcategorizer#score
+        DOCS: https://nightly.spacy.io/api/textcategorizer#score
         """
         validate_examples(examples, "TextCategorizer.score")
         return Scorer.score_cats(
diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py
index 7e61ccc02..5657d687d 100644
--- a/spacy/pipeline/tok2vec.py
+++ b/spacy/pipeline/tok2vec.py
@@ -56,7 +56,7 @@ class Tok2Vec(Pipe):
             a list of Doc objects as input, and output a list of 2d float arrays.
         name (str): The component instance name.
 
-        DOCS: https://spacy.io/api/tok2vec#init
+        DOCS: https://nightly.spacy.io/api/tok2vec#init
         """
         self.vocab = vocab
         self.model = model
@@ -91,7 +91,7 @@ class Tok2Vec(Pipe):
         docs (Doc): The Doc to process.
         RETURNS (Doc): The processed Doc.
 
-        DOCS: https://spacy.io/api/tok2vec#call
+        DOCS: https://nightly.spacy.io/api/tok2vec#call
         """
         tokvecses = self.predict([doc])
         self.set_annotations([doc], tokvecses)
@@ -106,7 +106,7 @@ class Tok2Vec(Pipe):
         batch_size (int): The number of documents to buffer.
         YIELDS (Doc): Processed documents in order.
 
-        DOCS: https://spacy.io/api/tok2vec#pipe
+        DOCS: https://nightly.spacy.io/api/tok2vec#pipe
         """
         for docs in minibatch(stream, batch_size):
             docs = list(docs)
@@ -121,7 +121,7 @@ class Tok2Vec(Pipe):
         docs (Iterable[Doc]): The documents to predict.
         RETURNS: Vector representations for each token in the documents.
 
-        DOCS: https://spacy.io/api/tok2vec#predict
+        DOCS: https://nightly.spacy.io/api/tok2vec#predict
         """
         tokvecs = self.model.predict(docs)
         batch_id = Tok2VecListener.get_batch_id(docs)
@@ -135,7 +135,7 @@ class Tok2Vec(Pipe):
         docs (Iterable[Doc]): The documents to modify.
         tokvecses: The tensors to set, produced by Tok2Vec.predict.
 
-        DOCS: https://spacy.io/api/tok2vec#set_annotations
+        DOCS: https://nightly.spacy.io/api/tok2vec#set_annotations
         """
         for doc, tokvecs in zip(docs, tokvecses):
             assert tokvecs.shape[0] == len(doc)
@@ -162,7 +162,7 @@ class Tok2Vec(Pipe):
             Updated using the component name as the key.
         RETURNS (Dict[str, float]): The updated losses dictionary.
 
-        DOCS: https://spacy.io/api/tok2vec#update
+        DOCS: https://nightly.spacy.io/api/tok2vec#update
         """
         if losses is None:
             losses = {}
@@ -220,7 +220,7 @@ class Tok2Vec(Pipe):
             create_optimizer if it doesn't exist.
         RETURNS (thinc.api.Optimizer): The optimizer.
 
-        DOCS: https://spacy.io/api/tok2vec#begin_training
+        DOCS: https://nightly.spacy.io/api/tok2vec#begin_training
         """
         docs = [Doc(self.vocab, words=["hello"])]
         self.model.initialize(X=docs)
diff --git a/spacy/scorer.py b/spacy/scorer.py
index 9bbc64cac..9b1831a91 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -85,7 +85,7 @@ class Scorer:
     ) -> None:
         """Initialize the Scorer.
 
-        DOCS: https://spacy.io/api/scorer#init
+        DOCS: https://nightly.spacy.io/api/scorer#init
         """
         self.nlp = nlp
         self.cfg = cfg
@@ -101,7 +101,7 @@ class Scorer:
         examples (Iterable[Example]): The predicted annotations + correct annotations.
         RETURNS (Dict): A dictionary of scores.
 
-        DOCS: https://spacy.io/api/scorer#score
+        DOCS: https://nightly.spacy.io/api/scorer#score
         """
         scores = {}
         if hasattr(self.nlp.tokenizer, "score"):
@@ -121,7 +121,7 @@ class Scorer:
         RETURNS (Dict[str, float]): A dictionary containing the scores
             token_acc/p/r/f.
 
-        DOCS: https://spacy.io/api/scorer#score_tokenization
+        DOCS: https://nightly.spacy.io/api/scorer#score_tokenization
         """
         acc_score = PRFScore()
         prf_score = PRFScore()
@@ -169,7 +169,7 @@ class Scorer:
         RETURNS (Dict[str, float]): A dictionary containing the accuracy score
             under the key attr_acc.
 
-        DOCS: https://spacy.io/api/scorer#score_token_attr
+        DOCS: https://nightly.spacy.io/api/scorer#score_token_attr
         """
         tag_score = PRFScore()
         for example in examples:
@@ -263,7 +263,7 @@ class Scorer:
         RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under
             the keys attr_p/r/f and the per-type PRF scores under attr_per_type.
 
-        DOCS: https://spacy.io/api/scorer#score_spans
+        DOCS: https://nightly.spacy.io/api/scorer#score_spans
         """
         score = PRFScore()
         score_per_type = dict()
@@ -350,7 +350,7 @@ class Scorer:
                 attr_f_per_type,
                 attr_auc_per_type
 
-        DOCS: https://spacy.io/api/scorer#score_cats
+        DOCS: https://nightly.spacy.io/api/scorer#score_cats
         """
         if threshold is None:
             threshold = 0.5 if multi_label else 0.0
@@ -467,7 +467,7 @@ class Scorer:
         RETURNS (Dict[str, Any]): A dictionary containing the scores:
             attr_uas, attr_las, and attr_las_per_type.
 
-        DOCS: https://spacy.io/api/scorer#score_deps
+        DOCS: https://nightly.spacy.io/api/scorer#score_deps
         """
         unlabelled = PRFScore()
         labelled = PRFScore()
diff --git a/spacy/strings.pyx b/spacy/strings.pyx
index 6a1d68221..cd442729c 100644
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@@ -91,7 +91,7 @@ cdef Utf8Str* _allocate(Pool mem, const unsigned char* chars, uint32_t length) e
 cdef class StringStore:
     """Look up strings by 64-bit hashes.
 
-    DOCS: https://spacy.io/api/stringstore
+    DOCS: https://nightly.spacy.io/api/stringstore
     """
     def __init__(self, strings=None, freeze=False):
         """Create the StringStore.
diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 759de90d3..5e7222d40 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -31,7 +31,7 @@ cdef class Tokenizer:
     """Segment text, and create Doc objects with the discovered segment
     boundaries.
 
-    DOCS: https://spacy.io/api/tokenizer
+    DOCS: https://nightly.spacy.io/api/tokenizer
     """
     def __init__(self, Vocab vocab, rules=None, prefix_search=None,
                  suffix_search=None, infix_finditer=None, token_match=None,
@@ -54,7 +54,7 @@ cdef class Tokenizer:
         EXAMPLE:
             >>> tokenizer = Tokenizer(nlp.vocab)
 
-        DOCS: https://spacy.io/api/tokenizer#init
+        DOCS: https://nightly.spacy.io/api/tokenizer#init
         """
         self.mem = Pool()
         self._cache = PreshMap()
@@ -147,7 +147,7 @@ cdef class Tokenizer:
         string (str): The string to tokenize.
         RETURNS (Doc): A container for linguistic annotations.
 
-        DOCS: https://spacy.io/api/tokenizer#call
+        DOCS: https://nightly.spacy.io/api/tokenizer#call
         """
         doc = self._tokenize_affixes(string, True)
         self._apply_special_cases(doc)
@@ -209,7 +209,7 @@ cdef class Tokenizer:
         Defaults to 1000.
         YIELDS (Doc): A sequence of Doc objects, in order.
 
-        DOCS: https://spacy.io/api/tokenizer#pipe
+        DOCS: https://nightly.spacy.io/api/tokenizer#pipe
         """
         for text in texts:
             yield self(text)
@@ -529,7 +529,7 @@ cdef class Tokenizer:
             and `.end()` methods, denoting the placement of internal segment
             separators, e.g. hyphens.
 
-        DOCS: https://spacy.io/api/tokenizer#find_infix
+        DOCS: https://nightly.spacy.io/api/tokenizer#find_infix
         """
         if self.infix_finditer is None:
             return 0
@@ -542,7 +542,7 @@ cdef class Tokenizer:
         string (str): The string to segment.
         RETURNS (int): The length of the prefix if present, otherwise `None`.
 
-        DOCS: https://spacy.io/api/tokenizer#find_prefix
+        DOCS: https://nightly.spacy.io/api/tokenizer#find_prefix
         """
         if self.prefix_search is None:
             return 0
@@ -556,7 +556,7 @@ cdef class Tokenizer:
         string (str): The string to segment.
         Returns (int): The length of the suffix if present, otherwise `None`.
 
-        DOCS: https://spacy.io/api/tokenizer#find_suffix
+        DOCS: https://nightly.spacy.io/api/tokenizer#find_suffix
         """
         if self.suffix_search is None:
             return 0
@@ -596,7 +596,7 @@ cdef class Tokenizer:
             a token and its attributes. The `ORTH` fields of the attributes
             must exactly match the string when they are concatenated.
 
-        DOCS: https://spacy.io/api/tokenizer#add_special_case
+        DOCS: https://nightly.spacy.io/api/tokenizer#add_special_case
         """
         self._validate_special_case(string, substrings)
         substrings = list(substrings)
@@ -635,7 +635,7 @@ cdef class Tokenizer:
         string (str): The string to tokenize.
         RETURNS (list): A list of (pattern_string, token_string) tuples
 
-        DOCS: https://spacy.io/api/tokenizer#explain
+        DOCS: https://nightly.spacy.io/api/tokenizer#explain
         """
         prefix_search = self.prefix_search
         suffix_search = self.suffix_search
@@ -716,7 +716,7 @@ cdef class Tokenizer:
             it doesn't exist.
         exclude (list): String names of serialization fields to exclude.
 
-        DOCS: https://spacy.io/api/tokenizer#to_disk
+        DOCS: https://nightly.spacy.io/api/tokenizer#to_disk
         """
         path = util.ensure_path(path)
         with path.open("wb") as file_:
@@ -730,7 +730,7 @@ cdef class Tokenizer:
         exclude (list): String names of serialization fields to exclude.
         RETURNS (Tokenizer): The modified `Tokenizer` object.
 
-        DOCS: https://spacy.io/api/tokenizer#from_disk
+        DOCS: https://nightly.spacy.io/api/tokenizer#from_disk
         """
         path = util.ensure_path(path)
         with path.open("rb") as file_:
@@ -744,7 +744,7 @@ cdef class Tokenizer:
         exclude (list): String names of serialization fields to exclude.
         RETURNS (bytes): The serialized form of the `Tokenizer` object.
 
-        DOCS: https://spacy.io/api/tokenizer#to_bytes
+        DOCS: https://nightly.spacy.io/api/tokenizer#to_bytes
         """
         serializers = {
             "vocab": lambda: self.vocab.to_bytes(),
@@ -764,7 +764,7 @@ cdef class Tokenizer:
         exclude (list): String names of serialization fields to exclude.
         RETURNS (Tokenizer): The `Tokenizer` object.
 
-        DOCS: https://spacy.io/api/tokenizer#from_bytes
+        DOCS: https://nightly.spacy.io/api/tokenizer#from_bytes
         """
         data = {}
         deserializers = {
diff --git a/spacy/tokens/_retokenize.pyx b/spacy/tokens/_retokenize.pyx
index 8d57b791f..c5fac2299 100644
--- a/spacy/tokens/_retokenize.pyx
+++ b/spacy/tokens/_retokenize.pyx
@@ -24,8 +24,8 @@ from ..strings import get_string_id
 cdef class Retokenizer:
     """Helper class for doc.retokenize() context manager.
 
-    DOCS: https://spacy.io/api/doc#retokenize
-    USAGE: https://spacy.io/usage/linguistic-features#retokenization
+    DOCS: https://nightly.spacy.io/api/doc#retokenize
+    USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization
     """
     cdef Doc doc
     cdef list merges
@@ -47,7 +47,7 @@ cdef class Retokenizer:
         span (Span): The span to merge.
         attrs (dict): Attributes to set on the merged token.
 
-        DOCS: https://spacy.io/api/doc#retokenizer.merge
+        DOCS: https://nightly.spacy.io/api/doc#retokenizer.merge
         """
         if (span.start, span.end) in self._spans_to_merge:
             return
@@ -73,7 +73,7 @@ cdef class Retokenizer:
         attrs (dict): Attributes to set on all split tokens. Attribute names
             mapped to list of per-token attribute values.
 
-        DOCS: https://spacy.io/api/doc#retokenizer.split
+        DOCS: https://nightly.spacy.io/api/doc#retokenizer.split
         """
         if ''.join(orths) != token.text:
             raise ValueError(Errors.E117.format(new=''.join(orths), old=token.text))
diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py
index a257c7919..cd8c81939 100644
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@@ -61,7 +61,7 @@ class DocBin:
         store_user_data (bool): Whether to include the `Doc.user_data`.
         docs (Iterable[Doc]): Docs to add.
 
-        DOCS: https://spacy.io/api/docbin#init
+        DOCS: https://nightly.spacy.io/api/docbin#init
         """
         attrs = sorted([intify_attr(attr) for attr in attrs])
         self.version = "0.1"
@@ -86,7 +86,7 @@ class DocBin:
 
         doc (Doc): The Doc object to add.
 
-        DOCS: https://spacy.io/api/docbin#add
+        DOCS: https://nightly.spacy.io/api/docbin#add
         """
         array = doc.to_array(self.attrs)
         if len(array.shape) == 1:
@@ -115,7 +115,7 @@ class DocBin:
         vocab (Vocab): The shared vocab.
         YIELDS (Doc): The Doc objects.
 
-        DOCS: https://spacy.io/api/docbin#get_docs
+        DOCS: https://nightly.spacy.io/api/docbin#get_docs
         """
         for string in self.strings:
             vocab[string]
@@ -141,7 +141,7 @@ class DocBin:
 
         other (DocBin): The DocBin to merge into the current bin.
 
-        DOCS: https://spacy.io/api/docbin#merge
+        DOCS: https://nightly.spacy.io/api/docbin#merge
         """
         if self.attrs != other.attrs:
             raise ValueError(Errors.E166.format(current=self.attrs, other=other.attrs))
@@ -158,7 +158,7 @@ class DocBin:
 
         RETURNS (bytes): The serialized DocBin.
 
-        DOCS: https://spacy.io/api/docbin#to_bytes
+        DOCS: https://nightly.spacy.io/api/docbin#to_bytes
         """
         for tokens in self.tokens:
             assert len(tokens.shape) == 2, tokens.shape  # this should never happen
@@ -185,7 +185,7 @@ class DocBin:
         bytes_data (bytes): The data to load from.
         RETURNS (DocBin): The loaded DocBin.
 
-        DOCS: https://spacy.io/api/docbin#from_bytes
+        DOCS: https://nightly.spacy.io/api/docbin#from_bytes
         """
         msg = srsly.msgpack_loads(zlib.decompress(bytes_data))
         self.attrs = msg["attrs"]
@@ -211,7 +211,7 @@ class DocBin:
 
         path (str / Path): The file path.
 
-        DOCS: https://spacy.io/api/docbin#to_disk
+        DOCS: https://nightly.spacy.io/api/docbin#to_disk
         """
         path = ensure_path(path)
         with path.open("wb") as file_:
@@ -223,7 +223,7 @@ class DocBin:
         path (str / Path): The file path.
         RETURNS (DocBin): The loaded DocBin.
 
-        DOCS: https://spacy.io/api/docbin#to_disk
+        DOCS: https://nightly.spacy.io/api/docbin#from_disk
         """
         path = ensure_path(path)
         with path.open("rb") as file_:
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 3c7b4f8b3..29bdf85ab 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -104,7 +104,7 @@ cdef class Doc:
         >>> from spacy.tokens import Doc
         >>> doc = Doc(nlp.vocab, words=["hello", "world", "!"], spaces=[True, False, False])
 
-    DOCS: https://spacy.io/api/doc
+    DOCS: https://nightly.spacy.io/api/doc
     """
 
     @classmethod
@@ -118,8 +118,8 @@ cdef class Doc:
         method (callable): Optional method for method extension.
         force (bool): Force overwriting existing attribute.
 
-        DOCS: https://spacy.io/api/doc#set_extension
-        USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
+        DOCS: https://nightly.spacy.io/api/doc#set_extension
+        USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
         """
         if cls.has_extension(name) and not kwargs.get("force", False):
             raise ValueError(Errors.E090.format(name=name, obj="Doc"))
@@ -132,7 +132,7 @@ cdef class Doc:
         name (str): Name of the extension.
         RETURNS (tuple): A `(default, method, getter, setter)` tuple.
 
-        DOCS: https://spacy.io/api/doc#get_extension
+        DOCS: https://nightly.spacy.io/api/doc#get_extension
         """
         return Underscore.doc_extensions.get(name)
 
@@ -143,7 +143,7 @@ cdef class Doc:
         name (str): Name of the extension.
         RETURNS (bool): Whether the extension has been registered.
 
-        DOCS: https://spacy.io/api/doc#has_extension
+        DOCS: https://nightly.spacy.io/api/doc#has_extension
         """
         return name in Underscore.doc_extensions
 
@@ -155,7 +155,7 @@ cdef class Doc:
         RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
             removed extension.
 
-        DOCS: https://spacy.io/api/doc#remove_extension
+        DOCS: https://nightly.spacy.io/api/doc#remove_extension
         """
         if not cls.has_extension(name):
             raise ValueError(Errors.E046.format(name=name))
@@ -173,7 +173,7 @@ cdef class Doc:
             it is not. If `None`, defaults to `[True]*len(words)`
         user_data (dict or None): Optional extra data to attach to the Doc.
 
-        DOCS: https://spacy.io/api/doc#init
+        DOCS: https://nightly.spacy.io/api/doc#init
         """
         self.vocab = vocab
         size = max(20, (len(words) if words is not None else 0))
@@ -288,7 +288,7 @@ cdef class Doc:
             You can use negative indices and open-ended ranges, which have
             their normal Python semantics.
 
-        DOCS: https://spacy.io/api/doc#getitem
+        DOCS: https://nightly.spacy.io/api/doc#getitem
         """
         if isinstance(i, slice):
             start, stop = normalize_slice(len(self), i.start, i.stop, i.step)
@@ -305,7 +305,7 @@ cdef class Doc:
         than-Python speeds are required, you can instead access the annotations
         as a numpy array, or access the underlying C data directly from Cython.
 
-        DOCS: https://spacy.io/api/doc#iter
+        DOCS: https://nightly.spacy.io/api/doc#iter
         """
         cdef int i
         for i in range(self.length):
@@ -316,7 +316,7 @@ cdef class Doc:
 
         RETURNS (int): The number of tokens in the document.
 
-        DOCS: https://spacy.io/api/doc#len
+        DOCS: https://nightly.spacy.io/api/doc#len
         """
         return self.length
 
@@ -349,7 +349,7 @@ cdef class Doc:
             the span.
         RETURNS (Span): The newly constructed object.
 
-        DOCS: https://spacy.io/api/doc#char_span
+        DOCS: https://nightly.spacy.io/api/doc#char_span
         """
         if not isinstance(label, int):
             label = self.vocab.strings.add(label)
@@ -374,7 +374,7 @@ cdef class Doc:
             `Span`, `Token` and `Lexeme` objects.
         RETURNS (float): A scalar similarity score. Higher is more similar.
 
-        DOCS: https://spacy.io/api/doc#similarity
+        DOCS: https://nightly.spacy.io/api/doc#similarity
         """
         if "similarity" in self.user_hooks:
             return self.user_hooks["similarity"](self, other)
@@ -407,7 +407,7 @@ cdef class Doc:
 
         RETURNS (bool): Whether a word vector is associated with the object.
 
-        DOCS: https://spacy.io/api/doc#has_vector
+        DOCS: https://nightly.spacy.io/api/doc#has_vector
         """
         if "has_vector" in self.user_hooks:
             return self.user_hooks["has_vector"](self)
@@ -425,7 +425,7 @@ cdef class Doc:
         RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
             representing the document's semantics.
 
-        DOCS: https://spacy.io/api/doc#vector
+        DOCS: https://nightly.spacy.io/api/doc#vector
         """
         def __get__(self):
             if "vector" in self.user_hooks:
@@ -453,7 +453,7 @@ cdef class Doc:
 
         RETURNS (float): The L2 norm of the vector representation.
 
-        DOCS: https://spacy.io/api/doc#vector_norm
+        DOCS: https://nightly.spacy.io/api/doc#vector_norm
         """
         def __get__(self):
             if "vector_norm" in self.user_hooks:
@@ -493,7 +493,7 @@ cdef class Doc:
 
         RETURNS (tuple): Entities in the document, one `Span` per entity.
 
-        DOCS: https://spacy.io/api/doc#ents
+        DOCS: https://nightly.spacy.io/api/doc#ents
         """
         def __get__(self):
             cdef int i
@@ -584,7 +584,7 @@ cdef class Doc:
 
         YIELDS (Span): Noun chunks in the document.
 
-        DOCS: https://spacy.io/api/doc#noun_chunks
+        DOCS: https://nightly.spacy.io/api/doc#noun_chunks
         """
 
         # Accumulate the result before beginning to iterate over it. This
@@ -609,7 +609,7 @@ cdef class Doc:
 
         YIELDS (Span): Sentences in the document.
 
-        DOCS: https://spacy.io/api/doc#sents
+        DOCS: https://nightly.spacy.io/api/doc#sents
         """
         if not self.is_sentenced:
             raise ValueError(Errors.E030)
@@ -722,7 +722,7 @@ cdef class Doc:
         attr_id (int): The attribute ID to key the counts.
         RETURNS (dict): A dictionary mapping attributes to integer counts.
 
-        DOCS: https://spacy.io/api/doc#count_by
+        DOCS: https://nightly.spacy.io/api/doc#count_by
         """
         cdef int i
         cdef attr_t attr
@@ -777,7 +777,7 @@ cdef class Doc:
         array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values.
         RETURNS (Doc): Itself.
 
-        DOCS: https://spacy.io/api/doc#from_array
+        DOCS: https://nightly.spacy.io/api/doc#from_array
         """
         # Handle scalar/list inputs of strings/ints for py_attr_ids
         # See also #3064
@@ -872,7 +872,7 @@ cdef class Doc:
         attrs (list): Optional list of attribute ID ints or attribute name strings.
         RETURNS (Doc): A doc that contains the concatenated docs, or None if no docs were given.
 
-        DOCS: https://spacy.io/api/doc#from_docs
+        DOCS: https://nightly.spacy.io/api/doc#from_docs
         """
         if not docs:
             return None
@@ -953,7 +953,7 @@ cdef class Doc:
         RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
             (n, n), where n = len(self).
 
-        DOCS: https://spacy.io/api/doc#get_lca_matrix
+        DOCS: https://nightly.spacy.io/api/doc#get_lca_matrix
         """
         return numpy.asarray(_get_lca_matrix(self, 0, len(self)))
 
@@ -987,7 +987,7 @@ cdef class Doc:
             it doesn't exist. Paths may be either strings or Path-like objects.
         exclude (Iterable[str]): String names of serialization fields to exclude.
 
-        DOCS: https://spacy.io/api/doc#to_disk
+        DOCS: https://nightly.spacy.io/api/doc#to_disk
         """
         path = util.ensure_path(path)
         with path.open("wb") as file_:
@@ -1002,7 +1002,7 @@ cdef class Doc:
         exclude (list): String names of serialization fields to exclude.
         RETURNS (Doc): The modified `Doc` object.
 
-        DOCS: https://spacy.io/api/doc#from_disk
+        DOCS: https://nightly.spacy.io/api/doc#from_disk
         """
         path = util.ensure_path(path)
         with path.open("rb") as file_:
@@ -1016,7 +1016,7 @@ cdef class Doc:
         RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
             all annotations.
 
-        DOCS: https://spacy.io/api/doc#to_bytes
+        DOCS: https://nightly.spacy.io/api/doc#to_bytes
         """
         return srsly.msgpack_dumps(self.to_dict(exclude=exclude))
 
@@ -1027,7 +1027,7 @@ cdef class Doc:
         exclude (list): String names of serialization fields to exclude.
         RETURNS (Doc): Itself.
 
-        DOCS: https://spacy.io/api/doc#from_bytes
+        DOCS: https://nightly.spacy.io/api/doc#from_bytes
         """
         return self.from_dict(srsly.msgpack_loads(bytes_data), exclude=exclude)
 
@@ -1038,7 +1038,7 @@ cdef class Doc:
         RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
             all annotations.
 
-        DOCS: https://spacy.io/api/doc#to_bytes
+        DOCS: https://nightly.spacy.io/api/doc#to_bytes
         """
         array_head = [LENGTH, SPACY, LEMMA, ENT_IOB, ENT_TYPE, ENT_ID, NORM, ENT_KB_ID]
         if self.is_tagged:
@@ -1086,7 +1086,7 @@ cdef class Doc:
         exclude (list): String names of serialization fields to exclude.
         RETURNS (Doc): Itself.
 
-        DOCS: https://spacy.io/api/doc#from_dict
+        DOCS: https://nightly.spacy.io/api/doc#from_dict
         """
         if self.length != 0:
             raise ValueError(Errors.E033.format(length=self.length))
@@ -1166,8 +1166,8 @@ cdef class Doc:
         retokenization are invalidated, although they may accidentally
         continue to work.
 
-        DOCS: https://spacy.io/api/doc#retokenize
-        USAGE: https://spacy.io/usage/linguistic-features#retokenization
+        DOCS: https://nightly.spacy.io/api/doc#retokenize
+        USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization
         """
         return Retokenizer(self)
 
@@ -1202,7 +1202,7 @@ cdef class Doc:
         be added to an "_" key in the data, e.g. "_": {"foo": "bar"}.
         RETURNS (dict): The data in spaCy's JSON format.
 
-        DOCS: https://spacy.io/api/doc#to_json
+        DOCS: https://nightly.spacy.io/api/doc#to_json
         """
         data = {"text": self.text}
         if self.is_nered:
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index 15e6518d6..f06f3307d 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -27,7 +27,7 @@ from .underscore import Underscore, get_ext_args
 cdef class Span:
     """A slice from a Doc object.
 
-    DOCS: https://spacy.io/api/span
+    DOCS: https://nightly.spacy.io/api/span
     """
     @classmethod
     def set_extension(cls, name, **kwargs):
@@ -40,8 +40,8 @@ cdef class Span:
         method (callable): Optional method for method extension.
         force (bool): Force overwriting existing attribute.
 
-        DOCS: https://spacy.io/api/span#set_extension
-        USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
+        DOCS: https://nightly.spacy.io/api/span#set_extension
+        USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
         """
         if cls.has_extension(name) and not kwargs.get("force", False):
             raise ValueError(Errors.E090.format(name=name, obj="Span"))
@@ -54,7 +54,7 @@ cdef class Span:
         name (str): Name of the extension.
         RETURNS (tuple): A `(default, method, getter, setter)` tuple.
 
-        DOCS: https://spacy.io/api/span#get_extension
+        DOCS: https://nightly.spacy.io/api/span#get_extension
         """
         return Underscore.span_extensions.get(name)
 
@@ -65,7 +65,7 @@ cdef class Span:
         name (str): Name of the extension.
         RETURNS (bool): Whether the extension has been registered.
 
-        DOCS: https://spacy.io/api/span#has_extension
+        DOCS: https://nightly.spacy.io/api/span#has_extension
         """
         return name in Underscore.span_extensions
 
@@ -77,7 +77,7 @@ cdef class Span:
         RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
             removed extension.
 
-        DOCS: https://spacy.io/api/span#remove_extension
+        DOCS: https://nightly.spacy.io/api/span#remove_extension
         """
         if not cls.has_extension(name):
             raise ValueError(Errors.E046.format(name=name))
@@ -95,7 +95,7 @@ cdef class Span:
         vector (ndarray[ndim=1, dtype='float32']): A meaning representation
             of the span.
 
-        DOCS: https://spacy.io/api/span#init
+        DOCS: https://nightly.spacy.io/api/span#init
         """
         if not (0 <= start <= end <= len(doc)):
             raise IndexError(Errors.E035.format(start=start, end=end, length=len(doc)))
@@ -151,7 +151,7 @@ cdef class Span:
 
         RETURNS (int): The number of tokens in the span.
 
-        DOCS: https://spacy.io/api/span#len
+        DOCS: https://nightly.spacy.io/api/span#len
         """
         self._recalculate_indices()
         if self.end < self.start:
@@ -168,7 +168,7 @@ cdef class Span:
             the span to get.
         RETURNS (Token or Span): The token at `span[i]`.
 
-        DOCS: https://spacy.io/api/span#getitem
+        DOCS: https://nightly.spacy.io/api/span#getitem
         """
         self._recalculate_indices()
         if isinstance(i, slice):
@@ -189,7 +189,7 @@ cdef class Span:
 
         YIELDS (Token): A `Token` object.
 
-        DOCS: https://spacy.io/api/span#iter
+        DOCS: https://nightly.spacy.io/api/span#iter
         """
         self._recalculate_indices()
         for i in range(self.start, self.end):
@@ -210,7 +210,7 @@ cdef class Span:
         copy_user_data (bool): Whether or not to copy the original doc's user data.
         RETURNS (Doc): The `Doc` copy of the span.
 
-        DOCS: https://spacy.io/api/span#as_doc
+        DOCS: https://nightly.spacy.io/api/span#as_doc
         """
         # TODO: make copy_user_data a keyword-only argument (Python 3 only)
         words = [t.text for t in self]
@@ -292,7 +292,7 @@ cdef class Span:
         RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
             (n, n), where n = len(self).
 
-        DOCS: https://spacy.io/api/span#get_lca_matrix
+        DOCS: https://nightly.spacy.io/api/span#get_lca_matrix
         """
         return numpy.asarray(_get_lca_matrix(self.doc, self.start, self.end))
 
@@ -304,7 +304,7 @@ cdef class Span:
             `Span`, `Token` and `Lexeme` objects.
         RETURNS (float): A scalar similarity score. Higher is more similar.
 
-        DOCS: https://spacy.io/api/span#similarity
+        DOCS: https://nightly.spacy.io/api/span#similarity
         """
         if "similarity" in self.doc.user_span_hooks:
             return self.doc.user_span_hooks["similarity"](self, other)
@@ -400,7 +400,7 @@ cdef class Span:
 
         RETURNS (tuple): Entities in the span, one `Span` per entity.
 
-        DOCS: https://spacy.io/api/span#ents
+        DOCS: https://nightly.spacy.io/api/span#ents
         """
         ents = []
         for ent in self.doc.ents:
@@ -415,7 +415,7 @@ cdef class Span:
 
         RETURNS (bool): Whether a word vector is associated with the object.
 
-        DOCS: https://spacy.io/api/span#has_vector
+        DOCS: https://nightly.spacy.io/api/span#has_vector
         """
         if "has_vector" in self.doc.user_span_hooks:
             return self.doc.user_span_hooks["has_vector"](self)
@@ -434,7 +434,7 @@ cdef class Span:
         RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
             representing the span's semantics.
 
-        DOCS: https://spacy.io/api/span#vector
+        DOCS: https://nightly.spacy.io/api/span#vector
         """
         if "vector" in self.doc.user_span_hooks:
             return self.doc.user_span_hooks["vector"](self)
@@ -448,7 +448,7 @@ cdef class Span:
 
         RETURNS (float): The L2 norm of the vector representation.
 
-        DOCS: https://spacy.io/api/span#vector_norm
+        DOCS: https://nightly.spacy.io/api/span#vector_norm
         """
         if "vector_norm" in self.doc.user_span_hooks:
             return self.doc.user_span_hooks["vector"](self)
@@ -508,7 +508,7 @@ cdef class Span:
 
         YIELDS (Span): Base noun-phrase `Span` objects.
 
-        DOCS: https://spacy.io/api/span#noun_chunks
+        DOCS: https://nightly.spacy.io/api/span#noun_chunks
         """
         if not self.doc.is_parsed:
             raise ValueError(Errors.E029)
@@ -533,7 +533,7 @@ cdef class Span:
 
         RETURNS (Token): The root token.
 
-        DOCS: https://spacy.io/api/span#root
+        DOCS: https://nightly.spacy.io/api/span#root
         """
         self._recalculate_indices()
         if "root" in self.doc.user_span_hooks:
@@ -590,7 +590,7 @@ cdef class Span:
 
         RETURNS (tuple): A tuple of Token objects.
 
-        DOCS: https://spacy.io/api/span#lefts
+        DOCS: https://nightly.spacy.io/api/span#conjuncts
         """
         return self.root.conjuncts
 
@@ -601,7 +601,7 @@ cdef class Span:
 
         YIELDS (Token):A left-child of a token of the span.
 
-        DOCS: https://spacy.io/api/span#lefts
+        DOCS: https://nightly.spacy.io/api/span#lefts
         """
         for token in reversed(self):  # Reverse, so we get tokens in order
             for left in token.lefts:
@@ -615,7 +615,7 @@ cdef class Span:
 
         YIELDS (Token): A right-child of a token of the span.
 
-        DOCS: https://spacy.io/api/span#rights
+        DOCS: https://nightly.spacy.io/api/span#rights
         """
         for token in self:
             for right in token.rights:
@@ -630,7 +630,7 @@ cdef class Span:
         RETURNS (int): The number of leftward immediate children of the
             span, in the syntactic dependency parse.
 
-        DOCS: https://spacy.io/api/span#n_lefts
+        DOCS: https://nightly.spacy.io/api/span#n_lefts
         """
         return len(list(self.lefts))
 
@@ -642,7 +642,7 @@ cdef class Span:
         RETURNS (int): The number of rightward immediate children of the
             span, in the syntactic dependency parse.
 
-        DOCS: https://spacy.io/api/span#n_rights
+        DOCS: https://nightly.spacy.io/api/span#n_rights
         """
         return len(list(self.rights))
 
@@ -652,7 +652,7 @@ cdef class Span:
 
         YIELDS (Token): A token within the span, or a descendant from it.
 
-        DOCS: https://spacy.io/api/span#subtree
+        DOCS: https://nightly.spacy.io/api/span#subtree
         """
         for word in self.lefts:
             yield from word.subtree
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index 8afde60ee..50f1c5da3 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -30,7 +30,7 @@ cdef class Token:
     """An individual token – i.e. a word, punctuation symbol, whitespace,
     etc.
 
-    DOCS: https://spacy.io/api/token
+    DOCS: https://nightly.spacy.io/api/token
     """
     @classmethod
     def set_extension(cls, name, **kwargs):
@@ -43,8 +43,8 @@ cdef class Token:
         method (callable): Optional method for method extension.
         force (bool): Force overwriting existing attribute.
 
-        DOCS: https://spacy.io/api/token#set_extension
-        USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
+        DOCS: https://nightly.spacy.io/api/token#set_extension
+        USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
         """
         if cls.has_extension(name) and not kwargs.get("force", False):
             raise ValueError(Errors.E090.format(name=name, obj="Token"))
@@ -57,7 +57,7 @@ cdef class Token:
         name (str): Name of the extension.
         RETURNS (tuple): A `(default, method, getter, setter)` tuple.
 
-        DOCS: https://spacy.io/api/token#get_extension
+        DOCS: https://nightly.spacy.io/api/token#get_extension
         """
         return Underscore.token_extensions.get(name)
 
@@ -68,7 +68,7 @@ cdef class Token:
         name (str): Name of the extension.
         RETURNS (bool): Whether the extension has been registered.
 
-        DOCS: https://spacy.io/api/token#has_extension
+        DOCS: https://nightly.spacy.io/api/token#has_extension
         """
         return name in Underscore.token_extensions
 
@@ -80,7 +80,7 @@ cdef class Token:
         RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
             removed extension.
 
-        DOCS: https://spacy.io/api/token#remove_extension
+        DOCS: https://nightly.spacy.io/api/token#remove_extension
         """
         if not cls.has_extension(name):
             raise ValueError(Errors.E046.format(name=name))
@@ -93,7 +93,7 @@ cdef class Token:
         doc (Doc): The parent document.
         offset (int): The index of the token within the document.
 
-        DOCS: https://spacy.io/api/token#init
+        DOCS: https://nightly.spacy.io/api/token#init
         """
         self.vocab = vocab
         self.doc = doc
@@ -108,7 +108,7 @@ cdef class Token:
 
         RETURNS (int): The number of unicode characters in the token.
 
-        DOCS: https://spacy.io/api/token#len
+        DOCS: https://nightly.spacy.io/api/token#len
         """
         return self.c.lex.length
 
@@ -171,7 +171,7 @@ cdef class Token:
         flag_id (int): The ID of the flag attribute.
         RETURNS (bool): Whether the flag is set.
 
-        DOCS: https://spacy.io/api/token#check_flag
+        DOCS: https://nightly.spacy.io/api/token#check_flag
         """
         return Lexeme.c_check_flag(self.c.lex, flag_id)
 
@@ -181,7 +181,7 @@ cdef class Token:
         i (int): The relative position of the token to get. Defaults to 1.
         RETURNS (Token): The token at position `self.doc[self.i+i]`.
 
-        DOCS: https://spacy.io/api/token#nbor
+        DOCS: https://nightly.spacy.io/api/token#nbor
         """
         if self.i+i < 0 or (self.i+i >= len(self.doc)):
             raise IndexError(Errors.E042.format(i=self.i, j=i, length=len(self.doc)))
@@ -195,7 +195,7 @@ cdef class Token:
             `Span`, `Token` and `Lexeme` objects.
         RETURNS (float): A scalar similarity score. Higher is more similar.
 
-        DOCS: https://spacy.io/api/token#similarity
+        DOCS: https://nightly.spacy.io/api/token#similarity
         """
         if "similarity" in self.doc.user_token_hooks:
             return self.doc.user_token_hooks["similarity"](self, other)
@@ -373,7 +373,7 @@ cdef class Token:
 
         RETURNS (bool): Whether a word vector is associated with the object.
 
-        DOCS: https://spacy.io/api/token#has_vector
+        DOCS: https://nightly.spacy.io/api/token#has_vector
         """
         if "has_vector" in self.doc.user_token_hooks:
             return self.doc.user_token_hooks["has_vector"](self)
@@ -388,7 +388,7 @@ cdef class Token:
         RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
             representing the token's semantics.
 
-        DOCS: https://spacy.io/api/token#vector
+        DOCS: https://nightly.spacy.io/api/token#vector
         """
         if "vector" in self.doc.user_token_hooks:
             return self.doc.user_token_hooks["vector"](self)
@@ -403,7 +403,7 @@ cdef class Token:
 
         RETURNS (float): The L2 norm of the vector representation.
 
-        DOCS: https://spacy.io/api/token#vector_norm
+        DOCS: https://nightly.spacy.io/api/token#vector_norm
         """
         if "vector_norm" in self.doc.user_token_hooks:
             return self.doc.user_token_hooks["vector_norm"](self)
@@ -426,7 +426,7 @@ cdef class Token:
         RETURNS (int): The number of leftward immediate children of the
             word, in the syntactic dependency parse.
 
-        DOCS: https://spacy.io/api/token#n_lefts
+        DOCS: https://nightly.spacy.io/api/token#n_lefts
         """
         return self.c.l_kids
 
@@ -438,7 +438,7 @@ cdef class Token:
         RETURNS (int): The number of rightward immediate children of the
             word, in the syntactic dependency parse.
 
-        DOCS: https://spacy.io/api/token#n_rights
+        DOCS: https://nightly.spacy.io/api/token#n_rights
         """
         return self.c.r_kids
 
@@ -470,7 +470,7 @@ cdef class Token:
         RETURNS (bool / None): Whether the token starts a sentence.
             None if unknown.
 
-        DOCS: https://spacy.io/api/token#is_sent_start
+        DOCS: https://nightly.spacy.io/api/token#is_sent_start
         """
         def __get__(self):
             if self.c.sent_start == 0:
@@ -499,7 +499,7 @@ cdef class Token:
         RETURNS (bool / None): Whether the token ends a sentence.
             None if unknown.
 
-        DOCS: https://spacy.io/api/token#is_sent_end
+        DOCS: https://nightly.spacy.io/api/token#is_sent_end
         """
         def __get__(self):
             if self.i + 1 == len(self.doc):
@@ -521,7 +521,7 @@ cdef class Token:
 
         YIELDS (Token): A left-child of the token.
 
-        DOCS: https://spacy.io/api/token#lefts
+        DOCS: https://nightly.spacy.io/api/token#lefts
         """
         cdef int nr_iter = 0
         cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge)
@@ -541,7 +541,7 @@ cdef class Token:
 
         YIELDS (Token): A right-child of the token.
 
-        DOCS: https://spacy.io/api/token#rights
+        DOCS: https://nightly.spacy.io/api/token#rights
         """
         cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i)
         tokens = []
@@ -563,7 +563,7 @@ cdef class Token:
 
         YIELDS (Token): A child token such that `child.head==self`.
 
-        DOCS: https://spacy.io/api/token#children
+        DOCS: https://nightly.spacy.io/api/token#children
         """
         yield from self.lefts
         yield from self.rights
@@ -576,7 +576,7 @@ cdef class Token:
         YIELDS (Token): A descendent token such that
             `self.is_ancestor(descendent) or token == self`.
 
-        DOCS: https://spacy.io/api/token#subtree
+        DOCS: https://nightly.spacy.io/api/token#subtree
         """
         for word in self.lefts:
             yield from word.subtree
@@ -607,7 +607,7 @@ cdef class Token:
         YIELDS (Token): A sequence of ancestor tokens such that
             `ancestor.is_ancestor(self)`.
 
-        DOCS: https://spacy.io/api/token#ancestors
+        DOCS: https://nightly.spacy.io/api/token#ancestors
         """
         cdef const TokenC* head_ptr = self.c
         # Guard against infinite loop, no token can have
@@ -625,7 +625,7 @@ cdef class Token:
         descendant (Token): Another token.
         RETURNS (bool): Whether this token is the ancestor of the descendant.
 
-        DOCS: https://spacy.io/api/token#is_ancestor
+        DOCS: https://nightly.spacy.io/api/token#is_ancestor
         """
         if self.doc is not descendant.doc:
             return False
@@ -729,7 +729,7 @@ cdef class Token:
 
         RETURNS (tuple): The coordinated tokens.
 
-        DOCS: https://spacy.io/api/token#conjuncts
+        DOCS: https://nightly.spacy.io/api/token#conjuncts
         """
         cdef Token word, child
         if "conjuncts" in self.doc.user_token_hooks:
diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx
index bcea87e67..ae2508c87 100644
--- a/spacy/vectors.pyx
+++ b/spacy/vectors.pyx
@@ -44,7 +44,7 @@ cdef class Vectors:
     the table need to be assigned - so len(list(vectors.keys())) may be
     greater or smaller than vectors.shape[0].
 
-    DOCS: https://spacy.io/api/vectors
+    DOCS: https://nightly.spacy.io/api/vectors
     """
     cdef public object name
     cdef public object data
@@ -59,7 +59,7 @@ cdef class Vectors:
         keys (iterable): A sequence of keys, aligned with the data.
         name (str): A name to identify the vectors table.
 
-        DOCS: https://spacy.io/api/vectors#init
+        DOCS: https://nightly.spacy.io/api/vectors#init
         """
         self.name = name
         if data is None:
@@ -83,7 +83,7 @@ cdef class Vectors:
 
         RETURNS (tuple): A `(rows, dims)` pair.
 
-        DOCS: https://spacy.io/api/vectors#shape
+        DOCS: https://nightly.spacy.io/api/vectors#shape
         """
         return self.data.shape
 
@@ -93,7 +93,7 @@ cdef class Vectors:
 
         RETURNS (int): The vector size.
 
-        DOCS: https://spacy.io/api/vectors#size
+        DOCS: https://nightly.spacy.io/api/vectors#size
         """
         return self.data.shape[0] * self.data.shape[1]
 
@@ -103,7 +103,7 @@ cdef class Vectors:
 
         RETURNS (bool): `True` if no slots are available for new keys.
 
-        DOCS: https://spacy.io/api/vectors#is_full
+        DOCS: https://nightly.spacy.io/api/vectors#is_full
         """
         return self._unset.size() == 0
 
@@ -114,7 +114,7 @@ cdef class Vectors:
 
         RETURNS (int): The number of keys in the table.
 
-        DOCS: https://spacy.io/api/vectors#n_keys
+        DOCS: https://nightly.spacy.io/api/vectors#n_keys
         """
         return len(self.key2row)
 
@@ -127,7 +127,7 @@ cdef class Vectors:
         key (int): The key to get the vector for.
         RETURNS (ndarray): The vector for the key.
 
-        DOCS: https://spacy.io/api/vectors#getitem
+        DOCS: https://nightly.spacy.io/api/vectors#getitem
         """
         i = self.key2row[key]
         if i is None:
@@ -141,7 +141,7 @@ cdef class Vectors:
         key (int): The key to set the vector for.
         vector (ndarray): The vector to set.
 
-        DOCS: https://spacy.io/api/vectors#setitem
+        DOCS: https://nightly.spacy.io/api/vectors#setitem
         """
         i = self.key2row[key]
         self.data[i] = vector
@@ -153,7 +153,7 @@ cdef class Vectors:
 
         YIELDS (int): A key in the table.
 
-        DOCS: https://spacy.io/api/vectors#iter
+        DOCS: https://nightly.spacy.io/api/vectors#iter
         """
         yield from self.key2row
 
@@ -162,7 +162,7 @@ cdef class Vectors:
 
         RETURNS (int): The number of vectors in the data.
 
-        DOCS: https://spacy.io/api/vectors#len
+        DOCS: https://nightly.spacy.io/api/vectors#len
         """
         return self.data.shape[0]
 
@@ -172,7 +172,7 @@ cdef class Vectors:
         key (int): The key to check.
         RETURNS (bool): Whether the key has a vector entry.
 
-        DOCS: https://spacy.io/api/vectors#contains
+        DOCS: https://nightly.spacy.io/api/vectors#contains
         """
         return key in self.key2row
 
@@ -189,7 +189,7 @@ cdef class Vectors:
         inplace (bool): Reallocate the memory.
         RETURNS (list): The removed items as a list of `(key, row)` tuples.
 
-        DOCS: https://spacy.io/api/vectors#resize
+        DOCS: https://nightly.spacy.io/api/vectors#resize
         """
         xp = get_array_module(self.data)
         if inplace:
@@ -224,7 +224,7 @@ cdef class Vectors:
 
         YIELDS (ndarray): A vector in the table.
 
-        DOCS: https://spacy.io/api/vectors#values
+        DOCS: https://nightly.spacy.io/api/vectors#values
         """
         for row, vector in enumerate(range(self.data.shape[0])):
             if not self._unset.count(row):
@@ -235,7 +235,7 @@ cdef class Vectors:
 
         YIELDS (tuple): A key/vector pair.
 
-        DOCS: https://spacy.io/api/vectors#items
+        DOCS: https://nightly.spacy.io/api/vectors#items
         """
         for key, row in self.key2row.items():
             yield key, self.data[row]
@@ -281,7 +281,7 @@ cdef class Vectors:
         row (int / None): The row number of a vector to map the key to.
         RETURNS (int): The row the vector was added to.
 
-        DOCS: https://spacy.io/api/vectors#add
+        DOCS: https://nightly.spacy.io/api/vectors#add
         """
         # use int for all keys and rows in key2row for more efficient access
         # and serialization
@@ -368,7 +368,7 @@ cdef class Vectors:
         path (str / Path): A path to a directory, which will be created if
             it doesn't exists.
 
-        DOCS: https://spacy.io/api/vectors#to_disk
+        DOCS: https://nightly.spacy.io/api/vectors#to_disk
         """
         xp = get_array_module(self.data)
         if xp is numpy:
@@ -396,7 +396,7 @@ cdef class Vectors:
         path (str / Path): Directory path, string or Path-like object.
         RETURNS (Vectors): The modified object.
 
-        DOCS: https://spacy.io/api/vectors#from_disk
+        DOCS: https://nightly.spacy.io/api/vectors#from_disk
         """
         def load_key2row(path):
             if path.exists():
@@ -432,7 +432,7 @@ cdef class Vectors:
         exclude (list): String names of serialization fields to exclude.
         RETURNS (bytes): The serialized form of the `Vectors` object.
 
-        DOCS: https://spacy.io/api/vectors#to_bytes
+        DOCS: https://nightly.spacy.io/api/vectors#to_bytes
         """
         def serialize_weights():
             if hasattr(self.data, "to_bytes"):
@@ -453,7 +453,7 @@ cdef class Vectors:
         exclude (list): String names of serialization fields to exclude.
         RETURNS (Vectors): The `Vectors` object.
 
-        DOCS: https://spacy.io/api/vectors#from_bytes
+        DOCS: https://nightly.spacy.io/api/vectors#from_bytes
         """
         def deserialize_weights(b):
             if hasattr(self.data, "from_bytes"):
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index 9e14f37d2..ef0847e54 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -54,7 +54,7 @@ cdef class Vocab:
     instance also provides access to the `StringStore`, and owns underlying
     C-data that is shared between `Doc` objects.
 
-    DOCS: https://spacy.io/api/vocab
+    DOCS: https://nightly.spacy.io/api/vocab
     """
     def __init__(self, lex_attr_getters=None, strings=tuple(), lookups=None,
                  oov_prob=-20., vectors_name=None, writing_system={},
@@ -117,7 +117,7 @@ cdef class Vocab:
             available bit will be chosen.
         RETURNS (int): The integer ID by which the flag value can be checked.
 
-        DOCS: https://spacy.io/api/vocab#add_flag
+        DOCS: https://nightly.spacy.io/api/vocab#add_flag
         """
         if flag_id == -1:
             for bit in range(1, 64):
@@ -201,7 +201,7 @@ cdef class Vocab:
         string (unicode): The ID string.
         RETURNS (bool) Whether the string has an entry in the vocabulary.
 
-        DOCS: https://spacy.io/api/vocab#contains
+        DOCS: https://nightly.spacy.io/api/vocab#contains
         """
         cdef hash_t int_key
         if isinstance(key, bytes):
@@ -218,7 +218,7 @@ cdef class Vocab:
 
         YIELDS (Lexeme): An entry in the vocabulary.
 
-        DOCS: https://spacy.io/api/vocab#iter
+        DOCS: https://nightly.spacy.io/api/vocab#iter
         """
         cdef attr_t key
         cdef size_t addr
@@ -241,7 +241,7 @@ cdef class Vocab:
             >>> apple = nlp.vocab.strings["apple"]
             >>> assert nlp.vocab[apple] == nlp.vocab[u"apple"]
 
-        DOCS: https://spacy.io/api/vocab#getitem
+        DOCS: https://nightly.spacy.io/api/vocab#getitem
         """
         cdef attr_t orth
         if isinstance(id_or_string, unicode):
@@ -309,7 +309,7 @@ cdef class Vocab:
             word was mapped to, and `score` the similarity score between the
             two words.
 
-        DOCS: https://spacy.io/api/vocab#prune_vectors
+        DOCS: https://nightly.spacy.io/api/vocab#prune_vectors
         """
         xp = get_array_module(self.vectors.data)
         # Make prob negative so it sorts by rank ascending
@@ -349,7 +349,7 @@ cdef class Vocab:
             and shape determined by the `vocab.vectors` instance. Usually, a
             numpy ndarray of shape (300,) and dtype float32.
 
-        DOCS: https://spacy.io/api/vocab#get_vector
+        DOCS: https://nightly.spacy.io/api/vocab#get_vector
         """
         if isinstance(orth, str):
             orth = self.strings.add(orth)
@@ -396,7 +396,7 @@ cdef class Vocab:
         orth (int / unicode): The word.
         vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.
 
-        DOCS: https://spacy.io/api/vocab#set_vector
+        DOCS: https://nightly.spacy.io/api/vocab#set_vector
         """
         if isinstance(orth, str):
             orth = self.strings.add(orth)
@@ -418,7 +418,7 @@ cdef class Vocab:
         orth (int / unicode): The word.
         RETURNS (bool): Whether the word has a vector.
 
-        DOCS: https://spacy.io/api/vocab#has_vector
+        DOCS: https://nightly.spacy.io/api/vocab#has_vector
         """
         if isinstance(orth, str):
             orth = self.strings.add(orth)
@@ -431,7 +431,7 @@ cdef class Vocab:
             it doesn't exist.
         exclude (list): String names of serialization fields to exclude.
 
-        DOCS: https://spacy.io/api/vocab#to_disk
+        DOCS: https://nightly.spacy.io/api/vocab#to_disk
         """
         path = util.ensure_path(path)
         if not path.exists():
@@ -452,7 +452,7 @@ cdef class Vocab:
         exclude (list): String names of serialization fields to exclude.
         RETURNS (Vocab): The modified `Vocab` object.
 
-        DOCS: https://spacy.io/api/vocab#to_disk
+        DOCS: https://nightly.spacy.io/api/vocab#from_disk
         """
         path = util.ensure_path(path)
         getters = ["strings", "vectors"]
@@ -477,7 +477,7 @@ cdef class Vocab:
         exclude (list): String names of serialization fields to exclude.
         RETURNS (bytes): The serialized form of the `Vocab` object.
 
-        DOCS: https://spacy.io/api/vocab#to_bytes
+        DOCS: https://nightly.spacy.io/api/vocab#to_bytes
         """
         def deserialize_vectors():
             if self.vectors is None:
@@ -499,7 +499,7 @@ cdef class Vocab:
         exclude (list): String names of serialization fields to exclude.
         RETURNS (Vocab): The `Vocab` object.
 
-        DOCS: https://spacy.io/api/vocab#from_bytes
+        DOCS: https://nightly.spacy.io/api/vocab#from_bytes
         """
         def serialize_vectors(b):
             if self.vectors is None:

From ba600f91c5e317984875ac4bf61961a8b537122e Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Fri, 4 Sep 2020 13:15:44 +0200
Subject: [PATCH 56/71] Tidy up imports

---
 spacy/language.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/spacy/language.py b/spacy/language.py
index 17ca020ca..1eac3cbaf 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1,9 +1,8 @@
 from typing import Optional, Any, Dict, Callable, Iterable, Union, List, Pattern
-from typing import Tuple, Iterator, Optional
+from typing import Tuple, Iterator
 from dataclasses import dataclass
 import random
 import itertools
-import weakref
 import functools
 from contextlib import contextmanager
 from copy import deepcopy

From df0b68f60eda43865d4b7271c55670784b214ade Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Fri, 4 Sep 2020 13:19:16 +0200
Subject: [PATCH 57/71] Remove unicode declarations and update language data

---
 spacy/lang/cs/examples.py                | 10 +++-----
 spacy/lang/cs/lex_attrs.py               |  5 +---
 spacy/lang/he/lex_attrs.py               |  6 ++---
 spacy/lang/ne/stop_words.py              |  4 ---
 spacy/lang/sa/__init__.py                | 10 +-------
 spacy/lang/sa/examples.py                |  4 ---
 spacy/lang/sa/lex_attrs.py               | 32 +++++++++++-------------
 spacy/lang/sa/stop_words.py              |  3 ---
 spacy/tests/lang/cs/test_text.py         |  3 ---
 spacy/tests/lang/ne/test_text.py         |  3 ---
 spacy/tests/lang/sa/test_text.py         |  3 ---
 spacy/tests/regression/test_issue5838.py | 14 +++++------
 spacy/tests/regression/test_issue5918.py |  3 ---
 13 files changed, 27 insertions(+), 73 deletions(-)

diff --git a/spacy/lang/cs/examples.py b/spacy/lang/cs/examples.py
index fe8a9f6d1..a30b5ac14 100644
--- a/spacy/lang/cs/examples.py
+++ b/spacy/lang/cs/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
 """
 Example sentences to test spaCy and its language models.
 >>> from spacy.lang.cs.examples import sentences
@@ -10,9 +6,9 @@ Example sentences to test spaCy and its language models.
 
 
 sentences = [
-   "Máma mele maso.",
+    "Máma mele maso.",
     "Příliš žluťoučký kůň úpěl ďábelské ódy.",
-    "ArcGIS je geografický informační systém určený pro práci s prostorovými daty." ,
+    "ArcGIS je geografický informační systém určený pro práci s prostorovými daty.",
     "Může data vytvářet a spravovat, ale především je dokáže analyzovat, najít v nich nové vztahy a vše přehledně vizualizovat.",
     "Dnes je krásné počasí.",
     "Nestihl autobus, protože pozdě vstal z postele.",
@@ -39,4 +35,4 @@ sentences = [
     "Jaké PSČ má Praha 1?",
     "PSČ Prahy 1 je 110 00.",
     "Za 20 minut jede vlak.",
-    ]
+]
diff --git a/spacy/lang/cs/lex_attrs.py b/spacy/lang/cs/lex_attrs.py
index 368cab6c8..530d1d5eb 100644
--- a/spacy/lang/cs/lex_attrs.py
+++ b/spacy/lang/cs/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from ...attrs import LIKE_NUM
 
 _num_words = [
@@ -43,7 +40,7 @@ _num_words = [
     "kvadrilion",
     "kvadriliarda",
     "kvintilion",
-    ]
+]
 
 
 def like_num(text):
diff --git a/spacy/lang/he/lex_attrs.py b/spacy/lang/he/lex_attrs.py
index 9eab93ae4..2953e7592 100644
--- a/spacy/lang/he/lex_attrs.py
+++ b/spacy/lang/he/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from ...attrs import LIKE_NUM
 
 _num_words = [
@@ -73,6 +70,7 @@ _ordinal_words = [
     "עשירי",
 ]
 
+
 def like_num(text):
     if text.startswith(("+", "-", "±", "~")):
         text = text[1:]
@@ -84,7 +82,7 @@ def like_num(text):
         num, denom = text.split("/")
         if num.isdigit() and denom.isdigit():
             return True
-    
+
     if text in _num_words:
         return True
 
diff --git a/spacy/lang/ne/stop_words.py b/spacy/lang/ne/stop_words.py
index f008697d0..8470297b9 100644
--- a/spacy/lang/ne/stop_words.py
+++ b/spacy/lang/ne/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
 # Source: https://github.com/sanjaalcorps/NepaliStopWords/blob/master/NepaliStopWords.txt
 
 STOP_WORDS = set(
diff --git a/spacy/lang/sa/__init__.py b/spacy/lang/sa/__init__.py
index 8a4533341..345137817 100644
--- a/spacy/lang/sa/__init__.py
+++ b/spacy/lang/sa/__init__.py
@@ -1,18 +1,10 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from .stop_words import STOP_WORDS
 from .lex_attrs import LEX_ATTRS
-
 from ...language import Language
-from ...attrs import LANG
 
 
 class SanskritDefaults(Language.Defaults):
-    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
-    lex_attr_getters.update(LEX_ATTRS)
-    lex_attr_getters[LANG] = lambda text: "sa"
-
+    lex_attr_getters = LEX_ATTRS
     stop_words = STOP_WORDS
 
 
diff --git a/spacy/lang/sa/examples.py b/spacy/lang/sa/examples.py
index 9d4fa1e49..60243c04c 100644
--- a/spacy/lang/sa/examples.py
+++ b/spacy/lang/sa/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
 """
 Example sentences to test spaCy and its language models.
 
diff --git a/spacy/lang/sa/lex_attrs.py b/spacy/lang/sa/lex_attrs.py
index c33be2ce4..f2b51650b 100644
--- a/spacy/lang/sa/lex_attrs.py
+++ b/spacy/lang/sa/lex_attrs.py
@@ -1,9 +1,5 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from ...attrs import LIKE_NUM
 
-
 # reference 1: https://en.wikibooks.org/wiki/Sanskrit/Numbers
 
 _num_words = [
@@ -106,26 +102,26 @@ _num_words = [
     "सप्तनवतिः",
     "अष्टनवतिः",
     "एकोनशतम्",
-    "शतम्"
+    "शतम्",
 ]
 
 
 def like_num(text):
-   """
+    """
    Check if text resembles a number
    """
-   if text.startswith(("+", "-", "±", "~")):
-       text = text[1:]
-   text = text.replace(",", "").replace(".", "")
-   if text.isdigit():
-       return True
-   if text.count("/") == 1:
-       num, denom = text.split("/")
-       if num.isdigit() and denom.isdigit():
-           return True
-   if text in _num_words:
-       return True
-   return False
+    if text.startswith(("+", "-", "±", "~")):
+        text = text[1:]
+    text = text.replace(",", "").replace(".", "")
+    if text.isdigit():
+        return True
+    if text.count("/") == 1:
+        num, denom = text.split("/")
+        if num.isdigit() and denom.isdigit():
+            return True
+    if text in _num_words:
+        return True
+    return False
 
 
 LEX_ATTRS = {LIKE_NUM: like_num}
diff --git a/spacy/lang/sa/stop_words.py b/spacy/lang/sa/stop_words.py
index aa51ceae0..30302a14d 100644
--- a/spacy/lang/sa/stop_words.py
+++ b/spacy/lang/sa/stop_words.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 # Source: https://gist.github.com/Akhilesh28/fe8b8e180f64b72e64751bc31cb6d323
 
 STOP_WORDS = set(
diff --git a/spacy/tests/lang/cs/test_text.py b/spacy/tests/lang/cs/test_text.py
index d98961738..b834111b9 100644
--- a/spacy/tests/lang/cs/test_text.py
+++ b/spacy/tests/lang/cs/test_text.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import pytest
 
 
diff --git a/spacy/tests/lang/ne/test_text.py b/spacy/tests/lang/ne/test_text.py
index 794f8fbdc..7dd971132 100644
--- a/spacy/tests/lang/ne/test_text.py
+++ b/spacy/tests/lang/ne/test_text.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import pytest
 
 
diff --git a/spacy/tests/lang/sa/test_text.py b/spacy/tests/lang/sa/test_text.py
index 7c961bdae..41257a4d8 100644
--- a/spacy/tests/lang/sa/test_text.py
+++ b/spacy/tests/lang/sa/test_text.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import pytest
 
 
diff --git a/spacy/tests/regression/test_issue5838.py b/spacy/tests/regression/test_issue5838.py
index c008c5aec..4e4d98beb 100644
--- a/spacy/tests/regression/test_issue5838.py
+++ b/spacy/tests/regression/test_issue5838.py
@@ -1,15 +1,13 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from spacy.lang.en import English
 from spacy.tokens import Span
 from spacy import displacy
 
-SAMPLE_TEXT = '''First line
+
+SAMPLE_TEXT = """First line
 Second line, with ent
 Third line
 Fourth line
-'''
+"""
 
 
 def test_issue5838():
@@ -18,8 +16,8 @@ def test_issue5838():
 
     nlp = English()
     doc = nlp(SAMPLE_TEXT)
-    doc.ents = [Span(doc, 7, 8, label='test')]
+    doc.ents = [Span(doc, 7, 8, label="test")]
 
-    html = displacy.render(doc, style='ent')
-    found = html.count('</br>')
+    html = displacy.render(doc, style="ent")
+    found = html.count("</br>")
     assert found == 4
diff --git a/spacy/tests/regression/test_issue5918.py b/spacy/tests/regression/test_issue5918.py
index 2dee26d82..3b96009a8 100644
--- a/spacy/tests/regression/test_issue5918.py
+++ b/spacy/tests/regression/test_issue5918.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from spacy.lang.en import English
 from spacy.pipeline import merge_entities, EntityRuler
 

From 90043a6f9b7da3f11c7b11b6d1cfcf1de2824836 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Fri, 4 Sep 2020 13:42:33 +0200
Subject: [PATCH 58/71] Tidy up and auto-format

---
 spacy/language.py                   |  2 --
 spacy/tests/lang/en/test_text.py    | 10 +---------
 spacy/tests/pipeline/test_tagger.py |  4 ++--
 spacy/tests/test_tok2vec.py         |  1 +
 4 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/spacy/language.py b/spacy/language.py
index 1eac3cbaf..70655bafd 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1376,8 +1376,6 @@ class Language:
             docs = (self.make_doc(text) for text in texts)
             for pipe in pipes:
                 docs = pipe(docs)
-
-        nr_seen = 0
         for doc in docs:
             yield doc
 
diff --git a/spacy/tests/lang/en/test_text.py b/spacy/tests/lang/en/test_text.py
index d67bcac62..733e814f7 100644
--- a/spacy/tests/lang/en/test_text.py
+++ b/spacy/tests/lang/en/test_text.py
@@ -56,15 +56,7 @@ def test_lex_attrs_like_number(en_tokenizer, text, match):
     assert tokens[0].like_num == match
 
 
-@pytest.mark.parametrize(
-    "word",
-    [
-        "third",
-        "Millionth",
-        "100th",
-        "Hundredth",
-    ]
-)
+@pytest.mark.parametrize("word", ["third", "Millionth", "100th", "Hundredth"])
 def test_en_lex_attrs_like_number_for_ordinal(word):
     assert like_num(word)
 
diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py
index a1aa7e1e1..540301eac 100644
--- a/spacy/tests/pipeline/test_tagger.py
+++ b/spacy/tests/pipeline/test_tagger.py
@@ -71,6 +71,6 @@ def test_overfitting_IO():
 
 def test_tagger_requires_labels():
     nlp = English()
-    tagger = nlp.add_pipe("tagger")
+    nlp.add_pipe("tagger")
     with pytest.raises(ValueError):
-        optimizer = nlp.begin_training()
+        nlp.begin_training()
diff --git a/spacy/tests/test_tok2vec.py b/spacy/tests/test_tok2vec.py
index 1068b662d..9f0f4b74a 100644
--- a/spacy/tests/test_tok2vec.py
+++ b/spacy/tests/test_tok2vec.py
@@ -135,6 +135,7 @@ TRAIN_DATA = [
     ("Eat blue ham", {"tags": ["V", "J", "N"]}),
 ]
 
+
 def test_tok2vec_listener():
     orig_config = Config().from_str(cfg_string)
     nlp, config = util.load_model_from_config(orig_config, auto_fill=True, validate=True)

From d7cc2ee72da7bec6de024a1a1be228c3ccd8a29e Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Fri, 4 Sep 2020 14:05:55 +0200
Subject: [PATCH 59/71] Fix tests

---
 spacy/schemas.py                               | 14 ++++++++------
 spacy/tests/conftest.py                        |  4 ++--
 spacy/tests/matcher/test_pattern_validation.py |  6 +++---
 spacy/tests/regression/test_issue5918.py       |  5 ++---
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/spacy/schemas.py b/spacy/schemas.py
index be8db6a99..59af53301 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -57,12 +57,13 @@ def validate_token_pattern(obj: list) -> List[str]:
 
 
 class TokenPatternString(BaseModel):
-    REGEX: Optional[StrictStr]
-    IN: Optional[List[StrictStr]]
-    NOT_IN: Optional[List[StrictStr]]
+    REGEX: Optional[StrictStr] = Field(None, alias="regex")
+    IN: Optional[List[StrictStr]] = Field(None, alias="in")
+    NOT_IN: Optional[List[StrictStr]] = Field(None, alias="not_in")
 
     class Config:
         extra = "forbid"
+        allow_population_by_field_name = True  # allow alias and field name
 
     @validator("*", pre=True, each_item=True, allow_reuse=True)
     def raise_for_none(cls, v):
@@ -72,9 +73,9 @@ class TokenPatternString(BaseModel):
 
 
 class TokenPatternNumber(BaseModel):
-    REGEX: Optional[StrictStr] = None
-    IN: Optional[List[StrictInt]] = None
-    NOT_IN: Optional[List[StrictInt]] = None
+    REGEX: Optional[StrictStr] = Field(None, alias="regex")
+    IN: Optional[List[StrictInt]] = Field(None, alias="in")
+    NOT_IN: Optional[List[StrictInt]] = Field(None, alias="not_in")
     EQ: Union[StrictInt, StrictFloat] = Field(None, alias="==")
     NEQ: Union[StrictInt, StrictFloat] = Field(None, alias="!=")
     GEQ: Union[StrictInt, StrictFloat] = Field(None, alias=">=")
@@ -84,6 +85,7 @@ class TokenPatternNumber(BaseModel):
 
     class Config:
         extra = "forbid"
+        allow_population_by_field_name = True  # allow alias and field name
 
     @validator("*", pre=True, each_item=True, allow_reuse=True)
     def raise_for_none(cls, v):
diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index 3cc6bad75..e17199a08 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -46,7 +46,7 @@ def ca_tokenizer():
 
 @pytest.fixture(scope="session")
 def cs_tokenizer():
-    return get_lang_class("cs").Defaults.create_tokenizer()
+    return get_lang_class("cs")().tokenizer
 
 
 @pytest.fixture(scope="session")
@@ -211,7 +211,7 @@ def ru_lemmatizer():
 
 @pytest.fixture(scope="session")
 def sa_tokenizer():
-    return get_lang_class("sa").Defaults.create_tokenizer()
+    return get_lang_class("sa")().tokenizer
 
 
 @pytest.fixture(scope="session")
diff --git a/spacy/tests/matcher/test_pattern_validation.py b/spacy/tests/matcher/test_pattern_validation.py
index 79300374e..4d21aea81 100644
--- a/spacy/tests/matcher/test_pattern_validation.py
+++ b/spacy/tests/matcher/test_pattern_validation.py
@@ -61,10 +61,10 @@ def test_minimal_pattern_validation(en_vocab, pattern, n_errors, n_min_errors):
         matcher.add("TEST", [pattern])
 
 
-def test_pattern_warnings(en_vocab):
+def test_pattern_errors(en_vocab):
     matcher = Matcher(en_vocab)
     # normalize "regex" to upper like "text"
     matcher.add("TEST1", [[{"text": {"regex": "regex"}}]])
-    # warn if subpattern attribute isn't recognized and processed
-    with pytest.warns(UserWarning):
+    # error if subpattern attribute isn't recognized and processed
+    with pytest.raises(MatchPatternError):
         matcher.add("TEST2", [[{"TEXT": {"XX": "xx"}}]])
diff --git a/spacy/tests/regression/test_issue5918.py b/spacy/tests/regression/test_issue5918.py
index 3b96009a8..66280f012 100644
--- a/spacy/tests/regression/test_issue5918.py
+++ b/spacy/tests/regression/test_issue5918.py
@@ -1,18 +1,17 @@
 from spacy.lang.en import English
-from spacy.pipeline import merge_entities, EntityRuler
+from spacy.pipeline import merge_entities
 
 
 def test_issue5918():
     # Test edge case when merging entities.
     nlp = English()
+    ruler = nlp.add_pipe("entity_ruler")
     patterns = [
         {"label": "ORG", "pattern": "Digicon Inc"},
         {"label": "ORG", "pattern": "Rotan Mosle Inc's"},
         {"label": "ORG", "pattern": "Rotan Mosle Technology Partners Ltd"},
     ]
-    ruler = EntityRuler(nlp)
     ruler.add_patterns(patterns)
-    nlp.add_pipe(ruler)
 
     text = """
         Digicon Inc said it has completed the previously-announced disposition

From afdf14c717f93851f9f0db28008a6ba1e206eb2d Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Fri, 4 Sep 2020 14:21:41 +0200
Subject: [PATCH 60/71] Remove Google Analytics [ci skip]

---
 website/gatsby-config.js | 9 ---------
 website/meta/site.json   | 1 -
 website/package.json     | 1 -
 3 files changed, 11 deletions(-)

diff --git a/website/gatsby-config.js b/website/gatsby-config.js
index 752628749..6c67de6ea 100644
--- a/website/gatsby-config.js
+++ b/website/gatsby-config.js
@@ -140,15 +140,6 @@ module.exports = {
                 icon: favicon,
             },
         },
-        {
-            resolve: `gatsby-plugin-google-analytics`,
-            options: {
-                trackingId: site.analytics,
-                head: false,
-                anonymize: true,
-                respectDNT: true,
-            },
-        },
         {
             resolve: `gatsby-plugin-plausible`,
             options: { domain },
diff --git a/website/meta/site.json b/website/meta/site.json
index 7e6f4c692..d1162edf9 100644
--- a/website/meta/site.json
+++ b/website/meta/site.json
@@ -17,7 +17,6 @@
         "github": "explosion"
     },
     "theme": "#09a3d5",
-    "analytics": "UA-58931649-1",
     "newsletter": {
         "user": "spacy.us12",
         "id": "83b0498b1e7fa3c91ce68c3f1",
diff --git a/website/package.json b/website/package.json
index 0ba7159fe..40018f532 100644
--- a/website/package.json
+++ b/website/package.json
@@ -20,7 +20,6 @@
         "gatsby-image": "^2.0.29",
         "gatsby-mdx": "^0.3.6",
         "gatsby-plugin-catch-links": "^2.0.11",
-        "gatsby-plugin-google-analytics": "^2.0.14",
         "gatsby-plugin-manifest": "^2.0.17",
         "gatsby-plugin-offline": "^2.0.24",
         "gatsby-plugin-plausible": "0.0.6",

From 86510227749be0c0db156672fc8f0d8f5401505f Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Fri, 4 Sep 2020 14:27:46 +0200
Subject: [PATCH 61/71] Fix outbound link [ci skip]

---
 website/src/components/link.js | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/website/src/components/link.js b/website/src/components/link.js
index acded7d0d..178cb7105 100644
--- a/website/src/components/link.js
+++ b/website/src/components/link.js
@@ -1,7 +1,6 @@
 import React, { Fragment } from 'react'
 import PropTypes from 'prop-types'
 import { Link as GatsbyLink } from 'gatsby'
-import { OutboundLink } from 'gatsby-plugin-google-analytics'
 import classNames from 'classnames'
 
 import Icon from './icon'
@@ -81,15 +80,9 @@ export default function Link({
     const rel = isInternal ? null : 'noopener nofollow noreferrer'
     return (
         <Wrapper>
-            <OutboundLink
-                href={dest}
-                className={linkClassNames}
-                target="_blank"
-                rel={rel}
-                {...other}
-            >
+            <a href={dest} className={linkClassNames} target="_blank" rel={rel} {...other}>
                 {content}
-            </OutboundLink>
+            </a>
         </Wrapper>
     )
 }

From c28f73ddfdf346f6e86e833428bafd9f8d404798 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Fri, 4 Sep 2020 14:41:55 +0200
Subject: [PATCH 62/71] Update package-lock.json

---
 website/package-lock.json | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/website/package-lock.json b/website/package-lock.json
index 9b449aca9..d995f910e 100644
--- a/website/package-lock.json
+++ b/website/package-lock.json
@@ -12818,14 +12818,6 @@
                 "escape-string-regexp": "^1.0.5"
             }
         },
-        "gatsby-plugin-google-analytics": {
-            "version": "2.0.14",
-            "resolved": "https://registry.npmjs.org/gatsby-plugin-google-analytics/-/gatsby-plugin-google-analytics-2.0.14.tgz",
-            "integrity": "sha512-sFD73d9isJQknnDAAkDidaybHJx6VIaLfy3nO3DwbFaitvZ08RimbynYOkcWAeA0zwwix2RgAvbq/9pAmtTb/A==",
-            "requires": {
-                "@babel/runtime": "^7.0.0"
-            }
-        },
         "gatsby-plugin-manifest": {
             "version": "2.0.17",
             "resolved": "https://registry.npmjs.org/gatsby-plugin-manifest/-/gatsby-plugin-manifest-2.0.17.tgz",

From f9550b4493335fe9d64b4ac482e2de7d041535a4 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Fri, 4 Sep 2020 14:42:12 +0200
Subject: [PATCH 63/71] Fix components in meta.json and website [ci skip]

---
 spacy/language.py               | 1 +
 website/src/templates/models.js | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/spacy/language.py b/spacy/language.py
index 17ca020ca..3b307e3f4 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -211,6 +211,7 @@ class Language:
         # TODO: Adding this back to prevent breaking people's code etc., but
         # we should consider removing it
         self._meta["pipeline"] = list(self.pipe_names)
+        self._meta["components"] = list(self.component_names)
         self._meta["disabled"] = list(self.disabled)
         return self._meta
 
diff --git a/website/src/templates/models.js b/website/src/templates/models.js
index 1bcdd4859..afeefde64 100644
--- a/website/src/templates/models.js
+++ b/website/src/templates/models.js
@@ -167,6 +167,8 @@ const Model = ({ name, langId, langName, baseUrl, repo, compatibility, hasExampl
     const releaseUrl = `https://github.com/${repo}/releases/${releaseTag}`
     const pipeline =
         meta.pipeline && join(meta.pipeline.map(p => <InlineCode key={p}>{p}</InlineCode>))
+    const components =
+        meta.components && join(meta.components.map(p => <InlineCode key={p}>{p}</InlineCode>))
     const sources = formatSources(meta.sources)
     const author = !meta.url ? meta.author : <Link to={meta.url}>{meta.author}</Link>
     const licenseUrl = licenses[meta.license] ? licenses[meta.license].url : null

From 157caf4dfa6483b751e2ea6afd975e7446fd9ceb Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Fri, 4 Sep 2020 16:30:31 +0200
Subject: [PATCH 64/71] WIP: update docs [ci skip]

---
 website/docs/api/dependencymatcher.md         |  88 ++++---------
 website/docs/images/dep-match-diagram.svg     |  91 +++++---------
 website/docs/images/displacy-dep-founded.html |   2 +-
 website/docs/usage/rule-based-matching.md     | 119 ++++++++++--------
 website/docs/usage/v3.md                      |  30 ++++-
 5 files changed, 154 insertions(+), 176 deletions(-)

diff --git a/website/docs/api/dependencymatcher.md b/website/docs/api/dependencymatcher.md
index 333f82043..c90a715d9 100644
--- a/website/docs/api/dependencymatcher.md
+++ b/website/docs/api/dependencymatcher.md
@@ -11,7 +11,8 @@ and [`PhraseMatcher`](/api/phrasematcher) and lets you match on dependency trees
 using
 [Semgrex operators](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html).
 It requires a pretrained [`DependencyParser`](/api/parser) or other component
-that sets the `Token.dep` and `Token.head` attributes.
+that sets the `Token.dep` and `Token.head` attributes. See the
+[usage guide](/usage/rule-based-matching#dependencymatcher) for examples.
 
 ## Pattern format {#patterns}
 
@@ -48,63 +49,18 @@ dictionary, which defines an anchor token using only `RIGHT_ID` and
 
 | Name          | Description                                                                                                                                                            |
 | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `LEFT_ID`     | The name of the left-hand node in the relation, which has been defined in an earlier node.                                                                             |
+| `LEFT_ID`     | The name of the left-hand node in the relation, which has been defined in an earlier node. ~~str~~                                                                     |
 | `REL_OP`      | An operator that describes how the two nodes are related. ~~str~~                                                                                                      |
 | `RIGHT_ID`    | A unique name for the right-hand node in the relation. ~~str~~                                                                                                         |
 | `RIGHT_ATTRS` | The token attributes to match for the right-hand node in the same format as patterns provided to the regular token-based [`Matcher`](/api/matcher). ~~Dict[str, Any]~~ |
 
-The first pattern defines an anchor token and each additional token added to the
-pattern is linked to an existing token `LEFT_ID` by the relation `REL_OP` and is
-described by the name `RIGHT_ID` and the attributes `RIGHT_ATTRS`.
+<Infobox title="Designing dependency matcher patterns" emoji="📖">
 
-Let's say we want to find sentences describing who founded what kind of company:
+For examples of how to construct dependency matcher patterns for different types
+of relations, see the usage guide on
+[dependency matching](/usage/rule-based-matching#dependencymatcher).
 
-- `Smith founded a healthcare company in 2005.`
-- `Williams initially founded an insurance company in 1987.`
-- `Lee, an established CEO, founded yet another AI startup.`
-
-Since it's the root of the dependency parse, `founded` is a good choice for the
-anchor token in our pattern:
-
-```python
-pattern = [
-    {"RIGHT_ID": "anchor_founded", "RIGHT_ATTRS": {"ORTH": "founded"}}
-]
-```
-
-We can add the subject as the token with the dependency label `nsubj` that is a
-direct child `>` of the anchor token named `anchor_founded`:
-
-```python
-pattern = [
-    {"RIGHT_ID": "anchor_founded", "RIGHT_ATTRS": {"ORTH": "founded"}},
-    {
-        "LEFT_ID": "anchor_founded",
-        "REL_OP": ">",
-        "RIGHT_ID": "subject",
-        "RIGHT_ATTRS": {"DEP": "nsubj"},
-    }
-]
-```
-
-And the direct object along with its modifier:
-
-```python
-pattern = [ ...
-    {
-        "LEFT_ID": "anchor_founded",
-        "REL_OP": ">",
-        "RIGHT_ID": "founded_object",
-        "RIGHT_ATTRS": {"DEP": "dobj"},
-    },
-    {
-        "LEFT_ID": "founded_object",
-        "REL_OP": ">",
-        "RIGHT_ID": "founded_object_modifier",
-        "RIGHT_ATTRS": {"DEP": {"IN": ["amod", "compound"]}},
-    }
-]
-```
+</Infobox>
 
 ### Operators
 
@@ -112,20 +68,20 @@ The following operators are supported by the `DependencyMatcher`, most of which
 come directly from
 [Semgrex](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html):
 
-| Symbol    | Description                                                                                                         |
-| --------- | ------------------------------------------------------------------------------------------------------------------- |
-| `A < B`   | `A` is the immediate dependent of `B`                                                                               |
-| `A > B`   | `A` is the immediate head of `B`                                                                                    |
-| `A << B`  | `A` is the dependent in a chain to `B` following dep->head paths                                                    |
-| `A >> B`  | `A` is the head in a chain to `B` following head->dep paths                                                         |
-| `A . B`   | `A` immediately precedes `B`, i.e. `A.i == B.i - 1`, and both are within the same dependency tree                   |
-| `A .* B`  | `A` precedes `B`, i.e. `A.i < B.i`, and both are within the same dependency tree _(not in Semgrex)_                 |
-| `A ; B`   | `A` immediately follows `B`, i.e. `A.i == B.i + 1`, and both are within the same dependency tree _(not in Semgrex)_ |
-| `A ;* B`  | `A` follows `B`, i.e. `A.i > B.i`, and both are within the same dependency tree _(not in Semgrex)_                  |
-| `A $+ B`  | `B` is a right immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i - 1`                 |
-| `A $- B`  | `B` is a left immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i + 1`                  |
-| `A $++ B` | `B` is a right sibling of `A`, i.e. `A` and `B` have the same parent and `A.i < B.i`                                |
-| `A $-- B` | `B` is a left sibling of `A`, i.e. `A` and `B` have the same parent and `A.i > B.i`                                 |
+| Symbol    | Description                                                                                                          |
+| --------- | -------------------------------------------------------------------------------------------------------------------- |
+| `A < B`   | `A` is the immediate dependent of `B`.                                                                               |
+| `A > B`   | `A` is the immediate head of `B`.                                                                                    |
+| `A << B`  | `A` is the dependent in a chain to `B` following dep &rarr; head paths.                                              |
+| `A >> B`  | `A` is the head in a chain to `B` following head &rarr; dep paths.                                                   |
+| `A . B`   | `A` immediately precedes `B`, i.e. `A.i == B.i - 1`, and both are within the same dependency tree.                   |
+| `A .* B`  | `A` precedes `B`, i.e. `A.i < B.i`, and both are within the same dependency tree _(not in Semgrex)_.                 |
+| `A ; B`   | `A` immediately follows `B`, i.e. `A.i == B.i + 1`, and both are within the same dependency tree _(not in Semgrex)_. |
+| `A ;* B`  | `A` follows `B`, i.e. `A.i > B.i`, and both are within the same dependency tree _(not in Semgrex)_.                  |
+| `A $+ B`  | `B` is a right immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i - 1`.                 |
+| `A $- B`  | `B` is a left immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i + 1`.                  |
+| `A $++ B` | `B` is a right sibling of `A`, i.e. `A` and `B` have the same parent and `A.i < B.i`.                                |
+| `A $-- B` | `B` is a left sibling of `A`, i.e. `A` and `B` have the same parent and `A.i > B.i`.                                 |
 
 ## DependencyMatcher.\_\_init\_\_ {#init tag="method"}
 
diff --git a/website/docs/images/dep-match-diagram.svg b/website/docs/images/dep-match-diagram.svg
index f23c573e2..676be4137 100644
--- a/website/docs/images/dep-match-diagram.svg
+++ b/website/docs/images/dep-match-diagram.svg
@@ -1,64 +1,39 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/PR-SVG-20010719/DTD/svg10.dtd">
-<svg width="40cm" height="9cm" viewBox="78 215 793 171" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-  <g id="Background">
-    <g>
-      <rect style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x="79.225" y="263.65" width="169.55" height="54" rx="0" ry="0"/>
-      <text font-size="12.7998" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="164" y="286.55">
-        <tspan x="164" y="286.55">ID: founded</tspan>
-        <tspan x="164" y="302.55">ORTH: founded</tspan>
-      </text>
+<svg xmlns="http://www.w3.org/2000/svg" width="1014" height="221" viewBox="0 0 1014 221">
+  <g fill="none" fill-rule="evenodd">
+    <rect width="202.5" height="59.5" x="19.3" y="85.3" fill="#C3E7F1" stroke="#3AC" stroke-width="4.5" rx="12"/>
+    <rect width="296.5" height="59.5" x="682.3" y="135.3" fill="#C3E7F1" stroke="#3AC" stroke-width="4.5" rx="12"/>
+    <rect width="173.5" height="59.5" x="368.3" y="24.3" fill="#C3E7F1" stroke="#3AC" stroke-width="4.5" rx="12"/>
+    <rect width="173.5" height="59.5" x="368.3" y="133.3" fill="#C3E7F1" stroke="#3AC" stroke-width="4.5" rx="12"/>
+    <rect width="123.5" height="21.5" x="54.8" y="71.8" fill="#3AC" stroke="#3AC" stroke-width="3.5" rx="10.8"/>
+    <rect width="135.5" height="21.5" x="762.8" y="121.8" fill="#3AC" stroke="#3AC" stroke-width="3.5" rx="10.8"/>
+    <rect width="123.5" height="21.5" x="392.8" y="13.8" fill="#3AC" stroke="#3AC" stroke-width="3.5" rx="10.8"/>
+    <rect width="123.5" height="21.5" x="392.8" y="119.8" fill="#3AC" stroke="#3AC" stroke-width="3.5" rx="10.8"/>
+    <g transform="translate(553 144)">
+      <path fill="#3D4251" fill-rule="nonzero" d="M97.3 16.8V9.2l20 10-20 10v-7.4H.8v-5h96.5z"/>
+      <circle cx="60" cy="19" r="19" fill="#F5FF87" stroke="#3D4251" stroke-width="5"/>
     </g>
-    <g>
-      <rect style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x="426.238" y="216" width="139.1" height="54" rx="0" ry="0"/>
-      <text font-size="12.7998" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="495.788" y="238.9">
-        <tspan x="495.788" y="238.9">ID: subject</tspan>
-        <tspan x="495.788" y="254.9">DEP: nsubj</tspan>
-      </text>
+    <g transform="rotate(-25 312.3 -483.3)">
+      <path fill="#3D4251" fill-rule="nonzero" d="M97.3 16.8V9.2l20 10-20 10v-7.4H.8v-5h96.5z"/>
+      <circle cx="60" cy="19" r="19" fill="#F5FF87" stroke="#3D4251" stroke-width="5"/>
     </g>
-    <g>
-      <rect style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x="429.662" y="330.65" width="132.25" height="54" rx="0" ry="0"/>
-      <text font-size="12.7998" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="495.787" y="353.55">
-        <tspan x="495.787" y="353.55">ID: object</tspan>
-        <tspan x="495.787" y="369.55">DEP: dobj</tspan>
-      </text>
+    <g transform="scale(1 -1) rotate(-25 -197.4 -596.3)">
+      <path fill="#3D4251" fill-rule="nonzero" d="M97.3 16.8V9.2l20 10-20 10v-7.4H.8v-5h96.5z"/>
+      <circle cx="60" cy="19" r="19" fill="#F5FF87" stroke="#3D4251" stroke-width="5"/>
     </g>
-    <g>
-      <line style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x1="248.775" y1="290.65" x2="416.834" y2="245.525"/>
-      <polygon style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" fill-rule="evenodd" points="424.078,243.58 415.717,251.002 416.834,245.525 413.123,241.344 "/>
-    </g>
-    <g>
-      <line style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x1="248.775" y1="290.65" x2="420.533" y2="354.268"/>
-      <polygon style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" fill-rule="evenodd" points="427.566,356.873 416.452,358.089 420.533,354.268 419.925,348.711 "/>
-    </g>
-    <g>
-      <ellipse style="fill: #f3e815; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" cx="342.393" cy="265.102" rx="20.1432" ry="22.8019"/>
-      <text font-size="12.8" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="342.393" y="269.002">
-        <tspan x="342.393" y="269.002">&gt;</tspan>
-      </text>
-    </g>
-    <g>
-      <ellipse style="fill: #f3e815; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" cx="342.393" cy="326.65" rx="20.1432" ry="22.8019"/>
-      <text font-size="12.8" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="342.393" y="330.55">
-        <tspan x="342.393" y="330.55">&gt;</tspan>
-      </text>
-    </g>
-    <g>
-      <rect style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x="697.337" y="330.65" width="172.4" height="54" rx="0" ry="0"/>
-      <text font-size="12.7998" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="783.537" y="353.55">
-        <tspan x="783.537" y="353.55">ID: modifier</tspan>
-        <tspan x="783.537" y="369.55">DEP: amod | compound</tspan>
-      </text>
-    </g>
-    <g>
-      <line style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x1="561.912" y1="357.65" x2="687.601" y2="357.65"/>
-      <polygon style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" fill-rule="evenodd" points="695.101,357.65 685.101,362.65 687.601,357.65 685.101,352.65 "/>
-    </g>
-    <g>
-      <ellipse style="fill: #f3e815; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" cx="629.625" cy="357.65" rx="20.1432" ry="22.8019"/>
-      <text font-size="12.8" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="629.625" y="361.55">
-        <tspan x="629.625" y="361.55">&gt;</tspan>
-      </text>
+    <g fill-rule="nonzero">
+      <path fill="#3D4251" d="M621.2 164.2l-13 6.7v-3.4l.2-.7.6-.5 4.6-2.5.9-.3 1-.3a15.4 15.4 0 01-2-.6L609 160l-.6-.5-.2-.7v-3.4l13 6.7v2zM304.2 79.2l-13 6.7v-3.4l.2-.7.6-.5 4.6-2.5.9-.3 1-.3a15.4 15.4 0 01-2-.6L292 75l-.6-.5-.2-.7v-3.4l13 6.7v2zM304.2 149.2l-13 6.7v-3.4l.2-.7.6-.5 4.6-2.5.9-.3 1-.3a15.4 15.4 0 01-2-.6L292 145l-.6-.5-.2-.7v-3.4l13 6.7v2zM41.9 126.2c-.8 0-1.6 0-2.3-.3l-1.7-1c-.5-.5-.9-1-1.1-1.7-.3-.6-.4-1.3-.4-2v-7c0-.7.1-1.4.4-2a4.6 4.6 0 012.8-2.6c.7-.3 1.5-.4 2.3-.4.8 0 1.6.1 2.3.4a4.6 4.6 0 012.8 2.6c.3.6.4 1.3.4 2v7c0 .7-.1 1.4-.4 2-.3.6-.6 1.2-1.1 1.6-.5.5-1 .8-1.8 1-.6.3-1.4.4-2.2.4zm-2.6-5c0 .7.2 1.3.7 1.8.5.5 1.1.7 1.9.7.8 0 1.4-.2 1.9-.7.5-.5.7-1 .7-1.9v-6.8c0-.8-.2-1.4-.7-1.9-.5-.5-1.1-.7-2-.7-.7 0-1.3.2-1.8.7-.5.5-.7 1-.7 1.9v6.8zm16.6-11.8c.9 0 1.6.2 2.3.4.7.2 1.3.6 1.8 1 .4.4.8 1 1 1.6a4.8 4.8 0 01-.4 4.9c-.5.7-1.3 1.3-2.2 1.6l3.3 7.1h-3.2l-3-6.6h-2.1v6.6h-2.9v-16.6H56zm-2.5 7.7h2.5c.8 0 1.5-.3 2-.7.4-.5.6-1.1.6-1.9 0-.8-.2-1.4-.7-2-.4-.4-1-.6-1.9-.6h-2.5v5.2zm10-5v-2.7h12.1v2.7h-4.6V126H68v-14h-4.5zM78.2 126v-16.6H81v6.7h4.6v-6.7h2.8V126h-2.8v-7.3H81v7.3H78zm16.9-1.8c0-.7.2-1.2.6-1.6a2 2 0 011.5-.6c.6 0 1.1.2 1.5.6.4.4.6 1 .6 1.6 0 .6-.2 1-.6 1.5-.4.3-.9.5-1.5.5a2 2 0 01-1.5-.5 2 2 0 01-.6-1.5zm0-9c0-.6.2-1.1.6-1.5a2 2 0 011.5-.6c.6 0 1.1.2 1.5.6.4.4.6 1 .6 1.6 0 .6-.2 1-.6 1.4-.4.4-.9.6-1.5.6a2 2 0 01-1.5-.6 2 2 0 01-.6-1.4zm16.3 10.8v-11l-1.2-.1-.6-.3a.7.7 0 01-.2-.5v-1.3h2v-1c0-.8.1-1.5.3-2a4.3 4.3 0 012.6-2.6 6 6 0 013.8 0v1.5c0 .3-.2.4-.4.5h-.8l-1 .2a1.7 1.7 0 00-1.2 1.3 4 4 0 00-.2 1.2v.9h3.5v2.3h-3.4V126h-3.2zm14.2-13.5c1 0 2 .1 2.7.4a5.8 5.8 0 013.4 3.6c.4.8.5 1.8.5 2.8 0 1-.1 2-.5 2.9a5.8 5.8 0 01-3.4 3.5c-.8.3-1.7.5-2.7.5-1 0-1.9-.2-2.7-.5a5.8 5.8 0 01-3.4-3.5c-.3-.9-.5-1.8-.5-2.9 0-1 .2-2 .5-2.8a5.9 5.9 0 013.4-3.5c.8-.4 1.7-.5 2.7-.5zm0 11.2c1.1 0 2-.3 2.5-1 .5-.8.8-2 .8-3.4s-.3-2.5-.8-3.3c-.5-.7-1.4-1-2.5-1-1 0-2 .3-2.5 1-.5.8-.8 1.9-.8 3.3 0 1.5.3 2.6.8 3.3.6.8 1.4 1.1 2.5 1.1zm12-11v8.4c0 .9.2 1.5.6 2 .4.4 1 .6 
1.7.6.6 0 1.1-.1 1.6-.4.5-.2 1-.6 1.4-1v-9.6h3.2V126h-2c-.4 0-.7-.2-.8-.6l-.2-1-.9.7a5.2 5.2 0 01-3.3 1.1c-.7 0-1.3-.1-1.9-.4-.5-.2-1-.6-1.4-1-.4-.5-.7-1-.9-1.6-.2-.6-.3-1.3-.3-2v-8.5h3.3zm11.9 13.3v-13.3h2c.4 0 .7.2.8.5l.2 1.1a7.3 7.3 0 011.8-1.3 4.8 4.8 0 012.3-.5c.8 0 1.4 0 2 .3l1.4 1a6.8 6.8 0 011.2 3.6v8.6h-3.3v-8.5c0-.8-.2-1.4-.6-1.9-.3-.4-.9-.6-1.6-.6-.6 0-1.1 0-1.6.3l-1.4 1v9.7h-3.2zm24.2 0c-.4 0-.7-.2-.8-.6l-.3-1.3a8 8 0 01-.9.9 5.5 5.5 0 01-2 1l-1.4.2a4.4 4.4 0 01-3.6-1.8c-.4-.6-.7-1.3-1-2.1a10.8 10.8 0 010-5.7c.3-.8.7-1.6 1.2-2.2.5-.6 1.1-1.1 1.8-1.5.7-.3 1.5-.5 2.4-.5a4.6 4.6 0 013.3 1.3v-7h3.2V126h-2zm-4.3-2.4c.7 0 1.2 0 1.7-.4a5 5 0 001.3-1.2v-6a3.2 3.2 0 00-2.6-1.1c-.4 0-.9 0-1.3.2s-.7.5-1 .9a4 4 0 00-.6 1.4 9 9 0 000 4c0 .5.3 1 .5 1.3.2.3.5.6.9.7.3.2.7.2 1 .2zm15.2-11.1c.9 0 1.6 0 2.3.4a5 5 0 013 3 7.2 7.2 0 01.5 3.2l-.2.4-.2.2a1 1 0 01-.4 0h-8.2c.1 1.4.5 2.4 1.1 3 .7.7 1.5 1 2.6 1 .5 0 1 0 1.3-.2.4 0 .7-.2 1-.4l.7-.4.7-.2.3.1.2.2 1 1.2c-.4.4-.8.8-1.2 1a6.7 6.7 0 01-2.9 1c-.5.2-1 .2-1.4.2-1 0-1.8-.2-2.6-.5s-1.5-.7-2-1.3c-.7-.6-1.1-1.4-1.5-2.3a8.7 8.7 0 010-5.6c.2-.8.7-1.5 1.2-2.1a6 6 0 012-1.4c.8-.4 1.7-.5 2.7-.5zm0 2.3a3 3 0 00-2.1.7 4 4 0 00-1 2.3h6c0-.4 0-.8-.2-1.2 0-.3-.3-.7-.5-1l-.9-.6a3 3 0 00-1.2-.2zm17.8 11.2c-.4 0-.7-.2-.8-.6l-.3-1.3a8 8 0 01-.9.9 5.5 5.5 0 01-2 1l-1.4.2a4.4 4.4 0 01-3.6-1.8c-.4-.6-.7-1.3-1-2.1a10.8 10.8 0 010-5.7c.4-.8.7-1.6 1.2-2.2.5-.6 1.1-1.1 1.8-1.5.7-.3 1.5-.5 2.4-.5a4.6 4.6 0 013.3 1.3v-7h3.3V126h-2zm-4.3-2.4c.7 0 1.2 0 1.7-.4a5 5 0 001.3-1.2v-6a3.2 3.2 0 00-2.5-1.1c-.5 0-1 0-1.4.2l-1 .9a4 4 0 00-.6 1.4 9 9 0 000 4l.5 1.3c.3.3.5.6.9.7.3.2.7.2 1.1.2zM699.1 159.4h5.1c.9 0 1.6.2 2.3.4.7.3 1.3.6 1.8 1a5.5 5.5 0 011.5 3.8v6.1c0 .8 0 1.5-.4 2.2a4.7 4.7 0 01-2.9 2.7c-.7.3-1.4.4-2.3.4h-5v-16.6zm3 14h2.1c.9 0 1.5-.3 2-.8s.7-1 .7-1.9v-6c0-.8-.2-1.4-.7-1.9-.5-.5-1.1-.7-2-.7H702v11.3zm11.1 2.6v-16.6h10.2v2.6h-7.3v4.2h6.5v2.6h-6.5v4.7h7.3v2.5h-10.2zm19.3-16.6c.8 0 1.6.2 2.2.4a4.5 4.5 0 012.8 2.6c.3.6.5 1.3.5 2 0 .8-.2 1.5-.4 
2.1-.3.7-.6 1.2-1.1 1.6a5 5 0 01-1.8 1c-.6.3-1.4.4-2.2.4h-2.9v6.5h-2.8v-16.6h5.7zm-2.9 7.7h2.9c.8 0 1.4-.3 1.9-.7.4-.5.7-1.1.7-2 0-.7-.3-1.3-.7-1.8-.5-.5-1.1-.7-2-.7h-2.8v5.2zm14 7c0-.6.2-1 .6-1.5a2 2 0 011.6-.6c.6 0 1 .2 1.5.6.4.4.6 1 .6 1.6 0 .6-.2 1-.6 1.5-.4.3-1 .5-1.5.5a2 2 0 01-1.6-.5 2 2 0 01-.6-1.5zm0-8.8c0-.7.2-1.2.6-1.6a2 2 0 011.6-.6c.6 0 1 .2 1.5.6.4.4.6 1 .6 1.6 0 .6-.2 1-.6 1.4-.4.4-1 .6-1.5.6a2 2 0 01-1.6-.6 2 2 0 01-.6-1.4zm26.2 10.7h-1.4l-.7-.1c-.2-.1-.3-.3-.4-.6l-.3-1a11 11 0 01-2 1.4l-1.1.4a6.6 6.6 0 01-2.9-.1c-.4-.2-.9-.4-1.2-.7l-.8-1.1a4 4 0 01-.3-1.6c0-.5.2-1 .4-1.5.3-.5.7-1 1.3-1.3.7-.4 1.5-.7 2.5-1 1-.3 2.3-.4 3.8-.4v-.8c0-1-.2-1.6-.6-2-.3-.4-.9-.7-1.6-.7a4 4 0 00-2.2.6l-.7.5-.7.1a1 1 0 01-.6-.1l-.4-.5-.6-1a8 8 0 015.6-2c.8 0 1.5 0 2 .3a4.4 4.4 0 012.6 2.7c.2.6.3 1.3.3 2v8.5zm-6.2-2h1a3.4 3.4 0 001.4-.8l.7-.6v-2.3l-2.3.2-1.6.5a2 2 0 00-.8.6l-.2.8c0 .6.2 1 .5 1.2.3.3.8.4 1.3.4zm9.4 2v-13.3h2c.4 0 .6.2.8.5l.2 1 .7-.7a4.7 4.7 0 011.8-1h1c1 0 1.6.2 2.1.6.6.5 1 1.1 1.3 1.8a4 4 0 011.8-1.9l1.2-.4a5.7 5.7 0 013.2.2c.6.2 1 .5 1.4 1 .4.4.7 1 1 1.6l.2 2.1v8.5h-3.2v-8.5c0-.8-.2-1.5-.5-2-.4-.3-1-.6-1.7-.6l-.9.2a2.2 2.2 0 00-1.2 1.3c-.2.3-.2.7-.2 1.1v8.5h-3.2v-8.5c0-.9-.2-1.5-.6-2-.3-.3-.9-.6-1.6-.6-.4 0-.9.2-1.3.4s-.7.5-1 1v9.7H773zm27.6-13.5c1 0 1.9.1 2.7.4a5.8 5.8 0 013.4 3.6 8.2 8.2 0 010 5.7 5.8 5.8 0 01-3.4 3.5c-.8.3-1.7.5-2.7.5-1 0-2-.2-2.7-.5a5.8 5.8 0 01-3.5-3.5c-.3-.9-.4-1.8-.4-2.9 0-1 .1-2 .4-2.8a5.9 5.9 0 013.4-3.5c.9-.4 1.8-.5 2.8-.5zm0 11.2c1 0 2-.3 2.4-1 .6-.8.8-2 .8-3.4s-.2-2.5-.8-3.3c-.5-.7-1.3-1-2.4-1-1.1 0-2 .3-2.5 1-.6.8-.8 1.9-.8 3.3 0 1.5.2 2.6.8 3.3.5.8 1.4 1.1 2.5 1.1zM819 176c-.5 0-.7-.2-.9-.6l-.2-1.3a8 8 0 01-.9.9 5.5 5.5 0 01-2.1 1l-1.3.2a4.4 4.4 0 01-3.6-1.8c-.4-.6-.8-1.3-1-2.1a10.8 10.8 0 010-5.7c.3-.8.7-1.6 1.2-2.2.5-.6 1.1-1.1 1.8-1.5.7-.3 1.5-.5 2.4-.5a4.6 4.6 0 013.3 1.3v-7h3.2V176h-2zm-4.3-2.4c.6 0 1.2 0 1.7-.4a5 5 0 001.3-1.2v-6a3.2 3.2 0 00-2.6-1.1c-.5 0-.9 0-1.3.2s-.7.5-1 .9a4 4 0 00-.6 1.4 9 9 0 000 4c0 .5.3 1 .5 
1.3.2.3.5.6.8.7.4.2.8.2 1.2.2zm15.6-17.7h2.5v24.5h-2.5v-24.5zm21.3 9.6l-.3.3h-.9a15.5 15.5 0 00-1.3-.8l-1.1-.1c-.6 0-1 .1-1.5.3-.4.2-.8.5-1 .9a4 4 0 00-.7 1.4l-.2 1.8c0 .7 0 1.3.2 1.9l.7 1.4c.3.4.6.7 1 .9.4.2 1 .3 1.4.3a3.4 3.4 0 002-.7c.3-.1.5-.2.6-.4l.6-.2c.2 0 .4.1.5.3l1 1.2a5.8 5.8 0 01-2.5 1.7l-1.4.4h-1.4a6 6 0 01-2.3-.4c-.7-.3-1.4-.7-2-1.3a8.4 8.4 0 01-1.8-5c.1-1 .3-2 .6-2.8.2-.9.6-1.6 1.2-2.2.5-.6 1.2-1 2-1.4a7 7 0 012.8-.5c1 0 1.9.1 2.6.4.8.4 1.4.8 2 1.4l-.8 1.2zm8.7-3c1 0 2 .1 2.7.4a5.8 5.8 0 013.4 3.6 8.2 8.2 0 010 5.7 5.8 5.8 0 01-3.4 3.5c-.8.3-1.7.5-2.7.5-1 0-1.9-.2-2.7-.5a5.8 5.8 0 01-3.4-3.5c-.3-.9-.5-1.8-.5-2.9 0-1 .2-2 .5-2.8a5.9 5.9 0 013.4-3.5c.8-.4 1.7-.5 2.7-.5zm0 11.2c1.1 0 2-.3 2.5-1 .5-.8.8-2 .8-3.4s-.3-2.5-.8-3.3c-.5-.7-1.4-1-2.5-1s-2 .3-2.5 1c-.5.8-.8 1.9-.8 3.3 0 1.5.3 2.6.8 3.3.6.8 1.4 1.1 2.5 1.1zm9.1 2.3v-13.3h2c.4 0 .7.2.8.5l.2 1 .7-.7a4.7 4.7 0 011.8-1h1c1 0 1.6.2 2.2.6.5.5 1 1.1 1.2 1.8a4 4 0 011.8-1.9l1.2-.4a5.7 5.7 0 013.2.2c.6.2 1 .5 1.4 1 .4.4.7 1 1 1.6l.2 2.1v8.5H885v-8.5c0-.8-.2-1.5-.5-2-.4-.3-1-.6-1.7-.6l-.9.2a2.2 2.2 0 00-1.2 1.3l-.2 1.1v8.5h-3.2v-8.5c0-.9-.2-1.5-.6-2-.3-.3-.9-.6-1.6-.6-.4 0-.9.2-1.3.4s-.7.5-1 1v9.7h-3.3zm21.9 4.4v-17.7h2l.5.1.3.4.2 1.3c.6-.6 1.2-1.1 1.9-1.5a5 5 0 014.4-.1c.6.3 1.2.7 1.6 1.3.4.6.8 1.3 1 2.1a10.7 10.7 0 010 5.7c-.3.8-.7 1.6-1.2 2.2a5.3 5.3 0 01-6 1.7l-1.5-1v5.5h-3.2zm6.2-15.5c-.6 0-1.2.2-1.7.5a5 5 0 00-1.3 1.2v6a3 3 0 002.6 1.2c.5 0 1-.1 1.3-.3.4-.2.7-.5 1-.9.3-.3.5-.8.6-1.4a9 9 0 000-4c0-.5-.2-1-.5-1.3-.2-.3-.5-.6-.8-.7-.4-.2-.7-.3-1.2-.3zm14.2-2.4c1 0 1.9.1 2.7.4a5.8 5.8 0 013.4 3.6 8.2 8.2 0 010 5.7 5.8 5.8 0 01-3.4 3.5c-.8.3-1.7.5-2.7.5-1 0-2-.2-2.7-.5a5.8 5.8 0 01-3.4-3.5c-.4-.9-.5-1.8-.5-2.9 0-1 .1-2 .5-2.8a5.9 5.9 0 013.4-3.5c.8-.4 1.7-.5 2.7-.5zm0 11.2c1.1 0 2-.3 2.5-1 .5-.8.8-2 .8-3.4s-.3-2.5-.8-3.3c-.6-.7-1.4-1-2.5-1s-2 .3-2.5 1c-.5.8-.8 1.9-.8 3.3 0 1.5.3 2.6.8 3.3.5.8 1.4 1.1 2.5 1.1zm12-11v8.4c0 .9.2 1.5.6 2 .4.4 1 .6 1.7.6.5 0 1-.1 1.5-.4.5-.2 1-.6 1.4-1v-9.6h3.2V176h-2c-.3 
0-.6-.2-.7-.6l-.3-1-.8.7a5.2 5.2 0 01-3.3 1.1c-.7 0-1.4-.1-2-.4-.5-.2-1-.6-1.3-1-.4-.5-.7-1-.9-1.6-.2-.6-.3-1.3-.3-2v-8.5h3.2zm11.9 13.3v-13.3h2c.3 0 .6.2.7.5l.3 1.1a7.3 7.3 0 011.8-1.3 4.8 4.8 0 012.3-.5c.7 0 1.4 0 2 .3.5.3 1 .6 1.3 1 .4.5.7 1 .9 1.7.2.6.3 1.3.3 2v8.5H944v-8.5c0-.8-.2-1.4-.6-1.9-.4-.4-1-.7-1.7-.7-.5 0-1 .2-1.5.4l-1.4 1v9.7h-3.2zm24.1 0c-.4 0-.7-.2-.8-.6l-.2-1.3a8 8 0 01-1 .9 5.5 5.5 0 01-2 1l-1.3.2a4.4 4.4 0 01-3.6-1.8c-.4-.6-.8-1.3-1-2.1a10.8 10.8 0 010-5.7c.3-.8.7-1.6 1.2-2.2.5-.6 1-1.1 1.8-1.5.7-.3 1.5-.5 2.4-.5a4.6 4.6 0 013.3 1.3v-7h3.2V176h-2zm-4.2-2.4c.6 0 1.2 0 1.7-.4a5 5 0 001.3-1.2v-6a3.2 3.2 0 00-2.6-1.1c-.5 0-1 0-1.3.2-.4.2-.7.5-1 .9a4 4 0 00-.6 1.4 9 9 0 000 4c0 .5.2 1 .5 1.3.2.3.5.6.8.7.4.2.7.2 1.2.2zM395.2 48.4h5.1c.9 0 1.6.2 2.3.4.7.3 1.3.6 1.8 1 .5.5.8 1.1 1.1 1.7.3.7.4 1.4.4 2.2v6c0 .8-.1 1.5-.4 2.2a4.7 4.7 0 01-2.9 2.7c-.7.3-1.4.4-2.3.4h-5.1V48.4zm2.9 14h2.2c.8 0 1.5-.3 2-.8s.7-1 .7-1.9v-6c0-.8-.2-1.4-.7-1.9-.5-.5-1.2-.7-2-.7h-2.2v11.3zm11.2 2.6V48.4h10.2V51h-7.3v4.2h6.5v2.6h-6.5v4.7h7.3V65h-10.2zm19.3-16.6c.8 0 1.5.2 2.2.4a4.5 4.5 0 012.8 2.6c.3.6.4 1.3.4 2 0 .8 0 1.5-.3 2.1-.3.7-.7 1.2-1.2 1.6a5 5 0 01-1.7 1c-.7.3-1.4.4-2.2.4h-2.9V65h-2.9V48.4h5.8zm-2.9 7.7h2.9c.7 0 1.4-.3 1.8-.7.5-.5.7-1.1.7-2 0-.7-.2-1.3-.7-1.8-.4-.5-1-.7-1.8-.7h-2.9V56zm14 7c0-.6.2-1 .6-1.5a2 2 0 011.5-.6c.7 0 1.2.2 1.6.6.4.4.6 1 .6 1.6 0 .6-.2 1-.6 1.5-.4.3-1 .5-1.6.5a2 2 0 01-1.5-.5 2 2 0 01-.6-1.5zm0-8.8c0-.7.2-1.2.6-1.6a2 2 0 011.5-.6c.7 0 1.2.2 1.6.6.4.4.6 1 .6 1.6 0 .6-.2 1-.6 1.4-.4.4-1 .6-1.6.6a2 2 0 01-1.5-.6 2 2 0 01-.6-1.4zM455.5 65V51.7h2c.3 0 .6.2.7.5l.3 1.1a7.3 7.3 0 011.8-1.3 4.8 4.8 0 012.3-.5c.7 0 1.4 0 2 .3.5.3 1 .6 1.3 1 .4.5.7 1 .9 1.7.2.6.3 1.3.3 2V65h-3.2v-8.5c0-.8-.2-1.4-.6-1.9-.4-.4-1-.6-1.7-.6-.5 0-1 0-1.6.3l-1.3 1V65h-3.2zm22.7-10.7l-.3.3h-.9a34.2 34.2 0 00-1.4-.7l-1-.1c-.7 0-1.2.1-1.6.4-.3.3-.5.6-.5 1 0 .3 0 .6.3.8l.7.5 1 .4a34.3 34.3 0 012.5.8l1 .7c.3.3.6.6.7 1 .2.4.3.8.3 1.4 0 .6 0 1.2-.3 1.7l-1 1.5c-.5.3-1 .7-1.7.9a7.2 7.2 0 
01-3.7.2 8 8 0 01-2.3-.9l-1-.6.8-1.3.3-.3.5-.1c.2 0 .4 0 .6.2a12.5 12.5 0 001.4.7l1.3.2c.3 0 .7 0 1-.2l.6-.3.4-.5.1-.7c0-.3 0-.5-.3-.7-.1-.2-.4-.4-.7-.5l-1-.4a34.8 34.8 0 01-2.5-1c-.4 0-.7-.3-1-.6l-.8-1-.2-1.6a3.8 3.8 0 011.2-2.8l1.6-1 2.2-.2c1 0 1.7.1 2.5.4.8.3 1.4.7 1.9 1.2l-.7 1.2zm6.3-2.6V60c0 .9.2 1.5.6 2 .4.4 1 .6 1.7.6.6 0 1-.1 1.6-.4l1.3-1v-9.6h3.2V65h-2c-.3 0-.6-.2-.7-.6l-.3-1-.8.7a5.2 5.2 0 01-3.3 1.1c-.7 0-1.3-.1-2-.4-.5-.2-1-.6-1.3-1-.4-.5-.7-1-.9-1.6-.2-.6-.3-1.3-.3-2v-8.5h3.2zm12 13.3V45.7h3.1v7.6c.6-.6 1.2-1 1.8-1.3a5.2 5.2 0 014.4-.1c.6.3 1 .7 1.5 1.3.5.6.8 1.3 1 2.1a10.7 10.7 0 010 5.7c-.3.8-.7 1.6-1.2 2.2a5.3 5.3 0 01-4.1 2 4.7 4.7 0 01-2-.5 4 4 0 01-.9-.5l-.6-.7-.2.9-.2.5-.6.1h-2zm6.2-11c-.7 0-1.3 0-1.8.4a5 5 0 00-1.3 1.2v6a3 3 0 002.6 1.2c.5 0 1-.1 1.3-.3.4-.2.8-.5 1-.9.3-.3.5-.8.7-1.4a9 9 0 000-4c-.1-.5-.3-1-.6-1.3-.2-.3-.5-.6-.8-.7-.3-.2-.7-.3-1.1-.3zm12-2.3v13.8l-.3 1.6c-.2.5-.4 1-.7 1.3l-1.3 1c-.6.2-1.3.3-2 .3a5.7 5.7 0 01-1.7-.3l.1-1.7c0-.2.1-.3.2-.3l.7-.1.8-.1c.2 0 .4-.2.5-.3.2-.2.3-.3.3-.6l.1-.8V51.7h3.2zm.4-4c0 .4 0 .6-.2.9a2.1 2.1 0 01-1 1 2 2 0 01-2.3-.4l-.4-.6c-.2-.3-.2-.5-.2-.8a2 2 0 01.6-1.5 2 2 0 011.4-.6c.3 0 .6 0 .8.2a2.1 2.1 0 011.1 1l.2.9zM400.9 157.4h5c1 0 1.7.2 2.4.4l1.7 1c.5.5 1 1.1 1.2 1.7.2.7.4 1.4.4 2.2v6c0 .8-.2 1.5-.4 2.2a4.7 4.7 0 01-3 2.7c-.6.3-1.4.4-2.2.4h-5.1v-16.6zm2.9 14h2.2c.8 0 1.5-.3 2-.8.4-.5.7-1 .7-1.9v-6c0-.8-.3-1.4-.8-1.9-.4-.5-1.1-.7-2-.7h-2.1v11.3zM415 174v-16.6h10.2v2.6h-7.4v4.2h6.5v2.6h-6.5v4.7h7.4v2.5H415zm19.2-16.6c.8 0 1.6.2 2.3.4a4.5 4.5 0 012.8 2.6c.3.6.4 1.3.4 2 0 .8-.1 1.5-.4 2.1-.2.7-.6 1.2-1.1 1.6a5 5 0 01-1.7 1c-.7.3-1.5.4-2.3.4h-2.8v6.5h-3v-16.6h5.8zm-2.8 7.7h2.8c.8 0 1.4-.3 1.9-.7.5-.5.7-1.1.7-2 0-.7-.2-1.3-.7-1.8-.5-.5-1-.7-1.9-.7h-2.8v5.2zm14 7c0-.6.2-1 .6-1.5a2 2 0 011.5-.6c.6 0 1.1.2 1.5.6.4.4.6 1 .6 1.6 0 .6-.2 1-.6 1.5-.4.3-.9.5-1.5.5a2 2 0 01-1.5-.5 2 2 0 01-.6-1.5zm0-8.8c0-.7.2-1.2.6-1.6a2 2 0 011.5-.6c.6 0 1.1.2 1.5.6.4.4.6 1 .6 1.6 0 .6-.2 1-.6 1.4-.4.4-.9.6-1.5.6a2 2 0 01-1.5-.6 2 
2 0 01-.6-1.4zm25.1 10.7c-.4 0-.7-.2-.8-.6l-.3-1.3a8 8 0 01-.8.9 5.5 5.5 0 01-2.2 1l-1.3.2a4.4 4.4 0 01-3.5-1.8c-.5-.6-.8-1.3-1-2.1a10.8 10.8 0 010-5.7c.3-.8.7-1.6 1.2-2.2.5-.6 1-1.1 1.8-1.5.7-.3 1.5-.5 2.3-.5a4.6 4.6 0 013.4 1.3v-7h3.2V174h-2zm-4.3-2.4c.7 0 1.3 0 1.7-.4a5 5 0 001.4-1.2v-6a3.2 3.2 0 00-2.6-1.1c-.5 0-1 0-1.3.2-.4.2-.8.5-1 .9a4 4 0 00-.7 1.4 9 9 0 000 4l.5 1.3c.3.3.6.6.9.7.3.2.7.2 1.1.2zm15.4-11.1c1 0 2 .1 2.7.4a5.8 5.8 0 013.4 3.6c.4.8.5 1.8.5 2.8 0 1-.1 2-.5 2.9a5.8 5.8 0 01-3.4 3.5c-.8.3-1.7.5-2.7.5-1 0-1.9-.2-2.7-.5a5.8 5.8 0 01-3.4-3.5c-.3-.9-.5-1.8-.5-2.9 0-1 .2-2 .5-2.8a5.9 5.9 0 013.4-3.5c.8-.4 1.7-.5 2.7-.5zm0 11.2c1.1 0 2-.3 2.5-1 .5-.8.8-2 .8-3.4s-.3-2.5-.8-3.3c-.5-.7-1.4-1-2.5-1s-2 .3-2.5 1c-.5.8-.8 1.9-.8 3.3 0 1.5.3 2.6.8 3.3.6.8 1.4 1.1 2.5 1.1zm9.2 2.3v-19.3h3.2v7.6c.5-.6 1.1-1 1.8-1.3a5.2 5.2 0 014.3-.1c.6.3 1.1.7 1.6 1.3.4.6.7 1.3 1 2.1a10.7 10.7 0 010 5.7c-.3.8-.7 1.6-1.2 2.2a5.3 5.3 0 01-4.2 2 4.7 4.7 0 01-2-.5 4 4 0 01-.8-.5l-.7-.7-.1.9-.3.5-.5.1h-2.1zm6.2-11c-.7 0-1.2 0-1.7.4a5 5 0 00-1.3 1.2v6a3 3 0 002.6 1.2c.4 0 .9-.1 1.3-.3.4-.2.7-.5 1-.9l.6-1.4a9 9 0 000-4l-.5-1.3c-.2-.3-.5-.6-.9-.7-.3-.2-.7-.3-1.1-.3zm12-2.3v13.8l-.3 1.6c-.1.5-.4 1-.7 1.3-.3.4-.8.7-1.3 1-.6.2-1.2.3-2 .3a5.7 5.7 0 01-1.6-.3v-1.7l.3-.3.6-.1.8-.1c.3 0 .4-.2.6-.3l.3-.6V160.7h3.3zm.4-4l-.1.9a2.1 2.1 0 01-1.1 1 2 2 0 01-2.3-.4l-.4-.6-.2-.8a2 2 0 01.6-1.5 2 2 0 011.5-.6c.2 0 .5 0 .8.2a2.1 2.1 0 011 1c.2.3.2.6.2.9z"/>
+      <path fill="#F5FF87" d="M64.3 78v-2h7.9v2h-2.8v9h2.8v2h-7.9v-2h2.8v-9h-2.8zm10.2-2h4l1.8.3c.5.2 1 .5 1.3.9.4.3.7.8 1 1.3.1.5.3 1 .3 1.6V85c0 .6-.2 1.2-.4 1.7a3.7 3.7 0 01-2.2 2.1l-1.8.3h-4V76zm2.2 11h1.8c.6 0 1.1-.3 1.5-.6.4-.4.6-1 .6-1.5V80a2 2 0 00-.6-1.4c-.4-.4-.9-.6-1.5-.6h-1.8V87zm10.5.6c0-.5.1-1 .5-1.2.3-.3.7-.5 1.1-.5.5 0 1 .2 1.3.5.3.3.4.7.4 1.2 0 .4-.1.8-.4 1.1-.4.3-.8.5-1.2.5-.5 0-1-.2-1.2-.5-.4-.3-.5-.7-.5-1.1zm0-7c0-.5.1-1 .5-1.2.3-.3.7-.5 1.1-.5.5 0 1 .2 1.3.5.3.3.4.7.4 1.2s-.1.8-.4 1.1c-.4.3-.8.5-1.2.5-.5 0-1-.2-1.2-.5-.4-.3-.5-.6-.5-1.1z"/>
+      <path fill="#FFF" d="M99 82.2v-2h2.7v-1.3c0-.9.3-1.6.9-2.1.6-.5 1.3-.8 2.3-.8h2.9v2h-2.9a1 1 0 00-.6.3c-.2.1-.3.3-.3.6v1.3h3.8v2H104V89h-2.3v-6.8H99zm14.7 7c-.6 0-1.2-.1-1.7-.3a4 4 0 01-1.4-.8c-.4-.4-.6-.8-.8-1.2a4 4 0 01-.4-1.6v-2.5c0-.6.1-1.1.4-1.6a3.5 3.5 0 012.2-2c.5-.2 1.1-.3 1.8-.3.6 0 1.2.1 1.7.3a3.5 3.5 0 012.2 2c.3.5.4 1 .4 1.6v2.5c0 .5-.1 1-.4 1.6a3.5 3.5 0 01-2.2 2c-.5.2-1 .3-1.8.3zm-2-4c0 .7.2 1.1.5 1.5.4.3 1 .5 1.6.5a2 2 0 001.5-.5c.3-.4.5-.8.5-1.4v-2.5c0-.6-.2-1-.5-1.4a2 2 0 00-1.5-.5 2 2 0 00-1.6.5c-.3.4-.5.8-.5 1.4v2.5zm10.6-6.1v6.2c0 1.3.5 2 1.8 2 1.2 0 1.8-.7 1.8-2V79h2.2v6.2c0 1.2-.4 2.2-1 2.9-.8.6-1.8 1-3 1-1.3 0-2.3-.4-3-1-.7-.7-1.1-1.7-1.1-3v-6.1h2.3zm8 0h2v1.7h.2c.2-.6.4-1 .9-1.4a3 3 0 011.8-.5c1 0 1.8.3 2.4 1 .5.7.8 1.5.8 2.6V89h-2.2v-6.2c0-.6-.2-1-.5-1.4-.3-.4-.8-.5-1.3-.5-.6 0-1 .1-1.4.5a2 2 0 00-.5 1.4V89h-2.3v-9.9zm10 3.7c0-.6.1-1.1.3-1.6.2-.5.4-.9.7-1.2a3 3 0 011-.8l1.4-.3a3 3 0 011.8.5c.5.4.8.8.9 1.4h.2v-.6a5.6 5.6 0 01-.1-1.1v-3h2.2V89h-2.1v-1.7h-.2c-.1.6-.4 1-.9 1.4a3 3 0 01-1.8.5c-.5 0-1-.1-1.3-.3a3 3 0 01-1.1-.8c-.3-.3-.5-.7-.7-1.2-.2-.5-.3-1-.3-1.6v-2.5zm2.3 0v2.4c0 .6.2 1.1.5 1.5.4.3.8.5 1.5.5.5 0 1-.2 1.4-.5.3-.4.5-.9.5-1.4v-2.5c0-.6-.2-1-.5-1.4-.4-.4-.9-.5-1.4-.5a2 2 0 00-1.5.5 2 2 0 00-.5 1.4zm8 0c0-.6.1-1.1.4-1.6.2-.5.4-.9.8-1.2.4-.4.8-.6 1.4-.8.5-.2 1-.3 1.8-.3.6 0 1.2.1 1.7.3.5.2 1 .4 1.3.8.4.3.7.7 1 1.2l.2 1.6v1.8H153v.7c0 .6.2 1.1.5 1.5.4.4.9.6 1.5.6.5 0 1-.1 1.3-.3.3-.1.6-.3.7-.6h2.2c0 .4-.2.8-.5 1.1l-1 .9-1.2.5a5.6 5.6 0 01-3.2 0c-.6-.3-1-.5-1.4-.9-.4-.3-.6-.7-.8-1.2a4 4 0 01-.4-1.6v-2.5zm2.3.3h4.1v-.3a2 2 0 00-.5-1.5 2 2 0 00-1.5-.6 2 2 0 00-1.6.6 2 2 0 00-.5 1.5v.3zm8-.3c0-.6.1-1.1.3-1.6.2-.5.4-.9.7-1.2a3 3 0 011-.8l1.5-.3a3 3 0 011.7.5c.5.4.8.8.9 1.4h.2v-.6a5.6 5.6 0 01-.1-1.1v-3h2.2V89h-2.1v-1.7h-.2c-.1.6-.4 1-.9 1.4a3 3 0 01-1.8.5c-.5 0-1-.1-1.3-.3a3 3 0 01-1.1-.8c-.3-.3-.5-.7-.7-1.2-.2-.5-.3-1-.3-1.6v-2.5zm2.3 0v2.4c0 .6.2 1.1.5 1.5.4.3.8.5 1.5.5.5 0 1-.2 
1.4-.5.3-.4.5-.9.5-1.4v-2.5c0-.6-.2-1-.5-1.4-.4-.4-.9-.5-1.4-.5a2 2 0 00-1.5.5 2 2 0 00-.5 1.4z"/>
+      <path fill="#F5FF87" d="M773.2 128v-2h7.8v2h-2.8v9h2.8v2h-7.8v-2h2.8v-9h-2.8zm10.1-2h4c.7 0 1.3.1 1.8.3.6.2 1 .5 1.4.9.4.3.7.8.9 1.3.2.5.3 1 .3 1.6v4.8c0 .6-.1 1.2-.3 1.7a3.7 3.7 0 01-2.3 2.1c-.5.2-1.1.3-1.8.3h-4v-13zm2.3 11h1.7c.7 0 1.2-.3 1.6-.6.3-.4.5-1 .5-1.5V130a2 2 0 00-.5-1.4c-.4-.4-1-.6-1.6-.6h-1.7v8.8zm10.4.6c0-.5.2-1 .5-1.2.3-.3.7-.5 1.2-.5s.9.2 1.2.5c.3.3.5.7.5 1.2 0 .4-.2.8-.5 1.1-.3.3-.7.5-1.2.5s-.9-.2-1.2-.5c-.3-.3-.5-.7-.5-1.1zm0-7c0-.5.2-1 .5-1.2.3-.3.7-.5 1.2-.5s.9.2 1.2.5c.3.3.5.7.5 1.2s-.2.8-.5 1.1c-.3.3-.7.5-1.2.5s-.9-.2-1.2-.5c-.3-.3-.5-.6-.5-1.1z"/>
+      <path fill="#FFF" d="M807.8 139v-9.9h1.8v1.4h.2c0-.5.2-.9.4-1.1.3-.3.7-.5 1.2-.5s.8.2 1 .4c.3.3.5.7.6 1.2h.2c0-.5.2-.9.4-1.1.3-.3.7-.5 1.2-.5.6 0 1.1.2 1.5.7.4.4.5 1 .5 1.8v7.6h-2v-7.4c0-.7-.3-1.1-.8-1.1-.3 0-.5.1-.7.3l-.2.9v7.3h-1.6v-7.4c0-.3 0-.6-.2-.8a.8.8 0 00-.7-.3c-.3 0-.5.1-.6.3-.2.2-.2.5-.2.9v7.3h-2zm14.8.2l-1.8-.3a4 4 0 01-1.3-.8l-.9-1.2a4 4 0 01-.3-1.6v-2.5c0-.6.1-1.1.3-1.6a3.5 3.5 0 012.2-2l1.8-.3 1.8.3c.5.2 1 .4 1.3.8.4.3.7.7.9 1.2.2.5.3 1 .3 1.6v2.5l-.3 1.6a3.5 3.5 0 01-2.2 2c-.5.2-1.1.3-1.8.3zm-2-4c0 .7.1 1.1.5 1.5.4.3.9.5 1.5.5a2 2 0 001.5-.5c.4-.4.6-.8.6-1.4v-2.5c0-.6-.2-1-.6-1.4a2 2 0 00-1.5-.5 2 2 0 00-1.5.5c-.4.4-.6.8-.6 1.4v2.5zm8-2.4c0-.6 0-1.1.2-1.6s.4-.9.7-1.2a3 3 0 011.1-.8c.4-.2.9-.3 1.4-.3a3 3 0 011.8.5c.5.4.7.8.8 1.4h.2v-.6a5.6 5.6 0 010-1.1v-3h2.2V139h-2.2v-1.7h-.2c0 .6-.3 1-.8 1.4a3 3 0 01-1.8.5c-.5 0-1-.1-1.4-.3a3 3 0 01-1-.8c-.4-.3-.6-.7-.8-1.2l-.2-1.6v-2.5zm2.2 0v2.4c0 .6.2 1.1.6 1.5.3.3.8.5 1.4.5.6 0 1-.2 1.4-.5.4-.4.5-.9.5-1.4v-2.5c0-.6-.1-1-.5-1.4-.3-.4-.8-.5-1.4-.5a2 2 0 00-1.4.5 2 2 0 00-.6 1.4zm8.5 4.1h3.5v-5.8h-3v-2h5.2v7.8h2.9v2.1h-8.6v-2zm2.9-10.5c0-.3.1-.6.3-.8.3-.3.6-.4 1-.4h.4c.4 0 .7.1 1 .4.2.2.3.5.3.8 0 .4-.1.7-.3.9-.3.2-.6.3-1 .3h-.4c-.4 0-.7-.1-1-.3-.2-.2-.3-.5-.3-.9zm6.9 5.8v-2h2.7v-1.3c0-.9.3-1.6.8-2 .6-.6 1.4-.9 2.3-.9h3v2h-3a1 1 0 00-.6.3c-.2.1-.2.3-.2.6v1.3h3.7v2h-3.7v6.8h-2.3v-6.8H849zm10.8 4.7h3.5v-5.8h-3v-2h5.2v7.8h2.9v2.1h-8.6v-2zm2.9-10.5c0-.3.1-.6.3-.8.3-.3.6-.4 1-.4h.4c.4 0 .7.1 1 .4.2.2.3.5.3.8 0 .4-.1.7-.3.9-.3.2-.6.3-1 .3h-.4c-.4 0-.7-.1-1-.3-.2-.2-.3-.5-.3-.9zm7 6.4c0-.6.1-1.1.3-1.6.2-.5.5-.9.9-1.2.3-.4.8-.6 1.3-.8.5-.2 1.1-.3 1.8-.3l1.8.3c.5.2 1 .4 1.3.8.4.3.7.7.9 1.2.2.5.3 1 .3 1.6v1.8H872v.7c0 .6.2 1.1.6 1.5.3.4.9.6 1.5.6.5 0 .9-.1 1.2-.3.4-.1.6-.3.8-.6h2.2l-.5 1.1-1 .9-1.2.5a5.6 5.6 0 01-3.3 0c-.5-.3-1-.5-1.3-.9-.4-.3-.7-.7-.9-1.2a4 4 0 01-.3-1.6v-2.5zm2.2.3h4.2v-.3a2 2 0 00-.6-1.5 2 2 0 00-1.5-.6 2 2 0 00-1.5.6 2 2 0 00-.6 1.5v.3zm10.8-4v1.7h.2c.1-.6.4-1 .9-1.4.5-.3 1-.5 1.8-.5 1.1 0 2 .3 
2.5 1 .6.7 1 1.6 1 2.7v.8h-2.4v-.6a2 2 0 00-.5-1.4 2 2 0 00-1.5-.6c-.6 0-1 .2-1.3.6-.4.3-.5.8-.5 1.4v6.2h-2.3v-9.9h2.1z"/>
+      <g>
+        <path fill="#F5FF87" d="M402.3 20v-2h7.9v2h-2.8v9h2.8v2h-7.9v-2h2.8v-9h-2.8zm10.2-2h4l1.8.3c.5.2 1 .5 1.3.9.4.3.7.8 1 1.3.1.5.3 1 .3 1.6V27c0 .6-.2 1.2-.4 1.7a3.7 3.7 0 01-2.2 2.1l-1.8.3h-4V18zm2.2 11h1.8c.6 0 1.1-.3 1.5-.6.4-.4.6-1 .6-1.5V22a2 2 0 00-.6-1.4c-.4-.4-.9-.6-1.5-.6h-1.8V29zm10.5.6c0-.5.1-1 .5-1.2.3-.3.7-.5 1.1-.5.5 0 1 .2 1.3.5.3.3.4.7.4 1.2 0 .4-.1.8-.4 1.1-.4.3-.8.5-1.3.5-.4 0-.8-.2-1.1-.5-.4-.3-.5-.7-.5-1.1zm0-7c0-.5.1-1 .5-1.2.3-.3.7-.5 1.1-.5.5 0 1 .2 1.3.5.3.3.4.7.4 1.2s-.1.8-.4 1.1c-.4.3-.8.5-1.3.5-.4 0-.8-.2-1.1-.5-.4-.3-.5-.6-.5-1.1z"/>
+        <path fill="#FFF" d="M437 28.5h2.4c0 .3.3.5.6.7.3.2.6.3 1 .3h.8c.6 0 1-.2 1.3-.4.3-.2.5-.5.5-.9 0-.7-.6-1-1.6-1.2l-1.4-.2c-1.1-.1-2-.4-2.5-.9s-.8-1.2-.8-2c0-1 .3-1.7 1-2.2.6-.5 1.6-.8 2.8-.8h.7c1.1 0 2 .3 2.7.7.6.5 1 1.1 1.2 2h-2.3a1 1 0 00-.5-.7 2 2 0 00-1-.3h-.8c-1 0-1.6.4-1.6 1.2 0 .6.5 1 1.4 1l1.4.3c1.2.1 2 .4 2.6 1 .6.4.9 1.1.9 2 0 1-.4 1.8-1 2.3-.7.5-1.7.8-3 .8h-.7c-1.2 0-2.1-.3-2.8-.7-.7-.5-1.1-1.2-1.2-2zm13-7.4v6.2c0 1.3.5 2 1.7 2 1.3 0 1.9-.7 1.9-2V21h2.2v6.2c0 1.2-.4 2.2-1 2.9-.8.6-1.8 1-3 1-1.3 0-2.3-.4-3-1-.7-.7-1.1-1.7-1.1-3v-6.1h2.3zm8-3h2.2v3a20 20 0 010 1.7h.1c.1-.6.4-1 .9-1.4a3 3 0 011.7-.5c.6 0 1 .1 1.5.3.4.2.7.4 1 .8.3.3.5.7.7 1.2.2.5.3 1 .3 1.6v2.5c0 .6-.1 1.1-.3 1.6-.2.5-.4.9-.7 1.2-.3.4-.6.6-1 .8l-1.4.3a3 3 0 01-1.8-.5c-.5-.4-.8-.8-.9-1.4h-.2V31H458V18zm2.2 9.2c0 .5.2 1 .5 1.4.4.3.9.5 1.4.5.7 0 1.1-.2 1.5-.5.3-.4.5-.9.5-1.5v-2.4c0-.6-.2-1-.5-1.4a2 2 0 00-1.5-.5c-.5 0-1 .1-1.3.5a2 2 0 00-.6 1.4v2.5zm8.2 4.7h2.7c.6 0 1-.2 1.3-.5.4-.4.5-.8.5-1.3v-7h-4V21h6.3v9c0 .6-.1 1.2-.3 1.7l-.8 1.2-1.2.8c-.5.2-1 .3-1.7.3h-2.8V32zm4-13.6c0-.3.2-.6.4-.8.2-.3.5-.4 1-.4h.4c.4 0 .7.1 1 .4.1.2.3.5.3.8 0 .4-.2.7-.4.9-.2.2-.5.3-1 .3h-.4c-.4 0-.7-.1-1-.3-.1-.2-.3-.5-.3-.9zm6 6.4c0-.6 0-1.1.3-1.6.2-.5.4-.9.8-1.2.4-.4.8-.6 1.4-.8.5-.2 1-.3 1.8-.3.6 0 1.2.1 1.7.3.5.2 1 .4 1.3.8.4.3.7.7 1 1.2l.2 1.6v1.8h-6.3v.7c0 .6.2 1.1.5 1.5.4.4.9.6 1.5.6.5 0 1-.1 1.3-.3.3-.1.6-.3.7-.6h2.2c0 .4-.2.8-.5 1.1l-1 .9-1.2.5a5.6 5.6 0 01-3.2 0c-.6-.3-1-.5-1.4-.9-.4-.3-.6-.7-.8-1.2a4 4 0 01-.4-1.6v-2.5zm2.2.3h4.1v-.3a2 2 0 00-.5-1.5 2 2 0 00-1.5-.6 2 2 0 00-1.6.6 2 2 0 00-.5 1.5v.3zm12.5 6c-.7 0-1.3 0-1.8-.2s-1-.4-1.4-.8a4 4 0 01-1.2-2.8v-2.5c0-.6.2-1.1.4-1.6s.5-.9.8-1.2a4 4 0 011.4-.8 5.3 5.3 0 013.4 0 4 4 0 012.7 3.2H495c0-.4-.3-.8-.7-1-.3-.4-.8-.5-1.3-.5-.7 0-1.2.1-1.5.5-.4.3-.6.8-.6 1.4v2.5c0 .6.2 1 .6 1.4.3.4.8.5 1.5.5.5 0 1-.1 1.3-.4.4-.3.6-.7.7-1.1h2.3l-.5 1.4a4 4 0 01-2.2 1.8 5 5 0 01-1.6.3zm5.6-10h2.7v-3h2.2v3h3.8v2h-3.8V28c0 .4.1.6.3.8.2.2.5.3.8.3h2.5v2h-2.7c-1 
0-1.7-.3-2.3-.8a3 3 0 01-.8-2.3v-4.8h-2.7v-2z"/>
+      </g>
+      <g>
+        <path fill="#F5FF87" d="M407.5 126v-2h7.8v2h-2.8v9h2.8v2h-7.8v-2h2.8v-9h-2.8zm10.1-2h4c.7 0 1.3.1 1.8.3.6.2 1 .5 1.4.9.4.3.7.8.9 1.3.2.5.3 1 .3 1.6v4.8c0 .6-.1 1.2-.3 1.7a3.7 3.7 0 01-2.3 2.1c-.5.2-1.1.3-1.8.3h-4v-13zm2.3 11h1.7c.7 0 1.2-.3 1.6-.6.3-.4.5-1 .5-1.5V128a2 2 0 00-.5-1.4c-.4-.4-1-.6-1.6-.6H420v8.8zm10.4.6c0-.5.2-1 .5-1.2.3-.3.7-.5 1.2-.5s.9.2 1.2.5c.3.3.5.7.5 1.2 0 .4-.2.8-.5 1.1-.3.3-.7.5-1.2.5s-.9-.2-1.2-.5c-.3-.3-.5-.7-.5-1.1zm0-7c0-.5.2-1 .5-1.2.3-.3.7-.5 1.2-.5s.9.2 1.2.5c.3.3.5.7.5 1.2s-.2.8-.5 1.1c-.3.3-.7.5-1.2.5s-.9-.2-1.2-.5c-.3-.3-.5-.6-.5-1.1z"/>
+        <path fill="#FFF" d="M446.6 137.2l-1.8-.3a4 4 0 01-1.3-.8l-.9-1.2a4 4 0 01-.3-1.6v-2.5c0-.6.1-1.1.3-1.6a3.5 3.5 0 012.2-2l1.8-.3 1.8.3c.5.2 1 .4 1.3.8.4.3.7.7.9 1.2.2.5.3 1 .3 1.6v2.5l-.3 1.6a3.5 3.5 0 01-2.2 2c-.5.2-1.1.3-1.8.3zm-2-4c0 .7.1 1.1.5 1.5.4.3.9.5 1.5.5a2 2 0 001.5-.5c.4-.4.6-.8.6-1.4v-2.5c0-.6-.2-1-.6-1.4a2 2 0 00-1.5-.5 2 2 0 00-1.5.5c-.4.4-.6.8-.6 1.4v2.5zm8.2-9.2h2.3v3a20 20 0 01-.1 1.8h.2c0-.6.4-1 .8-1.4a3 3 0 011.8-.5c.5 0 1 .1 1.4.3.4.2.8.4 1 .8.4.3.6.7.8 1.2l.2 1.6v2.5c0 .6 0 1.1-.2 1.6s-.4.9-.7 1.2c-.3.4-.7.6-1.1.8-.4.2-.9.3-1.4.3a3 3 0 01-1.8-.5c-.4-.4-.7-.8-.8-1.4h-.2v1.7h-2.2v-13zm2.3 9.3c0 .5.2 1 .5 1.4.3.3.8.5 1.4.5.6 0 1-.2 1.4-.5.4-.4.6-.9.6-1.5v-2.4c0-.6-.2-1-.6-1.4a2 2 0 00-1.4-.5c-.6 0-1 .1-1.4.5a2 2 0 00-.5 1.4v2.5zm8.2 4.7h2.7c.5 0 1-.2 1.3-.5.3-.4.5-.8.5-1.3v-7h-4V127h6.2v9c0 .6 0 1.2-.2 1.7l-.8 1.2-1.3.8c-.5.2-1 .3-1.6.3h-2.8V138zm4-13.6c0-.3.1-.6.3-.8.3-.3.6-.4 1-.4h.4c.4 0 .7.1 1 .4.2.2.3.5.3.8 0 .4-.1.7-.3.9-.3.2-.6.3-1 .3h-.4c-.4 0-.7-.1-1-.3-.2-.2-.3-.5-.3-.9zm5.9 6.4c0-.6.1-1.1.3-1.6.2-.5.5-.9.9-1.2.3-.4.8-.6 1.3-.8.5-.2 1.1-.3 1.8-.3l1.8.3c.5.2 1 .4 1.3.8.4.3.7.7.9 1.2.2.5.3 1 .3 1.6v1.8h-6.4v.7c0 .6.2 1.1.6 1.5.3.4.9.6 1.5.6.5 0 .9-.1 1.2-.3.4-.1.6-.3.8-.6h2.2l-.5 1.1-1 .9-1.2.5a5.6 5.6 0 01-3.3 0c-.5-.3-1-.5-1.3-.9-.4-.3-.7-.7-.9-1.2a4 4 0 01-.3-1.6v-2.5zm2.2.3h4.2v-.3a2 2 0 00-.6-1.5 2 2 0 00-1.5-.6 2 2 0 00-1.5.6 2 2 0 00-.6 1.5v.3zm12.5 6c-.6 0-1.2 0-1.8-.2-.5-.2-1-.4-1.3-.8-.4-.3-.7-.7-.9-1.2a4 4 0 01-.3-1.6v-2.5c0-.6.1-1.1.3-1.6.2-.5.5-.9.9-1.2a4 4 0 011.3-.8 5.3 5.3 0 013.4 0 4 4 0 012.7 3.2H490c-.1-.4-.3-.8-.7-1-.4-.4-.8-.5-1.4-.5-.6 0-1.1.1-1.5.5-.3.3-.5.8-.5 1.4v2.5c0 .6.2 1 .5 1.4.4.4.9.5 1.5.5.5 0 1-.1 1.4-.4.4-.3.6-.7.7-1.1h2.2c0 .5-.2 1-.5 1.4a4 4 0 01-2.2 1.8 5 5 0 01-1.6.3zm5.6-10h2.7v-3h2.3v3h3.8v2h-3.8v4.8c0 .4 0 .6.3.8.2.2.4.3.8.3h2.5v2h-2.7c-1 0-1.7-.3-2.3-.8a3 3 0 01-.9-2.3v-4.8h-2.7v-2z"/>
+      </g>
     </g>
   </g>
 </svg>
diff --git a/website/docs/images/displacy-dep-founded.html b/website/docs/images/displacy-dep-founded.html
index 3f89ffd4a..e22984ee1 100644
--- a/website/docs/images/displacy-dep-founded.html
+++ b/website/docs/images/displacy-dep-founded.html
@@ -20,7 +20,7 @@
 </text>
 
 <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
-    <tspan class="displacy-word" fill="currentColor" x="750">company.</tspan>
+    <tspan class="displacy-word" fill="currentColor" x="750">company</tspan>
     <tspan class="displacy-tag" dy="2em" fill="currentColor" x="750"></tspan>
 </text>
 
diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md
index 532796303..01d60ddb8 100644
--- a/website/docs/usage/rule-based-matching.md
+++ b/website/docs/usage/rule-based-matching.md
@@ -974,10 +974,12 @@ to match phrases with the same sequence of punctuation and non-punctuation
 tokens as the pattern. But this can easily get confusing and doesn't have much
 of an advantage over writing one or two token patterns.
 
-## Dependency Matcher {#dependencymatcher new="3"}
+## Dependency Matcher {#dependencymatcher new="3" model="parser"}
 
 The [`DependencyMatcher`](/api/dependencymatcher) lets you match patterns within
-the dependency parse. It requires a model containing a parser such as the
+the dependency parse using
+[Semgrex](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html)
+operators. It requires a model containing a parser such as the
 [`DependencyParser`](/api/dependencyparser). Instead of defining a list of
 adjacent tokens as in `Matcher` patterns, the `DependencyMatcher` patterns match
 tokens in the dependency parse and specify the relations between them.
@@ -1014,15 +1016,15 @@ tokens in the dependency parse and specify the relations between them.
 > matches = matcher(doc)
 > ```
 
-A pattern added to the `DependencyMatcher` consists of a list of dictionaries,
-with each dictionary describing a token to match and its relation to an existing
-token in the pattern. Except for the first dictionary, which defines an anchor
-token using only `RIGHT_ID` and `RIGHT_ATTRS`, each pattern should have the
-following keys:
+A pattern added to the dependency matcher consists of a **list of
+dictionaries**, with each dictionary describing a **token to match** and its
+**relation to an existing token** in the pattern. Except for the first
+dictionary, which defines an anchor token using only `RIGHT_ID` and
+`RIGHT_ATTRS`, each pattern should have the following keys:
 
 | Name          | Description                                                                                                                                                            |
 | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `LEFT_ID`     | The name of the left-hand node in the relation, which has been defined in an earlier node.                                                                             |
+| `LEFT_ID`     | The name of the left-hand node in the relation, which has been defined in an earlier node. ~~str~~                                                                     |
 | `REL_OP`      | An operator that describes how the two nodes are related. ~~str~~                                                                                                      |
 | `RIGHT_ID`    | A unique name for the right-hand node in the relation. ~~str~~                                                                                                         |
 | `RIGHT_ATTRS` | The token attributes to match for the right-hand node in the same format as patterns provided to the regular token-based [`Matcher`](/api/matcher). ~~Dict[str, Any]~~ |
@@ -1040,54 +1042,68 @@ can be used as `LEFT_ID` in another dict.
 
 </Infobox>
 
-### Dependency matcher operators
+### Dependency matcher operators {#dependencymatcher-operators}
 
 The following operators are supported by the `DependencyMatcher`, most of which
 come directly from
 [Semgrex](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html):
 
-| Symbol    | Description                                                                                                         |
-| --------- | ------------------------------------------------------------------------------------------------------------------- |
-| `A < B`   | `A` is the immediate dependent of `B`                                                                               |
-| `A > B`   | `A` is the immediate head of `B`                                                                                    |
-| `A << B`  | `A` is the dependent in a chain to `B` following dep->head paths                                                    |
-| `A >> B`  | `A` is the head in a chain to `B` following head->dep paths                                                         |
-| `A . B`   | `A` immediately precedes `B`, i.e. `A.i == B.i - 1`, and both are within the same dependency tree                   |
-| `A .* B`  | `A` precedes `B`, i.e. `A.i < B.i`, and both are within the same dependency tree _(not in Semgrex)_                 |
-| `A ; B`   | `A` immediately follows `B`, i.e. `A.i == B.i + 1`, and both are within the same dependency tree _(not in Semgrex)_ |
-| `A ;* B`  | `A` follows `B`, i.e. `A.i > B.i`, and both are within the same dependency tree _(not in Semgrex)_                  |
-| `A $+ B`  | `B` is a right immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i - 1`                 |
-| `A $- B`  | `B` is a left immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i + 1`                  |
-| `A $++ B` | `B` is a right sibling of `A`, i.e. `A` and `B` have the same parent and `A.i < B.i`                                |
-| `A $-- B` | `B` is a left sibling of `A`, i.e. `A` and `B` have the same parent and `A.i > B.i`                                 |
+| Symbol    | Description                                                                                                          |
+| --------- | -------------------------------------------------------------------------------------------------------------------- |
+| `A < B`   | `A` is the immediate dependent of `B`.                                                                               |
+| `A > B`   | `A` is the immediate head of `B`.                                                                                    |
+| `A << B`  | `A` is the dependent in a chain to `B` following dep &rarr; head paths.                                              |
+| `A >> B`  | `A` is the head in a chain to `B` following head &rarr; dep paths.                                                   |
+| `A . B`   | `A` immediately precedes `B`, i.e. `A.i == B.i - 1`, and both are within the same dependency tree.                   |
+| `A .* B`  | `A` precedes `B`, i.e. `A.i < B.i`, and both are within the same dependency tree _(not in Semgrex)_.                 |
+| `A ; B`   | `A` immediately follows `B`, i.e. `A.i == B.i + 1`, and both are within the same dependency tree _(not in Semgrex)_. |
+| `A ;* B`  | `A` follows `B`, i.e. `A.i > B.i`, and both are within the same dependency tree _(not in Semgrex)_.                  |
+| `A $+ B`  | `B` is a right immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i - 1`.                 |
+| `A $- B`  | `B` is a left immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i + 1`.                  |
+| `A $++ B` | `B` is a right sibling of `A`, i.e. `A` and `B` have the same parent and `A.i < B.i`.                                |
+| `A $-- B` | `B` is a left sibling of `A`, i.e. `A` and `B` have the same parent and `A.i > B.i`.                                 |
 
-### Designing dependency matcher patterns
+### Designing dependency matcher patterns {#dependencymatcher-patterns}
 
 Let's say we want to find sentences describing who founded what kind of company:
 
-- `Smith founded a healthcare company in 2005.`
-- `Williams initially founded an insurance company in 1987.`
-- `Lee, an experienced CEO, has founded two AI startups.`
+- _Smith founded a healthcare company in 2005._
+- _Williams initially founded an insurance company in 1987._
+- _Lee, an experienced CEO, has founded two AI startups._
 
-The dependency parse for `Smith founded a healthcare company` shows types of
+The dependency parse for "Smith founded a healthcare company" shows types of
 relations and tokens we want to match:
 
+> #### Visualizing the parse
+>
+> The [`displacy` visualizer](/usage/visualizer) lets you render `Doc` objects
+> and their dependency parse and part-of-speech tags:
+>
+> ```python
+> import spacy
+> from spacy import displacy
+>
+> nlp = spacy.load("en_core_web_sm")
+> doc = nlp("Smith founded a healthcare company")
+> displacy.serve(doc)
+> ```
+
 import DisplaCyDepFoundedHtml from 'images/displacy-dep-founded.html'
 
 <Iframe title="displaCy visualization of dependencies" html={DisplaCyDepFoundedHtml} height={450} />
 
 The relations we're interested in are:
 
-- the founder is the subject (`nsubj`) of the token with the text `founded`
-- the company is the object (`dobj`) of `founded`
-- the kind of company may be an adjective (`amod`, not shown above) or a
-  compound (`compound`)
+- the founder is the **subject** (`nsubj`) of the token with the text `founded`
+- the company is the **object** (`dobj`) of `founded`
+- the kind of company may be an **adjective** (`amod`, not shown above) or a
+  **compound** (`compound`)
 
-The first step is to pick an anchor token for the pattern. Since it's the root
-of the dependency parse, `founded` is a good choice here. It is often easier to
-construct patterns when all dependency relation operators point from the head to
-the children. In this example, we'll only use `>`, which connects a head to an
-immediate dependent as `head > child`.
+The first step is to pick an **anchor token** for the pattern. Since it's the
+root of the dependency parse, `founded` is a good choice here. It is often
+easier to construct patterns when all dependency relation operators point from
+the head to the children. In this example, we'll only use `>`, which connects a
+head to an immediate dependent as `head > child`.
 
 The simplest dependency matcher pattern will identify and name a single token in
 the tree:
@@ -1099,11 +1115,10 @@ from spacy.matcher import DependencyMatcher
 
 nlp = spacy.load("en_core_web_sm")
 matcher = DependencyMatcher(nlp.vocab)
-
 pattern = [
   {
-    "RIGHT_ID": "anchor_founded",      # unique name
-    "RIGHT_ATTRS": {"ORTH": "founded"} # token pattern for "founded"
+    "RIGHT_ID": "anchor_founded",       # unique name
+    "RIGHT_ATTRS": {"ORTH": "founded"}  # token pattern for "founded"
   }
 ]
 matcher.add("FOUNDED", [pattern])
@@ -1116,6 +1131,7 @@ Now that we have a named anchor token (`anchor_founded`), we can add the founder
 as the immediate dependent (`>`) of `founded` with the dependency label `nsubj`:
 
 ```python
+### Step 1 {highlight="8,10"}
 pattern = [
     {
         "RIGHT_ID": "anchor_founded",
@@ -1127,31 +1143,37 @@ pattern = [
         "RIGHT_ID": "subject",
         "RIGHT_ATTRS": {"DEP": "nsubj"},
     }
+    # ...
 ]
 ```
 
 The direct object (`dobj`) is added in the same way:
 
 ```python
-pattern = [ ...
+### Step 2 {highlight=""}
+pattern = [
+    # ...
     {
         "LEFT_ID": "anchor_founded",
         "REL_OP": ">",
         "RIGHT_ID": "founded_object",
         "RIGHT_ATTRS": {"DEP": "dobj"},
     }
+    # ...
 ]
 ```
 
 When the subject and object tokens are added, they are required to have names
 under the key `RIGHT_ID`, which are allowed to be any unique string, e.g.
-`founded_subject`. These names can then be used as `LEFT_ID` to link new tokens
-into the pattern. For the final part of our pattern, we'll specify that the
-token `founded_object` should have a modifier with the dependency relation
+`founded_subject`. These names can then be used as `LEFT_ID` to **link new
+tokens into the pattern**. For the final part of our pattern, we'll specify that
+the token `founded_object` should have a modifier with the dependency relation
 `amod` or `compound`:
 
 ```python
-pattern = [ ...
+### Step 3 {highlight="7"}
+pattern = [
+    # ...
     {
         "LEFT_ID": "founded_object",
         "REL_OP": ">",
@@ -1168,8 +1190,6 @@ each new token needs to be linked to an existing token on its left. As for
 `founded` in this example, a token may be linked to more than one token on its
 right:
 
-<!-- TODO: adjust for final example, prettify -->
-
 ![Dependency matcher pattern](../images/dep-match-diagram.svg)
 
 The full pattern comes together as shown in the example below:
@@ -1209,11 +1229,10 @@ pattern = [
 
 matcher.add("FOUNDED", [pattern])
 doc = nlp("Lee, an experienced CEO, has founded two AI startups.")
-
 matches = matcher(doc)
-print(matches) # [(4851363122962674176, [6, 0, 10, 9])]
 
-# each token_id corresponds to one pattern dict
+print(matches) # [(4851363122962674176, [6, 0, 10, 9])]
+# Each token_id corresponds to one pattern dict
 match_id, token_ids = matches[0]
 for i in range(len(token_ids)):
     print(pattern[i]["RIGHT_ID"] + ":", doc[token_ids[i]].text)
diff --git a/website/docs/usage/v3.md b/website/docs/usage/v3.md
index bce261c42..e5228ab21 100644
--- a/website/docs/usage/v3.md
+++ b/website/docs/usage/v3.md
@@ -26,6 +26,7 @@ menu:
 - [End-to-end project workflows](#features-projects)
 - [New built-in components](#features-pipeline-components)
 - [New custom component API](#features-components)
+- [Dependency matching](#features-dep-matcher)
 - [Python type hints](#features-types)
 - [New methods & attributes](#new-methods)
 - [New & updated documentation](#new-docs)
@@ -152,7 +153,6 @@ add to your pipeline and customize for your use case:
 | [`Morphologizer`](/api/morphologizer)           | Trainable component to predict morphological features.                                                                                                                                                                  |
 | [`Lemmatizer`](/api/lemmatizer)                 | Standalone component for rule-based and lookup lemmatization.                                                                                                                                                           |
 | [`AttributeRuler`](/api/attributeruler)         | Component for setting token attributes using match patterns.                                                                                                                                                            |
-| [`DependencyMatcher`](/api/dependencymatcher)   | Component for matching subtrees within a dependency parse.                                                                                                                                                              |
 | [`Transformer`](/api/transformer)               | Component for using [transformer models](/usage/embeddings-transformers) in your pipeline, accessing outputs and aligning tokens. Provided via [`spacy-transformers`](https://github.com/explosion/spacy-transformers). |
 
 <Infobox title="Details & Documentation" emoji="📖" list>
@@ -202,6 +202,34 @@ aren't set.
 
 </Infobox>
 
+### Dependency matching {#features-dep-matcher}
+
+<!-- TODO: improve summary -->
+
+> #### Example
+>
+> ```python
+> # TODO: example
+> ```
+
+The [`DependencyMatcher`](/api/dependencymatcher) lets you match patterns within
+the dependency parse using
+[Semgrex](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html)
+operators. It follows the same API as the token-based [`Matcher`](/api/matcher).
+A pattern added to the dependency matcher consists of a **list of
+dictionaries**, with each dictionary describing a **token to match** and its
+**relation to an existing token** in the pattern.
+
+<Infobox title="Details & Documentation" emoji="📖" list>
+
+- **Usage:**
+  [Dependency matching](/usage/rule-based-matching#dependencymatcher)
+- **API:** [`DependencyMatcher`](/api/dependencymatcher)
+- **Implementation:**
+  [`spacy/matcher/dependencymatcher.pyx`](https://github.com/explosion/spaCy/tree/develop/spacy/matcher/dependencymatcher.pyx)
+
+</Infobox>
+
 ### Type hints and type-based data validation {#features-types}
 
 > #### Example

From b3e338d65e63c8550b1ab040b4414a75fb7716b3 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Fri, 4 Sep 2020 20:58:36 +0200
Subject: [PATCH 65/71] Update docs [ci skip]

---
 website/docs/usage/v3.md | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/website/docs/usage/v3.md b/website/docs/usage/v3.md
index e5228ab21..8006863e1 100644
--- a/website/docs/usage/v3.md
+++ b/website/docs/usage/v3.md
@@ -209,11 +209,18 @@ aren't set.
 > #### Example
 >
 > ```python
-> # TODO: example
+> from spacy.matcher import DependencyMatcher
+>
+> matcher = DependencyMatcher(nlp.vocab)
+> pattern = [
+>     {"RIGHT_ID": "anchor_founded", "RIGHT_ATTRS": {"ORTH": "founded"}},
+>     {"LEFT_ID": "anchor_founded", "REL_OP": ">", "RIGHT_ID": "subject", "RIGHT_ATTRS": {"DEP": "nsubj"}}
+> ]
+> matcher.add("FOUNDED", [pattern])
 > ```
 
-The [`DependencyMatcher`](/api/dependencymatcher) lets you match patterns within
-the dependency parse using
+The new [`DependencyMatcher`](/api/dependencymatcher) lets you match patterns
+within the dependency parse using
 [Semgrex](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html)
 operators. It follows the same API as the token-based [`Matcher`](/api/matcher).
 A pattern added to the dependency matcher consists of a **list of

From 465785a672ea74e1c2b7d56440abdb9e7736c462 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 4 Sep 2020 21:15:55 +0200
Subject: [PATCH 66/71] Fix project pull and push

---
 spacy/cli/project/pull.py |  3 ++-
 spacy/cli/project/push.py | 11 ++++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/spacy/cli/project/pull.py b/spacy/cli/project/pull.py
index 655e2f459..edcd410bd 100644
--- a/spacy/cli/project/pull.py
+++ b/spacy/cli/project/pull.py
@@ -40,5 +40,6 @@ def project_pull(project_dir: Path, remote: str, *, verbose: bool = False):
             url = storage.pull(output_path, command_hash=cmd_hash)
             yield url, output_path
 
-        if cmd.get("outputs") and all(loc.exists() for loc in cmd["outputs"]):
+        out_locs = [project_dir / out for out in cmd.get("outputs", [])]
+        if all(loc.exists() for loc in out_locs):
             update_lockfile(project_dir, cmd)
diff --git a/spacy/cli/project/push.py b/spacy/cli/project/push.py
index fcee2231a..26495412d 100644
--- a/spacy/cli/project/push.py
+++ b/spacy/cli/project/push.py
@@ -45,10 +45,19 @@ def project_push(project_dir: Path, remote: str):
         )
         for output_path in cmd.get("outputs", []):
             output_loc = project_dir / output_path
-            if output_loc.exists():
+            if output_loc.exists() and _is_not_empty_dir(output_loc):
                 url = storage.push(
                     output_path,
                     command_hash=cmd_hash,
                     content_hash=get_content_hash(output_loc),
                 )
                 yield output_path, url
+
+
+def _is_not_empty_dir(loc: Path):
+    if not loc.is_dir():
+        return True
+    elif any(_is_not_empty_dir(child) for child in loc.iterdir()):
+        return True
+    else:
+        return False

From 4b7abaafdb7e5588d7305ccc7dd0ddcff36db01a Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 4 Sep 2020 21:22:50 +0200
Subject: [PATCH 67/71] Fix learn rate for non-transformer

---
 spacy/cli/templates/quickstart_training.jinja | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index 43c852d13..199aae217 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -186,11 +186,14 @@ accumulate_gradient = {{ transformer["size_factor"] }}
 [training.optimizer]
 @optimizers = "Adam.v1"
 
+
+{% if use_transformer -%}
 [training.optimizer.learn_rate]
 @schedules = "warmup_linear.v1"
 warmup_steps = 250
 total_steps = 20000
 initial_rate = 5e-5
+{% endif %}
 
 [training.train_corpus]
 @readers = "spacy.Corpus.v1"

From 12e1279f6b4c8db6f7f9f399de6901a429d3aaca Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 5 Sep 2020 04:13:53 +0200
Subject: [PATCH 68/71] Set version to v3.0.0a14

---
 spacy/about.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/about.py b/spacy/about.py
index 3fe720dbc..7d0e85a17 100644
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -1,6 +1,6 @@
 # fmt: off
 __title__ = "spacy-nightly"
-__version__ = "3.0.0a13"
+__version__ = "3.0.0a14"
 __release__ = True
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"

From c443c82722c3577a75720a73a5515d178016821d Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Sat, 5 Sep 2020 13:41:10 +0200
Subject: [PATCH 69/71] Update docs [ci skip]

---
 website/docs/usage/v3.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/website/docs/usage/v3.md b/website/docs/usage/v3.md
index 8006863e1..3cbccc352 100644
--- a/website/docs/usage/v3.md
+++ b/website/docs/usage/v3.md
@@ -342,7 +342,8 @@ format for documenting argument and return types.
   [Custom tokenizers](/usage/linguistic-features#custom-tokenizer),
   [Morphology](/usage/linguistic-features#morphology),
   [Lemmatization](/usage/linguistic-features#lemmatization),
-  [Mapping & Exceptions](/usage/linguistic-features#mappings-exceptions)
+  [Mapping & Exceptions](/usage/linguistic-features#mappings-exceptions),
+  [Dependency matching](/usage/rule-based-matching#dependencymatcher)
 - **API Reference: ** [Library architecture](/api),
   [Model architectures](/api/architectures), [Data formats](/api/data-formats)
 - **New Classes: ** [`Example`](/api/example), [`Tok2Vec`](/api/tok2vec),
@@ -350,7 +351,7 @@ format for documenting argument and return types.
   [`Morphologizer`](/api/morphologizer),
   [`AttributeRuler`](/api/attributeruler),
   [`SentenceRecognizer`](/api/sentencerecognizer),
-  [`DependencyMatcher`])(/api/dependencymatcher), [`Pipe`](/api/pipe),
+  [`DependencyMatcher`](/api/dependencymatcher), [`Pipe`](/api/pipe),
   [`Corpus`](/api/corpus)
 
 </Infobox>

From dae22f3dfaea23ef560a41a80cdd3d4a39f9cb06 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 5 Sep 2020 14:08:34 +0200
Subject: [PATCH 70/71] Fix ignoring of punct labels

---
 spacy/pipeline/dep_parser.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx
index e001920a6..eee4ed535 100644
--- a/spacy/pipeline/dep_parser.pyx
+++ b/spacy/pipeline/dep_parser.pyx
@@ -156,7 +156,7 @@ cdef class DependencyParser(Parser):
         results = {}
         results.update(Scorer.score_spans(examples, "sents", **kwargs))
         kwargs.setdefault("getter", dep_getter)
-        kwargs.setdefault("ignore_label", ("p", "punct"))
+        kwargs.setdefault("ignore_labels", ("p", "punct"))
         results.update(Scorer.score_deps(examples, "dep", **kwargs))
         del results["sents_per_type"]
         return results

From bb62e3c8fcdcf5c8b1d3da8104b68577981ec97b Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Sun, 6 Sep 2020 23:43:50 +0200
Subject: [PATCH 71/71] Fix dropdown [ci skip]

---
 website/src/components/dropdown.js | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/website/src/components/dropdown.js b/website/src/components/dropdown.js
index 4528ef77e..ae5c42415 100644
--- a/website/src/components/dropdown.js
+++ b/website/src/components/dropdown.js
@@ -6,7 +6,14 @@ import { navigate } from 'gatsby'
 import classes from '../styles/dropdown.module.sass'
 
 export default function Dropdown({ defaultValue, className, onChange, children }) {
-    const defaultOnChange = ({ target }) => navigate(target.value)
+    const defaultOnChange = ({ target }) => {
+        const isExternal = /((http(s?)):\/\/|mailto:)/gi.test(target.value)
+        if (isExternal) {
+            window.location.href = target.value
+        } else {
+            navigate(target.value)
+        }
+    }
     return (
         <select
             defaultValue={defaultValue}