From ada4fc0f09189eb32935fd0d17cb8b78e8ed51b7 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Sat, 14 Nov 2020 09:20:42 +0100
Subject: [PATCH] Update v2.2.x for bugfix release (#6384)

* Fix on_match callback and remove empty patterns (#6312)

For the `DependencyMatcher`:

* Fix on_match callback so that it is called once per matched pattern
* Fix results so that patterns with empty match lists are not returned

* Add --prefer-binary for python 3.5

* Add version pins for pyrsistent

* Use backwards-compatible super()

* Try to fix tests on Travis (2.7)

* Fix naming conflict and formatting

* Update pkuseg version in Chinese tokenizer warnings

* Some changes for Armenian (#5616)

* Fixing numerals

* We need an Armenian question mark to make the sentence a question

* Update lex_attrs.py (#5608)

* Fix compat

* Update Armenian from v2.3.x

Co-authored-by: Ines Montani <ines@ines.io>
Co-authored-by: Karen Hambardzumyan <mahnerak@gmail.com>
Co-authored-by: Marat M. Yavrumyan <myavrum@ysu.am>
---
 azure-pipelines.yml                      |   6 +-
 requirements.txt                         |   1 +
 spacy/errors.py                          |   4 +-
 spacy/lang/hy/__init__.py                |   5 +-
 spacy/lang/hy/examples.py                |   4 +-
 spacy/lang/hy/lex_attrs.py               |  26 +-
 spacy/lang/hy/stop_words.py              |   4 +-
 spacy/lang/hy/tag_map.py                 | 431 +++++++----------------
 spacy/lang/pl/lemmatizer.py              |   2 +-
 spacy/lang/zh/__init__.py                |  34 +-
 spacy/matcher/dependencymatcher.pyx      |  12 +-
 spacy/tests/lang/hy/test_text.py         |   1 +
 spacy/tests/lang/hy/test_tokenizer.py    |   1 +
 spacy/tests/matcher/test_matcher_api.py  |  59 ++--
 spacy/tests/regression/test_issue5152.py |   2 +
 15 files changed, 209 insertions(+), 383 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 147d2e903..54489615c 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -80,8 +80,8 @@ jobs:
       architecture: 'x64'
 
   - script: |
-      python -m pip install -U setuptools
-      pip install -r requirements.txt
+      python -m pip install -U pip setuptools
+      pip install -r requirements.txt --prefer-binary
     displayName: 'Install dependencies'
 
   - script: |
@@ -96,7 +96,7 @@ jobs:
 
   - bash: |
       SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
-      pip install dist/$SDIST
+      pip install dist/$SDIST --prefer-binary
     displayName: 'Install from sdist'
 
   - script: python -m pytest --pyargs spacy
diff --git a/requirements.txt b/requirements.txt
index ec30efc16..f76a8db10 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,6 +14,7 @@ plac>=0.9.6,<1.2.0
 pathlib==1.0.1; python_version < "3.4"
 tqdm>=4.38.0,<5.0.0
 # Optional dependencies
+pyrsistent<0.17.0
 jsonschema>=2.6.0,<3.1.0
 # Development dependencies
 cython>=0.25
diff --git a/spacy/errors.py b/spacy/errors.py
index d99c96922..f6b457345 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -7,8 +7,8 @@ def add_codes(err_cls):
 
     class ErrorsWithCodes(err_cls):
         def __getattribute__(self, code):
-            msg = super().__getattribute__(code)
-            if code.startswith('__'):  # python system attributes like __class__
+            msg = super(ErrorsWithCodes, self).__getattribute__(code)
+            if code.startswith("__"):  # python system attributes like __class__
                 return msg
             else:
                 return "[{code}] {msg}".format(code=code, msg=msg)
diff --git a/spacy/lang/hy/__init__.py b/spacy/lang/hy/__init__.py
index 3320edb6c..6aaa965bb 100644
--- a/spacy/lang/hy/__init__.py
+++ b/spacy/lang/hy/__init__.py
@@ -1,11 +1,12 @@
+# coding: utf8
+from __future__ import unicode_literals
+
 from .stop_words import STOP_WORDS
 from .lex_attrs import LEX_ATTRS
 from .tag_map import TAG_MAP
 
-
 from ...attrs import LANG
 from ...language import Language
-from ...tokens import Doc
 
 
 class ArmenianDefaults(Language.Defaults):
diff --git a/spacy/lang/hy/examples.py b/spacy/lang/hy/examples.py
index b0df31aae..8a00fd243 100644
--- a/spacy/lang/hy/examples.py
+++ b/spacy/lang/hy/examples.py
@@ -1,6 +1,6 @@
+# coding: utf8
 from __future__ import unicode_literals
 
-
 """
 Example sentences to test spaCy and its language models.
 >>> from spacy.lang.hy.examples import sentences
@@ -11,6 +11,6 @@ Example sentences to test spaCy and its language models.
 sentences = [
     "Լոնդոնը Միացյալ Թագավորության մեծ քաղաք է։",
     "Ո՞վ է Ֆրանսիայի նախագահը։",
-    "Որն է Միացյալ Նահանգների մայրաքաղաքը։",
+    "Ո՞րն է Միացյալ Նահանգների մայրաքաղաքը։",
     "Ե՞րբ է ծնվել Բարաք Օբաման։",
 ]
diff --git a/spacy/lang/hy/lex_attrs.py b/spacy/lang/hy/lex_attrs.py
index 7c1b9592f..dea3c0e97 100644
--- a/spacy/lang/hy/lex_attrs.py
+++ b/spacy/lang/hy/lex_attrs.py
@@ -1,11 +1,12 @@
+# coding: utf8
 from __future__ import unicode_literals
 
 from ...attrs import LIKE_NUM
 
 
 _num_words = [
-    "զրօ",
-    "մէկ",
+    "զրո",
+    "մեկ",
     "երկու",
     "երեք",
     "չորս",
@@ -17,20 +18,21 @@ _num_words = [
     "տասը",
     "տասնմեկ",
     "տասներկու",
-    "տասն­երեք",
-    "տասն­չորս",
-    "տասն­հինգ",
-    "տասն­վեց",
-    "տասն­յոթ",
-    "տասն­ութ",
-    "տասն­ինը",
-    "քսան" "երեսուն",
+    "տասներեք",
+    "տասնչորս",
+    "տասնհինգ",
+    "տասնվեց",
+    "տասնյոթ",
+    "տասնութ",
+    "տասնինը",
+    "քսան",
+    "երեսուն",
     "քառասուն",
     "հիսուն",
-    "վաթցսուն",
+    "վաթսուն",
     "յոթանասուն",
     "ութսուն",
-    "ինիսուն",
+    "իննսուն",
     "հարյուր",
     "հազար",
     "միլիոն",
diff --git a/spacy/lang/hy/stop_words.py b/spacy/lang/hy/stop_words.py
index c671956a4..d75aad6e2 100644
--- a/spacy/lang/hy/stop_words.py
+++ b/spacy/lang/hy/stop_words.py
@@ -1,6 +1,6 @@
+# coding: utf8
 from __future__ import unicode_literals
 
-
 STOP_WORDS = set(
     """
 նա
@@ -105,6 +105,6 @@ STOP_WORDS = set(
 յուրաքանչյուր
 այս
 մեջ
-թ	
+թ
 """.split()
 )
diff --git a/spacy/lang/hy/tag_map.py b/spacy/lang/hy/tag_map.py
index 90690c22e..4d5b6e918 100644
--- a/spacy/lang/hy/tag_map.py
+++ b/spacy/lang/hy/tag_map.py
@@ -1,7 +1,7 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-from ...symbols import POS, SYM, ADJ, NUM, DET, ADV, ADP, X, VERB, NOUN
+from ...symbols import POS, ADJ, NUM, DET, ADV, ADP, X, VERB, NOUN
 from ...symbols import PROPN, PART, INTJ, PRON, SCONJ, AUX, CCONJ
 
 TAG_MAP = {
@@ -27,7 +27,7 @@ TAG_MAP = {
         POS: ADP,
         "AdpType": "Post",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
     },
     "ADP_AdpType=Post": {POS: ADP, "AdpType": "Post"},
     "ADP_AdpType=Prep": {POS: ADP, "AdpType": "Prep"},
@@ -35,12 +35,11 @@ TAG_MAP = {
     "ADV_Degree=Cmp": {POS: ADV, "Degree": "Cmp"},
     "ADV_Degree=Pos": {POS: ADV, "Degree": "Pos"},
     "ADV_Degree=Sup": {POS: ADV, "Degree": "Sup"},
-    "ADV_Distance=Dist|PronType=Dem": {POS: ADV, "Distance": "Dist", "PronType": "Dem"},
-    "ADV_Distance=Dist|PronType=Exc": {POS: ADV, "Distance": "Dist", "PronType": "Exc"},
-    "ADV_Distance=Med|PronType=Dem": {POS: ADV, "Distance": "Med", "PronType": "Dem"},
+    "ADV_Distance=Dist|PronType=Dem": {POS: ADV, "PronType": "Dem"},
+    "ADV_Distance=Dist|PronType=Exc": {POS: ADV, "PronType": "Exc"},
+    "ADV_Distance=Med|PronType=Dem": {POS: ADV, "PronType": "Dem"},
     "ADV_Distance=Med|PronType=Dem|Style=Coll": {
         POS: ADV,
-        "Distance": "Med",
         "PronType": "Dem",
         "Style": "Coll",
     },
@@ -63,7 +62,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -73,7 +72,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Pos",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -83,7 +82,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
         "Tense": "Imp",
         "VerbForm": "Fin",
@@ -93,7 +92,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -103,7 +102,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
         "Tense": "Imp",
         "VerbForm": "Fin",
@@ -113,7 +112,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -123,7 +122,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
         "Tense": "Imp",
         "VerbForm": "Fin",
@@ -133,7 +132,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -143,7 +142,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
         "Tense": "Imp",
         "VerbForm": "Fin",
@@ -153,7 +152,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -163,7 +162,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Neg",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -173,7 +172,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Pos",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -183,7 +182,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
         "Tense": "Imp",
         "VerbForm": "Fin",
@@ -193,7 +192,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -203,7 +202,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
         "Tense": "Imp",
         "VerbForm": "Fin",
@@ -213,7 +212,7 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
         "Tense": "Pres",
         "VerbForm": "Fin",
@@ -227,7 +226,6 @@ TAG_MAP = {
     "DET_Case=Gen|Distance=Med|Number=Plur|Poss=Yes|PronType=Dem": {
         POS: DET,
         "Case": "Gen",
-        "Distance": "Med",
         "Number": "Plur",
         "Poss": "Yes",
         "PronType": "Dem",
@@ -235,7 +233,6 @@ TAG_MAP = {
     "DET_Case=Gen|Distance=Med|Number=Sing|Poss=Yes|PronType=Dem": {
         POS: DET,
         "Case": "Gen",
-        "Distance": "Med",
         "Number": "Sing",
         "Poss": "Yes",
         "PronType": "Dem",
@@ -244,7 +241,7 @@ TAG_MAP = {
         POS: DET,
         "Case": "Gen",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "Poss": "Yes",
         "PronType": "Prs",
     },
@@ -252,8 +249,7 @@ TAG_MAP = {
         POS: DET,
         "Case": "Gen",
         "Number": "Plur",
-        "Person": "2",
-        "Polite": "Infm",
+        "Person": "two",
         "Poss": "Yes",
         "PronType": "Prs",
     },
@@ -261,24 +257,22 @@ TAG_MAP = {
         POS: DET,
         "Case": "Gen",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
-        "PronType": "Emp",
     },
     "DET_Case=Gen|Number=Plur|Person=3|Poss=Yes|PronType=Emp|Reflex=Yes": {
         POS: DET,
         "Case": "Gen",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
-        "PronType": "Emp",
         "Reflex": "Yes",
     },
     "DET_Case=Gen|Number=Sing|Person=1|Poss=Yes|PronType=Prs": {
         POS: DET,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Poss": "Yes",
         "PronType": "Prs",
     },
@@ -286,8 +280,7 @@ TAG_MAP = {
         POS: DET,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "2",
-        "Polite": "Infm",
+        "Person": "two",
         "Poss": "Yes",
         "PronType": "Prs",
     },
@@ -295,24 +288,22 @@ TAG_MAP = {
         POS: DET,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
-        "PronType": "Emp",
     },
     "DET_Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Emp|Reflex=Yes": {
         POS: DET,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
-        "PronType": "Emp",
         "Reflex": "Yes",
     },
     "DET_Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {
         POS: DET,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Poss": "Yes",
         "PronType": "Prs",
     },
@@ -323,30 +314,26 @@ TAG_MAP = {
         "Poss": "Yes",
         "PronType": "Rel",
     },
-    "DET_Distance=Dist|PronType=Dem": {POS: DET, "Distance": "Dist", "PronType": "Dem"},
+    "DET_Distance=Dist|PronType=Dem": {POS: DET, "PronType": "Dem"},
     "DET_Distance=Dist|PronType=Dem|Style=Coll": {
         POS: DET,
-        "Distance": "Dist",
         "PronType": "Dem",
         "Style": "Coll",
     },
     "DET_Distance=Dist|PronType=Dem|Style=Vrnc": {
         POS: DET,
-        "Distance": "Dist",
         "PronType": "Dem",
         "Style": "Vrnc",
     },
-    "DET_Distance=Med|PronType=Dem": {POS: DET, "Distance": "Med", "PronType": "Dem"},
+    "DET_Distance=Med|PronType=Dem": {POS: DET, "PronType": "Dem"},
     "DET_Distance=Med|PronType=Dem|Style=Coll": {
         POS: DET,
-        "Distance": "Med",
         "PronType": "Dem",
         "Style": "Coll",
     },
-    "DET_Distance=Prox|PronType=Dem": {POS: DET, "Distance": "Prox", "PronType": "Dem"},
+    "DET_Distance=Prox|PronType=Dem": {POS: DET, "PronType": "Dem"},
     "DET_Distance=Prox|PronType=Dem|Style=Coll": {
         POS: DET,
-        "Distance": "Prox",
         "PronType": "Dem",
         "Style": "Coll",
     },
@@ -386,7 +373,6 @@ TAG_MAP = {
         "Case": "Abl",
         "Definite": "Ind",
         "Number": "Plur",
-        "Style": "Slng",
     },
     "NOUN_Animacy=Hum|Case=Abl|Definite=Ind|Number=Sing": {
         POS: NOUN,
@@ -415,14 +401,12 @@ TAG_MAP = {
         "Case": "Dat",
         "Definite": "Def",
         "Number": "Sing",
-        "Style": "Slng",
     },
     "NOUN_Animacy=Hum|Case=Dat|Definite=Ind|Number=Assoc": {
         POS: NOUN,
         "Animacy": "Hum",
         "Case": "Dat",
         "Definite": "Ind",
-        "Number": "Assoc",
     },
     "NOUN_Animacy=Hum|Case=Dat|Definite=Ind|Number=Plur": {
         POS: NOUN,
@@ -445,7 +429,6 @@ TAG_MAP = {
         "Case": "Dat",
         "Definite": "Ind",
         "Number": "Plur",
-        "Style": "Slng",
     },
     "NOUN_Animacy=Hum|Case=Dat|Definite=Ind|Number=Sing": {
         POS: NOUN,
@@ -468,7 +451,7 @@ TAG_MAP = {
         "Case": "Dat",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
     },
     "NOUN_Animacy=Hum|Case=Dat|Number=Sing|Number=Sing|Person=1|Style=Coll": {
         POS: NOUN,
@@ -476,7 +459,7 @@ TAG_MAP = {
         "Case": "Dat",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Style": "Coll",
     },
     "NOUN_Animacy=Hum|Case=Ins|Definite=Ind|Number=Sing": {
@@ -499,7 +482,6 @@ TAG_MAP = {
         "Case": "Nom",
         "Definite": "Def",
         "Number": "Plur",
-        "Style": "Slng",
     },
     "NOUN_Animacy=Hum|Case=Nom|Definite=Def|Number=Sing": {
         POS: NOUN,
@@ -521,7 +503,6 @@ TAG_MAP = {
         "Animacy": "Hum",
         "Case": "Nom",
         "Definite": "Ind",
-        "Number": "Assoc",
     },
     "NOUN_Animacy=Hum|Case=Nom|Definite=Ind|Number=Plur": {
         POS: NOUN,
@@ -544,7 +525,6 @@ TAG_MAP = {
         "Case": "Nom",
         "Definite": "Ind",
         "Number": "Plur",
-        "Style": "Slng",
     },
     "NOUN_Animacy=Hum|Case=Nom|Definite=Ind|Number=Plur|Typo=Yes": {
         POS: NOUN,
@@ -575,14 +555,13 @@ TAG_MAP = {
         "Case": "Nom",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
     },
     "NOUN_Animacy=Nhum|Case=Abl|Definite=Ind|Number=Coll": {
         POS: NOUN,
         "Animacy": "Nhum",
         "Case": "Abl",
         "Definite": "Ind",
-        "Number": "Coll",
     },
     "NOUN_Animacy=Nhum|Case=Abl|Definite=Ind|Number=Plur": {
         POS: NOUN,
@@ -612,14 +591,13 @@ TAG_MAP = {
         "Case": "Abl",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
     },
     "NOUN_Animacy=Nhum|Case=Dat|Definite=Def|Number=Coll": {
         POS: NOUN,
         "Animacy": "Nhum",
         "Case": "Dat",
         "Definite": "Def",
-        "Number": "Coll",
     },
     "NOUN_Animacy=Nhum|Case=Dat|Definite=Def|Number=Plur": {
         POS: NOUN,
@@ -672,7 +650,6 @@ TAG_MAP = {
         "Animacy": "Nhum",
         "Case": "Dat",
         "Definite": "Ind",
-        "Number": "Coll",
     },
     "NOUN_Animacy=Nhum|Case=Dat|Definite=Ind|Number=Plur": {
         POS: NOUN,
@@ -716,9 +693,9 @@ TAG_MAP = {
         POS: NOUN,
         "Animacy": "Nhum",
         "Case": "Dat",
-        "Number": "Coll",
+        #
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
     },
     "NOUN_Animacy=Nhum|Case=Dat|Number=Sing|Number=Sing|Person=1": {
         POS: NOUN,
@@ -726,7 +703,7 @@ TAG_MAP = {
         "Case": "Dat",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
     },
     "NOUN_Animacy=Nhum|Case=Dat|Number=Sing|Number=Sing|Person=2": {
         POS: NOUN,
@@ -734,7 +711,7 @@ TAG_MAP = {
         "Case": "Dat",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
     },
     "NOUN_Animacy=Nhum|Case=Gen|Definite=Ind|Number=Sing|Style=Arch": {
         POS: NOUN,
@@ -749,7 +726,6 @@ TAG_MAP = {
         "Animacy": "Nhum",
         "Case": "Ins",
         "Definite": "Ind",
-        "Number": "Coll",
     },
     "NOUN_Animacy=Nhum|Case=Ins|Definite=Ind|Number=Plur": {
         POS: NOUN,
@@ -779,7 +755,7 @@ TAG_MAP = {
         "Case": "Ins",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
     },
     "NOUN_Animacy=Nhum|Case=Loc|Definite=Ind|Number=Plur": {
         POS: NOUN,
@@ -801,21 +777,20 @@ TAG_MAP = {
         "Case": "Loc",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
     },
     "NOUN_Animacy=Nhum|Case=Nom|Definite=Def|Number=Coll": {
         POS: NOUN,
         "Animacy": "Nhum",
         "Case": "Nom",
         "Definite": "Def",
-        "Number": "Coll",
     },
     "NOUN_Animacy=Nhum|Case=Nom|Definite=Def|Number=Plur|Number=Sing|Poss=Yes": {
         POS: NOUN,
         "Animacy": "Nhum",
         "Case": "Nom",
         "Definite": "Def",
-        "Number": "Plur",
+        # "Number": "Plur",
         "Number": "Sing",
         "Poss": "Yes",
     },
@@ -846,14 +821,12 @@ TAG_MAP = {
         "Animacy": "Nhum",
         "Case": "Nom",
         "Definite": "Ind",
-        "Number": "Coll",
     },
     "NOUN_Animacy=Nhum|Case=Nom|Definite=Ind|Number=Coll|Typo=Yes": {
         POS: NOUN,
         "Animacy": "Nhum",
         "Case": "Nom",
         "Definite": "Ind",
-        "Number": "Coll",
         "Typo": "Yes",
     },
     "NOUN_Animacy=Nhum|Case=Nom|Definite=Ind|Number=Plur": {
@@ -880,9 +853,9 @@ TAG_MAP = {
         POS: NOUN,
         "Animacy": "Nhum",
         "Case": "Nom",
-        "Number": "Plur",
+        # "Number": "Plur",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
     },
     "NOUN_Animacy=Nhum|Case=Nom|Number=Sing|Number=Sing|Person=1": {
         POS: NOUN,
@@ -890,7 +863,7 @@ TAG_MAP = {
         "Case": "Nom",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
     },
     "NOUN_Animacy=Nhum|Case=Nom|Number=Sing|Number=Sing|Person=2": {
         POS: NOUN,
@@ -898,7 +871,7 @@ TAG_MAP = {
         "Case": "Nom",
         "Number": "Sing",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
     },
     "NUM_NumForm=Digit|NumType=Card": {POS: NUM, "NumForm": "Digit", "NumType": "Card"},
     "NUM_NumForm=Digit|NumType=Frac|Typo=Yes": {
@@ -907,43 +880,37 @@ TAG_MAP = {
         "NumType": "Frac",
         "Typo": "Yes",
     },
-    "NUM_NumForm=Digit|NumType=Range": {
-        POS: NUM,
-        "NumForm": "Digit",
-        "NumType": "Range",
-    },
+    "NUM_NumForm=Digit|NumType=Range": {POS: NUM, "NumForm": "Digit",},
     "NUM_NumForm=Word|NumType=Card": {POS: NUM, "NumForm": "Word", "NumType": "Card"},
     "NUM_NumForm=Word|NumType=Dist": {POS: NUM, "NumForm": "Word", "NumType": "Dist"},
-    "NUM_NumForm=Word|NumType=Range": {POS: NUM, "NumForm": "Word", "NumType": "Range"},
+    "NUM_NumForm=Word|NumType=Range": {POS: NUM, "NumForm": "Word",},
     "PART_Polarity=Neg": {POS: PART, "Polarity": "Neg"},
     "PRON_Case=Abl|Definite=Ind|Number=Sing|Person=3|PronType=Prs": {
         POS: PRON,
         "Case": "Abl",
         "Definite": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Abl|Number=Plur|Person=3|PronType=Prs": {
         POS: PRON,
         "Case": "Abl",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Abl|Number=Sing|Person=2|Polite=Infm|PronType=Prs": {
         POS: PRON,
         "Case": "Abl",
         "Number": "Sing",
-        "Person": "2",
-        "Polite": "Infm",
+        "Person": "two",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Definite=Def|Distance=Dist|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Dat",
         "Definite": "Def",
-        "Distance": "Dist",
         "Number": "Sing",
         "PronType": "Dem",
     },
@@ -952,7 +919,7 @@ TAG_MAP = {
         "Case": "Dat",
         "Definite": "Def",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Definite=Ind|Number=Sing|PronType=Int": {
@@ -965,14 +932,12 @@ TAG_MAP = {
     "PRON_Case=Dat|Distance=Dist|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Dat",
-        "Distance": "Dist",
         "Number": "Sing",
         "PronType": "Dem",
     },
     "PRON_Case=Dat|Distance=Med|Number=Plur|PronType=Dem": {
         POS: PRON,
         "Case": "Dat",
-        "Distance": "Med",
         "Number": "Plur",
         "PronType": "Dem",
     },
@@ -980,30 +945,28 @@ TAG_MAP = {
         POS: PRON,
         "Case": "Dat",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Number=Plur|Person=2|Polite=Infm|PronType=Prs": {
         POS: PRON,
         "Case": "Dat",
         "Number": "Plur",
-        "Person": "2",
-        "Polite": "Infm",
+        "Person": "two",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Number=Plur|Person=3|PronType=Emp|Reflex=Yes": {
         POS: PRON,
         "Case": "Dat",
         "Number": "Plur",
-        "Person": "3",
-        "PronType": "Emp",
+        "Person": "three",
         "Reflex": "Yes",
     },
     "PRON_Case=Dat|Number=Plur|Person=3|PronType=Prs": {
         POS: PRON,
         "Case": "Dat",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Number=Plur|PronType=Rcp": {
@@ -1016,30 +979,27 @@ TAG_MAP = {
         POS: PRON,
         "Case": "Dat",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Number=Sing|Person=2|Polite=Infm|PronType=Prs": {
         POS: PRON,
         "Case": "Dat",
         "Number": "Sing",
-        "Person": "2",
-        "Polite": "Infm",
+        "Person": "two",
         "PronType": "Prs",
     },
     "PRON_Case=Dat|Number=Sing|Person=3|PronType=Emp": {
         POS: PRON,
         "Case": "Dat",
         "Number": "Sing",
-        "Person": "3",
-        "PronType": "Emp",
+        "Person": "three",
     },
     "PRON_Case=Dat|Number=Sing|Person=3|PronType=Emp|Reflex=Yes": {
         POS: PRON,
         "Case": "Dat",
         "Number": "Sing",
-        "Person": "3",
-        "PronType": "Emp",
+        "Person": "three",
         "Reflex": "Yes",
     },
     "PRON_Case=Dat|Number=Sing|PronType=Int": {
@@ -1058,7 +1018,6 @@ TAG_MAP = {
     "PRON_Case=Gen|Distance=Med|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Gen",
-        "Distance": "Med",
         "Number": "Sing",
         "PronType": "Dem",
     },
@@ -1066,21 +1025,21 @@ TAG_MAP = {
         POS: PRON,
         "Case": "Gen",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "PRON_Case=Gen|Number=Sing|Person=2|PronType=Prs": {
         POS: PRON,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "PronType": "Prs",
     },
     "PRON_Case=Gen|Number=Sing|Person=3|PronType=Prs": {
         POS: PRON,
         "Case": "Gen",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Gen|PronType=Tot": {POS: PRON, "Case": "Gen", "PronType": "Tot"},
@@ -1094,7 +1053,6 @@ TAG_MAP = {
     "PRON_Case=Ins|Distance=Med|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Ins",
-        "Distance": "Med",
         "Number": "Sing",
         "PronType": "Dem",
     },
@@ -1108,7 +1066,6 @@ TAG_MAP = {
     "PRON_Case=Loc|Distance=Med|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Loc",
-        "Distance": "Med",
         "Number": "Sing",
         "PronType": "Dem",
     },
@@ -1116,7 +1073,6 @@ TAG_MAP = {
         POS: PRON,
         "Case": "Nom",
         "Definite": "Def",
-        "Distance": "Dist",
         "Number": "Plur",
         "PronType": "Dem",
     },
@@ -1124,7 +1080,6 @@ TAG_MAP = {
         POS: PRON,
         "Case": "Nom",
         "Definite": "Def",
-        "Distance": "Med",
         "Number": "Sing",
         "PronType": "Dem",
         "Style": "Coll",
@@ -1167,29 +1122,25 @@ TAG_MAP = {
     "PRON_Case=Nom|Distance=Dist|Number=Plur|Person=1|PronType=Dem": {
         POS: PRON,
         "Case": "Nom",
-        "Distance": "Dist",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Dem",
     },
     "PRON_Case=Nom|Distance=Med|Number=Plur|PronType=Dem": {
         POS: PRON,
         "Case": "Nom",
-        "Distance": "Med",
         "Number": "Plur",
         "PronType": "Dem",
     },
     "PRON_Case=Nom|Distance=Med|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Nom",
-        "Distance": "Med",
         "Number": "Sing",
         "PronType": "Dem",
     },
     "PRON_Case=Nom|Distance=Prox|Number=Sing|PronType=Dem": {
         POS: PRON,
         "Case": "Nom",
-        "Distance": "Prox",
         "Number": "Sing",
         "PronType": "Dem",
     },
@@ -1197,21 +1148,20 @@ TAG_MAP = {
         POS: PRON,
         "Case": "Nom",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "PRON_Case=Nom|Number=Plur|Person=3|PronType=Emp": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Plur",
-        "Person": "3",
-        "PronType": "Emp",
+        "Person": "three",
     },
     "PRON_Case=Nom|Number=Plur|Person=3|PronType=Prs": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Nom|Number=Plur|PronType=Rel": {
@@ -1223,46 +1173,43 @@ TAG_MAP = {
     "PRON_Case=Nom|Number=Sing|Number=Plur|Person=3|Person=1|PronType=Emp": {
         POS: PRON,
         "Case": "Nom",
-        "Number": "Sing",
+        # "Number": "Sing",
         "Number": "Plur",
-        "Person": "3",
-        "Person": "1",
-        "PronType": "Emp",
+        # "Person": "three",
+        "Person": "one",
     },
     "PRON_Case=Nom|Number=Sing|Person=1|PronType=Int": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Int",
     },
     "PRON_Case=Nom|Number=Sing|Person=1|PronType=Prs": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Prs",
     },
     "PRON_Case=Nom|Number=Sing|Person=2|Polite=Infm|PronType=Prs": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Sing",
-        "Person": "2",
-        "Polite": "Infm",
+        "Person": "two",
         "PronType": "Prs",
     },
     "PRON_Case=Nom|Number=Sing|Person=3|PronType=Emp": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Sing",
-        "Person": "3",
-        "PronType": "Emp",
+        "Person": "three",
     },
     "PRON_Case=Nom|Number=Sing|Person=3|PronType=Prs": {
         POS: PRON,
         "Case": "Nom",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "PronType": "Prs",
     },
     "PRON_Case=Nom|Number=Sing|PronType=Int": {
@@ -1280,26 +1227,23 @@ TAG_MAP = {
     "PRON_Case=Nom|Person=1|PronType=Tot": {
         POS: PRON,
         "Case": "Nom",
-        "Person": "1",
+        "Person": "one",
         "PronType": "Tot",
     },
     "PRON_Case=Nom|PronType=Ind": {POS: PRON, "Case": "Nom", "PronType": "Ind"},
     "PRON_Case=Nom|PronType=Tot": {POS: PRON, "Case": "Nom", "PronType": "Tot"},
     "PRON_Distance=Dist|Number=Sing|PronType=Dem": {
         POS: PRON,
-        "Distance": "Dist",
         "Number": "Sing",
         "PronType": "Dem",
     },
     "PRON_Distance=Med|PronType=Dem|Style=Coll": {
         POS: PRON,
-        "Distance": "Med",
         "PronType": "Dem",
         "Style": "Coll",
     },
     "PRON_Distance=Prox|PronType=Dem|Style=Coll": {
         POS: PRON,
-        "Distance": "Prox",
         "PronType": "Dem",
         "Style": "Coll",
     },
@@ -1384,7 +1328,6 @@ TAG_MAP = {
         "Case": "Abl",
         "Definite": "Ind",
         "NameType": "Geo",
-        "Number": "Coll",
     },
     "PROPN_Animacy=Nhum|Case=Abl|Definite=Ind|NameType=Geo|Number=Sing": {
         POS: PROPN,
@@ -1449,7 +1392,6 @@ TAG_MAP = {
         "Case": "Nom",
         "Definite": "Ind",
         "NameType": "Geo",
-        "Number": "Coll",
     },
     "PROPN_Animacy=Nhum|Case=Nom|Definite=Ind|NameType=Geo|Number=Sing": {
         POS: PROPN,
@@ -1471,41 +1413,31 @@ TAG_MAP = {
     "SCONJ_Style=Coll": {POS: SCONJ, "Style": "Coll"},
     "VERB_Aspect=Dur|Polarity=Neg|Subcat=Intr|VerbForm=Part|Voice=Pass": {
         POS: VERB,
-        "Aspect": "Dur",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
     "VERB_Aspect=Dur|Polarity=Pos|Subcat=Intr|VerbForm=Part|Voice=Mid": {
         POS: VERB,
-        "Aspect": "Dur",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
     "VERB_Aspect=Dur|Polarity=Pos|Subcat=Intr|VerbForm=Part|Voice=Pass": {
         POS: VERB,
-        "Aspect": "Dur",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
     "VERB_Aspect=Dur|Polarity=Pos|Subcat=Tran|VerbForm=Part|Voice=Act": {
         POS: VERB,
-        "Aspect": "Dur",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Act",
     },
     "VERB_Aspect=Dur|Polarity=Pos|Subcat=Tran|VerbForm=Part|Voice=Mid": {
         POS: VERB,
-        "Aspect": "Dur",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
@@ -1514,9 +1446,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1526,9 +1457,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1538,9 +1468,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1550,9 +1479,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1562,9 +1490,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Imp",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1574,9 +1501,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Neg",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1586,9 +1512,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1598,9 +1523,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1610,9 +1534,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1622,9 +1545,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Imp",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1634,9 +1556,8 @@ TAG_MAP = {
         "Aspect": "Imp",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1645,7 +1566,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Imp",
         "Style": "Coll",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
@@ -1653,48 +1573,41 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Imp",
         "Style": "Vrnc",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
     "VERB_Aspect=Imp|Subcat=Intr|VerbForm=Part": {
         POS: VERB,
         "Aspect": "Imp",
-        "Subcat": "Intr",
         "VerbForm": "Part",
     },
     "VERB_Aspect=Imp|Subcat=Intr|VerbForm=Part|Voice=Act": {
         POS: VERB,
         "Aspect": "Imp",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Act",
     },
     "VERB_Aspect=Imp|Subcat=Intr|VerbForm=Part|Voice=Mid": {
         POS: VERB,
         "Aspect": "Imp",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
     "VERB_Aspect=Imp|Subcat=Intr|VerbForm=Part|Voice=Pass": {
         POS: VERB,
         "Aspect": "Imp",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
     "VERB_Aspect=Imp|Subcat=Tran|VerbForm=Part|Voice=Act": {
         POS: VERB,
         "Aspect": "Imp",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Act",
     },
     "VERB_Aspect=Imp|Subcat=Tran|VerbForm=Part|Voice=Cau": {
         POS: VERB,
         "Aspect": "Imp",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Cau",
     },
@@ -1703,9 +1616,7 @@ TAG_MAP = {
         "Aspect": "Iter",
         "Case": "Ins",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Mid",
     },
@@ -1714,9 +1625,7 @@ TAG_MAP = {
         "Aspect": "Iter",
         "Case": "Ins",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Gdv",
         "Voice": "Act",
     },
@@ -1726,9 +1635,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1738,9 +1646,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1750,9 +1657,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1762,9 +1668,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1774,10 +1679,9 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
         "Style": "Vrnc",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1787,9 +1691,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1799,9 +1702,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1811,9 +1713,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1823,10 +1724,9 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
         "Style": "Vrnc",
-        "Subcat": "Intr",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1836,9 +1736,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1848,9 +1747,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1860,9 +1758,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1872,9 +1769,8 @@ TAG_MAP = {
         "Aspect": "Perf",
         "Mood": "Ind",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Past",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -1883,7 +1779,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Perf",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
@@ -1891,7 +1786,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Perf",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
@@ -1899,7 +1793,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Perf",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
@@ -1907,7 +1800,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Perf",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Act",
     },
@@ -1915,7 +1807,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Perf",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
@@ -1929,35 +1820,30 @@ TAG_MAP = {
     "VERB_Aspect=Perf|Subcat=Intr|VerbForm=Part|Voice=Mid": {
         POS: VERB,
         "Aspect": "Perf",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
     "VERB_Aspect=Perf|Subcat=Intr|VerbForm=Part|Voice=Pass": {
         POS: VERB,
         "Aspect": "Perf",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
     "VERB_Aspect=Perf|Subcat=Tran|VerbForm=Part|Voice=Act": {
         POS: VERB,
         "Aspect": "Perf",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Act",
     },
     "VERB_Aspect=Perf|Subcat=Tran|VerbForm=Part|Voice=Cau": {
         POS: VERB,
         "Aspect": "Perf",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Cau",
     },
     "VERB_Aspect=Prog|Subcat=Intr|VerbForm=Conv|Voice=Mid": {
         POS: VERB,
         "Aspect": "Prog",
-        "Subcat": "Intr",
         "VerbForm": "Conv",
         "Voice": "Mid",
     },
@@ -1966,7 +1852,6 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Connegative": "Yes",
         "Mood": "Cnd",
-        "Subcat": "Tran",
         "VerbForm": "Fin",
         "Voice": "Act",
     },
@@ -1975,10 +1860,9 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
         "Style": "Vrnc",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -1988,9 +1872,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2000,9 +1883,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2012,9 +1894,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2024,9 +1905,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2036,9 +1916,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Pass",
@@ -2048,9 +1927,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Imp",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2060,9 +1938,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Cnd",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2072,8 +1949,7 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Imp",
         "Number": "Sing",
-        "Person": "2",
-        "Subcat": "Intr",
+        "Person": "two",
         "VerbForm": "Fin",
         "Voice": "Mid",
     },
@@ -2082,8 +1958,7 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Imp",
         "Number": "Sing",
-        "Person": "2",
-        "Subcat": "Tran",
+        "Person": "two",
         "VerbForm": "Fin",
         "Voice": "Act",
     },
@@ -2092,9 +1967,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Plur",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2104,9 +1978,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2116,9 +1989,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Plur",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2128,9 +2000,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2140,9 +2011,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2152,9 +2022,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2164,9 +2033,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2176,9 +2044,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Imp",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2188,9 +2055,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "2",
+        "Person": "two",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2200,9 +2066,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Imp",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2212,9 +2077,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Mid",
@@ -2224,9 +2088,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Fin",
         "Voice": "Pass",
     },
@@ -2235,9 +2098,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Imp",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2247,9 +2109,8 @@ TAG_MAP = {
         "Aspect": "Prosp",
         "Mood": "Sub",
         "Number": "Sing",
-        "Person": "3",
+        "Person": "three",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2258,9 +2119,8 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Prosp",
         "Mood": "Sub",
-        "Person": "1",
+        "Person": "one",
         "Polarity": "Neg",
-        "Subcat": "Tran",
         "Tense": "Pres",
         "VerbForm": "Fin",
         "Voice": "Act",
@@ -2269,7 +2129,6 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Prosp",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
@@ -2277,28 +2136,24 @@ TAG_MAP = {
         POS: VERB,
         "Aspect": "Prosp",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Act",
     },
     "VERB_Aspect=Prosp|Subcat=Intr|VerbForm=Part|Voice=Mid": {
         POS: VERB,
         "Aspect": "Prosp",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Mid",
     },
     "VERB_Aspect=Prosp|Subcat=Intr|VerbForm=Part|Voice=Pass": {
         POS: VERB,
         "Aspect": "Prosp",
-        "Subcat": "Intr",
         "VerbForm": "Part",
         "Voice": "Pass",
     },
     "VERB_Aspect=Prosp|Subcat=Tran|VerbForm=Part|Voice=Act": {
         POS: VERB,
         "Aspect": "Prosp",
-        "Subcat": "Tran",
         "VerbForm": "Part",
         "Voice": "Act",
     },
@@ -2306,9 +2161,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Abl",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Mid",
     },
@@ -2316,9 +2169,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Abl",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Pass",
     },
@@ -2326,9 +2177,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Abl",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Gdv",
         "Voice": "Act",
     },
@@ -2336,9 +2185,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Dat",
         "Definite": "Def",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Mid",
     },
@@ -2346,9 +2193,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Dat",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Pass",
     },
@@ -2356,9 +2201,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Dat",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Mid",
     },
@@ -2366,9 +2209,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Dat",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Gdv",
         "Voice": "Act",
     },
@@ -2376,9 +2217,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Ins",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Mid",
     },
@@ -2386,9 +2225,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Ins",
         "Definite": "Ind",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Gdv",
         "Voice": "Act",
     },
@@ -2396,9 +2233,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Nom",
         "Definite": "Def",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Gdv",
         "Voice": "Mid",
     },
@@ -2406,9 +2241,7 @@ TAG_MAP = {
         POS: VERB,
         "Case": "Nom",
         "Definite": "Def",
-        "Number": "Coll",
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Gdv",
         "Voice": "Act",
     },
@@ -2416,15 +2249,13 @@ TAG_MAP = {
         POS: VERB,
         "Mood": "Imp",
         "Number": "Sing",
-        "Person": "2",
-        "Subcat": "Intr",
+        "Person": "two",
         "VerbForm": "Fin",
         "Voice": "Mid",
     },
     "VERB_Polarity=Neg|Subcat=Intr|VerbForm=Inf|Voice=Mid": {
         POS: VERB,
         "Polarity": "Neg",
-        "Subcat": "Intr",
         "VerbForm": "Inf",
         "Voice": "Mid",
     },
@@ -2432,7 +2263,6 @@ TAG_MAP = {
         POS: VERB,
         "Polarity": "Pos",
         "Style": "Coll",
-        "Subcat": "Tran",
         "VerbForm": "Inf",
         "Voice": "Act",
     },
@@ -2440,28 +2270,24 @@ TAG_MAP = {
         POS: VERB,
         "Polarity": "Pos",
         "Style": "Vrnc",
-        "Subcat": "Tran",
         "VerbForm": "Inf",
         "Voice": "Act",
     },
     "VERB_Polarity=Pos|Subcat=Intr|VerbForm=Inf|Voice=Mid": {
         POS: VERB,
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Inf",
         "Voice": "Mid",
     },
     "VERB_Polarity=Pos|Subcat=Intr|VerbForm=Inf|Voice=Pass": {
         POS: VERB,
         "Polarity": "Pos",
-        "Subcat": "Intr",
         "VerbForm": "Inf",
         "Voice": "Pass",
     },
     "VERB_Polarity=Pos|Subcat=Tran|Typo=Yes|VerbForm=Inf|Voice=Act": {
         POS: VERB,
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "Typo": "Yes",
         "VerbForm": "Inf",
         "Voice": "Act",
@@ -2469,7 +2295,6 @@ TAG_MAP = {
     "VERB_Polarity=Pos|Subcat=Tran|VerbForm=Inf|Voice=Act": {
         POS: VERB,
         "Polarity": "Pos",
-        "Subcat": "Tran",
         "VerbForm": "Inf",
         "Voice": "Act",
     },
diff --git a/spacy/lang/pl/lemmatizer.py b/spacy/lang/pl/lemmatizer.py
index 2be4b0fb7..2ceb940c3 100644
--- a/spacy/lang/pl/lemmatizer.py
+++ b/spacy/lang/pl/lemmatizer.py
@@ -14,7 +14,7 @@ class PolishLemmatizer(Lemmatizer):
     # lemmatization for nouns
     def __init__(self, lookups, *args, **kwargs):
         # this lemmatizer is lookup based, so it does not require an index, exceptionlist, or rules
-        super().__init__(lookups)
+        super(PolishLemmatizer, self).__init__(lookups)
         self.lemma_lookups = {}
         for tag in [
             "ADJ",
diff --git a/spacy/lang/zh/__init__.py b/spacy/lang/zh/__init__.py
index ed0b3eb74..9f8a82c10 100644
--- a/spacy/lang/zh/__init__.py
+++ b/spacy/lang/zh/__init__.py
@@ -16,7 +16,7 @@ from .tag_map import TAG_MAP
 from ... import util
 
 
-_PKUSEG_INSTALL_MSG = "install it with `pip install pkuseg==0.0.22` or from https://github.com/lancopku/pkuseg-python"
+_PKUSEG_INSTALL_MSG = "install it with `pip install pkuseg==0.0.25` or from https://github.com/lancopku/pkuseg-python"
 
 
 def try_jieba_import(use_jieba):
@@ -109,6 +109,7 @@ class ChineseTokenizer(DummyTokenizer):
             if reset:
                 try:
                     import pkuseg
+
                     self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(None)
                 except ImportError:
                     if self.use_pkuseg:
@@ -118,7 +119,7 @@ class ChineseTokenizer(DummyTokenizer):
                         )
                         raise ImportError(msg)
             for word in words:
-                self.pkuseg_seg.preprocesser.insert(word.strip(), '')
+                self.pkuseg_seg.preprocesser.insert(word.strip(), "")
 
     def _get_config(self):
         config = OrderedDict(
@@ -168,21 +169,16 @@ class ChineseTokenizer(DummyTokenizer):
         return util.to_bytes(serializers, [])
 
     def from_bytes(self, data, **kwargs):
-        pkuseg_features_b = b""
-        pkuseg_weights_b = b""
-        pkuseg_processors_data = None
+        pkuseg_data = {"features_b": b"", "weights_b": b"", "processors_data": None}
 
         def deserialize_pkuseg_features(b):
-            nonlocal pkuseg_features_b
-            pkuseg_features_b = b
+            pkuseg_data["features_b"] = b
 
         def deserialize_pkuseg_weights(b):
-            nonlocal pkuseg_weights_b
-            pkuseg_weights_b = b
+            pkuseg_data["weights_b"] = b
 
         def deserialize_pkuseg_processors(b):
-            nonlocal pkuseg_processors_data
-            pkuseg_processors_data = srsly.msgpack_loads(b)
+            pkuseg_data["processors_data"] = srsly.msgpack_loads(b)
 
         deserializers = OrderedDict(
             (
@@ -194,13 +190,13 @@ class ChineseTokenizer(DummyTokenizer):
         )
         util.from_bytes(data, deserializers, [])
 
-        if pkuseg_features_b and pkuseg_weights_b:
+        if pkuseg_data["features_b"] and pkuseg_data["weights_b"]:
             with tempfile.TemporaryDirectory() as tempdir:
                 tempdir = Path(tempdir)
                 with open(tempdir / "features.pkl", "wb") as fileh:
-                    fileh.write(pkuseg_features_b)
+                    fileh.write(pkuseg_data["features_b"])
                 with open(tempdir / "weights.npz", "wb") as fileh:
-                    fileh.write(pkuseg_weights_b)
+                    fileh.write(pkuseg_data["weights_b"])
                 try:
                     import pkuseg
                 except ImportError:
@@ -209,13 +205,9 @@ class ChineseTokenizer(DummyTokenizer):
                         + _PKUSEG_INSTALL_MSG
                     )
                 self.pkuseg_seg = pkuseg.pkuseg(str(tempdir))
-            if pkuseg_processors_data:
-                (
-                    user_dict,
-                    do_process,
-                    common_words,
-                    other_words,
-                ) = pkuseg_processors_data
+            if pkuseg_data["processors_data"]:
+                processors_data = pkuseg_data["processors_data"]
+                (user_dict, do_process, common_words, other_words) = processors_data
                 self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(user_dict)
                 self.pkuseg_seg.postprocesser.do_process = do_process
                 self.pkuseg_seg.postprocesser.common_words = set(common_words)
diff --git a/spacy/matcher/dependencymatcher.pyx b/spacy/matcher/dependencymatcher.pyx
index 56d27024d..e93416043 100644
--- a/spacy/matcher/dependencymatcher.pyx
+++ b/spacy/matcher/dependencymatcher.pyx
@@ -235,12 +235,12 @@ cdef class DependencyMatcher:
 
                 matched_trees = []
                 self.recurse(_tree,id_to_position,_node_operator_map,0,[],matched_trees)
-                matched_key_trees.append((key,matched_trees))
-
-            for i, (ent_id, nodes) in enumerate(matched_key_trees):
-                on_match = self._callbacks.get(ent_id)
-                if on_match is not None:
-                    on_match(self, doc, i, matched_key_trees)
+                if len(matched_trees) > 0:
+                    matched_key_trees.append((key,matched_trees))
+        for i, (ent_id, nodes) in enumerate(matched_key_trees):
+            on_match = self._callbacks.get(ent_id)
+            if on_match is not None:
+                on_match(self, doc, i, matched_key_trees)
         return matched_key_trees
 
     def recurse(self,tree,id_to_position,_node_operator_map,int patternLength,visitedNodes,matched_trees):
diff --git a/spacy/tests/lang/hy/test_text.py b/spacy/tests/lang/hy/test_text.py
index 6b785bdfc..cbdb77e4e 100644
--- a/spacy/tests/lang/hy/test_text.py
+++ b/spacy/tests/lang/hy/test_text.py
@@ -1,3 +1,4 @@
+# coding: utf8
 from __future__ import unicode_literals
 
 import pytest
diff --git a/spacy/tests/lang/hy/test_tokenizer.py b/spacy/tests/lang/hy/test_tokenizer.py
index 424fb886f..5043273f9 100644
--- a/spacy/tests/lang/hy/test_tokenizer.py
+++ b/spacy/tests/lang/hy/test_tokenizer.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import pytest
diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py
index 0295ada82..a2ca69111 100644
--- a/spacy/tests/matcher/test_matcher_api.py
+++ b/spacy/tests/matcher/test_matcher_api.py
@@ -7,6 +7,7 @@ from mock import Mock
 from spacy.matcher import Matcher, DependencyMatcher
 from spacy.tokens import Doc, Token
 from ..doc.test_underscore import clean_underscore  # noqa: F401
+from ..util import get_doc
 
 
 @pytest.fixture
@@ -301,22 +302,6 @@ def test_matcher_extension_set_membership(en_vocab):
     assert len(matches) == 0
 
 
-@pytest.fixture
-def text():
-    return "The quick brown fox jumped over the lazy fox"
-
-
-@pytest.fixture
-def heads():
-    return [3, 2, 1, 1, 0, -1, 2, 1, -3]
-
-
-@pytest.fixture
-def deps():
-    return ["det", "amod", "amod", "nsubj", "prep", "pobj", "det", "amod"]
-
-
-@pytest.fixture
 def dependency_matcher(en_vocab):
     def is_brown_yellow(text):
         return bool(re.compile(r"brown|yellow|over").match(text))
@@ -359,24 +344,40 @@ def dependency_matcher(en_vocab):
         },
     ]
 
+    # pattern that doesn't match
+    pattern4 = [
+        {"SPEC": {"NODE_NAME": "jumped"}, "PATTERN": {"ORTH": "NOMATCH"}},
+        {
+            "SPEC": {"NODE_NAME": "fox", "NBOR_RELOP": ">", "NBOR_NAME": "jumped"},
+            "PATTERN": {"ORTH": "fox"},
+        },
+        {
+            "SPEC": {"NODE_NAME": "r", "NBOR_RELOP": ">>", "NBOR_NAME": "fox"},
+            "PATTERN": {"ORTH": "brown"},
+        },
+    ]
+
     matcher = DependencyMatcher(en_vocab)
-    matcher.add("pattern1", [pattern1])
-    matcher.add("pattern2", [pattern2])
-    matcher.add("pattern3", [pattern3])
+    on_match = Mock()
+    matcher.add("pattern1", [pattern1], on_match=on_match)
+    matcher.add("pattern2", [pattern2], on_match=on_match)
+    matcher.add("pattern3", [pattern3], on_match=on_match)
+    matcher.add("pattern4", [pattern4], on_match=on_match)
 
-    return matcher
+    assert len(dependency_matcher) == 4
 
+    text = "The quick brown fox jumped over the lazy fox"
+    heads = [3, 2, 1, 1, 0, -1, 2, 1, -3]
+    deps = ["det", "amod", "amod", "nsubj", "ROOT", "prep", "pobj", "det", "amod"]
 
-def test_dependency_matcher_compile(dependency_matcher):
-    assert len(dependency_matcher) == 3
+    doc = get_doc(dependency_matcher.vocab, text.split(), heads=heads, deps=deps)
+    matches = dependency_matcher(doc)
 
-
-# def test_dependency_matcher(dependency_matcher, text, heads, deps):
-#     doc = get_doc(dependency_matcher.vocab, text.split(), heads=heads, deps=deps)
-#     matches = dependency_matcher(doc)
-#     assert matches[0][1] == [[3, 1, 2]]
-#     assert matches[1][1] == [[4, 3, 3]]
-#     assert matches[2][1] == [[4, 3, 2]]
+    assert len(matches) == 3
+    assert matches[0][1] == [[3, 1, 2]]
+    assert matches[1][1] == [[4, 3, 3]]
+    assert matches[2][1] == [[4, 3, 2]]
+    assert on_match.call_count == 3
 
 
 def test_matcher_basic_check(en_vocab):
diff --git a/spacy/tests/regression/test_issue5152.py b/spacy/tests/regression/test_issue5152.py
index a9a57746d..d2956d6e8 100644
--- a/spacy/tests/regression/test_issue5152.py
+++ b/spacy/tests/regression/test_issue5152.py
@@ -1,3 +1,5 @@
+# coding: utf8
+from __future__ import unicode_literals
 from spacy.lang.en import English