Tidy up and auto-format

Ines Montani 2019-08-20 17:36:34 +02:00
parent 364aaf5bc2
commit f580302673
69 changed files with 83201 additions and 82191 deletions


@@ -430,8 +430,7 @@ class Errors(object):
     E150 = ("The language of the `nlp` object and the `vocab` should be the "
             "same, but found '{nlp}' and '{vocab}' respectively.")
-    E151 = ("Trying to call nlp.update without required annotation types. "
-            "Expected top-level keys: {expected_keys}."
-            " Got: {unexpected_keys}.")
+    E151 = ("Trying to call nlp.update without required annotation types. "
+            "Expected top-level keys: {exp}. Got: {unexp}.")
     E152 = ("The `nlp` object should have a pre-trained `ner` component.")
     E153 = ("Either provide a path to a preprocessed training directory, "
             "or to the original Wikipedia XML dump.")


@@ -10,8 +10,4 @@ Example sentences to test spaCy and its language models.
 """
-sentences = [
-    'তুই খুব ভালো',
-    'আজ আমরা ডাক্তার দেখতে যাবো',
-    'আমি জানি না '
-]
+sentences = ["তুই খুব ভালো", "আজ আমরা ডাক্তার দেখতে যাবো", "আমি জানি না "]


@@ -22,7 +22,9 @@ _suffixes = (
         r"(?<=°[FfCcKk])\.",
         r"(?<=[0-9])(?:[{c}])".format(c=_currency),
         r"(?<=[0-9])(?:{u})".format(u=UNITS),
-        r"(?<=[{al}{e}{q}(?:{c})])\.".format(al=ALPHA_LOWER, e=r"%²\-\+", q=CONCAT_QUOTES, c=_currency),
+        r"(?<=[{al}{e}{q}(?:{c})])\.".format(
+            al=ALPHA_LOWER, e=r"%²\-\+", q=CONCAT_QUOTES, c=_currency
+        ),
     ]
 )
@@ -35,8 +37,8 @@ _infixes = (
         ),
         r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
         r"(?<=[{a}])({h})(?=[{ae}])".format(a=ALPHA, h=HYPHENS, ae=""),
-        r'(?<=[{a}])(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS),
-        r'(?<=[{a}])[:<>=/](?=[{a}])'.format(a=ALPHA),
+        r"(?<=[{a}])(?:{h})(?=[{a}])".format(a=ALPHA, h=HYPHENS),
+        r"(?<=[{a}])[:<>=/](?=[{a}])".format(a=ALPHA),
     ]
 )
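These files only hold the raw patterns; a rough sketch of how spaCy's documented v2.x API compiles such suffix/infix lists into a tokenizer (this wiring is not part of the commit, it is shown for context):

    import spacy
    from spacy.util import compile_infix_regex, compile_suffix_regex

    nlp = spacy.blank("bn")  # the Bengali defaults include lists like the ones above
    suffix_re = compile_suffix_regex(nlp.Defaults.suffixes)
    infix_re = compile_infix_regex(nlp.Defaults.infixes)
    nlp.tokenizer.suffix_search = suffix_re.search
    nlp.tokenizer.infix_finditer = infix_re.finditer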


@@ -13,7 +13,7 @@ _infixes = (
     + [
         r"(?<=[{al}])\.(?=[{au}])".format(al=ALPHA_LOWER, au=ALPHA_UPPER),
         r"(?<=[{a}])[,!?](?=[{a}])".format(a=ALPHA),
-        r'(?<=[{a}])[:<>=](?=[{a}])'.format(a=ALPHA),
+        r"(?<=[{a}])[:<>=](?=[{a}])".format(a=ALPHA),
         r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
         r"(?<=[{a}])([{q}\)\]\(\[])(?=[{a}])".format(a=ALPHA, q=_quotes),
         r"(?<=[{a}])--(?=[{a}])".format(a=ALPHA),


@@ -59,7 +59,9 @@ _suffixes = (
         r"([0-9])+\&",  # 12&
         r"(?<=[0-9])(?:{c})".format(c=CURRENCY),
         r"(?<=[0-9])(?:{u})".format(u=UNITS),
-        r"(?<=[0-9{al}{e}(?:{q})])\.".format(al=ALPHA_LOWER, e=r"²\-\+", q=CONCAT_QUOTES),
+        r"(?<=[0-9{al}{e}(?:{q})])\.".format(
+            al=ALPHA_LOWER, e=r"²\-\+", q=CONCAT_QUOTES
+        ),
         r"(?<=[{au}][{au}])\.".format(au=ALPHA_UPPER),
         r"(?<=[Α-Ωα-ωίϊΐόάέύϋΰήώ])\-",  # όνομα-
         r"(?<=[Α-Ωα-ωίϊΐόάέύϋΰήώ])\.",
@@ -87,8 +89,8 @@ _infixes = (
         r"([a-zA-Z]+)(\-([a-zA-Z]+))+",  # abc-abc
         r"(?<=[{al}])\.(?=[{au}])".format(al=ALPHA_LOWER, au=ALPHA_UPPER),
         r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
-        r'(?<=[{a}])(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS),
-        r'(?<=[{a}])[:<>=/](?=[{a}])'.format(a=ALPHA),
+        r"(?<=[{a}])(?:{h})(?=[{a}])".format(a=ALPHA, h=HYPHENS),
+        r"(?<=[{a}])[:<>=/](?=[{a}])".format(a=ALPHA),
     ]
 )


@@ -27,5 +27,5 @@ ADVERBS_IRREG = {
     "slower": ("slow",),
     "slowest": ("slowest",),
     "sooner": ("soon",),
-    "soonest": ("soon",)
+    "soonest": ("soon",),
 }
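Much of this commit is the same trailing-comma churn across the lemmatizer tables below. It is behaviour-neutral; a quick sanity check in plain Python (not taken from the diff):

    # A one-element tuple needs its comma; ("soon") would just be the string "soon".
    assert ("soon",) != "soon"
    # With two or more elements the trailing comma is redundant, so black drops it.
    assert ("slow", "slowly",) == ("slow", "slowly")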


@ -44,7 +44,7 @@ NOUNS_IRREG = {
"allodia": ("allodium",), "allodia": ("allodium",),
"alluvia": ("alluvium",), "alluvia": ("alluvium",),
"alodia": ("alodium",), "alodia": ("alodium",),
"alto-relievos": ("alto-relievo", "alto-rilievo",), "alto-relievos": ("alto-relievo", "alto-rilievo"),
"altocumuli": ("altocumulus",), "altocumuli": ("altocumulus",),
"altostrati": ("altostratus",), "altostrati": ("altostratus",),
"alulae": ("alula",), "alulae": ("alula",),
@ -81,7 +81,7 @@ NOUNS_IRREG = {
"anamorphoses": ("anamorphosis",), "anamorphoses": ("anamorphosis",),
"anastomoses": ("anastomosis",), "anastomoses": ("anastomosis",),
"anatyxes": ("anaptyxis",), "anatyxes": ("anaptyxis",),
"ancones": ("ancon", "ancone",), "ancones": ("ancon", "ancone"),
"androclinia": ("androclinium",), "androclinia": ("androclinium",),
"androecia": ("androecium",), "androecia": ("androecium",),
"androsphinges": ("androsphinx",), "androsphinges": ("androsphinx",),
@ -90,7 +90,7 @@ NOUNS_IRREG = {
"angiomata": ("angioma",), "angiomata": ("angioma",),
"animalcula": ("animalculum",), "animalcula": ("animalculum",),
"anlagen": ("anlage",), "anlagen": ("anlage",),
"annattos": ("anatto", "annatto",), "annattos": ("anatto", "annatto"),
"annuli": ("annulus",), "annuli": ("annulus",),
"antae": ("anta",), "antae": ("anta",),
"antalkalies": ("antalkali",), "antalkalies": ("antalkali",),
@ -158,7 +158,7 @@ NOUNS_IRREG = {
"aspergilli": ("aspergillus",), "aspergilli": ("aspergillus",),
"aspergilloses": ("aspergillosis",), "aspergilloses": ("aspergillosis",),
"aspersoria": ("aspersorium",), "aspersoria": ("aspersorium",),
"assegais": ("assagai", "assegai",), "assegais": ("assagai", "assegai"),
"astragali": ("astragalus",), "astragali": ("astragalus",),
"asyndeta": ("asyndeton",), "asyndeta": ("asyndeton",),
"atheromata": ("atheroma",), "atheromata": ("atheroma",),
@ -172,15 +172,15 @@ NOUNS_IRREG = {
"aurei": ("aureus",), "aurei": ("aureus",),
"auriculae": ("auricula",), "auriculae": ("auricula",),
"aurorae": ("aurora",), "aurorae": ("aurora",),
"auspices": ("auspex", "auspice",), "auspices": ("auspex", "auspice"),
"autocatalyses": ("autocatalysis",), "autocatalyses": ("autocatalysis",),
"autochthones": ("autochthon",), "autochthones": ("autochthon",),
"automata": ("automaton",), "automata": ("automaton",),
"autos-da-fe": ("auto-da-fe",), "autos-da-fe": ("auto-da-fe",),
"avitaminoses": ("avitaminosis",), "avitaminoses": ("avitaminosis",),
"axes": ("ax", "axis",), "axes": ("ax", "axis"),
"axillae": ("axilla",), "axillae": ("axilla",),
"bacchantes": ("bacchant", "bacchante",), "bacchantes": ("bacchant", "bacchante"),
"bacchii": ("bacchius",), "bacchii": ("bacchius",),
"bacilli": ("bacillus",), "bacilli": ("bacillus",),
"bacteriostases": ("bacteriostasis",), "bacteriostases": ("bacteriostasis",),
@ -195,7 +195,7 @@ NOUNS_IRREG = {
"banjoes": ("banjo",), "banjoes": ("banjo",),
"barklice": ("barklouse",), "barklice": ("barklouse",),
"barramundies": ("barramundi",), "barramundies": ("barramundi",),
"bases": ("base", "basis",), "bases": ("base", "basis"),
"bases-on-balls": ("base_on_balls",), "bases-on-balls": ("base_on_balls",),
"bases_on_balls": ("base_on_balls",), "bases_on_balls": ("base_on_balls",),
"basidia": ("basidium",), "basidia": ("basidium",),
@ -204,15 +204,15 @@ NOUNS_IRREG = {
"bastinadoes": ("bastinado",), "bastinadoes": ("bastinado",),
"bateaux": ("bateau",), "bateaux": ("bateau",),
"batfishes": ("batfish",), "batfishes": ("batfish",),
"beadsmen": ("beadsman", "bedesman",), "beadsmen": ("beadsman", "bedesman"),
"beaux": ("beau",), "beaux": ("beau",),
"beches-de-mer": ("beche-de-mer",), "beches-de-mer": ("beche-de-mer",),
"beeves": ("beef",), "beeves": ("beef",),
"behooves": ("behoof",), "behooves": ("behoof",),
"bersaglieri": ("bersagliere",), "bersaglieri": ("bersagliere",),
"bhishties": ("bheesty", "bhishti",), "bhishties": ("bheesty", "bhishti"),
"bibliothecae": ("bibliotheca",), "bibliothecae": ("bibliotheca",),
"bicennaries": ("bicentenary", "bicentennial",), "bicennaries": ("bicentenary", "bicentennial"),
"bijoux": ("bijou",), "bijoux": ("bijou",),
"bilboes": ("bilbo",), "bilboes": ("bilbo",),
"billets-doux": ("billet-doux",), "billets-doux": ("billet-doux",),
@ -245,7 +245,7 @@ NOUNS_IRREG = {
"brachia": ("brachium",), "brachia": ("brachium",),
"brainchildren": ("brainchild",), "brainchildren": ("brainchild",),
"branchiae": ("branchia",), "branchiae": ("branchia",),
"brants": ("brant", "brent",), "brants": ("brant", "brent"),
"bravadoes": ("bravado",), "bravadoes": ("bravado",),
"bravoes": ("bravo",), "bravoes": ("bravo",),
"bregmata": ("bregma",), "bregmata": ("bregma",),
@ -275,7 +275,7 @@ NOUNS_IRREG = {
"caesurae": ("caesura",), "caesurae": ("caesura",),
"calami": ("calamus",), "calami": ("calamus",),
"calathi": ("calathus",), "calathi": ("calathus",),
"calcanei": ("calcaneum", "calcaneus",), "calcanei": ("calcaneum", "calcaneus"),
"calces": ("calx",), "calces": ("calx",),
"calculi": ("calculus",), "calculi": ("calculus",),
"caldaria": ("caldarium",), "caldaria": ("caldarium",),
@ -421,7 +421,7 @@ NOUNS_IRREG = {
"comae": ("coma",), "comae": ("coma",),
"comatulae": ("comatula",), "comatulae": ("comatula",),
"comedones": ("comedo",), "comedones": ("comedo",),
"comics": ("comic_strip", "comic",), "comics": ("comic_strip", "comic"),
"commandoes": ("commando",), "commandoes": ("commando",),
"concertanti": ("concertante",), "concertanti": ("concertante",),
"concerti": ("concerto",), "concerti": ("concerto",),
@ -549,11 +549,11 @@ NOUNS_IRREG = {
"diplococci": ("diplococcus",), "diplococci": ("diplococcus",),
"directors-general": ("director-general",), "directors-general": ("director-general",),
"disci": ("discus",), "disci": ("discus",),
"discoboli": ("discobolos", "discobolus",), "discoboli": ("discobolos", "discobolus"),
"dive": ("diva",), "dive": ("diva",),
"diverticula": ("diverticulum",), "diverticula": ("diverticulum",),
"divertimenti": ("divertimento",), "divertimenti": ("divertimento",),
"djinn": ("djinni", "djinny",), "djinn": ("djinni", "djinny"),
"dodoes": ("dodo",), "dodoes": ("dodo",),
"dogfishes": ("dogfish",), "dogfishes": ("dogfish",),
"dogmata": ("dogma",), "dogmata": ("dogma",),
@ -593,7 +593,7 @@ NOUNS_IRREG = {
"ellipses": ("ellipsis",), "ellipses": ("ellipsis",),
"eluvia": ("eluvium",), "eluvia": ("eluvium",),
"elves": ("elf",), "elves": ("elf",),
"elytra": ("elytron", "elytrum",), "elytra": ("elytron", "elytrum"),
"embargoes": ("embargo",), "embargoes": ("embargo",),
"emboli": ("embolus",), "emboli": ("embolus",),
"emphases": ("emphasis",), "emphases": ("emphasis",),
@ -623,7 +623,7 @@ NOUNS_IRREG = {
"entases": ("entasis",), "entases": ("entasis",),
"entera": ("enteron",), "entera": ("enteron",),
"entia": ("ens",), "entia": ("ens",),
"entozoa": ("entozoan", "entozoon",), "entozoa": ("entozoan", "entozoon"),
"epencephala": ("epencephalon",), "epencephala": ("epencephalon",),
"epentheses": ("epenthesis",), "epentheses": ("epenthesis",),
"epexegeses": ("epexegesis",), "epexegeses": ("epexegesis",),
@ -643,10 +643,10 @@ NOUNS_IRREG = {
"epiphenomena": ("epiphenomenon",), "epiphenomena": ("epiphenomenon",),
"epiphyses": ("epiphysis",), "epiphyses": ("epiphysis",),
"episterna": ("episternum",), "episterna": ("episternum",),
"epithalamia": ("epithalamion", "epithalamium",), "epithalamia": ("epithalamion", "epithalamium"),
"epithelia": ("epithelium",), "epithelia": ("epithelium",),
"epitheliomata": ("epithelioma",), "epitheliomata": ("epithelioma",),
"epizoa": ("epizoan", "epizoon",), "epizoa": ("epizoan", "epizoon"),
"epyllia": ("epyllion",), "epyllia": ("epyllion",),
"equilibria": ("equilibrium",), "equilibria": ("equilibrium",),
"equiseta": ("equisetum",), "equiseta": ("equisetum",),
@ -845,11 +845,11 @@ NOUNS_IRREG = {
"groszy": ("grosz",), "groszy": ("grosz",),
"grottoes": ("grotto",), "grottoes": ("grotto",),
"guilder": ("guilde",), "guilder": ("guilde",),
"guilders": ("guilde", "guilder",), "guilders": ("guilde", "guilder"),
"guitarfishes": ("guitarfish",), "guitarfishes": ("guitarfish",),
"gummata": ("gumma",), "gummata": ("gumma",),
"gurnard": ("gurnar",), "gurnard": ("gurnar",),
"gurnards": ("gurnar", "gurnard",), "gurnards": ("gurnar", "gurnard"),
"guttae": ("gutta",), "guttae": ("gutta",),
"gymnasia": ("gymnasium",), "gymnasia": ("gymnasium",),
"gynaecea": ("gynaeceum",), "gynaecea": ("gynaeceum",),
@ -870,7 +870,7 @@ NOUNS_IRREG = {
"haeredes": ("haeres",), "haeredes": ("haeres",),
"haftaroth": ("haftarah",), "haftaroth": ("haftarah",),
"hagfishes": ("hagfish",), "hagfishes": ("hagfish",),
"haggadas": ("haggada", "haggadah",), "haggadas": ("haggada", "haggadah"),
"haggadoth": ("haggada",), "haggadoth": ("haggada",),
"hajjes": ("hajj",), "hajjes": ("hajj",),
"haleru": ("haler",), "haleru": ("haler",),
@ -879,7 +879,7 @@ NOUNS_IRREG = {
"halloth": ("hallah",), "halloth": ("hallah",),
"halluces": ("hallux",), "halluces": ("hallux",),
"haloes": ("halo",), "haloes": ("halo",),
"halteres": ("halter", "haltere",), "halteres": ("halter", "haltere"),
"halves": ("half",), "halves": ("half",),
"hamuli": ("hamulus",), "hamuli": ("hamulus",),
"hangers-on": ("hanger-on",), "hangers-on": ("hanger-on",),
@ -909,7 +909,7 @@ NOUNS_IRREG = {
"heraclidae": ("heraclid",), "heraclidae": ("heraclid",),
"heraklidae": ("heraklid",), "heraklidae": ("heraklid",),
"herbaria": ("herbarium",), "herbaria": ("herbarium",),
"hermae": ("herm", "herma",), "hermae": ("herm", "herma"),
"hermai": ("herma",), "hermai": ("herma",),
"herniae": ("hernia",), "herniae": ("hernia",),
"heroes": ("hero",), "heroes": ("hero",),
@ -955,8 +955,8 @@ NOUNS_IRREG = {
"ibices": ("ibex",), "ibices": ("ibex",),
"ibo": ("igbo",), "ibo": ("igbo",),
"ichthyosauri": ("ichthyosaurus",), "ichthyosauri": ("ichthyosaurus",),
"ichthyosauruses": ("ichthyosaur", "ichthyosaurus",), "ichthyosauruses": ("ichthyosaur", "ichthyosaurus"),
"iconostases": ("iconostas", "iconostasis",), "iconostases": ("iconostas", "iconostasis"),
"icosahedra": ("icosahedron",), "icosahedra": ("icosahedron",),
"ideata": ("ideatum",), "ideata": ("ideatum",),
"igorrorote": ("igorrote",), "igorrorote": ("igorrote",),
@ -991,7 +991,7 @@ NOUNS_IRREG = {
"is": ("is",), "is": ("is",),
"ischia": ("ischium",), "ischia": ("ischium",),
"isthmi": ("isthmus",), "isthmi": ("isthmus",),
"jackeroos": ("jackaroo", "jackeroo",), "jackeroos": ("jackaroo", "jackeroo"),
"jackfishes": ("jackfish",), "jackfishes": ("jackfish",),
"jackknives": ("jackknife",), "jackknives": ("jackknife",),
"jacks-in-the-box": ("jack-in-the-box",), "jacks-in-the-box": ("jack-in-the-box",),
@ -1001,12 +1001,12 @@ NOUNS_IRREG = {
"jewfishes": ("jewfish",), "jewfishes": ("jewfish",),
"jingoes": ("jingo",), "jingoes": ("jingo",),
"jinn": ("jinni",), "jinn": ("jinni",),
"joes": ("jo", "joe",), "joes": ("jo", "joe"),
"judge_advocates_general": ("judge_advocate_general",), "judge_advocates_general": ("judge_advocate_general",),
"jura": ("jus",), "jura": ("jus",),
"kaddishim": ("kaddish",), "kaddishim": ("kaddish",),
"kalmuck": ("kalmuc",), "kalmuck": ("kalmuc",),
"kalmucks": ("kalmuc", "kalmuck",), "kalmucks": ("kalmuc", "kalmuck"),
"katabases": ("katabasis",), "katabases": ("katabasis",),
"keeshonden": ("keeshond",), "keeshonden": ("keeshond",),
"kibbutzim": ("kibbutz",), "kibbutzim": ("kibbutz",),
@ -1045,7 +1045,7 @@ NOUNS_IRREG = {
"latifundia": ("latifundium",), "latifundia": ("latifundium",),
"latu": ("lat",), "latu": ("lat",),
"lavaboes": ("lavabo",), "lavaboes": ("lavabo",),
"leaves": ("leaf", "leave",), "leaves": ("leaf", "leave"),
"lecythi": ("lecythus",), "lecythi": ("lecythus",),
"leges": ("lex",), "leges": ("lex",),
"lei": ("leu",), "lei": ("leu",),
@ -1078,7 +1078,7 @@ NOUNS_IRREG = {
"liriodendra": ("liriodendron",), "liriodendra": ("liriodendron",),
"lisente": ("sente",), "lisente": ("sente",),
"listente": ("sente",), "listente": ("sente",),
"litai": ("lit", "litas",), "litai": ("lit", "litas"),
"litu": ("litas",), "litu": ("litas",),
"lives": ("life",), "lives": ("life",),
"lixivia": ("lixivium",), "lixivia": ("lixivium",),
@ -1098,7 +1098,7 @@ NOUNS_IRREG = {
"lumpfishes": ("lumpfish",), "lumpfishes": ("lumpfish",),
"lungfishes": ("lungfish",), "lungfishes": ("lungfish",),
"lunulae": ("lunula",), "lunulae": ("lunula",),
"lures": ("lur", "lure",), "lures": ("lur", "lure"),
"lustra": ("lustre",), "lustra": ("lustre",),
"lyings-in": ("lying-in",), "lyings-in": ("lying-in",),
"lymphangitides": ("lymphangitis",), "lymphangitides": ("lymphangitis",),
@ -1142,7 +1142,7 @@ NOUNS_IRREG = {
"marsupia": ("marsupium",), "marsupia": ("marsupium",),
"marvels-of-peru": ("marvel-of-peru",), "marvels-of-peru": ("marvel-of-peru",),
"mass_media": ("mass_medium",), "mass_media": ("mass_medium",),
"masses": ("mass", "masse",), "masses": ("mass", "masse"),
"masters-at-arms": ("master-at-arms",), "masters-at-arms": ("master-at-arms",),
"matrices": ("matrix",), "matrices": ("matrix",),
"matzoth": ("matzo",), "matzoth": ("matzo",),
@ -1210,7 +1210,7 @@ NOUNS_IRREG = {
"mioses": ("miosis",), "mioses": ("miosis",),
"miracidia": ("miracidium",), "miracidia": ("miracidium",),
"miri": ("mir",), "miri": ("mir",),
"mishnayoth": ("mishna", "mishnah",), "mishnayoth": ("mishna", "mishnah"),
"mitochondria": ("mitochondrion",), "mitochondria": ("mitochondrion",),
"mitzvoth": ("mitzvah",), "mitzvoth": ("mitzvah",),
"modioli": ("modiolus",), "modioli": ("modiolus",),
@ -1218,7 +1218,7 @@ NOUNS_IRREG = {
"momenta": ("momentum",), "momenta": ("momentum",),
"moments_of_truth": ("moment_of_truth",), "moments_of_truth": ("moment_of_truth",),
"momi": ("momus",), "momi": ("momus",),
"monades": ("monad", "monas",), "monades": ("monad", "monas"),
"monkfishes": ("monkfish",), "monkfishes": ("monkfish",),
"monochasia": ("monochasium",), "monochasia": ("monochasium",),
"monopodia": ("monopodium",), "monopodia": ("monopodium",),
@ -1235,7 +1235,7 @@ NOUNS_IRREG = {
"moriscoes": ("morisco",), "moriscoes": ("morisco",),
"morphallaxes": ("morphallaxis",), "morphallaxes": ("morphallaxis",),
"morphoses": ("morphosis",), "morphoses": ("morphosis",),
"morses": ("morse", "mors",), "morses": ("morse", "mors"),
"morulae": ("morula",), "morulae": ("morula",),
"mosasauri": ("mosasaurus",), "mosasauri": ("mosasaurus",),
"moshavim": ("moshav",), "moshavim": ("moshav",),
@ -1328,13 +1328,13 @@ NOUNS_IRREG = {
"oceanides": ("oceanid",), "oceanides": ("oceanid",),
"ocelli": ("ocellus",), "ocelli": ("ocellus",),
"ochreae": ("ochrea",), "ochreae": ("ochrea",),
"ocreae": ("ochrea", "ocrea",), "ocreae": ("ochrea", "ocrea"),
"octahedra": ("octahedron",), "octahedra": ("octahedron",),
"octopi": ("octopus",), "octopi": ("octopus",),
"oculi": ("oculus",), "oculi": ("oculus",),
"odea": ("odeum",), "odea": ("odeum",),
"oedemata": ("edema", "oedema",), "oedemata": ("edema", "oedema"),
"oesophagi": ("esophagus", "oesophagus",), "oesophagi": ("esophagus", "oesophagus"),
"oldwives": ("oldwife",), "oldwives": ("oldwife",),
"olea": ("oleum",), "olea": ("oleum",),
"omasa": ("omasum",), "omasa": ("omasum",),
@ -1350,15 +1350,15 @@ NOUNS_IRREG = {
"optic_axes": ("optic_axis",), "optic_axes": ("optic_axis",),
"optima": ("optimum",), "optima": ("optimum",),
"ora": ("os",), "ora": ("os",),
"organa": ("organon", "organum",), "organa": ("organon", "organum"),
"organums": ("organa", "organum",), "organums": ("organa", "organum"),
"orthoptera": ("orthopteron",), "orthoptera": ("orthopteron",),
"osar": ("os",), "osar": ("os",),
"oscula": ("osculum",), "oscula": ("osculum",),
"ossa": ("os",), "ossa": ("os",),
"osteomata": ("osteoma",), "osteomata": ("osteoma",),
"ostia": ("ostium",), "ostia": ("ostium",),
"ottomans": ("othman", "ottoman",), "ottomans": ("othman", "ottoman"),
"ova": ("ovum",), "ova": ("ovum",),
"ovoli": ("ovolo",), "ovoli": ("ovolo",),
"ovotestes": ("ovotestis",), "ovotestes": ("ovotestis",),
@ -1382,7 +1382,7 @@ NOUNS_IRREG = {
"papulae": ("papula",), "papulae": ("papula",),
"papyri": ("papyrus",), "papyri": ("papyrus",),
"parabases": ("parabasis",), "parabases": ("parabasis",),
"paraleipses": ("paraleipsis", "paralipsis",), "paraleipses": ("paraleipsis", "paralipsis"),
"paralyses": ("paralysis",), "paralyses": ("paralysis",),
"paramecia": ("paramecium",), "paramecia": ("paramecium",),
"paramenta": ("parament",), "paramenta": ("parament",),
@ -1442,13 +1442,13 @@ NOUNS_IRREG = {
"personae": ("persona",), "personae": ("persona",),
"petechiae": ("petechia",), "petechiae": ("petechia",),
"pfennige": ("pfennig",), "pfennige": ("pfennig",),
"phalanges": ("phalange", "phalanx",), "phalanges": ("phalange", "phalanx"),
"phalli": ("phallus",), "phalli": ("phallus",),
"pharynges": ("pharynx",), "pharynges": ("pharynx",),
"phenomena": ("phenomenon",), "phenomena": ("phenomenon",),
"phi-phenomena": ("phi-phenomenon",), "phi-phenomena": ("phi-phenomenon",),
"philodendra": ("philodendron",), "philodendra": ("philodendron",),
"phlyctenae": ("phlyctaena", "phlyctena",), "phlyctenae": ("phlyctaena", "phlyctena"),
"phyla": ("phylum",), "phyla": ("phylum",),
"phylae": ("phyle",), "phylae": ("phyle",),
"phyllotaxes": ("phyllotaxis",), "phyllotaxes": ("phyllotaxis",),
@ -1475,12 +1475,12 @@ NOUNS_IRREG = {
"plasmodesmata": ("plasmodesma",), "plasmodesmata": ("plasmodesma",),
"plasmodia": ("plasmodium",), "plasmodia": ("plasmodium",),
"plateaux": ("plateau",), "plateaux": ("plateau",),
"plectra": ("plectron", "plectrum",), "plectra": ("plectron", "plectrum"),
"plena": ("plenum",), "plena": ("plenum",),
"pleura": ("pleuron",), "pleura": ("pleuron",),
"pleurae": ("pleura",), "pleurae": ("pleura",),
"plicae": ("plica",), "plicae": ("plica",),
"ploughmen": ("ploughman", "plowman",), "ploughmen": ("ploughman", "plowman"),
"pneumobacilli": ("pneumobacillus",), "pneumobacilli": ("pneumobacillus",),
"pneumococci": ("pneumococcus",), "pneumococci": ("pneumococcus",),
"pocketknives": ("pocketknife",), "pocketknives": ("pocketknife",),
@ -1515,7 +1515,7 @@ NOUNS_IRREG = {
"principia": ("principium",), "principia": ("principium",),
"proboscides": ("proboscis",), "proboscides": ("proboscis",),
"proces-verbaux": ("proces-verbal",), "proces-verbaux": ("proces-verbal",),
"proglottides": ("proglottid", "proglottis",), "proglottides": ("proglottid", "proglottis"),
"prognoses": ("prognosis",), "prognoses": ("prognosis",),
"prolegomena": ("prolegomenon",), "prolegomena": ("prolegomenon",),
"prolepses": ("prolepsis",), "prolepses": ("prolepsis",),
@ -1532,7 +1532,7 @@ NOUNS_IRREG = {
"prostheses": ("prosthesis",), "prostheses": ("prosthesis",),
"prostomia": ("prostomium",), "prostomia": ("prostomium",),
"protases": ("protasis",), "protases": ("protasis",),
"prothalamia": ("prothalamion", "prothalamium",), "prothalamia": ("prothalamion", "prothalamium"),
"prothalli": ("prothallus",), "prothalli": ("prothallus",),
"prothallia": ("prothallium",), "prothallia": ("prothallium",),
"prothoraces": ("prothorax",), "prothoraces": ("prothorax",),
@ -1572,7 +1572,7 @@ NOUNS_IRREG = {
"quezales": ("quezal",), "quezales": ("quezal",),
"quinquennia": ("quinquennium",), "quinquennia": ("quinquennium",),
"quizzes": ("quiz",), "quizzes": ("quiz",),
"rabatos": ("rabato", "rebato",), "rabatos": ("rabato", "rebato"),
"rabbitfishes": ("rabbitfish",), "rabbitfishes": ("rabbitfish",),
"rachides": ("rhachis",), "rachides": ("rhachis",),
"radices": ("radix",), "radices": ("radix",),
@ -1583,7 +1583,7 @@ NOUNS_IRREG = {
"ranulae": ("ranula",), "ranulae": ("ranula",),
"ranunculi": ("ranunculus",), "ranunculi": ("ranunculus",),
"raphae": ("raphe",), "raphae": ("raphe",),
"raphides": ("raphide", "raphis",), "raphides": ("raphide", "raphis"),
"ratfishes": ("ratfish",), "ratfishes": ("ratfish",),
"reales": ("real",), "reales": ("real",),
"rearmice": ("rearmouse",), "rearmice": ("rearmouse",),
@ -1598,7 +1598,7 @@ NOUNS_IRREG = {
"reis": ("real",), "reis": ("real",),
"relata": ("relatum",), "relata": ("relatum",),
"remiges": ("remex",), "remiges": ("remex",),
"reremice": ("rearmouse", "reremouse",), "reremice": ("rearmouse", "reremouse"),
"reseaux": ("reseau",), "reseaux": ("reseau",),
"residua": ("residuum",), "residua": ("residuum",),
"responsa": ("responsum",), "responsa": ("responsum",),
@ -1609,7 +1609,7 @@ NOUNS_IRREG = {
"retinae": ("retina",), "retinae": ("retina",),
"rhabdomyomata": ("rhabdomyoma",), "rhabdomyomata": ("rhabdomyoma",),
"rhachides": ("rhachis",), "rhachides": ("rhachis",),
"rhachises": ("rachis", "rhachis",), "rhachises": ("rachis", "rhachis"),
"rhinencephala": ("rhinencephalon",), "rhinencephala": ("rhinencephalon",),
"rhizobia": ("rhizobium",), "rhizobia": ("rhizobium",),
"rhombi": ("rhombus",), "rhombi": ("rhombus",),
@ -1636,7 +1636,7 @@ NOUNS_IRREG = {
"runners-up": ("runner-up",), "runners-up": ("runner-up",),
"sacra": ("sacrum",), "sacra": ("sacrum",),
"sacraria": ("sacrarium",), "sacraria": ("sacrarium",),
"saguaros": ("saguaro", "sahuaro",), "saguaros": ("saguaro", "sahuaro"),
"sailfishes": ("sailfish",), "sailfishes": ("sailfish",),
"salespeople": ("salesperson",), "salespeople": ("salesperson",),
"salmonellae": ("salmonella",), "salmonellae": ("salmonella",),
@ -1657,7 +1657,7 @@ NOUNS_IRREG = {
"scapulae": ("scapula",), "scapulae": ("scapula",),
"scarabaei": ("scarabaeus",), "scarabaei": ("scarabaeus",),
"scarves": ("scarf",), "scarves": ("scarf",),
"schatchonim": ("schatchen", "shadchan",), "schatchonim": ("schatchen", "shadchan"),
"schemata": ("schema",), "schemata": ("schema",),
"scherzandi": ("scherzando",), "scherzandi": ("scherzando",),
"scherzi": ("scherzo",), "scherzi": ("scherzo",),
@ -1690,7 +1690,7 @@ NOUNS_IRREG = {
"senores": ("senor",), "senores": ("senor",),
"sensilla": ("sensillum",), "sensilla": ("sensillum",),
"senti": ("sent",), "senti": ("sent",),
"senussis": ("senusi", "senussi",), "senussis": ("senusi", "senussi"),
"separatrices": ("separatrix",), "separatrices": ("separatrix",),
"sephardim": ("sephardi",), "sephardim": ("sephardi",),
"septa": ("septum",), "septa": ("septum",),
@ -1707,9 +1707,9 @@ NOUNS_IRREG = {
"shabbatim": ("shabbat",), "shabbatim": ("shabbat",),
"shackoes": ("shacko",), "shackoes": ("shacko",),
"shadchanim": ("shadchan",), "shadchanim": ("shadchan",),
"shadchans": ("schatchen", "shadchan",), "shadchans": ("schatchen", "shadchan"),
"shakoes": ("shako",), "shakoes": ("shako",),
"shammosim": ("shammas", "shammes",), "shammosim": ("shammas", "shammes"),
"sheatfishes": ("sheatfish",), "sheatfishes": ("sheatfish",),
"sheaves": ("sheaf",), "sheaves": ("sheaf",),
"shellfishes": ("shellfish",), "shellfishes": ("shellfish",),
@ -1717,14 +1717,14 @@ NOUNS_IRREG = {
"shinleaves": ("shinleaf",), "shinleaves": ("shinleaf",),
"shittim": ("shittah",), "shittim": ("shittah",),
"shmoes": ("shmo",), "shmoes": ("shmo",),
"shofroth": ("shofar", "shophar",), "shofroth": ("shofar", "shophar"),
"shophroth": ("shophar",), "shophroth": ("shophar",),
"shrewmice": ("shrewmouse",), "shrewmice": ("shrewmouse",),
"shuln": ("shul",), "shuln": ("shul",),
"siddurim": ("siddur",), "siddurim": ("siddur",),
"sigloi": ("siglos",), "sigloi": ("siglos",),
"signore": ("signora",), "signore": ("signora",),
"signori": ("signior", "signore",), "signori": ("signior", "signore"),
"signorine": ("signorina",), "signorine": ("signorina",),
"siliquae": ("siliqua",), "siliquae": ("siliqua",),
"silvae": ("silva",), "silvae": ("silva",),
@ -1739,12 +1739,12 @@ NOUNS_IRREG = {
"snaggleteeth": ("snaggletooth",), "snaggleteeth": ("snaggletooth",),
"snailfishes": ("snailfish",), "snailfishes": ("snailfish",),
"snipefishes": ("snipefish",), "snipefishes": ("snipefish",),
"socmen": ("socman", "sokeman",), "socmen": ("socman", "sokeman"),
"sola": ("solum",), "sola": ("solum",),
"solaria": ("solarium",), "solaria": ("solarium",),
"solatia": ("solatium",), "solatia": ("solatium",),
"soldi": ("soldo",), "soldi": ("soldo",),
"soles": ("sol", "sole",), "soles": ("sol", "sole"),
"solfeggi": ("solfeggio",), "solfeggi": ("solfeggio",),
"soli": ("solo",), "soli": ("solo",),
"solidi": ("solidus",), "solidi": ("solidus",),
@ -1864,7 +1864,7 @@ NOUNS_IRREG = {
"syringes": ("syrinx",), "syringes": ("syrinx",),
"syssarcoses": ("syssarcosis",), "syssarcoses": ("syssarcosis",),
"tableaux": ("tableau",), "tableaux": ("tableau",),
"taeniae": ("taenia", "tenia",), "taeniae": ("taenia", "tenia"),
"tali": ("talus",), "tali": ("talus",),
"tallaisim": ("tallith",), "tallaisim": ("tallith",),
"tallithes": ("tallith",), "tallithes": ("tallith",),
@ -1874,14 +1874,14 @@ NOUNS_IRREG = {
"tarsi": ("tarsus",), "tarsi": ("tarsus",),
"tarsometatarsi": ("tarsometatarsus",), "tarsometatarsi": ("tarsometatarsus",),
"taxa": ("taxon",), "taxa": ("taxon",),
"taxes": ("tax", "taxis",), "taxes": ("tax", "taxis"),
"taxies": ("taxi",), "taxies": ("taxi",),
"tectrices": ("tectrix",), "tectrices": ("tectrix",),
"teeth": ("tooth",), "teeth": ("tooth",),
"tegmina": ("tegmen",), "tegmina": ("tegmen",),
"telae": ("tela",), "telae": ("tela",),
"telamones": ("telamon",), "telamones": ("telamon",),
"telangiectases": ("telangiectasia", "telangiectasis",), "telangiectases": ("telangiectasia", "telangiectasis"),
"telia": ("telium",), "telia": ("telium",),
"tempi": ("tempo",), "tempi": ("tempo",),
"tenacula": ("tenaculum",), "tenacula": ("tenaculum",),
@ -1932,7 +1932,7 @@ NOUNS_IRREG = {
"tornadoes": ("tornado",), "tornadoes": ("tornado",),
"torpedoes": ("torpedo",), "torpedoes": ("torpedo",),
"torsi": ("torso",), "torsi": ("torso",),
"touracos": ("touraco", "turaco",), "touracos": ("touraco", "turaco"),
"trabeculae": ("trabecula",), "trabeculae": ("trabecula",),
"tracheae": ("trachea",), "tracheae": ("trachea",),
"traditores": ("traditor",), "traditores": ("traditor",),
@ -1960,7 +1960,7 @@ NOUNS_IRREG = {
"tubae": ("tuba",), "tubae": ("tuba",),
"turves": ("turf",), "turves": ("turf",),
"tympana": ("tympanum",), "tympana": ("tympanum",),
"tyros": ("tiro", "tyro",), "tyros": ("tiro", "tyro"),
"ubermenschen": ("ubermensch",), "ubermenschen": ("ubermensch",),
"uglies": ("ugli",), "uglies": ("ugli",),
"uigurs": ("uighur",), "uigurs": ("uighur",),
@ -1980,7 +1980,7 @@ NOUNS_IRREG = {
"utriculi": ("utriculus",), "utriculi": ("utriculus",),
"uvulae": ("uvula",), "uvulae": ("uvula",),
"vacua": ("vacuum",), "vacua": ("vacuum",),
"vagi": ("vagus", "vagus",), "vagi": ("vagus", "vagus"),
"vaginae": ("vagina",), "vaginae": ("vagina",),
"valleculae": ("vallecula",), "valleculae": ("vallecula",),
"vaporetti": ("vaporetto",), "vaporetti": ("vaporetto",),
@ -2026,7 +2026,7 @@ NOUNS_IRREG = {
"vortices": ("vortex",), "vortices": ("vortex",),
"vulvae": ("vulva",), "vulvae": ("vulva",),
"wagons-lits": ("wagon-lit",), "wagons-lits": ("wagon-lit",),
"wahhabis": ("wahabi", "wahhabi",), "wahhabis": ("wahabi", "wahhabi"),
"wanderjahre": ("wanderjahr",), "wanderjahre": ("wanderjahr",),
"weakfishes": ("weakfish",), "weakfishes": ("weakfish",),
"werewolves": ("werewolf",), "werewolves": ("werewolf",),
@ -2044,13 +2044,13 @@ NOUNS_IRREG = {
"yeshivoth": ("yeshiva",), "yeshivoth": ("yeshiva",),
"yogin": ("yogi",), "yogin": ("yogi",),
"yourselves": ("yourself",), "yourselves": ("yourself",),
"zamindaris": ("zamindari", "zemindari",), "zamindaris": ("zamindari", "zemindari"),
"zecchini": ("zecchino",), "zecchini": ("zecchino",),
"zeroes": ("zero",), "zeroes": ("zero",),
"zoa": ("zoon",), "zoa": ("zoon",),
"zoaeae": ("zoaea", "zoea",), "zoaeae": ("zoaea", "zoea"),
"zoeae": ("zoea",), "zoeae": ("zoea",),
"zoeas": ("zoaea",), "zoeas": ("zoaea",),
"zoonoses": ("zoonosis",), "zoonoses": ("zoonosis",),
"zoosporangia": ("zoosporangium",) "zoosporangia": ("zoosporangium",),
} }


@ -42,8 +42,8 @@ VERBS_IRREG = {
"anglified": ("anglify",), "anglified": ("anglify",),
"annulled": ("annul",), "annulled": ("annul",),
"annulling": ("annul",), "annulling": ("annul",),
"appalled": ("appal", "appall",), "appalled": ("appal", "appall"),
"appalling": ("appal", "appall",), "appalling": ("appal", "appall"),
"applied": ("apply",), "applied": ("apply",),
"arcked": ("arc",), "arcked": ("arc",),
"arcking": ("arc",), "arcking": ("arc",),
@ -244,9 +244,9 @@ VERBS_IRREG = {
"bypast": ("bypass",), "bypast": ("bypass",),
"caballed": ("cabal",), "caballed": ("cabal",),
"caballing": ("cabal",), "caballing": ("cabal",),
"caddied": ("caddie", "caddy",), "caddied": ("caddie", "caddy"),
"caddies": ("caddie", "caddy",), "caddies": ("caddie", "caddy"),
"caddying": ("caddie", "caddy",), "caddying": ("caddie", "caddy"),
"calcified": ("calcify",), "calcified": ("calcify",),
"came": ("come",), "came": ("come",),
"canalled": ("canal",), "canalled": ("canal",),
@ -506,8 +506,8 @@ VERBS_IRREG = {
"disembodied": ("disembody",), "disembodied": ("disembody",),
"disembowelled": ("disembowel",), "disembowelled": ("disembowel",),
"disembowelling": ("disembowel",), "disembowelling": ("disembowel",),
"disenthralled": ("disenthral", "disenthrall",), "disenthralled": ("disenthral", "disenthrall"),
"disenthralling": ("disenthral", "disenthrall",), "disenthralling": ("disenthral", "disenthrall"),
"disenthralls": ("disenthral",), "disenthralls": ("disenthral",),
"disenthrals": ("disenthrall",), "disenthrals": ("disenthrall",),
"dishevelled": ("dishevel",), "dishevelled": ("dishevel",),
@ -518,8 +518,8 @@ VERBS_IRREG = {
"dispelling": ("dispel",), "dispelling": ("dispel",),
"disqualified": ("disqualify",), "disqualified": ("disqualify",),
"dissatisfied": ("dissatisfy",), "dissatisfied": ("dissatisfy",),
"distilled": ("distil", "distill",), "distilled": ("distil", "distill"),
"distilling": ("distil", "distill",), "distilling": ("distil", "distill"),
"diversified": ("diversify",), "diversified": ("diversify",),
"divvied": ("divvy",), "divvied": ("divvy",),
"dizzied": ("dizzy",), "dizzied": ("dizzy",),
@ -595,10 +595,10 @@ VERBS_IRREG = {
"enamelling": ("enamel",), "enamelling": ("enamel",),
"englutted": ("englut",), "englutted": ("englut",),
"englutting": ("englut",), "englutting": ("englut",),
"enrolled": ("enrol", "enroll",), "enrolled": ("enrol", "enroll"),
"enrolling": ("enrol", "enroll",), "enrolling": ("enrol", "enroll"),
"enthralled": ("enthral", "enthrall",), "enthralled": ("enthral", "enthrall"),
"enthralling": ("enthral", "enthrall",), "enthralling": ("enthral", "enthrall"),
"entrammelled": ("entrammel",), "entrammelled": ("entrammel",),
"entrammelling": ("entrammel",), "entrammelling": ("entrammel",),
"entrapped": ("entrap",), "entrapped": ("entrap",),
@ -621,8 +621,8 @@ VERBS_IRREG = {
"exemplified": ("exemplify",), "exemplified": ("exemplify",),
"expelled": ("expel",), "expelled": ("expel",),
"expelling": ("expel",), "expelling": ("expel",),
"extolled": ("extol", "extoll",), "extolled": ("extol", "extoll"),
"extolling": ("extol", "extoll",), "extolling": ("extol", "extoll"),
"facetted": ("facet",), "facetted": ("facet",),
"facetting": ("facet",), "facetting": ("facet",),
"fagged": ("fag",), "fagged": ("fag",),
@ -638,7 +638,7 @@ VERBS_IRREG = {
"featherbedded": ("featherbed",), "featherbedded": ("featherbed",),
"featherbedding": ("featherbed",), "featherbedding": ("featherbed",),
"fed": ("feed",), "fed": ("feed",),
"feed": ("feed", "fee",), "feed": ("feed", "fee"),
"fell": ("fall",), "fell": ("fall",),
"felt": ("feel",), "felt": ("feel",),
"ferried": ("ferry",), "ferried": ("ferry",),
@ -744,8 +744,8 @@ VERBS_IRREG = {
"fried": ("fry",), "fried": ("fry",),
"frigged": ("frig",), "frigged": ("frig",),
"frigging": ("frig",), "frigging": ("frig",),
"fritted": ("frit", "fritt",), "fritted": ("frit", "fritt"),
"fritting": ("frit", "fritt",), "fritting": ("frit", "fritt"),
"frivolled": ("frivol",), "frivolled": ("frivol",),
"frivolling": ("frivol",), "frivolling": ("frivol",),
"frogged": ("frog",), "frogged": ("frog",),
@ -757,8 +757,8 @@ VERBS_IRREG = {
"fructified": ("fructify",), "fructified": ("fructify",),
"fuelled": ("fuel",), "fuelled": ("fuel",),
"fuelling": ("fuel",), "fuelling": ("fuel",),
"fulfilled": ("fulfil", "fulfill",), "fulfilled": ("fulfil", "fulfill"),
"fulfilling": ("fulfil", "fulfill",), "fulfilling": ("fulfil", "fulfill"),
"funned": ("fun",), "funned": ("fun",),
"funnelled": ("funnel",), "funnelled": ("funnel",),
"funnelling": ("funnel",), "funnelling": ("funnel",),
@ -955,8 +955,8 @@ VERBS_IRREG = {
"insetting": ("inset",), "insetting": ("inset",),
"inspanned": ("inspan",), "inspanned": ("inspan",),
"inspanning": ("inspan",), "inspanning": ("inspan",),
"installed": ("instal", "install",), "installed": ("instal", "install"),
"installing": ("instal", "install",), "installing": ("instal", "install"),
"intensified": ("intensify",), "intensified": ("intensify",),
"interbred": ("interbreed",), "interbred": ("interbreed",),
"intercropped": ("intercrop",), "intercropped": ("intercrop",),
@ -1303,7 +1303,7 @@ VERBS_IRREG = {
"overdriven": ("overdrive",), "overdriven": ("overdrive",),
"overdrove": ("overdrive",), "overdrove": ("overdrive",),
"overflew": ("overfly",), "overflew": ("overfly",),
"overflown": ("overflow", "overfly",), "overflown": ("overflow", "overfly"),
"overgrew": ("overgrow",), "overgrew": ("overgrow",),
"overgrown": ("overgrow",), "overgrown": ("overgrow",),
"overheard": ("overhear",), "overheard": ("overhear",),
@ -1547,8 +1547,8 @@ VERBS_IRREG = {
"red": ("red",), "red": ("red",),
"red-pencilled": ("red-pencil",), "red-pencilled": ("red-pencil",),
"red-pencilling": ("red-pencil",), "red-pencilling": ("red-pencil",),
"redded": ("red", "redd",), "redded": ("red", "redd"),
"redding": ("red", "redd",), "redding": ("red", "redd"),
"redid": ("redo",), "redid": ("redo",),
"redone": ("redo",), "redone": ("redo",),
"referred": ("refer",), "referred": ("refer",),
@ -1763,7 +1763,7 @@ VERBS_IRREG = {
"signified": ("signify",), "signified": ("signify",),
"silicified": ("silicify",), "silicified": ("silicify",),
"simplified": ("simplify",), "simplified": ("simplify",),
"singing": ("sing", "singe",), "singing": ("sing", "singe"),
"single-stepped": ("single-step",), "single-stepped": ("single-step",),
"single-stepping": ("single-step",), "single-stepping": ("single-step",),
"sinned": ("sin",), "sinned": ("sin",),
@ -2404,5 +2404,5 @@ VERBS_IRREG = {
"zigzagged": ("zigzag",), "zigzagged": ("zigzag",),
"zigzagging": ("zigzag",), "zigzagging": ("zigzag",),
"zipped": ("zip",), "zipped": ("zip",),
"zipping": ("zip",) "zipping": ("zip",),
} }


@@ -538,7 +538,7 @@ for orth in [
     "Sen.",
     "St.",
     "vs.",
-    "v.s."
+    "v.s.",
 ]:
     _exc[orth] = [{ORTH: orth}]

@@ -20,14 +20,22 @@ from ....util import load_language_data
 BASE_PATH = Path(__file__).parent
-LOOKUP = load_language_data(BASE_PATH / 'lookup.json')
-VERBS_IRREG = load_language_data(BASE_PATH / '_verbs_irreg.json')
-ADJECTIVES_IRREG = load_language_data(BASE_PATH / '_adjectives_irreg.json')
-LEMMA_INDEX = {'adj': ADJECTIVES, 'adv': ADVERBS, 'noun': NOUNS, 'verb': VERBS}
-LEMMA_EXC = {'adj': ADJECTIVES_IRREG, 'adp': ADP_IRREG, 'aux': AUXILIARY_VERBS_IRREG,
-             'cconj': CCONJ_IRREG, 'det': DETS_IRREG, 'noun': NOUNS_IRREG, 'verb': VERBS_IRREG,
-             'pron': PRONOUNS_IRREG, 'sconj': SCONJ_IRREG}
-LEMMA_RULES = {'adj': ADJECTIVE_RULES, 'noun': NOUN_RULES, 'verb': VERB_RULES}
+LOOKUP = load_language_data(BASE_PATH / "lookup.json")
+VERBS_IRREG = load_language_data(BASE_PATH / "_verbs_irreg.json")
+ADJECTIVES_IRREG = load_language_data(BASE_PATH / "_adjectives_irreg.json")
+LEMMA_INDEX = {"adj": ADJECTIVES, "adv": ADVERBS, "noun": NOUNS, "verb": VERBS}
+LEMMA_EXC = {
+    "adj": ADJECTIVES_IRREG,
+    "adp": ADP_IRREG,
+    "aux": AUXILIARY_VERBS_IRREG,
+    "cconj": CCONJ_IRREG,
+    "det": DETS_IRREG,
+    "noun": NOUNS_IRREG,
+    "verb": VERBS_IRREG,
+    "pron": PRONOUNS_IRREG,
+    "sconj": SCONJ_IRREG,
+}
+LEMMA_RULES = {"adj": ADJECTIVE_RULES, "noun": NOUN_RULES, "verb": VERB_RULES}


@@ -20,5 +20,5 @@ ADP_IRREG = {
     "pr": ("pour",),
     "/": ("sur",),
     "versus": ("vs",),
-    "vs.": ("vs",)
+    "vs.": ("vs",),
 }


@@ -365,5 +365,5 @@ AUXILIARY_VERBS_IRREG = {
     "va": ("aller",),
     "vais": ("aller",),
     "vas": ("aller",),
-    "vont": ("aller",)
+    "vont": ("aller",),
 }


@@ -13,5 +13,5 @@ CCONJ_IRREG = {
     "i.e.": ("c'est-à-dire",),
     "ie": ("c'est-à-dire",),
     "ou/et": ("et-ou",),
-    "+": ("plus",)
+    "+": ("plus",),
 }


@@ -9963,5 +9963,5 @@ NOUNS_IRREG = {
     "zurichoises": ("zurichois",),
     "zurichois": ("zurichois",),
     "zyras": ("zyras",),
-    "zyzomys": ("zyzomys",)
+    "zyzomys": ("zyzomys",),
 }


@@ -15,5 +15,5 @@ SCONJ_IRREG = {
     "puisqu'": ("puisque",),
     "qd": ("quand",),
     "quoiqu'": ("quoique",),
-    "qu'": ("que",)
+    "qu'": ("que",),
 }


@@ -3,20 +3,22 @@ from __future__ import unicode_literals
 from pathlib import Path
-from ....symbols import POS, NOUN, VERB, ADJ, ADV, PRON, DET, AUX, PUNCT, ADP, SCONJ, CCONJ
+from ....symbols import POS, NOUN, VERB, ADJ, ADV, PRON, DET, AUX, PUNCT, ADP
+from ....symbols import SCONJ, CCONJ
 from ....symbols import VerbForm_inf, VerbForm_none, Number_sing, Degree_pos
 from ....util import load_language_data
-LOOKUP = load_language_data(Path(__file__).parent / 'lookup.json')
+LOOKUP = load_language_data(Path(__file__).parent / "lookup.json")
-'''
+"""
 French language lemmatizer applies the default rule based lemmatization
 procedure with some modifications for better French language support.
 The parts of speech 'ADV', 'PRON', 'DET', 'ADP' and 'AUX' are added to use the
 rule-based lemmatization. As a last resort, the lemmatizer checks in
 the lookup table.
-'''
+"""
 class FrenchLemmatizer(object):
     @classmethod
@@ -32,36 +34,39 @@ class FrenchLemmatizer(object):
     def __call__(self, string, univ_pos, morphology=None):
         if not self.rules:
             return [self.lookup_table.get(string, string)]
-        if univ_pos in (NOUN, 'NOUN', 'noun'):
-            univ_pos = 'noun'
-        elif univ_pos in (VERB, 'VERB', 'verb'):
-            univ_pos = 'verb'
-        elif univ_pos in (ADJ, 'ADJ', 'adj'):
-            univ_pos = 'adj'
-        elif univ_pos in (ADP, 'ADP', 'adp'):
-            univ_pos = 'adp'
-        elif univ_pos in (ADV, 'ADV', 'adv'):
-            univ_pos = 'adv'
-        elif univ_pos in (AUX, 'AUX', 'aux'):
-            univ_pos = 'aux'
-        elif univ_pos in (CCONJ, 'CCONJ', 'cconj'):
-            univ_pos = 'cconj'
-        elif univ_pos in (DET, 'DET', 'det'):
-            univ_pos = 'det'
-        elif univ_pos in (PRON, 'PRON', 'pron'):
-            univ_pos = 'pron'
-        elif univ_pos in (PUNCT, 'PUNCT', 'punct'):
-            univ_pos = 'punct'
-        elif univ_pos in (SCONJ, 'SCONJ', 'sconj'):
-            univ_pos = 'sconj'
+        if univ_pos in (NOUN, "NOUN", "noun"):
+            univ_pos = "noun"
+        elif univ_pos in (VERB, "VERB", "verb"):
+            univ_pos = "verb"
+        elif univ_pos in (ADJ, "ADJ", "adj"):
+            univ_pos = "adj"
+        elif univ_pos in (ADP, "ADP", "adp"):
+            univ_pos = "adp"
+        elif univ_pos in (ADV, "ADV", "adv"):
+            univ_pos = "adv"
+        elif univ_pos in (AUX, "AUX", "aux"):
+            univ_pos = "aux"
+        elif univ_pos in (CCONJ, "CCONJ", "cconj"):
+            univ_pos = "cconj"
+        elif univ_pos in (DET, "DET", "det"):
+            univ_pos = "det"
+        elif univ_pos in (PRON, "PRON", "pron"):
+            univ_pos = "pron"
+        elif univ_pos in (PUNCT, "PUNCT", "punct"):
+            univ_pos = "punct"
+        elif univ_pos in (SCONJ, "SCONJ", "sconj"):
+            univ_pos = "sconj"
         else:
             return [self.lookup(string)]
         # See Issue #435 for example of where this logic is requied.
         if self.is_base_form(univ_pos, morphology):
             return list(set([string.lower()]))
-        lemmas = lemmatize(string, self.index.get(univ_pos, {}),
-                           self.exc.get(univ_pos, {}),
-                           self.rules.get(univ_pos, []))
+        lemmas = lemmatize(
+            string,
+            self.index.get(univ_pos, {}),
+            self.exc.get(univ_pos, {}),
+            self.rules.get(univ_pos, []),
+        )
         return lemmas
     def is_base_form(self, univ_pos, morphology=None):
@@ -70,20 +75,25 @@ class FrenchLemmatizer(object):
         avoid lemmatization entirely.
         """
         morphology = {} if morphology is None else morphology
-        others = [key for key in morphology
-                  if key not in (POS, 'Number', 'POS', 'VerbForm', 'Tense')]
-        if univ_pos == 'noun' and morphology.get('Number') == 'sing':
+        others = [
+            key
+            for key in morphology
+            if key not in (POS, "Number", "POS", "VerbForm", "Tense")
+        ]
+        if univ_pos == "noun" and morphology.get("Number") == "sing":
             return True
-        elif univ_pos == 'verb' and morphology.get('VerbForm') == 'inf':
+        elif univ_pos == "verb" and morphology.get("VerbForm") == "inf":
             return True
         # This maps 'VBP' to base form -- probably just need 'IS_BASE'
         # morphology
-        elif univ_pos == 'verb' and (morphology.get('VerbForm') == 'fin' and
-                                     morphology.get('Tense') == 'pres' and
-                                     morphology.get('Number') is None and
-                                     not others):
+        elif univ_pos == "verb" and (
+            morphology.get("VerbForm") == "fin"
+            and morphology.get("Tense") == "pres"
+            and morphology.get("Number") is None
+            and not others
+        ):
             return True
-        elif univ_pos == 'adj' and morphology.get('Degree') == 'pos':
+        elif univ_pos == "adj" and morphology.get("Degree") == "pos":
             return True
         elif VerbForm_inf in morphology:
             return True
@@ -97,16 +107,16 @@ class FrenchLemmatizer(object):
         return False
     def noun(self, string, morphology=None):
-        return self(string, 'noun', morphology)
+        return self(string, "noun", morphology)
     def verb(self, string, morphology=None):
-        return self(string, 'verb', morphology)
+        return self(string, "verb", morphology)
     def adj(self, string, morphology=None):
-        return self(string, 'adj', morphology)
+        return self(string, "adj", morphology)
     def punct(self, string, morphology=None):
-        return self(string, 'punct', morphology)
+        return self(string, "punct", morphology)
     def lookup(self, string):
         if string in self.lookup_table:
@@ -117,7 +127,7 @@ class FrenchLemmatizer(object):
 def lemmatize(string, index, exceptions, rules):
     string = string.lower()
     forms = []
-    if (string in index):
+    if string in index:
         forms.append(string)
         return forms
     forms.extend(exceptions.get(string, []))
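A toy illustration of the module-level lemmatize() helper whose first lines appear above (the data values are invented; the real index, exceptions and rules come from the French lookup tables loaded in this package):

    index = {"chanter", "petit"}        # known base forms
    exceptions = {"yeux": ["oeil"]}     # irregular form -> lemma(s)
    rules = [["s", ""]]                 # e.g. strip a plural -s

    lemmatize("Chanter", index, exceptions, rules)  # -> ["chanter"], already a base form
    lemmatize("yeux", index, exceptions, rules)     # the exception table contributes "oeil"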


@@ -2,8 +2,6 @@
 from __future__ import unicode_literals
 from ...symbols import ORTH, LEMMA
-_exc = {
-    "po'": [{ORTH: "po'", LEMMA: 'poco'}]
-}
+_exc = {"po'": [{ORTH: "po'", LEMMA: "poco"}]}
 TOKENIZER_EXCEPTIONS = _exc


@@ -1,5 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals
 """
 Example sentences to test spaCy and its language models.
@@ -11,5 +12,5 @@ sentences = [
     "애플이 영국의 신생 기업을 10억 달러에 구매를 고려중이다.",
     "자동 운전 자동차의 손해 배상 책임에 자동차 메이커에 일정한 부담을 요구하겠다.",
     "자동 배달 로봇이 보도를 주행하는 것을 샌프란시스코시가 금지를 검토중이라고 합니다.",
-    "런던은 영국의 수도이자 가장 큰 도시입니다."
+    "런던은 영국의 수도이자 가장 큰 도시입니다.",
 ]


@@ -1,7 +1,8 @@
 # coding: utf8
 from __future__ import unicode_literals
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
@@ -65,4 +66,5 @@ STOP_WORDS = set("""
-""".split())
+""".split()
+)


@@ -20,10 +20,10 @@ LEMMA_INDEX = {"adj": ADJECTIVES, "adv": ADVERBS, "noun": NOUNS, "verb": VERBS}
 BASE_PATH = Path(__file__).parent
 LEMMA_EXC = {
-    "adj": load_language_data(BASE_PATH / '_adjectives_wordforms.json'),
+    "adj": load_language_data(BASE_PATH / "_adjectives_wordforms.json"),
     "adv": ADVERBS_WORDFORMS,
-    "noun": load_language_data(BASE_PATH / '_nouns_wordforms.json'),
-    "verb": load_language_data(BASE_PATH / '_verbs_wordforms.json'),
+    "noun": load_language_data(BASE_PATH / "_nouns_wordforms.json"),
+    "verb": load_language_data(BASE_PATH / "_verbs_wordforms.json"),
 }
 LEMMA_RULES = {
@@ -39,5 +39,3 @@ LEMMA_RULES = {
 # https://www.nb.no/sprakbanken/show?serial=oai%3Anb.no%3Asbr-5&lang=en
 # License:
 # Creative_Commons-BY (CC-BY) (https://creativecommons.org/licenses/by/4.0/)


@@ -14,7 +14,7 @@ _infixes = (
     + [
         r"(?<=[{al}])\.(?=[{au}])".format(al=ALPHA_LOWER, au=ALPHA_UPPER),
         r"(?<=[{a}])[,!?](?=[{a}])".format(a=ALPHA),
-        r'(?<=[{a}])[:<>=](?=[{a}])'.format(a=ALPHA),
+        r"(?<=[{a}])[:<>=](?=[{a}])".format(a=ALPHA),
         r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
         r"(?<=[{a}])([{q}\)\]\(\[])(?=[{a}])".format(a=ALPHA, q=_quotes),
         r"(?<=[{a}])--(?=[{a}])".format(a=ALPHA),


@@ -118,7 +118,7 @@ for orth in [
     "o.l.",
     "on.",
     "op.",
-    "org."
+    "org.",
     "osv.",
     "ovf.",
     "p.",


@@ -14,5 +14,5 @@ sentences = [
     "Apple overweegt om voor 1 miljard een U.K. startup te kopen",
     "Autonome auto's verschuiven de verzekeringverantwoordelijkheid naar producenten",
     "San Francisco overweegt robots op voetpaden te verbieden",
-    "Londen is een grote stad in het Verenigd Koninkrijk"
+    "Londen is een grote stad in het Verenigd Koninkrijk",
 ]


@ -3,22 +3,25 @@ from __future__ import unicode_literals
ADPOSITIONS = set( ADPOSITIONS = set(
('aan aangaande aanwezig achter af afgezien al als an annex anno anti ' (
'behalve behoudens beneden benevens benoorden beoosten betreffende bewesten ' "aan aangaande aanwezig achter af afgezien al als an annex anno anti "
'bezijden bezuiden bij binnen binnenuit binst bladzij blijkens boven bovenop ' "behalve behoudens beneden benevens benoorden beoosten betreffende bewesten "
'buiten conform contra cq daaraan daarbij daarbuiten daarin daarnaar ' "bezijden bezuiden bij binnen binnenuit binst bladzij blijkens boven bovenop "
'daaronder daartegenover daarvan dankzij deure dichtbij door doordat doorheen ' "buiten conform contra cq daaraan daarbij daarbuiten daarin daarnaar "
'echter eraf erop erover errond eruit ervoor evenals exclusief gedaan ' "daaronder daartegenover daarvan dankzij deure dichtbij door doordat doorheen "
'gedurende gegeven getuige gezien halfweg halverwege heen hierdoorheen hierop ' "echter eraf erop erover errond eruit ervoor evenals exclusief gedaan "
'houdende in inclusief indien ingaande ingevolge inzake jegens kortweg ' "gedurende gegeven getuige gezien halfweg halverwege heen hierdoorheen hierop "
'krachtens kralj langs langsheen langst lastens linksom lopende luidens mede ' "houdende in inclusief indien ingaande ingevolge inzake jegens kortweg "
'mee met middels midden middenop mits na naan naar naartoe naast naat nabij ' "krachtens kralj langs langsheen langst lastens linksom lopende luidens mede "
'nadat namens neer neffe neffen neven nevenst niettegenstaande nopens ' "mee met middels midden middenop mits na naan naar naartoe naast naat nabij "
'officieel om omheen omstreeks omtrent onafgezien ondanks onder onderaan ' "nadat namens neer neffe neffen neven nevenst niettegenstaande nopens "
'ondere ongeacht ooit op open over per plus pro qua rechtover rond rondom ' "officieel om omheen omstreeks omtrent onafgezien ondanks onder onderaan "
"ondere ongeacht ooit op open over per plus pro qua rechtover rond rondom "
"sedert sinds spijts strekkende te tegen tegenaan tegenop tegenover telde " "sedert sinds spijts strekkende te tegen tegenaan tegenop tegenover telde "
'teneinde terug tijdens toe tot totdat trots tussen tégen uit uitgenomen ' "teneinde terug tijdens toe tot totdat trots tussen tégen uit uitgenomen "
'ultimo van vanaf vandaan vandoor vanop vanuit vanwege versus via vinnen ' "ultimo van vanaf vandaan vandoor vanop vanuit vanwege versus via vinnen "
'vlakbij volgens voor voor- voorbij voordat voort voren vòòr vóór waaraan ' "vlakbij volgens voor voor- voorbij voordat voort voren vòòr vóór waaraan "
'waarbij waardoor waaronder weg wegens weleens zijdens zoals zodat zonder ' "waarbij waardoor waaronder weg wegens weleens zijdens zoals zodat zonder "
'zónder à').split()) "zónder à"
).split()
)


@@ -3,10 +3,10 @@ from __future__ import unicode_literals
 ADPOSITIONS_IRREG = {
-    "'t": ('te',),
-    'me': ('mee',),
-    'meer': ('mee',),
-    'on': ('om',),
-    'ten': ('te',),
-    'ter': ('te',)
+    "'t": ("te",),
+    "me": ("mee",),
+    "meer": ("mee",),
+    "on": ("om",),
+    "ten": ("te",),
+    "ter": ("te",),
 }


@@ -3,17 +3,17 @@ from __future__ import unicode_literals
 ADVERBS_IRREG = {
-    "'ns": ('eens',),
-    "'s": ('eens',),
-    "'t": ('het',),
-    "d'r": ('er',),
-    "d'raf": ('eraf',),
-    "d'rbij": ('erbij',),
-    "d'rheen": ('erheen',),
-    "d'rin": ('erin',),
-    "d'rna": ('erna',),
-    "d'rnaar": ('ernaar',),
-    'hele': ('heel',),
-    'nevenst': ('nevens',),
-    'overend': ('overeind',)
+    "'ns": ("eens",),
+    "'s": ("eens",),
+    "'t": ("het",),
+    "d'r": ("er",),
+    "d'raf": ("eraf",),
+    "d'rbij": ("erbij",),
+    "d'rheen": ("erheen",),
+    "d'rin": ("erin",),
+    "d'rna": ("erna",),
+    "d'rnaar": ("ernaar",),
+    "hele": ("heel",),
+    "nevenst": ("nevens",),
+    "overend": ("overeind",),
 }


@ -3,15 +3,18 @@ from __future__ import unicode_literals
DETERMINERS = set( DETERMINERS = set(
("al allebei allerhande allerminst alletwee" (
"al allebei allerhande allerminst alletwee"
"beide clip-on d'n d'r dat datgeen datgene de dees degeen degene den dewelke " "beide clip-on d'n d'r dat datgeen datgene de dees degeen degene den dewelke "
'deze dezelfde die diegeen diegene diehien dien diene diens diezelfde dit ' "deze dezelfde die diegeen diegene diehien dien diene diens diezelfde dit "
'ditgene e een eene eigen elk elkens elkes enig enkel enne ettelijke eure ' "ditgene e een eene eigen elk elkens elkes enig enkel enne ettelijke eure "
'euren evenveel ewe ge geen ginds géén haar haaren halfelf het hetgeen ' "euren evenveel ewe ge geen ginds géén haar haaren halfelf het hetgeen "
'hetwelk hetzelfde heur heure hulder hulle hullen hullie hun hunder hunderen ' "hetwelk hetzelfde heur heure hulder hulle hullen hullie hun hunder hunderen "
'ieder iederes ja je jen jouw jouwen jouwes jullie junder keiveel keiweinig ' "ieder iederes ja je jen jouw jouwen jouwes jullie junder keiveel keiweinig "
"m'ne me meer meerder meerdere menen menig mijn mijnes minst méér niemendal " "m'ne me meer meerder meerdere menen menig mijn mijnes minst méér niemendal "
'oe ons onse se sommig sommigeder superveel telken teveel titulair ulder ' "oe ons onse se sommig sommigeder superveel telken teveel titulair ulder "
'uldere ulderen ulle under une uw vaak veel veels véél wat weinig welk welken ' "uldere ulderen ulle under une uw vaak veel veels véél wat weinig welk welken "
"welkene welksten z'nen ze zenen zijn zo'n zo'ne zoiet zoveel zovele zovelen " "welkene welksten z'nen ze zenen zijn zo'n zo'ne zoiet zoveel zovele zovelen "
'zuk zulk zulkdanig zulken zulks zullie zíjn àlle álle').split()) "zuk zulk zulkdanig zulken zulks zullie zíjn àlle álle"
).split()
)

View File

@@ -3,67 +3,67 @@ from __future__ import unicode_literals
DETERMINERS_IRREG = { DETERMINERS_IRREG = {
"'r": ('haar',), "'r": ("haar",),
"'s": ('de',), "'s": ("de",),
"'t": ('het',), "'t": ("het",),
"'tgene": ('hetgeen',), "'tgene": ("hetgeen",),
'alle': ('al',), "alle": ("al",),
'allen': ('al',), "allen": ("al",),
'aller': ('al',), "aller": ("al",),
'beiden': ('beide',), "beiden": ("beide",),
'beider': ('beide',), "beider": ("beide",),
"d'": ('het',), "d'": ("het",),
"d'r": ('haar',), "d'r": ("haar",),
'der': ('de',), "der": ("de",),
'des': ('de',), "des": ("de",),
'dezer': ('deze',), "dezer": ("deze",),
'dienen': ('die',), "dienen": ("die",),
'dier': ('die',), "dier": ("die",),
'elke': ('elk',), "elke": ("elk",),
'ene': ('een',), "ene": ("een",),
'enen': ('een',), "enen": ("een",),
'ener': ('een',), "ener": ("een",),
'enige': ('enig',), "enige": ("enig",),
'enigen': ('enig',), "enigen": ("enig",),
'er': ('haar',), "er": ("haar",),
'gene': ('geen',), "gene": ("geen",),
'genen': ('geen',), "genen": ("geen",),
'hare': ('haar',), "hare": ("haar",),
'haren': ('haar',), "haren": ("haar",),
'harer': ('haar',), "harer": ("haar",),
'hunne': ('hun',), "hunne": ("hun",),
'hunnen': ('hun',), "hunnen": ("hun",),
'jou': ('jouw',), "jou": ("jouw",),
'jouwe': ('jouw',), "jouwe": ("jouw",),
'julliejen': ('jullie',), "julliejen": ("jullie",),
"m'n": ('mijn',), "m'n": ("mijn",),
'mee': ('meer',), "mee": ("meer",),
'meer': ('veel',), "meer": ("veel",),
'meerderen': ('meerdere',), "meerderen": ("meerdere",),
'meest': ('veel',), "meest": ("veel",),
'meesten': ('veel',), "meesten": ("veel",),
'meet': ('veel',), "meet": ("veel",),
'menige': ('menig',), "menige": ("menig",),
'mij': ('mijn',), "mij": ("mijn",),
'mijnen': ('mijn',), "mijnen": ("mijn",),
'minder': ('weinig',), "minder": ("weinig",),
'mindere': ('weinig',), "mindere": ("weinig",),
'minst': ('weinig',), "minst": ("weinig",),
'minste': ('minst',), "minste": ("minst",),
'ne': ('een',), "ne": ("een",),
'onze': ('ons',), "onze": ("ons",),
'onzent': ('ons',), "onzent": ("ons",),
'onzer': ('ons',), "onzer": ("ons",),
'ouw': ('uw',), "ouw": ("uw",),
'sommige': ('sommig',), "sommige": ("sommig",),
'sommigen': ('sommig',), "sommigen": ("sommig",),
'u': ('uw',), "u": ("uw",),
'vaker': ('vaak',), "vaker": ("vaak",),
'vele': ('veel',), "vele": ("veel",),
'velen': ('veel',), "velen": ("veel",),
'welke': ('welk',), "welke": ("welk",),
'zijne': ('zijn',), "zijne": ("zijn",),
'zijnen': ('zijn',), "zijnen": ("zijn",),
'zijns': ('zijn',), "zijns": ("zijn",),
'één': ('een',) "één": ("een",),
} }

View File

@@ -9,7 +9,7 @@ ADJECTIVE_SUFFIX_RULES = [
["er", ""], ["er", ""],
["en", ""], ["en", ""],
["e", ""], ["e", ""],
["ende", "end"] ["ende", "end"],
] ]
VERB_SUFFIX_RULES = [ VERB_SUFFIX_RULES = [
@@ -39,7 +39,7 @@ NOUN_SUFFIX_RULES = [
["ssen", "s"], ["ssen", "s"],
["rren", "r"], ["rren", "r"],
["kken", "k"], ["kken", "k"],
["bben", "b"] ["bben", "b"],
] ]
NUM_SUFFIX_RULES = [ NUM_SUFFIX_RULES = [
@@ -50,23 +50,20 @@ NUM_SUFFIX_RULES = [
["de", ""], ["de", ""],
["er", ""], ["er", ""],
["ër", ""], ["ër", ""],
["tjes", ""] ["tjes", ""],
] ]
PUNCT_SUFFIX_RULES = [ PUNCT_SUFFIX_RULES = [["", '"'], ["", '"'], ["\u2018", "'"], ["\u2019", "'"]]
["", "\""],
["", "\""],
["\u2018", "'"],
["\u2019", "'"]
]
# In-place sort guaranteeing that longer -- more specific -- rules are # In-place sort guaranteeing that longer -- more specific -- rules are
# applied first. # applied first.
for rule_set in (ADJECTIVE_SUFFIX_RULES, for rule_set in (
ADJECTIVE_SUFFIX_RULES,
NOUN_SUFFIX_RULES, NOUN_SUFFIX_RULES,
NUM_SUFFIX_RULES, NUM_SUFFIX_RULES,
VERB_SUFFIX_RULES): VERB_SUFFIX_RULES,
):
rule_set.sort(key=lambda r: len(r[0]), reverse=True) rule_set.sort(key=lambda r: len(r[0]), reverse=True)
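
A minimal sketch, outside this commit, of why the length-descending sort above matters. It assumes a first-match-wins rule applier (the apply_first_matching helper below is hypothetical, not spaCy's lemmatize function): without the sort, the short ["en", ""] rule can shadow the more specific ["ssen", "s"] rule from NOUN_SUFFIX_RULES.

def apply_first_matching(string, rules):
    # Rules are [old_suffix, new_suffix] pairs; the first matching suffix wins.
    for old, new in rules:
        if string.endswith(old):
            return string[: len(string) - len(old)] + new
    return string

rules = [["en", ""], ["ssen", "s"]]
print(apply_first_matching("klassen", rules))  # "klass" -- wrong, the short rule fired
rules.sort(key=lambda r: len(r[0]), reverse=True)
print(apply_first_matching("klassen", rules))  # "klas" -- longer, more specific rule applied first
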
@@ -75,5 +72,5 @@ RULES = {
"noun": NOUN_SUFFIX_RULES, "noun": NOUN_SUFFIX_RULES,
"verb": VERB_SUFFIX_RULES, "verb": VERB_SUFFIX_RULES,
"num": NUM_SUFFIX_RULES, "num": NUM_SUFFIX_RULES,
"punct": PUNCT_SUFFIX_RULES "punct": PUNCT_SUFFIX_RULES,
} }

File diff suppressed because it is too large

View File

@@ -3,29 +3,29 @@ from __future__ import unicode_literals
NUMBERS_IRREG = { NUMBERS_IRREG = {
'achten': ('acht',), "achten": ("acht",),
'biljoenen': ('biljoen',), "biljoenen": ("biljoen",),
'drieën': ('drie',), "drieën": ("drie",),
'duizenden': ('duizend',), "duizenden": ("duizend",),
'eentjes': ('één',), "eentjes": ("één",),
'elven': ('elf',), "elven": ("elf",),
'miljoenen': ('miljoen',), "miljoenen": ("miljoen",),
'negenen': ('negen',), "negenen": ("negen",),
'negentiger': ('negentig',), "negentiger": ("negentig",),
'tienduizenden': ('tienduizend',), "tienduizenden": ("tienduizend",),
'tienen': ('tien',), "tienen": ("tien",),
'tientjes': ('tien',), "tientjes": ("tien",),
'twaalven': ('twaalf',), "twaalven": ("twaalf",),
'tweeën': ('twee',), "tweeën": ("twee",),
'twintiger': ('twintig',), "twintiger": ("twintig",),
'twintigsten': ('twintig',), "twintigsten": ("twintig",),
'vieren': ('vier',), "vieren": ("vier",),
'vijftiger': ('vijftig',), "vijftiger": ("vijftig",),
'vijven': ('vijf',), "vijven": ("vijf",),
'zessen': ('zes',), "zessen": ("zes",),
'zestiger': ('zestig',), "zestiger": ("zestig",),
'zevenen': ('zeven',), "zevenen": ("zeven",),
'zeventiger': ('zeventig',), "zeventiger": ("zeventig",),
'zovele': ('zoveel',), "zovele": ("zoveel",),
'zovelen': ('zoveel',) "zovelen": ("zoveel",),
} }

View File

@@ -3,33 +3,33 @@ from __future__ import unicode_literals
PRONOUNS_IRREG = { PRONOUNS_IRREG = {
"'r": ('haar',), "'r": ("haar",),
"'rzelf": ('haarzelf',), "'rzelf": ("haarzelf",),
"'t": ('het',), "'t": ("het",),
"d'r": ('haar',), "d'r": ("haar",),
'da': ('dat',), "da": ("dat",),
'dienen': ('die',), "dienen": ("die",),
'diens': ('die',), "diens": ("die",),
'dies': ('die',), "dies": ("die",),
'elkaars': ('elkaar',), "elkaars": ("elkaar",),
'elkanders': ('elkander',), "elkanders": ("elkander",),
'ene': ('een',), "ene": ("een",),
'enen': ('een',), "enen": ("een",),
'fik': ('ik',), "fik": ("ik",),
'gaat': ('gaan',), "gaat": ("gaan",),
'gene': ('geen',), "gene": ("geen",),
'harer': ('haar',), "harer": ("haar",),
'ieders': ('ieder',), "ieders": ("ieder",),
'iemands': ('iemand',), "iemands": ("iemand",),
'ikke': ('ik',), "ikke": ("ik",),
'mijnen': ('mijn',), "mijnen": ("mijn",),
'oe': ('je',), "oe": ("je",),
'onzer': ('ons',), "onzer": ("ons",),
'wa': ('wat',), "wa": ("wat",),
'watte': ('wat',), "watte": ("wat",),
'wier': ('wie',), "wier": ("wie",),
'zijns': ('zijn',), "zijns": ("zijn",),
'zoietsken': ('zoietske',), "zoietsken": ("zoietske",),
'zulks': ('zulk',), "zulks": ("zulk",),
'één': ('een',) "één": ("een",),
} }

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -7,15 +7,33 @@ from ....symbols import POS, NOUN, VERB, ADJ, NUM, DET, PRON, ADP, AUX, ADV
class DutchLemmatizer(object): class DutchLemmatizer(object):
# Note: CGN does not distinguish AUX verbs, so we treat AUX as VERB. # Note: CGN does not distinguish AUX verbs, so we treat AUX as VERB.
univ_pos_name_variants = { univ_pos_name_variants = {
NOUN: "noun", "NOUN": "noun", "noun": "noun", NOUN: "noun",
VERB: "verb", "VERB": "verb", "verb": "verb", "NOUN": "noun",
AUX: "verb", "AUX": "verb", "aux": "verb", "noun": "noun",
ADJ: "adj", "ADJ": "adj", "adj": "adj", VERB: "verb",
ADV: "adv", "ADV": "adv", "adv": "adv", "VERB": "verb",
PRON: "pron", "PRON": "pron", "pron": "pron", "verb": "verb",
DET: "det", "DET": "det", "det": "det", AUX: "verb",
ADP: "adp", "ADP": "adp", "adp": "adp", "AUX": "verb",
NUM: "num", "NUM": "num", "num": "num" "aux": "verb",
ADJ: "adj",
"ADJ": "adj",
"adj": "adj",
ADV: "adv",
"ADV": "adv",
"adv": "adv",
PRON: "pron",
"PRON": "pron",
"pron": "pron",
DET: "det",
"DET": "det",
"det": "det",
ADP: "adp",
"ADP": "adp",
"adp": "adp",
NUM: "num",
"NUM": "num",
"num": "num",
} }
@classmethod @classmethod
@@ -62,10 +80,8 @@ class DutchLemmatizer(object):
return [looked_up_lemma] return [looked_up_lemma]
forms, is_known = lemmatize( forms, is_known = lemmatize(
string, string, lemma_index, exceptions, self.rules.get(univ_pos, [])
lemma_index, )
exceptions,
self.rules.get(univ_pos, []))
# Back-off through remaining return value candidates. # Back-off through remaining return value candidates.
if forms: if forms:
@@ -92,25 +108,25 @@ class DutchLemmatizer(object):
return self.lookup_table.get(string, string) return self.lookup_table.get(string, string)
def noun(self, string, morphology=None): def noun(self, string, morphology=None):
return self(string, 'noun', morphology) return self(string, "noun", morphology)
def verb(self, string, morphology=None): def verb(self, string, morphology=None):
return self(string, 'verb', morphology) return self(string, "verb", morphology)
def adj(self, string, morphology=None): def adj(self, string, morphology=None):
return self(string, 'adj', morphology) return self(string, "adj", morphology)
def det(self, string, morphology=None): def det(self, string, morphology=None):
return self(string, 'det', morphology) return self(string, "det", morphology)
def pron(self, string, morphology=None): def pron(self, string, morphology=None):
return self(string, 'pron', morphology) return self(string, "pron", morphology)
def adp(self, string, morphology=None): def adp(self, string, morphology=None):
return self(string, 'adp', morphology) return self(string, "adp", morphology)
def punct(self, string, morphology=None): def punct(self, string, morphology=None):
return self(string, 'punct', morphology) return self(string, "punct", morphology)
# Reimplemented to focus more on application of suffix rules and to return # Reimplemented to focus more on application of suffix rules and to return
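
The univ_pos_name_variants table above folds spaCy's POS symbol constants, upper-case tag names and lower-case names onto a single rule-group key, with AUX mapped to "verb" per the CGN note. A toy illustration of that normalisation (assuming a spaCy install for the symbol constants; the dict below is a trimmed stand-in, not the full table):

from spacy.symbols import AUX, NOUN, VERB

variants = {
    NOUN: "noun", "NOUN": "noun", "noun": "noun",
    VERB: "verb", "VERB": "verb", "verb": "verb",
    # CGN does not distinguish auxiliaries, so AUX folds into "verb" as well.
    AUX: "verb", "AUX": "verb", "aux": "verb",
}

for pos in (NOUN, "VERB", "aux"):
    print(variants[pos])  # noun, verb, verb
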

View File

@@ -4,18 +4,22 @@ from __future__ import unicode_literals
from ...attrs import LIKE_NUM from ...attrs import LIKE_NUM
_num_words = set(""" _num_words = set(
"""
nul een één twee drie vier vijf zes zeven acht negen tien elf twaalf dertien nul een één twee drie vier vijf zes zeven acht negen tien elf twaalf dertien
veertien twintig dertig veertig vijftig zestig zeventig tachtig negentig honderd veertien twintig dertig veertig vijftig zestig zeventig tachtig negentig honderd
duizend miljoen miljard biljoen biljard triljoen triljard duizend miljoen miljard biljoen biljard triljoen triljard
""".split()) """.split()
)
_ordinal_words = set(""" _ordinal_words = set(
"""
eerste tweede derde vierde vijfde zesde zevende achtste negende tiende elfde eerste tweede derde vierde vijfde zesde zevende achtste negende tiende elfde
twaalfde dertiende veertiende twintigste dertigste veertigste vijftigste twaalfde dertiende veertiende twintigste dertigste veertigste vijftigste
zestigste zeventigste tachtigste negentigste honderdste duizendste miljoenste zestigste zeventigste tachtigste negentigste honderdste duizendste miljoenste
miljardste biljoenste biljardste triljoenste triljardste miljardste biljoenste biljardste triljoenste triljardste
""".split()) """.split()
)
def like_num(text): def like_num(text):
@@ -23,11 +27,11 @@ def like_num(text):
# or matches one of the number words. In order to handle numbers like # or matches one of the number words. In order to handle numbers like
# "drieëntwintig", more work is required. # "drieëntwintig", more work is required.
# See this discussion: https://github.com/explosion/spaCy/pull/1177 # See this discussion: https://github.com/explosion/spaCy/pull/1177
text = text.replace(',', '').replace('.', '') text = text.replace(",", "").replace(".", "")
if text.isdigit(): if text.isdigit():
return True return True
if text.count('/') == 1: if text.count("/") == 1:
num, denom = text.split('/') num, denom = text.split("/")
if num.isdigit() and denom.isdigit(): if num.isdigit() and denom.isdigit():
return True return True
if text.lower() in _num_words: if text.lower() in _num_words:
@@ -37,6 +41,4 @@ def like_num(text):
return False return False
LEX_ATTRS = { LEX_ATTRS = {LIKE_NUM: like_num}
LIKE_NUM: like_num
}
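
As a quick check of the behaviour above, a simplified re-statement of like_num with a trimmed word set (the real module also consults the full _num_words and _ordinal_words sets):

def like_num_demo(text):
    # Thousands separators are stripped before the digit check.
    text = text.replace(",", "").replace(".", "")
    if text.isdigit():
        return True
    # Simple fractions such as "1/2".
    if text.count("/") == 1:
        num, denom = text.split("/")
        if num.isdigit() and denom.isdigit():
            return True
    # Trimmed stand-in for the _num_words set above.
    return text.lower() in {"twaalf", "honderd", "miljoen"}

for token in ("11", "12.000", "1/2", "twaalf", "drieëntwintig"):
    print(token, like_num_demo(token))
# "drieëntwintig" is not matched -- compound number words still need the extra
# work referenced in the comment above.
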

View File

@@ -10,24 +10,32 @@ from ..punctuation import TOKENIZER_SUFFIXES as DEFAULT_TOKENIZER_SUFFIXES
# Copied from `de` package. Main purpose is to ensure that hyphens are not # Copied from `de` package. Main purpose is to ensure that hyphens are not
# split on. # split on.
_quotes = CONCAT_QUOTES.replace("'", '') _quotes = CONCAT_QUOTES.replace("'", "")
_infixes = (LIST_ELLIPSES + LIST_ICONS + _infixes = (
[r'(?<=[{}])\.(?=[{}])'.format(ALPHA_LOWER, ALPHA_UPPER), LIST_ELLIPSES
r'(?<=[{a}])[,!?](?=[{a}])'.format(a=ALPHA), + LIST_ICONS
+ [
r"(?<=[{}])\.(?=[{}])".format(ALPHA_LOWER, ALPHA_UPPER),
r"(?<=[{a}])[,!?](?=[{a}])".format(a=ALPHA),
r'(?<=[{a}"])[:<>=](?=[{a}])'.format(a=ALPHA), r'(?<=[{a}"])[:<>=](?=[{a}])'.format(a=ALPHA),
r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA), r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
r'(?<=[{a}])([{q}\)\]\(\[])(?=[{a}])'.format(a=ALPHA, q=_quotes), r"(?<=[{a}])([{q}\)\]\(\[])(?=[{a}])".format(a=ALPHA, q=_quotes),
r'(?<=[{a}])--(?=[{a}])'.format(a=ALPHA), r"(?<=[{a}])--(?=[{a}])".format(a=ALPHA),
r'(?<=[0-9])-(?=[0-9])']) r"(?<=[0-9])-(?=[0-9])",
]
)
# Remove "'s" suffix from suffix list. In Dutch, "'s" is a plural ending when # Remove "'s" suffix from suffix list. In Dutch, "'s" is a plural ending when
# it occurs as a suffix and a clitic for "eens" in standalone use. To avoid # it occurs as a suffix and a clitic for "eens" in standalone use. To avoid
# ambiguity it's better to just leave it attached when it occurs as a suffix. # ambiguity it's better to just leave it attached when it occurs as a suffix.
default_suffix_blacklist = ("'s", "'S", 's', 'S') default_suffix_blacklist = ("'s", "'S", "s", "S")
_suffixes = [suffix for suffix in DEFAULT_TOKENIZER_SUFFIXES _suffixes = [
if suffix not in default_suffix_blacklist] suffix
for suffix in DEFAULT_TOKENIZER_SUFFIXES
if suffix not in default_suffix_blacklist
]
TOKENIZER_INFIXES = _infixes TOKENIZER_INFIXES = _infixes
TOKENIZER_SUFFIXES = _suffixes TOKENIZER_SUFFIXES = _suffixes
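
A small usage sketch of the intent described above; it assumes a spaCy installation that ships this nl data, and the tokenisation shown is the expected outcome rather than a recorded output:

import spacy

nlp = spacy.blank("nl")
doc = nlp("foto's uit Noord-Holland, 1999-2000")
print([t.text for t in doc])
# Expected: "foto's" and "Noord-Holland" stay single tokens (the "'s" suffix is
# blacklisted and letter-hyphen-letter is not an infix), while "1999-2000" is
# split on the hyphen between digits.
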

View File

@@ -16,7 +16,8 @@ from __future__ import unicode_literals
# should have a Dutch counterpart here. # should have a Dutch counterpart here.
STOP_WORDS = set(""" STOP_WORDS = set(
"""
aan af al alle alles allebei alleen allen als altijd ander anders andere anderen aangaangde aangezien achter achterna aan af al alle alles allebei alleen allen als altijd ander anders andere anderen aangaangde aangezien achter achterna
afgelopen aldus alhoewel anderzijds afgelopen aldus alhoewel anderzijds
@@ -70,4 +71,5 @@ welk welke welken werd werden wiens wier wilde wordt
zal ze zei zelf zich zij zijn zo zonder zou zeer zeker zekere zelfde zelfs zichzelf zijnde zijne zon zoals zodra zouden zal ze zei zelf zich zij zijn zo zonder zou zeer zeker zekere zelfde zelfs zichzelf zijnde zijne zon zoals zodra zouden
zoveel zowat zulk zulke zulks zullen zult zoveel zowat zulk zulke zulks zullen zult
""".split()) """.split()
)

View File

@@ -47,8 +47,12 @@ TAG_MAP = {
"Adj_Prep|adv|vergr|onverv_voor__Degree=Cmp|Variant=Short": {POS: ADJ}, "Adj_Prep|adv|vergr|onverv_voor__Degree=Cmp|Variant=Short": {POS: ADJ},
"Adj_V_Conj_V__Degree=Pos|VerbForm=Inf": {POS: ADJ}, "Adj_V_Conj_V__Degree=Pos|VerbForm=Inf": {POS: ADJ},
"Adj_V_N__Degree=Pos|Number=Sing|Tense=Past|VerbForm=Part": {POS: ADJ}, "Adj_V_N__Degree=Pos|Number=Sing|Tense=Past|VerbForm=Part": {POS: ADJ},
"Adj_V|adv|stell|onverv_intrans|inf__Degree=Pos|Variant=Short|VerbForm=Inf": {POS: ADJ}, "Adj_V|adv|stell|onverv_intrans|inf__Degree=Pos|Variant=Short|VerbForm=Inf": {
"Adj_V|adv|stell|onverv_trans|imp__Degree=Pos|Mood=Imp|Variant=Short|VerbForm=Fin": {POS: ADJ}, POS: ADJ
},
"Adj_V|adv|stell|onverv_trans|imp__Degree=Pos|Mood=Imp|Variant=Short|VerbForm=Fin": {
POS: ADJ
},
"Adj|adv|stell|onverv__Degree=Pos|Variant=Short": {POS: ADJ}, "Adj|adv|stell|onverv__Degree=Pos|Variant=Short": {POS: ADJ},
"Adj|adv|stell|vervneut__Case=Nom|Degree=Pos|Variant=Short": {POS: ADJ}, "Adj|adv|stell|vervneut__Case=Nom|Degree=Pos|Variant=Short": {POS: ADJ},
"Adj|adv|vergr|onverv__Degree=Cmp|Variant=Short": {POS: ADJ}, "Adj|adv|vergr|onverv__Degree=Cmp|Variant=Short": {POS: ADJ},
@@ -133,15 +137,21 @@ TAG_MAP = {
"Art_Num__Definite=Def|Degree=Sup|Gender=Neut|PronType=Ind": {POS: DET}, "Art_Num__Definite=Def|Degree=Sup|Gender=Neut|PronType=Ind": {POS: DET},
"Art_Num__Definite=Def|Gender=Neut": {POS: DET}, "Art_Num__Definite=Def|Gender=Neut": {POS: DET},
"Art_Num__Degree=Pos|Number=Sing|PronType=Ind": {POS: DET}, "Art_Num__Degree=Pos|Number=Sing|PronType=Ind": {POS: DET},
"Art_N|bep|onzijd|neut_eigen|ev|neut__Definite=Def|Gender=Neut|Number=Sing": {POS: DET}, "Art_N|bep|onzijd|neut_eigen|ev|neut__Definite=Def|Gender=Neut|Number=Sing": {
"Art_N|bep|onzijd|neut_soort|ev|neut__Definite=Def|Gender=Neut|Number=Sing": {POS: DET}, POS: DET
},
"Art_N|bep|onzijd|neut_soort|ev|neut__Definite=Def|Gender=Neut|Number=Sing": {
POS: DET
},
"Art_Pron_N__Case=Gen|Number=Plur|PronType=Ind": {POS: DET}, "Art_Pron_N__Case=Gen|Number=Plur|PronType=Ind": {POS: DET},
"Art_Pron__Number=Sing|PronType=Ind": {POS: DET}, "Art_Pron__Number=Sing|PronType=Ind": {POS: DET},
"Art_V_N__AdpType=Prep": {POS: DET}, "Art_V_N__AdpType=Prep": {POS: DET},
"Art|bep|onzijd|neut__Definite=Def|Gender=Neut|PronType=Art": {POS: DET}, "Art|bep|onzijd|neut__Definite=Def|Gender=Neut|PronType=Art": {POS: DET},
"Art|bep|zijdofmv|gen__Case=Gen|Definite=Def|PronType=Art": {POS: DET}, "Art|bep|zijdofmv|gen__Case=Gen|Definite=Def|PronType=Art": {POS: DET},
"Art|bep|zijdofmv|neut__Definite=Def|PronType=Art": {POS: DET}, "Art|bep|zijdofmv|neut__Definite=Def|PronType=Art": {POS: DET},
"Art|bep|zijdofonzijd|gen__Case=Gen|Definite=Def|Number=Sing|PronType=Art": {POS: DET}, "Art|bep|zijdofonzijd|gen__Case=Gen|Definite=Def|Number=Sing|PronType=Art": {
POS: DET
},
"Art|bep|zijd|dat__Case=Dat|Definite=Def|Gender=Com|PronType=Art": {POS: DET}, "Art|bep|zijd|dat__Case=Dat|Definite=Def|Gender=Com|PronType=Art": {POS: DET},
"Art|onbep|zijdofonzijd|neut__Definite=Ind|Number=Sing|PronType=Art": {POS: DET}, "Art|onbep|zijdofonzijd|neut__Definite=Ind|Number=Sing|PronType=Art": {POS: DET},
"CCONJ___": {POS: CONJ}, "CCONJ___": {POS: CONJ},
@@ -159,17 +169,23 @@ TAG_MAP = {
"Conj_Int|onder|metfin___": {POS: CONJ}, "Conj_Int|onder|metfin___": {POS: CONJ},
"Conj_N_Adv__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: CONJ}, "Conj_N_Adv__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: CONJ},
"Conj_N_Prep__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: CONJ}, "Conj_N_Prep__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: CONJ},
"Conj_N|onder|metfin_soort|ev|neut__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: CONJ}, "Conj_N|onder|metfin_soort|ev|neut__AdpType=Preppron|Gender=Masc|Number=Plur": {
POS: CONJ
},
"Conj_Pron_Adv__Degree=Pos|Number=Sing|Person=3": {POS: CONJ}, "Conj_Pron_Adv__Degree=Pos|Number=Sing|Person=3": {POS: CONJ},
"Conj_Pron_V__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: CONJ}, "Conj_Pron_V__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: CONJ},
"Conj_Pron|neven_aanw|neut|zelfst__AdpType=Prep": {POS: CONJ}, "Conj_Pron|neven_aanw|neut|zelfst__AdpType=Prep": {POS: CONJ},
"Conj_Punc_Conj|neven_schuinstreep_neven__AdpType=Prep": {POS: CONJ}, "Conj_Punc_Conj|neven_schuinstreep_neven__AdpType=Prep": {POS: CONJ},
"Conj_V|onder|metfin_intrans|ott|3|ev__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: CONJ}, "Conj_V|onder|metfin_intrans|ott|3|ev__AdpType=Preppron|Gender=Masc|Number=Plur": {
POS: CONJ
},
"Conj|neven___": {POS: CONJ}, "Conj|neven___": {POS: CONJ},
"Conj|onder|metfin___": {POS: CONJ}, "Conj|onder|metfin___": {POS: CONJ},
"Conj|onder|metinf___": {POS: CONJ}, "Conj|onder|metinf___": {POS: CONJ},
"DET__Degree=Cmp|NumType=Card|PronType=Ind": {POS: DET}, "DET__Degree=Cmp|NumType=Card|PronType=Ind": {POS: DET},
"DET__Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET}, "DET__Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {
POS: DET
},
"DET__Gender=Fem|Number=Sing|PronType=Art": {POS: DET}, "DET__Gender=Fem|Number=Sing|PronType=Art": {POS: DET},
"DET__Gender=Masc|Number=Plur|PronType=Art": {POS: DET}, "DET__Gender=Masc|Number=Plur|PronType=Art": {POS: DET},
"DET__Gender=Masc|Number=Sing|PronType=Tot": {POS: DET}, "DET__Gender=Masc|Number=Sing|PronType=Tot": {POS: DET},
@@ -185,7 +201,9 @@ TAG_MAP = {
"Misc_Misc_Misc_Misc_Misc_Misc_Punc_Misc_Misc_Misc___": {POS: X}, "Misc_Misc_Misc_Misc_Misc_Misc_Punc_Misc_Misc_Misc___": {POS: X},
"Misc_Misc_Misc_Misc_Misc_Misc___": {POS: X}, "Misc_Misc_Misc_Misc_Misc_Misc___": {POS: X},
"Misc_Misc_Misc_Misc_Misc_N_Misc_Misc_Misc_Misc_Misc_Misc___": {POS: X}, "Misc_Misc_Misc_Misc_Misc_N_Misc_Misc_Misc_Misc_Misc_Misc___": {POS: X},
"Misc_Misc_Misc_Misc|vreemd_vreemd_vreemd_vreemd__AdpType=Preppron|Gender=Masc|Number=Sing": {POS: X}, "Misc_Misc_Misc_Misc|vreemd_vreemd_vreemd_vreemd__AdpType=Preppron|Gender=Masc|Number=Sing": {
POS: X
},
"Misc_Misc_Misc_Misc|vreemd_vreemd_vreemd_vreemd___": {POS: X}, "Misc_Misc_Misc_Misc|vreemd_vreemd_vreemd_vreemd___": {POS: X},
"Misc_Misc_Misc_N__Number=Sing": {POS: X}, "Misc_Misc_Misc_N__Number=Sing": {POS: X},
"Misc_Misc_Misc|vreemd_vreemd_vreemd___": {POS: X}, "Misc_Misc_Misc|vreemd_vreemd_vreemd___": {POS: X},
@@ -217,7 +235,9 @@ TAG_MAP = {
"N_Adj__Degree=Pos|Number=Plur": {POS: NOUN}, "N_Adj__Degree=Pos|Number=Plur": {POS: NOUN},
"N_Adj__Degree=Pos|Number=Sing": {POS: NOUN}, "N_Adj__Degree=Pos|Number=Sing": {POS: NOUN},
"N_Adj___": {POS: NOUN}, "N_Adj___": {POS: NOUN},
"N_Adv_Punc_V_Pron_V__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Inf": {POS: NOUN}, "N_Adv_Punc_V_Pron_V__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Inf": {
POS: NOUN
},
"N_Adv__Degree=Pos|Number=Sing": {POS: NOUN}, "N_Adv__Degree=Pos|Number=Sing": {POS: NOUN},
"N_Adv___": {POS: NOUN}, "N_Adv___": {POS: NOUN},
"N_Adv|soort|ev|neut_deelv__Number=Sing": {POS: NOUN}, "N_Adv|soort|ev|neut_deelv__Number=Sing": {POS: NOUN},
@@ -320,12 +340,20 @@ TAG_MAP = {
"N_N|eigen|ev|gen_soort|mv|neut___": {POS: NOUN}, "N_N|eigen|ev|gen_soort|mv|neut___": {POS: NOUN},
"N_N|eigen|ev|neut_eigen|ev|gen___": {POS: NOUN}, "N_N|eigen|ev|neut_eigen|ev|gen___": {POS: NOUN},
"N_N|eigen|ev|neut_eigen|ev|neut__AdpType=Prep": {POS: NOUN}, "N_N|eigen|ev|neut_eigen|ev|neut__AdpType=Prep": {POS: NOUN},
"N_N|eigen|ev|neut_eigen|ev|neut__AdpType=Preppron|Gender=Fem|Number=Plur": {POS: NOUN}, "N_N|eigen|ev|neut_eigen|ev|neut__AdpType=Preppron|Gender=Fem|Number=Plur": {
"N_N|eigen|ev|neut_eigen|ev|neut__AdpType=Preppron|Gender=Masc|Number=Sing": {POS: NOUN}, POS: NOUN
},
"N_N|eigen|ev|neut_eigen|ev|neut__AdpType=Preppron|Gender=Masc|Number=Sing": {
POS: NOUN
},
"N_N|eigen|ev|neut_eigen|ev|neut__Gender=Fem|Number=Plur|PronType=Art": {POS: NOUN}, "N_N|eigen|ev|neut_eigen|ev|neut__Gender=Fem|Number=Plur|PronType=Art": {POS: NOUN},
"N_N|eigen|ev|neut_eigen|ev|neut__Gender=Fem|Number=Sing|PronType=Art": {POS: NOUN}, "N_N|eigen|ev|neut_eigen|ev|neut__Gender=Fem|Number=Sing|PronType=Art": {POS: NOUN},
"N_N|eigen|ev|neut_eigen|ev|neut__Gender=Masc|Number=Plur|PronType=Art": {POS: NOUN}, "N_N|eigen|ev|neut_eigen|ev|neut__Gender=Masc|Number=Plur|PronType=Art": {
"N_N|eigen|ev|neut_eigen|ev|neut__Gender=Masc|Number=Sing|PronType=Art": {POS: NOUN}, POS: NOUN
},
"N_N|eigen|ev|neut_eigen|ev|neut__Gender=Masc|Number=Sing|PronType=Art": {
POS: NOUN
},
"N_N|eigen|ev|neut_eigen|ev|neut__NumType=Card": {POS: NOUN}, "N_N|eigen|ev|neut_eigen|ev|neut__NumType=Card": {POS: NOUN},
"N_N|eigen|ev|neut_eigen|ev|neut__Number=Sing": {POS: NOUN}, "N_N|eigen|ev|neut_eigen|ev|neut__Number=Sing": {POS: NOUN},
"N_N|eigen|ev|neut_eigen|ev|neut___": {POS: NOUN}, "N_N|eigen|ev|neut_eigen|ev|neut___": {POS: NOUN},
@@ -335,7 +363,9 @@ TAG_MAP = {
"N_N|eigen|ev|neut_soort|mv|neut___": {POS: NOUN}, "N_N|eigen|ev|neut_soort|mv|neut___": {POS: NOUN},
"N_N|eigen|mv|neut_eigen|mv|neut___": {POS: NOUN}, "N_N|eigen|mv|neut_eigen|mv|neut___": {POS: NOUN},
"N_N|soort|ev|neut_eigen|ev|neut__Number=Sing": {POS: NOUN}, "N_N|soort|ev|neut_eigen|ev|neut__Number=Sing": {POS: NOUN},
"N_N|soort|ev|neut_soort|ev|neut__Gender=Masc|Number=Plur|PronType=Art": {POS: NOUN}, "N_N|soort|ev|neut_soort|ev|neut__Gender=Masc|Number=Plur|PronType=Art": {
POS: NOUN
},
"N_N|soort|ev|neut_soort|ev|neut__NumForm=Digit|NumType=Card": {POS: NOUN}, "N_N|soort|ev|neut_soort|ev|neut__NumForm=Digit|NumType=Card": {POS: NOUN},
"N_N|soort|ev|neut_soort|ev|neut__Number=Sing": {POS: NOUN}, "N_N|soort|ev|neut_soort|ev|neut__Number=Sing": {POS: NOUN},
"N_N|soort|ev|neut_soort|mv|neut__Number=Plur": {POS: NOUN}, "N_N|soort|ev|neut_soort|mv|neut__Number=Plur": {POS: NOUN},
@@ -365,7 +395,9 @@ TAG_MAP = {
"N_Pron___": {POS: NOUN}, "N_Pron___": {POS: NOUN},
"N_Punc_Adj_N___": {POS: NOUN}, "N_Punc_Adj_N___": {POS: NOUN},
"N_Punc_Adj_Pron_Punc__Degree=Pos|Number=Sing|Person=2": {POS: NOUN}, "N_Punc_Adj_Pron_Punc__Degree=Pos|Number=Sing|Person=2": {POS: NOUN},
"N_Punc_Adv_V_Pron_N__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: NOUN}, "N_Punc_Adv_V_Pron_N__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {
POS: NOUN
},
"N_Punc_Misc_Punc_N___": {POS: NOUN}, "N_Punc_Misc_Punc_N___": {POS: NOUN},
"N_Punc_N_N_N_N__Number=Sing": {POS: NOUN}, "N_Punc_N_N_N_N__Number=Sing": {POS: NOUN},
"N_Punc_N_Punc_N__Number=Sing": {POS: NOUN}, "N_Punc_N_Punc_N__Number=Sing": {POS: NOUN},
@@ -415,8 +447,12 @@ TAG_MAP = {
"Num|hoofd|bep|attr|onverv__Definite=Def|NumType=Card": {POS: NUM}, "Num|hoofd|bep|attr|onverv__Definite=Def|NumType=Card": {POS: NUM},
"Num|hoofd|bep|zelfst|onverv__Definite=Def|NumType=Card": {POS: NUM}, "Num|hoofd|bep|zelfst|onverv__Definite=Def|NumType=Card": {POS: NUM},
"Num|hoofd|bep|zelfst|vervmv__Definite=Def|Number=Plur|NumType=Card": {POS: NUM}, "Num|hoofd|bep|zelfst|vervmv__Definite=Def|Number=Plur|NumType=Card": {POS: NUM},
"Num|hoofd|onbep|attr|stell|onverv__Degree=Pos|NumType=Card|PronType=Ind": {POS: NUM}, "Num|hoofd|onbep|attr|stell|onverv__Degree=Pos|NumType=Card|PronType=Ind": {
"Num|hoofd|onbep|attr|vergr|onverv__Degree=Cmp|NumType=Card|PronType=Ind": {POS: NUM}, POS: NUM
},
"Num|hoofd|onbep|attr|vergr|onverv__Degree=Cmp|NumType=Card|PronType=Ind": {
POS: NUM
},
"Num|rang|bep|attr|onverv__Definite=Def|NumType=Ord": {POS: NUM}, "Num|rang|bep|attr|onverv__Definite=Def|NumType=Ord": {POS: NUM},
"Num|rang|bep|zelfst|onverv__Definite=Def|NumType=Ord": {POS: NUM}, "Num|rang|bep|zelfst|onverv__Definite=Def|NumType=Ord": {POS: NUM},
"N|eigen|ev|gen__Case=Gen|Number=Sing": {POS: NOUN}, "N|eigen|ev|gen__Case=Gen|Number=Sing": {POS: NOUN},
@@ -469,7 +505,9 @@ TAG_MAP = {
"Prep_N_Adv|voor_soort|ev|neut_pron|aanw__AdpType=Prep": {POS: ADP}, "Prep_N_Adv|voor_soort|ev|neut_pron|aanw__AdpType=Prep": {POS: ADP},
"Prep_N_Adv|voor_soort|ev|neut_pron|aanw__Number=Sing|PronType=Dem": {POS: ADP}, "Prep_N_Adv|voor_soort|ev|neut_pron|aanw__Number=Sing|PronType=Dem": {POS: ADP},
"Prep_N_Adv|voor_soort|ev|neut_pron|vrag__Number=Sing|PronType=Int": {POS: ADP}, "Prep_N_Adv|voor_soort|ev|neut_pron|vrag__Number=Sing|PronType=Int": {POS: ADP},
"Prep_N_Adv|voor_soort|mv|neut_deelv__Gender=Masc|Number=Sing|PronType=Tot": {POS: ADP}, "Prep_N_Adv|voor_soort|mv|neut_deelv__Gender=Masc|Number=Sing|PronType=Tot": {
POS: ADP
},
"Prep_N_Conj_N__Number=Sing": {POS: ADP}, "Prep_N_Conj_N__Number=Sing": {POS: ADP},
"Prep_N_Conj__AdpType=Prep": {POS: ADP}, "Prep_N_Conj__AdpType=Prep": {POS: ADP},
"Prep_N_Prep_N__Number=Sing": {POS: ADP}, "Prep_N_Prep_N__Number=Sing": {POS: ADP},
@@ -489,7 +527,9 @@ TAG_MAP = {
"Prep_N|voor_soort|ev|neut__Number=Sing": {POS: ADP}, "Prep_N|voor_soort|ev|neut__Number=Sing": {POS: ADP},
"Prep_N|voor_soort|mv|neut__AdpType=Prep": {POS: ADP}, "Prep_N|voor_soort|mv|neut__AdpType=Prep": {POS: ADP},
"Prep_N|voor_soort|mv|neut__Number=Plur": {POS: ADP}, "Prep_N|voor_soort|mv|neut__Number=Plur": {POS: ADP},
"Prep_Prep_Adj|voor_voor_adv|stell|onverv__Gender=Masc|Number=Sing|PronType=Tot": {POS: ADP}, "Prep_Prep_Adj|voor_voor_adv|stell|onverv__Gender=Masc|Number=Sing|PronType=Tot": {
POS: ADP
},
"Prep_Prep_Adv__Degree=Pos": {POS: ADP}, "Prep_Prep_Adv__Degree=Pos": {POS: ADP},
"Prep_Pron_Adj__Degree=Cmp|Number=Sing|Person=3": {POS: ADP}, "Prep_Pron_Adj__Degree=Cmp|Number=Sing|Person=3": {POS: ADP},
"Prep_Pron_N_Adv__Number=Plur": {POS: ADP}, "Prep_Pron_N_Adv__Number=Plur": {POS: ADP},
@@ -503,7 +543,9 @@ TAG_MAP = {
"Prep_Pron|voor_ref|3|evofmv__Number=Plur,Sing|Person=3": {POS: ADP}, "Prep_Pron|voor_ref|3|evofmv__Number=Plur,Sing|Person=3": {POS: ADP},
"Prep_Punc_N_Conj_N__AdpType=Prep": {POS: ADP}, "Prep_Punc_N_Conj_N__AdpType=Prep": {POS: ADP},
"Prep_V_N__Number=Sing|Tense=Pres|VerbForm=Part": {POS: ADP}, "Prep_V_N__Number=Sing|Tense=Pres|VerbForm=Part": {POS: ADP},
"Prep_V_Pron_Pron_Adv__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|PronType=Dem|Tense=Pres|VerbForm=Fin": {POS: ADP}, "Prep_V_Pron_Pron_Adv__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|PronType=Dem|Tense=Pres|VerbForm=Fin": {
POS: ADP
},
"Prep_V|voor_intrans|inf__VerbForm=Inf": {POS: ADP}, "Prep_V|voor_intrans|inf__VerbForm=Inf": {POS: ADP},
"Prep_V|voorinf_trans|inf__VerbForm=Inf": {POS: ADP}, "Prep_V|voorinf_trans|inf__VerbForm=Inf": {POS: ADP},
"Prep|achter__AdpType=Post": {POS: ADP}, "Prep|achter__AdpType=Post": {POS: ADP},
@@ -511,17 +553,25 @@ TAG_MAP = {
"Prep|voor__AdpType=Prep": {POS: ADP}, "Prep|voor__AdpType=Prep": {POS: ADP},
"Prep|voorinf__AdpType=Prep|PartType=Inf": {POS: ADP}, "Prep|voorinf__AdpType=Prep|PartType=Inf": {POS: ADP},
"Pron_Adj_N_Punc_Art_Adj_N_Prep_Art_Adj_N__NumType=Card": {POS: PRON}, "Pron_Adj_N_Punc_Art_Adj_N_Prep_Art_Adj_N__NumType=Card": {POS: PRON},
"Pron_Adj__Case=Nom|Degree=Sup|Number=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: PRON}, "Pron_Adj__Case=Nom|Degree=Sup|Number=Sing|Person=2|Poss=Yes|PronType=Prs": {
POS: PRON
},
"Pron_Adj__Degree=Cmp|PronType=Ind": {POS: PRON}, "Pron_Adj__Degree=Cmp|PronType=Ind": {POS: PRON},
"Pron_Adv|vrag|neut|attr_deelv__PronType=Int": {POS: PRON}, "Pron_Adv|vrag|neut|attr_deelv__PronType=Int": {POS: PRON},
"Pron_Art_N_N__Number=Plur|PronType=Ind": {POS: PRON}, "Pron_Art_N_N__Number=Plur|PronType=Ind": {POS: PRON},
"Pron_Art__Number=Sing|PronType=Int": {POS: PRON}, "Pron_Art__Number=Sing|PronType=Int": {POS: PRON},
"Pron_N_Adv__Number=Sing|PronType=Ind": {POS: PRON}, "Pron_N_Adv__Number=Sing|PronType=Ind": {POS: PRON},
"Pron_N_V_Adv_Num_Punc__Aspect=Imp|Definite=Def|Mood=Ind|Number=Sing|Person=3|PronType=Ind|Tense=Pres|VerbForm=Fin": {POS: PRON}, "Pron_N_V_Adv_Num_Punc__Aspect=Imp|Definite=Def|Mood=Ind|Number=Sing|Person=3|PronType=Ind|Tense=Pres|VerbForm=Fin": {
"Pron_N_V_Conj_N__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|PronType=Ind|Tense=Pres|VerbForm=Fin": {POS: PRON}, POS: PRON
},
"Pron_N_V_Conj_N__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|PronType=Ind|Tense=Pres|VerbForm=Fin": {
POS: PRON
},
"Pron_N__Case=Gen|Number=Sing|PronType=Ind": {POS: PRON}, "Pron_N__Case=Gen|Number=Sing|PronType=Ind": {POS: PRON},
"Pron_N__Number=Sing|PronType=Ind": {POS: PRON}, "Pron_N__Number=Sing|PronType=Ind": {POS: PRON},
"Pron_N|aanw|gen|attr_soort|mv|neut__Case=Gen|Number=Plur|PronType=Dem": {POS: PRON}, "Pron_N|aanw|gen|attr_soort|mv|neut__Case=Gen|Number=Plur|PronType=Dem": {
POS: PRON
},
"Pron_N|onbep|neut|attr_soort|ev|neut__Number=Sing|PronType=Ind": {POS: PRON}, "Pron_N|onbep|neut|attr_soort|ev|neut__Number=Sing|PronType=Ind": {POS: PRON},
"Pron_Prep_Art__Number=Sing|PronType=Int": {POS: PRON}, "Pron_Prep_Art__Number=Sing|PronType=Int": {POS: PRON},
"Pron_Prep_Art__Number=Sing|PronType=Rel": {POS: PRON}, "Pron_Prep_Art__Number=Sing|PronType=Rel": {POS: PRON},
@@ -529,10 +579,16 @@ TAG_MAP = {
"Pron_Prep|betr|neut|zelfst_voor__PronType=Rel": {POS: PRON}, "Pron_Prep|betr|neut|zelfst_voor__PronType=Rel": {POS: PRON},
"Pron_Prep|onbep|neut|zelfst_voor__PronType=Ind": {POS: PRON}, "Pron_Prep|onbep|neut|zelfst_voor__PronType=Ind": {POS: PRON},
"Pron_Prep|vrag|neut|attr_voor__PronType=Int": {POS: PRON}, "Pron_Prep|vrag|neut|attr_voor__PronType=Int": {POS: PRON},
"Pron_Pron_V__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|PronType=Rel|Tense=Pres|VerbForm=Fin": {POS: PRON}, "Pron_Pron_V__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|PronType=Rel|Tense=Pres|VerbForm=Fin": {
POS: PRON
},
"Pron_Pron__Person=3|PronType=Prs|Reflex=Yes": {POS: PRON}, "Pron_Pron__Person=3|PronType=Prs|Reflex=Yes": {POS: PRON},
"Pron_V_V__Aspect=Imp|Mood=Ind|Person=3|PronType=Dem|Tense=Pres|VerbForm=Inf": {POS: PRON}, "Pron_V_V__Aspect=Imp|Mood=Ind|Person=3|PronType=Dem|Tense=Pres|VerbForm=Inf": {
"Pron_V__Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Prs|VerbForm=Inf": {POS: PRON}, POS: PRON
},
"Pron_V__Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Prs|VerbForm=Inf": {
POS: PRON
},
"Pron_V__Number=Plur|Person=1|Poss=Yes|PronType=Prs|VerbForm=Inf": {POS: PRON}, "Pron_V__Number=Plur|Person=1|Poss=Yes|PronType=Prs|VerbForm=Inf": {POS: PRON},
"Pron|aanw|dat|attr__Case=Dat|PronType=Dem": {POS: PRON}, "Pron|aanw|dat|attr__Case=Dat|PronType=Dem": {POS: PRON},
"Pron|aanw|gen|attr__Case=Gen|PronType=Dem": {POS: PRON}, "Pron|aanw|gen|attr__Case=Gen|PronType=Dem": {POS: PRON},
@@ -547,27 +603,47 @@ TAG_MAP = {
"Pron|bez|1|mv|neut|attr__Number=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: PRON}, "Pron|bez|1|mv|neut|attr__Number=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: PRON},
"Pron|bez|2|ev|neut|attr__Number=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: PRON}, "Pron|bez|2|ev|neut|attr__Number=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: PRON},
"Pron|bez|2|mv|neut|attr__Number=Plur|Person=2|Poss=Yes|PronType=Prs": {POS: PRON}, "Pron|bez|2|mv|neut|attr__Number=Plur|Person=2|Poss=Yes|PronType=Prs": {POS: PRON},
"Pron|bez|3|ev|gen|attr__Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {POS: PRON}, "Pron|bez|3|ev|gen|attr__Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {
POS: PRON
},
"Pron|bez|3|ev|neut|attr__Number=Sing|Person=3|Poss=Yes|PronType=Prs": {POS: PRON}, "Pron|bez|3|ev|neut|attr__Number=Sing|Person=3|Poss=Yes|PronType=Prs": {POS: PRON},
"Pron|bez|3|ev|neut|zelfst__Number=Sing|Person=3|Poss=Yes|PronType=Prs": {POS: PRON}, "Pron|bez|3|ev|neut|zelfst__Number=Sing|Person=3|Poss=Yes|PronType=Prs": {
POS: PRON
},
"Pron|bez|3|mv|neut|attr__Number=Plur|Person=3|Poss=Yes|PronType=Prs": {POS: PRON}, "Pron|bez|3|mv|neut|attr__Number=Plur|Person=3|Poss=Yes|PronType=Prs": {POS: PRON},
"Pron|onbep|gen|attr__Case=Gen|PronType=Ind": {POS: PRON}, "Pron|onbep|gen|attr__Case=Gen|PronType=Ind": {POS: PRON},
"Pron|onbep|gen|zelfst__Case=Gen|PronType=Ind": {POS: PRON}, "Pron|onbep|gen|zelfst__Case=Gen|PronType=Ind": {POS: PRON},
"Pron|onbep|neut|attr__PronType=Ind": {POS: PRON}, "Pron|onbep|neut|attr__PronType=Ind": {POS: PRON},
"Pron|onbep|neut|zelfst__PronType=Ind": {POS: PRON}, "Pron|onbep|neut|zelfst__PronType=Ind": {POS: PRON},
"Pron|per|1|ev|datofacc__Case=Acc,Dat|Number=Sing|Person=1|PronType=Prs": {POS: PRON}, "Pron|per|1|ev|datofacc__Case=Acc,Dat|Number=Sing|Person=1|PronType=Prs": {
POS: PRON
},
"Pron|per|1|ev|nom__Case=Nom|Number=Sing|Person=1|PronType=Prs": {POS: PRON}, "Pron|per|1|ev|nom__Case=Nom|Number=Sing|Person=1|PronType=Prs": {POS: PRON},
"Pron|per|1|mv|datofacc__Case=Acc,Dat|Number=Plur|Person=1|PronType=Prs": {POS: PRON}, "Pron|per|1|mv|datofacc__Case=Acc,Dat|Number=Plur|Person=1|PronType=Prs": {
POS: PRON
},
"Pron|per|1|mv|nom__Case=Nom|Number=Plur|Person=1|PronType=Prs": {POS: PRON}, "Pron|per|1|mv|nom__Case=Nom|Number=Plur|Person=1|PronType=Prs": {POS: PRON},
"Pron|per|2|ev|datofacc__Case=Acc,Dat|Number=Sing|Person=2|PronType=Prs": {POS: PRON}, "Pron|per|2|ev|datofacc__Case=Acc,Dat|Number=Sing|Person=2|PronType=Prs": {
POS: PRON
},
"Pron|per|2|ev|nom__Case=Nom|Number=Sing|Person=2|PronType=Prs": {POS: PRON}, "Pron|per|2|ev|nom__Case=Nom|Number=Sing|Person=2|PronType=Prs": {POS: PRON},
"Pron|per|2|mv|datofacc__Case=Acc,Dat|Number=Plur|Person=2|PronType=Prs": {POS: PRON}, "Pron|per|2|mv|datofacc__Case=Acc,Dat|Number=Plur|Person=2|PronType=Prs": {
POS: PRON
},
"Pron|per|2|mv|nom__Case=Nom|Number=Plur|Person=2|PronType=Prs": {POS: PRON}, "Pron|per|2|mv|nom__Case=Nom|Number=Plur|Person=2|PronType=Prs": {POS: PRON},
"Pron|per|3|evofmv|datofacc__Case=Acc,Dat|Number=Plur,Sing|Person=3|PronType=Prs": {POS: PRON}, "Pron|per|3|evofmv|datofacc__Case=Acc,Dat|Number=Plur,Sing|Person=3|PronType=Prs": {
"Pron|per|3|evofmv|nom__Case=Nom|Number=Plur,Sing|Person=3|PronType=Prs": {POS: PRON}, POS: PRON
"Pron|per|3|ev|datofacc__Case=Acc,Dat|Number=Sing|Person=3|PronType=Prs": {POS: PRON}, },
"Pron|per|3|evofmv|nom__Case=Nom|Number=Plur,Sing|Person=3|PronType=Prs": {
POS: PRON
},
"Pron|per|3|ev|datofacc__Case=Acc,Dat|Number=Sing|Person=3|PronType=Prs": {
POS: PRON
},
"Pron|per|3|ev|nom__Case=Nom|Number=Sing|Person=3|PronType=Prs": {POS: PRON}, "Pron|per|3|ev|nom__Case=Nom|Number=Sing|Person=3|PronType=Prs": {POS: PRON},
"Pron|per|3|mv|datofacc__Case=Acc,Dat|Number=Plur|Person=3|PronType=Prs": {POS: PRON}, "Pron|per|3|mv|datofacc__Case=Acc,Dat|Number=Plur|Person=3|PronType=Prs": {
POS: PRON
},
"Pron|rec|gen__Case=Gen|PronType=Rcp": {POS: PRON}, "Pron|rec|gen__Case=Gen|PronType=Rcp": {POS: PRON},
"Pron|rec|neut__PronType=Rcp": {POS: PRON}, "Pron|rec|neut__PronType=Rcp": {POS: PRON},
"Pron|ref|1|ev__Number=Sing|Person=1|PronType=Prs|Reflex=Yes": {POS: PRON}, "Pron|ref|1|ev__Number=Sing|Person=1|PronType=Prs|Reflex=Yes": {POS: PRON},
@@ -597,20 +673,34 @@ TAG_MAP = {
"Punc|vraag__PunctType=Qest": {POS: PUNCT}, "Punc|vraag__PunctType=Qest": {POS: PUNCT},
"V_Adv_Art_N_Prep_Pron_N__Degree=Pos|Number=Plur|Person=2|Subcat=Tran": {POS: VERB}, "V_Adv_Art_N_Prep_Pron_N__Degree=Pos|Number=Plur|Person=2|Subcat=Tran": {POS: VERB},
"V_Adv__Degree=Pos|Subcat=Tran": {POS: VERB}, "V_Adv__Degree=Pos|Subcat=Tran": {POS: VERB},
"V_Art_N_Num_N__Aspect=Imp|Definite=Def|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|VerbType=Mod": {POS: VERB}, "V_Art_N_Num_N__Aspect=Imp|Definite=Def|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|VerbType=Mod": {
POS: VERB
},
"V_Art_N__Number=Sing|Subcat=Tran": {POS: VERB}, "V_Art_N__Number=Sing|Subcat=Tran": {POS: VERB},
"V_Conj_N_N__Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB}, "V_Conj_N_N__Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB},
"V_Conj_Pron__Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB}, "V_Conj_Pron__Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB},
"V_N_Conj_Adj_N_Prep_Art_N__Degree=Pos|Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB}, "V_N_Conj_Adj_N_Prep_Art_N__Degree=Pos|Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Part": {
POS: VERB
},
"V_N_N__Number=Sing|Subcat=Intr|Tense=Pres|VerbForm=Part": {POS: VERB}, "V_N_N__Number=Sing|Subcat=Intr|Tense=Pres|VerbForm=Part": {POS: VERB},
"V_N_N__Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB}, "V_N_N__Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB},
"V_N_V__Aspect=Imp|Mood=Ind|Number=Sing|Subcat=Intr|Tense=Pres|VerbForm=Inf": {POS: VERB}, "V_N_V__Aspect=Imp|Mood=Ind|Number=Sing|Subcat=Intr|Tense=Pres|VerbForm=Inf": {
POS: VERB
},
"V_N__Number=Plur|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB}, "V_N__Number=Plur|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB},
"V_N|trans|imp_eigen|ev|neut__Number=Sing|Subcat=Tran": {POS: VERB}, "V_N|trans|imp_eigen|ev|neut__Number=Sing|Subcat=Tran": {POS: VERB},
"V_Prep|intrans|verldw|onverv_voor__Subcat=Intr|Tense=Past|VerbForm=Part": {POS: VERB}, "V_Prep|intrans|verldw|onverv_voor__Subcat=Intr|Tense=Past|VerbForm=Part": {
"V_Pron_Adv_Adv_Pron_V__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {POS: VERB}, POS: VERB
"V_Pron_Adv__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {POS: VERB}, },
"V_Pron_V__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Tran|Tense=Pres|VerbForm=Fin": {POS: VERB}, "V_Pron_Adv_Adv_Pron_V__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
POS: VERB
},
"V_Pron_Adv__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
POS: VERB
},
"V_Pron_V__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
POS: VERB
},
"V_Pron__VerbType=Aux,Cop": {POS: VERB}, "V_Pron__VerbType=Aux,Cop": {POS: VERB},
"V_V|hulp|imp_intrans|inf__VerbForm=Inf|VerbType=Mod": {POS: VERB}, "V_V|hulp|imp_intrans|inf__VerbForm=Inf|VerbType=Mod": {POS: VERB},
"V|hulpofkopp|conj__Mood=Sub|VerbForm=Fin": {POS: VERB}, "V|hulpofkopp|conj__Mood=Sub|VerbForm=Fin": {POS: VERB},
@@ -620,94 +710,220 @@ TAG_MAP = {
"V|hulpofkopp|inf__VerbForm=Inf": {POS: VERB}, "V|hulpofkopp|inf__VerbForm=Inf": {POS: VERB},
"V|hulpofkopp|inf__VerbForm=Inf|VerbType=Aux,Cop": {POS: VERB}, "V|hulpofkopp|inf__VerbForm=Inf|VerbType=Aux,Cop": {POS: VERB},
"V|hulpofkopp|inf|subst__VerbForm=Inf": {POS: VERB}, "V|hulpofkopp|inf|subst__VerbForm=Inf": {POS: VERB},
"V|hulpofkopp|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Pres|VerbForm=Fin": {POS: VERB}, "V|hulpofkopp|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Pres|VerbForm=Fin": {
"V|hulpofkopp|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Pres|VerbForm=Fin|VerbType=Aux,Cop": {POS: VERB}, POS: VERB
"V|hulpofkopp|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB}, },
"V|hulpofkopp|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|VerbType=Aux,Cop": {POS: VERB}, "V|hulpofkopp|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Pres|VerbForm=Fin|VerbType=Aux,Cop": {
"V|hulpofkopp|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB}, POS: VERB
"V|hulpofkopp|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin|VerbType=Aux,Cop": {POS: VERB}, },
"V|hulpofkopp|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB}, "V|hulpofkopp|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {
"V|hulpofkopp|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|VerbType=Aux,Cop": {POS: VERB}, POS: VERB
"V|hulpofkopp|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin": {POS: VERB}, },
"V|hulpofkopp|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|VerbType=Aux,Cop": {POS: VERB}, "V|hulpofkopp|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|VerbType=Aux,Cop": {
"V|hulpofkopp|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin": {POS: VERB}, POS: VERB
"V|hulpofkopp|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|VerbType=Aux,Cop": {POS: VERB}, },
"V|hulpofkopp|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {
POS: VERB
},
"V|hulpofkopp|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin|VerbType=Aux,Cop": {
POS: VERB
},
"V|hulpofkopp|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {
POS: VERB
},
"V|hulpofkopp|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|VerbType=Aux,Cop": {
POS: VERB
},
"V|hulpofkopp|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin": {
POS: VERB
},
"V|hulpofkopp|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|VerbType=Aux,Cop": {
POS: VERB
},
"V|hulpofkopp|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin": {
POS: VERB
},
"V|hulpofkopp|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|VerbType=Aux,Cop": {
POS: VERB
},
"V|hulpofkopp|tegdw|vervneut__Case=Nom|Tense=Pres|VerbForm=Part": {POS: VERB}, "V|hulpofkopp|tegdw|vervneut__Case=Nom|Tense=Pres|VerbForm=Part": {POS: VERB},
"V|hulpofkopp|tegdw|vervneut__Case=Nom|Tense=Pres|VerbForm=Part|VerbType=Aux,Cop": {POS: VERB}, "V|hulpofkopp|tegdw|vervneut__Case=Nom|Tense=Pres|VerbForm=Part|VerbType=Aux,Cop": {
POS: VERB
},
"V|hulpofkopp|verldw|onverv__Tense=Past|VerbForm=Part": {POS: VERB}, "V|hulpofkopp|verldw|onverv__Tense=Past|VerbForm=Part": {POS: VERB},
"V|hulpofkopp|verldw|onverv__Tense=Past|VerbForm=Part|VerbType=Aux,Cop": {POS: VERB}, "V|hulpofkopp|verldw|onverv__Tense=Past|VerbForm=Part|VerbType=Aux,Cop": {
POS: VERB
},
"V|hulp|conj__Mood=Sub|VerbForm=Fin|VerbType=Mod": {POS: VERB}, "V|hulp|conj__Mood=Sub|VerbForm=Fin|VerbType=Mod": {POS: VERB},
"V|hulp|inf__VerbForm=Inf": {POS: VERB}, "V|hulp|inf__VerbForm=Inf": {POS: VERB},
"V|hulp|inf__VerbForm=Inf|VerbType=Mod": {POS: VERB}, "V|hulp|inf__VerbForm=Inf|VerbType=Mod": {POS: VERB},
"V|hulp|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Pres|VerbForm=Fin": {POS: VERB}, "V|hulp|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Pres|VerbForm=Fin": {
"V|hulp|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Pres|VerbForm=Fin|VerbType=Mod": {POS: VERB}, POS: VERB
"V|hulp|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB}, },
"V|hulp|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|VerbType=Mod": {POS: VERB}, "V|hulp|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Pres|VerbForm=Fin|VerbType=Mod": {
"V|hulp|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB}, POS: VERB
"V|hulp|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin|VerbType=Mod": {POS: VERB}, },
"V|hulp|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB}, "V|hulp|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {
"V|hulp|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|VerbType=Mod": {POS: VERB}, POS: VERB
"V|hulp|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin": {POS: VERB}, },
"V|hulp|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|VerbType=Mod": {POS: VERB}, "V|hulp|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|VerbType=Mod": {
"V|hulp|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin": {POS: VERB}, POS: VERB
"V|hulp|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|VerbType=Mod": {POS: VERB}, },
"V|hulp|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {
POS: VERB
},
"V|hulp|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin|VerbType=Mod": {
POS: VERB
},
"V|hulp|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {
POS: VERB
},
"V|hulp|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|VerbType=Mod": {
POS: VERB
},
"V|hulp|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin": {
POS: VERB
},
"V|hulp|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|VerbType=Mod": {
POS: VERB
},
"V|hulp|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin": {
POS: VERB
},
"V|hulp|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|VerbType=Mod": {
POS: VERB
},
"V|hulp|verldw|onverv__Tense=Past|VerbForm=Part": {POS: VERB}, "V|hulp|verldw|onverv__Tense=Past|VerbForm=Part": {POS: VERB},
"V|hulp|verldw|onverv__Tense=Past|VerbForm=Part|VerbType=Mod": {POS: VERB}, "V|hulp|verldw|onverv__Tense=Past|VerbForm=Part|VerbType=Mod": {POS: VERB},
"V|intrans|conj__Mood=Sub|Subcat=Intr|VerbForm=Fin": {POS: VERB}, "V|intrans|conj__Mood=Sub|Subcat=Intr|VerbForm=Fin": {POS: VERB},
"V|intrans|imp__Mood=Imp|Subcat=Intr|VerbForm=Fin": {POS: VERB}, "V|intrans|imp__Mood=Imp|Subcat=Intr|VerbForm=Fin": {POS: VERB},
"V|intrans|inf__Subcat=Intr|VerbForm=Inf": {POS: VERB}, "V|intrans|inf__Subcat=Intr|VerbForm=Inf": {POS: VERB},
"V|intrans|inf|subst__Subcat=Intr|VerbForm=Inf": {POS: VERB}, "V|intrans|inf|subst__Subcat=Intr|VerbForm=Inf": {POS: VERB},
"V|intrans|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Subcat=Intr|Tense=Pres|VerbForm=Fin": {POS: VERB}, "V|intrans|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Subcat=Intr|Tense=Pres|VerbForm=Fin": {
"V|intrans|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Subcat=Intr|Tense=Pres|VerbForm=Fin": {POS: VERB}, POS: VERB
"V|intrans|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Subcat=Intr|Tense=Pres|VerbForm=Fin": {POS: VERB}, },
"V|intrans|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Intr|Tense=Pres|VerbForm=Fin": {POS: VERB}, "V|intrans|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Subcat=Intr|Tense=Pres|VerbForm=Fin": {
"V|intrans|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Subcat=Intr|Tense=Past|VerbForm=Fin": {POS: VERB}, POS: VERB
"V|intrans|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Subcat=Intr|Tense=Past|VerbForm=Fin": {POS: VERB}, },
"V|intrans|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Subcat=Intr|Tense=Pres|VerbForm=Fin": {
POS: VERB
},
"V|intrans|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Intr|Tense=Pres|VerbForm=Fin": {
POS: VERB
},
"V|intrans|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Subcat=Intr|Tense=Past|VerbForm=Fin": {
POS: VERB
},
"V|intrans|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Subcat=Intr|Tense=Past|VerbForm=Fin": {
POS: VERB
},
"V|intrans|tegdw|onverv__Subcat=Intr|Tense=Pres|VerbForm=Part": {POS: VERB}, "V|intrans|tegdw|onverv__Subcat=Intr|Tense=Pres|VerbForm=Part": {POS: VERB},
"V|intrans|tegdw|vervmv__Number=Plur|Subcat=Intr|Tense=Pres|VerbForm=Part": {POS: VERB}, "V|intrans|tegdw|vervmv__Number=Plur|Subcat=Intr|Tense=Pres|VerbForm=Part": {
"V|intrans|tegdw|vervneut__Case=Nom|Subcat=Intr|Tense=Pres|VerbForm=Part": {POS: VERB}, POS: VERB
"V|intrans|tegdw|vervvergr__Degree=Cmp|Subcat=Intr|Tense=Pres|VerbForm=Part": {POS: VERB}, },
"V|intrans|tegdw|vervneut__Case=Nom|Subcat=Intr|Tense=Pres|VerbForm=Part": {
POS: VERB
},
"V|intrans|tegdw|vervvergr__Degree=Cmp|Subcat=Intr|Tense=Pres|VerbForm=Part": {
POS: VERB
},
"V|intrans|verldw|onverv__Subcat=Intr|Tense=Past|VerbForm=Part": {POS: VERB}, "V|intrans|verldw|onverv__Subcat=Intr|Tense=Past|VerbForm=Part": {POS: VERB},
"V|intrans|verldw|vervmv__Number=Plur|Subcat=Intr|Tense=Past|VerbForm=Part": {POS: VERB}, "V|intrans|verldw|vervmv__Number=Plur|Subcat=Intr|Tense=Past|VerbForm=Part": {
"V|intrans|verldw|vervneut__Case=Nom|Subcat=Intr|Tense=Past|VerbForm=Part": {POS: VERB}, POS: VERB
},
"V|intrans|verldw|vervneut__Case=Nom|Subcat=Intr|Tense=Past|VerbForm=Part": {
POS: VERB
},
"V|refl|imp__Mood=Imp|Reflex=Yes|VerbForm=Fin": {POS: VERB}, "V|refl|imp__Mood=Imp|Reflex=Yes|VerbForm=Fin": {POS: VERB},
"V|refl|inf__Reflex=Yes|VerbForm=Inf": {POS: VERB}, "V|refl|inf__Reflex=Yes|VerbForm=Inf": {POS: VERB},
"V|refl|inf|subst__Reflex=Yes|VerbForm=Inf": {POS: VERB}, "V|refl|inf|subst__Reflex=Yes|VerbForm=Inf": {POS: VERB},
"V|refl|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Reflex=Yes|Tense=Pres|VerbForm=Fin": {POS: VERB}, "V|refl|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Reflex=Yes|Tense=Pres|VerbForm=Fin": {
"V|refl|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Reflex=Yes|Tense=Pres|VerbForm=Fin": {POS: VERB}, POS: VERB
"V|refl|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Reflex=Yes|Tense=Pres|VerbForm=Fin": {POS: VERB}, },
"V|refl|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Reflex=Yes|Tense=Pres|VerbForm=Fin": {POS: VERB}, "V|refl|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Reflex=Yes|Tense=Pres|VerbForm=Fin": {
"V|refl|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Reflex=Yes|Tense=Past|VerbForm=Fin": {POS: VERB}, POS: VERB
"V|refl|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Reflex=Yes|Tense=Past|VerbForm=Fin": {POS: VERB}, },
"V|refl|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Reflex=Yes|Tense=Pres|VerbForm=Fin": {
POS: VERB
},
"V|refl|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Reflex=Yes|Tense=Pres|VerbForm=Fin": {
POS: VERB
},
"V|refl|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Reflex=Yes|Tense=Past|VerbForm=Fin": {
POS: VERB
},
"V|refl|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Reflex=Yes|Tense=Past|VerbForm=Fin": {
POS: VERB
},
"V|refl|tegdw|vervneut__Case=Nom|Reflex=Yes|Tense=Pres|VerbForm=Part": {POS: VERB}, "V|refl|tegdw|vervneut__Case=Nom|Reflex=Yes|Tense=Pres|VerbForm=Part": {POS: VERB},
"V|refl|verldw|onverv__Reflex=Yes|Tense=Past|VerbForm=Part": {POS: VERB}, "V|refl|verldw|onverv__Reflex=Yes|Tense=Past|VerbForm=Part": {POS: VERB},
"V|trans|conj__Mood=Sub|Subcat=Tran|VerbForm=Fin": {POS: VERB}, "V|trans|conj__Mood=Sub|Subcat=Tran|VerbForm=Fin": {POS: VERB},
"V|trans|imp__Mood=Imp|Subcat=Tran|VerbForm=Fin": {POS: VERB}, "V|trans|imp__Mood=Imp|Subcat=Tran|VerbForm=Fin": {POS: VERB},
"V|trans|inf__Subcat=Tran|VerbForm=Inf": {POS: VERB}, "V|trans|inf__Subcat=Tran|VerbForm=Inf": {POS: VERB},
"V|trans|inf|subst__Subcat=Tran|VerbForm=Inf": {POS: VERB}, "V|trans|inf|subst__Subcat=Tran|VerbForm=Inf": {POS: VERB},
"V|trans|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Subcat=Tran|Tense=Pres|VerbForm=Fin": {POS: VERB}, "V|trans|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
"V|trans|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Subcat=Tran|Tense=Pres|VerbForm=Fin": {POS: VERB}, POS: VERB
"V|trans|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {POS: VERB}, },
"V|trans|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Tran|Tense=Pres|VerbForm=Fin": {POS: VERB}, "V|trans|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
"V|trans|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Fin": {POS: VERB}, POS: VERB
"V|trans|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Subcat=Tran|Tense=Past|VerbForm=Fin": {POS: VERB}, },
"V|trans|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
POS: VERB
},
"V|trans|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
POS: VERB
},
"V|trans|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Fin": {
POS: VERB
},
"V|trans|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Subcat=Tran|Tense=Past|VerbForm=Fin": {
POS: VERB
},
"V|trans|tegdw|onverv__Subcat=Tran|Tense=Pres|VerbForm=Part": {POS: VERB}, "V|trans|tegdw|onverv__Subcat=Tran|Tense=Pres|VerbForm=Part": {POS: VERB},
"V|trans|tegdw|vervneut__Case=Nom|Subcat=Tran|Tense=Pres|VerbForm=Part": {POS: VERB}, "V|trans|tegdw|vervneut__Case=Nom|Subcat=Tran|Tense=Pres|VerbForm=Part": {
POS: VERB
},
"V|trans|verldw|onverv__Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB}, "V|trans|verldw|onverv__Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB},
"V|trans|verldw|vervmv__Number=Plur|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB}, "V|trans|verldw|vervmv__Number=Plur|Subcat=Tran|Tense=Past|VerbForm=Part": {
"V|trans|verldw|vervneut__Case=Nom|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB}, POS: VERB
"V|trans|verldw|vervvergr__Degree=Cmp|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB}, },
"X__Aspect=Imp|Definite=Def|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|VerbType=Mod": {POS: X}, "V|trans|verldw|vervneut__Case=Nom|Subcat=Tran|Tense=Past|VerbForm=Part": {
"X__Aspect=Imp|Definite=Def|Mood=Ind|Number=Sing|Person=3|PronType=Ind|Tense=Pres|VerbForm=Fin": {POS: X}, POS: VERB
"X__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {POS: X}, },
"X__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Part": {POS: X}, "V|trans|verldw|vervvergr__Degree=Cmp|Subcat=Tran|Tense=Past|VerbForm=Part": {
"X__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Inf": {POS: X}, POS: VERB
},
"X__Aspect=Imp|Definite=Def|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|VerbType=Mod": {
POS: X
},
"X__Aspect=Imp|Definite=Def|Mood=Ind|Number=Sing|Person=3|PronType=Ind|Tense=Pres|VerbForm=Fin": {
POS: X
},
"X__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
POS: X
},
"X__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Part": {
POS: X
},
"X__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Inf": {
POS: X
},
"X__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: X}, "X__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: X},
"X__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|PronType=Dem|Tense=Pres|VerbForm=Fin": {POS: X}, "X__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|PronType=Dem|Tense=Pres|VerbForm=Fin": {
"X__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|PronType=Rel|Tense=Pres|VerbForm=Fin": {POS: X}, POS: X
"X__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {POS: X}, },
"X__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|PronType=Ind|Tense=Pres|VerbForm=Fin": {POS: X}, "X__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|PronType=Rel|Tense=Pres|VerbForm=Fin": {
"X__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Tran|Tense=Pres|VerbForm=Fin": {POS: X}, POS: X
},
"X__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
POS: X
},
"X__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|PronType=Ind|Tense=Pres|VerbForm=Fin": {
POS: X
},
"X__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
POS: X
},
"X__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: X}, "X__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: X},
"X__Aspect=Imp|Mood=Ind|Number=Sing|Subcat=Intr|Tense=Pres|VerbForm=Inf": {POS: X}, "X__Aspect=Imp|Mood=Ind|Number=Sing|Subcat=Intr|Tense=Pres|VerbForm=Inf": {POS: X},
"X__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin": {POS: X}, "X__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin": {POS: X},
@ -808,5 +1024,5 @@ TAG_MAP = {
"X__VerbForm=Inf|VerbType=Mod": {POS: X}, "X__VerbForm=Inf|VerbType=Mod": {POS: X},
"X__VerbType=Aux,Cop": {POS: X}, "X__VerbType=Aux,Cop": {POS: X},
"X___": {POS: X}, "X___": {POS: X},
"_SP": {POS: SPACE} "_SP": {POS: SPACE},
} }

File diff suppressed because it is too large

@ -5039,5 +5039,5 @@ TAG_MAP = {
"punc": {POS: PUNCT}, "punc": {POS: PUNCT},
"v-pcp|M|P": {POS: VERB}, "v-pcp|M|P": {POS: VERB},
"v-pcp|M|S": {POS: VERB}, "v-pcp|M|S": {POS: VERB},
"_SP": {POS: SPACE} "_SP": {POS: SPACE},
} }
@ -39,7 +39,9 @@ _infixes = (
+ LIST_ICONS + LIST_ICONS
+ [ + [
r"(?<=[0-9])[+\-\*^](?=[0-9-])", r"(?<=[0-9])[+\-\*^](?=[0-9-])",
r"(?<=[{al}{q}])\.(?=[{au}{q}])".format(al=ALPHA_LOWER, au=ALPHA_UPPER, q=CONCAT_QUOTES), r"(?<=[{al}{q}])\.(?=[{au}{q}])".format(
al=ALPHA_LOWER, au=ALPHA_UPPER, q=CONCAT_QUOTES
),
r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA), r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
r"(?<=[{a}])(?:{h})(?=[{a}])".format(a=ALPHA, h=HYPHENS), r"(?<=[{a}])(?:{h})(?=[{a}])".format(a=ALPHA, h=HYPHENS),
r"(?<=[{a}0-9])[:<>=/](?=[{a}])".format(a=ALPHA), r"(?<=[{a}0-9])[:<>=/](?=[{a}])".format(a=ALPHA),
@ -19,7 +19,6 @@ _abbrev_exc = [
{ORTH: "вс", LEMMA: "воскресенье", NORM: "воскресенье"}, {ORTH: "вс", LEMMA: "воскресенье", NORM: "воскресенье"},
{ORTH: "вскр", LEMMA: "воскресенье", NORM: "воскресенье"}, {ORTH: "вскр", LEMMA: "воскресенье", NORM: "воскресенье"},
{ORTH: "воскр", LEMMA: "воскресенье", NORM: "воскресенье"}, {ORTH: "воскр", LEMMA: "воскресенье", NORM: "воскресенье"},
# Months abbreviations # Months abbreviations
{ORTH: "янв", LEMMA: "январь", NORM: "январь"}, {ORTH: "янв", LEMMA: "январь", NORM: "январь"},
{ORTH: "фев", LEMMA: "февраль", NORM: "февраль"}, {ORTH: "фев", LEMMA: "февраль", NORM: "февраль"},
@ -49,16 +48,18 @@ for abbrev_desc in _abbrev_exc:
abbrev = abbrev_desc[ORTH] abbrev = abbrev_desc[ORTH]
for orth in (abbrev, abbrev.capitalize(), abbrev.upper()): for orth in (abbrev, abbrev.capitalize(), abbrev.upper()):
_exc[orth] = [{ORTH: orth, LEMMA: abbrev_desc[LEMMA], NORM: abbrev_desc[NORM]}] _exc[orth] = [{ORTH: orth, LEMMA: abbrev_desc[LEMMA], NORM: abbrev_desc[NORM]}]
_exc[orth + '.'] = [{ORTH: orth + '.', LEMMA: abbrev_desc[LEMMA], NORM: abbrev_desc[NORM]}] _exc[orth + "."] = [
{ORTH: orth + ".", LEMMA: abbrev_desc[LEMMA], NORM: abbrev_desc[NORM]}
]
_slang_exc = [ _slang_exc = [
{ORTH: '2к15', LEMMA: '2015', NORM: '2015'}, {ORTH: "2к15", LEMMA: "2015", NORM: "2015"},
{ORTH: '2к16', LEMMA: '2016', NORM: '2016'}, {ORTH: "2к16", LEMMA: "2016", NORM: "2016"},
{ORTH: '2к17', LEMMA: '2017', NORM: '2017'}, {ORTH: "2к17", LEMMA: "2017", NORM: "2017"},
{ORTH: '2к18', LEMMA: '2018', NORM: '2018'}, {ORTH: "2к18", LEMMA: "2018", NORM: "2018"},
{ORTH: '2к19', LEMMA: '2019', NORM: '2019'}, {ORTH: "2к19", LEMMA: "2019", NORM: "2019"},
{ORTH: '2к20', LEMMA: '2020', NORM: '2020'}, {ORTH: "2к20", LEMMA: "2020", NORM: "2020"},
] ]
for slang_desc in _slang_exc: for slang_desc in _slang_exc:
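These exception tables pair an ORTH with its LEMMA/NORM so the tokenizer keeps the string as a single token while normalising it. A minimal sketch of the same mechanism at runtime, assuming a spaCy 2.x English pipeline and a made-up abbreviation (not an entry from this file):

    from spacy.lang.en import English
    from spacy.symbols import ORTH, NORM

    nlp = English()
    # Hypothetical abbreviation, same ORTH/NORM shape as the entries above
    nlp.tokenizer.add_special_case("approx.", [{ORTH: "approx.", NORM: "approximately"}])
    doc = nlp("It takes approx. two hours.")
    print([t.text for t in doc])  # "approx." stays a single token
    print(doc[2].norm_)           # "approximately"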
@ -15,7 +15,7 @@ _infixes = (
r"(?<=[{a}])--(?=[{a}])".format(a=ALPHA), r"(?<=[{a}])--(?=[{a}])".format(a=ALPHA),
r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA), r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
r"(?<=[{a}])([{q}\)\]\(\[])(?=[\-{a}])".format(a=ALPHA, q=CONCAT_QUOTES), r"(?<=[{a}])([{q}\)\]\(\[])(?=[\-{a}])".format(a=ALPHA, q=CONCAT_QUOTES),
r'(?<=[{a}])(?:{h})(?=[{a}])'.format(a=ALPHA, h=_hyphens_no_dash), r"(?<=[{a}])(?:{h})(?=[{a}])".format(a=ALPHA, h=_hyphens_no_dash),
r"(?<=[0-9])-(?=[0-9])", r"(?<=[0-9])-(?=[0-9])",
] ]
) )
@ -7,7 +7,6 @@ from __future__ import unicode_literals
# Entries should be added in the following format: # Entries should be added in the following format:
LOOKUP = { LOOKUP = {
"آ": "آنا", "آ": "آنا",
"آْباد": "آْباد", "آْباد": "آْباد",
@ -29109,5 +29108,5 @@ LOOKUP = {
"ظالموں": "ظالم", "ظالموں": "ظالم",
"ظلم": "ظلم", "ظلم": "ظلم",
"ظلمو": "ظلم", "ظلمو": "ظلم",
"ظلموں": "ظلم" "ظلموں": "ظلم",
} }
@ -16,5 +16,5 @@ sentences = [
"此外,中文还是联合国正式语文,并被上海合作组织等国际组织采用为官方语言。", "此外,中文还是联合国正式语文,并被上海合作组织等国际组织采用为官方语言。",
"在中国大陆,汉语通称为“汉语”。", "在中国大陆,汉语通称为“汉语”。",
"在联合国、台湾、香港及澳门,通称为“中文”。", "在联合国、台湾、香港及澳门,通称为“中文”。",
"在新加坡及马来西亚,通称为“华语”。" "在新加坡及马来西亚,通称为“华语”。",
] ]
@ -47,7 +47,7 @@ _single_num_words = [
"拾陆", "拾陆",
"拾柒", "拾柒",
"拾捌", "拾捌",
"拾玖" "拾玖",
] ]
_count_num_words = [ _count_num_words = [
@ -68,27 +68,16 @@ _count_num_words = [
"", "",
"", "",
"", "",
"" "",
] ]
_base_num_words = [ _base_num_words = ["", "", "", "", "亿", "", "", "", ""]
"",
"",
"",
"",
"亿",
"",
"",
"",
""
]
def like_num(text): def like_num(text):
if text.startswith(("+", "-", "±", "~")): if text.startswith(("+", "-", "±", "~")):
text = text[1:] text = text[1:]
text = text.replace(",", "").replace( text = text.replace(",", "").replace(".", "").replace("", "").replace("", "")
".", "").replace("", "").replace("", "")
if text.isdigit(): if text.isdigit():
return True return True
if text.count("/") == 1: if text.count("/") == 1:
@ -97,10 +86,12 @@ def like_num(text):
return True return True
if text in _single_num_words: if text in _single_num_words:
return True return True
# fmt: off
if re.match('^((' + '|'.join(_count_num_words) + '){1}' if re.match('^((' + '|'.join(_count_num_words) + '){1}'
+ '(' + '|'.join(_base_num_words) + '){1})+' + '(' + '|'.join(_base_num_words) + '){1})+'
+ '(' + '|'.join(_count_num_words) + ')?$', text): + '(' + '|'.join(_count_num_words) + ')?$', text):
return True return True
# fmt: on
return False return False
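The `# fmt: off` / `# fmt: on` markers added here are autoformatter directives (Black, judging by the double quotes and trailing commas throughout the commit): everything between them keeps its manual layout. A small illustrative sketch, unrelated to the regex above:

    # fmt: off
    table = [
        1,   2,   3,
        10,  20,  30,
    ]
    # fmt: on
    # Without the markers, Black would normalise the aligned rows (e.g. collapse the extra spaces).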
@ -430,6 +430,7 @@ class Language(object):
DOCS: https://spacy.io/api/language#update DOCS: https://spacy.io/api/language#update
""" """
expected_keys = ("words", "tags", "heads", "deps", "entities", "cats", "links")
if len(docs) != len(golds): if len(docs) != len(golds):
raise IndexError(Errors.E009.format(n_docs=len(docs), n_golds=len(golds))) raise IndexError(Errors.E009.format(n_docs=len(docs), n_golds=len(golds)))
if len(docs) == 0: if len(docs) == 0:
@ -445,10 +446,10 @@ class Language(object):
if isinstance(doc, basestring_): if isinstance(doc, basestring_):
doc = self.make_doc(doc) doc = self.make_doc(doc)
if not isinstance(gold, GoldParse): if not isinstance(gold, GoldParse):
expected_keys = ("words", "tags", "heads", "deps", "entities", "cats", "links") unexpected = [k for k in gold if k not in expected_keys]
unexpected_keys = [k for k in gold if k not in expected_keys] if unexpected:
if unexpected_keys: err = Errors.E151.format(unexp=unexpected, exp=expected_keys)
raise ValueError(Errors.E151.format(unexpected_keys=unexpected_keys, expected_keys=expected_keys)) raise ValueError(err)
gold = GoldParse(doc, **gold) gold = GoldParse(doc, **gold)
doc_objs.append(doc) doc_objs.append(doc)
gold_objs.append(gold) gold_objs.append(gold)
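The reshuffled check above validates the keys of a gold-standard dict before it is turned into a GoldParse. A minimal, assumption-laden sketch of the guarded call (spaCy 2.x API, not taken from the commit):

    import spacy

    nlp = spacy.blank("en")
    ner = nlp.create_pipe("ner")
    ner.add_label("ORG")
    nlp.add_pipe(ner)
    optimizer = nlp.begin_training()

    # Allowed top-level keys: words, tags, heads, deps, entities, cats, links
    nlp.update(["Apple is looking at buying a startup."],
               [{"entities": [(0, 5, "ORG")]}], sgd=optimizer)

    # A misspelled key such as "ents" would now raise ValueError with E151,
    # listing the unexpected key alongside the expected ones.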
@ -5,10 +5,10 @@ from __future__ import unicode_literals
import pytest import pytest
@pytest.mark.parametrize('text,lemma', [("aprox.", "aproximadament"), @pytest.mark.parametrize(
("pàg.", "pàgina"), "text,lemma",
("p.ex.", "per exemple") [("aprox.", "aproximadament"), ("pàg.", "pàgina"), ("p.ex.", "per exemple")],
]) )
def test_ca_tokenizer_handles_abbr(ca_tokenizer, text, lemma): def test_ca_tokenizer_handles_abbr(ca_tokenizer, text, lemma):
tokens = ca_tokenizer(text) tokens = ca_tokenizer(text)
assert len(tokens) == 1 assert len(tokens) == 1
@ -21,21 +21,37 @@ def test_ca_tokenizer_handles_long_text(ca_tokenizer):
assert len(tokens) == 138 assert len(tokens) == 138
@pytest.mark.parametrize('text,length', [ @pytest.mark.parametrize(
"text,length",
[
("Perquè va anar-hi?", 6), ("Perquè va anar-hi?", 6),
("“Ah no?”", 5), ("“Ah no?”", 5),
("""Sí! "Anem", va contestar el Joan Carles""", 11), ("""Sí! "Anem", va contestar el Joan Carles""", 11),
("Van córrer aprox. 10km", 5), ("Van córrer aprox. 10km", 5),
("Llavors perqué...", 3)]) ("Llavors perqué...", 3),
],
)
def test_ca_tokenizer_handles_cnts(ca_tokenizer, text, length): def test_ca_tokenizer_handles_cnts(ca_tokenizer, text, length):
tokens = ca_tokenizer(text) tokens = ca_tokenizer(text)
assert len(tokens) == length assert len(tokens) == length
@pytest.mark.parametrize('text,match', [ @pytest.mark.parametrize(
('10', True), ('1', True), ('10,000', True), ('10,00', True), "text,match",
('999.0', True), ('un', True), ('dos', True), ('bilió', True), [
('gos', False), (',', False), ('1/2', True)]) ("10", True),
("1", True),
("10,000", True),
("10,00", True),
("999.0", True),
("un", True),
("dos", True),
("bilió", True),
("gos", False),
(",", False),
("1/2", True),
],
)
def test_ca_lex_attrs_like_number(ca_tokenizer, text, match): def test_ca_lex_attrs_like_number(ca_tokenizer, text, match):
tokens = ca_tokenizer(text) tokens = ca_tokenizer(text)
assert len(tokens) == 1 assert len(tokens) == 1
@ -32,7 +32,7 @@ def test_de_tokenizer_norm_exceptions(de_tokenizer, text, norms):
assert [token.norm_ for token in tokens] == norms assert [token.norm_ for token in tokens] == norms
@pytest.mark.parametrize('text,norm', [("daß", "dass")]) @pytest.mark.parametrize("text,norm", [("daß", "dass")])
def test_de_lex_attrs_norm_exceptions(de_tokenizer, text, norm): def test_de_lex_attrs_norm_exceptions(de_tokenizer, text, norm):
tokens = de_tokenizer(text) tokens = de_tokenizer(text)
assert tokens[0].norm_ == norm assert tokens[0].norm_ == norm
@ -7,33 +7,33 @@ import pytest
@pytest.mark.parametrize( @pytest.mark.parametrize(
"text", "text",
[ [
u"aujourd'hui", "aujourd'hui",
u"Aujourd'hui", "Aujourd'hui",
u"prud'hommes", "prud'hommes",
u"prudhommal", "prudhommal",
u"audio-numérique", "audio-numérique",
u"Audio-numérique", "Audio-numérique",
u"entr'amis", "entr'amis",
u"entr'abat", "entr'abat",
u"rentr'ouvertes", "rentr'ouvertes",
u"grand'hamien", "grand'hamien",
u"Châteauneuf-la-Forêt", "Châteauneuf-la-Forêt",
u"Château-Guibert", "Château-Guibert",
u"11-septembre", "11-septembre",
u"11-Septembre", "11-Septembre",
u"refox-trottâmes", "refox-trottâmes",
# u"K-POP", # u"K-POP",
# u"K-Pop", # u"K-Pop",
# u"K-pop", # u"K-pop",
u"z'yeutes", "z'yeutes",
u"black-outeront", "black-outeront",
u"états-unienne", "états-unienne",
u"courtes-pattes", "courtes-pattes",
u"court-pattes", "court-pattes",
u"saut-de-ski", "saut-de-ski",
u"Écourt-Saint-Quentin", "Écourt-Saint-Quentin",
u"Bout-de-l'Îlien", "Bout-de-l'Îlien",
u"pet-en-l'air", "pet-en-l'air",
], ],
) )
def test_fr_tokenizer_infix_exceptions(fr_tokenizer, text): def test_fr_tokenizer_infix_exceptions(fr_tokenizer, text):
@ -3,13 +3,18 @@ from __future__ import unicode_literals
import pytest import pytest
# fmt: off
@pytest.mark.parametrize("tokens,lemmas", [ TEST_CASES = [
(["Galime", "vadinti", "gerovės", "valstybe", ",", "turime", "išvystytą", "socialinę", "apsaugą", ",", (["Galime", "vadinti", "gerovės", "valstybe", ",", "turime", "išvystytą", "socialinę", "apsaugą", ",",
"sveikatos", "apsaugą", "ir", "prieinamą", "švietimą", "."], "sveikatos", "apsaugą", "ir", "prieinamą", "švietimą", "."],
["galėti", "vadintas", "gerovė", "valstybė", ",", "turėti", "išvystytas", "socialinis", ["galėti", "vadintas", "gerovė", "valstybė", ",", "turėti", "išvystytas", "socialinis",
"apsauga", ",", "sveikata", "apsauga", "ir", "prieinamas", "švietimas", "."]), "apsauga", ",", "sveikata", "apsauga", "ir", "prieinamas", "švietimas", "."]),
(["taip", ",", "uoliai", "tyrinėjau", "ir", "pasirinkau", "geriausią", "variantą", "."], (["taip", ",", "uoliai", "tyrinėjau", "ir", "pasirinkau", "geriausią", "variantą", "."],
["taip", ",", "uolus", "tyrinėti", "ir", "pasirinkti", "geras", "variantas", "."])]) ["taip", ",", "uolus", "tyrinėti", "ir", "pasirinkti", "geras", "variantas", "."])
]
# fmt: on
@pytest.mark.parametrize("tokens,lemmas", TEST_CASES)
def test_lt_lemmatizer(lt_lemmatizer, tokens, lemmas): def test_lt_lemmatizer(lt_lemmatizer, tokens, lemmas):
assert lemmas == [lt_lemmatizer.lookup(token) for token in tokens] assert lemmas == [lt_lemmatizer.lookup(token) for token in tokens]
@ -7,10 +7,21 @@ from __future__ import unicode_literals
import pytest import pytest
@pytest.mark.parametrize('text,match', [ @pytest.mark.parametrize(
('10', True), ('1', True), ('10,000', True), ('10,00', True), "text,match",
('jeden', True), ('dwa', True), ('milion', True), [
('pies', False), (',', False), ('1/2', True)]) ("10", True),
("1", True),
("10,000", True),
("10,00", True),
("jeden", True),
("dwa", True),
("milion", True),
("pies", False),
(",", False),
("1/2", True),
],
)
def test_lex_attrs_like_number(pl_tokenizer, text, match): def test_lex_attrs_like_number(pl_tokenizer, text, match):
tokens = pl_tokenizer(text) tokens = pl_tokenizer(text)
assert len(tokens) == 1 assert len(tokens) == 1
@ -4,9 +4,7 @@ from __future__ import unicode_literals
import pytest import pytest
@pytest.mark.parametrize( @pytest.mark.parametrize("text", ["ہےں۔", "کیا۔"])
"text", ['ہےں۔', 'کیا۔']
)
def test_contractions(ur_tokenizer, text): def test_contractions(ur_tokenizer, text):
"""Test specific Urdu punctuation character""" """Test specific Urdu punctuation character"""
tokens = ur_tokenizer(text) tokens = ur_tokenizer(text)
@ -134,12 +134,12 @@ def test_matcher_end_zero_plus(en_vocab):
def test_matcher_sets_return_correct_tokens(en_vocab): def test_matcher_sets_return_correct_tokens(en_vocab):
matcher = Matcher(en_vocab) matcher = Matcher(en_vocab)
patterns = [ patterns = [
[{'LOWER': {'IN': ["zero"]}}], [{"LOWER": {"IN": ["zero"]}}],
[{'LOWER': {'IN': ["one"]}}], [{"LOWER": {"IN": ["one"]}}],
[{'LOWER': {'IN': ["two"]}}], [{"LOWER": {"IN": ["two"]}}],
] ]
matcher.add('TEST', None, *patterns) matcher.add("TEST", None, *patterns)
doc = Doc(en_vocab, words="zero one two three".split()) doc = Doc(en_vocab, words="zero one two three".split())
matches = matcher(doc) matches = matcher(doc)
texts = [Span(doc, s, e, label=L).text for L, s, e in matches] texts = [Span(doc, s, e, label=L).text for L, s, e in matches]
assert texts == ['zero', 'one', 'two'] assert texts == ["zero", "one", "two"]
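A usage sketch of the `IN` set operator exercised by this test, outside the fixture; assumes a blank English pipeline, and the pattern key "NUMWORD" is illustrative:

    from spacy.lang.en import English
    from spacy.matcher import Matcher

    nlp = English()
    matcher = Matcher(nlp.vocab)
    matcher.add("NUMWORD", None, [{"LOWER": {"IN": ["zero", "one", "two"]}}])
    doc = nlp("Zero one two three")
    for match_id, start, end in matcher(doc):
        print(doc[start:end].text)  # Zero / one / two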
@ -52,7 +52,9 @@ def test_get_pipe(nlp, name):
assert nlp.get_pipe(name) == new_pipe assert nlp.get_pipe(name) == new_pipe
@pytest.mark.parametrize("name,replacement,not_callable", [("my_component", lambda doc: doc, {})]) @pytest.mark.parametrize(
"name,replacement,not_callable", [("my_component", lambda doc: doc, {})]
)
def test_replace_pipe(nlp, name, replacement, not_callable): def test_replace_pipe(nlp, name, replacement, not_callable):
with pytest.raises(ValueError): with pytest.raises(ValueError):
nlp.replace_pipe(name, new_pipe) nlp.replace_pipe(name, new_pipe)
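A sketch of the API under test, with a trivial pipeline component instead of the fixture (spaCy 2.x component functions assumed):

    from spacy.lang.en import English

    nlp = English()
    nlp.add_pipe(lambda doc: doc, name="my_component")
    nlp.replace_pipe("my_component", lambda doc: doc)  # fine: the replacement is callable
    # nlp.replace_pipe("my_component", {})             # raises ValueError, as asserted above
    print(nlp.pipe_names)  # ['my_component']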
@ -358,7 +358,9 @@ def test_issue850_basic():
assert end == 4 assert end == 4
@pytest.mark.skip(reason="French exception list is not enabled in the default tokenizer anymore") @pytest.mark.skip(
reason="French exception list is not enabled in the default tokenizer anymore"
)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"text", ["au-delàs", "pair-programmâmes", "terra-formées", "σ-compacts"] "text", ["au-delàs", "pair-programmâmes", "terra-formées", "σ-compacts"]
) )
@ -19,7 +19,7 @@ from spacy.symbols import ORTH, LEMMA, POS, VERB, VerbForm_part
def test_issue1235(): def test_issue1235():
"""Test that g is not split of if preceded by a number and a letter""" """Test that g is not split of if preceded by a number and a letter"""
nlp = English() nlp = English()
testwords = u'e2g 2g 52g' testwords = "e2g 2g 52g"
doc = nlp(testwords) doc = nlp(testwords)
assert len(doc) == 5 assert len(doc) == 5
assert doc[0].text == "e2g" assert doc[0].text == "e2g"
@ -4,15 +4,7 @@ from __future__ import unicode_literals
import pytest import pytest
@pytest.mark.parametrize( @pytest.mark.parametrize("word", ["don't", "dont", "I'd", "Id"])
"word",
[
"don't",
"dont",
"I'd",
"Id",
],
)
def test_issue3521(en_tokenizer, word): def test_issue3521(en_tokenizer, word):
tok = en_tokenizer(word)[1] tok = en_tokenizer(word)[1]
# 'not' and 'would' should be stopwords, also in their abbreviated forms # 'not' and 'would' should be stopwords, also in their abbreviated forms
@ -9,7 +9,10 @@ import numpy as np
def test_issue3540(en_vocab): def test_issue3540(en_vocab):
words = ["I", "live", "in", "NewYork", "right", "now"] words = ["I", "live", "in", "NewYork", "right", "now"]
tensor = np.asarray([[1.0, 1.1], [2.0, 2.1], [3.0, 3.1], [4.0, 4.1], [5.0, 5.1], [6.0, 6.1]], dtype="f") tensor = np.asarray(
[[1.0, 1.1], [2.0, 2.1], [3.0, 3.1], [4.0, 4.1], [5.0, 5.1], [6.0, 6.1]],
dtype="f",
)
doc = Doc(en_vocab, words=words) doc = Doc(en_vocab, words=words)
doc.tensor = tensor doc.tensor = tensor
@ -25,7 +28,7 @@ def test_issue3540(en_vocab):
with doc.retokenize() as retokenizer: with doc.retokenize() as retokenizer:
heads = [(doc[3], 1), doc[2]] heads = [(doc[3], 1), doc[2]]
attrs = {"POS": ["PROPN", "PROPN"], "DEP": ["pobj", "compound"]} attrs = {"POS": ["PROPN", "PROPN"], "DEP": ["pobj", "compound"]}
retokenizer.split(doc[3], [u"New", u"York"], heads=heads, attrs=attrs) retokenizer.split(doc[3], ["New", "York"], heads=heads, attrs=attrs)
gold_text = ["I", "live", "in", "New", "York", "right", "now"] gold_text = ["I", "live", "in", "New", "York", "right", "now"]
assert [token.text for token in doc] == gold_text assert [token.text for token in doc] == gold_text
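A sketch of the retokenizer split being tested, on a freshly tokenized doc rather than the fixture; heads and attrs are copied from the test above (spaCy 2.1+):

    from spacy.lang.en import English

    nlp = English()
    doc = nlp("I live in NewYork right now")
    with doc.retokenize() as retokenizer:
        heads = [(doc[3], 1), doc[2]]  # "New" attaches to "York", "York" to "in"
        attrs = {"POS": ["PROPN", "PROPN"], "DEP": ["pobj", "compound"]}
        retokenizer.split(doc[3], ["New", "York"], heads=heads, attrs=attrs)
    print([t.text for t in doc])  # ['I', 'live', 'in', 'New', 'York', 'right', 'now']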
@ -35,7 +35,9 @@ def test_issue3962(doc):
doc2_json = doc2.to_json() doc2_json = doc2.to_json()
assert doc2_json assert doc2_json
assert doc2[0].head.text == "jests" # head set to itself, being the new artificial root assert (
doc2[0].head.text == "jests"
) # head set to itself, being the new artificial root
assert doc2[0].dep_ == "dep" assert doc2[0].dep_ == "dep"
assert doc2[1].head.text == "jests" assert doc2[1].head.text == "jests"
assert doc2[1].dep_ == "prep" assert doc2[1].dep_ == "prep"
@ -92,7 +94,9 @@ def test_issue3962_long(two_sent_doc):
doc2_json = doc2.to_json() doc2_json = doc2.to_json()
assert doc2_json assert doc2_json
assert doc2[0].head.text == "jests" # head set to itself, being the new artificial root (in sentence 1) assert (
doc2[0].head.text == "jests"
) # head set to itself, being the new artificial root (in sentence 1)
assert doc2[0].dep_ == "ROOT" assert doc2[0].dep_ == "ROOT"
assert doc2[1].head.text == "jests" assert doc2[1].head.text == "jests"
assert doc2[1].dep_ == "prep" assert doc2[1].dep_ == "prep"
@ -100,9 +104,13 @@ def test_issue3962_long(two_sent_doc):
assert doc2[2].dep_ == "pobj" assert doc2[2].dep_ == "pobj"
assert doc2[3].head.text == "jests" assert doc2[3].head.text == "jests"
assert doc2[3].dep_ == "punct" assert doc2[3].dep_ == "punct"
assert doc2[4].head.text == "They" # head set to itself, being the new artificial root (in sentence 2) assert (
doc2[4].head.text == "They"
) # head set to itself, being the new artificial root (in sentence 2)
assert doc2[4].dep_ == "dep" assert doc2[4].dep_ == "dep"
assert doc2[4].head.text == "They" # head set to the new artificial head (in sentence 2) assert (
doc2[4].head.text == "They"
) # head set to the new artificial head (in sentence 2)
assert doc2[4].dep_ == "dep" assert doc2[4].dep_ == "dep"
# We should still have 2 sentences # We should still have 2 sentences
@ -30,14 +30,18 @@ def test_serialize_kb_disk(en_vocab):
def _get_dummy_kb(vocab): def _get_dummy_kb(vocab):
kb = KnowledgeBase(vocab=vocab, entity_vector_length=3) kb = KnowledgeBase(vocab=vocab, entity_vector_length=3)
kb.add_entity(entity='Q53', freq=33, entity_vector=[0, 5, 3]) kb.add_entity(entity="Q53", freq=33, entity_vector=[0, 5, 3])
kb.add_entity(entity='Q17', freq=2, entity_vector=[7, 1, 0]) kb.add_entity(entity="Q17", freq=2, entity_vector=[7, 1, 0])
kb.add_entity(entity='Q007', freq=7, entity_vector=[0, 0, 7]) kb.add_entity(entity="Q007", freq=7, entity_vector=[0, 0, 7])
kb.add_entity(entity='Q44', freq=342, entity_vector=[4, 4, 4]) kb.add_entity(entity="Q44", freq=342, entity_vector=[4, 4, 4])
kb.add_alias(alias='double07', entities=['Q17', 'Q007'], probabilities=[0.1, 0.9]) kb.add_alias(alias="double07", entities=["Q17", "Q007"], probabilities=[0.1, 0.9])
kb.add_alias(alias='guy', entities=['Q53', 'Q007', 'Q17', 'Q44'], probabilities=[0.3, 0.3, 0.2, 0.1]) kb.add_alias(
kb.add_alias(alias='random', entities=['Q007'], probabilities=[1.0]) alias="guy",
entities=["Q53", "Q007", "Q17", "Q44"],
probabilities=[0.3, 0.3, 0.2, 0.1],
)
kb.add_alias(alias="random", entities=["Q007"], probabilities=[1.0])
return kb return kb
@ -45,30 +49,30 @@ def _get_dummy_kb(vocab):
def _check_kb(kb): def _check_kb(kb):
# check entities # check entities
assert kb.get_size_entities() == 4 assert kb.get_size_entities() == 4
for entity_string in ['Q53', 'Q17', 'Q007', 'Q44']: for entity_string in ["Q53", "Q17", "Q007", "Q44"]:
assert entity_string in kb.get_entity_strings() assert entity_string in kb.get_entity_strings()
for entity_string in ['', 'Q0']: for entity_string in ["", "Q0"]:
assert entity_string not in kb.get_entity_strings() assert entity_string not in kb.get_entity_strings()
# check aliases # check aliases
assert kb.get_size_aliases() == 3 assert kb.get_size_aliases() == 3
for alias_string in ['double07', 'guy', 'random']: for alias_string in ["double07", "guy", "random"]:
assert alias_string in kb.get_alias_strings() assert alias_string in kb.get_alias_strings()
for alias_string in ['nothingness', '', 'randomnoise']: for alias_string in ["nothingness", "", "randomnoise"]:
assert alias_string not in kb.get_alias_strings() assert alias_string not in kb.get_alias_strings()
# check candidates & probabilities # check candidates & probabilities
candidates = sorted(kb.get_candidates('double07'), key=lambda x: x.entity_) candidates = sorted(kb.get_candidates("double07"), key=lambda x: x.entity_)
assert len(candidates) == 2 assert len(candidates) == 2
assert candidates[0].entity_ == 'Q007' assert candidates[0].entity_ == "Q007"
assert 6.999 < candidates[0].entity_freq < 7.01 assert 6.999 < candidates[0].entity_freq < 7.01
assert candidates[0].entity_vector == [0, 0, 7] assert candidates[0].entity_vector == [0, 0, 7]
assert candidates[0].alias_ == 'double07' assert candidates[0].alias_ == "double07"
assert 0.899 < candidates[0].prior_prob < 0.901 assert 0.899 < candidates[0].prior_prob < 0.901
assert candidates[1].entity_ == 'Q17' assert candidates[1].entity_ == "Q17"
assert 1.99 < candidates[1].entity_freq < 2.01 assert 1.99 < candidates[1].entity_freq < 2.01
assert candidates[1].entity_vector == [7, 1, 0] assert candidates[1].entity_vector == [7, 1, 0]
assert candidates[1].alias_ == 'double07' assert candidates[1].alias_ == "double07"
assert 0.099 < candidates[1].prior_prob < 0.101 assert 0.099 < candidates[1].prior_prob < 0.101
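The helper above exercises the experimental KnowledgeBase API; a condensed standalone sketch using the same calls (spaCy 2.2-era API, empty Vocab assumed; entity IDs are illustrative):

    from spacy.kb import KnowledgeBase
    from spacy.vocab import Vocab

    kb = KnowledgeBase(vocab=Vocab(), entity_vector_length=3)
    kb.add_entity(entity="Q17", freq=2, entity_vector=[7, 1, 0])
    kb.add_entity(entity="Q007", freq=7, entity_vector=[0, 0, 7])
    kb.add_alias(alias="double07", entities=["Q17", "Q007"], probabilities=[0.1, 0.9])
    for cand in kb.get_candidates("double07"):
        print(cand.entity_, cand.prior_prob)  # Q17 0.1 and Q007 0.9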