diff --git a/spacy/cli/converters/iob2json.py b/spacy/cli/converters/iob2json.py index fabf2ae26..740f29001 100644 --- a/spacy/cli/converters/iob2json.py +++ b/spacy/cli/converters/iob2json.py @@ -1,7 +1,6 @@ # coding: utf8 from __future__ import unicode_literals -import re from wasabi import Printer from ...gold import iob_to_biluo diff --git a/spacy/lang/pt/__init__.py b/spacy/lang/pt/__init__.py index ae3b52165..1b99a4a84 100644 --- a/spacy/lang/pt/__init__.py +++ b/spacy/lang/pt/__init__.py @@ -1,8 +1,6 @@ # coding: utf8 from __future__ import unicode_literals -from pathlib import Path - from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS from .stop_words import STOP_WORDS from .lex_attrs import LEX_ATTRS diff --git a/spacy/lang/ro/tag_map.py b/spacy/lang/ro/tag_map.py index 7632491ee..cb5239809 100644 --- a/spacy/lang/ro/tag_map.py +++ b/spacy/lang/ro/tag_map.py @@ -1,2085 +1,1654 @@ from __future__ import unicode_literals -from ...symbols import POS, ADJ, ADP, ADV, CONJ, INTJ, NOUN, NUM, PART +from ...symbols import POS, ADJ, ADP, ADV, INTJ, NOUN, NUM, PART from ...symbols import PRON, PROPN, PUNCT, SYM, VERB, X, CCONJ, SCONJ, DET, AUX -TAG_MAP = { - "Afcfson":{ - "Case":"Dat,Gen", - "Degree":"Cmp", - "Gender":"Fem", - "Number":"Sing", - POS:ADJ - }, - "Afcfsrn":{ - "Case":"Acc,Nom", - "Degree":"Cmp", - "Gender":"Fem", - "Number":"Sing", - POS:ADJ - }, - "Afp":{ - "Degree":"Pos", - POS:ADJ - }, - "Afp-p-n":{ - "Degree":"Pos", - "Number":"Plur", - POS:ADJ - }, - "Afp-p-ny":{ - "Degree":"Pos", - "Number":"Plur", - POS:ADJ, - "Variant":"Short" - }, - "Afp-poy":{ - "Case":"Dat,Gen", - "Degree":"Pos", - "Number":"Plur", - POS:ADJ - }, - "Afpf--n":{ - "Degree":"Pos", - "Gender":"Fem", - POS:ADJ - }, - "Afpfp-n":{ - "Degree":"Pos", - "Gender":"Fem", - "Number":"Plur", - POS:ADJ - }, - "Afpfpoy":{ - "Case":"Dat,Gen", - "Degree":"Pos", - "Gender":"Fem", - "Number":"Plur", - POS:ADJ - }, - "Afpfpry":{ - "Case":"Acc,Nom", - "Degree":"Pos", - "Gender":"Fem", - "Number":"Plur", - POS:ADJ - }, - "Afpfson":{ - "Case":"Dat,Gen", - "Degree":"Pos", - "Gender":"Fem", - "Number":"Sing", - POS:ADJ - }, - "Afpfsoy":{ - "Case":"Dat,Gen", - "Degree":"Pos", - "Gender":"Fem", - "Number":"Sing", - POS:ADJ - }, - "Afpfsrn":{ - "Case":"Acc,Nom", - "Degree":"Pos", - "Gender":"Fem", - "Number":"Sing", - POS:ADJ - }, - "Afpfsry":{ - "Case":"Acc,Nom", - "Degree":"Pos", - "Gender":"Fem", - "Number":"Sing", - POS:ADJ - }, - "Afpmp-n":{ - "Degree":"Pos", - "Gender":"Masc", - "Number":"Plur", - POS:ADJ - }, - "Afpmpoy":{ - "Case":"Dat,Gen", - "Degree":"Pos", - "Gender":"Masc", - "Number":"Plur", - POS:ADJ - }, - "Afpmpry":{ - "Case":"Acc,Nom", - "Degree":"Pos", - "Gender":"Masc", - "Number":"Plur", - POS:ADJ - }, - "Afpms-n":{ - "Degree":"Pos", - "Gender":"Masc", - "Number":"Sing", - POS:ADJ - }, - "Afpmsoy":{ - "Case":"Dat,Gen", - "Degree":"Pos", - "Gender":"Masc", - "Number":"Sing", - POS:ADJ - }, - "Afpmsry":{ - "Case":"Acc,Nom", - "Degree":"Pos", - "Gender":"Masc", - "Number":"Sing", - POS:ADJ - }, - "COLON":{ - POS:PUNCT - }, - "COMMA":{ - POS:PUNCT - }, - "Ccssp":{ - POS:CCONJ, - "Polarity":"Pos" - }, - "Crssp":{ - POS:CCONJ, - "Polarity":"Pos" - }, - "Csssp":{ - POS:SCONJ, - "Polarity":"Pos" - }, - "Cssspy":{ - POS:SCONJ, - "Polarity":"Pos", - "Variant":"Short" - }, - "DASH":{ - POS:PUNCT - }, - "DBLQ":{ - POS:PUNCT - }, - "Dd3-po---e":{ - "Case":"Dat,Gen", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Dem" - }, - "Dd3fpr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Dem" - }, - "Dd3fpr---e":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Dem" - }, - "Dd3fso---e":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Dem" - }, - "Dd3fso---o":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Dem" - }, - "Dd3fsr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Dem" - }, - "Dd3fsr---e":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Dem" - }, - "Dd3fsr---o":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Dem" - }, - "Dd3mpo":{ - "Case":"Dat,Gen", - "Gender":"Masc", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Dem" - }, - "Dd3mpr---e":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Dem" - }, - "Dd3mso---e":{ - "Case":"Dat,Gen", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Dem" - }, - "Dd3msr---e":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Dem" - }, - "Dd3msr---o":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Dem" - }, - "Dh3fsr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Emp" - }, - "Dh3mp":{ - "Gender":"Masc", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Emp" - }, - "Dh3ms":{ - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Emp" - }, - "Di3":{ - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3--r---e":{ - "Case":"Acc,Nom", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3-po":{ - "Case":"Dat,Gen", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3-po---e":{ - "Case":"Dat,Gen", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3-sr":{ - "Case":"Acc,Nom", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3-sr---e":{ - "Case":"Acc,Nom", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3fp":{ - "Gender":"Fem", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3fpr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3fpr---e":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3fso---e":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3fsr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3fsr---e":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3mp":{ - "Gender":"Masc", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3mpr":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3mpr---e":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3ms":{ - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3ms----e":{ - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3mso---e":{ - "Case":"Dat,Gen", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3msr":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Di3msr---e":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Ind" - }, - "Ds1fp-s":{ - "Gender":"Fem", - "Number":"Plur", - POS:DET, - "Person":"1", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ds1fsos":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"1", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ds1fsrp":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"1", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ds1fsrs":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"1", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ds1ms-p":{ - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"1", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ds1ms-s":{ - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"1", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ds2---s":{ - POS:DET, - "Person":"2", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ds2fsrs":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"2", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ds3---p":{ - POS:DET, - "Person":"3", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ds3---s":{ - POS:DET, - "Person":"3", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ds3fp-s":{ - "Gender":"Fem", - "Number":"Plur", - POS:DET, - "Person":"3", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ds3fsos":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"3", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ds3fsrs":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"3", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ds3ms-s":{ - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"3", - "Poss":"Yes", - "PronType":"Prs" - }, - "Dw3--r---e":{ - "Case":"Acc,Nom", - POS:DET, - "Person":"3", - "PronType":"Int,Rel" - }, - "Dw3fpr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Plur", - POS:DET, - "Person":"3", - "PronType":"Int,Rel" - }, - "Dw3mso---e":{ - "Case":"Dat,Gen", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Int,Rel" - }, - "Dz3fsr---e":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Neg" - }, - "Dz3msr---e":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Person":"3", - "PronType":"Neg" - }, - "EQUAL":{ - POS:SYM - }, - "EXCL":{ - POS:PUNCT - }, - "GT":{ - POS:SYM - }, - "I":{ - POS:INTJ - }, - "LPAR":{ - POS:PUNCT - }, - "Mc":{ - "NumType":"Card", - POS:NUM - }, - "Mc-p-d":{ - "NumForm":"Digit", - "NumType":"Card", - "Number":"Plur", - POS:NUM - }, - "Mc-p-l":{ - "NumForm":"Word", - "NumType":"Card", - "Number":"Plur", - POS:NUM - }, - "Mcfp-l":{ - "Gender":"Fem", - "NumForm":"Word", - "NumType":"Card", - "Number":"Plur", - POS:NUM - }, - "Mcfp-ln":{ - "Gender":"Fem", - "NumForm":"Word", - "NumType":"Card", - "Number":"Plur", - POS:NUM - }, - "Mcfsrln":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "NumForm":"Word", - "NumType":"Card", - "Number":"Sing", - POS:NUM - }, - "Mcmp-l":{ - "Gender":"Masc", - "NumForm":"Word", - "NumType":"Card", - "Number":"Plur", - POS:NUM - }, - "Mcmsrl":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "NumForm":"Word", - "NumType":"Card", - "Number":"Sing", - POS:NUM - }, - "Mffprln":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "NumForm":"Word", - "NumType":"Card", - "Number":"Plur", - POS:NUM - }, - "Mlfpo":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "NumType":"Card", - "Number":"Plur", - POS:NUM, - "PronType":"Tot" - }, - "Mlfpr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "NumType":"Card", - "Number":"Plur", - POS:NUM, - "PronType":"Tot" - }, - "Mlmpr":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "NumType":"Card", - "Number":"Plur", - POS:NUM, - "PronType":"Tot" - }, - "Mo---l":{ - "NumForm":"Word", - "NumType":"Ord", - POS:NUM - }, - "Mo-s-r":{ - "NumForm":"Roman", - "NumType":"Ord", - "Number":"Sing", - POS:NUM - }, - "Mofp-ln":{ - "Gender":"Fem", - "NumForm":"Word", - "NumType":"Ord", - "Number":"Plur", - POS:NUM - }, - "Mofprly":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "NumForm":"Word", - "NumType":"Ord", - "Number":"Plur", - POS:NUM - }, - "Mofs-l":{ - "Gender":"Fem", - "NumForm":"Word", - "NumType":"Ord", - "Number":"Sing", - POS:NUM - }, - "Mofsrln":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "NumForm":"Word", - "NumType":"Ord", - "Number":"Sing", - POS:NUM - }, - "Mofsrly":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "NumForm":"Word", - "NumType":"Ord", - "Number":"Sing", - POS:NUM - }, - "Momprly":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "NumForm":"Word", - "NumType":"Ord", - "Number":"Plur", - POS:NUM - }, - "Moms-l":{ - "Gender":"Masc", - "NumForm":"Word", - "NumType":"Ord", - "Number":"Sing", - POS:NUM - }, - "Moms-ln":{ - "Gender":"Masc", - "NumForm":"Word", - "NumType":"Ord", - "Number":"Sing", - POS:NUM - }, - "Momsoly":{ - "Case":"Dat,Gen", - "Gender":"Masc", - "NumForm":"Word", - "NumType":"Ord", - "Number":"Sing", - POS:NUM - }, - "Momsrly":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "NumForm":"Word", - "NumType":"Ord", - "Number":"Sing", - POS:NUM - }, - "Nc":{ - POS:NOUN - }, - "Ncf--n":{ - "Gender":"Fem", - POS:NOUN - }, - "Ncfp-n":{ - "Gender":"Fem", - "Number":"Plur", - POS:NOUN - }, - "Ncfpoy":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Plur", - POS:NOUN - }, - "Ncfpry":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Plur", - POS:NOUN - }, - "Ncfson":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Sing", - POS:NOUN - }, - "Ncfsoy":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Sing", - POS:NOUN - }, - "Ncfsrn":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:NOUN - }, - "Ncfsry":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:NOUN - }, - "Ncm--n":{ - "Gender":"Masc", - POS:NOUN - }, - "Ncmp-n":{ - "Gender":"Masc", - "Number":"Plur", - POS:NOUN - }, - "Ncmpoy":{ - "Case":"Dat,Gen", - "Gender":"Masc", - "Number":"Plur", - POS:NOUN - }, - "Ncmpry":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Plur", - POS:NOUN - }, - "Ncms-n":{ - "Gender":"Masc", - "Number":"Sing", - POS:NOUN - }, - "Ncms-ny":{ - "Gender":"Masc", - "Number":"Sing", - POS:NOUN, - "Variant":"Short" - }, - "Ncmsoy":{ - "Case":"Dat,Gen", - "Gender":"Masc", - "Number":"Sing", - POS:NOUN - }, - "Ncmsrn":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:NOUN - }, - "Ncmsry":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:NOUN - }, - "Np":{ - POS:PROPN - }, - "Npfsoy":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Sing", - POS:PROPN - }, - "Npfsry":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:PROPN - }, - "Npmsoy":{ - "Case":"Dat,Gen", - "Gender":"Masc", - "Number":"Sing", - POS:PROPN - }, - "Npmsry":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:PROPN - }, - "PERCENT":{ - POS:SYM - }, - "PERIOD":{ - POS:PUNCT - }, - "PLUSMINUS":{ - POS:SYM - }, - "Pd3-po":{ - "Case":"Dat,Gen", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Dem" - }, - "Pd3fpr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Dem" - }, - "Pd3fso":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Dem" - }, - "Pd3fsr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Dem" - }, - "Pd3mpr":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Dem" - }, - "Pd3mso":{ - "Case":"Dat,Gen", - "Gender":"Masc", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Dem" - }, - "Pd3msr":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Dem" - }, - "Pi3--r":{ - "Case":"Acc,Nom", - POS:PRON, - "Person":"3", - "PronType":"Ind" - }, - "Pi3-po":{ - "Case":"Dat,Gen", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Ind" - }, - "Pi3-so":{ - "Case":"Dat,Gen", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Ind" - }, - "Pi3-sr":{ - "Case":"Acc,Nom", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Ind" - }, - "Pi3fpr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Ind" - }, - "Pi3fso":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Ind" - }, - "Pi3fsr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Ind" - }, - "Pi3mpr":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Ind" - }, - "Pi3msr":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Ind" - }, - "Pi3msr--y":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Ind", - "Variant":"Short" - }, - "Pp1-pa--------w":{ - "Case":"Acc", - "Number":"Plur", - POS:PRON, - "Person":"1", - "PronType":"Prs", - }, - "Pp1-pa--y-----w":{ - "Case":"Acc", - "Number":"Plur", - POS:PRON, - "Person":"1", - "PronType":"Prs", - "Variant":"Short" - }, - "Pp1-pd--------w":{ - "Case":"Dat", - "Number":"Plur", - POS:PRON, - "Person":"1", - "PronType":"Prs" - }, - "Pp1-pr--------s":{ - "Case":"Acc,Nom", - "Number":"Plur", - POS:PRON, - "Person":"1", - "PronType":"Prs" - }, - "Pp1-sa--------s":{ - "Case":"Acc", - "Number":"Sing", - POS:PRON, - "Person":"1", - "PronType":"Prs" - }, - "Pp1-sa--------w":{ - "Case":"Acc", - "Number":"Sing", - POS:PRON, - "Person":"1", - "PronType":"Prs" - }, - "Pp1-sa--y-----w":{ - "Case":"Acc", - "Number":"Sing", - POS:PRON, - "Person":"1", - "PronType":"Prs", - "Variant":"Short" - }, - "Pp1-sd--------w":{ - "Case":"Dat", - "Number":"Sing", - POS:PRON, - "Person":"1", - "PronType":"Prs", - }, - "Pp1-sd--y-----w":{ - "Case":"Dat", - "Number":"Sing", - POS:PRON, - "Person":"1", - "PronType":"Prs", - "Variant":"Short" - }, - "Pp1-sn--------s":{ - "Case":"Nom", - "Number":"Sing", - POS:PRON, - "Person":"1", - "PronType":"Prs", - }, - "Pp2-----------s":{ - POS:PRON, - "Person":"2", - "PronType":"Prs" - }, - "Pp2-pa--------w":{ - "Case":"Acc", - "Number":"Plur", - POS:PRON, - "Person":"2", - "PronType":"Prs" - }, - "Pp2-pa--y-----w":{ - "Case":"Acc", - "Number":"Plur", - POS:PRON, - "Person":"2", - "PronType":"Prs", - "Variant":"Short" - }, - "Pp2-pd--------w":{ - "Case":"Dat", - "Number":"Plur", - POS:PRON, - "Person":"2", - "PronType":"Prs", - }, - "Pp2-pr--------s":{ - "Case":"Acc,Nom", - "Number":"Plur", - POS:PRON, - "Person":"2", - "PronType":"Prs", - }, - "Pp2-sa--------s":{ - "Case":"Acc", - "Number":"Sing", - POS:PRON, - "Person":"2", - "PronType":"Prs", - }, - "Pp2-sa--------w":{ - "Case":"Acc", - "Number":"Sing", - POS:PRON, - "Person":"2", - "PronType":"Prs", - }, - "Pp2-sa--y-----w":{ - "Case":"Acc", - "Number":"Sing", - POS:PRON, - "Person":"2", - "PronType":"Prs", - "Variant":"Short" - }, - "Pp2-sd--y-----w":{ - "Case":"Dat", - "Number":"Sing", - POS:PRON, - "Person":"2", - "PronType":"Prs", - "Variant":"Short" - }, - "Pp2-sn--------s":{ - "Case":"Nom", - "Number":"Sing", - POS:PRON, - "Person":"2", - "PronType":"Prs", - }, - "Pp3-pd--------w":{ - "Case":"Dat", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Prs", - }, - "Pp3-pd--y-----w":{ - "Case":"Dat", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Prs", - "Variant":"Short" - }, - "Pp3-po--------s":{ - "Case":"Dat,Gen", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Prs", - }, - "Pp3-sd--------w":{ - "Case":"Dat", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Prs", - }, - "Pp3-sd--y-----w":{ - "Case":"Dat", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Prs", - "Variant":"Short" - }, - "Pp3fpa--------w":{ - "Case":"Acc", - "Gender":"Fem", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Prs", - }, - "Pp3fpa--y-----w":{ - "Case":"Acc", - "Gender":"Fem", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Prs", - "Variant":"Short" - }, - "Pp3fpr--------s":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Prs", - }, - "Pp3fsa--------w":{ - "Case":"Acc", - "Gender":"Fem", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Prs", - }, - "Pp3fsa--y-----w":{ - "Case":"Acc", - "Gender":"Fem", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Prs", - "Variant":"Short" - }, - "Pp3fsr--------s":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Prs", - }, - "Pp3mpa--------w":{ - "Case":"Acc", - "Gender":"Masc", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Prs", - }, - "Pp3mpa--y-----w":{ - "Case":"Acc", - "Gender":"Masc", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Prs", - "Variant":"Short" - }, - "Pp3mpr--------s":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Prs", - }, - "Pp3msa--------w":{ - "Case":"Acc", - "Gender":"Masc", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Prs", - }, - "Pp3msa--y-----w":{ - "Case":"Acc", - "Gender":"Masc", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Prs", - "Variant":"Short" - }, - "Pp3mso--------s":{ - "Case":"Dat,Gen", - "Gender":"Masc", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Prs", - }, - "Pp3msr--------s":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Prs", - }, - "Ps1mp-s":{ - "Gender":"Masc", - "Number":"Plur", - POS:PRON, - "Person":"1", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ps3---p":{ - POS:PRON, - "Person":"3", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ps3---s":{ - POS:PRON, - "Person":"3", - "Poss":"Yes", - "PronType":"Prs" - }, - "Ps3fp-s":{ - "Gender":"Fem", - "Number":"Plur", - POS:PRON, - "Person":"3", - "Poss":"Yes", - "PronType":"Prs" - }, - "Pw3--r":{ - "Case":"Acc,Nom", - POS:PRON, - "Person":"3", - "PronType":"Int,Rel" - }, - "Pw3-po":{ - "Case":"Dat,Gen", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Int,Rel" - }, - "Pw3fso":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Int,Rel" - }, - "Pw3mpr":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Plur", - POS:PRON, - "Person":"3", - "PronType":"Int,Rel" - }, - "Px3--a--------s":{ - "Case":"Acc", - POS:PRON, - "Person":"3", - "PronType":"Prs", - "Reflex":"Yes", - }, - "Px3--a--------w":{ - "Case":"Acc", - POS:PRON, - "Person":"3", - "PronType":"Prs", - "Reflex":"Yes", - }, - "Px3--a--y-----w":{ - "Case":"Acc", - POS:PRON, - "Person":"3", - "PronType":"Prs", - "Reflex":"Yes", - "Variant":"Short" - }, - "Px3--d--------w":{ - "Case":"Dat", - POS:PRON, - "Person":"3", - "PronType":"Prs", - "Reflex":"Yes", - }, - "Px3--d--y-----w":{ - "Case":"Dat", - POS:PRON, - "Person":"3", - "PronType":"Prs", - "Reflex":"Yes", - "Variant":"Short" - }, - "Pz3-sr":{ - "Case":"Acc,Nom", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Neg" - }, - "Pz3msr":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:PRON, - "Person":"3", - "PronType":"Neg" - }, - "QUEST":{ - POS:PUNCT - }, - "QUOT":{ - POS:PUNCT - }, - "Qn":{ - POS:PART, - "PartType":"Inf" - }, - "Qs":{ - "Mood":"Sub", - POS:PART - }, - "Qs-y":{ - "Mood":"Sub", - POS:PART, - "Variant":"Short" - }, - "Qz":{ - POS:PART, - "Polarity":"Neg" - }, - "Qz-y":{ - POS:PART, - "Polarity":"Neg", - "Variant":"Short" - }, - "RPAR":{ - POS:PUNCT - }, - "Rc":{ - POS:ADV - }, - "Rgp":{ - "Degree":"Pos", - POS:ADV - }, - "Rgpy":{ - "Degree":"Pos", - POS:ADV, - "Variant":"Short" - }, - "Rgs":{ - "Degree":"Sup", - POS:ADV - }, - "Rp":{ - POS:ADV - }, - "Rw":{ - POS:ADV, - "PronType":"Int,Rel" - }, - "Rz":{ - POS:ADV, - "PronType":"Neg" - }, - "SCOLON":{ - "AdpType":"Prep", - POS:PUNCT - }, - "SLASH":{ - "AdpType":"Prep", - POS:SYM - }, - "Spsa":{ - "AdpType":"Prep", - "Case":"Acc", - POS:ADP - }, - "Spsay":{ - "AdpType":"Prep", - "Case":"Acc", - POS:ADP, - "Variant":"Short" - }, - "Spsd":{ - "AdpType":"Prep", - "Case":"Dat", - POS:ADP - }, - "Spsg":{ - "AdpType":"Prep", - "Case":"Gen", - POS:ADP - }, - "Spsgy":{ - "AdpType":"Prep", - "Case":"Gen", - POS:ADP, - "Variant":"Short" - }, - "Td-po":{ - "Case":"Dat,Gen", - "Number":"Plur", - POS:DET, - "PronType":"Dem" - }, - "Tdfpr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Plur", - POS:DET, - "PronType":"Dem" - }, - "Tdfso":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "PronType":"Dem" - }, - "Tdfsr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "PronType":"Dem" - }, - "Tdmpr":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Plur", - POS:DET, - "PronType":"Dem" - }, - "Tdmso":{ - "Case":"Dat,Gen", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "PronType":"Dem" - }, - "Tdmsr":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "PronType":"Dem" - }, - "Tf-so":{ - "Case":"Dat,Gen", - "Number":"Sing", - POS:DET, - "PronType":"Art" - }, - "Tffs-y":{ - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "PronType":"Art", - "Variant":"Short" - }, - "Tfms-y":{ - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "PronType":"Art", - "Variant":"Short" - }, - "Tfmsoy":{ - "Case":"Dat,Gen", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "PronType":"Art", - "Variant":"Short" - }, - "Tfmsry":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "PronType":"Art", - "Variant":"Short" - }, - "Ti-po":{ - "Case":"Dat,Gen", - "Number":"Plur", - POS:DET, - "PronType":"Ind" - }, - "Tifp-y":{ - "Gender":"Fem", - "Number":"Plur", - POS:DET, - "PronType":"Ind", - "Variant":"Short" - }, - "Tifso":{ - "Case":"Dat,Gen", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "PronType":"Ind" - }, - "Tifsr":{ - "Case":"Acc,Nom", - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "PronType":"Ind" - }, - "Timso":{ - "Case":"Dat,Gen", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "PronType":"Ind" - }, - "Timsr":{ - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "PronType":"Ind" - }, - "Tsfp":{ - "Gender":"Fem", - "Number":"Plur", - POS:DET, - "Poss":"Yes", - "PronType":"Prs" - }, - "Tsfs":{ - "Gender":"Fem", - "Number":"Sing", - POS:DET, - "Poss":"Yes", - "PronType":"Prs" - }, - "Tsmp":{ - "Gender":"Masc", - "Number":"Plur", - POS:DET, - "Poss":"Yes", - "PronType":"Prs" - }, - "Tsms":{ - "Gender":"Masc", - "Number":"Sing", - POS:DET, - "Poss":"Yes", - "PronType":"Prs" - }, - "Va--1":{ - POS:AUX, - "Person":"1" - }, - "Va--1p":{ - "Number":"Plur", - POS:AUX, - "Person":"1" - }, - "Va--1s":{ - "Number":"Sing", - POS:AUX, - "Person":"1" - }, - "Va--2p":{ - "Number":"Plur", - POS:AUX, - "Person":"2" - }, - "Va--2s":{ - "Number":"Sing", - POS:AUX, - "Person":"2" - }, - "Va--3":{ - POS:AUX, - "Person":"3" - }, - "Va--3-----y":{ - POS:AUX, - "Person":"3", - "Variant":"Short" - }, - "Va--3p":{ - "Number":"Plur", - POS:AUX, - "Person":"3" - }, - "Va--3p----y":{ - "Number":"Plur", - POS:AUX, - "Person":"3", - "Variant":"Short" - }, - "Va--3s":{ - "Number":"Sing", - POS:AUX, - "Person":"3" - }, - "Va--3s----y":{ - "Number":"Sing", - POS:AUX, - "Person":"3", - "Variant":"Short" - }, - "Vag":{ - POS:AUX, - "VerbForm":"Ger" - }, - "Vaii3p":{ - "Mood":"Ind", - "Number":"Plur", - POS:AUX, - "Person":"3", - "Tense":"Imp", - "VerbForm":"Fin" - }, - "Vaii3s":{ - "Mood":"Ind", - "Number":"Sing", - POS:AUX, - "Person":"3", - "Tense":"Imp", - "VerbForm":"Fin" - }, - "Vail3s":{ - "Mood":"Ind", - "Number":"Sing", - POS:AUX, - "Person":"3", - "Tense":"Pqp", - "VerbForm":"Fin" - }, - "Vaip1s":{ - "Mood":"Ind", - "Number":"Sing", - POS:AUX, - "Person":"1", - "Tense":"Pres", - "VerbForm":"Fin" - }, - "Vaip2s":{ - "Mood":"Ind", - "Number":"Sing", - POS:AUX, - "Person":"2", - "Tense":"Pres", - "VerbForm":"Fin" - }, - "Vaip3p":{ - "Mood":"Ind", - "Number":"Plur", - POS:AUX, - "Person":"3", - "Tense":"Pres", - "VerbForm":"Fin" - }, - "Vaip3s":{ - "Mood":"Ind", - "Number":"Sing", - POS:AUX, - "Person":"3", - "Tense":"Pres", - "VerbForm":"Fin" - }, - "Vanp":{ - POS:AUX, - "Tense":"Pres", - "VerbForm":"Inf" - }, - "Vap--sm":{ - "Gender":"Masc", - "Number":"Sing", - POS:AUX, - "VerbForm":"Part" - }, - "Vasp3":{ - "Mood":"Sub", - POS:AUX, - "Person":"3", - "Tense":"Pres", - "VerbForm":"Fin" - }, - "Vmg":{ - POS:VERB, - "VerbForm":"Ger" - }, - "Vmg-------y":{ - POS:VERB, - "Variant":"Short", - "VerbForm":"Ger" - }, - "Vmii1":{ - "Mood":"Ind", - POS:VERB, - "Person":"1", - "Tense":"Imp", - "VerbForm":"Fin" - }, - "Vmii1-----y":{ - "Mood":"Ind", - POS:VERB, - "Person":"1", - "Tense":"Imp", - "Variant":"Short", - "VerbForm":"Fin" - }, - "Vmii2p":{ - "Mood":"Ind", - "Number":"Plur", - POS:VERB, - "Person":"2", - "Tense":"Imp", - "VerbForm":"Fin" - }, - "Vmii2s":{ - "Mood":"Ind", - "Number":"Sing", - POS:VERB, - "Person":"2", - "Tense":"Imp", - "VerbForm":"Fin" - }, - "Vmii3p":{ - "Mood":"Ind", - "Number":"Plur", - POS:VERB, - "Person":"3", - "Tense":"Imp", - "VerbForm":"Fin" - }, - "Vmii3p----y":{ - "Mood":"Ind", - "Number":"Plur", - POS:VERB, - "Person":"3", - "Tense":"Imp", - "Variant":"Short", - "VerbForm":"Fin" - }, - "Vmii3s":{ - "Mood":"Ind", - "Number":"Sing", - POS:VERB, - "Person":"3", - "Tense":"Imp", - "VerbForm":"Fin" - }, - "Vmil3p":{ - "Mood":"Ind", - "Number":"Plur", - POS:VERB, - "Person":"3", - "Tense":"Pqp", - "VerbForm":"Fin" - }, - "Vmil3s":{ - "Mood":"Ind", - "Number":"Sing", - POS:VERB, - "Person":"3", - "Tense":"Pqp", - "VerbForm":"Fin" - }, - "Vmip1p":{ - "Mood":"Ind", - "Number":"Plur", - POS:VERB, - "Person":"1", - "Tense":"Pres", - "VerbForm":"Fin" - }, - "Vmip1s":{ - "Mood":"Ind", - "Number":"Sing", - POS:VERB, - "Person":"1", - "Tense":"Pres", - "VerbForm":"Fin" - }, - "Vmip1s----y":{ - "Mood":"Ind", - "Number":"Sing", - POS:VERB, - "Person":"1", - "Tense":"Pres", - "Variant":"Short", - "VerbForm":"Fin" - }, - "Vmip2p":{ - "Mood":"Ind", - "Number":"Plur", - POS:VERB, - "Person":"2", - "Tense":"Pres", - "VerbForm":"Fin" - }, - "Vmip2s":{ - "Mood":"Ind", - "Number":"Sing", - POS:VERB, - "Person":"2", - "Tense":"Pres", - "VerbForm":"Fin" - }, - "Vmip3":{ - "Mood":"Ind", - POS:VERB, - "Person":"3", - "Tense":"Pres", - "VerbForm":"Fin" - }, - "Vmip3-----y":{ - "Mood":"Ind", - POS:VERB, - "Person":"3", - "Tense":"Pres", - "Variant":"Short", - "VerbForm":"Fin" - }, - "Vmip3p":{ - "Mood":"Ind", - "Number":"Plur", - POS:AUX, - "Person":"3", - "Tense":"Pres", - "VerbForm":"Fin" - }, - "Vmip3s":{ - "Mood":"Ind", - "Number":"Sing", - POS:VERB, - "Person":"3", - "Tense":"Pres", - "VerbForm":"Fin" - }, - "Vmip3s----y":{ - "Mood":"Ind", - "Number":"Sing", - POS:AUX, - "Person":"3", - "Tense":"Pres", - "Variant":"Short", - "VerbForm":"Fin" - }, - "Vmis1p":{ - "Mood":"Ind", - "Number":"Plur", - POS:VERB, - "Person":"1", - "Tense":"Past", - "VerbForm":"Fin" - }, - "Vmis1s":{ - "Mood":"Ind", - "Number":"Sing", - POS:VERB, - "Person":"1", - "Tense":"Past", - "VerbForm":"Fin" - }, - "Vmis3p":{ - "Mood":"Ind", - "Number":"Plur", - POS:VERB, - "Person":"3", - "Tense":"Past", - "VerbForm":"Fin" - }, - "Vmis3s":{ - "Mood":"Ind", - "Number":"Sing", - POS:VERB, - "Person":"3", - "Tense":"Past", - "VerbForm":"Fin" - }, - "Vmm-2p":{ - "Mood":"Imp", - "Number":"Plur", - POS:VERB, - "Person":"2", - "VerbForm":"Fin" - }, - "Vmm-2s":{ - "Mood":"Imp", - "Number":"Sing", - POS:VERB, - "Person":"2", - "VerbForm":"Fin" - }, - "Vmnp":{ - POS:VERB, - "Tense":"Pres", - "VerbForm":"Inf" - }, - "Vmp--pf":{ - "Gender":"Fem", - "Number":"Plur", - POS:VERB, - "VerbForm":"Part" - }, - "Vmp--pm":{ - "Gender":"Masc", - "Number":"Plur", - POS:VERB, - "VerbForm":"Part" - }, - "Vmp--sf":{ - "Gender":"Fem", - "Number":"Sing", - POS:VERB, - "VerbForm":"Part" - }, - "Vmp--sm":{ - "Gender":"Masc", - "Number":"Sing", - POS:VERB, - "VerbForm":"Part" - }, - "Vmsp3":{ - "Mood":"Sub", - POS:VERB, - "Person":"3", - "Tense":"Pres", - "VerbForm":"Fin" - }, - "Vmsp3-----y":{ - "Mood":"Sub", - POS:VERB, - "Person":"3", - "Tense":"Pres", - "Variant":"Short", - "VerbForm":"Fin" - }, - "X":{ - POS:X - }, - "Y":{ - "Abbr":"Yes", - POS:X - }, - "Yn":{ - "Abbr":"Yes", - POS:NOUN - }, - "Ynmsry":{ - "Abbr":"Yes", - "Case":"Acc,Nom", - "Gender":"Masc", - "Number":"Sing", - POS:NOUN - } - } \ No newline at end of file +TAG_MAP = { + "Afcfson": { + "Case": "Dat,Gen", + "Degree": "Cmp", + "Gender": "Fem", + "Number": "Sing", + POS: ADJ, + }, + "Afcfsrn": { + "Case": "Acc,Nom", + "Degree": "Cmp", + "Gender": "Fem", + "Number": "Sing", + POS: ADJ, + }, + "Afp": {"Degree": "Pos", POS: ADJ}, + "Afp-p-n": {"Degree": "Pos", "Number": "Plur", POS: ADJ}, + "Afp-p-ny": {"Degree": "Pos", "Number": "Plur", POS: ADJ, "Variant": "Short"}, + "Afp-poy": {"Case": "Dat,Gen", "Degree": "Pos", "Number": "Plur", POS: ADJ}, + "Afpf--n": {"Degree": "Pos", "Gender": "Fem", POS: ADJ}, + "Afpfp-n": {"Degree": "Pos", "Gender": "Fem", "Number": "Plur", POS: ADJ}, + "Afpfpoy": { + "Case": "Dat,Gen", + "Degree": "Pos", + "Gender": "Fem", + "Number": "Plur", + POS: ADJ, + }, + "Afpfpry": { + "Case": "Acc,Nom", + "Degree": "Pos", + "Gender": "Fem", + "Number": "Plur", + POS: ADJ, + }, + "Afpfson": { + "Case": "Dat,Gen", + "Degree": "Pos", + "Gender": "Fem", + "Number": "Sing", + POS: ADJ, + }, + "Afpfsoy": { + "Case": "Dat,Gen", + "Degree": "Pos", + "Gender": "Fem", + "Number": "Sing", + POS: ADJ, + }, + "Afpfsrn": { + "Case": "Acc,Nom", + "Degree": "Pos", + "Gender": "Fem", + "Number": "Sing", + POS: ADJ, + }, + "Afpfsry": { + "Case": "Acc,Nom", + "Degree": "Pos", + "Gender": "Fem", + "Number": "Sing", + POS: ADJ, + }, + "Afpmp-n": {"Degree": "Pos", "Gender": "Masc", "Number": "Plur", POS: ADJ}, + "Afpmpoy": { + "Case": "Dat,Gen", + "Degree": "Pos", + "Gender": "Masc", + "Number": "Plur", + POS: ADJ, + }, + "Afpmpry": { + "Case": "Acc,Nom", + "Degree": "Pos", + "Gender": "Masc", + "Number": "Plur", + POS: ADJ, + }, + "Afpms-n": {"Degree": "Pos", "Gender": "Masc", "Number": "Sing", POS: ADJ}, + "Afpmsoy": { + "Case": "Dat,Gen", + "Degree": "Pos", + "Gender": "Masc", + "Number": "Sing", + POS: ADJ, + }, + "Afpmsry": { + "Case": "Acc,Nom", + "Degree": "Pos", + "Gender": "Masc", + "Number": "Sing", + POS: ADJ, + }, + "COLON": {POS: PUNCT}, + "COMMA": {POS: PUNCT}, + "Ccssp": {POS: CCONJ, "Polarity": "Pos"}, + "Crssp": {POS: CCONJ, "Polarity": "Pos"}, + "Csssp": {POS: SCONJ, "Polarity": "Pos"}, + "Cssspy": {POS: SCONJ, "Polarity": "Pos", "Variant": "Short"}, + "DASH": {POS: PUNCT}, + "DBLQ": {POS: PUNCT}, + "Dd3-po---e": { + "Case": "Dat,Gen", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Dem", + }, + "Dd3fpr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Dem", + }, + "Dd3fpr---e": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Dem", + }, + "Dd3fso---e": { + "Case": "Dat,Gen", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Dem", + }, + "Dd3fso---o": { + "Case": "Dat,Gen", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Dem", + }, + "Dd3fsr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Dem", + }, + "Dd3fsr---e": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Dem", + }, + "Dd3fsr---o": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Dem", + }, + "Dd3mpo": { + "Case": "Dat,Gen", + "Gender": "Masc", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Dem", + }, + "Dd3mpr---e": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Dem", + }, + "Dd3mso---e": { + "Case": "Dat,Gen", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Dem", + }, + "Dd3msr---e": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Dem", + }, + "Dd3msr---o": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Dem", + }, + "Dh3fsr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Emp", + }, + "Dh3mp": { + "Gender": "Masc", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Emp", + }, + "Dh3ms": { + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Emp", + }, + "Di3": {POS: DET, "Person": "3", "PronType": "Ind"}, + "Di3--r---e": {"Case": "Acc,Nom", POS: DET, "Person": "3", "PronType": "Ind"}, + "Di3-po": { + "Case": "Dat,Gen", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3-po---e": { + "Case": "Dat,Gen", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3-sr": { + "Case": "Acc,Nom", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3-sr---e": { + "Case": "Acc,Nom", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3fp": { + "Gender": "Fem", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3fpr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3fpr---e": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3fso---e": { + "Case": "Dat,Gen", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3fsr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3fsr---e": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3mp": { + "Gender": "Masc", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3mpr": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3mpr---e": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3ms": { + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3ms----e": { + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3mso---e": { + "Case": "Dat,Gen", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3msr": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Di3msr---e": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Ind", + }, + "Ds1fp-s": { + "Gender": "Fem", + "Number": "Plur", + POS: DET, + "Person": "1", + "Poss": "Yes", + "PronType": "Prs", + }, + "Ds1fsos": { + "Case": "Dat,Gen", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "1", + "Poss": "Yes", + "PronType": "Prs", + }, + "Ds1fsrp": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "1", + "Poss": "Yes", + "PronType": "Prs", + }, + "Ds1fsrs": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "1", + "Poss": "Yes", + "PronType": "Prs", + }, + "Ds1ms-p": { + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "1", + "Poss": "Yes", + "PronType": "Prs", + }, + "Ds1ms-s": { + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "1", + "Poss": "Yes", + "PronType": "Prs", + }, + "Ds2---s": {POS: DET, "Person": "2", "Poss": "Yes", "PronType": "Prs"}, + "Ds2fsrs": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "2", + "Poss": "Yes", + "PronType": "Prs", + }, + "Ds3---p": {POS: DET, "Person": "3", "Poss": "Yes", "PronType": "Prs"}, + "Ds3---s": {POS: DET, "Person": "3", "Poss": "Yes", "PronType": "Prs"}, + "Ds3fp-s": { + "Gender": "Fem", + "Number": "Plur", + POS: DET, + "Person": "3", + "Poss": "Yes", + "PronType": "Prs", + }, + "Ds3fsos": { + "Case": "Dat,Gen", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "3", + "Poss": "Yes", + "PronType": "Prs", + }, + "Ds3fsrs": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "3", + "Poss": "Yes", + "PronType": "Prs", + }, + "Ds3ms-s": { + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "3", + "Poss": "Yes", + "PronType": "Prs", + }, + "Dw3--r---e": {"Case": "Acc,Nom", POS: DET, "Person": "3", "PronType": "Int,Rel"}, + "Dw3fpr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Plur", + POS: DET, + "Person": "3", + "PronType": "Int,Rel", + }, + "Dw3mso---e": { + "Case": "Dat,Gen", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Int,Rel", + }, + "Dz3fsr---e": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Neg", + }, + "Dz3msr---e": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Person": "3", + "PronType": "Neg", + }, + "EQUAL": {POS: SYM}, + "EXCL": {POS: PUNCT}, + "GT": {POS: SYM}, + "I": {POS: INTJ}, + "LPAR": {POS: PUNCT}, + "Mc": {"NumType": "Card", POS: NUM}, + "Mc-p-d": {"NumForm": "Digit", "NumType": "Card", "Number": "Plur", POS: NUM}, + "Mc-p-l": {"NumForm": "Word", "NumType": "Card", "Number": "Plur", POS: NUM}, + "Mcfp-l": { + "Gender": "Fem", + "NumForm": "Word", + "NumType": "Card", + "Number": "Plur", + POS: NUM, + }, + "Mcfp-ln": { + "Gender": "Fem", + "NumForm": "Word", + "NumType": "Card", + "Number": "Plur", + POS: NUM, + }, + "Mcfsrln": { + "Case": "Acc,Nom", + "Gender": "Fem", + "NumForm": "Word", + "NumType": "Card", + "Number": "Sing", + POS: NUM, + }, + "Mcmp-l": { + "Gender": "Masc", + "NumForm": "Word", + "NumType": "Card", + "Number": "Plur", + POS: NUM, + }, + "Mcmsrl": { + "Case": "Acc,Nom", + "Gender": "Masc", + "NumForm": "Word", + "NumType": "Card", + "Number": "Sing", + POS: NUM, + }, + "Mffprln": { + "Case": "Acc,Nom", + "Gender": "Fem", + "NumForm": "Word", + "NumType": "Card", + "Number": "Plur", + POS: NUM, + }, + "Mlfpo": { + "Case": "Dat,Gen", + "Gender": "Fem", + "NumType": "Card", + "Number": "Plur", + POS: NUM, + "PronType": "Tot", + }, + "Mlfpr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "NumType": "Card", + "Number": "Plur", + POS: NUM, + "PronType": "Tot", + }, + "Mlmpr": { + "Case": "Acc,Nom", + "Gender": "Masc", + "NumType": "Card", + "Number": "Plur", + POS: NUM, + "PronType": "Tot", + }, + "Mo---l": {"NumForm": "Word", "NumType": "Ord", POS: NUM}, + "Mo-s-r": {"NumForm": "Roman", "NumType": "Ord", "Number": "Sing", POS: NUM}, + "Mofp-ln": { + "Gender": "Fem", + "NumForm": "Word", + "NumType": "Ord", + "Number": "Plur", + POS: NUM, + }, + "Mofprly": { + "Case": "Acc,Nom", + "Gender": "Fem", + "NumForm": "Word", + "NumType": "Ord", + "Number": "Plur", + POS: NUM, + }, + "Mofs-l": { + "Gender": "Fem", + "NumForm": "Word", + "NumType": "Ord", + "Number": "Sing", + POS: NUM, + }, + "Mofsrln": { + "Case": "Acc,Nom", + "Gender": "Fem", + "NumForm": "Word", + "NumType": "Ord", + "Number": "Sing", + POS: NUM, + }, + "Mofsrly": { + "Case": "Acc,Nom", + "Gender": "Fem", + "NumForm": "Word", + "NumType": "Ord", + "Number": "Sing", + POS: NUM, + }, + "Momprly": { + "Case": "Acc,Nom", + "Gender": "Masc", + "NumForm": "Word", + "NumType": "Ord", + "Number": "Plur", + POS: NUM, + }, + "Moms-l": { + "Gender": "Masc", + "NumForm": "Word", + "NumType": "Ord", + "Number": "Sing", + POS: NUM, + }, + "Moms-ln": { + "Gender": "Masc", + "NumForm": "Word", + "NumType": "Ord", + "Number": "Sing", + POS: NUM, + }, + "Momsoly": { + "Case": "Dat,Gen", + "Gender": "Masc", + "NumForm": "Word", + "NumType": "Ord", + "Number": "Sing", + POS: NUM, + }, + "Momsrly": { + "Case": "Acc,Nom", + "Gender": "Masc", + "NumForm": "Word", + "NumType": "Ord", + "Number": "Sing", + POS: NUM, + }, + "Nc": {POS: NOUN}, + "Ncf--n": {"Gender": "Fem", POS: NOUN}, + "Ncfp-n": {"Gender": "Fem", "Number": "Plur", POS: NOUN}, + "Ncfpoy": {"Case": "Dat,Gen", "Gender": "Fem", "Number": "Plur", POS: NOUN}, + "Ncfpry": {"Case": "Acc,Nom", "Gender": "Fem", "Number": "Plur", POS: NOUN}, + "Ncfson": {"Case": "Dat,Gen", "Gender": "Fem", "Number": "Sing", POS: NOUN}, + "Ncfsoy": {"Case": "Dat,Gen", "Gender": "Fem", "Number": "Sing", POS: NOUN}, + "Ncfsrn": {"Case": "Acc,Nom", "Gender": "Fem", "Number": "Sing", POS: NOUN}, + "Ncfsry": {"Case": "Acc,Nom", "Gender": "Fem", "Number": "Sing", POS: NOUN}, + "Ncm--n": {"Gender": "Masc", POS: NOUN}, + "Ncmp-n": {"Gender": "Masc", "Number": "Plur", POS: NOUN}, + "Ncmpoy": {"Case": "Dat,Gen", "Gender": "Masc", "Number": "Plur", POS: NOUN}, + "Ncmpry": {"Case": "Acc,Nom", "Gender": "Masc", "Number": "Plur", POS: NOUN}, + "Ncms-n": {"Gender": "Masc", "Number": "Sing", POS: NOUN}, + "Ncms-ny": {"Gender": "Masc", "Number": "Sing", POS: NOUN, "Variant": "Short"}, + "Ncmsoy": {"Case": "Dat,Gen", "Gender": "Masc", "Number": "Sing", POS: NOUN}, + "Ncmsrn": {"Case": "Acc,Nom", "Gender": "Masc", "Number": "Sing", POS: NOUN}, + "Ncmsry": {"Case": "Acc,Nom", "Gender": "Masc", "Number": "Sing", POS: NOUN}, + "Np": {POS: PROPN}, + "Npfsoy": {"Case": "Dat,Gen", "Gender": "Fem", "Number": "Sing", POS: PROPN}, + "Npfsry": {"Case": "Acc,Nom", "Gender": "Fem", "Number": "Sing", POS: PROPN}, + "Npmsoy": {"Case": "Dat,Gen", "Gender": "Masc", "Number": "Sing", POS: PROPN}, + "Npmsry": {"Case": "Acc,Nom", "Gender": "Masc", "Number": "Sing", POS: PROPN}, + "PERCENT": {POS: SYM}, + "PERIOD": {POS: PUNCT}, + "PLUSMINUS": {POS: SYM}, + "Pd3-po": { + "Case": "Dat,Gen", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Dem", + }, + "Pd3fpr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Dem", + }, + "Pd3fso": { + "Case": "Dat,Gen", + "Gender": "Fem", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Dem", + }, + "Pd3fsr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Dem", + }, + "Pd3mpr": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Dem", + }, + "Pd3mso": { + "Case": "Dat,Gen", + "Gender": "Masc", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Dem", + }, + "Pd3msr": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Dem", + }, + "Pi3--r": {"Case": "Acc,Nom", POS: PRON, "Person": "3", "PronType": "Ind"}, + "Pi3-po": { + "Case": "Dat,Gen", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Ind", + }, + "Pi3-so": { + "Case": "Dat,Gen", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Ind", + }, + "Pi3-sr": { + "Case": "Acc,Nom", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Ind", + }, + "Pi3fpr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Ind", + }, + "Pi3fso": { + "Case": "Dat,Gen", + "Gender": "Fem", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Ind", + }, + "Pi3fsr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Ind", + }, + "Pi3mpr": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Ind", + }, + "Pi3msr": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Ind", + }, + "Pi3msr--y": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Ind", + "Variant": "Short", + }, + "Pp1-pa--------w": { + "Case": "Acc", + "Number": "Plur", + POS: PRON, + "Person": "1", + "PronType": "Prs", + }, + "Pp1-pa--y-----w": { + "Case": "Acc", + "Number": "Plur", + POS: PRON, + "Person": "1", + "PronType": "Prs", + "Variant": "Short", + }, + "Pp1-pd--------w": { + "Case": "Dat", + "Number": "Plur", + POS: PRON, + "Person": "1", + "PronType": "Prs", + }, + "Pp1-pr--------s": { + "Case": "Acc,Nom", + "Number": "Plur", + POS: PRON, + "Person": "1", + "PronType": "Prs", + }, + "Pp1-sa--------s": { + "Case": "Acc", + "Number": "Sing", + POS: PRON, + "Person": "1", + "PronType": "Prs", + }, + "Pp1-sa--------w": { + "Case": "Acc", + "Number": "Sing", + POS: PRON, + "Person": "1", + "PronType": "Prs", + }, + "Pp1-sa--y-----w": { + "Case": "Acc", + "Number": "Sing", + POS: PRON, + "Person": "1", + "PronType": "Prs", + "Variant": "Short", + }, + "Pp1-sd--------w": { + "Case": "Dat", + "Number": "Sing", + POS: PRON, + "Person": "1", + "PronType": "Prs", + }, + "Pp1-sd--y-----w": { + "Case": "Dat", + "Number": "Sing", + POS: PRON, + "Person": "1", + "PronType": "Prs", + "Variant": "Short", + }, + "Pp1-sn--------s": { + "Case": "Nom", + "Number": "Sing", + POS: PRON, + "Person": "1", + "PronType": "Prs", + }, + "Pp2-----------s": {POS: PRON, "Person": "2", "PronType": "Prs"}, + "Pp2-pa--------w": { + "Case": "Acc", + "Number": "Plur", + POS: PRON, + "Person": "2", + "PronType": "Prs", + }, + "Pp2-pa--y-----w": { + "Case": "Acc", + "Number": "Plur", + POS: PRON, + "Person": "2", + "PronType": "Prs", + "Variant": "Short", + }, + "Pp2-pd--------w": { + "Case": "Dat", + "Number": "Plur", + POS: PRON, + "Person": "2", + "PronType": "Prs", + }, + "Pp2-pr--------s": { + "Case": "Acc,Nom", + "Number": "Plur", + POS: PRON, + "Person": "2", + "PronType": "Prs", + }, + "Pp2-sa--------s": { + "Case": "Acc", + "Number": "Sing", + POS: PRON, + "Person": "2", + "PronType": "Prs", + }, + "Pp2-sa--------w": { + "Case": "Acc", + "Number": "Sing", + POS: PRON, + "Person": "2", + "PronType": "Prs", + }, + "Pp2-sa--y-----w": { + "Case": "Acc", + "Number": "Sing", + POS: PRON, + "Person": "2", + "PronType": "Prs", + "Variant": "Short", + }, + "Pp2-sd--y-----w": { + "Case": "Dat", + "Number": "Sing", + POS: PRON, + "Person": "2", + "PronType": "Prs", + "Variant": "Short", + }, + "Pp2-sn--------s": { + "Case": "Nom", + "Number": "Sing", + POS: PRON, + "Person": "2", + "PronType": "Prs", + }, + "Pp3-pd--------w": { + "Case": "Dat", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Prs", + }, + "Pp3-pd--y-----w": { + "Case": "Dat", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Prs", + "Variant": "Short", + }, + "Pp3-po--------s": { + "Case": "Dat,Gen", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Prs", + }, + "Pp3-sd--------w": { + "Case": "Dat", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Prs", + }, + "Pp3-sd--y-----w": { + "Case": "Dat", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Prs", + "Variant": "Short", + }, + "Pp3fpa--------w": { + "Case": "Acc", + "Gender": "Fem", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Prs", + }, + "Pp3fpa--y-----w": { + "Case": "Acc", + "Gender": "Fem", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Prs", + "Variant": "Short", + }, + "Pp3fpr--------s": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Prs", + }, + "Pp3fsa--------w": { + "Case": "Acc", + "Gender": "Fem", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Prs", + }, + "Pp3fsa--y-----w": { + "Case": "Acc", + "Gender": "Fem", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Prs", + "Variant": "Short", + }, + "Pp3fsr--------s": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Prs", + }, + "Pp3mpa--------w": { + "Case": "Acc", + "Gender": "Masc", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Prs", + }, + "Pp3mpa--y-----w": { + "Case": "Acc", + "Gender": "Masc", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Prs", + "Variant": "Short", + }, + "Pp3mpr--------s": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Prs", + }, + "Pp3msa--------w": { + "Case": "Acc", + "Gender": "Masc", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Prs", + }, + "Pp3msa--y-----w": { + "Case": "Acc", + "Gender": "Masc", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Prs", + "Variant": "Short", + }, + "Pp3mso--------s": { + "Case": "Dat,Gen", + "Gender": "Masc", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Prs", + }, + "Pp3msr--------s": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Prs", + }, + "Ps1mp-s": { + "Gender": "Masc", + "Number": "Plur", + POS: PRON, + "Person": "1", + "Poss": "Yes", + "PronType": "Prs", + }, + "Ps3---p": {POS: PRON, "Person": "3", "Poss": "Yes", "PronType": "Prs"}, + "Ps3---s": {POS: PRON, "Person": "3", "Poss": "Yes", "PronType": "Prs"}, + "Ps3fp-s": { + "Gender": "Fem", + "Number": "Plur", + POS: PRON, + "Person": "3", + "Poss": "Yes", + "PronType": "Prs", + }, + "Pw3--r": {"Case": "Acc,Nom", POS: PRON, "Person": "3", "PronType": "Int,Rel"}, + "Pw3-po": { + "Case": "Dat,Gen", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Int,Rel", + }, + "Pw3fso": { + "Case": "Dat,Gen", + "Gender": "Fem", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Int,Rel", + }, + "Pw3mpr": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Plur", + POS: PRON, + "Person": "3", + "PronType": "Int,Rel", + }, + "Px3--a--------s": { + "Case": "Acc", + POS: PRON, + "Person": "3", + "PronType": "Prs", + "Reflex": "Yes", + }, + "Px3--a--------w": { + "Case": "Acc", + POS: PRON, + "Person": "3", + "PronType": "Prs", + "Reflex": "Yes", + }, + "Px3--a--y-----w": { + "Case": "Acc", + POS: PRON, + "Person": "3", + "PronType": "Prs", + "Reflex": "Yes", + "Variant": "Short", + }, + "Px3--d--------w": { + "Case": "Dat", + POS: PRON, + "Person": "3", + "PronType": "Prs", + "Reflex": "Yes", + }, + "Px3--d--y-----w": { + "Case": "Dat", + POS: PRON, + "Person": "3", + "PronType": "Prs", + "Reflex": "Yes", + "Variant": "Short", + }, + "Pz3-sr": { + "Case": "Acc,Nom", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Neg", + }, + "Pz3msr": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: PRON, + "Person": "3", + "PronType": "Neg", + }, + "QUEST": {POS: PUNCT}, + "QUOT": {POS: PUNCT}, + "Qn": {POS: PART, "PartType": "Inf"}, + "Qs": {"Mood": "Sub", POS: PART}, + "Qs-y": {"Mood": "Sub", POS: PART, "Variant": "Short"}, + "Qz": {POS: PART, "Polarity": "Neg"}, + "Qz-y": {POS: PART, "Polarity": "Neg", "Variant": "Short"}, + "RPAR": {POS: PUNCT}, + "Rc": {POS: ADV}, + "Rgp": {"Degree": "Pos", POS: ADV}, + "Rgpy": {"Degree": "Pos", POS: ADV, "Variant": "Short"}, + "Rgs": {"Degree": "Sup", POS: ADV}, + "Rp": {POS: ADV}, + "Rw": {POS: ADV, "PronType": "Int,Rel"}, + "Rz": {POS: ADV, "PronType": "Neg"}, + "SCOLON": {"AdpType": "Prep", POS: PUNCT}, + "SLASH": {"AdpType": "Prep", POS: SYM}, + "Spsa": {"AdpType": "Prep", "Case": "Acc", POS: ADP}, + "Spsay": {"AdpType": "Prep", "Case": "Acc", POS: ADP, "Variant": "Short"}, + "Spsd": {"AdpType": "Prep", "Case": "Dat", POS: ADP}, + "Spsg": {"AdpType": "Prep", "Case": "Gen", POS: ADP}, + "Spsgy": {"AdpType": "Prep", "Case": "Gen", POS: ADP, "Variant": "Short"}, + "Td-po": {"Case": "Dat,Gen", "Number": "Plur", POS: DET, "PronType": "Dem"}, + "Tdfpr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Plur", + POS: DET, + "PronType": "Dem", + }, + "Tdfso": { + "Case": "Dat,Gen", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "PronType": "Dem", + }, + "Tdfsr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "PronType": "Dem", + }, + "Tdmpr": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Plur", + POS: DET, + "PronType": "Dem", + }, + "Tdmso": { + "Case": "Dat,Gen", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "PronType": "Dem", + }, + "Tdmsr": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "PronType": "Dem", + }, + "Tf-so": {"Case": "Dat,Gen", "Number": "Sing", POS: DET, "PronType": "Art"}, + "Tffs-y": { + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "PronType": "Art", + "Variant": "Short", + }, + "Tfms-y": { + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "PronType": "Art", + "Variant": "Short", + }, + "Tfmsoy": { + "Case": "Dat,Gen", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "PronType": "Art", + "Variant": "Short", + }, + "Tfmsry": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "PronType": "Art", + "Variant": "Short", + }, + "Ti-po": {"Case": "Dat,Gen", "Number": "Plur", POS: DET, "PronType": "Ind"}, + "Tifp-y": { + "Gender": "Fem", + "Number": "Plur", + POS: DET, + "PronType": "Ind", + "Variant": "Short", + }, + "Tifso": { + "Case": "Dat,Gen", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "PronType": "Ind", + }, + "Tifsr": { + "Case": "Acc,Nom", + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "PronType": "Ind", + }, + "Timso": { + "Case": "Dat,Gen", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "PronType": "Ind", + }, + "Timsr": { + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "PronType": "Ind", + }, + "Tsfp": { + "Gender": "Fem", + "Number": "Plur", + POS: DET, + "Poss": "Yes", + "PronType": "Prs", + }, + "Tsfs": { + "Gender": "Fem", + "Number": "Sing", + POS: DET, + "Poss": "Yes", + "PronType": "Prs", + }, + "Tsmp": { + "Gender": "Masc", + "Number": "Plur", + POS: DET, + "Poss": "Yes", + "PronType": "Prs", + }, + "Tsms": { + "Gender": "Masc", + "Number": "Sing", + POS: DET, + "Poss": "Yes", + "PronType": "Prs", + }, + "Va--1": {POS: AUX, "Person": "1"}, + "Va--1p": {"Number": "Plur", POS: AUX, "Person": "1"}, + "Va--1s": {"Number": "Sing", POS: AUX, "Person": "1"}, + "Va--2p": {"Number": "Plur", POS: AUX, "Person": "2"}, + "Va--2s": {"Number": "Sing", POS: AUX, "Person": "2"}, + "Va--3": {POS: AUX, "Person": "3"}, + "Va--3-----y": {POS: AUX, "Person": "3", "Variant": "Short"}, + "Va--3p": {"Number": "Plur", POS: AUX, "Person": "3"}, + "Va--3p----y": {"Number": "Plur", POS: AUX, "Person": "3", "Variant": "Short"}, + "Va--3s": {"Number": "Sing", POS: AUX, "Person": "3"}, + "Va--3s----y": {"Number": "Sing", POS: AUX, "Person": "3", "Variant": "Short"}, + "Vag": {POS: AUX, "VerbForm": "Ger"}, + "Vaii3p": { + "Mood": "Ind", + "Number": "Plur", + POS: AUX, + "Person": "3", + "Tense": "Imp", + "VerbForm": "Fin", + }, + "Vaii3s": { + "Mood": "Ind", + "Number": "Sing", + POS: AUX, + "Person": "3", + "Tense": "Imp", + "VerbForm": "Fin", + }, + "Vail3s": { + "Mood": "Ind", + "Number": "Sing", + POS: AUX, + "Person": "3", + "Tense": "Pqp", + "VerbForm": "Fin", + }, + "Vaip1s": { + "Mood": "Ind", + "Number": "Sing", + POS: AUX, + "Person": "1", + "Tense": "Pres", + "VerbForm": "Fin", + }, + "Vaip2s": { + "Mood": "Ind", + "Number": "Sing", + POS: AUX, + "Person": "2", + "Tense": "Pres", + "VerbForm": "Fin", + }, + "Vaip3p": { + "Mood": "Ind", + "Number": "Plur", + POS: AUX, + "Person": "3", + "Tense": "Pres", + "VerbForm": "Fin", + }, + "Vaip3s": { + "Mood": "Ind", + "Number": "Sing", + POS: AUX, + "Person": "3", + "Tense": "Pres", + "VerbForm": "Fin", + }, + "Vanp": {POS: AUX, "Tense": "Pres", "VerbForm": "Inf"}, + "Vap--sm": {"Gender": "Masc", "Number": "Sing", POS: AUX, "VerbForm": "Part"}, + "Vasp3": { + "Mood": "Sub", + POS: AUX, + "Person": "3", + "Tense": "Pres", + "VerbForm": "Fin", + }, + "Vmg": {POS: VERB, "VerbForm": "Ger"}, + "Vmg-------y": {POS: VERB, "Variant": "Short", "VerbForm": "Ger"}, + "Vmii1": { + "Mood": "Ind", + POS: VERB, + "Person": "1", + "Tense": "Imp", + "VerbForm": "Fin", + }, + "Vmii1-----y": { + "Mood": "Ind", + POS: VERB, + "Person": "1", + "Tense": "Imp", + "Variant": "Short", + "VerbForm": "Fin", + }, + "Vmii2p": { + "Mood": "Ind", + "Number": "Plur", + POS: VERB, + "Person": "2", + "Tense": "Imp", + "VerbForm": "Fin", + }, + "Vmii2s": { + "Mood": "Ind", + "Number": "Sing", + POS: VERB, + "Person": "2", + "Tense": "Imp", + "VerbForm": "Fin", + }, + "Vmii3p": { + "Mood": "Ind", + "Number": "Plur", + POS: VERB, + "Person": "3", + "Tense": "Imp", + "VerbForm": "Fin", + }, + "Vmii3p----y": { + "Mood": "Ind", + "Number": "Plur", + POS: VERB, + "Person": "3", + "Tense": "Imp", + "Variant": "Short", + "VerbForm": "Fin", + }, + "Vmii3s": { + "Mood": "Ind", + "Number": "Sing", + POS: VERB, + "Person": "3", + "Tense": "Imp", + "VerbForm": "Fin", + }, + "Vmil3p": { + "Mood": "Ind", + "Number": "Plur", + POS: VERB, + "Person": "3", + "Tense": "Pqp", + "VerbForm": "Fin", + }, + "Vmil3s": { + "Mood": "Ind", + "Number": "Sing", + POS: VERB, + "Person": "3", + "Tense": "Pqp", + "VerbForm": "Fin", + }, + "Vmip1p": { + "Mood": "Ind", + "Number": "Plur", + POS: VERB, + "Person": "1", + "Tense": "Pres", + "VerbForm": "Fin", + }, + "Vmip1s": { + "Mood": "Ind", + "Number": "Sing", + POS: VERB, + "Person": "1", + "Tense": "Pres", + "VerbForm": "Fin", + }, + "Vmip1s----y": { + "Mood": "Ind", + "Number": "Sing", + POS: VERB, + "Person": "1", + "Tense": "Pres", + "Variant": "Short", + "VerbForm": "Fin", + }, + "Vmip2p": { + "Mood": "Ind", + "Number": "Plur", + POS: VERB, + "Person": "2", + "Tense": "Pres", + "VerbForm": "Fin", + }, + "Vmip2s": { + "Mood": "Ind", + "Number": "Sing", + POS: VERB, + "Person": "2", + "Tense": "Pres", + "VerbForm": "Fin", + }, + "Vmip3": { + "Mood": "Ind", + POS: VERB, + "Person": "3", + "Tense": "Pres", + "VerbForm": "Fin", + }, + "Vmip3-----y": { + "Mood": "Ind", + POS: VERB, + "Person": "3", + "Tense": "Pres", + "Variant": "Short", + "VerbForm": "Fin", + }, + "Vmip3p": { + "Mood": "Ind", + "Number": "Plur", + POS: AUX, + "Person": "3", + "Tense": "Pres", + "VerbForm": "Fin", + }, + "Vmip3s": { + "Mood": "Ind", + "Number": "Sing", + POS: VERB, + "Person": "3", + "Tense": "Pres", + "VerbForm": "Fin", + }, + "Vmip3s----y": { + "Mood": "Ind", + "Number": "Sing", + POS: AUX, + "Person": "3", + "Tense": "Pres", + "Variant": "Short", + "VerbForm": "Fin", + }, + "Vmis1p": { + "Mood": "Ind", + "Number": "Plur", + POS: VERB, + "Person": "1", + "Tense": "Past", + "VerbForm": "Fin", + }, + "Vmis1s": { + "Mood": "Ind", + "Number": "Sing", + POS: VERB, + "Person": "1", + "Tense": "Past", + "VerbForm": "Fin", + }, + "Vmis3p": { + "Mood": "Ind", + "Number": "Plur", + POS: VERB, + "Person": "3", + "Tense": "Past", + "VerbForm": "Fin", + }, + "Vmis3s": { + "Mood": "Ind", + "Number": "Sing", + POS: VERB, + "Person": "3", + "Tense": "Past", + "VerbForm": "Fin", + }, + "Vmm-2p": { + "Mood": "Imp", + "Number": "Plur", + POS: VERB, + "Person": "2", + "VerbForm": "Fin", + }, + "Vmm-2s": { + "Mood": "Imp", + "Number": "Sing", + POS: VERB, + "Person": "2", + "VerbForm": "Fin", + }, + "Vmnp": {POS: VERB, "Tense": "Pres", "VerbForm": "Inf"}, + "Vmp--pf": {"Gender": "Fem", "Number": "Plur", POS: VERB, "VerbForm": "Part"}, + "Vmp--pm": {"Gender": "Masc", "Number": "Plur", POS: VERB, "VerbForm": "Part"}, + "Vmp--sf": {"Gender": "Fem", "Number": "Sing", POS: VERB, "VerbForm": "Part"}, + "Vmp--sm": {"Gender": "Masc", "Number": "Sing", POS: VERB, "VerbForm": "Part"}, + "Vmsp3": { + "Mood": "Sub", + POS: VERB, + "Person": "3", + "Tense": "Pres", + "VerbForm": "Fin", + }, + "Vmsp3-----y": { + "Mood": "Sub", + POS: VERB, + "Person": "3", + "Tense": "Pres", + "Variant": "Short", + "VerbForm": "Fin", + }, + "X": {POS: X}, + "Y": {"Abbr": "Yes", POS: X}, + "Yn": {"Abbr": "Yes", POS: NOUN}, + "Ynmsry": { + "Abbr": "Yes", + "Case": "Acc,Nom", + "Gender": "Masc", + "Number": "Sing", + POS: NOUN, + }, +} diff --git a/spacy/lang/sr/tokenizer_exceptions.py b/spacy/lang/sr/tokenizer_exceptions.py index c4e97cc54..8fca346a3 100755 --- a/spacy/lang/sr/tokenizer_exceptions.py +++ b/spacy/lang/sr/tokenizer_exceptions.py @@ -15,7 +15,6 @@ _abbrev_exc = [ {ORTH: "пет", LEMMA: "петак", NORM: "петак"}, {ORTH: "суб", LEMMA: "субота", NORM: "субота"}, {ORTH: "нед", LEMMA: "недеља", NORM: "недеља"}, - # Months abbreviations {ORTH: "јан", LEMMA: "јануар", NORM: "јануар"}, {ORTH: "феб", LEMMA: "фебруар", NORM: "фебруар"}, @@ -28,7 +27,7 @@ _abbrev_exc = [ {ORTH: "септ", LEMMA: "септембар", NORM: "септембар"}, {ORTH: "окт", LEMMA: "октобар", NORM: "октобар"}, {ORTH: "нов", LEMMA: "новембар", NORM: "новембар"}, - {ORTH: "дец", LEMMA: "децембар", NORM: "децембар"} + {ORTH: "дец", LEMMA: "децембар", NORM: "децембар"}, ] diff --git a/spacy/tests/doc/test_retokenize_merge.py b/spacy/tests/doc/test_retokenize_merge.py index 27b5f1ab1..28f00aa18 100644 --- a/spacy/tests/doc/test_retokenize_merge.py +++ b/spacy/tests/doc/test_retokenize_merge.py @@ -103,7 +103,13 @@ def test_doc_retokenize_spans_merge_tokens_default_attrs(en_tokenizer): text = "The players start." heads = [1, 1, 0, -1] tokens = en_tokenizer(text) - doc = get_doc(tokens.vocab, words=[t.text for t in tokens], tags=["DT", "NN", "VBZ", "."], pos=["DET", "NOUN", "VERB", "PUNCT"], heads=heads) + doc = get_doc( + tokens.vocab, + words=[t.text for t in tokens], + tags=["DT", "NN", "VBZ", "."], + pos=["DET", "NOUN", "VERB", "PUNCT"], + heads=heads, + ) assert len(doc) == 4 assert doc[0].text == "The" assert doc[0].tag_ == "DT" @@ -115,7 +121,13 @@ def test_doc_retokenize_spans_merge_tokens_default_attrs(en_tokenizer): assert doc[0].tag_ == "NN" assert doc[0].pos_ == "NOUN" assert doc[0].lemma_ == "The players" - doc = get_doc(tokens.vocab, words=[t.text for t in tokens], tags=["DT", "NN", "VBZ", "."], pos=["DET", "NOUN", "VERB", "PUNCT"], heads=heads) + doc = get_doc( + tokens.vocab, + words=[t.text for t in tokens], + tags=["DT", "NN", "VBZ", "."], + pos=["DET", "NOUN", "VERB", "PUNCT"], + heads=heads, + ) assert len(doc) == 4 assert doc[0].text == "The" assert doc[0].tag_ == "DT" @@ -269,18 +281,15 @@ def test_doc_retokenize_spans_entity_merge_iob(en_vocab): # if there is a parse, span.root provides default values words = ["a", "b", "c", "d", "e", "f", "g", "h", "i"] - heads = [ 0, -1, 1, -3, -4, -5, -1, -7, -8 ] - ents = [ - (3, 5, "ent-de"), - (5, 7, "ent-fg"), - ] - deps = ["dep"] * len(words) + heads = [0, -1, 1, -3, -4, -5, -1, -7, -8] + ents = [(3, 5, "ent-de"), (5, 7, "ent-fg")] + deps = ["dep"] * len(words) en_vocab.strings.add("ent-de") en_vocab.strings.add("ent-fg") en_vocab.strings.add("dep") doc = get_doc(en_vocab, words=words, heads=heads, deps=deps, ents=ents) - assert doc[2:4].root == doc[3] # root of 'c d' is d - assert doc[4:6].root == doc[4] # root is 'e f' is e + assert doc[2:4].root == doc[3] # root of 'c d' is d + assert doc[4:6].root == doc[4] # root is 'e f' is e with doc.retokenize() as retokenizer: retokenizer.merge(doc[2:4]) retokenizer.merge(doc[4:6]) @@ -295,12 +304,9 @@ def test_doc_retokenize_spans_entity_merge_iob(en_vocab): # check that B is preserved if span[start] is B words = ["a", "b", "c", "d", "e", "f", "g", "h", "i"] - heads = [ 0, -1, 1, 1, -4, -5, -1, -7, -8 ] - ents = [ - (3, 5, "ent-de"), - (5, 7, "ent-de"), - ] - deps = ["dep"] * len(words) + heads = [0, -1, 1, 1, -4, -5, -1, -7, -8] + ents = [(3, 5, "ent-de"), (5, 7, "ent-de")] + deps = ["dep"] * len(words) doc = get_doc(en_vocab, words=words, heads=heads, deps=deps, ents=ents) with doc.retokenize() as retokenizer: retokenizer.merge(doc[3:5]) diff --git a/spacy/tests/regression/test_issue1001-1500.py b/spacy/tests/regression/test_issue1001-1500.py index cc848f214..a405d7b0f 100644 --- a/spacy/tests/regression/test_issue1001-1500.py +++ b/spacy/tests/regression/test_issue1001-1500.py @@ -14,24 +14,24 @@ from spacy.symbols import ORTH, LEMMA, POS, VERB, VerbForm_part def test_issue1061(): - '''Test special-case works after tokenizing. Was caching problem.''' - text = 'I like _MATH_ even _MATH_ when _MATH_, except when _MATH_ is _MATH_! but not _MATH_.' + """Test special-case works after tokenizing. Was caching problem.""" + text = "I like _MATH_ even _MATH_ when _MATH_, except when _MATH_ is _MATH_! but not _MATH_." tokenizer = English.Defaults.create_tokenizer() doc = tokenizer(text) - assert 'MATH' in [w.text for w in doc] - assert '_MATH_' not in [w.text for w in doc] + assert "MATH" in [w.text for w in doc] + assert "_MATH_" not in [w.text for w in doc] - tokenizer.add_special_case('_MATH_', [{ORTH: '_MATH_'}]) + tokenizer.add_special_case("_MATH_", [{ORTH: "_MATH_"}]) doc = tokenizer(text) - assert '_MATH_' in [w.text for w in doc] - assert 'MATH' not in [w.text for w in doc] + assert "_MATH_" in [w.text for w in doc] + assert "MATH" not in [w.text for w in doc] # For sanity, check it works when pipeline is clean. tokenizer = English.Defaults.create_tokenizer() - tokenizer.add_special_case('_MATH_', [{ORTH: '_MATH_'}]) + tokenizer.add_special_case("_MATH_", [{ORTH: "_MATH_"}]) doc = tokenizer(text) - assert '_MATH_' in [w.text for w in doc] - assert 'MATH' not in [w.text for w in doc] + assert "_MATH_" in [w.text for w in doc] + assert "MATH" not in [w.text for w in doc] @pytest.mark.xfail( diff --git a/spacy/tests/regression/test_issue3879.py b/spacy/tests/regression/test_issue3879.py index 908c6c435..123e9fce3 100644 --- a/spacy/tests/regression/test_issue3879.py +++ b/spacy/tests/regression/test_issue3879.py @@ -1,7 +1,6 @@ # coding: utf8 from __future__ import unicode_literals -import pytest from spacy.matcher import Matcher from spacy.tokens import Doc diff --git a/spacy/tests/regression/test_issue3951.py b/spacy/tests/regression/test_issue3951.py index 7e1e213c0..e07ffd36e 100644 --- a/spacy/tests/regression/test_issue3951.py +++ b/spacy/tests/regression/test_issue3951.py @@ -1,7 +1,6 @@ # coding: utf8 from __future__ import unicode_literals -import pytest from spacy.matcher import Matcher from spacy.tokens import Doc diff --git a/spacy/tests/regression/test_issue3972.py b/spacy/tests/regression/test_issue3972.py index 1bc762699..a7f76e4d7 100644 --- a/spacy/tests/regression/test_issue3972.py +++ b/spacy/tests/regression/test_issue3972.py @@ -1,7 +1,6 @@ # coding: utf8 from __future__ import unicode_literals -import pytest from spacy.matcher import PhraseMatcher from spacy.tokens import Doc diff --git a/spacy/tests/regression/test_issue4120.py b/spacy/tests/regression/test_issue4120.py index 4806d1607..2ce5aec6a 100644 --- a/spacy/tests/regression/test_issue4120.py +++ b/spacy/tests/regression/test_issue4120.py @@ -1,7 +1,6 @@ # coding: utf8 from __future__ import unicode_literals -import pytest from spacy.matcher import Matcher from spacy.tokens import Doc diff --git a/spacy/tests/regression/test_issue4190.py b/spacy/tests/regression/test_issue4190.py index 464996705..eb4eb8648 100644 --- a/spacy/tests/regression/test_issue4190.py +++ b/spacy/tests/regression/test_issue4190.py @@ -2,44 +2,37 @@ from __future__ import unicode_literals from spacy.lang.en import English - -import spacy from spacy.tokenizer import Tokenizer +from spacy import util -from spacy.tests.util import make_tempdir +from ..util import make_tempdir def test_issue4190(): test_string = "Test c." - # Load default language nlp_1 = English() doc_1a = nlp_1(test_string) - result_1a = [token.text for token in doc_1a] - + result_1a = [token.text for token in doc_1a] # noqa: F841 # Modify tokenizer customize_tokenizer(nlp_1) doc_1b = nlp_1(test_string) result_1b = [token.text for token in doc_1b] - # Save and Reload with make_tempdir() as model_dir: nlp_1.to_disk(model_dir) - nlp_2 = spacy.load(model_dir) - + nlp_2 = util.load_model(model_dir) # This should be the modified tokenizer doc_2 = nlp_2(test_string) result_2 = [token.text for token in doc_2] - assert result_1b == result_2 def customize_tokenizer(nlp): - prefix_re = spacy.util.compile_prefix_regex(nlp.Defaults.prefixes) - suffix_re = spacy.util.compile_suffix_regex(nlp.Defaults.suffixes) - infix_re = spacy.util.compile_infix_regex(nlp.Defaults.infixes) - - # remove all exceptions where a single letter is followed by a period (e.g. 'h.') + prefix_re = util.compile_prefix_regex(nlp.Defaults.prefixes) + suffix_re = util.compile_suffix_regex(nlp.Defaults.suffixes) + infix_re = util.compile_infix_regex(nlp.Defaults.infixes) + # Remove all exceptions where a single letter is followed by a period (e.g. 'h.') exceptions = { k: v for k, v in dict(nlp.Defaults.tokenizer_exceptions).items() @@ -53,5 +46,4 @@ def customize_tokenizer(nlp): infix_finditer=infix_re.finditer, token_match=nlp.tokenizer.token_match, ) - nlp.tokenizer = new_tokenizer diff --git a/spacy/tests/vocab_vectors/test_lookups.py b/spacy/tests/vocab_vectors/test_lookups.py index 0a7c9625c..16ffe83fc 100644 --- a/spacy/tests/vocab_vectors/test_lookups.py +++ b/spacy/tests/vocab_vectors/test_lookups.py @@ -56,6 +56,7 @@ def test_lookups_to_from_bytes(): assert table2.get("b") == 2 assert new_lookups.to_bytes() == lookups_bytes + # This fails on Python 3.5 @pytest.mark.xfail def test_lookups_to_from_disk(): @@ -76,6 +77,7 @@ def test_lookups_to_from_disk(): assert len(table2) == 3 assert table2.get("b") == 2 + # This fails on Python 3.5 @pytest.mark.xfail def test_lookups_to_from_bytes_via_vocab():