# coding: utf8 #cython: optimize.unpack_method_calls=False from __future__ import unicode_literals IDS = { "": NIL, "IS_ALPHA": IS_ALPHA, "IS_ASCII": IS_ASCII, "IS_DIGIT": IS_DIGIT, "IS_LOWER": IS_LOWER, "IS_PUNCT": IS_PUNCT, "IS_SPACE": IS_SPACE, "IS_TITLE": IS_TITLE, "IS_UPPER": IS_UPPER, "LIKE_URL": LIKE_URL, "LIKE_NUM": LIKE_NUM, "LIKE_EMAIL": LIKE_EMAIL, "IS_STOP": IS_STOP, "IS_OOV": IS_OOV, "IS_BRACKET": IS_BRACKET, "IS_QUOTE": IS_QUOTE, "IS_LEFT_PUNCT": IS_LEFT_PUNCT, "IS_RIGHT_PUNCT": IS_RIGHT_PUNCT, "IS_CURRENCY": IS_CURRENCY, "FLAG19": FLAG19, "FLAG20": FLAG20, "FLAG21": FLAG21, "FLAG22": FLAG22, "FLAG23": FLAG23, "FLAG24": FLAG24, "FLAG25": FLAG25, "FLAG26": FLAG26, "FLAG27": FLAG27, "FLAG28": FLAG28, "FLAG29": FLAG29, "FLAG30": FLAG30, "FLAG31": FLAG31, "FLAG32": FLAG32, "FLAG33": FLAG33, "FLAG34": FLAG34, "FLAG35": FLAG35, "FLAG36": FLAG36, "FLAG37": FLAG37, "FLAG38": FLAG38, "FLAG39": FLAG39, "FLAG40": FLAG40, "FLAG41": FLAG41, "FLAG42": FLAG42, "FLAG43": FLAG43, "FLAG44": FLAG44, "FLAG45": FLAG45, "FLAG46": FLAG46, "FLAG47": FLAG47, "FLAG48": FLAG48, "FLAG49": FLAG49, "FLAG50": FLAG50, "FLAG51": FLAG51, "FLAG52": FLAG52, "FLAG53": FLAG53, "FLAG54": FLAG54, "FLAG55": FLAG55, "FLAG56": FLAG56, "FLAG57": FLAG57, "FLAG58": FLAG58, "FLAG59": FLAG59, "FLAG60": FLAG60, "FLAG61": FLAG61, "FLAG62": FLAG62, "FLAG63": FLAG63, "ID": ID, "ORTH": ORTH, "LOWER": LOWER, "NORM": NORM, "SHAPE": SHAPE, "PREFIX": PREFIX, "SUFFIX": SUFFIX, "LENGTH": LENGTH, "CLUSTER": CLUSTER, "LEMMA": LEMMA, "POS": POS, "TAG": TAG, "DEP": DEP, "ENT_IOB": ENT_IOB, "ENT_TYPE": ENT_TYPE, "HEAD": HEAD, "SENT_START": SENT_START, "SPACY": SPACY, "PROB": PROB, "LANG": LANG, "ADJ": ADJ, "ADP": ADP, "ADV": ADV, "AUX": AUX, "CONJ": CONJ, "CCONJ": CCONJ, # U20 "DET": DET, "INTJ": INTJ, "NOUN": NOUN, "NUM": NUM, "PART": PART, "PRON": PRON, "PROPN": PROPN, "PUNCT": PUNCT, "SCONJ": SCONJ, "SYM": SYM, "VERB": VERB, "X": X, "EOL": EOL, "SPACE": SPACE, "Animacy_anim": Animacy_anim, "Animacy_inam": Animacy_inan, "Animacy_hum": Animacy_hum, # U20 "Animacy_nhum": Animacy_nhum, "Aspect_freq": Aspect_freq, "Aspect_imp": Aspect_imp, "Aspect_mod": Aspect_mod, "Aspect_none": Aspect_none, "Aspect_perf": Aspect_perf, "Aspect_iter": Aspect_iter, # U20 "Aspect_hab": Aspect_hab, # U20 "Case_abe": Case_abe, "Case_abl": Case_abl, "Case_abs": Case_abs, "Case_acc": Case_acc, "Case_ade": Case_ade, "Case_all": Case_all, "Case_cau": Case_cau, "Case_com": Case_com, "Case_cmp": Case_cmp, # U20 "Case_dat": Case_dat, "Case_del": Case_del, "Case_dis": Case_dis, "Case_ela": Case_ela, "Case_equ": Case_equ, # U20 "Case_ess": Case_ess, "Case_gen": Case_gen, "Case_ill": Case_ill, "Case_ine": Case_ine, "Case_ins": Case_ins, "Case_loc": Case_loc, "Case_lat": Case_lat, "Case_nom": Case_nom, "Case_par": Case_par, "Case_sub": Case_sub, "Case_sup": Case_sup, "Case_tem": Case_tem, "Case_ter": Case_ter, "Case_tra": Case_tra, "Case_voc": Case_voc, "Definite_two": Definite_two, "Definite_def": Definite_def, "Definite_red": Definite_red, "Definite_cons": Definite_cons, # U20 "Definite_ind": Definite_ind, "Definite_spec": Definite_spec, # U20 "Degree_cmp": Degree_cmp, "Degree_comp": Degree_comp, "Degree_none": Degree_none, "Degree_pos": Degree_pos, "Degree_sup": Degree_sup, "Degree_abs": Degree_abs, "Degree_com": Degree_com, "Degree_dim": Degree_dim, # du "Degree_equ": Degree_equ, # U20 "Evident_nfh": Evident_nfh, # U20 "Gender_com": Gender_com, "Gender_fem": Gender_fem, "Gender_masc": Gender_masc, "Gender_neut": Gender_neut, "Mood_cnd": Mood_cnd, "Mood_imp": Mood_imp, "Mood_ind": Mood_ind, "Mood_n": Mood_n, "Mood_pot": Mood_pot, "Mood_sub": Mood_sub, "Mood_opt": Mood_opt, "Mood_prp": Mood_prp, # U20 "Mood_adm": Mood_adm, # U20 "Negative_neg": Negative_neg, "Negative_pos": Negative_pos, "Negative_yes": Negative_yes, "Polarity_neg": Polarity_neg, # U20 "Polarity_pos": Polarity_pos, # U20 "Number_com": Number_com, "Number_dual": Number_dual, "Number_none": Number_none, "Number_plur": Number_plur, "Number_sing": Number_sing, "Number_ptan": Number_ptan, # bg "Number_count": Number_count, # bg, U20 "Number_tri": Number_tri, # U20 "NumType_card": NumType_card, "NumType_dist": NumType_dist, "NumType_frac": NumType_frac, "NumType_gen": NumType_gen, "NumType_mult": NumType_mult, "NumType_none": NumType_none, "NumType_ord": NumType_ord, "NumType_sets": NumType_sets, "Person_one": Person_one, "Person_two": Person_two, "Person_three": Person_three, "Person_none": Person_none, "Poss_yes": Poss_yes, "PronType_advPart": PronType_advPart, "PronType_art": PronType_art, "PronType_default": PronType_default, "PronType_dem": PronType_dem, "PronType_ind": PronType_ind, "PronType_int": PronType_int, "PronType_neg": PronType_neg, "PronType_prs": PronType_prs, "PronType_rcp": PronType_rcp, "PronType_rel": PronType_rel, "PronType_tot": PronType_tot, "PronType_clit": PronType_clit, "PronType_exc": PronType_exc, # es, ca, it, fa, U20 "PronType_emp": PronType_emp, # U20 "Reflex_yes": Reflex_yes, "Tense_fut": Tense_fut, "Tense_imp": Tense_imp, "Tense_past": Tense_past, "Tense_pres": Tense_pres, "VerbForm_fin": VerbForm_fin, "VerbForm_ger": VerbForm_ger, "VerbForm_inf": VerbForm_inf, "VerbForm_none": VerbForm_none, "VerbForm_part": VerbForm_part, "VerbForm_partFut": VerbForm_partFut, "VerbForm_partPast": VerbForm_partPast, "VerbForm_partPres": VerbForm_partPres, "VerbForm_sup": VerbForm_sup, "VerbForm_trans": VerbForm_trans, "VerbForm_conv": VerbForm_conv, # U20 "VerbForm_gdv": VerbForm_gdv, # la, "VerbForm_vnoun": VerbForm_vnoun, # U20 "Voice_act": Voice_act, "Voice_cau": Voice_cau, "Voice_pass": Voice_pass, "Voice_mid": Voice_mid, # gkc, U20 "Voice_int": Voice_int, # hb, "Voice_antip": Voice_antip, # U20 "Voice_dir": Voice_dir, # U20 "Voice_inv": Voice_inv, # U20 "Abbr_yes": Abbr_yes, # cz, fi, sl, U, "AdpType_prep": AdpType_prep, # cz, U, "AdpType_post": AdpType_post, # U, "AdpType_voc": AdpType_voc, # cz, "AdpType_comprep": AdpType_comprep, # cz, "AdpType_circ": AdpType_circ, # U, "AdvType_man": AdvType_man, "AdvType_loc": AdvType_loc, "AdvType_tim": AdvType_tim, "AdvType_deg": AdvType_deg, "AdvType_cau": AdvType_cau, "AdvType_mod": AdvType_mod, "AdvType_sta": AdvType_sta, "AdvType_ex": AdvType_ex, "AdvType_adadj": AdvType_adadj, "ConjType_oper": ConjType_oper, # cz, U, "ConjType_comp": ConjType_comp, # cz, U, "Connegative_yes": Connegative_yes, # fi, "Derivation_minen": Derivation_minen, # fi, "Derivation_sti": Derivation_sti, # fi, "Derivation_inen": Derivation_inen, # fi, "Derivation_lainen": Derivation_lainen, # fi, "Derivation_ja": Derivation_ja, # fi, "Derivation_ton": Derivation_ton, # fi, "Derivation_vs": Derivation_vs, # fi, "Derivation_ttain": Derivation_ttain, # fi, "Derivation_ttaa": Derivation_ttaa, # fi, "Echo_rdp": Echo_rdp, # U, "Echo_ech": Echo_ech, # U, "Foreign_foreign": Foreign_foreign, # cz, fi, U, "Foreign_fscript": Foreign_fscript, # cz, fi, U, "Foreign_tscript": Foreign_tscript, # cz, U, "Foreign_yes": Foreign_yes, # sl, "Gender_dat_masc": Gender_dat_masc, # bq, U, "Gender_dat_fem": Gender_dat_fem, # bq, U, "Gender_erg_masc": Gender_erg_masc, # bq, "Gender_erg_fem": Gender_erg_fem, # bq, "Gender_psor_masc": Gender_psor_masc, # cz, sl, U, "Gender_psor_fem": Gender_psor_fem, # cz, sl, U, "Gender_psor_neut": Gender_psor_neut, # sl, "Hyph_yes": Hyph_yes, # cz, U, "InfForm_one": InfForm_one, # fi, "InfForm_two": InfForm_two, # fi, "InfForm_three": InfForm_three, # fi, "NameType_geo": NameType_geo, # U, cz, "NameType_prs": NameType_prs, # U, cz, "NameType_giv": NameType_giv, # U, cz, "NameType_sur": NameType_sur, # U, cz, "NameType_nat": NameType_nat, # U, cz, "NameType_com": NameType_com, # U, cz, "NameType_pro": NameType_pro, # U, cz, "NameType_oth": NameType_oth, # U, cz, "NounType_com": NounType_com, # U, "NounType_prop": NounType_prop, # U, "NounType_class": NounType_class, # U, "Number_abs_sing": Number_abs_sing, # bq, U, "Number_abs_plur": Number_abs_plur, # bq, U, "Number_dat_sing": Number_dat_sing, # bq, U, "Number_dat_plur": Number_dat_plur, # bq, U, "Number_erg_sing": Number_erg_sing, # bq, U, "Number_erg_plur": Number_erg_plur, # bq, U, "Number_psee_sing": Number_psee_sing, # U, "Number_psee_plur": Number_psee_plur, # U, "Number_psor_sing": Number_psor_sing, # cz, fi, sl, U, "Number_psor_plur": Number_psor_plur, # cz, fi, sl, U, "Number_pauc": Number_pauc, # U20 "Number_grpa": Number_grpa, # U20 "Number_grpl": Number_grpl, # U20 "Number_inv": Number_inv, # U20 "NumForm_digit": NumForm_digit, # cz, sl, U, "NumForm_roman": NumForm_roman, # cz, sl, U, "NumForm_word": NumForm_word, # cz, sl, U, "NumValue_one": NumValue_one, # cz, U, "NumValue_two": NumValue_two, # cz, U, "NumValue_three": NumValue_three, # cz, U, "PartForm_pres": PartForm_pres, # fi, "PartForm_past": PartForm_past, # fi, "PartForm_agt": PartForm_agt, # fi, "PartForm_neg": PartForm_neg, # fi, "PartType_mod": PartType_mod, # U, "PartType_emp": PartType_emp, # U, "PartType_res": PartType_res, # U, "PartType_inf": PartType_inf, # U, "PartType_vbp": PartType_vbp, # U, "Person_abs_one": Person_abs_one, # bq, U, "Person_abs_two": Person_abs_two, # bq, U, "Person_abs_three": Person_abs_three, # bq, U, "Person_dat_one": Person_dat_one, # bq, U, "Person_dat_two": Person_dat_two, # bq, U, "Person_dat_three": Person_dat_three, # bq, U, "Person_erg_one": Person_erg_one, # bq, U, "Person_erg_two": Person_erg_two, # bq, U, "Person_erg_three": Person_erg_three, # bq, U, "Person_psor_one": Person_psor_one, # fi, U, "Person_psor_two": Person_psor_two, # fi, U, "Person_psor_three": Person_psor_three, # fi, U, "Person_zero": Person_zero, # U20 "Person_four": Person_four, # U20 "Polite_inf": Polite_inf, # bq, U, "Polite_pol": Polite_pol, # bq, U, "Polite_abs_inf": Polite_abs_inf, # bq, U, "Polite_abs_pol": Polite_abs_pol, # bq, U, "Polite_erg_inf": Polite_erg_inf, # bq, U, "Polite_erg_pol": Polite_erg_pol, # bq, U, "Polite_dat_inf": Polite_dat_inf, # bq, U, "Polite_dat_pol": Polite_dat_pol, # bq, U, "Polite_infm": Polite_infm, # U20 "Polite_form": Polite_form, # U20 "Polite_form_elev": Polite_form_elev, # U20 "Polite_form_humb": Polite_form_humb, # U20 "Prefix_yes": Prefix_yes, # U, "PrepCase_npr": PrepCase_npr, # cz, "PrepCase_pre": PrepCase_pre, # U, "PunctSide_ini": PunctSide_ini, # U, "PunctSide_fin": PunctSide_fin, # U, "PunctType_peri": PunctType_peri, # U, "PunctType_qest": PunctType_qest, # U, "PunctType_excl": PunctType_excl, # U, "PunctType_quot": PunctType_quot, # U, "PunctType_brck": PunctType_brck, # U, "PunctType_comm": PunctType_comm, # U, "PunctType_colo": PunctType_colo, # U, "PunctType_semi": PunctType_semi, # U, "PunctType_dash": PunctType_dash, # U, "Style_arch": Style_arch, # cz, fi, U, "Style_rare": Style_rare, # cz, fi, U, "Style_poet": Style_poet, # cz, U, "Style_norm": Style_norm, # cz, U, "Style_coll": Style_coll, # cz, U, "Style_vrnc": Style_vrnc, # cz, U, "Style_sing": Style_sing, # cz, U, "Style_expr": Style_expr, # cz, U, "Style_derg": Style_derg, # cz, U, "Style_vulg": Style_vulg, # cz, U, "Style_yes": Style_yes, # fi, U, "StyleVariant_styleShort": StyleVariant_styleShort, # cz, "StyleVariant_styleBound": StyleVariant_styleBound, # cz, sl, "VerbType_aux": VerbType_aux, # U, "VerbType_cop": VerbType_cop, # U, "VerbType_mod": VerbType_mod, # U, "VerbType_light": VerbType_light, # U, "PERSON": PERSON, "NORP": NORP, "FACILITY": FACILITY, "ORG": ORG, "GPE": GPE, "LOC": LOC, "PRODUCT": PRODUCT, "EVENT": EVENT, "WORK_OF_ART": WORK_OF_ART, "LANGUAGE": LANGUAGE, "DATE": DATE, "TIME": TIME, "PERCENT": PERCENT, "MONEY": MONEY, "QUANTITY": QUANTITY, "ORDINAL": ORDINAL, "CARDINAL": CARDINAL, "acomp": acomp, "advcl": advcl, "advmod": advmod, "agent": agent, "amod": amod, "appos": appos, "attr": attr, "aux": aux, "auxpass": auxpass, "cc": cc, "ccomp": ccomp, "complm": complm, "conj": conj, "cop": cop, # U20 "csubj": csubj, "csubjpass": csubjpass, "dep": dep, "det": det, "dobj": dobj, "expl": expl, "hmod": hmod, "hyph": hyph, "infmod": infmod, "intj": intj, "iobj": iobj, "mark": mark, "meta": meta, "neg": neg, "nmod": nmod, "nn": nn, "npadvmod": npadvmod, "nsubj": nsubj, "nsubjpass": nsubjpass, "num": num, "number": number, "oprd": oprd, "obj": obj, # U20 "obl": obl, # U20 "parataxis": parataxis, "partmod": partmod, "pcomp": pcomp, "pobj": pobj, "poss": poss, "possessive": possessive, "preconj": preconj, "prep": prep, "prt": prt, "punct": punct, "quantmod": quantmod, "rcmod": rcmod, "root": root, "xcomp": xcomp, "acl": acl, "LAW": LAW, } def sort_nums(x): return x[1] PRON_LEMMA = "-PRON-" NAMES = [it[0] for it in sorted(IDS.items(), key=sort_nums)] # Unfortunate hack here, to work around problem with long cpdef enum # (which is generating an enormous amount of C++ in Cython 0.24+) # We keep the enum cdef, and just make sure the names are available to Python locals().update(IDS)