diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx
index 3ba50123c..870f05a87 100644
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -62,6 +62,19 @@ def parse_feature(feature):
     return (field_id, offset)
 
 
+def get_field_size(field):
+    '''Return the number of feature values declared for a field.
+
+    field: Field name as it appears in the 'begin_<field>' / 'end_<field>'
+        marker keys of IDS, e.g. 'Case'.
+    Returns the count of IDs lying strictly between the two markers.
+    '''
+    begin = 'begin_%s' % field
+    end = 'end_%s' % field
+    # The begin/end markers themselves are not values, hence the extra -1.
+    return (IDS[end] - IDS[begin]) - 1
+
+
 cdef class Morphology:
     '''Store the possible morphological analyses for a language, and index them
     by hash.
@@ -471,261 +484,6 @@ cdef int is_verb_type_feature(univ_morph_t feature) nogil:
     return feature > begin_VerbType and feature < end_VerbType
 
 
-IDS = {
-    "Animacy_anim": Animacy_anim,
-    "Animacy_inan": Animacy_inan,
-    "Animacy_hum": Animacy_hum, # U20
-    "Animacy_nhum": Animacy_nhum,
-    "Aspect_freq": Aspect_freq,
-    "Aspect_imp": Aspect_imp,
-    "Aspect_mod": Aspect_mod,
-    "Aspect_none": Aspect_none,
-    "Aspect_perf": Aspect_perf,
-    "Case_abe": Case_abe,
-    "Case_abl": Case_abl,
-    "Case_abs": Case_abs,
-    "Case_acc": Case_acc,
-    "Case_ade": Case_ade,
-    "Case_all": Case_all,
-    "Case_cau": Case_cau,
-    "Case_com": Case_com,
-    "Case_dat": Case_dat,
-    "Case_del": Case_del,
-    "Case_dis": Case_dis,
-    "Case_ela": Case_ela,
-    "Case_ess": Case_ess,
-    "Case_gen": Case_gen,
-    "Case_ill": Case_ill,
-    "Case_ine": Case_ine,
-    "Case_ins": Case_ins,
-    "Case_loc": Case_loc,
-    "Case_lat": Case_lat,
-    "Case_nom": Case_nom,
-    "Case_par": Case_par,
-    "Case_sub": Case_sub,
-    "Case_sup": Case_sup,
-    "Case_tem": Case_tem,
-    "Case_ter": Case_ter,
-    "Case_tra": Case_tra,
-    "Case_voc": Case_voc,
-    "Definite_two": Definite_two,
-    "Definite_def": Definite_def,
-    "Definite_red": Definite_red,
-    "Definite_cons": Definite_cons, # U20
-    "Definite_ind": Definite_ind,
-    "Degree_cmp": Degree_cmp,
-    "Degree_comp": Degree_comp,
-    "Degree_none": Degree_none,
-    "Degree_pos": Degree_pos,
-    "Degree_sup": Degree_sup,
-    "Degree_abs": Degree_abs,
-    "Degree_com": Degree_com,
-    "Degree_dim ": Degree_dim, # du
-    "Gender_com": Gender_com,
-    "Gender_fem": Gender_fem,
-    "Gender_masc": Gender_masc,
-    "Gender_neut": Gender_neut,
-    "Mood_cnd": Mood_cnd,
-    "Mood_imp": Mood_imp,
-    "Mood_ind": Mood_ind,
-    "Mood_n": Mood_n,
-    "Mood_pot": Mood_pot,
-    "Mood_sub": Mood_sub,
-    "Mood_opt": Mood_opt,
-    "Negative_neg": Negative_neg,
-    "Negative_pos": Negative_pos,
-    "Negative_yes": Negative_yes,
-    "Polarity_neg": Polarity_neg, # U20
-    "Polarity_pos": Polarity_pos, # U20
-    "Number_com": Number_com,
-    "Number_dual": Number_dual,
-    "Number_none": Number_none,
-    "Number_plur": Number_plur,
-    "Number_sing": Number_sing,
-    "Number_ptan ": Number_ptan, # bg
-    "Number_count ": Number_count, # bg
-    "NumType_card": NumType_card,
-    "NumType_dist": NumType_dist,
-    "NumType_frac": NumType_frac,
-    "NumType_gen": NumType_gen,
-    "NumType_mult": NumType_mult,
-    "NumType_none": NumType_none,
-    "NumType_ord": NumType_ord,
-    "NumType_sets": NumType_sets,
-    "Person_one": Person_one,
-    "Person_two": Person_two,
-    "Person_three": Person_three,
-    "Person_none": Person_none,
-    "Poss_yes": Poss_yes,
-    "PronType_advPart": PronType_advPart,
-    "PronType_art": PronType_art,
-    "PronType_default": PronType_default,
-    "PronType_dem": PronType_dem,
-    "PronType_ind": PronType_ind,
-    "PronType_int": PronType_int,
-    "PronType_neg": PronType_neg,
-    "PronType_prs": PronType_prs,
-    "PronType_rcp": PronType_rcp,
-    "PronType_rel": PronType_rel,
-    "PronType_tot": PronType_tot,
-    "PronType_clit": PronType_clit,
-    "PronType_exc ": PronType_exc, # es, ca, it, fa,
-    "Reflex_yes": Reflex_yes,
-    "Tense_fut": Tense_fut,
-    "Tense_imp": Tense_imp,
-    "Tense_past": Tense_past,
-    "Tense_pres": Tense_pres,
-    "VerbForm_fin": VerbForm_fin,
-    "VerbForm_ger": VerbForm_ger,
-    "VerbForm_inf": VerbForm_inf,
-    "VerbForm_none": VerbForm_none,
-    "VerbForm_part": VerbForm_part,
-    "VerbForm_partFut": VerbForm_partFut,
-    "VerbForm_partPast": VerbForm_partPast,
-    "VerbForm_partPres": VerbForm_partPres,
-    "VerbForm_sup": VerbForm_sup,
-    "VerbForm_trans": VerbForm_trans,
-    "VerbForm_conv": VerbForm_conv, # U20
-    "VerbForm_gdv ": VerbForm_gdv, # la,
-    "Voice_act": Voice_act,
-    "Voice_cau": Voice_cau,
-    "Voice_pass": Voice_pass,
-    "Voice_mid ": Voice_mid, # gkc,
-    "Voice_int ": Voice_int, # hb,
-    "Abbr_yes ": Abbr_yes, # cz, fi, sl, U,
-    "AdpType_prep ": AdpType_prep, # cz, U,
-    "AdpType_post ": AdpType_post, # U,
-    "AdpType_voc ": AdpType_voc, # cz,
-    "AdpType_comprep ": AdpType_comprep, # cz,
-    "AdpType_circ ": AdpType_circ, # U,
-    "AdvType_man": AdvType_man,
-    "AdvType_loc": AdvType_loc,
-    "AdvType_tim": AdvType_tim,
-    "AdvType_deg": AdvType_deg,
-    "AdvType_cau": AdvType_cau,
-    "AdvType_mod": AdvType_mod,
-    "AdvType_sta": AdvType_sta,
-    "AdvType_ex": AdvType_ex,
-    "AdvType_adadj": AdvType_adadj,
-    "ConjType_oper ": ConjType_oper, # cz, U,
-    "ConjType_comp ": ConjType_comp, # cz, U,
-    "Connegative_yes ": Connegative_yes, # fi,
-    "Derivation_minen ": Derivation_minen, # fi,
-    "Derivation_sti ": Derivation_sti, # fi,
-    "Derivation_inen ": Derivation_inen, # fi,
-    "Derivation_lainen ": Derivation_lainen, # fi,
-    "Derivation_ja ": Derivation_ja, # fi,
-    "Derivation_ton ": Derivation_ton, # fi,
-    "Derivation_vs ": Derivation_vs, # fi,
-    "Derivation_ttain ": Derivation_ttain, # fi,
-    "Derivation_ttaa ": Derivation_ttaa, # fi,
-    "Echo_rdp ": Echo_rdp, # U,
-    "Echo_ech ": Echo_ech, # U,
-    "Foreign_foreign ": Foreign_foreign, # cz, fi, U,
-    "Foreign_fscript ": Foreign_fscript, # cz, fi, U,
-    "Foreign_tscript ": Foreign_tscript, # cz, U,
-    "Foreign_yes ": Foreign_yes, # sl,
-    "Gender_dat_masc ": Gender_dat_masc, # bq, U,
-    "Gender_dat_fem ": Gender_dat_fem, # bq, U,
-    "Gender_erg_masc ": Gender_erg_masc, # bq,
-    "Gender_erg_fem ": Gender_erg_fem, # bq,
-    "Gender_psor_masc ": Gender_psor_masc, # cz, sl, U,
-    "Gender_psor_fem ": Gender_psor_fem, # cz, sl, U,
-    "Gender_psor_neut ": Gender_psor_neut, # sl,
-    "Hyph_yes ": Hyph_yes, # cz, U,
-    "InfForm_one ": InfForm_one, # fi,
-    "InfForm_two ": InfForm_two, # fi,
-    "InfForm_three ": InfForm_three, # fi,
-    "NameType_geo ": NameType_geo, # U, cz,
-    "NameType_prs ": NameType_prs, # U, cz,
-    "NameType_giv ": NameType_giv, # U, cz,
-    "NameType_sur ": NameType_sur, # U, cz,
-    "NameType_nat ": NameType_nat, # U, cz,
-    "NameType_com ": NameType_com, # U, cz,
-    "NameType_pro ": NameType_pro, # U, cz,
-    "NameType_oth ": NameType_oth, # U, cz,
-    "NounType_com ": NounType_com, # U,
-    "NounType_prop ": NounType_prop, # U,
-    "NounType_class ": NounType_class, # U,
-    "Number_abs_sing ": Number_abs_sing, # bq, U,
-    "Number_abs_plur ": Number_abs_plur, # bq, U,
-    "Number_dat_sing ": Number_dat_sing, # bq, U,
-    "Number_dat_plur ": Number_dat_plur, # bq, U,
-    "Number_erg_sing ": Number_erg_sing, # bq, U,
-    "Number_erg_plur ": Number_erg_plur, # bq, U,
-    "Number_psee_sing ": Number_psee_sing, # U,
-    "Number_psee_plur ": Number_psee_plur, # U,
-    "Number_psor_sing ": Number_psor_sing, # cz, fi, sl, U,
-    "Number_psor_plur ": Number_psor_plur, # cz, fi, sl, U,
-    "NumForm_digit ": NumForm_digit, # cz, sl, U,
-    "NumForm_roman ": NumForm_roman, # cz, sl, U,
-    "NumForm_word ": NumForm_word, # cz, sl, U,
-    "NumValue_one ": NumValue_one, # cz, U,
-    "NumValue_two ": NumValue_two, # cz, U,
-    "NumValue_three ": NumValue_three, # cz, U,
-    "PartForm_pres ": PartForm_pres, # fi,
-    "PartForm_past ": PartForm_past, # fi,
-    "PartForm_agt ": PartForm_agt, # fi,
-    "PartForm_neg ": PartForm_neg, # fi,
-    "PartType_mod ": PartType_mod, # U,
-    "PartType_emp ": PartType_emp, # U,
-    "PartType_res ": PartType_res, # U,
-    "PartType_inf ": PartType_inf, # U,
-    "PartType_vbp ": PartType_vbp, # U,
-    "Person_abs_one ": Person_abs_one, # bq, U,
-    "Person_abs_two ": Person_abs_two, # bq, U,
-    "Person_abs_three ": Person_abs_three, # bq, U,
-    "Person_dat_one ": Person_dat_one, # bq, U,
-    "Person_dat_two ": Person_dat_two, # bq, U,
-    "Person_dat_three ": Person_dat_three, # bq, U,
-    "Person_erg_one ": Person_erg_one, # bq, U,
-    "Person_erg_two ": Person_erg_two, # bq, U,
-    "Person_erg_three ": Person_erg_three, # bq, U,
-    "Person_psor_one ": Person_psor_one, # fi, U,
-    "Person_psor_two ": Person_psor_two, # fi, U,
-    "Person_psor_three ": Person_psor_three, # fi, U,
-    "Polite_inf ": Polite_inf, # bq, U,
-    "Polite_pol ": Polite_pol, # bq, U,
-    "Polite_abs_inf ": Polite_abs_inf, # bq, U,
-    "Polite_abs_pol ": Polite_abs_pol, # bq, U,
-    "Polite_erg_inf ": Polite_erg_inf, # bq, U,
-    "Polite_erg_pol ": Polite_erg_pol, # bq, U,
-    "Polite_dat_inf ": Polite_dat_inf, # bq, U,
-    "Polite_dat_pol ": Polite_dat_pol, # bq, U,
-    "Prefix_yes ": Prefix_yes, # U,
-    "PrepCase_npr ": PrepCase_npr, # cz,
-    "PrepCase_pre ": PrepCase_pre, # U,
-    "PunctSide_ini ": PunctSide_ini, # U,
-    "PunctSide_fin ": PunctSide_fin, # U,
-    "PunctType_peri ": PunctType_peri, # U,
-    "PunctType_qest ": PunctType_qest, # U,
-    "PunctType_excl ": PunctType_excl, # U,
-    "PunctType_quot ": PunctType_quot, # U,
-    "PunctType_brck ": PunctType_brck, # U,
-    "PunctType_comm ": PunctType_comm, # U,
-    "PunctType_colo ": PunctType_colo, # U,
-    "PunctType_semi ": PunctType_semi, # U,
-    "PunctType_dash ": PunctType_dash, # U,
-    "Style_arch ": Style_arch, # cz, fi, U,
-    "Style_rare ": Style_rare, # cz, fi, U,
-    "Style_poet ": Style_poet, # cz, U,
-    "Style_norm ": Style_norm, # cz, U,
-    "Style_coll ": Style_coll, # cz, U,
-    "Style_vrnc ": Style_vrnc, # cz, U,
-    "Style_sing ": Style_sing, # cz, U,
-    "Style_expr ": Style_expr, # cz, U,
-    "Style_derg ": Style_derg, # cz, U,
-    "Style_vulg ": Style_vulg, # cz, U,
-    "Style_yes ": Style_yes, # fi, U,
-    "StyleVariant_styleShort ": StyleVariant_styleShort, # cz,
-    "StyleVariant_styleBound ": StyleVariant_styleBound, # cz, sl,
-    "VerbType_aux ": VerbType_aux, # U,
-    "VerbType_cop ": VerbType_cop, # U,
-    "VerbType_mod ": VerbType_mod, # U,
-    "VerbType_light ": VerbType_light, # U,
-}
-
-
 FIELDS = {
     'Abbr': 0,
     'AdpType': 1,
@@ -770,6 +528,348 @@ FIELDS = {
     'VerbType': 40
 }
 
+IDS = {
+    "begin_Abbr": begin_Abbr,
+    "Abbr_yes": Abbr_yes,
+    "end_Abbr": end_Abbr,
+    "begin_AdpType": begin_AdpType,
+    "AdpType_circ": AdpType_circ,
+    "AdpType_comprep": AdpType_comprep,
+    "AdpType_prep": AdpType_prep,
+    "AdpType_post": AdpType_post,
+    "AdpType_voc": AdpType_voc,
+    "end_AdpType": end_AdpType,
+    "begin_AdvType": begin_AdvType,
+    "AdvType_adadj": AdvType_adadj,
+    "AdvType_cau": AdvType_cau,
+    "AdvType_deg": AdvType_deg,
+    "AdvType_ex": AdvType_ex,
+    "AdvType_loc": AdvType_loc,
+    "AdvType_man": AdvType_man,
+    "AdvType_mod": AdvType_mod,
+    "AdvType_sta": AdvType_sta,
+    "AdvType_tim": AdvType_tim,
+    "end_AdvType": end_AdvType,
+    "begin_Animacy": begin_Animacy,
+    "Animacy_anim": Animacy_anim,
+    "Animacy_hum": Animacy_hum,
+    "Animacy_inan": Animacy_inan,
+    "Animacy_nhum": Animacy_nhum,
+    "end_Animacy": end_Animacy,
+    "begin_Aspect": begin_Aspect,
+    "Aspect_freq": Aspect_freq,
+    "Aspect_imp": Aspect_imp,
+    "Aspect_mod": Aspect_mod,
+    "Aspect_none": Aspect_none,
+    "Aspect_perf": Aspect_perf,
+    "end_Aspect": end_Aspect,
+    "begin_Case": begin_Case,
+    "Case_abe": Case_abe,
+    "Case_abl": Case_abl,
+    "Case_abs": Case_abs,
+    "Case_acc": Case_acc,
+    "Case_ade": Case_ade,
+    "Case_all": Case_all,
+    "Case_cau": Case_cau,
+    "Case_com": Case_com,
+    "Case_dat": Case_dat,
+    "Case_del": Case_del,
+    "Case_dis": Case_dis,
+    "Case_ela": Case_ela,
+    "Case_ess": Case_ess,
+    "Case_gen": Case_gen,
+    "Case_ill": Case_ill,
+    "Case_ine": Case_ine,
+    "Case_ins": Case_ins,
+    "Case_loc": Case_loc,
+    "Case_lat": Case_lat,
+    "Case_nom": Case_nom,
+    "Case_par": Case_par,
+    "Case_sub": Case_sub,
+    "Case_sup": Case_sup,
+    "Case_tem": Case_tem,
+    "Case_ter": Case_ter,
+    "Case_tra": Case_tra,
+    "Case_voc": Case_voc,
+    "end_Case": end_Case,
+    "begin_ConjType": begin_ConjType,
+    "ConjType_comp": ConjType_comp,
+    "ConjType_oper": ConjType_oper,
+    "end_ConjType": end_ConjType,
+    "begin_Connegative": begin_Connegative,
+    "Connegative_yes": Connegative_yes,
+    "end_Connegative": end_Connegative,
+    "begin_Definite": begin_Definite,
+    "Definite_cons": Definite_cons,
+    "Definite_def": Definite_def,
+    "Definite_ind": Definite_ind,
+    "Definite_red": Definite_red,
+    "Definite_two": Definite_two,
+    "end_Definite": end_Definite,
+    "begin_Degree": begin_Degree,
+    "Degree_abs": Degree_abs,
+    "Degree_cmp": Degree_cmp,
+    "Degree_comp": Degree_comp,
+    "Degree_none": Degree_none,
+    "Degree_pos": Degree_pos,
+    "Degree_sup": Degree_sup,
+    "Degree_com": Degree_com,
+    "Degree_dim": Degree_dim,
+    "end_Degree": end_Degree,
+    "begin_Gender": begin_Gender,
+    "Gender_com": Gender_com,
+    "Gender_fem": Gender_fem,
+    "Gender_masc": Gender_masc,
+    "Gender_neut": Gender_neut,
+    "Gender_dat_masc": Gender_dat_masc,
+    "Gender_dat_fem": Gender_dat_fem,
+    "Gender_erg_masc": Gender_erg_masc,
+    "Gender_erg_fem": Gender_erg_fem,
+    "Gender_psor_masc": Gender_psor_masc,
+    "Gender_psor_fem": Gender_psor_fem,
+    "Gender_psor_neut": Gender_psor_neut,
+    "end_Gender": end_Gender,
+    "begin_Mood": begin_Mood,
+    "Mood_cnd": Mood_cnd,
+    "Mood_imp": Mood_imp,
+    "Mood_ind": Mood_ind,
+    "Mood_n": Mood_n,
+    "Mood_pot": Mood_pot,
+    "Mood_sub": Mood_sub,
+    "Mood_opt": Mood_opt,
+    "end_Mood": end_Mood,
+    "begin_Negative": begin_Negative,
+    "Negative_neg": Negative_neg,
+    "Negative_pos": Negative_pos,
+    "Negative_yes": Negative_yes,
+    "end_Negative": end_Negative,
+    "begin_Polarity": begin_Polarity,
+    "Polarity_neg": Polarity_neg,
+    "Polarity_pos": Polarity_pos,
+    "end_Polarity": end_Polarity,
+    "begin_Number": begin_Number,
+    "Number_com": Number_com,
+    "Number_dual": Number_dual,
+    "Number_none": Number_none,
+    "Number_plur": Number_plur,
+    "Number_sing": Number_sing,
+    "Number_ptan": Number_ptan,
+    "Number_count": Number_count,
+    "Number_abs_sing": Number_abs_sing,
+    "Number_abs_plur": Number_abs_plur,
+    "Number_dat_sing": Number_dat_sing,
+    "Number_dat_plur": Number_dat_plur,
+    "Number_erg_sing": Number_erg_sing,
+    "Number_erg_plur": Number_erg_plur,
+    "Number_psee_sing": Number_psee_sing,
+    "Number_psee_plur": Number_psee_plur,
+    "Number_psor_sing": Number_psor_sing,
+    "Number_psor_plur": Number_psor_plur,
+    "end_Number": end_Number,
+    "begin_NumType": begin_NumType,
+    "NumType_card": NumType_card,
+    "NumType_dist": NumType_dist,
+    "NumType_frac": NumType_frac,
+    "NumType_gen": NumType_gen,
+    "NumType_mult": NumType_mult,
+    "NumType_none": NumType_none,
+    "NumType_ord": NumType_ord,
+    "NumType_sets": NumType_sets,
+    "end_NumType": end_NumType,
+    "begin_Person": begin_Person,
+    "Person_one": Person_one,
+    "Person_two": Person_two,
+    "Person_three": Person_three,
+    "Person_none": Person_none,
+    "Person_abs_one": Person_abs_one,
+    "Person_abs_two": Person_abs_two,
+    "Person_abs_three": Person_abs_three,
+    "Person_dat_one": Person_dat_one,
+    "Person_dat_two": Person_dat_two,
+    "Person_dat_three": Person_dat_three,
+    "Person_erg_one": Person_erg_one,
+    "Person_erg_two": Person_erg_two,
+    "Person_erg_three": Person_erg_three,
+    "Person_psor_one": Person_psor_one,
+    "Person_psor_two": Person_psor_two,
+    "Person_psor_three": Person_psor_three,
+    "end_Person": end_Person,
+    "begin_Poss": begin_Poss,
+    "Poss_yes": Poss_yes,
+    "end_Poss": end_Poss,
+    "begin_PronType": begin_PronType,
+    "PronType_advPart": PronType_advPart,
+    "PronType_art": PronType_art,
+    "PronType_default": PronType_default,
+    "PronType_dem": PronType_dem,
+    "PronType_ind": PronType_ind,
+    "PronType_int": PronType_int,
+    "PronType_neg": PronType_neg,
+    "PronType_prs": PronType_prs,
+    "PronType_rcp": PronType_rcp,
+    "PronType_rel": PronType_rel,
+    "PronType_tot": PronType_tot,
+    "PronType_clit": PronType_clit,
+    "PronType_exc": PronType_exc,
+    "end_PronType": end_PronType,
+    "begin_Reflex": begin_Reflex,
+    "Reflex_yes": Reflex_yes,
+    "end_Reflex": end_Reflex,
+    "begin_Tense": begin_Tense,
+    "Tense_fut": Tense_fut,
+    "Tense_imp": Tense_imp,
+    "Tense_past": Tense_past,
+    "Tense_pres": Tense_pres,
+    "end_Tense": end_Tense,
+    "begin_VerbForm": begin_VerbForm,
+    "VerbForm_fin": VerbForm_fin,
+    "VerbForm_ger": VerbForm_ger,
+    "VerbForm_inf": VerbForm_inf,
+    "VerbForm_none": VerbForm_none,
+    "VerbForm_part": VerbForm_part,
+    "VerbForm_partFut": VerbForm_partFut,
+    "VerbForm_partPast": VerbForm_partPast,
+    "VerbForm_partPres": VerbForm_partPres,
+    "VerbForm_sup": VerbForm_sup,
+    "VerbForm_trans": VerbForm_trans,
+    "VerbForm_conv": VerbForm_conv,
+    "VerbForm_gdv": VerbForm_gdv,
+    "end_VerbForm": end_VerbForm,
+    "begin_Voice": begin_Voice,
+    "Voice_act": Voice_act,
+    "Voice_cau": Voice_cau,
+    "Voice_pass": Voice_pass,
+    "Voice_mid": Voice_mid,
+    "Voice_int": Voice_int,
+    "end_Voice": end_Voice,
+    "begin_Derivation": begin_Derivation,
+    "Derivation_minen": Derivation_minen,
+    "Derivation_sti": Derivation_sti,
+    "Derivation_inen": Derivation_inen,
+    "Derivation_lainen": Derivation_lainen,
+    "Derivation_ja": Derivation_ja,
+    "Derivation_ton": Derivation_ton,
+    "Derivation_vs": Derivation_vs,
+    "Derivation_ttain": Derivation_ttain,
+    "Derivation_ttaa": Derivation_ttaa,
+    "end_Derivation": end_Derivation,
+    "begin_Echo": begin_Echo,
+    "Echo_rdp": Echo_rdp,
+    "Echo_ech": Echo_ech,
+    "end_Echo": end_Echo,
+    "begin_Foreign": begin_Foreign,
+    "Foreign_foreign": Foreign_foreign,
+    "Foreign_fscript": Foreign_fscript,
+    "Foreign_tscript": Foreign_tscript,
+    "Foreign_yes": Foreign_yes,
+    "end_Foreign": end_Foreign,
+    "begin_Hyph": begin_Hyph,
+    "Hyph_yes": Hyph_yes,
+    "end_Hyph": end_Hyph,
+    "begin_InfForm": begin_InfForm,
+    "InfForm_one": InfForm_one,
+    "InfForm_two": InfForm_two,
+    "InfForm_three": InfForm_three,
+    "end_InfForm": end_InfForm,
+    "begin_NameType": begin_NameType,
+    "NameType_geo": NameType_geo,
+    "NameType_prs": NameType_prs,
+    "NameType_giv": NameType_giv,
+    "NameType_sur": NameType_sur,
+    "NameType_nat": NameType_nat,
+    "NameType_com": NameType_com,
+    "NameType_pro": NameType_pro,
+    "NameType_oth": NameType_oth,
+    "end_NameType": end_NameType,
+    "begin_NounType": begin_NounType,
+    "NounType_com": NounType_com,
+    "NounType_prop": NounType_prop,
+    "NounType_class": NounType_class,
+    "end_NounType": end_NounType,
+    "begin_NumForm": begin_NumForm,
+    "NumForm_digit": NumForm_digit,
+    "NumForm_roman": NumForm_roman,
+    "NumForm_word": NumForm_word,
+    "end_NumForm": end_NumForm,
+    "begin_NumValue": begin_NumValue,
+    "NumValue_one": NumValue_one,
+    "NumValue_two": NumValue_two,
+    "NumValue_three": NumValue_three,
+    "end_NumValue": end_NumValue,
+    "begin_PartForm": begin_PartForm,
+    "PartForm_pres": PartForm_pres,
+    "PartForm_past": PartForm_past,
+    "PartForm_agt": PartForm_agt,
+    "PartForm_neg": PartForm_neg,
+    "end_PartForm": end_PartForm,
+    "begin_PartType": begin_PartType,
+    "PartType_mod": PartType_mod,
+    "PartType_emp": PartType_emp,
+    "PartType_res": PartType_res,
+    "PartType_inf": PartType_inf,
+    "PartType_vbp": PartType_vbp,
+    "end_PartType": end_PartType,
+    "begin_Polite": begin_Polite,
+    "Polite_inf": Polite_inf,
+    "Polite_pol": Polite_pol,
+    "Polite_abs_inf": Polite_abs_inf,
+    "Polite_abs_pol": Polite_abs_pol,
+    "Polite_erg_inf": Polite_erg_inf,
+    "Polite_erg_pol": Polite_erg_pol,
+    "Polite_dat_inf": Polite_dat_inf,
+    "Polite_dat_pol": Polite_dat_pol,
+    "end_Polite": end_Polite,
+    "begin_Prefix": begin_Prefix,
+    "Prefix_yes": Prefix_yes,
+    "end_Prefix": end_Prefix,
+    "begin_PrepCase": begin_PrepCase,
+    "PrepCase_npr": PrepCase_npr,
+    "PrepCase_pre": PrepCase_pre,
+    "end_PrepCase": end_PrepCase,
+    "begin_PunctSide": begin_PunctSide,
+    "PunctSide_ini": PunctSide_ini,
+    "PunctSide_fin": PunctSide_fin,
+    "end_PunctSide": end_PunctSide,
+    "begin_PunctType": begin_PunctType,
+    "PunctType_peri": PunctType_peri,
+    "PunctType_qest": PunctType_qest,
+    "PunctType_excl": PunctType_excl,
+    "PunctType_quot": PunctType_quot,
+    "PunctType_brck": PunctType_brck,
+    "PunctType_comm": PunctType_comm,
+    "PunctType_colo": PunctType_colo,
+    "PunctType_semi": PunctType_semi,
+    "PunctType_dash": PunctType_dash,
+    "end_PunctType": end_PunctType,
+    "begin_Style": begin_Style,
+    "Style_arch": Style_arch,
+    "Style_rare": Style_rare,
+    "Style_poet": Style_poet,
+    "Style_norm": Style_norm,
+    "Style_coll": Style_coll,
+    "Style_vrnc": Style_vrnc,
+    "Style_sing": Style_sing,
+    "Style_expr": Style_expr,
+    "Style_derg": Style_derg,
+    "Style_vulg": Style_vulg,
+    "Style_yes": Style_yes,
+    "end_Style": end_Style,
+    "begin_StyleVariant": begin_StyleVariant,
+    "StyleVariant_styleShort": StyleVariant_styleShort,
+    "StyleVariant_styleBound": StyleVariant_styleBound,
+    "end_StyleVariant": end_StyleVariant,
+    "begin_VerbType": begin_VerbType,
+    "VerbType_aux": VerbType_aux,
+    "VerbType_cop": VerbType_cop,
+    "VerbType_mod": VerbType_mod,
+    "VerbType_light": VerbType_light,
+    "end_VerbType": end_VerbType,
+}
+
+
+# Sort by field index so the list order does not depend on dict iteration order.
+FIELD_SIZES = [get_field_size(field)
+               for field in sorted(FIELDS, key=FIELDS.get)]
 NAMES = {value: key for key, value in IDS.items()}
 # Unfortunate hack here, to work around problem with long cpdef enum