From fed0371db753765425521243b5325fd09296dd4a Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Thu, 7 Mar 2019 17:14:57 +0100
Subject: [PATCH] Remove enums from morphology

---
 spacy/morphology.pxd                          |  385 ------
 spacy/morphology.pyx                          | 1104 +++++++----------
 spacy/pipeline/morphologizer.pyx              |    6 +-
 spacy/structs.pxd                             |    1 -
 spacy/tests/doc/test_retokenize_merge.py      |    1 -
 spacy/tests/morphology/test_morph_features.py |    8 +-
 spacy/tokens/token.pyx                        |    5 +
 7 files changed, 487 insertions(+), 1023 deletions(-)

diff --git a/spacy/morphology.pxd b/spacy/morphology.pxd
index 24e54bdee..a057e8ed8 100644
--- a/spacy/morphology.pxd
+++ b/spacy/morphology.pxd
@@ -31,388 +31,3 @@ cdef class Morphology:
     cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1
 
     cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1
-
-
-cdef enum univ_morph_t:
-    NIL = 0
-
-    begin_Abbr
-    Abbr_yes
-    end_Abbr
-
-    begin_AdpType
-    AdpType_circ 
-    AdpType_comprep 
-    AdpType_prep 
-    AdpType_post 
-    AdpType_voc 
-    end_AdpType
-
-    begin_AdvType
-    AdvType_adadj
-    AdvType_cau
-    AdvType_deg
-    AdvType_ex
-    AdvType_loc
-    AdvType_man
-    AdvType_mod
-    AdvType_sta
-    AdvType_tim
-    end_AdvType
-
-    begin_Animacy
-    Animacy_anim
-    Animacy_hum
-    Animacy_inan
-    Animacy_nhum
-    end_Animacy
-
-    begin_Aspect
-    Aspect_freq
-    Aspect_imp
-    Aspect_mod
-    Aspect_none
-    Aspect_perf
-    end_Aspect
-
-    begin_Case
-    Case_abe
-    Case_abl
-    Case_abs
-    Case_acc
-    Case_ade
-    Case_all
-    Case_cau
-    Case_com
-    Case_dat
-    Case_del
-    Case_dis
-    Case_ela
-    Case_ess
-    Case_gen
-    Case_ill
-    Case_ine
-    Case_ins
-    Case_loc
-    Case_lat
-    Case_nom
-    Case_par
-    Case_sub
-    Case_sup
-    Case_tem
-    Case_ter
-    Case_tra
-    Case_voc
-    end_Case
-
-    begin_ConjType
-    ConjType_comp # cz, U
-    ConjType_oper # cz, U
-    end_ConjType
-    begin_Connegative
-    Connegative_yes # fi
-    end_Connegative
-
-    begin_Definite
-    Definite_cons # U20
-    Definite_def
-    Definite_ind
-    Definite_red
-    Definite_two
-    end_Definite
-
-    begin_Degree
-    Degree_abs
-    Degree_cmp
-    Degree_comp
-    Degree_none
-    Degree_pos
-    Degree_sup
-    Degree_com
-    Degree_dim # du
-    end_Degree
-
-    begin_Derivation
-    Derivation_minen # fi
-    Derivation_sti # fi
-    Derivation_inen # fi
-    Derivation_lainen # fi
-    Derivation_ja # fi
-    Derivation_ton # fi
-    Derivation_vs # fi
-    Derivation_ttain # fi
-    Derivation_ttaa # fi
-    end_Derivation
-
-    begin_Echo
-    Echo_rdp # U
-    Echo_ech # U
-    end_Echo
-
-    begin_Foreign
-    Foreign_foreign # cz, fi, U
-    Foreign_fscript # cz, fi, U
-    Foreign_tscript # cz, U
-    Foreign_yes # sl
-    end_Foreign
-
-    begin_Gender
-    Gender_com
-    Gender_fem
-    Gender_masc
-    Gender_neut
-    Gender_dat_masc # bq, U
-    Gender_dat_fem # bq, U
-    Gender_erg_masc # bq
-    Gender_erg_fem # bq
-    Gender_psor_masc # cz, sl, U
-    Gender_psor_fem # cz, sl, U
-    Gender_psor_neut # sl
-    end_Gender
-
-    begin_Hyph
-    Hyph_yes # cz, U
-    end_Hyph
-
-    begin_InfForm
-    InfForm_one # fi
-    InfForm_two # fi
-    InfForm_three # fi
-    end_InfForm
-
-    begin_Mood
-    Mood_cnd
-    Mood_imp
-    Mood_ind
-    Mood_n
-    Mood_pot
-    Mood_sub
-    Mood_opt
-    end_Mood
-
-    begin_NameType
-    NameType_geo # U, cz
-    NameType_prs # U, cz
-    NameType_giv # U, cz
-    NameType_sur # U, cz
-    NameType_nat # U, cz
-    NameType_com # U, cz
-    NameType_pro # U, cz
-    NameType_oth # U, cz
-    end_NameType
-
-    begin_Negative
-    Negative_neg
-    Negative_pos
-    Negative_yes
-    end_Negative
-
-    begin_NounType
-    NounType_com # U
-    NounType_prop # U
-    NounType_class # U
-    end_NounType
-
-    begin_Number
-    Number_com
-    Number_dual
-    Number_none
-    Number_plur
-    Number_sing
-    Number_ptan # bg
-    Number_count # bg
-    Number_abs_sing # bq, U
-    Number_abs_plur # bq, U
-    Number_dat_sing # bq, U
-    Number_dat_plur # bq, U
-    Number_erg_sing # bq, U
-    Number_erg_plur # bq, U
-    Number_psee_sing # U
-    Number_psee_plur # U
-    Number_psor_sing # cz, fi, sl, U
-    Number_psor_plur # cz, fi, sl, U
-    end_Number
-    
-    begin_NumForm
-    NumForm_digit # cz, sl, U
-    NumForm_roman # cz, sl, U
-    NumForm_word # cz, sl, U
-    end_NumForm
-
-    begin_NumType
-    NumType_card
-    NumType_dist
-    NumType_frac
-    NumType_gen
-    NumType_mult
-    NumType_none
-    NumType_ord
-    NumType_sets
-    end_NumType
-    
-    begin_NumValue
-    NumValue_one # cz, U
-    NumValue_two # cz, U
-    NumValue_three # cz, U
-    end_NumValue
-
-    begin_PartForm
-    PartForm_pres # fi
-    PartForm_past # fi
-    PartForm_agt # fi
-    PartForm_neg # fi
-    end_PartForm
-
-    begin_PartType
-    PartType_mod # U
-    PartType_emp # U
-    PartType_res # U
-    PartType_inf # U
-    PartType_vbp # U
-    end_PartType
-
-    begin_Person 
-    Person_one
-    Person_two
-    Person_three
-    Person_none
-    Person_abs_one # bq, U
-    Person_abs_two # bq, U
-    Person_abs_three # bq, U
-    Person_dat_one # bq, U
-    Person_dat_two # bq, U
-    Person_dat_three # bq, U
-    Person_erg_one # bq, U
-    Person_erg_two # bq, U
-    Person_erg_three # bq, U
-    Person_psor_one # fi, U
-    Person_psor_two # fi, U
-    Person_psor_three # fi, U
-    end_Person
-
-    begin_Polarity
-    Polarity_neg # U20
-    Polarity_pos # U20
-    end_Polarity
-    
-    begin_Polite
-    Polite_inf # bq, U
-    Polite_pol # bq, U
-    Polite_abs_inf # bq, U
-    Polite_abs_pol # bq, U
-    Polite_erg_inf # bq, U
-    Polite_erg_pol # bq, U
-    Polite_dat_inf # bq, U
-    Polite_dat_pol # bq, U
-    end_Polite
-
-    begin_Poss
-    Poss_yes
-    end_Poss
-    
-    begin_Prefix
-    Prefix_yes # U
-    end_Prefix
-    
-    begin_PrepCase
-    PrepCase_npr # cz
-    PrepCase_pre # U
-    end_PrepCase
-
-    begin_PronType
-    PronType_advPart
-    PronType_art
-    PronType_default
-    PronType_dem
-    PronType_ind
-    PronType_int
-    PronType_neg
-    PronType_prs
-    PronType_rcp
-    PronType_rel
-    PronType_tot
-    PronType_clit
-    PronType_exc # es, ca, it, fa
-    end_PronType
-
-    begin_PunctSide
-    PunctSide_ini # U
-    PunctSide_fin # U
-    end_PunctSide
-
-    begin_PunctType
-    PunctType_peri # U
-    PunctType_qest # U
-    PunctType_excl # U
-    PunctType_quot # U
-    PunctType_brck # U
-    PunctType_comm # U
-    PunctType_colo # U
-    PunctType_semi # U
-    PunctType_dash # U
-    end_PunctType
-
-    begin_Reflex
-    Reflex_yes
-    end_Reflex
-
-    begin_Style
-    Style_arch # cz, fi, U
-    Style_rare # cz, fi, U
-    Style_poet # cz, U
-    Style_norm # cz, U
-    Style_coll # cz, U
-    Style_vrnc # cz, U
-    Style_sing # cz, U
-    Style_expr # cz, U
-    Style_derg # cz, U
-    Style_vulg # cz, U
-    Style_yes # fi, U
-    end_Style
-
-    begin_StyleVariant
-    StyleVariant_styleShort # cz
-    StyleVariant_styleBound # cz, sl
-    end_StyleVariant
-    
-    begin_Tense
-    Tense_fut
-    Tense_imp
-    Tense_past
-    Tense_pres
-    end_Tense
-
-    begin_Typo
-    Typo_yes
-    end_Typo
-    
-    begin_VerbForm
-    VerbForm_fin
-    VerbForm_ger
-    VerbForm_inf
-    VerbForm_none
-    VerbForm_part
-    VerbForm_partFut
-    VerbForm_partPast
-    VerbForm_partPres
-    VerbForm_sup
-    VerbForm_trans
-    VerbForm_conv # U20
-    VerbForm_gdv # la
-    end_VerbForm
-
-    begin_VerbType
-    VerbType_aux # U
-    VerbType_cop # U
-    VerbType_mod # U
-    VerbType_light # U
-    end_VerbType
-
-    begin_Voice
-    Voice_act
-    Voice_cau
-    Voice_pass
-    Voice_mid # gkc
-    Voice_int # hb
-    end_Voice
-
diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx
index 52acfedfb..1157c2502 100644
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 
 from libc.string cimport memset
 import srsly
+from collections import Counter
 
 from .strings import get_string_id
 from . import symbols
@@ -14,6 +15,50 @@ from .parts_of_speech import IDS as POS_IDS
 from .lexeme cimport Lexeme
 from .errors import Errors
 
+cdef enum univ_field_t:  # One member per morphological field: FIELDS maps field names to these and set_feature() dispatches on them.
+    Field_Abbr
+    Field_AdpType
+    Field_AdvType
+    Field_Animacy
+    Field_Aspect
+    Field_Case
+    Field_ConjType
+    Field_Connegative
+    Field_Definite
+    Field_Degree
+    Field_Derivation
+    Field_Echo
+    Field_Foreign
+    Field_Gender
+    Field_Hyph
+    Field_InfForm
+    Field_Mood
+    Field_NameType
+    Field_Negative
+    Field_NounType
+    Field_Number
+    Field_NumForm
+    Field_NumType
+    Field_NumValue
+    Field_PartForm
+    Field_PartType
+    Field_Person
+    Field_Polite
+    Field_Polarity
+    Field_Poss
+    Field_Prefix
+    Field_PrepCase
+    Field_PronType
+    Field_PunctSide
+    Field_PunctType
+    Field_Reflex
+    Field_Style
+    Field_StyleVariant
+    Field_Tense
+    Field_Typo
+    Field_VerbForm
+    Field_Voice
+    Field_VerbType
 
 
 def _normalize_props(props):
@@ -23,7 +68,7 @@ def _normalize_props(props):
     for key in FIELDS:
         if key in props:
             attr = '%s_%s' % (key, props[key])
-            if attr in IDS:
+            if attr in FEATURES:
                 props.pop(key)
                 props[attr] = True
     for key, value in props.items():
@@ -43,21 +88,21 @@ def _normalize_props(props):
 
 
 def parse_feature(feature):
-    if not hasattr(feature, 'split'):
-        feature = NAMES[feature]
-    key, value = feature.split('_')
-    begin = 'begin_%s' % key
-    # Note that this includes a 0 offset for the field, for no entry
-    offset = IDS[feature] - IDS[begin]
-    field_id = FIELDS[key]
-    return (field_id, offset)
+    field = FEATURE_FIELDS[feature]
+    offset = FEATURE_OFFSETS[feature]
+    return (field, offset)
+
+
+def get_field_id(feature):  # Return the FEATURE_FIELDS entry for `feature`, i.e. the field that feature belongs to.
+    return FEATURE_FIELDS[feature]  # NOTE(review): update() and create_rich_tag() pass attr_t ids from intify_features() here -- confirm FEATURE_FIELDS is keyed by those ids
 
 
 def get_field_size(field):
-    begin = 'begin_%s' % field
-    end = 'end_%s' % field
-    # Extra field for no entry -- always 0
-    return IDS[end] - IDS[begin]
+    return FIELD_SIZES[field]
+
+
+def get_field_offset(field):  # Return the FIELD_OFFSETS entry for `field`; the table is defined elsewhere in this module.
+    return FIELD_OFFSETS[field]
 
 
 cdef class Morphology:
@@ -105,11 +150,9 @@ cdef class Morphology:
         present. Returns the hash of the new analysis.
         """
         features = intify_features(features)
-        cdef univ_morph_t feature
+        cdef attr_t feature
         for feature in features:
-            if feature != 0 and feature not in NAMES:
-                print(list(NAMES.keys())[:10])
-                print(NAMES.get(feature-1), NAMES.get(feature+1))
+            if feature != 0 and feature not in FEATURE_NAMES:
                 raise KeyError("Unknown feature: %d" % feature)
         cdef MorphAnalysisC tag
         tag = create_rich_tag(features)
@@ -127,9 +170,10 @@ cdef class Morphology:
         """Update a morphological analysis with new feature values."""
         tag = (<MorphAnalysisC*>self.tags.get(morph))[0]
         features = intify_features(features)
-        cdef univ_morph_t feature
+        cdef attr_t feature
         for feature in features:
-            set_feature(&tag, feature, 1)
+            field = get_field_id(feature)
+            set_feature(&tag, field, feature, 1)
         morph = self.insert(tag)
         return morph
 
@@ -259,729 +303,531 @@ cpdef univ_pos_t get_int_tag(pos_):
     return <univ_pos_t>0
 
 cpdef intify_features(features):
-    return {IDS.get(feature, feature) for feature in features}
+    return {get_string_id(feature) for feature in features}
 
 cdef hash_t hash_tag(MorphAnalysisC tag) nogil:
     return mrmr.hash64(&tag, sizeof(tag), 0)
 
+
+def get_feature_field(feature):  # Return the FEATURE_FIELDS entry for `feature` (same table lookup as get_field_id()).
+    cdef attr_t key = get_string_id(feature)  # NOTE(review): `key` is computed but never read -- the lookup below still indexes by `feature`; confirm which was intended
+    return FEATURE_FIELDS[feature]
+
+
 cdef MorphAnalysisC create_rich_tag(features) except *:
     cdef MorphAnalysisC tag
-    cdef univ_morph_t feature
+    cdef attr_t feature
     memset(&tag, 0, sizeof(tag))
     for feature in features:
-        set_feature(&tag, feature, 1)
+        field = get_field_id(feature)
+        set_feature(&tag, field, feature, 1)
     return tag
 
+
 cdef tag_to_json(MorphAnalysisC tag):
     features = []
     if tag.abbr != 0:
-        features.append(NAMES[tag.abbr])
+        features.append(FEATURE_NAMES[tag.abbr])
     if tag.adp_type != 0:
-        features.append(NAMES[tag.adp_type])
+        features.append(FEATURE_NAMES[tag.adp_type])
     if tag.adv_type != 0:
-        features.append(NAMES[tag.adv_type])
+        features.append(FEATURE_NAMES[tag.adv_type])
     if tag.animacy != 0:
-        features.append(NAMES[tag.animacy])
+        features.append(FEATURE_NAMES[tag.animacy])
     if tag.aspect != 0:
-        features.append(NAMES[tag.aspect])
+        features.append(FEATURE_NAMES[tag.aspect])
     if tag.case != 0:
-        features.append(NAMES[tag.case])
+        features.append(FEATURE_NAMES[tag.case])
     if tag.conj_type != 0:
-        features.append(NAMES[tag.conj_type])
+        features.append(FEATURE_NAMES[tag.conj_type])
     if tag.connegative != 0:
-        features.append(NAMES[tag.connegative])
+        features.append(FEATURE_NAMES[tag.connegative])
     if tag.definite != 0:
-        features.append(NAMES[tag.definite])
+        features.append(FEATURE_NAMES[tag.definite])
     if tag.degree != 0:
-        features.append(NAMES[tag.degree])
+        features.append(FEATURE_NAMES[tag.degree])
     if tag.derivation != 0:
-        features.append(NAMES[tag.derivation])
+        features.append(FEATURE_NAMES[tag.derivation])
     if tag.echo != 0:
-        features.append(NAMES[tag.echo])
+        features.append(FEATURE_NAMES[tag.echo])
     if tag.foreign != 0:
-        features.append(NAMES[tag.foreign])
+        features.append(FEATURE_NAMES[tag.foreign])
     if tag.gender != 0:
-        features.append(NAMES[tag.gender])
+        features.append(FEATURE_NAMES[tag.gender])
     if tag.hyph != 0:
-        features.append(NAMES[tag.hyph])
+        features.append(FEATURE_NAMES[tag.hyph])
     if tag.inf_form != 0:
-        features.append(NAMES[tag.inf_form])
+        features.append(FEATURE_NAMES[tag.inf_form])
     if tag.mood != 0:
-        features.append(NAMES[tag.mood])
+        features.append(FEATURE_NAMES[tag.mood])
     if tag.negative != 0:
-        features.append(NAMES[tag.negative])
+        features.append(FEATURE_NAMES[tag.negative])
     if tag.number != 0:
-        features.append(NAMES[tag.number])
+        features.append(FEATURE_NAMES[tag.number])
     if tag.name_type != 0:
-        features.append(NAMES[tag.name_type])
+        features.append(FEATURE_NAMES[tag.name_type])
     if tag.noun_type != 0:
-        features.append(NAMES[tag.noun_type])
+        features.append(FEATURE_NAMES[tag.noun_type])
     if tag.num_form != 0:
-        features.append(NAMES[tag.num_form])
+        features.append(FEATURE_NAMES[tag.num_form])
     if tag.num_type != 0:
-        features.append(NAMES[tag.num_type])
+        features.append(FEATURE_NAMES[tag.num_type])
     if tag.num_value != 0:
-        features.append(NAMES[tag.num_value])
+        features.append(FEATURE_NAMES[tag.num_value])
     if tag.part_form != 0:
-        features.append(NAMES[tag.part_form])
+        features.append(FEATURE_NAMES[tag.part_form])
     if tag.part_type != 0:
-        features.append(NAMES[tag.part_type])
+        features.append(FEATURE_NAMES[tag.part_type])
     if tag.person != 0:
-        features.append(NAMES[tag.person])
+        features.append(FEATURE_NAMES[tag.person])
     if tag.polite != 0:
-        features.append(NAMES[tag.polite])
+        features.append(FEATURE_NAMES[tag.polite])
     if tag.polarity != 0:
-        features.append(NAMES[tag.polarity])
+        features.append(FEATURE_NAMES[tag.polarity])
     if tag.poss != 0:
-        features.append(NAMES[tag.poss])
+        features.append(FEATURE_NAMES[tag.poss])
     if tag.prefix != 0:
-        features.append(NAMES[tag.prefix])
+        features.append(FEATURE_NAMES[tag.prefix])
     if tag.prep_case != 0:
-        features.append(NAMES[tag.prep_case])
+        features.append(FEATURE_NAMES[tag.prep_case])
     if tag.pron_type != 0:
-        features.append(NAMES[tag.pron_type])
+        features.append(FEATURE_NAMES[tag.pron_type])
     if tag.punct_side != 0:
-        features.append(NAMES[tag.punct_side])
+        features.append(FEATURE_NAMES[tag.punct_side])
     if tag.punct_type != 0:
-        features.append(NAMES[tag.punct_type])
+        features.append(FEATURE_NAMES[tag.punct_type])
     if tag.reflex != 0:
-        features.append(NAMES[tag.reflex])
+        features.append(FEATURE_NAMES[tag.reflex])
     if tag.style != 0:
-        features.append(NAMES[tag.style])
+        features.append(FEATURE_NAMES[tag.style])
     if tag.style_variant != 0:
-        features.append(NAMES[tag.style_variant])
+        features.append(FEATURE_NAMES[tag.style_variant])
     if tag.tense != 0:
-        features.append(NAMES[tag.tense])
+        features.append(FEATURE_NAMES[tag.tense])
     if tag.verb_form != 0:
-        features.append(NAMES[tag.verb_form])
+        features.append(FEATURE_NAMES[tag.verb_form])
     if tag.voice != 0:
-        features.append(NAMES[tag.voice])
+        features.append(FEATURE_NAMES[tag.voice])
     if tag.verb_type != 0:
-        features.append(NAMES[tag.verb_type])
+        features.append(FEATURE_NAMES[tag.verb_type])
     return features
 
 cdef MorphAnalysisC tag_from_json(json_tag):
     cdef MorphAnalysisC tag
     return tag
  
-cdef int set_feature(MorphAnalysisC* tag, univ_morph_t feature, int value) except -1:
+cdef int set_feature(MorphAnalysisC* tag,
+        univ_field_t field, attr_t feature, int value) except -1:
     if value == True:
         value_ = feature
     else:
-        value_ = NIL
-    if feature == NIL:
+        value_ = 0
+    if feature == 0:
         pass
-    elif is_abbr_feature(feature):
+    elif field == Field_Abbr:
         tag.abbr = value_
-    elif is_adp_type_feature(feature):
+    elif field == Field_AdpType:
         tag.adp_type = value_
-    elif is_adv_type_feature(feature):
+    elif field == Field_AdvType:
         tag.adv_type = value_
-    elif is_animacy_feature(feature):
+    elif field == Field_Animacy:
         tag.animacy = value_
-    elif is_aspect_feature(feature):
+    elif field == Field_Aspect:
         tag.aspect = value_
-    elif is_case_feature(feature):
+    elif field == Field_Case:
         tag.case = value_
-    elif is_conj_type_feature(feature):
+    elif field == Field_ConjType:
         tag.conj_type = value_
-    elif is_connegative_feature(feature):
+    elif field == Field_Connegative:
         tag.connegative = value_
-    elif is_definite_feature(feature):
+    elif field == Field_Definite:
         tag.definite = value_
-    elif is_degree_feature(feature):
+    elif field == Field_Degree:
         tag.degree = value_
-    elif is_derivation_feature(feature):
+    elif field == Field_Derivation:
         tag.derivation = value_
-    elif is_echo_feature(feature):
+    elif field == Field_Echo:
         tag.echo = value_
-    elif is_foreign_feature(feature):
+    elif field == Field_Foreign:
         tag.foreign = value_
-    elif is_gender_feature(feature):
+    elif field == Field_Gender:
         tag.gender = value_
-    elif is_hyph_feature(feature):
+    elif field == Field_Hyph:
         tag.hyph = value_
-    elif is_inf_form_feature(feature):
+    elif field == Field_InfForm:
         tag.inf_form = value_
-    elif is_mood_feature(feature):
+    elif field == Field_Mood:
         tag.mood = value_
-    elif is_negative_feature(feature):
+    elif field == Field_Negative:
         tag.negative = value_
-    elif is_number_feature(feature):
+    elif field == Field_Number:
         tag.number = value_
-    elif is_name_type_feature(feature):
+    elif field == Field_NameType:
         tag.name_type = value_
-    elif is_noun_type_feature(feature):
+    elif field == Field_NounType:
         tag.noun_type = value_
-    elif is_num_form_feature(feature):
+    elif field == Field_NumForm:
         tag.num_form = value_
-    elif is_num_type_feature(feature):
+    elif field == Field_NumType:
         tag.num_type = value_
-    elif is_num_value_feature(feature):
+    elif field == Field_NumValue:
         tag.num_value = value_
-    elif is_part_form_feature(feature):
+    elif field == Field_PartForm:
         tag.part_form = value_
-    elif is_part_type_feature(feature):
+    elif field == Field_PartType:
         tag.part_type = value_
-    elif is_person_feature(feature):
+    elif field == Field_Person:
         tag.person = value_
-    elif is_polite_feature(feature):
+    elif field == Field_Polite:
         tag.polite = value_
-    elif is_polarity_feature(feature):
+    elif field == Field_Polarity:
         tag.polarity = value_
-    elif is_poss_feature(feature):
+    elif field == Field_Poss:
         tag.poss = value_
-    elif is_prefix_feature(feature):
+    elif field == Field_Prefix:
         tag.prefix = value_
-    elif is_prep_case_feature(feature):
+    elif field == Field_PrepCase:
         tag.prep_case = value_
-    elif is_pron_type_feature(feature):
+    elif field == Field_PronType:
         tag.pron_type = value_
-    elif is_punct_side_feature(feature):
+    elif field == Field_PunctSide:
         tag.punct_side = value_
-    elif is_punct_type_feature(feature):
+    elif field == Field_PunctType:
         tag.punct_type = value_
-    elif is_reflex_feature(feature):
+    elif field == Field_Reflex:
         tag.reflex = value_
-    elif is_style_feature(feature):
+    elif field == Field_Style:
         tag.style = value_
-    elif is_style_variant_feature(feature):
+    elif field == Field_StyleVariant:
         tag.style_variant = value_
-    elif is_tense_feature(feature):
+    elif field == Field_Tense:
         tag.tense = value_
-    elif is_typo_feature(feature):
+    elif field == Field_Typo:
         tag.typo = value_
-    elif is_verb_form_feature(feature):
+    elif field == Field_VerbForm:
         tag.verb_form = value_
-    elif is_voice_feature(feature):
+    elif field == Field_Voice:
         tag.voice = value_
-    elif is_verb_type_feature(feature):
+    elif field == Field_VerbType:
         tag.verb_type = value_
     else:
-        raise ValueError("Unknown feature: %s (%d)" % (NAMES.get(feature), feature))
-
-cdef int is_abbr_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Abbr  and feature <= end_Abbr
-
-cdef int is_adp_type_feature(univ_morph_t feature) nogil:
-    return feature >= begin_AdpType and feature <= end_AdpType
-
-cdef int is_adv_type_feature(univ_morph_t feature) nogil:
-    return feature >= begin_AdvType and feature <= end_AdvType
-
-cdef int is_animacy_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Animacy and feature <= end_Animacy
-
-cdef int is_aspect_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Aspect and feature <= end_Aspect
-
-cdef int is_case_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Case and feature <= end_Case
-
-cdef int is_conj_type_feature(univ_morph_t feature) nogil:
-    return feature >= begin_ConjType and feature <= end_ConjType
-
-cdef int is_connegative_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Connegative and feature <= end_Connegative
-
-cdef int is_definite_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Definite and feature <= end_Definite
-
-cdef int is_degree_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Degree and feature <= end_Degree
-
-cdef int is_derivation_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Derivation and feature <= end_Derivation
-
-cdef int is_echo_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Echo and feature <= end_Echo
-
-cdef int is_foreign_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Foreign and feature <= end_Foreign
-
-cdef int is_gender_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Gender and feature <= end_Gender
-
-cdef int is_hyph_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Hyph and feature <= end_Hyph
-
-cdef int is_inf_form_feature(univ_morph_t feature) nogil:
-    return feature >= begin_InfForm and feature <= end_InfForm
-
-cdef int is_mood_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Mood and feature <= end_Mood
-
-cdef int is_name_type_feature(univ_morph_t feature) nogil:
-    return feature >= begin_NameType and feature < end_NameType
-
-cdef int is_negative_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Negative and feature <= end_Negative
-
-cdef int is_noun_type_feature(univ_morph_t feature) nogil:
-    return feature >= begin_NounType and feature <= end_NounType
-
-cdef int is_number_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Number and feature <= end_Number
-
-cdef int is_num_form_feature(univ_morph_t feature) nogil:
-    return feature >= begin_NumForm and feature <= end_NumForm
-
-cdef int is_num_type_feature(univ_morph_t feature) nogil:
-    return feature >= begin_NumType and feature <= end_NumType
-
-cdef int is_num_value_feature(univ_morph_t feature) nogil:
-    return feature >= begin_NumValue and feature <= end_NumValue
-
-cdef int is_part_form_feature(univ_morph_t feature) nogil:
-    return feature >= begin_PartForm and feature <= end_PartForm
-
-cdef int is_part_type_feature(univ_morph_t feature) nogil:
-    return feature >= begin_PartType and feature <= end_PartType
-
-cdef int is_person_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Person and feature <= end_Person
-
-cdef int is_polite_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Polite and feature <= end_Polite
-
-cdef int is_polarity_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Polarity and feature <= end_Polarity
-
-cdef int is_poss_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Poss and feature <= end_Poss
-
-cdef int is_prefix_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Prefix and feature <= end_Prefix
-
-cdef int is_prep_case_feature(univ_morph_t feature) nogil:
-    return feature >= begin_PrepCase and feature <= end_PrepCase
-
-cdef int is_pron_type_feature(univ_morph_t feature) nogil:
-    return feature >= begin_PronType and feature <= end_PronType
-
-cdef int is_punct_side_feature(univ_morph_t feature) nogil:
-    return feature >= begin_PunctSide and feature <= end_PunctSide
-
-cdef int is_punct_type_feature(univ_morph_t feature) nogil:
-    return feature >= begin_PunctType and feature <= end_PunctType
-
-cdef int is_reflex_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Reflex and feature <= end_Reflex
-
-cdef int is_style_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Style and feature <= end_Style
-
-cdef int is_style_variant_feature(univ_morph_t feature) nogil:
-    return feature >= begin_StyleVariant and feature <= end_StyleVariant
-
-cdef int is_tense_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Tense and feature <= end_Tense
-
-cdef int is_typo_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Typo and feature <= end_Typo
-
-cdef int is_verb_form_feature(univ_morph_t feature) nogil:
-    return feature >= begin_VerbForm and feature <= end_VerbForm
-
-cdef int is_voice_feature(univ_morph_t feature) nogil:
-    return feature >= begin_Voice and feature <= end_Voice
-
-cdef int is_verb_type_feature(univ_morph_t feature) nogil:
-    return feature >= begin_VerbType and feature <= end_VerbType
+        raise ValueError("Unknown feature: %s (%d)" % (FEATURE_NAMES.get(feature), feature))
 
 
 FIELDS = {
-    'Abbr': 0,
-    'AdpType': 1,
-    'AdvType': 2,
-    'Animacy': 3,
-    'Aspect': 4,
-    'Case': 5,
-    'ConjType': 6,
-    'Connegative': 7,
-    'Definite': 8,
-    'Degree': 9,
-    'Derivation': 10,
-    'Echo': 11,
-    'Foreign': 12,
-    'Gender': 13,
-    'Hyph': 14,
-    'InfForm': 15,
-    'Mood': 16,
-    'NameType': 17,
-    'Negative': 18,
-    'Number': 19,
-    'NumForm': 20,
-    'NumType': 21,
-    'NumValue': 22,
-    'PartForm': 23,
-    'PartType': 24,
-    'Person': 25,
-    'Polite': 26,
-    'Polarity': 27,
-    'Poss': 28,
-    'Prefix': 29,
-    'PrepCase': 30,
-    'PronType': 31,
-    'PunctSide': 32,
-    'PunctType': 33,
-    'Reflex': 34,
-    'Style': 35,
-    'StyleVariant': 36,
-    'Tense': 37,
-    'Typo': 38,
-    'VerbForm': 39,
-    'Voice': 40,
-    'VerbType': 41
+    'Abbr': Field_Abbr,
+    'AdpType': Field_AdpType,
+    'AdvType': Field_AdvType,
+    'Animacy': Field_Animacy,
+    'Aspect': Field_Aspect,
+    'Case': Field_Case,
+    'ConjType': Field_ConjType,
+    'Connegative': Field_Connegative,
+    'Definite': Field_Definite,
+    'Degree': Field_Degree,
+    'Derivation': Field_Derivation,
+    'Echo': Field_Echo,
+    'Foreign': Field_Foreign,
+    'Gender': Field_Gender,
+    'Hyph': Field_Hyph,
+    'InfForm': Field_InfForm,
+    'Mood': Field_Mood,
+    'NameType': Field_NameType,
+    'Negative': Field_Negative,
+    'NounType': Field_NounType,
+    'Number': Field_Number,
+    'NumForm': Field_NumForm,
+    'NumType': Field_NumType,
+    'NumValue': Field_NumValue,
+    'PartForm': Field_PartForm,
+    'PartType': Field_PartType,
+    'Person': Field_Person,
+    'Polite': Field_Polite,
+    'Polarity': Field_Polarity,
+    'Poss': Field_Poss,
+    'Prefix': Field_Prefix,
+    'PrepCase': Field_PrepCase,
+    'PronType': Field_PronType,
+    'PunctSide': Field_PunctSide,
+    'PunctType': Field_PunctType,
+    'Reflex': Field_Reflex,
+    'Style': Field_Style,
+    'StyleVariant': Field_StyleVariant,
+    'Tense': Field_Tense,
+    'Typo': Field_Typo,
+    'VerbForm': Field_VerbForm,
+    'Voice': Field_Voice,
+    'VerbType': Field_VerbType
 }
 
-IDS = {
-   "begin_Abbr": begin_Abbr,
-   "Abbr_yes": Abbr_yes ,
-   "end_Abbr": end_Abbr,
-   "begin_AdpType": begin_AdpType,
-   "AdpType_circ": AdpType_circ,
-   "AdpType_comprep": AdpType_comprep,
-   "AdpType_prep ": AdpType_prep ,
-   "AdpType_post": AdpType_post,
-   "AdpType_voc": AdpType_voc,
-   "end_AdpType": end_AdpType,
-   "begin_AdvType": begin_AdvType,
-   "AdvType_adadj": AdvType_adadj,
-   "AdvType_cau": AdvType_cau,
-   "AdvType_deg": AdvType_deg,
-   "AdvType_ex": AdvType_ex,
-   "AdvType_loc": AdvType_loc,
-   "AdvType_man": AdvType_man,
-   "AdvType_mod": AdvType_mod,
-   "AdvType_sta": AdvType_sta,
-   "AdvType_tim": AdvType_tim,
-   "end_AdvType": end_AdvType,
-   "begin_Animacy": begin_Animacy,
-   "Animacy_anim": Animacy_anim,
-   "Animacy_hum": Animacy_hum,
-   "Animacy_inan": Animacy_inan,
-   "Animacy_nhum": Animacy_nhum,
-   "end_Animacy": end_Animacy,
-   "begin_Aspect": begin_Aspect,
-   "Aspect_freq": Aspect_freq,
-   "Aspect_imp": Aspect_imp,
-   "Aspect_mod": Aspect_mod,
-   "Aspect_none": Aspect_none,
-   "Aspect_perf": Aspect_perf,
-   "end_Aspect": end_Aspect,
-   "begin_Case": begin_Case,
-   "Case_abe": Case_abe,
-   "Case_abl": Case_abl,
-   "Case_abs": Case_abs,
-   "Case_acc": Case_acc,
-   "Case_ade": Case_ade,
-   "Case_all": Case_all,
-   "Case_cau": Case_cau,
-   "Case_com": Case_com,
-   "Case_dat": Case_dat,
-   "Case_del": Case_del,
-   "Case_dis": Case_dis,
-   "Case_ela": Case_ela,
-   "Case_ess": Case_ess,
-   "Case_gen": Case_gen,
-   "Case_ill": Case_ill,
-   "Case_ine": Case_ine,
-   "Case_ins": Case_ins,
-   "Case_loc": Case_loc,
-   "Case_lat": Case_lat,
-   "Case_nom": Case_nom,
-   "Case_par": Case_par,
-   "Case_sub": Case_sub,
-   "Case_sup": Case_sup,
-   "Case_tem": Case_tem,
-   "Case_ter": Case_ter,
-   "Case_tra": Case_tra,
-   "Case_voc": Case_voc,
-   "end_Case": end_Case,
-   "begin_ConjType": begin_ConjType,
-   "ConjType_comp ": ConjType_comp ,
-   "ConjType_oper": ConjType_oper,
-   "end_ConjType": end_ConjType,
-   "begin_Connegative": begin_Connegative,
-   "Connegative_yes": Connegative_yes,
-   "end_Connegative": end_Connegative,
-   "begin_Definite": begin_Definite,
-   "Definite_cons": Definite_cons,
-   "Definite_def": Definite_def,
-   "Definite_ind": Definite_ind,
-   "Definite_red": Definite_red,
-   "Definite_two": Definite_two,
-   "end_Definite": end_Definite,
-   "begin_Degree": begin_Degree,
-   "Degree_abs": Degree_abs,
-   "Degree_cmp": Degree_cmp,
-   "Degree_comp": Degree_comp,
-   "Degree_none": Degree_none,
-   "Degree_pos": Degree_pos,
-   "Degree_sup": Degree_sup,
-   "Degree_com": Degree_com,
-   "Degree_dim": Degree_dim,
-   "end_Degree": end_Degree,
-   "begin_Derivation": begin_Derivation,
-   "Derivation_minen": Derivation_minen,
-   "Derivation_sti": Derivation_sti,
-   "Derivation_inen": Derivation_inen,
-   "Derivation_lainen": Derivation_lainen,
-   "Derivation_ja": Derivation_ja,
-   "Derivation_ton": Derivation_ton,
-   "Derivation_vs": Derivation_vs,
-   "Derivation_ttain": Derivation_ttain,
-   "Derivation_ttaa": Derivation_ttaa,
-   "end_Derivation": end_Derivation,
-   "begin_Echo": begin_Echo,
-   "Echo_rdp": Echo_rdp,
-   "Echo_ech": Echo_ech,
-   "end_Echo": end_Echo,
-   "begin_Foreign": begin_Foreign,
-   "Foreign_foreign": Foreign_foreign,
-   "Foreign_fscript": Foreign_fscript,
-   "Foreign_tscript": Foreign_tscript,
-   "Foreign_yes": Foreign_yes,
-   "end_Foreign": end_Foreign,
-   "begin_Gender": begin_Gender,
-   "Gender_com": Gender_com,
-   "Gender_fem": Gender_fem,
-   "Gender_masc": Gender_masc,
-   "Gender_neut": Gender_neut,
-   "Gender_dat_masc": Gender_dat_masc,
-   "Gender_dat_fem": Gender_dat_fem,
-   "Gender_erg_masc": Gender_erg_masc,
-   "Gender_erg_fem": Gender_erg_fem,
-   "Gender_psor_masc": Gender_psor_masc,
-   "Gender_psor_fem": Gender_psor_fem,
-   "Gender_psor_neut": Gender_psor_neut,
-   "end_Gender": end_Gender,
-   "begin_Hyph": begin_Hyph,
-   "Hyph_yes": Hyph_yes,
-   "end_Hyph": end_Hyph,
-   "begin_InfForm": begin_InfForm,
-   "InfForm_one": InfForm_one,
-   "InfForm_two": InfForm_two,
-   "InfForm_three": InfForm_three,
-   "end_InfForm": end_InfForm,
-   "begin_Mood": begin_Mood,
-   "Mood_cnd": Mood_cnd,
-   "Mood_imp": Mood_imp,
-   "Mood_ind": Mood_ind,
-   "Mood_n": Mood_n,
-   "Mood_pot": Mood_pot,
-   "Mood_sub": Mood_sub,
-   "Mood_opt": Mood_opt,
-   "end_Mood": end_Mood,
-   "begin_NameType": begin_NameType,
-   "NameType_geo": NameType_geo,
-   "NameType_prs": NameType_prs,
-   "NameType_giv": NameType_giv,
-   "NameType_sur": NameType_sur,
-   "NameType_nat": NameType_nat,
-   "NameType_com": NameType_com,
-   "NameType_pro": NameType_pro,
-   "NameType_oth": NameType_oth,
-   "end_NameType": end_NameType,
-   "begin_Negative": begin_Negative,
-   "Negative_neg": Negative_neg,
-   "Negative_pos": Negative_pos,
-   "Negative_yes": Negative_yes,
-   "end_Negative": end_Negative,
-   "begin_NounType": begin_NounType,
-   "NounType_com": NounType_com,
-   "NounType_prop": NounType_prop,
-   "NounType_class": NounType_class,
-   "end_NounType": end_NounType,
-   "begin_Number": begin_Number,
-   "Number_com": Number_com,
-   "Number_dual": Number_dual,
-   "Number_none": Number_none,
-   "Number_plur": Number_plur,
-   "Number_sing": Number_sing,
-   "Number_ptan": Number_ptan,
-   "Number_count": Number_count,
-   "Number_abs_sing": Number_abs_sing,
-   "Number_abs_plur": Number_abs_plur,
-   "Number_dat_sing": Number_dat_sing,
-   "Number_dat_plur": Number_dat_plur,
-   "Number_erg_sing": Number_erg_sing,
-   "Number_erg_plur": Number_erg_plur,
-   "Number_psee_sing": Number_psee_sing,
-   "Number_psee_plur": Number_psee_plur,
-   "Number_psor_sing": Number_psor_sing,
-   "Number_psor_plur": Number_psor_plur,
-   "end_Number": end_Number,
-   "begin_NumForm": begin_NumForm,
-   "NumForm_digit": NumForm_digit,
-   "NumForm_roman": NumForm_roman,
-   "NumForm_word": NumForm_word,
-   "end_NumForm": end_NumForm,
-   "begin_NumType": begin_NumType,
-   "NumType_card": NumType_card,
-   "NumType_dist": NumType_dist,
-   "NumType_frac": NumType_frac,
-   "NumType_gen": NumType_gen,
-   "NumType_mult": NumType_mult,
-   "NumType_none": NumType_none,
-   "NumType_ord": NumType_ord,
-   "NumType_sets": NumType_sets,
-   "end_NumType": end_NumType,
-   "begin_NumValue": begin_NumValue,
-   "NumValue_one": NumValue_one,
-   "NumValue_two": NumValue_two,
-   "NumValue_three": NumValue_three,
-   "end_NumValue": end_NumValue,
-   "begin_PartForm": begin_PartForm,
-   "PartForm_pres": PartForm_pres,
-   "PartForm_past": PartForm_past,
-   "PartForm_agt": PartForm_agt,
-   "PartForm_neg": PartForm_neg,
-   "end_PartForm": end_PartForm,
-   "begin_PartType": begin_PartType,
-   "PartType_mod": PartType_mod,
-   "PartType_emp": PartType_emp,
-   "PartType_res": PartType_res,
-   "PartType_inf": PartType_inf,
-   "PartType_vbp": PartType_vbp,
-   "end_PartType": end_PartType,
+FEATURES = [  # "<Field>_<value>" names; the text before the first "_" must be a key of FIELDS
+   "Abbr_yes",
+   "AdpType_circ",
+   "AdpType_comprep",
+   "AdpType_prep",
+   "AdpType_post",
+   "AdpType_voc",
+   "AdvType_adadj",
+   "AdvType_cau",
+   "AdvType_deg",
+   "AdvType_ex",
+   "AdvType_loc",
+   "AdvType_man",
+   "AdvType_mod",
+   "AdvType_sta",
+   "AdvType_tim",
+   "Animacy_anim",
+   "Animacy_hum",
+   "Animacy_inan",
+   "Animacy_nhum",
+   "Aspect_freq",
+   "Aspect_imp",
+   "Aspect_mod",
+   "Aspect_none",
+   "Aspect_perf",
+   "Case_abe",
+   "Case_abl",
+   "Case_abs",
+   "Case_acc",
+   "Case_ade",
+   "Case_all",
+   "Case_cau",
+   "Case_com",
+   "Case_dat",
+   "Case_del",
+   "Case_dis",
+   "Case_ela",
+   "Case_ess",
+   "Case_gen",
+   "Case_ill",
+   "Case_ine",
+   "Case_ins",
+   "Case_loc",
+   "Case_lat",
+   "Case_nom",
+   "Case_par",
+   "Case_sub",
+   "Case_sup",
+   "Case_tem",
+   "Case_ter",
+   "Case_tra",
+   "Case_voc",
+   "ConjType_comp",
+   "ConjType_oper",
+   "Connegative_yes",
+   "Definite_cons",
+   "Definite_def",
+   "Definite_ind",
+   "Definite_red",
+   "Definite_two",
+   "Degree_abs",
+   "Degree_cmp",
+   "Degree_comp",
+   "Degree_none",
+   "Degree_pos",
+   "Degree_sup",
+   "Degree_com",
+   "Degree_dim",
+   "Derivation_minen",
+   "Derivation_sti",
+   "Derivation_inen",
+   "Derivation_lainen",
+   "Derivation_ja",
+   "Derivation_ton",
+   "Derivation_vs",
+   "Derivation_ttain",
+   "Derivation_ttaa",
+   "Echo_rdp",
+   "Echo_ech",
+   "Foreign_foreign",
+   "Foreign_fscript",
+   "Foreign_tscript",
+   "Foreign_yes",
+   "Gender_com",
+   "Gender_fem",
+   "Gender_masc",
+   "Gender_neut",
+   "Gender_dat_masc",
+   "Gender_dat_fem",
+   "Gender_erg_masc",
+   "Gender_erg_fem",
+   "Gender_psor_masc",
+   "Gender_psor_fem",
+   "Gender_psor_neut",
+   "Hyph_yes",
+   "InfForm_one",
+   "InfForm_two",
+   "InfForm_three",
+   "Mood_cnd",
+   "Mood_imp",
+   "Mood_ind",
+   "Mood_n",
+   "Mood_pot",
+   "Mood_sub",
+   "Mood_opt",
+   "NameType_geo",
+   "NameType_prs",
+   "NameType_giv",
+   "NameType_sur",
+   "NameType_nat",
+   "NameType_com",
+   "NameType_pro",
+   "NameType_oth",
+   "Negative_neg",
+   "Negative_pos",
+   "Negative_yes",
+   "NounType_com",
+   "NounType_prop",
+   "NounType_class",
+   "Number_com",
+   "Number_dual",
+   "Number_none",
+   "Number_plur",
+   "Number_sing",
+   "Number_ptan",
+   "Number_count",
+   "Number_abs_sing",
+   "Number_abs_plur",
+   "Number_dat_sing",
+   "Number_dat_plur",
+   "Number_erg_sing",
+   "Number_erg_plur",
+   "Number_psee_sing",
+   "Number_psee_plur",
+   "Number_psor_sing",
+   "Number_psor_plur",
+   "NumForm_digit",
+   "NumForm_roman",
+   "NumForm_word",
+   "NumType_card",
+   "NumType_dist",
+   "NumType_frac",
+   "NumType_gen",
+   "NumType_mult",
+   "NumType_none",
+   "NumType_ord",
+   "NumType_sets",
+   "NumValue_one",
+   "NumValue_two",
+   "NumValue_three",
+   "PartForm_pres",
+   "PartForm_past",
+   "PartForm_agt",
+   "PartForm_neg",
+   "PartType_mod",
+   "PartType_emp",
+   "PartType_res",
+   "PartType_inf",
+   "PartType_vbp",
+   "Person_one",
+   "Person_two",
+   "Person_three",
+   "Person_none",
+   "Person_abs_one",
+   "Person_abs_two",
+   "Person_abs_three",
+   "Person_dat_one",
+   "Person_dat_two",
+   "Person_dat_three",
+   "Person_erg_one",
+   "Person_erg_two",
+   "Person_erg_three",
+   "Person_psor_one",
+   "Person_psor_two",
+   "Person_psor_three",
+   "Polarity_neg",
+   "Polarity_pos",
+   "Polite_inf",
+   "Polite_pol",
+   "Polite_abs_inf",
+   "Polite_abs_pol",
+   "Polite_erg_inf",
+   "Polite_erg_pol",
+   "Polite_dat_inf",
+   "Polite_dat_pol",
+   "Poss_yes",
+   "Prefix_yes",
+   "PrepCase_npr",
+   "PrepCase_pre",
+   "PronType_advPart",
+   "PronType_art",
+   "PronType_default",
+   "PronType_dem",
+   "PronType_ind",
+   "PronType_int",
+   "PronType_neg",
+   "PronType_prs",
+   "PronType_rcp",
+   "PronType_rel",
+   "PronType_tot",
+   "PronType_clit",
+   "PronType_exc",
+   "PunctSide_ini",
+   "PunctSide_fin",
+   "PunctType_peri",
+   "PunctType_qest",
+   "PunctType_excl",
+   "PunctType_quot",
+   "PunctType_brck",
+   "PunctType_comm",
+   "PunctType_colo",
+   "PunctType_semi",
+   "PunctType_dash",
+   "Reflex_yes",
+   "Style_arch",
+   "Style_rare",
+   "Style_poet",
+   "Style_norm",
+   "Style_coll",
+   "Style_vrnc",
+   "Style_sing",
+   "Style_expr",
+   "Style_derg",
+   "Style_vulg",
+   "Style_yes",
+   "StyleVariant_styleShort",
+   "StyleVariant_styleBound",
+   "Tense_fut",
+   "Tense_imp",
+   "Tense_past",
+   "Tense_pres",
+   "Typo_yes",
+   "VerbForm_fin",
+   "VerbForm_ger",
+   "VerbForm_inf",
+   "VerbForm_none",
+   "VerbForm_part",
+   "VerbForm_partFut",
+   "VerbForm_partPast",
+   "VerbForm_partPres",
+   "VerbForm_sup",
+   "VerbForm_trans",
+   "VerbForm_conv",
+   "VerbForm_gdv",
+   "VerbType_aux",
+   "VerbType_cop",
+   "VerbType_mod",
+   "VerbType_light",
+   "Voice_act",
+   "Voice_cau",
+   "Voice_pass",
+   "Voice_mid",
+   "Voice_int",
+]
 
-   "begin_Person": begin_Person,
-   "Person_one": Person_one,
-   "Person_two": Person_two,
-   "Person_three": Person_three,
-   "Person_none": Person_none,
-   "Person_abs_one": Person_abs_one,
-   "Person_abs_two": Person_abs_two,
-   "Person_abs_three": Person_abs_three,
-   "Person_dat_one": Person_dat_one,
-   "Person_dat_two": Person_dat_two,
-   "Person_dat_three": Person_dat_three,
-   "Person_erg_one": Person_erg_one,
-   "Person_erg_two": Person_erg_two,
-   "Person_erg_three": Person_erg_three,
-   "Person_psor_one": Person_psor_one,
-   "Person_psor_two": Person_psor_two,
-   "Person_psor_three": Person_psor_three,
-   "end_Person": end_Person,
-   "begin_Polarity": begin_Polarity,
-   "Polarity_neg": Polarity_neg,
-   "Polarity_pos": Polarity_pos,
-   "end_Polarity": end_Polarity,
-   "begin_Polite": begin_Polite,
-   "Polite_inf": Polite_inf,
-   "Polite_pol": Polite_pol,
-   "Polite_abs_inf": Polite_abs_inf,
-   "Polite_abs_pol": Polite_abs_pol,
-   "Polite_erg_inf": Polite_erg_inf,
-   "Polite_erg_pol": Polite_erg_pol,
-   "Polite_dat_inf": Polite_dat_inf,
-   "Polite_dat_pol": Polite_dat_pol,
-   "end_Polite": end_Polite,
-   "begin_Poss": begin_Poss,
-   "Poss_yes": Poss_yes,
-   "end_Poss": end_Poss,
-   "begin_Prefix": begin_Prefix,
-   "Prefix_yes": Prefix_yes,
-   "end_Prefix": end_Prefix,
-   "begin_PrepCase": begin_PrepCase,
-   "PrepCase_npr": PrepCase_npr,
-   "PrepCase_pre": PrepCase_pre,
-   "end_PrepCase": end_PrepCase,
-   "begin_PronType": begin_PronType,
-   "PronType_advPart": PronType_advPart,
-   "PronType_art": PronType_art,
-   "PronType_default": PronType_default,
-   "PronType_dem": PronType_dem,
-   "PronType_ind": PronType_ind,
-   "PronType_int": PronType_int,
-   "PronType_neg": PronType_neg,
-   "PronType_prs": PronType_prs,
-   "PronType_rcp": PronType_rcp,
-   "PronType_rel": PronType_rel,
-   "PronType_tot": PronType_tot,
-   "PronType_clit": PronType_clit,
-   "PronType_exc": PronType_exc,
-   "end_PronType": end_PronType,
-   "begin_PunctSide": begin_PunctSide,
-   "PunctSide_ini": PunctSide_ini,
-   "PunctSide_fin": PunctSide_fin,
-   "end_PunctSide": end_PunctSide,
-   "begin_PunctType": begin_PunctType,
-   "PunctType_peri": PunctType_peri,
-   "PunctType_qest": PunctType_qest,
-   "PunctType_excl": PunctType_excl,
-   "PunctType_quot": PunctType_quot,
-   "PunctType_brck": PunctType_brck,
-   "PunctType_comm": PunctType_comm,
-   "PunctType_colo": PunctType_colo,
-   "PunctType_semi": PunctType_semi,
-   "PunctType_dash": PunctType_dash,
-   "end_PunctType": end_PunctType,
-   "begin_Reflex": begin_Reflex,
-   "Reflex_yes": Reflex_yes,
-   "end_Reflex": end_Reflex,
-   "begin_Style": begin_Style,
-   "Style_arch": Style_arch,
-   "Style_rare": Style_rare,
-   "Style_poet": Style_poet,
-   "Style_norm": Style_norm,
-   "Style_coll": Style_coll,
-   "Style_vrnc": Style_vrnc,
-   "Style_sing": Style_sing,
-   "Style_expr": Style_expr,
-   "Style_derg": Style_derg,
-   "Style_vulg": Style_vulg,
-   "Style_yes": Style_yes,
-   "end_Style": end_Style,
-   "begin_StyleVariant": begin_StyleVariant,
-   "StyleVariant_styleShort": StyleVariant_styleShort,
-   "StyleVariant_styleBound": StyleVariant_styleBound,
-   "end_StyleVariant": end_StyleVariant,
-   "begin_Tense": begin_Tense,
-   "Tense_fut": Tense_fut,
-   "Tense_imp": Tense_imp,
-   "Tense_past": Tense_past,
-   "Tense_pres": Tense_pres,
-   "end_Tense": end_Tense,
-   "begin_Typo": begin_Typo,
-   "Typo_yes": Typo_yes,
-   "end_Typo": end_Typo,
-   "begin_VerbForm": begin_VerbForm,
-   "VerbForm_fin": VerbForm_fin,
-   "VerbForm_ger": VerbForm_ger,
-   "VerbForm_inf": VerbForm_inf,
-   "VerbForm_none": VerbForm_none,
-   "VerbForm_part": VerbForm_part,
-   "VerbForm_partFut": VerbForm_partFut,
-   "VerbForm_partPast": VerbForm_partPast,
-   "VerbForm_partPres": VerbForm_partPres,
-   "VerbForm_sup": VerbForm_sup,
-   "VerbForm_trans": VerbForm_trans,
-   "VerbForm_conv": VerbForm_conv,
-   "VerbForm_gdv": VerbForm_gdv,
-   "end_VerbForm": end_VerbForm,
-   "begin_VerbType": begin_VerbType,
-   "VerbType_aux": VerbType_aux,
-   "VerbType_cop": VerbType_cop,
-   "VerbType_mod": VerbType_mod,
-   "VerbType_light": VerbType_light,
-   "end_VerbType": end_VerbType,
-   "begin_Voice": begin_Voice,
-   "Voice_act": Voice_act,
-   "Voice_cau": Voice_cau,
-   "Voice_pass": Voice_pass,
-   "Voice_mid": Voice_mid,
-   "Voice_int": Voice_int,
-   "end_Voice": end_Voice,
-}
+FEATURE_NAMES = {get_string_id(name): name for name in FEATURES}  # id from get_string_id() -> feature name
 
+FEATURE_FIELDS = {feature: FIELDS[feature.split('_', 1)[0]] for feature in FEATURES}  # feature name -> FIELDS entry for the text before the first "_"
+for feat_id, name in FEATURE_NAMES.items():  # alias every entry under its get_string_id() key as well
+    FEATURE_FIELDS[feat_id] = FEATURE_FIELDS[name]
 
-FIELD_SIZES = [get_field_size(field) for field in FIELDS]
-
-NAMES = {value: key for key, value in IDS.items()}
-# Unfortunate hack here, to work around problem with long cpdef enum
-# (which is generating an enormous amount of C++ in Cython 0.24+)
-# We keep the enum cdef, and just make sure the names are available to Python
-locals().update(IDS)
+FIELD_SIZES = Counter(FEATURE_FIELDS[name] for name in FEATURES)  # one count per feature; FEATURE_FIELDS.values() would also count the id aliases
+FEATURE_OFFSETS = {}  # feature name -> 0-based position of the feature within its field
+FIELD_OFFSETS = {}  # field -> index in FEATURES of that field's first feature
+_seen_fields = Counter()  # number of features already visited, per field
+for i, feature in enumerate(FEATURES):
+    field = FEATURE_FIELDS[feature]
+    FEATURE_OFFSETS[feature] = _seen_fields[field]
+    if _seen_fields[field] == 0:  # first feature of this field (comparing the whole Counter to 0 never matched)
+        FIELD_OFFSETS[field] = i
+    _seen_fields[field] += 1
diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx
index 820567e71..9f25ba357 100644
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -16,7 +16,7 @@ from ..compat import basestring_
 from ..tokens.doc cimport Doc
 from ..vocab cimport Vocab
 from ..morphology cimport Morphology
-from ..morphology import parse_feature, IDS, FIELDS, FIELD_SIZES, NAMES
+from ..morphology import get_field_size, get_field_offset, parse_feature, FIELDS
 
 
 class Morphologizer(Pipe):
@@ -27,7 +27,7 @@ class Morphologizer(Pipe):
         if cfg.get('pretrained_dims') and not cfg.get('pretrained_vectors'):
             raise ValueError(TempErrors.T008)
         if attr_nums is None:
-            attr_nums = list(FIELD_SIZES)
+            attr_nums = [get_field_size(name) for name in FIELDS]
         return build_morphologizer_model(attr_nums, **cfg)
 
     def __init__(self, vocab, model=True, **cfg):
@@ -76,7 +76,7 @@ class Morphologizer(Pipe):
         cdef Doc doc
         cdef Vocab vocab = self.vocab
         field_names = list(FIELDS)
-        offsets = [IDS['begin_%s' % field] for field in field_names]
+        offsets = [get_field_offset(field) for field in field_names]
         for i, doc in enumerate(docs):
             doc_scores = batch_scores[i]
             doc_guesses = scores_to_guesses(doc_scores, self.model.softmax.out_sizes)
diff --git a/spacy/structs.pxd b/spacy/structs.pxd
index 7452123c0..a4daa9b94 100644
--- a/spacy/structs.pxd
+++ b/spacy/structs.pxd
@@ -2,7 +2,6 @@ from libc.stdint cimport uint8_t, uint32_t, int32_t, uint64_t
 
 from .typedefs cimport flags_t, attr_t, hash_t
 from .parts_of_speech cimport univ_pos_t
-from .morphology cimport univ_morph_t
 
 
 cdef struct LexemeC:
diff --git a/spacy/tests/doc/test_retokenize_merge.py b/spacy/tests/doc/test_retokenize_merge.py
index 4d4a70e30..b62e69f6c 100644
--- a/spacy/tests/doc/test_retokenize_merge.py
+++ b/spacy/tests/doc/test_retokenize_merge.py
@@ -69,7 +69,6 @@ def test_doc_retokenize_retokenizer_attrs(en_tokenizer):
     assert doc[4].ent_type_ == "ORG"
 
 
-@pytest.mark.xfail
 def test_doc_retokenize_lex_attrs(en_tokenizer):
     """Test that lexical attributes can be changed (see #2390)."""
     doc = en_tokenizer("WKRO played beach boys songs")
diff --git a/spacy/tests/morphology/test_morph_features.py b/spacy/tests/morphology/test_morph_features.py
index 32cc665af..dcb0b32ff 100644
--- a/spacy/tests/morphology/test_morph_features.py
+++ b/spacy/tests/morphology/test_morph_features.py
@@ -2,7 +2,7 @@ from __future__ import unicode_literals
 import pytest
 
 from ...morphology import Morphology
-from ...strings import StringStore
+from ...strings import StringStore, get_string_id
 from ...lemmatizer import Lemmatizer
 from ...morphology import *
 
@@ -17,14 +17,14 @@ def test_add_morphology_with_string_names(morphology):
     morphology.add({"Case_gen", "Number_sing"})
 
 def test_add_morphology_with_int_ids(morphology):
-    morphology.add({Case_gen, Number_sing})
+    morphology.add({get_string_id("Case_gen"), get_string_id("Number_sing")})
 
 def test_add_morphology_with_mix_strings_and_ints(morphology):
-    morphology.add({PunctSide_ini, 'VerbType_aux'})
+    morphology.add({get_string_id("PunctSide_ini"), 'VerbType_aux'})
 
 
 def test_morphology_tags_hash_distinctly(morphology):
-    tag1 = morphology.add({PunctSide_ini, 'VerbType_aux'})
+    tag1 = morphology.add({"PunctSide_ini", 'VerbType_aux'})
     tag2 = morphology.add({"Case_gen", 'Number_sing'})
     assert tag1 != tag2
 
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index df596ceb5..1b60a3271 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -22,6 +22,7 @@ from ..compat import is_config
 from ..errors import Errors, Warnings, user_warning, models_warning
 from .. import util
 from .underscore import Underscore, get_ext_args
+from .morphanalysis cimport MorphAnalysis
 
 
 cdef class Token:
@@ -176,6 +177,10 @@ cdef class Token:
         def __get__(self):
             return self.c.morph
 
+    property morph:
+        def __get__(self):
+            return MorphAnalysis.from_id(self.vocab, self.c.morph)
+
     property lex_id:
         """RETURNS (int): Sequential ID of the token's lexical type."""
         def __get__(self):