mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-01 03:33:12 +03:00
Add helper to parse features into field and column IDs
This commit is contained in:
parent
d0dc032842
commit
a4fc397880
|
@ -52,6 +52,16 @@ def _normalize_props(props):
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def parse_feature(feature):
    '''Resolve a morphological feature into its field ID and its offset
    within that field.

    feature: Either a feature name of the form 'Field_value' (anything with a
        `split` attribute is treated as a name), or a feature ID, which is
        first translated to its name via NAMES.
    RETURNS: A tuple (field_id, offset), where field_id is FIELDS[field] and
        offset is IDS[feature] - IDS['begin_<field>'].
    RAISES: ValueError if the name contains no underscore. KeyError if the
        feature, its 'begin_<field>' marker, or the field is missing from
        NAMES, IDS or FIELDS respectively.
    '''
    if not hasattr(feature, 'split'):
        # Not string-like, so treat it as an ID and look up the name.
        feature = NAMES[feature]
    # Split on the first underscore only, so that names with more than one
    # underscore no longer fail with an unpacking error.
    # NOTE(review): assumes the field is always the text before the first
    # underscore -- confirm for multi-part feature names.
    key, separator, _ = feature.partition('_')
    if not separator:
        raise ValueError(
            "Expected a feature name of the form 'Field_value', got: %r"
            % (feature,))
    begin = 'begin_%s' % key
    # The offset is measured from the field's 'begin_<field>' marker in IDS.
    offset = IDS[feature] - IDS[begin]
    field_id = FIELDS[key]
    return (field_id, offset)
|
||||||
|
|
||||||
|
|
||||||
cdef class Morphology:
|
cdef class Morphology:
|
||||||
'''Store the possible morphological analyses for a language, and index them
|
'''Store the possible morphological analyses for a language, and index them
|
||||||
by hash.
|
by hash.
|
||||||
|
@ -716,7 +726,52 @@ IDS = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
NAMES = [key for key, value in sorted(IDS.items(), key=lambda item: item[1])]
|
# Maps each morphological field name to its integer field ID. The ID of a
# field is its position in the tuple below, so the order is significant:
# append new fields at the end rather than inserting them in the middle.
FIELDS = {
    field_name: field_id
    for field_id, field_name in enumerate((
        'Abbr',          # 0
        'AdpType',       # 1
        'AdvType',       # 2
        'Animacy',       # 3
        'Aspect',        # 4
        'Case',          # 5
        'ConjType',      # 6
        'Connegative',   # 7
        'Definite',      # 8
        'Degree',        # 9
        'Derivation',    # 10
        'Echo',          # 11
        'Foreign',       # 12
        'Gender',        # 13
        'Hyph',          # 14
        'InfForm',       # 15
        'Mood',          # 16
        'Negative',      # 17
        'Number',        # 18
        'NameType',      # 19
        'NumForm',       # 20
        'NumType',       # 21
        'NumValue',      # 22
        'PartForm',      # 23
        'PartType',      # 24
        'Person',        # 25
        'Polite',        # 26
        'Polarity',      # 27
        'Poss',          # 28
        'Prefix',        # 29
        'PrepCase',      # 30
        'PronType',      # 31
        'PunctSide',     # 32
        'PunctType',     # 33
        'Reflex',        # 34
        'Style',         # 35
        'StyleVariant',  # 36
        'Tense',         # 37
        'VerbForm',      # 38
        'Voice',         # 39
        'VerbType',      # 40
    ))
}
|
||||||
|
|
||||||
|
|
||||||
|
# Reverse lookup for IDS: maps each feature ID back to its feature name.
NAMES = {
    feature_id: feature_name
    for feature_name, feature_id in IDS.items()
}
|
||||||
# Unfortunate hack here, to work around problem with long cpdef enum
|
# Unfortunate hack here, to work around problem with long cpdef enum
|
||||||
# (which is generating an enormous amount of C++ in Cython 0.24+)
|
# (which is generating an enormous amount of C++ in Cython 0.24+)
|
||||||
# We keep the enum cdef, and just make sure the names are available to Python
|
# We keep the enum cdef, and just make sure the names are available to Python
|
||||||
|
|
Loading…
Reference in New Issue
Block a user