Add MorphAnalysisC struct

This commit is contained in:
Matthew Honnibal 2019-03-07 14:03:07 +01:00
parent b69013e2d7
commit b9ade7d4e0
4 changed files with 420 additions and 74 deletions

View File

@ -3,7 +3,7 @@ from preshed.maps cimport PreshMap, PreshMapArray
from libc.stdint cimport uint64_t
from murmurhash cimport mrmr
from .structs cimport TokenC
from .structs cimport TokenC, MorphAnalysisC
from .strings cimport StringStore
from .typedefs cimport hash_t, attr_t, flags_t
from .parts_of_speech cimport univ_pos_t
@ -24,7 +24,7 @@ cdef class Morphology:
cdef readonly int n_tags
cpdef update(self, hash_t morph, features)
cdef hash_t insert(self, RichTagC tag) except 0
cdef hash_t insert(self, MorphAnalysisC tag) except 0
cdef int assign_untagged(self, TokenC* token) except -1
cdef int assign_tag(self, TokenC* token, tag) except -1
@ -416,50 +416,3 @@ cdef enum univ_morph_t:
Voice_int # hb
end_Voice
# Fixed-layout record of one morphological analysis: a coarse part-of-speech
# slot followed by one univ_morph_t slot per named feature field.
# NOTE(review): hash_tag() hashes the raw bytes of the struct it is given, so
# the order and types of these fields feed into the stored keys.
cdef struct RichTagC:
univ_pos_t pos
univ_morph_t abbr
univ_morph_t adp_type
univ_morph_t adv_type
univ_morph_t animacy
univ_morph_t aspect
univ_morph_t case
univ_morph_t conj_type
univ_morph_t connegative
univ_morph_t definite
univ_morph_t degree
univ_morph_t derivation
univ_morph_t echo
univ_morph_t foreign
univ_morph_t gender
univ_morph_t hyph
univ_morph_t inf_form
univ_morph_t mood
univ_morph_t negative
univ_morph_t number
univ_morph_t name_type
univ_morph_t noun_type
univ_morph_t num_form
univ_morph_t num_type
univ_morph_t num_value
univ_morph_t part_form
univ_morph_t part_type
univ_morph_t person
univ_morph_t polite
univ_morph_t polarity
univ_morph_t poss
univ_morph_t prefix
univ_morph_t prep_case
univ_morph_t pron_type
univ_morph_t punct_side
univ_morph_t punct_type
univ_morph_t reflex
univ_morph_t style
univ_morph_t style_variant
univ_morph_t tense
univ_morph_t typo
univ_morph_t verb_form
univ_morph_t voice
univ_morph_t verb_type

View File

@ -111,13 +111,13 @@ cdef class Morphology:
print(list(NAMES.keys())[:10])
print(NAMES.get(feature-1), NAMES.get(feature+1))
raise KeyError("Unknown feature: %d" % feature)
cdef RichTagC tag
cdef MorphAnalysisC tag
tag = create_rich_tag(features)
cdef hash_t key = self.insert(tag)
return key
def get(self, hash_t morph):
tag = <RichTagC*>self.tags.get(morph)
tag = <MorphAnalysisC*>self.tags.get(morph)
if tag == NULL:
return []
else:
@ -125,7 +125,7 @@ cdef class Morphology:
cpdef update(self, hash_t morph, features):
"""Update a morphological analysis with new feature values."""
tag = (<RichTagC*>self.tags.get(morph))[0]
tag = (<MorphAnalysisC*>self.tags.get(morph))[0]
features = intify_features(features)
cdef univ_morph_t feature
for feature in features:
@ -168,10 +168,10 @@ cdef class Morphology:
attrs = intify_attrs(attrs, self.strings, _do_deprecated=True)
self.exc[(tag_str, self.strings.add(orth_str))] = attrs
# Store `tag` in self.tags under its content hash (computed by hash_tag) if no
# entry exists for that hash yet, and return the hash either way.
cdef hash_t insert(self, RichTagC tag) except 0:
cdef hash_t insert(self, MorphAnalysisC tag) except 0:
cdef hash_t key = hash_tag(tag)
# Allocate only when this key has not been stored before.
if self.tags.get(key) == NULL:
tag_ptr = <RichTagC*>self.mem.alloc(1, sizeof(RichTagC))
tag_ptr = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC))
# Copy the by-value argument into the newly allocated slot.
tag_ptr[0] = tag
self.tags.set(key, <void*>tag_ptr)
return key
@ -240,7 +240,7 @@ cdef class Morphology:
# Serialise the stored tags: each tag is converted with tag_to_json and the
# resulting list is dumped with srsly.json_dumps.
def to_bytes(self):
json_tags = []
for key in self.tags:
tag_ptr = <RichTagC*>self.tags.get(key)
tag_ptr = <MorphAnalysisC*>self.tags.get(key)
# Keys whose lookup yields NULL are skipped rather than dereferenced.
if tag_ptr != NULL:
json_tags.append(tag_to_json(tag_ptr[0]))
return srsly.json_dumps(json_tags)
@ -261,18 +261,18 @@ cpdef univ_pos_t get_int_tag(pos_):
cpdef intify_features(features):
    """Return a set built from `features`, swapping each entry for its IDS value.

    Entries that have no mapping in IDS are kept exactly as given.
    """
    return set(IDS.get(entry, entry) for entry in features)
# Hash the struct by its raw bytes: sizeof(tag) bytes starting at &tag are
# passed to mrmr.hash64 with 0 as the last argument (presumably the seed —
# confirm against murmurhash). Declared nogil.
cdef hash_t hash_tag(RichTagC tag) nogil:
cdef hash_t hash_tag(MorphAnalysisC tag) nogil:
return mrmr.hash64(&tag, sizeof(tag), 0)
# Build a tag struct from an iterable of feature values: zero the struct,
# then call set_feature(&tag, feature, 1) once per entry.
cdef RichTagC create_rich_tag(features) except *:
cdef RichTagC tag
cdef MorphAnalysisC create_rich_tag(features) except *:
cdef MorphAnalysisC tag
cdef univ_morph_t feature
# Zero every byte first so fields not touched by set_feature read as 0.
memset(&tag, 0, sizeof(tag))
for feature in features:
set_feature(&tag, feature, 1)
return tag
cdef tag_to_json(RichTagC tag):
cdef tag_to_json(MorphAnalysisC tag):
features = []
if tag.abbr != 0:
features.append(NAMES[tag.abbr])
@ -360,11 +360,11 @@ cdef tag_to_json(RichTagC tag):
features.append(NAMES[tag.verb_type])
return features
# NOTE(review): placeholder. `json_tag` is never read and `tag` is returned
# without being assigned or zeroed, so the returned contents are not defined
# by this function.
cdef RichTagC tag_from_json(json_tag):
cdef RichTagC tag
cdef MorphAnalysisC tag_from_json(json_tag):
cdef MorphAnalysisC tag
return tag
cdef int set_feature(RichTagC* tag, univ_morph_t feature, int value) except -1:
cdef int set_feature(MorphAnalysisC* tag, univ_morph_t feature, int value) except -1:
if value == True:
value_ = feature
else:

View File

@ -74,4 +74,50 @@ cdef struct TokenC:
hash_t ent_id
# Fixed-layout record of one morphological analysis: a univ_pos_t slot
# followed by one attr_t slot per named feature field.
# create_rich_tag() zeroes the struct before setting features, and
# tag_to_json() only emits `abbr` when it is != 0 (presumably the same test is
# applied to the other fields), so 0 acts as "not set".
# NOTE(review): hash_tag() hashes the raw bytes of this struct, so the order
# and types of these fields feed into the stored keys.
cdef struct MorphAnalysisC:
univ_pos_t pos
attr_t abbr
attr_t adp_type
attr_t adv_type
attr_t animacy
attr_t aspect
attr_t case
attr_t conj_type
attr_t connegative
attr_t definite
attr_t degree
attr_t derivation
attr_t echo
attr_t foreign
attr_t gender
attr_t hyph
attr_t inf_form
attr_t mood
attr_t negative
attr_t number
attr_t name_type
attr_t noun_type
attr_t num_form
attr_t num_type
attr_t num_value
attr_t part_form
attr_t part_type
attr_t person
attr_t polite
attr_t polarity
attr_t poss
attr_t prefix
attr_t prep_case
attr_t pron_type
attr_t punct_side
attr_t punct_type
attr_t reflex
attr_t style
attr_t style_variant
attr_t tense
attr_t typo
attr_t verb_form
attr_t voice
attr_t verb_type

View File

@ -1,10 +1,14 @@
from ..vocab cimport Vocab
from ..typedefs cimport hash_t
cdef class Morphanalysis:
"""Control access to morphological features for a token."""
# Bind the analysis to `vocab`, pass `features` to vocab.morphology.add and
# keep the returned value as self.key, then copy the stored struct into self.c.
def __init__(self, Vocab vocab, features=None):
pass
def __init__(self, Vocab vocab, features=tuple()):
self.vocab = vocab
self.key = self.vocab.morphology.add(features)
# Look the entry up again by key; the struct is copied by value below.
# NOTE(review): the pointer is dereferenced without a NULL check —
# presumably add() guarantees the entry exists; confirm.
analysis = <const MorphAnalysisC*>self.vocab.morphology.tags.get(self.key)
self.c = analysis[0]
@classmethod
def from_id(self, Vocab vocab, hash_t key):
@ -28,6 +32,12 @@ cdef class Morphanalysis:
def __hash__(self):
    """Return the analysis key as the hash value.

    ``self.key`` is assigned in ``__init__`` from the value returned by
    ``vocab.morphology.add``. A ``__hash__`` whose body is only ``pass``
    returns ``None``, which makes ``hash(obj)`` raise ``TypeError``; returning
    the key makes instances usable in sets and as dict keys.
    """
    return self.key
def get(self, name):
# Placeholder: the body is `pass`, so this returns None for every name.
pass
def to_json(self):
# Placeholder: the body is `pass`, so this returns None.
pass
@property
def is_base_form(self):
# Placeholder: the body is `pass`, so the property evaluates to None.
pass
@ -44,17 +54,354 @@ cdef class Morphanalysis:
def id(self):
pass
# Read-only accessor stubs named after MorphAnalysisC fields (abbr through
# inf_form). Each defines only __get__, and every body is `pass`, so each
# currently evaluates to None.
# NOTE(review): diff markers are stripped in this view; the bare
# `def get/set/add/remove/to_json` lines below look like the removed side of
# the hunk, not live definitions — confirm against the real file.
def get(self, name):
property abbr:
def __get__(self):
pass
def set(self, name, value):
property adp_type:
def __get__(self):
pass
def add(self, feature):
property adv_type:
def __get__(self):
pass
def remove(self, feature):
property animacy:
def __get__(self):
pass
def to_json(self):
property aspect:
def __get__(self):
pass
property case:
def __get__(self):
pass
property conj_type:
def __get__(self):
pass
property connegative:
def __get__(self):
pass
property definite:
def __get__(self):
pass
property degree:
def __get__(self):
pass
property derivation:
def __get__(self):
pass
property echo:
def __get__(self):
pass
property foreign:
def __get__(self):
pass
property gender:
def __get__(self):
pass
property hyph:
def __get__(self):
pass
property inf_form:
def __get__(self):
pass
# Accessor stubs for mood, name_type, negative and noun_type.
# `name_type` and `negative` were each declared twice here, while `noun_type`
# (a MorphAnalysisC field) had no accessor; each name now appears exactly once.
# Bodies stay `pass`, matching the neighbouring placeholder properties.
property mood:
    def __get__(self):
        pass

property name_type:
    def __get__(self):
        pass

property negative:
    def __get__(self):
        pass

property noun_type:
    def __get__(self):
        pass
# Read-only accessor stubs named after MorphAnalysisC fields (number through
# verb_type). Each defines only __get__, and every body is `pass`, so each
# currently evaluates to None.
property number:
def __get__(self):
pass
property num_form:
def __get__(self):
pass
property num_type:
def __get__(self):
pass
property num_value:
def __get__(self):
pass
property part_form:
def __get__(self):
pass
property part_type:
def __get__(self):
pass
property person:
def __get__(self):
pass
property polite:
def __get__(self):
pass
property polarity:
def __get__(self):
pass
property poss:
def __get__(self):
pass
property prefix:
def __get__(self):
pass
property prep_case:
def __get__(self):
pass
property pron_type:
def __get__(self):
pass
property punct_side:
def __get__(self):
pass
property punct_type:
def __get__(self):
pass
property reflex:
def __get__(self):
pass
property style:
def __get__(self):
pass
property style_variant:
def __get__(self):
pass
property tense:
def __get__(self):
pass
property typo:
def __get__(self):
pass
property verb_form:
def __get__(self):
pass
property voice:
def __get__(self):
pass
property verb_type:
def __get__(self):
pass
# Underscore-suffixed accessor stubs (abbr_ through inf_form_), one per
# MorphAnalysisC field name. Each defines only __get__ with a `pass` body.
# NOTE(review): presumably these are meant to return the string form of the
# matching un-suffixed property — nothing here implements that yet; confirm.
property abbr_:
def __get__(self):
pass
property adp_type_:
def __get__(self):
pass
property adv_type_:
def __get__(self):
pass
property animacy_:
def __get__(self):
pass
property aspect_:
def __get__(self):
pass
property case_:
def __get__(self):
pass
property conj_type_:
def __get__(self):
pass
property connegative_:
def __get__(self):
pass
property definite_:
def __get__(self):
pass
property degree_:
def __get__(self):
pass
property derivation_:
def __get__(self):
pass
property echo_:
def __get__(self):
pass
property foreign_:
def __get__(self):
pass
property gender_:
def __get__(self):
pass
property hyph_:
def __get__(self):
pass
property inf_form_:
def __get__(self):
pass
# Accessor stubs for mood_, name_type_, negative_ and noun_type_.
# `name_type_` and `negative_` were each declared twice here, while no
# `noun_type_` accessor existed for the MorphAnalysisC `noun_type` field; each
# name now appears exactly once. Bodies stay `pass`, matching the neighbouring
# placeholder properties.
property mood_:
    def __get__(self):
        pass

property name_type_:
    def __get__(self):
        pass

property negative_:
    def __get__(self):
        pass

property noun_type_:
    def __get__(self):
        pass
# Underscore-suffixed accessor stubs (number_ through verb_type_), one per
# MorphAnalysisC field name. Each defines only __get__ with a `pass` body.
# NOTE(review): presumably these are meant to return the string form of the
# matching un-suffixed property — nothing here implements that yet; confirm.
property number_:
def __get__(self):
pass
property num_form_:
def __get__(self):
pass
property num_type_:
def __get__(self):
pass
property num_value_:
def __get__(self):
pass
property part_form_:
def __get__(self):
pass
property part_type_:
def __get__(self):
pass
property person_:
def __get__(self):
pass
property polite_:
def __get__(self):
pass
property polarity_:
def __get__(self):
pass
property poss_:
def __get__(self):
pass
property prefix_:
def __get__(self):
pass
property prep_case_:
def __get__(self):
pass
property pron_type_:
def __get__(self):
pass
property punct_side_:
def __get__(self):
pass
property punct_type_:
def __get__(self):
pass
property reflex_:
def __get__(self):
pass
property style_:
def __get__(self):
pass
property style_variant_:
def __get__(self):
pass
property tense_:
def __get__(self):
pass
property typo_:
def __get__(self):
pass
property verb_form_:
def __get__(self):
pass
property voice_:
def __get__(self):
pass
property verb_type_:
def __get__(self):
pass