mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	Add MorphAnalysisC struct
This commit is contained in:
		
							parent
							
								
									b69013e2d7
								
							
						
					
					
						commit
						b9ade7d4e0
					
				|  | @ -3,7 +3,7 @@ from preshed.maps cimport PreshMap, PreshMapArray | |||
| from libc.stdint cimport uint64_t | ||||
| from murmurhash cimport mrmr | ||||
| 
 | ||||
| from .structs cimport TokenC | ||||
| from .structs cimport TokenC, MorphAnalysisC | ||||
| from .strings cimport StringStore | ||||
| from .typedefs cimport hash_t, attr_t, flags_t | ||||
| from .parts_of_speech cimport univ_pos_t | ||||
|  | @ -24,7 +24,7 @@ cdef class Morphology: | |||
|     cdef readonly int n_tags | ||||
| 
 | ||||
|     cpdef update(self, hash_t morph, features) | ||||
|     cdef hash_t insert(self, RichTagC tag) except 0 | ||||
|     cdef hash_t insert(self, MorphAnalysisC tag) except 0 | ||||
|      | ||||
|     cdef int assign_untagged(self, TokenC* token) except -1 | ||||
|     cdef int assign_tag(self, TokenC* token, tag) except -1 | ||||
|  | @ -416,50 +416,3 @@ cdef enum univ_morph_t: | |||
|     Voice_int # hb | ||||
|     end_Voice | ||||
| 
 | ||||
| 
 | ||||
| cdef struct RichTagC: | ||||
|     univ_pos_t pos | ||||
|      | ||||
|     univ_morph_t abbr | ||||
|     univ_morph_t adp_type | ||||
|     univ_morph_t adv_type | ||||
|     univ_morph_t animacy | ||||
|     univ_morph_t aspect | ||||
|     univ_morph_t case | ||||
|     univ_morph_t conj_type | ||||
|     univ_morph_t connegative | ||||
|     univ_morph_t definite | ||||
|     univ_morph_t degree | ||||
|     univ_morph_t derivation | ||||
|     univ_morph_t echo | ||||
|     univ_morph_t foreign | ||||
|     univ_morph_t gender | ||||
|     univ_morph_t hyph | ||||
|     univ_morph_t inf_form | ||||
|     univ_morph_t mood | ||||
|     univ_morph_t negative | ||||
|     univ_morph_t number | ||||
|     univ_morph_t name_type | ||||
|     univ_morph_t noun_type | ||||
|     univ_morph_t num_form | ||||
|     univ_morph_t num_type | ||||
|     univ_morph_t num_value | ||||
|     univ_morph_t part_form | ||||
|     univ_morph_t part_type | ||||
|     univ_morph_t person | ||||
|     univ_morph_t polite | ||||
|     univ_morph_t polarity | ||||
|     univ_morph_t poss | ||||
|     univ_morph_t prefix | ||||
|     univ_morph_t prep_case | ||||
|     univ_morph_t pron_type | ||||
|     univ_morph_t punct_side | ||||
|     univ_morph_t punct_type | ||||
|     univ_morph_t reflex | ||||
|     univ_morph_t style | ||||
|     univ_morph_t style_variant | ||||
|     univ_morph_t tense | ||||
|     univ_morph_t typo | ||||
|     univ_morph_t verb_form | ||||
|     univ_morph_t voice | ||||
|     univ_morph_t verb_type | ||||
|  |  | |||
|  | @ -111,13 +111,13 @@ cdef class Morphology: | |||
|                 print(list(NAMES.keys())[:10]) | ||||
|                 print(NAMES.get(feature-1), NAMES.get(feature+1)) | ||||
|                 raise KeyError("Unknown feature: %d" % feature) | ||||
|         cdef RichTagC tag | ||||
|         cdef MorphAnalysisC tag | ||||
|         tag = create_rich_tag(features) | ||||
|         cdef hash_t key = self.insert(tag) | ||||
|         return key | ||||
| 
 | ||||
|     def get(self, hash_t morph): | ||||
|         tag = <RichTagC*>self.tags.get(morph) | ||||
|         tag = <MorphAnalysisC*>self.tags.get(morph) | ||||
|         if tag == NULL: | ||||
|             return [] | ||||
|         else: | ||||
|  | @ -125,7 +125,7 @@ cdef class Morphology: | |||
|      | ||||
|     cpdef update(self, hash_t morph, features): | ||||
|         """Update a morphological analysis with new feature values.""" | ||||
|         tag = (<RichTagC*>self.tags.get(morph))[0] | ||||
|         tag = (<MorphAnalysisC*>self.tags.get(morph))[0] | ||||
|         features = intify_features(features) | ||||
|         cdef univ_morph_t feature | ||||
|         for feature in features: | ||||
|  | @ -168,10 +168,10 @@ cdef class Morphology: | |||
|         attrs = intify_attrs(attrs, self.strings, _do_deprecated=True) | ||||
|         self.exc[(tag_str, self.strings.add(orth_str))] = attrs | ||||
|   | ||||
|     cdef hash_t insert(self, RichTagC tag) except 0: | ||||
|     cdef hash_t insert(self, MorphAnalysisC tag) except 0: | ||||
|         cdef hash_t key = hash_tag(tag) | ||||
|         if self.tags.get(key) == NULL: | ||||
|             tag_ptr = <RichTagC*>self.mem.alloc(1, sizeof(RichTagC)) | ||||
|             tag_ptr = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC)) | ||||
|             tag_ptr[0] = tag | ||||
|             self.tags.set(key, <void*>tag_ptr) | ||||
|         return key | ||||
|  | @ -240,7 +240,7 @@ cdef class Morphology: | |||
|     def to_bytes(self): | ||||
|         json_tags = [] | ||||
|         for key in self.tags: | ||||
|             tag_ptr = <RichTagC*>self.tags.get(key) | ||||
|             tag_ptr = <MorphAnalysisC*>self.tags.get(key) | ||||
|             if tag_ptr != NULL: | ||||
|                 json_tags.append(tag_to_json(tag_ptr[0])) | ||||
|         return srsly.json_dumps(json_tags) | ||||
|  | @ -261,18 +261,18 @@ cpdef univ_pos_t get_int_tag(pos_): | |||
| cpdef intify_features(features): | ||||
|     return {IDS.get(feature, feature) for feature in features} | ||||
| 
 | ||||
| cdef hash_t hash_tag(RichTagC tag) nogil: | ||||
| cdef hash_t hash_tag(MorphAnalysisC tag) nogil: | ||||
|     return mrmr.hash64(&tag, sizeof(tag), 0) | ||||
| 
 | ||||
| cdef RichTagC create_rich_tag(features) except *: | ||||
|     cdef RichTagC tag | ||||
| cdef MorphAnalysisC create_rich_tag(features) except *: | ||||
|     cdef MorphAnalysisC tag | ||||
|     cdef univ_morph_t feature | ||||
|     memset(&tag, 0, sizeof(tag)) | ||||
|     for feature in features: | ||||
|         set_feature(&tag, feature, 1) | ||||
|     return tag | ||||
| 
 | ||||
| cdef tag_to_json(RichTagC tag): | ||||
| cdef tag_to_json(MorphAnalysisC tag): | ||||
|     features = [] | ||||
|     if tag.abbr != 0: | ||||
|         features.append(NAMES[tag.abbr]) | ||||
|  | @ -360,11 +360,11 @@ cdef tag_to_json(RichTagC tag): | |||
|         features.append(NAMES[tag.verb_type]) | ||||
|     return features | ||||
| 
 | ||||
| cdef RichTagC tag_from_json(json_tag): | ||||
|     cdef RichTagC tag | ||||
| cdef MorphAnalysisC tag_from_json(json_tag): | ||||
|     cdef MorphAnalysisC tag | ||||
|     return tag | ||||
|   | ||||
| cdef int set_feature(RichTagC* tag, univ_morph_t feature, int value) except -1: | ||||
| cdef int set_feature(MorphAnalysisC* tag, univ_morph_t feature, int value) except -1: | ||||
|     if value == True: | ||||
|         value_ = feature | ||||
|     else: | ||||
|  |  | |||
|  | @ -74,4 +74,50 @@ cdef struct TokenC: | |||
|     hash_t ent_id | ||||
| 
 | ||||
| 
 | ||||
| cdef struct MorphAnalysisC: | ||||
|     univ_pos_t pos | ||||
|      | ||||
|     attr_t abbr | ||||
|     attr_t adp_type | ||||
|     attr_t adv_type | ||||
|     attr_t animacy | ||||
|     attr_t aspect | ||||
|     attr_t case | ||||
|     attr_t conj_type | ||||
|     attr_t connegative | ||||
|     attr_t definite | ||||
|     attr_t degree | ||||
|     attr_t derivation | ||||
|     attr_t echo | ||||
|     attr_t foreign | ||||
|     attr_t gender | ||||
|     attr_t hyph | ||||
|     attr_t inf_form | ||||
|     attr_t mood | ||||
|     attr_t negative | ||||
|     attr_t number | ||||
|     attr_t name_type | ||||
|     attr_t noun_type | ||||
|     attr_t num_form | ||||
|     attr_t num_type | ||||
|     attr_t num_value | ||||
|     attr_t part_form | ||||
|     attr_t part_type | ||||
|     attr_t person | ||||
|     attr_t polite | ||||
|     attr_t polarity | ||||
|     attr_t poss | ||||
|     attr_t prefix | ||||
|     attr_t prep_case | ||||
|     attr_t pron_type | ||||
|     attr_t punct_side | ||||
|     attr_t punct_type | ||||
|     attr_t reflex | ||||
|     attr_t style | ||||
|     attr_t style_variant | ||||
|     attr_t tense | ||||
|     attr_t typo | ||||
|     attr_t verb_form | ||||
|     attr_t voice | ||||
|     attr_t verb_type | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,10 +1,14 @@ | |||
| from ..vocab cimport Vocab | ||||
| from ..typedefs cimport hash_t | ||||
| 
 | ||||
| 
 | ||||
| cdef class Morphanalysis: | ||||
|     """Control access to morphological features for a token.""" | ||||
|     def __init__(self, Vocab vocab, features=None): | ||||
|         pass | ||||
|     def __init__(self, Vocab vocab, features=tuple()): | ||||
|         self.vocab = vocab | ||||
|         self.key = self.vocab.morphology.add(features) | ||||
|         analysis = <const MorphAnalysisC*>self.vocab.morphology.tags.get(self.key) | ||||
|         self.c = analysis[0] | ||||
| 
 | ||||
|     @classmethod | ||||
|     def from_id(self, Vocab vocab, hash_t key): | ||||
|  | @ -28,6 +32,12 @@ cdef class Morphanalysis: | |||
|     def __hash__(self): | ||||
|         pass | ||||
| 
 | ||||
|     def get(self, name): | ||||
|         pass | ||||
| 
 | ||||
|     def to_json(self): | ||||
|         pass | ||||
| 
 | ||||
|     @property | ||||
|     def is_base_form(self): | ||||
|         pass | ||||
|  | @ -44,17 +54,354 @@ cdef class Morphanalysis: | |||
|     def id(self): | ||||
|         pass | ||||
| 
 | ||||
|     def get(self, name): | ||||
|         pass | ||||
|     property abbr: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     def set(self, name, value): | ||||
|         pass | ||||
|     property adp_type: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     def add(self, feature): | ||||
|         pass | ||||
|     property adv_type: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     def remove(self, feature): | ||||
|         pass | ||||
|     property animacy: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     def to_json(self): | ||||
|         pass | ||||
|     property aspect: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|          | ||||
|     property case: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|      | ||||
|     property conj_type: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property connegative: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property definite: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property degree: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property derivation: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property echo: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property foreign: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property gender: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property hyph: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property inf_form: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property name_type: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property negative: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property mood: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|      | ||||
|     property name_type: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property negative: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property number: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property num_form: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property num_type: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property num_value: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property part_form: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property part_type: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property person: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|      | ||||
|     property polite: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|      | ||||
|     property polarity: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property poss: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property prefix: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property prep_case: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property pron_type: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property punct_side: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property punct_type: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property reflex: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property style: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|      | ||||
|     property style_variant: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|   | ||||
|     property tense: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|   | ||||
|     property typo: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|   | ||||
|     property verb_form: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|   | ||||
|     property voice: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|   | ||||
|     property verb_type: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property abbr_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property adp_type_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property adv_type_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property animacy_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property aspect_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|          | ||||
|     property case_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|      | ||||
|     property conj_type_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property connegative_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property definite_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property degree_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property derivation_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property echo_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property foreign_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property gender_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property hyph_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property inf_form_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property name_type_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property negative_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property mood_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|      | ||||
|     property name_type_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property negative_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property number_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property num_form_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property num_type_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property num_value_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property part_form_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property part_type_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property person_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|      | ||||
|     property polite_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|      | ||||
|     property polarity_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property poss_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property prefix_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property prep_case_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property pron_type_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property punct_side_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property punct_type_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property reflex_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
| 
 | ||||
|     property style_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|      | ||||
|     property style_variant_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|   | ||||
|     property tense_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|   | ||||
|     property typo_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|   | ||||
|     property verb_form_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|   | ||||
|     property voice_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|   | ||||
|     property verb_type_: | ||||
|         def __get__(self): | ||||
|             pass | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user