mirror of https://github.com/explosion/spaCy.git, synced 2025-10-25 05:01:02 +03:00
			
		
		
		
	Restore previous morphology stuff
This commit is contained in:
		
parent 3bba8e9245
commit a3d2e616d5
					
				|  | @ -1,5 +1,5 @@ | |||
| from cymem.cymem cimport Pool | ||||
| from preshed.maps cimport PreshMap | ||||
| from preshed.maps cimport PreshMap, PreshMapArray | ||||
| from libc.stdint cimport uint64_t | ||||
| from murmurhash cimport mrmr | ||||
| 
 | ||||
|  | @ -17,14 +17,17 @@ cdef class Morphology: | |||
|   | ||||
|     cdef public object lemmatizer | ||||
|     cdef readonly object tag_map | ||||
|     cdef readonly object tag_names | ||||
|     cdef readonly object reverse_index | ||||
|     cdef readonly object exc | ||||
|     cdef readonly int n_tags | ||||
| 
 | ||||
|     cdef hash_t insert(self, RichTagC tag) except 0 | ||||
|      | ||||
|     cdef int assign_untagged(self, TokenC* token) except -1 | ||||
|     cdef int assign_tag(self, TokenC* token, tag) except -1 | ||||
|     cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1 | ||||
|     cdef update_token_morph(self, TokenC* token, features) | ||||
|     cdef set_token_morph(self, TokenC* token, pos, features) | ||||
|     cdef update_morph(self, hash_t morph, features) | ||||
| 
 | ||||
| cdef enum univ_morph_t: | ||||
|     NIL = 0 | ||||
|  |  | |||
|  | @ -125,17 +125,17 @@ cdef class Morphology: | |||
|         # figure out why the statistical model fails. Related to Issue #220 | ||||
|         if Lexeme.c_check_flag(token.lex, IS_SPACE): | ||||
|             tag_id = self.reverse_index[self.strings.add('_SP')] | ||||
|         tag_str = self.tag_names[tag_id] | ||||
|         features = dict(self.tag_map.get(tag_str, {})) | ||||
|         lemma = <attr_t>self._cache.get(tag_id, token.lex.orth) | ||||
|         if lemma == 0: | ||||
|             tag_str = self.tag_names[tag_id] | ||||
|             features = dict(self.tag_map.get(tag_str, {})) | ||||
|         if lemma == 0 and features: | ||||
|             pos = self.strings.as_int(features.pop('POS')) | ||||
|             lemma = self.lemmatize(pos, token.lex.orth, features) | ||||
|             self._cache.set(tag_id, token.lex.orth, lemma) | ||||
|         token.lemma = lemma | ||||
|         token.pos = pos | ||||
|         token.tag = self.strings[tag_str] | ||||
|         token.morph = self.add(attrs) | ||||
|         token.morph = self.add(features) | ||||
| 
 | ||||
|     cdef update_morph(self, hash_t morph, features): | ||||
|         """Update a morphological analysis with new feature values.""" | ||||
|  | @ -175,10 +175,9 @@ cpdef intify_features(StringStore strings, features): | |||
| cdef hash_t hash_tag(RichTagC tag) nogil: | ||||
|     return mrmr.hash64(&tag, sizeof(tag), 0) | ||||
| 
 | ||||
| cdef RichTagC create_rich_tag(pos_, features): | ||||
| cdef RichTagC create_rich_tag(features): | ||||
|     cdef RichTagC tag | ||||
|     cdef univ_morph_t feature | ||||
|     tag.pos = get_int_tag(pos_) | ||||
|     for feature in features: | ||||
|         set_feature(&tag, feature, 1) | ||||
|     return tag | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user