mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Update Morphology to load exceptions as MORPH_RULES
Update `Morphology` to load exceptions in `Morphology.__init__` and `Morphology.load_morph_exceptions` from the format used in `MORPH_RULES` rather than the internal format with tuple keys.
* Rename `Morphology.exc` to `Morphology._exc` for internal use with tuple keys
* Add `Morphology.exc` as a property that converts the internal `_exc` back to the `MORPH_RULES` format, primarily for serialization
This commit is contained in:
		
							parent
							
								
									d83e3c44c5
								
							
						
					
					
						commit
						d106cf66dd
					
				| 
						 | 
					@ -78,9 +78,7 @@ class BaseDefaults:
 | 
				
			||||||
            BASE_NORMS,
 | 
					            BASE_NORMS,
 | 
				
			||||||
            vocab.lookups.get_table("lexeme_norm"),
 | 
					            vocab.lookups.get_table("lexeme_norm"),
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        for tag_str, exc in cls.morph_rules.items():
 | 
					        vocab.morphology.load_morph_exceptions(cls.morph_rules)
 | 
				
			||||||
            for orth_str, attrs in exc.items():
 | 
					 | 
				
			||||||
                vocab.morphology.add_special_case(tag_str, orth_str, attrs)
 | 
					 | 
				
			||||||
        return vocab
 | 
					        return vocab
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -20,7 +20,7 @@ cdef class Morphology:
 | 
				
			||||||
    cdef readonly object tag_map
 | 
					    cdef readonly object tag_map
 | 
				
			||||||
    cdef readonly object tag_names
 | 
					    cdef readonly object tag_names
 | 
				
			||||||
    cdef readonly object reverse_index
 | 
					    cdef readonly object reverse_index
 | 
				
			||||||
    cdef readonly object exc
 | 
					    cdef readonly object _exc
 | 
				
			||||||
    cdef readonly PreshMapArray _cache
 | 
					    cdef readonly PreshMapArray _cache
 | 
				
			||||||
    cdef readonly int n_tags
 | 
					    cdef readonly int n_tags
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -82,12 +82,9 @@ cdef class Morphology:
 | 
				
			||||||
        self._load_from_tag_map(tag_map)
 | 
					        self._load_from_tag_map(tag_map)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self._cache = PreshMapArray(self.n_tags)
 | 
					        self._cache = PreshMapArray(self.n_tags)
 | 
				
			||||||
        self.exc = {}
 | 
					        self._exc = {}
 | 
				
			||||||
        if exc is not None:
 | 
					        if exc is not None:
 | 
				
			||||||
            for (tag, orth), attrs in exc.items():
 | 
					            self.load_morph_exceptions(exc)
 | 
				
			||||||
                attrs = _normalize_props(attrs)
 | 
					 | 
				
			||||||
                self.add_special_case(
 | 
					 | 
				
			||||||
                    self.strings.as_string(tag), self.strings.as_string(orth), attrs)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _load_from_tag_map(self, tag_map):
 | 
					    def _load_from_tag_map(self, tag_map):
 | 
				
			||||||
        for i, (tag_str, attrs) in enumerate(sorted(tag_map.items())):
 | 
					        for i, (tag_str, attrs) in enumerate(sorted(tag_map.items())):
 | 
				
			||||||
| 
						 | 
					@ -98,7 +95,7 @@ cdef class Morphology:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __reduce__(self):
 | 
					    def __reduce__(self):
 | 
				
			||||||
        return (Morphology, (self.strings, self.tag_map, self.lemmatizer,
 | 
					        return (Morphology, (self.strings, self.tag_map, self.lemmatizer,
 | 
				
			||||||
                self.exc), None, None)
 | 
					                self._exc), None, None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def add(self, features):
 | 
					    def add(self, features):
 | 
				
			||||||
        """Insert a morphological analysis in the morphology table, if not
 | 
					        """Insert a morphological analysis in the morphology table, if not
 | 
				
			||||||
| 
						 | 
					@ -208,7 +205,7 @@ cdef class Morphology:
 | 
				
			||||||
        attrs = _normalize_props(attrs)
 | 
					        attrs = _normalize_props(attrs)
 | 
				
			||||||
        self.add(attrs)
 | 
					        self.add(attrs)
 | 
				
			||||||
        attrs = intify_attrs(attrs, self.strings, _do_deprecated=True)
 | 
					        attrs = intify_attrs(attrs, self.strings, _do_deprecated=True)
 | 
				
			||||||
        self.exc[(tag_str, self.strings.add(orth_str))] = attrs
 | 
					        self._exc[(tag_str, self.strings.add(orth_str))] = attrs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef int assign_untagged(self, TokenC* token) except -1:
 | 
					    cdef int assign_untagged(self, TokenC* token) except -1:
 | 
				
			||||||
        """Set morphological attributes on a token without a POS tag. Uses
 | 
					        """Set morphological attributes on a token without a POS tag. Uses
 | 
				
			||||||
| 
						 | 
					@ -254,21 +251,34 @@ cdef class Morphology:
 | 
				
			||||||
        token.pos = <univ_pos_t>pos
 | 
					        token.pos = <univ_pos_t>pos
 | 
				
			||||||
        token.tag = self.strings[tag_str]
 | 
					        token.tag = self.strings[tag_str]
 | 
				
			||||||
        token.morph = self.add(features)
 | 
					        token.morph = self.add(features)
 | 
				
			||||||
        if (self.tag_names[tag_id], token.lex.orth) in self.exc:
 | 
					        if (self.tag_names[tag_id], token.lex.orth) in self._exc:
 | 
				
			||||||
            self._assign_tag_from_exceptions(token, tag_id)
 | 
					            self._assign_tag_from_exceptions(token, tag_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1:
 | 
					    cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1:
 | 
				
			||||||
        key = (self.tag_names[tag_id], token.lex.orth)
 | 
					        key = (self.tag_names[tag_id], token.lex.orth)
 | 
				
			||||||
        cdef dict attrs
 | 
					        cdef dict attrs
 | 
				
			||||||
        attrs = self.exc[key]
 | 
					        attrs = self._exc[key]
 | 
				
			||||||
        token.pos = attrs.get(POS, token.pos)
 | 
					        token.pos = attrs.get(POS, token.pos)
 | 
				
			||||||
        token.lemma = attrs.get(LEMMA, token.lemma)
 | 
					        token.lemma = attrs.get(LEMMA, token.lemma)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def load_morph_exceptions(self, dict exc):
 | 
					    def load_morph_exceptions(self, dict morph_rules):
 | 
				
			||||||
 | 
					        self._exc = {}
 | 
				
			||||||
        # Map (form, pos) to attributes
 | 
					        # Map (form, pos) to attributes
 | 
				
			||||||
        for tag_str, entries in exc.items():
 | 
					        for tag, exc in morph_rules.items():
 | 
				
			||||||
            for form_str, attrs in entries.items():
 | 
					            for orth, attrs in exc.items():
 | 
				
			||||||
                self.add_special_case(tag_str, form_str, attrs)
 | 
					                attrs = _normalize_props(attrs)
 | 
				
			||||||
 | 
					                self.add_special_case(self.strings.as_string(tag), self.strings.as_string(orth), attrs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def exc(self):
 | 
				
			||||||
 | 
					        # generate the serializable exc in the MORPH_RULES format from the
 | 
				
			||||||
 | 
					        # internal tuple-key format
 | 
				
			||||||
 | 
					        morph_rules = {}
 | 
				
			||||||
 | 
					        for (tag, orth) in sorted(self._exc):
 | 
				
			||||||
 | 
					            if not tag in morph_rules:
 | 
				
			||||||
 | 
					                morph_rules[tag] = {}
 | 
				
			||||||
 | 
					            morph_rules[tag][self.strings[orth]] = self._exc[(tag, orth)]
 | 
				
			||||||
 | 
					        return morph_rules
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @staticmethod
 | 
					    @staticmethod
 | 
				
			||||||
    def feats_to_dict(feats):
 | 
					    def feats_to_dict(feats):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user