mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-24 08:14:15 +03:00
Update Morphology to load exceptions as MORPH_RULES
Update `Morphology` to load exceptions in `Morphology.__init__` and `Morphology.load_morph_exceptions` from the format used in `MORPH_RULES` rather than the internal format with tuple keys. * Rename to `Morphology.exc` to `Morphology._exc` for internal use with tuple keys * Add `Morphology.exc` as a property that converts the internal `_exc` back to `MORPH_RULES` format, primarily for serialization
This commit is contained in:
parent
d83e3c44c5
commit
d106cf66dd
|
@ -78,9 +78,7 @@ class BaseDefaults:
|
|||
BASE_NORMS,
|
||||
vocab.lookups.get_table("lexeme_norm"),
|
||||
)
|
||||
for tag_str, exc in cls.morph_rules.items():
|
||||
for orth_str, attrs in exc.items():
|
||||
vocab.morphology.add_special_case(tag_str, orth_str, attrs)
|
||||
vocab.morphology.load_morph_exceptions(cls.morph_rules)
|
||||
return vocab
|
||||
|
||||
@classmethod
|
||||
|
|
|
@ -20,7 +20,7 @@ cdef class Morphology:
|
|||
cdef readonly object tag_map
|
||||
cdef readonly object tag_names
|
||||
cdef readonly object reverse_index
|
||||
cdef readonly object exc
|
||||
cdef readonly object _exc
|
||||
cdef readonly PreshMapArray _cache
|
||||
cdef readonly int n_tags
|
||||
|
||||
|
|
|
@ -82,12 +82,9 @@ cdef class Morphology:
|
|||
self._load_from_tag_map(tag_map)
|
||||
|
||||
self._cache = PreshMapArray(self.n_tags)
|
||||
self.exc = {}
|
||||
self._exc = {}
|
||||
if exc is not None:
|
||||
for (tag, orth), attrs in exc.items():
|
||||
attrs = _normalize_props(attrs)
|
||||
self.add_special_case(
|
||||
self.strings.as_string(tag), self.strings.as_string(orth), attrs)
|
||||
self.load_morph_exceptions(exc)
|
||||
|
||||
def _load_from_tag_map(self, tag_map):
|
||||
for i, (tag_str, attrs) in enumerate(sorted(tag_map.items())):
|
||||
|
@ -98,7 +95,7 @@ cdef class Morphology:
|
|||
|
||||
def __reduce__(self):
|
||||
return (Morphology, (self.strings, self.tag_map, self.lemmatizer,
|
||||
self.exc), None, None)
|
||||
self._exc), None, None)
|
||||
|
||||
def add(self, features):
|
||||
"""Insert a morphological analysis in the morphology table, if not
|
||||
|
@ -208,7 +205,7 @@ cdef class Morphology:
|
|||
attrs = _normalize_props(attrs)
|
||||
self.add(attrs)
|
||||
attrs = intify_attrs(attrs, self.strings, _do_deprecated=True)
|
||||
self.exc[(tag_str, self.strings.add(orth_str))] = attrs
|
||||
self._exc[(tag_str, self.strings.add(orth_str))] = attrs
|
||||
|
||||
cdef int assign_untagged(self, TokenC* token) except -1:
|
||||
"""Set morphological attributes on a token without a POS tag. Uses
|
||||
|
@ -254,21 +251,34 @@ cdef class Morphology:
|
|||
token.pos = <univ_pos_t>pos
|
||||
token.tag = self.strings[tag_str]
|
||||
token.morph = self.add(features)
|
||||
if (self.tag_names[tag_id], token.lex.orth) in self.exc:
|
||||
if (self.tag_names[tag_id], token.lex.orth) in self._exc:
|
||||
self._assign_tag_from_exceptions(token, tag_id)
|
||||
|
||||
cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1:
|
||||
key = (self.tag_names[tag_id], token.lex.orth)
|
||||
cdef dict attrs
|
||||
attrs = self.exc[key]
|
||||
attrs = self._exc[key]
|
||||
token.pos = attrs.get(POS, token.pos)
|
||||
token.lemma = attrs.get(LEMMA, token.lemma)
|
||||
|
||||
def load_morph_exceptions(self, dict exc):
|
||||
def load_morph_exceptions(self, dict morph_rules):
|
||||
self._exc = {}
|
||||
# Map (form, pos) to attributes
|
||||
for tag_str, entries in exc.items():
|
||||
for form_str, attrs in entries.items():
|
||||
self.add_special_case(tag_str, form_str, attrs)
|
||||
for tag, exc in morph_rules.items():
|
||||
for orth, attrs in exc.items():
|
||||
attrs = _normalize_props(attrs)
|
||||
self.add_special_case(self.strings.as_string(tag), self.strings.as_string(orth), attrs)
|
||||
|
||||
@property
|
||||
def exc(self):
|
||||
# generate the serializable exc in the MORPH_RULES format from the
|
||||
# internal tuple-key format
|
||||
morph_rules = {}
|
||||
for (tag, orth) in sorted(self._exc):
|
||||
if not tag in morph_rules:
|
||||
morph_rules[tag] = {}
|
||||
morph_rules[tag][self.strings[orth]] = self._exc[(tag, orth)]
|
||||
return morph_rules
|
||||
|
||||
@staticmethod
|
||||
def feats_to_dict(feats):
|
||||
|
|
Loading…
Reference in New Issue
Block a user