mirror of
https://github.com/explosion/spaCy.git
synced 2025-10-02 18:06:46 +03:00
Ratty implementation of morphology features
This commit is contained in:
parent
9c32388235
commit
9db60acd7c
|
@ -107,6 +107,7 @@ cdef class Morphology:
|
|||
# justification is that this is where the specific word and the tag
|
||||
# interact. Still, we should have a better way to enforce this rule, or
|
||||
# figure out why the statistical model fails. Related to Issue #220
|
||||
previous_features = self.get_features(token.morph)
|
||||
if Lexeme.c_check_flag(token.lex, IS_SPACE):
|
||||
tag_id = self.reverse_index[self.strings.add('_SP')]
|
||||
rich_tag = self.rich_tags[tag_id]
|
||||
|
@ -122,6 +123,8 @@ cdef class Morphology:
|
|||
token.pos = analysis.tag.pos
|
||||
token.tag = analysis.tag.name
|
||||
token.morph = analysis.tag.morph
|
||||
for feature in previous_features:
|
||||
self.set_feature(&token.morph, feature, True)
|
||||
|
||||
cdef int assign_feature(self, uint64_t* morph, univ_morph_t flag_id, bint value) except -1:
|
||||
# Deprecated
|
||||
|
@ -146,7 +149,10 @@ cdef class Morphology:
|
|||
self._morph2features[morph[0]] = new_features
|
||||
|
||||
def get_features(self, uint64_t morph):
|
||||
return self._morph2features.get(morph, frozenset())
|
||||
if morph in self._morph2features:
|
||||
return self._morph2features[morph]
|
||||
else:
|
||||
return frozenset()
|
||||
|
||||
def add_special_case(self, unicode tag_str, unicode orth_str, attrs,
|
||||
force=False):
|
||||
|
|
|
@ -484,9 +484,11 @@ class Tagger(Pipe):
|
|||
new_tag_map[tag] = {POS: X}
|
||||
cdef Vocab vocab = self.vocab
|
||||
if new_tag_map:
|
||||
morph_feats = self.vocab.morphology._morph2features
|
||||
vocab.morphology = Morphology(vocab.strings, new_tag_map,
|
||||
vocab.morphology.lemmatizer,
|
||||
exc=vocab.morphology.exc)
|
||||
vocab.morphology._morph2features = morph_feats
|
||||
if self.model is True:
|
||||
self.cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1]
|
||||
self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
|
||||
|
@ -519,10 +521,12 @@ class Tagger(Pipe):
|
|||
if values is None:
|
||||
values = {POS: "X"}
|
||||
tag_map[label] = values
|
||||
morph_feats = self.vocab.morphology._morph2features  # read the cache from the live instance, not the class
|
||||
self.vocab.morphology = Morphology(
|
||||
self.vocab.strings, tag_map=tag_map,
|
||||
lemmatizer=self.vocab.morphology.lemmatizer,
|
||||
exc=self.vocab.morphology.exc)
|
||||
self.vocab.morphology._morph2features = morph_feats
|
||||
return 1
|
||||
|
||||
def use_params(self, params):
|
||||
|
@ -554,10 +558,12 @@ class Tagger(Pipe):
|
|||
|
||||
def load_tag_map(b):
|
||||
tag_map = msgpack.loads(b, encoding='utf8')
|
||||
morph_feats = self.vocab.morphology._morph2features
|
||||
self.vocab.morphology = Morphology(
|
||||
self.vocab.strings, tag_map=tag_map,
|
||||
lemmatizer=self.vocab.morphology.lemmatizer,
|
||||
exc=self.vocab.morphology.exc)
|
||||
self.vocab.morphology._morph2features = morph_feats
|
||||
|
||||
deserialize = OrderedDict((
|
||||
('vocab', lambda b: self.vocab.from_bytes(b)),
|
||||
|
@ -590,10 +596,12 @@ class Tagger(Pipe):
|
|||
def load_tag_map(p):
|
||||
with p.open('rb') as file_:
|
||||
tag_map = msgpack.loads(file_.read(), encoding='utf8')
|
||||
morph_feats = self.vocab.morphology._morph2features
|
||||
self.vocab.morphology = Morphology(
|
||||
self.vocab.strings, tag_map=tag_map,
|
||||
lemmatizer=self.vocab.morphology.lemmatizer,
|
||||
exc=self.vocab.morphology.exc)
|
||||
self.vocab.morphology._morph2features = morph_feats
|
||||
|
||||
deserialize = OrderedDict((
|
||||
('cfg', lambda p: self.cfg.update(_load_cfg(p))),
|
||||
|
|
Loading…
Reference in New Issue
Block a user