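Fix morphology tagger: rename Morphology._assign_tag_id to assign_tag_id, have Tagger call it with the predicted tag ID, and pass the raw TAG value to assign_tag in Doc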

This commit is contained in:
Matthew Honnibal 2016-11-04 19:19:09 +01:00
parent a36353df47
commit 1fb09c3dc1
4 changed files with 5 additions and 6 deletions

View File

@ -35,7 +35,7 @@ cdef class Morphology:
cdef int assign_tag(self, TokenC* token, tag) except -1 cdef int assign_tag(self, TokenC* token, tag) except -1
cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1 cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1
cdef int assign_feature(self, uint64_t* morph, feature, value) except -1 cdef int assign_feature(self, uint64_t* morph, feature, value) except -1

View File

@ -39,9 +39,9 @@ cdef class Morphology:
tag_id = self.reverse_index[self.strings[tag]] tag_id = self.reverse_index[self.strings[tag]]
else: else:
tag_id = self.reverse_index[tag] tag_id = self.reverse_index[tag]
self._assign_tag_id(token, tag_id) self.assign_tag_id(token, tag_id)
cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1: cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1:
if tag_id >= self.n_tags: if tag_id >= self.n_tags:
raise ValueError("Unknown tag ID: %s" % tag_id) raise ValueError("Unknown tag ID: %s" % tag_id)
# TODO: It's pretty arbitrary to put this logic here. I guess the justification # TODO: It's pretty arbitrary to put this logic here. I guess the justification

View File

@ -196,7 +196,7 @@ cdef class Tagger:
self.model.set_scoresC(eg.c.scores, self.model.set_scoresC(eg.c.scores,
eg.c.features, eg.c.nr_feat) eg.c.features, eg.c.nr_feat)
guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class) guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class)
self.vocab.morphology.assign_tag(&tokens.c[i], guess) self.vocab.morphology.assign_tag_id(&tokens.c[i], guess)
eg.fill_scores(0, eg.c.nr_class) eg.fill_scores(0, eg.c.nr_class)
tokens.is_tagged = True tokens.is_tagged = True
tokens._py_tokens = [None] * tokens.length tokens._py_tokens = [None] * tokens.length

View File

@ -577,8 +577,7 @@ cdef class Doc:
elif attr_id == TAG: elif attr_id == TAG:
for i in range(length): for i in range(length):
if values[i] != 0: if values[i] != 0:
self.vocab.morphology.assign_tag(&tokens[i], self.vocab.morphology.assign_tag(&tokens[i], values[i])
self.vocab.morphology.reverse_index[values[i]])
elif attr_id == POS: elif attr_id == POS:
for i in range(length): for i in range(length):
tokens[i].pos = <univ_pos_t>values[i] tokens[i].pos = <univ_pos_t>values[i]