From 1fb09c3dc16d20fdbcc3576efbd020ea93e291e1 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 4 Nov 2016 19:19:09 +0100 Subject: [PATCH] Fix morphology tagger --- spacy/morphology.pxd | 2 +- spacy/morphology.pyx | 4 ++-- spacy/tagger.pyx | 2 +- spacy/tokens/doc.pyx | 3 +-- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/spacy/morphology.pxd b/spacy/morphology.pxd index ad9d61eab..aa45c47f0 100644 --- a/spacy/morphology.pxd +++ b/spacy/morphology.pxd @@ -35,7 +35,7 @@ cdef class Morphology: cdef int assign_tag(self, TokenC* token, tag) except -1 - cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1 + cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1 cdef int assign_feature(self, uint64_t* morph, feature, value) except -1 diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index ef546e4e6..e5e5e013f 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -39,9 +39,9 @@ cdef class Morphology: tag_id = self.reverse_index[self.strings[tag]] else: tag_id = self.reverse_index[tag] - self._assign_tag_id(token, tag_id) + self.assign_tag_id(token, tag_id) - cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1: + cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1: if tag_id >= self.n_tags: raise ValueError("Unknown tag ID: %s" % tag_id) # TODO: It's pretty arbitrary to put this logic here. I guess the justification diff --git a/spacy/tagger.pyx b/spacy/tagger.pyx index a387ccb12..53e648f24 100644 --- a/spacy/tagger.pyx +++ b/spacy/tagger.pyx @@ -196,7 +196,7 @@ cdef class Tagger: self.model.set_scoresC(eg.c.scores, eg.c.features, eg.c.nr_feat) guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class) - self.vocab.morphology.assign_tag(&tokens.c[i], guess) + self.vocab.morphology.assign_tag_id(&tokens.c[i], guess) eg.fill_scores(0, eg.c.nr_class) tokens.is_tagged = True tokens._py_tokens = [None] * tokens.length diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 1200a0517..3d09b7ad0 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -577,8 +577,7 @@ cdef class Doc: elif attr_id == TAG: for i in range(length): if values[i] != 0: - self.vocab.morphology.assign_tag(&tokens[i], - self.vocab.morphology.reverse_index[values[i]]) + self.vocab.morphology.assign_tag(&tokens[i], values[i]) elif attr_id == POS: for i in range(length): tokens[i].pos = values[i]