* Fix tag assignment in doc.from_array

This commit is contained in:
Matthew Honnibal 2015-11-03 18:45:54 +11:00
parent 09664177d7
commit 833eb35c57
2 changed files with 4 additions and 1 deletion

View File

@@ -38,6 +38,8 @@ cdef class Morphology:
tag_id = self.reverse_index[self.strings[tag]] tag_id = self.reverse_index[self.strings[tag]]
else: else:
tag_id = tag tag_id = tag
if tag_id >= self.n_tags:
raise ValueError("Unknown tag: %s" % tag)
analysis = <MorphAnalysisC*>self._cache.get(tag_id, token.lex.orth) analysis = <MorphAnalysisC*>self._cache.get(tag_id, token.lex.orth)
if analysis is NULL: if analysis is NULL:
analysis = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC)) analysis = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC))

View File

@@ -398,7 +398,8 @@ cdef class Doc:
self.is_parsed = True self.is_parsed = True
elif attr_id == TAG: elif attr_id == TAG:
for i in range(length): for i in range(length):
self.vocab.morphology.assign_tag(&tokens[i], values[i]) self.vocab.morphology.assign_tag(&tokens[i],
self.vocab.strings[values[i]])
if not self.is_tagged and tokens[i].tag != 0: if not self.is_tagged and tokens[i].tag != 0:
self.is_tagged = True self.is_tagged = True
elif attr_id == POS: elif attr_id == POS: