Make Morphology not fail on unknown tags

This commit is contained in:
Matthew Honnibal 2017-11-03 13:29:09 +01:00
parent c9b118a7e9
commit bd2cbdfa85

View File

@ -129,8 +129,14 @@ cdef class Morphology:
tag (unicode): The part-of-speech tag to key the exception. tag (unicode): The part-of-speech tag to key the exception.
orth (unicode): The word-form to key the exception. orth (unicode): The word-form to key the exception.
""" """
# TODO: Currently we assume that the number of tags is known up front --
# rich_tags is an array of RichTagC, and _cache is a PreshMapArray.
# This is really bad: it ties the morphology to the tagger's classes,
# which is all wrong.
self.exc[(tag_str, orth_str)] = dict(attrs) self.exc[(tag_str, orth_str)] = dict(attrs)
tag = self.strings.add(tag_str) tag = self.strings.add(tag_str)
if tag not in self.reverse_index:
return
tag_id = self.reverse_index[tag] tag_id = self.reverse_index[tag]
orth = self.strings[orth_str] orth = self.strings[orth_str]
cdef RichTagC rich_tag = self.rich_tags[tag_id] cdef RichTagC rich_tag = self.rich_tags[tag_id]