Avoid clobbering existing lemmas

This commit is contained in:
Matthew Honnibal 2017-10-11 03:33:06 -05:00
parent 807e109f2b
commit 17c467e0ab

View File

@@ -55,7 +55,7 @@ cdef class Morphology:
# Add a 'null' tag, which we can reference when assign morphology to # Add a 'null' tag, which we can reference when assign morphology to
# untagged tokens. # untagged tokens.
self.rich_tags[self.n_tags].id = self.n_tags self.rich_tags[self.n_tags].id = self.n_tags
self._cache = PreshMapArray(self.n_tags) self._cache = PreshMapArray(self.n_tags)
self.exc = {} self.exc = {}
if exc is not None: if exc is not None:
@@ -68,7 +68,8 @@ cdef class Morphology:
cdef int assign_untagged(self, TokenC* token) except -1: cdef int assign_untagged(self, TokenC* token) except -1:
'''Set morphological attributes on a token without a POS tag.''' '''Set morphological attributes on a token without a POS tag.'''
token.lemma = self.lemmatize(0, token.lex.orth, {}) if token.lemma == 0:
token.lemma = self.lemmatize(0, token.lex.orth, {})
cdef int assign_tag(self, TokenC* token, tag) except -1: cdef int assign_tag(self, TokenC* token, tag) except -1:
if isinstance(tag, basestring): if isinstance(tag, basestring):