Fix non-clobbering lemmatization

This commit is contained in:
Matthew Honnibal 2017-11-06 12:36:05 +01:00
parent 63c6ae4191
commit 31babe3c3f
2 changed files with 5 additions and 2 deletions

View File

@@ -109,8 +109,7 @@ cdef class Morphology:
analysis.lemma = self.lemmatize(analysis.tag.pos, token.lex.orth,
self.tag_map.get(tag_str, {}))
self._cache.set(tag_id, token.lex.orth, analysis)
if token.lemma == 0:
token.lemma = analysis.lemma
token.lemma = analysis.lemma
token.pos = analysis.tag.pos
token.tag = analysis.tag.name
token.morph = analysis.tag.morph

View File

@@ -412,7 +412,11 @@ class Tagger(Pipe):
for j, tag_id in enumerate(doc_tag_ids):
# Don't clobber preset POS tags
if doc.c[j].tag == 0 and doc.c[j].pos == 0:
# Don't clobber preset lemmas
lemma = doc.c[j].lemma
vocab.morphology.assign_tag_id(&doc.c[j], tag_id)
if lemma != 0:
doc.c[j].lemma = lemma
idx += 1
if tensors is not None:
if isinstance(doc.tensor, numpy.ndarray) \