diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index 3414eec5f..a5c5c0fbe 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -109,8 +109,7 @@ cdef class Morphology: analysis.lemma = self.lemmatize(analysis.tag.pos, token.lex.orth, self.tag_map.get(tag_str, {})) self._cache.set(tag_id, token.lex.orth, analysis) - if token.lemma == 0: - token.lemma = analysis.lemma + token.lemma = analysis.lemma token.pos = analysis.tag.pos token.tag = analysis.tag.name token.morph = analysis.tag.morph diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx index f3defeeb9..5291b6b5e 100644 --- a/spacy/pipeline.pyx +++ b/spacy/pipeline.pyx @@ -412,7 +412,11 @@ class Tagger(Pipe): for j, tag_id in enumerate(doc_tag_ids): # Don't clobber preset POS tags if doc.c[j].tag == 0 and doc.c[j].pos == 0: + # Don't clobber preset lemmas + lemma = doc.c[j].lemma vocab.morphology.assign_tag_id(&doc.c[j], tag_id) + if lemma != 0: + doc.c[j].lemma = lemma idx += 1 if tensors is not None: if isinstance(doc.tensor, numpy.ndarray) \