Merge branch 'feature/lemmatizer' of https://github.com/explosion/spaCy into feature/lemmatizer

2025-10-28 22:47:52 +03:00 · 2019-03-09 00:41:53 +00:00 · 2019-03-09 00:41:53 +00:00 · b6d60d0041
commit b6d60d0041
parent 4c8730526b cc2b2dba14
2 changed files with 2 additions and 10 deletions
--- a/spacy/cli/ud/ud_train.py
+++ b/spacy/cli/ud/ud_train.py
@@ -350,7 +350,6 @@ def initialize_pipeline(nlp, docs, golds, config, device):
    nlp.add_pipe(nlp.create_pipe("tagger", config={"set_morphology": False}))
    nlp.add_pipe(nlp.create_pipe("morphologizer"))
    nlp.add_pipe(nlp.create_pipe("parser"))
-    assert not nlp.get_pipe("tagger").set_morphology
    if config.multitask_tag:
        nlp.parser.add_multitask_objective("tag")
    if config.multitask_sent:
--- a/spacy/pipeline/pipes.pyx
+++ b/spacy/pipeline/pipes.pyx
@@ -357,14 +357,6 @@ class Tagger(Pipe):
        self.cfg = OrderedDict(sorted(cfg.items()))
        self.cfg.setdefault("cnn_maxout_pieces", 2)
-    @property
-    def set_morphology(self):
-        return self.cfg.get("set_morphology", True)
-    @set_morphology.setter
-    def set_morphology(self, value):
-        self.cfg["set_morphology"] = value
    @property
    def labels(self):
        return tuple(self.vocab.morphology.tag_names)
@@ -412,6 +404,7 @@ class Tagger(Pipe):
        cdef Doc doc
        cdef int idx = 0
        cdef Vocab vocab = self.vocab
+        assign_morphology = self.cfg.get("set_morphology", True)
        for i, doc in enumerate(docs):
            doc_tag_ids = batch_tag_ids[i]
            if hasattr(doc_tag_ids, "get"):
@@ -419,7 +412,7 @@ class Tagger(Pipe):
            for j, tag_id in enumerate(doc_tag_ids):
                # Don't clobber preset POS tags
                if doc.c[j].tag == 0:
-                    if doc.c[j].pos == 0 and self.set_morphology:
+                    if doc.c[j].pos == 0 and assign_morphology:
                        # Don't clobber preset lemmas
                        lemma = doc.c[j].lemma
                        vocab.morphology.assign_tag_id(&doc.c[j], tag_id)