Revert "Set annotations in update" (#6810)
* Revert "Set annotations in update (#6767)"
This reverts commit e680efc7cc.
* Fix version
* Update spacy/pipeline/entity_linker.py
* Update spacy/pipeline/entity_linker.py
* Update spacy/pipeline/tagger.pyx
* Update spacy/pipeline/tok2vec.py
* Update spacy/pipeline/tok2vec.py
* Update spacy/pipeline/transition_parser.pyx
* Update spacy/pipeline/transition_parser.pyx
* Update website/docs/api/multilabel_textcategorizer.md
* Update website/docs/api/tok2vec.md
* Update website/docs/usage/layers-architectures.md
* Update website/docs/usage/layers-architectures.md
* Update website/docs/api/transformer.md
* Update website/docs/api/textcategorizer.md
* Update website/docs/api/tagger.md
* Update spacy/pipeline/entity_linker.py
* Update website/docs/api/sentencerecognizer.md
* Update website/docs/api/pipe.md
* Update website/docs/api/morphologizer.md
* Update website/docs/api/entityrecognizer.md
* Update spacy/pipeline/entity_linker.py
* Update spacy/pipeline/multitask.pyx
* Update spacy/pipeline/tagger.pyx
* Update spacy/pipeline/tagger.pyx
* Update spacy/pipeline/textcat.py
* Update spacy/pipeline/textcat.py
* Update spacy/pipeline/textcat.py
* Update spacy/pipeline/tok2vec.py
* Update spacy/pipeline/trainable_pipe.pyx
* Update spacy/pipeline/trainable_pipe.pyx
* Update spacy/pipeline/transition_parser.pyx
* Update spacy/pipeline/transition_parser.pyx
* Update website/docs/api/entitylinker.md
* Update website/docs/api/dependencyparser.md
* Update spacy/pipeline/trainable_pipe.pyx
This commit is contained in:
parent 42b117e561, commit f049df1715
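In practical terms, after this revert a component's update() trains the model but no longer writes predictions back onto the Doc objects it was given. The sketch below is illustrative only (a blank pipeline with a textcat component and toy labels, not code from this commit); to inspect predictions during training you now apply the pipeline explicitly.

import spacy
from spacy.training import Example

# Illustrative sketch (not from this commit): blank pipeline with a textcat component.
nlp = spacy.blank("en")
textcat = nlp.add_pipe("textcat")
textcat.add_label("POSITIVE")
textcat.add_label("NEGATIVE")

train_examples = [
    Example.from_dict(nlp.make_doc("I love this"), {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}),
    Example.from_dict(nlp.make_doc("I hate this"), {"cats": {"POSITIVE": 0.0, "NEGATIVE": 1.0}}),
]
optimizer = nlp.initialize(lambda: train_examples)

losses = {}
nlp.update(train_examples, sgd=optimizer, losses=losses)
# After this revert, update() no longer calls set_annotations, so the docs in
# train_examples carry no predicted categories. Run the pipeline to get them.
doc = nlp("I love this")
print(doc.cats)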
spacy/pipeline/entity_linker.py
@@ -198,8 +198,7 @@ class EntityLinker(TrainablePipe):
         losses: Optional[Dict[str, float]] = None,
     ) -> Dict[str, float]:
         """Learn from a batch of documents and gold-standard information,
-        updating the pipe's model. Delegates to predict, get_loss and
-        set_annotations.
+        updating the pipe's model. Delegates to predict and get_loss.
 
         examples (Iterable[Example]): A batch of Example objects.
         drop (float): The dropout rate.
@@ -218,13 +217,7 @@ class EntityLinker(TrainablePipe):
             return losses
         validate_examples(examples, "EntityLinker.update")
         sentence_docs = []
-        docs = []
-        for eg in examples:
-            eg.predicted.ents = eg.reference.ents
-            docs.append(eg.predicted)
-        # This seems simpler than other ways to get that exact output -- but
-        # it does run the model twice :(
-        predictions = self.predict(docs)
+        docs = [eg.predicted for eg in examples]
         for eg in examples:
             sentences = [s for s in eg.reference.sents]
             kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
@@ -260,7 +253,6 @@ class EntityLinker(TrainablePipe):
         if sgd is not None:
             self.finish_update(sgd)
         losses[self.name] += loss
-        self.set_annotations(docs, predictions)
         return losses
 
     def get_loss(self, examples: Iterable[Example], sentence_encodings):
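For the entity linker specifically, the reverted code had copied gold entities onto the predicted docs and run the model a second time just so update() could annotate them. If that behaviour is still wanted, it can be reproduced outside update(); the following is a hedged sketch (the names `examples` and `entity_linker` are assumed, not taken from this diff).

# Sketch: reproduce the annotation step that update() no longer performs.
# `entity_linker` is assumed to be nlp.get_pipe("entity_linker") and
# `examples` a list of Example objects with gold entities on eg.reference.
docs = [eg.predicted for eg in examples]
for doc, eg in zip(docs, examples):
    doc.ents = eg.reference.ents  # copy gold entities, as the removed code did
kb_ids = entity_linker.predict(docs)          # second forward pass
entity_linker.set_annotations(docs, kb_ids)   # write KB IDs onto the docs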
spacy/pipeline/tagger.pyx
@@ -175,8 +175,7 @@ class Tagger(TrainablePipe):
 
     def update(self, examples, *, drop=0., sgd=None, losses=None):
         """Learn from a batch of documents and gold-standard information,
-        updating the pipe's model. Delegates to predict, get_loss and
-        set_annotations.
+        updating the pipe's model. Delegates to predict and get_loss.
 
         examples (Iterable[Example]): A batch of Example objects.
         drop (float): The dropout rate.
@@ -205,8 +204,6 @@ class Tagger(TrainablePipe):
             self.finish_update(sgd)
 
         losses[self.name] += loss
-        docs = [eg.predicted for eg in examples]
-        self.set_annotations(docs, self._scores2guesses(tag_scores))
         return losses
 
     def rehearse(self, examples, *, drop=0., sgd=None, losses=None):
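Because Tagger.update no longer writes its guesses onto the docs, checking tagging quality mid-training now goes through the pipeline rather than through the training examples. A minimal sketch, assuming an `nlp` object with a trained tagger and a held-out list `dev_examples`:

# Sketch: evaluate tag accuracy explicitly; update() no longer sets token.tag_.
scores = nlp.evaluate(dev_examples)   # runs the pipeline and scores it
print(scores["tag_acc"])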
spacy/pipeline/textcat.py
@@ -199,8 +199,7 @@ class TextCategorizer(TrainablePipe):
         losses: Optional[Dict[str, float]] = None,
     ) -> Dict[str, float]:
         """Learn from a batch of documents and gold-standard information,
-        updating the pipe's model. Delegates to predict, get_loss and
-        set_annotations.
+        updating the pipe's model. Delegates to predict and get_loss.
 
         examples (Iterable[Example]): A batch of Example objects.
         drop (float): The dropout rate.
@@ -226,8 +225,6 @@ class TextCategorizer(TrainablePipe):
         if sgd is not None:
             self.finish_update(sgd)
         losses[self.name] += loss
-        docs = [eg.predicted for eg in examples]
-        self.set_annotations(docs, scores=scores)
         return losses
 
     def rehearse(
spacy/pipeline/tok2vec.py
@@ -207,7 +207,6 @@ class Tok2Vec(TrainablePipe):
             listener.receive(batch_id, tokvecs, accumulate_gradient)
         if self.listeners:
             self.listeners[-1].receive(batch_id, tokvecs, backprop)
-        self.set_annotations(docs, tokvecs)
         return losses
 
     def get_loss(self, examples, scores) -> None:
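Note that downstream listener components still receive the tok2vec outputs during training through listener.receive, so dropping set_annotations here only means doc.tensor is no longer filled in as a side effect of update(). A small sketch, assuming a pipeline with a tok2vec component:

# Sketch: doc.tensor is populated when the pipeline runs, not during update().
doc = nlp("Tok2Vec fills doc.tensor at inference time.")
print(doc.tensor.shape)  # set by Tok2Vec.set_annotations in nlp.__call__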
spacy/pipeline/trainable_pipe.pyx
@@ -94,8 +94,7 @@ cdef class TrainablePipe(Pipe):
                sgd: Optimizer=None,
                losses: Optional[Dict[str, float]]=None) -> Dict[str, float]:
         """Learn from a batch of documents and gold-standard information,
-        updating the pipe's model. Delegates to predict, get_loss and
-        set_annotations.
+        updating the pipe's model. Delegates to predict and get_loss.
 
         examples (Iterable[Example]): A batch of Example objects.
         drop (float): The dropout rate.
@@ -122,8 +121,6 @@ cdef class TrainablePipe(Pipe):
         if sgd not in (None, False):
             self.finish_update(sgd)
         losses[self.name] += loss
-        docs = [eg.predicted for eg in examples]
-        self.set_annotations(docs, scores=scores)
         return losses
 
     def rehearse(self,
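The TrainablePipe change also sets the contract for custom components: an overridden update() is expected to backpropagate and return losses, while annotation happens in predict/set_annotations when the pipeline is applied. A hedged sketch of that shape for a custom component (class name and model calls are illustrative, not from this diff):

from spacy.pipeline import TrainablePipe
from spacy.training import validate_examples
from thinc.api import set_dropout_rate

class MyPipe(TrainablePipe):
    def update(self, examples, *, drop=0.0, sgd=None, losses=None):
        # Sketch of the post-revert update() shape: no set_annotations call.
        if losses is None:
            losses = {}
        losses.setdefault(self.name, 0.0)
        validate_examples(examples, "MyPipe.update")
        set_dropout_rate(self.model, drop)
        scores, bp_scores = self.model.begin_update([eg.predicted for eg in examples])
        loss, d_scores = self.get_loss(examples, scores)
        bp_scores(d_scores)
        if sgd is not None:
            self.finish_update(sgd)
        losses[self.name] += loss
        return losses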
spacy/pipeline/transition_parser.pyx
@@ -369,8 +369,6 @@ cdef class Parser(TrainablePipe):
         backprop_tok2vec(golds)
         if sgd not in (None, False):
             self.finish_update(sgd)
-        docs = [eg.predicted for eg in examples]
-        self.set_annotations(docs, all_states)
         # Ugh, this is annoying. If we're working on GPU, we want to free the
         # memory ASAP. It seems that Python doesn't necessarily get around to
         # removing these in time if we don't explicitly delete? It's confusing.
spacy/tests/pipeline/test_entity_linker.py
@@ -425,7 +425,6 @@ GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
 def test_overfitting_IO():
     # Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly
     nlp = English()
-    nlp.add_pipe("sentencizer", first=True)
     vector_length = 3
     assert "Q2146908" not in nlp.vocab.strings
 
@@ -465,6 +464,9 @@ def test_overfitting_IO():
         nlp.update(train_examples, sgd=optimizer, losses=losses)
     assert losses["entity_linker"] < 0.001
 
+    # adding additional components that are required for the entity_linker
+    nlp.add_pipe("sentencizer", first=True)
+
     # Add a custom component to recognize "Russ Cochran" as an entity for the example training data
     patterns = [
         {"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}
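The test change reflects the same shift: during training the entity linker reads sentences and entities from the gold references, so the sentencizer and entity patterns are only needed once the trained pipeline is applied, and the test now adds them after the update loop. A sketch of that inference-time setup (the ruler part is implied by the surrounding test code rather than shown in this hunk):

nlp.add_pipe("sentencizer", first=True)
ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
ruler.add_patterns(patterns)
doc = nlp("Russ Cochran was a publisher.")
print([(ent.text, ent.kb_id_) for ent in doc.ents])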
website/docs/api/dependencyparser.md
@@ -220,9 +220,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
 ## DependencyParser.update {#update tag="method"}
 
 Learn from a batch of [`Example`](/api/example) objects, updating the pipe's
-model. Delegates to [`predict`](/api/dependencyparser#predict),
-[`get_loss`](/api/dependencyparser#get_loss) and
-[`set_annotations`](/api/dependencyparser#set_annotations).
+model. Delegates to [`predict`](/api/dependencyparser#predict) and
+[`get_loss`](/api/dependencyparser#get_loss).
 
 > #### Example
 >
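This and the following documentation hunks all make the same one-line adjustment to the update() description. For orientation, a call still follows the shape of the docs' own examples (a sketch, not the literal example block from the page):

parser = nlp.add_pipe("parser")
optimizer = nlp.initialize()
losses = parser.update(examples, sgd=optimizer)
# update() trains the model; to see parses, apply the pipeline:
doc = nlp("This is a sentence.")
print([(t.text, t.dep_, t.head.text) for t in doc])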
website/docs/api/entitylinker.md
@@ -237,8 +237,7 @@ entities.
 
 Learn from a batch of [`Example`](/api/example) objects, updating both the
 pipe's entity linking model and context encoder. Delegates to
-[`predict`](/api/entitylinker#predict) and
-[`set_annotations`](/api/entitylinker#set_annotations).
+[`predict`](/api/entitylinker#predict).
 
 > #### Example
 >
website/docs/api/entityrecognizer.md
@@ -209,9 +209,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
 ## EntityRecognizer.update {#update tag="method"}
 
 Learn from a batch of [`Example`](/api/example) objects, updating the pipe's
-model. Delegates to [`predict`](/api/entityrecognizer#predict),
-[`get_loss`](/api/entityrecognizer#get_loss) and
-[`set_annotations`](/api/entityrecognizer#set_annotations).
+model. Delegates to [`predict`](/api/entityrecognizer#predict) and
+[`get_loss`](/api/entityrecognizer#get_loss).
 
 > #### Example
 >
website/docs/api/morphologizer.md
@@ -189,9 +189,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
 
 Learn from a batch of [`Example`](/api/example) objects containing the
 predictions and gold-standard annotations, and update the component's model.
-Delegates to [`predict`](/api/morphologizer#predict),
-[`get_loss`](/api/morphologizer#get_loss) and
-[`set_annotations`](/api/morphologizer#set_annotations).
+Delegates to [`predict`](/api/morphologizer#predict) and
+[`get_loss`](/api/morphologizer#get_loss).
 
 > #### Example
 >
website/docs/api/multilabel_textcategorizer.md
@@ -199,9 +199,8 @@ Modify a batch of [`Doc`](/api/doc) objects using pre-computed scores.
 
 Learn from a batch of [`Example`](/api/example) objects containing the
 predictions and gold-standard annotations, and update the component's model.
-Delegates to [`predict`](/api/multilabel_textcategorizer#predict),
-[`get_loss`](/api/multilabel_textcategorizer#get_loss) and
-[`set_annotations`](/api/multilabel_textcategorizer#set_annotations).
+Delegates to [`predict`](/api/multilabel_textcategorizer#predict) and
+[`get_loss`](/api/multilabel_textcategorizer#get_loss).
 
 > #### Example
 >
website/docs/api/sentencerecognizer.md
@@ -176,9 +176,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
 
 Learn from a batch of [`Example`](/api/example) objects containing the
 predictions and gold-standard annotations, and update the component's model.
-Delegates to [`predict`](/api/sentencerecognizer#predict),
-[`get_loss`](/api/sentencerecognizer#get_loss) and
-[`set_annotations`](/api/sentencerecognizer#set_annotations).
+Delegates to [`predict`](/api/sentencerecognizer#predict) and
+[`get_loss`](/api/sentencerecognizer#get_loss).
 
 > #### Example
 >
website/docs/api/tagger.md
@@ -187,9 +187,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
 
 Learn from a batch of [`Example`](/api/example) objects containing the
 predictions and gold-standard annotations, and update the component's model.
-Delegates to [`predict`](/api/tagger#predict),
-[`get_loss`](/api/tagger#get_loss) and
-[`set_annotations`](/api/tagger#set_annotations).
+Delegates to [`predict`](/api/tagger#predict) and
+[`get_loss`](/api/tagger#get_loss).
 
 > #### Example
 >
website/docs/api/textcategorizer.md
@@ -201,9 +201,8 @@ Modify a batch of [`Doc`](/api/doc) objects using pre-computed scores.
 
 Learn from a batch of [`Example`](/api/example) objects containing the
 predictions and gold-standard annotations, and update the component's model.
-Delegates to [`predict`](/api/textcategorizer#predict),
-[`get_loss`](/api/textcategorizer#get_loss) and
-[`set_annotations`](/api/textcategorizer#set_annotations).
+Delegates to [`predict`](/api/textcategorizer#predict) and
+[`get_loss`](/api/textcategorizer#get_loss).
 
 > #### Example
 >
website/docs/api/tok2vec.md
@@ -186,8 +186,7 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
 
 Learn from a batch of [`Example`](/api/example) objects containing the
 predictions and gold-standard annotations, and update the component's model.
-Delegates to [`predict`](/api/tok2vec#predict) and
-[`set_annotations`](/api/tok2vec#set_annotations).
+Delegates to [`predict`](/api/tok2vec#predict).
 
 > #### Example
 >