From f049df1715beffc8b7a967d676b0bd9d74020972 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 26 Jan 2021 01:18:45 +1100 Subject: [PATCH] Revert "Set annotations in update" (#6810) * Revert "Set annotations in update (#6767)" This reverts commit e680efc7cc365a31c1c7f9d5eb8733c1e61e558d. * Fix version * Update spacy/pipeline/entity_linker.py * Update spacy/pipeline/entity_linker.py * Update spacy/pipeline/tagger.pyx * Update spacy/pipeline/tok2vec.py * Update spacy/pipeline/tok2vec.py * Update spacy/pipeline/transition_parser.pyx * Update spacy/pipeline/transition_parser.pyx * Update website/docs/api/multilabel_textcategorizer.md * Update website/docs/api/tok2vec.md * Update website/docs/usage/layers-architectures.md * Update website/docs/usage/layers-architectures.md * Update website/docs/api/transformer.md * Update website/docs/api/textcategorizer.md * Update website/docs/api/tagger.md * Update spacy/pipeline/entity_linker.py * Update website/docs/api/sentencerecognizer.md * Update website/docs/api/pipe.md * Update website/docs/api/morphologizer.md * Update website/docs/api/entityrecognizer.md * Update spacy/pipeline/entity_linker.py * Update spacy/pipeline/multitask.pyx * Update spacy/pipeline/tagger.pyx * Update spacy/pipeline/tagger.pyx * Update spacy/pipeline/textcat.py * Update spacy/pipeline/textcat.py * Update spacy/pipeline/textcat.py * Update spacy/pipeline/tok2vec.py * Update spacy/pipeline/trainable_pipe.pyx * Update spacy/pipeline/trainable_pipe.pyx * Update spacy/pipeline/transition_parser.pyx * Update spacy/pipeline/transition_parser.pyx * Update website/docs/api/entitylinker.md * Update website/docs/api/dependencyparser.md * Update spacy/pipeline/trainable_pipe.pyx --- spacy/pipeline/entity_linker.py | 12 ++---------- spacy/pipeline/tagger.pyx | 5 +---- spacy/pipeline/textcat.py | 5 +---- spacy/pipeline/tok2vec.py | 1 - spacy/pipeline/trainable_pipe.pyx | 5 +---- spacy/pipeline/transition_parser.pyx | 2 -- spacy/tests/pipeline/test_entity_linker.py | 4 +++- website/docs/api/dependencyparser.md | 5 ++--- website/docs/api/entitylinker.md | 3 +-- website/docs/api/entityrecognizer.md | 5 ++--- website/docs/api/morphologizer.md | 5 ++--- website/docs/api/multilabel_textcategorizer.md | 5 ++--- website/docs/api/sentencerecognizer.md | 5 ++--- website/docs/api/tagger.md | 5 ++--- website/docs/api/textcategorizer.md | 5 ++--- website/docs/api/tok2vec.md | 3 +-- 16 files changed, 24 insertions(+), 51 deletions(-) diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py index f1ba8637b..52c4a0bf1 100644 --- a/spacy/pipeline/entity_linker.py +++ b/spacy/pipeline/entity_linker.py @@ -198,8 +198,7 @@ class EntityLinker(TrainablePipe): losses: Optional[Dict[str, float]] = None, ) -> Dict[str, float]: """Learn from a batch of documents and gold-standard information, - updating the pipe's model. Delegates to predict, get_loss and - set_annotations. + updating the pipe's model. Delegates to predict and get_loss. examples (Iterable[Example]): A batch of Example objects. drop (float): The dropout rate. @@ -218,13 +217,7 @@ class EntityLinker(TrainablePipe): return losses validate_examples(examples, "EntityLinker.update") sentence_docs = [] - docs = [] - for eg in examples: - eg.predicted.ents = eg.reference.ents - docs.append(eg.predicted) - # This seems simpler than other ways to get that exact output -- but - # it does run the model twice :( - predictions = self.predict(docs) + docs = [eg.predicted for eg in examples] for eg in examples: sentences = [s for s in eg.reference.sents] kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True) @@ -260,7 +253,6 @@ class EntityLinker(TrainablePipe): if sgd is not None: self.finish_update(sgd) losses[self.name] += loss - self.set_annotations(docs, predictions) return losses def get_loss(self, examples: Iterable[Example], sentence_encodings): diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index 85592aba5..fbcb6f89f 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -175,8 +175,7 @@ class Tagger(TrainablePipe): def update(self, examples, *, drop=0., sgd=None, losses=None): """Learn from a batch of documents and gold-standard information, - updating the pipe's model. Delegates to predict, get_loss and - set_annotations. + updating the pipe's model. Delegates to predict and get_loss. examples (Iterable[Example]): A batch of Example objects. drop (float): The dropout rate. @@ -205,8 +204,6 @@ class Tagger(TrainablePipe): self.finish_update(sgd) losses[self.name] += loss - docs = [eg.predicted for eg in examples] - self.set_annotations(docs, self._scores2guesses(tag_scores)) return losses def rehearse(self, examples, *, drop=0., sgd=None, losses=None): diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index 6d8c7b101..fb0fc8016 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -199,8 +199,7 @@ class TextCategorizer(TrainablePipe): losses: Optional[Dict[str, float]] = None, ) -> Dict[str, float]: """Learn from a batch of documents and gold-standard information, - updating the pipe's model. Delegates to predict, get_loss and - set_annotations. + updating the pipe's model. Delegates to predict and get_loss. examples (Iterable[Example]): A batch of Example objects. drop (float): The dropout rate. @@ -226,8 +225,6 @@ class TextCategorizer(TrainablePipe): if sgd is not None: self.finish_update(sgd) losses[self.name] += loss - docs = [eg.predicted for eg in examples] - self.set_annotations(docs, scores=scores) return losses def rehearse( diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py index a95fc8927..4c2271b1f 100644 --- a/spacy/pipeline/tok2vec.py +++ b/spacy/pipeline/tok2vec.py @@ -207,7 +207,6 @@ class Tok2Vec(TrainablePipe): listener.receive(batch_id, tokvecs, accumulate_gradient) if self.listeners: self.listeners[-1].receive(batch_id, tokvecs, backprop) - self.set_annotations(docs, tokvecs) return losses def get_loss(self, examples, scores) -> None: diff --git a/spacy/pipeline/trainable_pipe.pyx b/spacy/pipeline/trainable_pipe.pyx index 1abd6b43e..3fb50b766 100644 --- a/spacy/pipeline/trainable_pipe.pyx +++ b/spacy/pipeline/trainable_pipe.pyx @@ -94,8 +94,7 @@ cdef class TrainablePipe(Pipe): sgd: Optimizer=None, losses: Optional[Dict[str, float]]=None) -> Dict[str, float]: """Learn from a batch of documents and gold-standard information, - updating the pipe's model. Delegates to predict, get_loss and - set_annotations. + updating the pipe's model. Delegates to predict and get_loss. examples (Iterable[Example]): A batch of Example objects. drop (float): The dropout rate. @@ -122,8 +121,6 @@ cdef class TrainablePipe(Pipe): if sgd not in (None, False): self.finish_update(sgd) losses[self.name] += loss - docs = [eg.predicted for eg in examples] - self.set_annotations(docs, scores=scores) return losses def rehearse(self, diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx index 8cb4ea15d..e97d2b020 100644 --- a/spacy/pipeline/transition_parser.pyx +++ b/spacy/pipeline/transition_parser.pyx @@ -369,8 +369,6 @@ cdef class Parser(TrainablePipe): backprop_tok2vec(golds) if sgd not in (None, False): self.finish_update(sgd) - docs = [eg.predicted for eg in examples] - self.set_annotations(docs, all_states) # Ugh, this is annoying. If we're working on GPU, we want to free the # memory ASAP. It seems that Python doesn't necessarily get around to # removing these in time if we don't explicitly delete? It's confusing. diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index 743800536..8ba2d0d3e 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -425,7 +425,6 @@ GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"] def test_overfitting_IO(): # Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly nlp = English() - nlp.add_pipe("sentencizer", first=True) vector_length = 3 assert "Q2146908" not in nlp.vocab.strings @@ -465,6 +464,9 @@ def test_overfitting_IO(): nlp.update(train_examples, sgd=optimizer, losses=losses) assert losses["entity_linker"] < 0.001 + # adding additional components that are required for the entity_linker + nlp.add_pipe("sentencizer", first=True) + # Add a custom component to recognize "Russ Cochran" as an entity for the example training data patterns = [ {"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]} diff --git a/website/docs/api/dependencyparser.md b/website/docs/api/dependencyparser.md index 8974d9ea7..c4e2e1697 100644 --- a/website/docs/api/dependencyparser.md +++ b/website/docs/api/dependencyparser.md @@ -220,9 +220,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores. ## DependencyParser.update {#update tag="method"} Learn from a batch of [`Example`](/api/example) objects, updating the pipe's -model. Delegates to [`predict`](/api/dependencyparser#predict), -[`get_loss`](/api/dependencyparser#get_loss) and -[`set_annotations`](/api/dependencyparser#set_annotations). +model. Delegates to [`predict`](/api/dependencyparser#predict) and +[`get_loss`](/api/dependencyparser#get_loss). > #### Example > diff --git a/website/docs/api/entitylinker.md b/website/docs/api/entitylinker.md index b90c52710..47ba80c0f 100644 --- a/website/docs/api/entitylinker.md +++ b/website/docs/api/entitylinker.md @@ -237,8 +237,7 @@ entities. Learn from a batch of [`Example`](/api/example) objects, updating both the pipe's entity linking model and context encoder. Delegates to -[`predict`](/api/entitylinker#predict) and -[`set_annotations`](/api/entitylinker#set_annotations). +[`predict`](/api/entitylinker#predict). > #### Example > diff --git a/website/docs/api/entityrecognizer.md b/website/docs/api/entityrecognizer.md index dd969d14b..348736209 100644 --- a/website/docs/api/entityrecognizer.md +++ b/website/docs/api/entityrecognizer.md @@ -209,9 +209,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores. ## EntityRecognizer.update {#update tag="method"} Learn from a batch of [`Example`](/api/example) objects, updating the pipe's -model. Delegates to [`predict`](/api/entityrecognizer#predict), -[`get_loss`](/api/entityrecognizer#get_loss) and -[`set_annotations`](/api/entityrecognizer#set_annotations). +model. Delegates to [`predict`](/api/entityrecognizer#predict) and +[`get_loss`](/api/entityrecognizer#get_loss). > #### Example > diff --git a/website/docs/api/morphologizer.md b/website/docs/api/morphologizer.md index 9cda478c8..059040a19 100644 --- a/website/docs/api/morphologizer.md +++ b/website/docs/api/morphologizer.md @@ -189,9 +189,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores. Learn from a batch of [`Example`](/api/example) objects containing the predictions and gold-standard annotations, and update the component's model. -Delegates to [`predict`](/api/morphologizer#predict), -[`get_loss`](/api/morphologizer#get_loss) and -[`set_annotations`](/api/morphologizer#set_annotations). +Delegates to [`predict`](/api/morphologizer#predict) and +[`get_loss`](/api/morphologizer#get_loss). > #### Example > diff --git a/website/docs/api/multilabel_textcategorizer.md b/website/docs/api/multilabel_textcategorizer.md index d74f7ad9d..6e1a627c6 100644 --- a/website/docs/api/multilabel_textcategorizer.md +++ b/website/docs/api/multilabel_textcategorizer.md @@ -199,9 +199,8 @@ Modify a batch of [`Doc`](/api/doc) objects using pre-computed scores. Learn from a batch of [`Example`](/api/example) objects containing the predictions and gold-standard annotations, and update the component's model. -Delegates to [`predict`](/api/multilabel_textcategorizer#predict), -[`get_loss`](/api/multilabel_textcategorizer#get_loss) and -[`set_annotations`](/api/multilabel_textcategorizer#set_annotations). +Delegates to [`predict`](/api/multilabel_textcategorizer#predict) and +[`get_loss`](/api/multilabel_textcategorizer#get_loss). > #### Example > diff --git a/website/docs/api/sentencerecognizer.md b/website/docs/api/sentencerecognizer.md index 23c8e87d9..ce66ecaa4 100644 --- a/website/docs/api/sentencerecognizer.md +++ b/website/docs/api/sentencerecognizer.md @@ -176,9 +176,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores. Learn from a batch of [`Example`](/api/example) objects containing the predictions and gold-standard annotations, and update the component's model. -Delegates to [`predict`](/api/sentencerecognizer#predict), -[`get_loss`](/api/sentencerecognizer#get_loss) and -[`set_annotations`](/api/sentencerecognizer#set_annotations). +Delegates to [`predict`](/api/sentencerecognizer#predict) and +[`get_loss`](/api/sentencerecognizer#get_loss). > #### Example > diff --git a/website/docs/api/tagger.md b/website/docs/api/tagger.md index 8e6132d40..1a4c70522 100644 --- a/website/docs/api/tagger.md +++ b/website/docs/api/tagger.md @@ -187,9 +187,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores. Learn from a batch of [`Example`](/api/example) objects containing the predictions and gold-standard annotations, and update the component's model. -Delegates to [`predict`](/api/tagger#predict), -[`get_loss`](/api/tagger#get_loss) and -[`set_annotations`](/api/tagger#set_annotations). +Delegates to [`predict`](/api/tagger#predict) and +[`get_loss`](/api/tagger#get_loss). > #### Example > diff --git a/website/docs/api/textcategorizer.md b/website/docs/api/textcategorizer.md index 16049c327..ac0ab4f27 100644 --- a/website/docs/api/textcategorizer.md +++ b/website/docs/api/textcategorizer.md @@ -201,9 +201,8 @@ Modify a batch of [`Doc`](/api/doc) objects using pre-computed scores. Learn from a batch of [`Example`](/api/example) objects containing the predictions and gold-standard annotations, and update the component's model. -Delegates to [`predict`](/api/textcategorizer#predict), -[`get_loss`](/api/textcategorizer#get_loss) and -[`set_annotations`](/api/textcategorizer#set_annotations). +Delegates to [`predict`](/api/textcategorizer#predict) and +[`get_loss`](/api/textcategorizer#get_loss). > #### Example > diff --git a/website/docs/api/tok2vec.md b/website/docs/api/tok2vec.md index 409c7f25b..90278e8cc 100644 --- a/website/docs/api/tok2vec.md +++ b/website/docs/api/tok2vec.md @@ -186,8 +186,7 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores. Learn from a batch of [`Example`](/api/example) objects containing the predictions and gold-standard annotations, and update the component's model. -Delegates to [`predict`](/api/tok2vec#predict) and -[`set_annotations`](/api/tok2vec#set_annotations). +Delegates to [`predict`](/api/tok2vec#predict). > #### Example >