Revert "Set annotations in update" (#6810)

* Revert "Set annotations in update (#6767)"

This reverts commit e680efc7cc.

* Fix version

* Update spacy/pipeline/entity_linker.py

* Update spacy/pipeline/entity_linker.py

* Update spacy/pipeline/tagger.pyx

* Update spacy/pipeline/tok2vec.py

* Update spacy/pipeline/tok2vec.py

* Update spacy/pipeline/transition_parser.pyx

* Update spacy/pipeline/transition_parser.pyx

* Update website/docs/api/multilabel_textcategorizer.md

* Update website/docs/api/tok2vec.md

* Update website/docs/usage/layers-architectures.md

* Update website/docs/usage/layers-architectures.md

* Update website/docs/api/transformer.md

* Update website/docs/api/textcategorizer.md

* Update website/docs/api/tagger.md

* Update spacy/pipeline/entity_linker.py

* Update website/docs/api/sentencerecognizer.md

* Update website/docs/api/pipe.md

* Update website/docs/api/morphologizer.md

* Update website/docs/api/entityrecognizer.md

* Update spacy/pipeline/entity_linker.py

* Update spacy/pipeline/multitask.pyx

* Update spacy/pipeline/tagger.pyx

* Update spacy/pipeline/tagger.pyx

* Update spacy/pipeline/textcat.py

* Update spacy/pipeline/textcat.py

* Update spacy/pipeline/textcat.py

* Update spacy/pipeline/tok2vec.py

* Update spacy/pipeline/trainable_pipe.pyx

* Update spacy/pipeline/trainable_pipe.pyx

* Update spacy/pipeline/transition_parser.pyx

* Update spacy/pipeline/transition_parser.pyx

* Update website/docs/api/entitylinker.md

* Update website/docs/api/dependencyparser.md

* Update spacy/pipeline/trainable_pipe.pyx
This commit is contained in:
Matthew Honnibal 2021-01-26 01:18:45 +11:00 committed by GitHub
parent 42b117e561
commit f049df1715
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 24 additions and 51 deletions

View File

@@ -198,8 +198,7 @@ class EntityLinker(TrainablePipe):
losses: Optional[Dict[str, float]] = None,
) -> Dict[str, float]:
"""Learn from a batch of documents and gold-standard information,
updating the pipe's model. Delegates to predict, get_loss and
set_annotations.
updating the pipe's model. Delegates to predict and get_loss.
examples (Iterable[Example]): A batch of Example objects.
drop (float): The dropout rate.
@@ -218,13 +217,7 @@ class EntityLinker(TrainablePipe):
return losses
validate_examples(examples, "EntityLinker.update")
sentence_docs = []
docs = []
for eg in examples:
eg.predicted.ents = eg.reference.ents
docs.append(eg.predicted)
# This seems simpler than other ways to get that exact output -- but
# it does run the model twice :(
predictions = self.predict(docs)
docs = [eg.predicted for eg in examples]
for eg in examples:
sentences = [s for s in eg.reference.sents]
kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
@@ -260,7 +253,6 @@ class EntityLinker(TrainablePipe):
if sgd is not None:
self.finish_update(sgd)
losses[self.name] += loss
self.set_annotations(docs, predictions)
return losses
def get_loss(self, examples: Iterable[Example], sentence_encodings):

View File

@@ -175,8 +175,7 @@ class Tagger(TrainablePipe):
def update(self, examples, *, drop=0., sgd=None, losses=None):
"""Learn from a batch of documents and gold-standard information,
updating the pipe's model. Delegates to predict, get_loss and
set_annotations.
updating the pipe's model. Delegates to predict and get_loss.
examples (Iterable[Example]): A batch of Example objects.
drop (float): The dropout rate.
@@ -205,8 +204,6 @@ class Tagger(TrainablePipe):
self.finish_update(sgd)
losses[self.name] += loss
docs = [eg.predicted for eg in examples]
self.set_annotations(docs, self._scores2guesses(tag_scores))
return losses
def rehearse(self, examples, *, drop=0., sgd=None, losses=None):

View File

@@ -199,8 +199,7 @@ class TextCategorizer(TrainablePipe):
losses: Optional[Dict[str, float]] = None,
) -> Dict[str, float]:
"""Learn from a batch of documents and gold-standard information,
updating the pipe's model. Delegates to predict, get_loss and
set_annotations.
updating the pipe's model. Delegates to predict and get_loss.
examples (Iterable[Example]): A batch of Example objects.
drop (float): The dropout rate.
@@ -226,8 +225,6 @@ class TextCategorizer(TrainablePipe):
if sgd is not None:
self.finish_update(sgd)
losses[self.name] += loss
docs = [eg.predicted for eg in examples]
self.set_annotations(docs, scores=scores)
return losses
def rehearse(

View File

@@ -207,7 +207,6 @@ class Tok2Vec(TrainablePipe):
listener.receive(batch_id, tokvecs, accumulate_gradient)
if self.listeners:
self.listeners[-1].receive(batch_id, tokvecs, backprop)
self.set_annotations(docs, tokvecs)
return losses
def get_loss(self, examples, scores) -> None:

View File

@@ -94,8 +94,7 @@ cdef class TrainablePipe(Pipe):
sgd: Optimizer=None,
losses: Optional[Dict[str, float]]=None) -> Dict[str, float]:
"""Learn from a batch of documents and gold-standard information,
updating the pipe's model. Delegates to predict, get_loss and
set_annotations.
updating the pipe's model. Delegates to predict and get_loss.
examples (Iterable[Example]): A batch of Example objects.
drop (float): The dropout rate.
@@ -122,8 +121,6 @@ cdef class TrainablePipe(Pipe):
if sgd not in (None, False):
self.finish_update(sgd)
losses[self.name] += loss
docs = [eg.predicted for eg in examples]
self.set_annotations(docs, scores=scores)
return losses
def rehearse(self,

View File

@@ -369,8 +369,6 @@ cdef class Parser(TrainablePipe):
backprop_tok2vec(golds)
if sgd not in (None, False):
self.finish_update(sgd)
docs = [eg.predicted for eg in examples]
self.set_annotations(docs, all_states)
# Ugh, this is annoying. If we're working on GPU, we want to free the
# memory ASAP. It seems that Python doesn't necessarily get around to
# removing these in time if we don't explicitly delete? It's confusing.

View File

@@ -425,7 +425,6 @@ GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
def test_overfitting_IO():
# Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly
nlp = English()
nlp.add_pipe("sentencizer", first=True)
vector_length = 3
assert "Q2146908" not in nlp.vocab.strings
@@ -465,6 +464,9 @@ def test_overfitting_IO():
nlp.update(train_examples, sgd=optimizer, losses=losses)
assert losses["entity_linker"] < 0.001
# adding additional components that are required for the entity_linker
nlp.add_pipe("sentencizer", first=True)
# Add a custom component to recognize "Russ Cochran" as an entity for the example training data
patterns = [
{"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}

View File

@@ -220,9 +220,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
## DependencyParser.update {#update tag="method"}
Learn from a batch of [`Example`](/api/example) objects, updating the pipe's
model. Delegates to [`predict`](/api/dependencyparser#predict),
[`get_loss`](/api/dependencyparser#get_loss) and
[`set_annotations`](/api/dependencyparser#set_annotations).
model. Delegates to [`predict`](/api/dependencyparser#predict) and
[`get_loss`](/api/dependencyparser#get_loss).
> #### Example
>

View File

@@ -237,8 +237,7 @@ entities.
Learn from a batch of [`Example`](/api/example) objects, updating both the
pipe's entity linking model and context encoder. Delegates to
[`predict`](/api/entitylinker#predict) and
[`set_annotations`](/api/entitylinker#set_annotations).
[`predict`](/api/entitylinker#predict).
> #### Example
>

View File

@@ -209,9 +209,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
## EntityRecognizer.update {#update tag="method"}
Learn from a batch of [`Example`](/api/example) objects, updating the pipe's
model. Delegates to [`predict`](/api/entityrecognizer#predict),
[`get_loss`](/api/entityrecognizer#get_loss) and
[`set_annotations`](/api/entityrecognizer#set_annotations).
model. Delegates to [`predict`](/api/entityrecognizer#predict) and
[`get_loss`](/api/entityrecognizer#get_loss).
> #### Example
>

View File

@@ -189,9 +189,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
Learn from a batch of [`Example`](/api/example) objects containing the
predictions and gold-standard annotations, and update the component's model.
Delegates to [`predict`](/api/morphologizer#predict),
[`get_loss`](/api/morphologizer#get_loss) and
[`set_annotations`](/api/morphologizer#set_annotations).
Delegates to [`predict`](/api/morphologizer#predict) and
[`get_loss`](/api/morphologizer#get_loss).
> #### Example
>

View File

@@ -199,9 +199,8 @@ Modify a batch of [`Doc`](/api/doc) objects using pre-computed scores.
Learn from a batch of [`Example`](/api/example) objects containing the
predictions and gold-standard annotations, and update the component's model.
Delegates to [`predict`](/api/multilabel_textcategorizer#predict),
[`get_loss`](/api/multilabel_textcategorizer#get_loss) and
[`set_annotations`](/api/multilabel_textcategorizer#set_annotations).
Delegates to [`predict`](/api/multilabel_textcategorizer#predict) and
[`get_loss`](/api/multilabel_textcategorizer#get_loss).
> #### Example
>

View File

@@ -176,9 +176,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
Learn from a batch of [`Example`](/api/example) objects containing the
predictions and gold-standard annotations, and update the component's model.
Delegates to [`predict`](/api/sentencerecognizer#predict),
[`get_loss`](/api/sentencerecognizer#get_loss) and
[`set_annotations`](/api/sentencerecognizer#set_annotations).
Delegates to [`predict`](/api/sentencerecognizer#predict) and
[`get_loss`](/api/sentencerecognizer#get_loss).
> #### Example
>

View File

@@ -187,9 +187,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
Learn from a batch of [`Example`](/api/example) objects containing the
predictions and gold-standard annotations, and update the component's model.
Delegates to [`predict`](/api/tagger#predict),
[`get_loss`](/api/tagger#get_loss) and
[`set_annotations`](/api/tagger#set_annotations).
Delegates to [`predict`](/api/tagger#predict) and
[`get_loss`](/api/tagger#get_loss).
> #### Example
>

View File

@@ -201,9 +201,8 @@ Modify a batch of [`Doc`](/api/doc) objects using pre-computed scores.
Learn from a batch of [`Example`](/api/example) objects containing the
predictions and gold-standard annotations, and update the component's model.
Delegates to [`predict`](/api/textcategorizer#predict),
[`get_loss`](/api/textcategorizer#get_loss) and
[`set_annotations`](/api/textcategorizer#set_annotations).
Delegates to [`predict`](/api/textcategorizer#predict) and
[`get_loss`](/api/textcategorizer#get_loss).
> #### Example
>

View File

@@ -186,8 +186,7 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
Learn from a batch of [`Example`](/api/example) objects containing the
predictions and gold-standard annotations, and update the component's model.
Delegates to [`predict`](/api/tok2vec#predict) and
[`set_annotations`](/api/tok2vec#set_annotations).
Delegates to [`predict`](/api/tok2vec#predict).
> #### Example
>