mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Set annotations in update (#6767)
* bump to 3.0.0rc4
* do set_annotations in component update calls
* update docs and remove set_annotations flag
* fix EL test
This commit is contained in:
parent
57640aa838
commit
e680efc7cc
|
@ -1,6 +1,6 @@
|
||||||
# fmt: off
|
# fmt: off
|
||||||
__title__ = "spacy-nightly"
|
__title__ = "spacy-nightly"
|
||||||
__version__ = "3.0.0rc3"
|
__version__ = "3.0.0rc4"
|
||||||
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
|
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
|
||||||
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
|
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
|
||||||
__projects__ = "https://github.com/explosion/projects"
|
__projects__ = "https://github.com/explosion/projects"
|
||||||
|
|
|
@ -193,18 +193,16 @@ class EntityLinker(TrainablePipe):
|
||||||
self,
|
self,
|
||||||
examples: Iterable[Example],
|
examples: Iterable[Example],
|
||||||
*,
|
*,
|
||||||
set_annotations: bool = False,
|
|
||||||
drop: float = 0.0,
|
drop: float = 0.0,
|
||||||
sgd: Optional[Optimizer] = None,
|
sgd: Optional[Optimizer] = None,
|
||||||
losses: Optional[Dict[str, float]] = None,
|
losses: Optional[Dict[str, float]] = None,
|
||||||
) -> Dict[str, float]:
|
) -> Dict[str, float]:
|
||||||
"""Learn from a batch of documents and gold-standard information,
|
"""Learn from a batch of documents and gold-standard information,
|
||||||
updating the pipe's model. Delegates to predict and get_loss.
|
updating the pipe's model. Delegates to predict, get_loss and
|
||||||
|
set_annotations.
|
||||||
|
|
||||||
examples (Iterable[Example]): A batch of Example objects.
|
examples (Iterable[Example]): A batch of Example objects.
|
||||||
drop (float): The dropout rate.
|
drop (float): The dropout rate.
|
||||||
set_annotations (bool): Whether or not to update the Example objects
|
|
||||||
with the predictions.
|
|
||||||
sgd (thinc.api.Optimizer): The optimizer.
|
sgd (thinc.api.Optimizer): The optimizer.
|
||||||
losses (Dict[str, float]): Optional record of the loss during training.
|
losses (Dict[str, float]): Optional record of the loss during training.
|
||||||
Updated using the component name as the key.
|
Updated using the component name as the key.
|
||||||
|
@ -220,11 +218,13 @@ class EntityLinker(TrainablePipe):
|
||||||
return losses
|
return losses
|
||||||
validate_examples(examples, "EntityLinker.update")
|
validate_examples(examples, "EntityLinker.update")
|
||||||
sentence_docs = []
|
sentence_docs = []
|
||||||
docs = [eg.predicted for eg in examples]
|
docs = []
|
||||||
if set_annotations:
|
for eg in examples:
|
||||||
# This seems simpler than other ways to get that exact output -- but
|
eg.predicted.ents = eg.reference.ents
|
||||||
# it does run the model twice :(
|
docs.append(eg.predicted)
|
||||||
predictions = self.model.predict(docs)
|
# This seems simpler than other ways to get that exact output -- but
|
||||||
|
# it does run the model twice :(
|
||||||
|
predictions = self.predict(docs)
|
||||||
for eg in examples:
|
for eg in examples:
|
||||||
sentences = [s for s in eg.reference.sents]
|
sentences = [s for s in eg.reference.sents]
|
||||||
kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
|
kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
|
||||||
|
@ -260,8 +260,7 @@ class EntityLinker(TrainablePipe):
|
||||||
if sgd is not None:
|
if sgd is not None:
|
||||||
self.finish_update(sgd)
|
self.finish_update(sgd)
|
||||||
losses[self.name] += loss
|
losses[self.name] += loss
|
||||||
if set_annotations:
|
self.set_annotations(docs, predictions)
|
||||||
self.set_annotations(docs, predictions)
|
|
||||||
return losses
|
return losses
|
||||||
|
|
||||||
def get_loss(self, examples: Iterable[Example], sentence_encodings):
|
def get_loss(self, examples: Iterable[Example], sentence_encodings):
|
||||||
|
|
|
@ -199,7 +199,7 @@ class ClozeMultitask(TrainablePipe):
|
||||||
loss = self.distance.get_loss(prediction, target)
|
loss = self.distance.get_loss(prediction, target)
|
||||||
return loss, gradient
|
return loss, gradient
|
||||||
|
|
||||||
def update(self, examples, *, drop=0., set_annotations=False, sgd=None, losses=None):
|
def update(self, examples, *, drop=0., sgd=None, losses=None):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def rehearse(self, examples, drop=0., sgd=None, losses=None):
|
def rehearse(self, examples, drop=0., sgd=None, losses=None):
|
||||||
|
|
|
@ -173,14 +173,13 @@ class Tagger(TrainablePipe):
|
||||||
if doc.c[j].tag == 0:
|
if doc.c[j].tag == 0:
|
||||||
doc.c[j].tag = self.vocab.strings[self.labels[tag_id]]
|
doc.c[j].tag = self.vocab.strings[self.labels[tag_id]]
|
||||||
|
|
||||||
def update(self, examples, *, drop=0., sgd=None, losses=None, set_annotations=False):
|
def update(self, examples, *, drop=0., sgd=None, losses=None):
|
||||||
"""Learn from a batch of documents and gold-standard information,
|
"""Learn from a batch of documents and gold-standard information,
|
||||||
updating the pipe's model. Delegates to predict and get_loss.
|
updating the pipe's model. Delegates to predict, get_loss and
|
||||||
|
set_annotations.
|
||||||
|
|
||||||
examples (Iterable[Example]): A batch of Example objects.
|
examples (Iterable[Example]): A batch of Example objects.
|
||||||
drop (float): The dropout rate.
|
drop (float): The dropout rate.
|
||||||
set_annotations (bool): Whether or not to update the Example objects
|
|
||||||
with the predictions.
|
|
||||||
sgd (thinc.api.Optimizer): The optimizer.
|
sgd (thinc.api.Optimizer): The optimizer.
|
||||||
losses (Dict[str, float]): Optional record of the loss during training.
|
losses (Dict[str, float]): Optional record of the loss during training.
|
||||||
Updated using the component name as the key.
|
Updated using the component name as the key.
|
||||||
|
@ -206,9 +205,8 @@ class Tagger(TrainablePipe):
|
||||||
self.finish_update(sgd)
|
self.finish_update(sgd)
|
||||||
|
|
||||||
losses[self.name] += loss
|
losses[self.name] += loss
|
||||||
if set_annotations:
|
docs = [eg.predicted for eg in examples]
|
||||||
docs = [eg.predicted for eg in examples]
|
self.set_annotations(docs, self._scores2guesses(tag_scores))
|
||||||
self.set_annotations(docs, self._scores2guesses(tag_scores))
|
|
||||||
return losses
|
return losses
|
||||||
|
|
||||||
def rehearse(self, examples, *, drop=0., sgd=None, losses=None):
|
def rehearse(self, examples, *, drop=0., sgd=None, losses=None):
|
||||||
|
|
|
@ -195,17 +195,15 @@ class TextCategorizer(TrainablePipe):
|
||||||
examples: Iterable[Example],
|
examples: Iterable[Example],
|
||||||
*,
|
*,
|
||||||
drop: float = 0.0,
|
drop: float = 0.0,
|
||||||
set_annotations: bool = False,
|
|
||||||
sgd: Optional[Optimizer] = None,
|
sgd: Optional[Optimizer] = None,
|
||||||
losses: Optional[Dict[str, float]] = None,
|
losses: Optional[Dict[str, float]] = None,
|
||||||
) -> Dict[str, float]:
|
) -> Dict[str, float]:
|
||||||
"""Learn from a batch of documents and gold-standard information,
|
"""Learn from a batch of documents and gold-standard information,
|
||||||
updating the pipe's model. Delegates to predict and get_loss.
|
updating the pipe's model. Delegates to predict, get_loss and
|
||||||
|
set_annotations.
|
||||||
|
|
||||||
examples (Iterable[Example]): A batch of Example objects.
|
examples (Iterable[Example]): A batch of Example objects.
|
||||||
drop (float): The dropout rate.
|
drop (float): The dropout rate.
|
||||||
set_annotations (bool): Whether or not to update the Example objects
|
|
||||||
with the predictions.
|
|
||||||
sgd (thinc.api.Optimizer): The optimizer.
|
sgd (thinc.api.Optimizer): The optimizer.
|
||||||
losses (Dict[str, float]): Optional record of the loss during training.
|
losses (Dict[str, float]): Optional record of the loss during training.
|
||||||
Updated using the component name as the key.
|
Updated using the component name as the key.
|
||||||
|
@ -228,9 +226,8 @@ class TextCategorizer(TrainablePipe):
|
||||||
if sgd is not None:
|
if sgd is not None:
|
||||||
self.finish_update(sgd)
|
self.finish_update(sgd)
|
||||||
losses[self.name] += loss
|
losses[self.name] += loss
|
||||||
if set_annotations:
|
docs = [eg.predicted for eg in examples]
|
||||||
docs = [eg.predicted for eg in examples]
|
self.set_annotations(docs, scores=scores)
|
||||||
self.set_annotations(docs, scores=scores)
|
|
||||||
return losses
|
return losses
|
||||||
|
|
||||||
def rehearse(
|
def rehearse(
|
||||||
|
|
|
@ -163,15 +163,12 @@ class Tok2Vec(TrainablePipe):
|
||||||
drop: float = 0.0,
|
drop: float = 0.0,
|
||||||
sgd: Optional[Optimizer] = None,
|
sgd: Optional[Optimizer] = None,
|
||||||
losses: Optional[Dict[str, float]] = None,
|
losses: Optional[Dict[str, float]] = None,
|
||||||
set_annotations: bool = False,
|
|
||||||
):
|
):
|
||||||
"""Learn from a batch of documents and gold-standard information,
|
"""Learn from a batch of documents and gold-standard information,
|
||||||
updating the pipe's model.
|
updating the pipe's model.
|
||||||
|
|
||||||
examples (Iterable[Example]): A batch of Example objects.
|
examples (Iterable[Example]): A batch of Example objects.
|
||||||
drop (float): The dropout rate.
|
drop (float): The dropout rate.
|
||||||
set_annotations (bool): Whether or not to update the Example objects
|
|
||||||
with the predictions.
|
|
||||||
sgd (thinc.api.Optimizer): The optimizer.
|
sgd (thinc.api.Optimizer): The optimizer.
|
||||||
losses (Dict[str, float]): Optional record of the loss during training.
|
losses (Dict[str, float]): Optional record of the loss during training.
|
||||||
Updated using the component name as the key.
|
Updated using the component name as the key.
|
||||||
|
@ -210,8 +207,7 @@ class Tok2Vec(TrainablePipe):
|
||||||
listener.receive(batch_id, tokvecs, accumulate_gradient)
|
listener.receive(batch_id, tokvecs, accumulate_gradient)
|
||||||
if self.listeners:
|
if self.listeners:
|
||||||
self.listeners[-1].receive(batch_id, tokvecs, backprop)
|
self.listeners[-1].receive(batch_id, tokvecs, backprop)
|
||||||
if set_annotations:
|
self.set_annotations(docs, tokvecs)
|
||||||
self.set_annotations(docs, tokvecs)
|
|
||||||
return losses
|
return losses
|
||||||
|
|
||||||
def get_loss(self, examples, scores) -> None:
|
def get_loss(self, examples, scores) -> None:
|
||||||
|
|
|
@ -91,16 +91,14 @@ cdef class TrainablePipe(Pipe):
|
||||||
def update(self,
|
def update(self,
|
||||||
examples: Iterable["Example"],
|
examples: Iterable["Example"],
|
||||||
*, drop: float=0.0,
|
*, drop: float=0.0,
|
||||||
set_annotations: bool=False,
|
|
||||||
sgd: Optimizer=None,
|
sgd: Optimizer=None,
|
||||||
losses: Optional[Dict[str, float]]=None) -> Dict[str, float]:
|
losses: Optional[Dict[str, float]]=None) -> Dict[str, float]:
|
||||||
"""Learn from a batch of documents and gold-standard information,
|
"""Learn from a batch of documents and gold-standard information,
|
||||||
updating the pipe's model. Delegates to predict and get_loss.
|
updating the pipe's model. Delegates to predict, get_loss and
|
||||||
|
set_annotations.
|
||||||
|
|
||||||
examples (Iterable[Example]): A batch of Example objects.
|
examples (Iterable[Example]): A batch of Example objects.
|
||||||
drop (float): The dropout rate.
|
drop (float): The dropout rate.
|
||||||
set_annotations (bool): Whether or not to update the Example objects
|
|
||||||
with the predictions.
|
|
||||||
sgd (thinc.api.Optimizer): The optimizer.
|
sgd (thinc.api.Optimizer): The optimizer.
|
||||||
losses (Dict[str, float]): Optional record of the loss during training.
|
losses (Dict[str, float]): Optional record of the loss during training.
|
||||||
Updated using the component name as the key.
|
Updated using the component name as the key.
|
||||||
|
@ -124,9 +122,8 @@ cdef class TrainablePipe(Pipe):
|
||||||
if sgd not in (None, False):
|
if sgd not in (None, False):
|
||||||
self.finish_update(sgd)
|
self.finish_update(sgd)
|
||||||
losses[self.name] += loss
|
losses[self.name] += loss
|
||||||
if set_annotations:
|
docs = [eg.predicted for eg in examples]
|
||||||
docs = [eg.predicted for eg in examples]
|
self.set_annotations(docs, scores=scores)
|
||||||
self.set_annotations(docs, scores=scores)
|
|
||||||
return losses
|
return losses
|
||||||
|
|
||||||
def rehearse(self,
|
def rehearse(self,
|
||||||
|
|
|
@ -308,7 +308,7 @@ cdef class Parser(TrainablePipe):
|
||||||
action.do(states[i], action.label)
|
action.do(states[i], action.label)
|
||||||
free(is_valid)
|
free(is_valid)
|
||||||
|
|
||||||
def update(self, examples, *, drop=0., set_annotations=False, sgd=None, losses=None):
|
def update(self, examples, *, drop=0., sgd=None, losses=None):
|
||||||
cdef StateClass state
|
cdef StateClass state
|
||||||
if losses is None:
|
if losses is None:
|
||||||
losses = {}
|
losses = {}
|
||||||
|
@ -328,7 +328,6 @@ cdef class Parser(TrainablePipe):
|
||||||
return self.update_beam(
|
return self.update_beam(
|
||||||
examples,
|
examples,
|
||||||
beam_width=self.cfg["beam_width"],
|
beam_width=self.cfg["beam_width"],
|
||||||
set_annotations=set_annotations,
|
|
||||||
sgd=sgd,
|
sgd=sgd,
|
||||||
losses=losses,
|
losses=losses,
|
||||||
beam_density=self.cfg["beam_density"]
|
beam_density=self.cfg["beam_density"]
|
||||||
|
@ -370,9 +369,8 @@ cdef class Parser(TrainablePipe):
|
||||||
backprop_tok2vec(golds)
|
backprop_tok2vec(golds)
|
||||||
if sgd not in (None, False):
|
if sgd not in (None, False):
|
||||||
self.finish_update(sgd)
|
self.finish_update(sgd)
|
||||||
if set_annotations:
|
docs = [eg.predicted for eg in examples]
|
||||||
docs = [eg.predicted for eg in examples]
|
self.set_annotations(docs, all_states)
|
||||||
self.set_annotations(docs, all_states)
|
|
||||||
# Ugh, this is annoying. If we're working on GPU, we want to free the
|
# Ugh, this is annoying. If we're working on GPU, we want to free the
|
||||||
# memory ASAP. It seems that Python doesn't necessarily get around to
|
# memory ASAP. It seems that Python doesn't necessarily get around to
|
||||||
# removing these in time if we don't explicitly delete? It's confusing.
|
# removing these in time if we don't explicitly delete? It's confusing.
|
||||||
|
@ -432,7 +430,7 @@ cdef class Parser(TrainablePipe):
|
||||||
return losses
|
return losses
|
||||||
|
|
||||||
def update_beam(self, examples, *, beam_width,
|
def update_beam(self, examples, *, beam_width,
|
||||||
drop=0., sgd=None, losses=None, set_annotations=False, beam_density=0.0):
|
drop=0., sgd=None, losses=None, beam_density=0.0):
|
||||||
states, golds, _ = self.moves.init_gold_batch(examples)
|
states, golds, _ = self.moves.init_gold_batch(examples)
|
||||||
if not states:
|
if not states:
|
||||||
return losses
|
return losses
|
||||||
|
|
|
@ -425,6 +425,7 @@ GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
|
||||||
def test_overfitting_IO():
|
def test_overfitting_IO():
|
||||||
# Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly
|
# Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly
|
||||||
nlp = English()
|
nlp = English()
|
||||||
|
nlp.add_pipe("sentencizer", first=True)
|
||||||
vector_length = 3
|
vector_length = 3
|
||||||
assert "Q2146908" not in nlp.vocab.strings
|
assert "Q2146908" not in nlp.vocab.strings
|
||||||
|
|
||||||
|
@ -464,9 +465,6 @@ def test_overfitting_IO():
|
||||||
nlp.update(train_examples, sgd=optimizer, losses=losses)
|
nlp.update(train_examples, sgd=optimizer, losses=losses)
|
||||||
assert losses["entity_linker"] < 0.001
|
assert losses["entity_linker"] < 0.001
|
||||||
|
|
||||||
# adding additional components that are required for the entity_linker
|
|
||||||
nlp.add_pipe("sentencizer", first=True)
|
|
||||||
|
|
||||||
# Add a custom component to recognize "Russ Cochran" as an entity for the example training data
|
# Add a custom component to recognize "Russ Cochran" as an entity for the example training data
|
||||||
patterns = [
|
patterns = [
|
||||||
{"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}
|
{"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}
|
||||||
|
|
|
@ -220,8 +220,9 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
|
||||||
## DependencyParser.update {#update tag="method"}
|
## DependencyParser.update {#update tag="method"}
|
||||||
|
|
||||||
Learn from a batch of [`Example`](/api/example) objects, updating the pipe's
|
Learn from a batch of [`Example`](/api/example) objects, updating the pipe's
|
||||||
model. Delegates to [`predict`](/api/dependencyparser#predict) and
|
model. Delegates to [`predict`](/api/dependencyparser#predict),
|
||||||
[`get_loss`](/api/dependencyparser#get_loss).
|
[`get_loss`](/api/dependencyparser#get_loss) and
|
||||||
|
[`set_annotations`](/api/dependencyparser#set_annotations).
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -236,7 +237,6 @@ model. Delegates to [`predict`](/api/dependencyparser#predict) and
|
||||||
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `drop` | The dropout rate. ~~float~~ |
|
| `drop` | The dropout rate. ~~float~~ |
|
||||||
| `set_annotations` | Whether or not to update the `Example` objects with the predictions, delegating to [`set_annotations`](#set_annotations). ~~bool~~ |
|
|
||||||
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
||||||
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
||||||
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
||||||
|
|
|
@ -237,7 +237,8 @@ entities.
|
||||||
|
|
||||||
Learn from a batch of [`Example`](/api/example) objects, updating both the
|
Learn from a batch of [`Example`](/api/example) objects, updating both the
|
||||||
pipe's entity linking model and context encoder. Delegates to
|
pipe's entity linking model and context encoder. Delegates to
|
||||||
[`predict`](/api/entitylinker#predict).
|
[`predict`](/api/entitylinker#predict) and
|
||||||
|
[`set_annotations`](/api/entitylinker#set_annotations).
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -252,7 +253,6 @@ pipe's entity linking model and context encoder. Delegates to
|
||||||
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `drop` | The dropout rate. ~~float~~ |
|
| `drop` | The dropout rate. ~~float~~ |
|
||||||
| `set_annotations` | Whether or not to update the `Example` objects with the predictions, delegating to [`set_annotations`](#set_annotations). ~~bool~~ |
|
|
||||||
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
||||||
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
||||||
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
||||||
|
|
|
@ -209,8 +209,9 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
|
||||||
## EntityRecognizer.update {#update tag="method"}
|
## EntityRecognizer.update {#update tag="method"}
|
||||||
|
|
||||||
Learn from a batch of [`Example`](/api/example) objects, updating the pipe's
|
Learn from a batch of [`Example`](/api/example) objects, updating the pipe's
|
||||||
model. Delegates to [`predict`](/api/entityrecognizer#predict) and
|
model. Delegates to [`predict`](/api/entityrecognizer#predict),
|
||||||
[`get_loss`](/api/entityrecognizer#get_loss).
|
[`get_loss`](/api/entityrecognizer#get_loss) and
|
||||||
|
[`set_annotations`](/api/entityrecognizer#set_annotations).
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -225,7 +226,6 @@ model. Delegates to [`predict`](/api/entityrecognizer#predict) and
|
||||||
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `drop` | The dropout rate. ~~float~~ |
|
| `drop` | The dropout rate. ~~float~~ |
|
||||||
| `set_annotations` | Whether or not to update the `Example` objects with the predictions, delegating to [`set_annotations`](#set_annotations). ~~bool~~ |
|
|
||||||
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
||||||
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
||||||
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
||||||
|
|
|
@ -189,8 +189,9 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
|
||||||
|
|
||||||
Learn from a batch of [`Example`](/api/example) objects containing the
|
Learn from a batch of [`Example`](/api/example) objects containing the
|
||||||
predictions and gold-standard annotations, and update the component's model.
|
predictions and gold-standard annotations, and update the component's model.
|
||||||
Delegates to [`predict`](/api/morphologizer#predict) and
|
Delegates to [`predict`](/api/morphologizer#predict),
|
||||||
[`get_loss`](/api/morphologizer#get_loss).
|
[`get_loss`](/api/morphologizer#get_loss) and
|
||||||
|
[`set_annotations`](/api/morphologizer#set_annotations).
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -205,7 +206,6 @@ Delegates to [`predict`](/api/morphologizer#predict) and
|
||||||
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `drop` | The dropout rate. ~~float~~ |
|
| `drop` | The dropout rate. ~~float~~ |
|
||||||
| `set_annotations` | Whether or not to update the `Example` objects with the predictions, delegating to [`set_annotations`](#set_annotations). ~~bool~~ |
|
|
||||||
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
||||||
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
||||||
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
||||||
|
|
|
@ -199,8 +199,9 @@ Modify a batch of [`Doc`](/api/doc) objects using pre-computed scores.
|
||||||
|
|
||||||
Learn from a batch of [`Example`](/api/example) objects containing the
|
Learn from a batch of [`Example`](/api/example) objects containing the
|
||||||
predictions and gold-standard annotations, and update the component's model.
|
predictions and gold-standard annotations, and update the component's model.
|
||||||
Delegates to [`predict`](/api/multilabel_textcategorizer#predict) and
|
Delegates to [`predict`](/api/multilabel_textcategorizer#predict),
|
||||||
[`get_loss`](/api/multilabel_textcategorizer#get_loss).
|
[`get_loss`](/api/multilabel_textcategorizer#get_loss) and
|
||||||
|
[`set_annotations`](/api/multilabel_textcategorizer#set_annotations).
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -215,7 +216,6 @@ Delegates to [`predict`](/api/multilabel_textcategorizer#predict) and
|
||||||
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `drop` | The dropout rate. ~~float~~ |
|
| `drop` | The dropout rate. ~~float~~ |
|
||||||
| `set_annotations` | Whether or not to update the `Example` objects with the predictions, delegating to [`set_annotations`](#set_annotations). ~~bool~~ |
|
|
||||||
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
||||||
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
||||||
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
||||||
|
|
|
@ -195,7 +195,6 @@ predictions and gold-standard annotations, and update the component's model.
|
||||||
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `drop` | The dropout rate. ~~float~~ |
|
| `drop` | The dropout rate. ~~float~~ |
|
||||||
| `set_annotations` | Whether or not to update the `Example` objects with the predictions, delegating to [`set_annotations`](#set_annotations). ~~bool~~ |
|
|
||||||
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
||||||
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
||||||
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
||||||
|
|
|
@ -176,8 +176,9 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
|
||||||
|
|
||||||
Learn from a batch of [`Example`](/api/example) objects containing the
|
Learn from a batch of [`Example`](/api/example) objects containing the
|
||||||
predictions and gold-standard annotations, and update the component's model.
|
predictions and gold-standard annotations, and update the component's model.
|
||||||
Delegates to [`predict`](/api/sentencerecognizer#predict) and
|
Delegates to [`predict`](/api/sentencerecognizer#predict),
|
||||||
[`get_loss`](/api/sentencerecognizer#get_loss).
|
[`get_loss`](/api/sentencerecognizer#get_loss) and
|
||||||
|
[`set_annotations`](/api/sentencerecognizer#set_annotations).
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -192,7 +193,6 @@ Delegates to [`predict`](/api/sentencerecognizer#predict) and
|
||||||
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `drop` | The dropout rate. ~~float~~ |
|
| `drop` | The dropout rate. ~~float~~ |
|
||||||
| `set_annotations` | Whether or not to update the `Example` objects with the predictions, delegating to [`set_annotations`](#set_annotations). ~~bool~~ |
|
|
||||||
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
||||||
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
||||||
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
||||||
|
|
|
@ -187,8 +187,9 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
|
||||||
|
|
||||||
Learn from a batch of [`Example`](/api/example) objects containing the
|
Learn from a batch of [`Example`](/api/example) objects containing the
|
||||||
predictions and gold-standard annotations, and update the component's model.
|
predictions and gold-standard annotations, and update the component's model.
|
||||||
Delegates to [`predict`](/api/tagger#predict) and
|
Delegates to [`predict`](/api/tagger#predict),
|
||||||
[`get_loss`](/api/tagger#get_loss).
|
[`get_loss`](/api/tagger#get_loss) and
|
||||||
|
[`set_annotations`](/api/tagger#set_annotations).
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -203,7 +204,6 @@ Delegates to [`predict`](/api/tagger#predict) and
|
||||||
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `drop` | The dropout rate. ~~float~~ |
|
| `drop` | The dropout rate. ~~float~~ |
|
||||||
| `set_annotations` | Whether or not to update the `Example` objects with the predictions, delegating to [`set_annotations`](#set_annotations). ~~bool~~ |
|
|
||||||
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
||||||
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
||||||
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
||||||
|
|
|
@ -201,8 +201,9 @@ Modify a batch of [`Doc`](/api/doc) objects using pre-computed scores.
|
||||||
|
|
||||||
Learn from a batch of [`Example`](/api/example) objects containing the
|
Learn from a batch of [`Example`](/api/example) objects containing the
|
||||||
predictions and gold-standard annotations, and update the component's model.
|
predictions and gold-standard annotations, and update the component's model.
|
||||||
Delegates to [`predict`](/api/textcategorizer#predict) and
|
Delegates to [`predict`](/api/textcategorizer#predict),
|
||||||
[`get_loss`](/api/textcategorizer#get_loss).
|
[`get_loss`](/api/textcategorizer#get_loss) and
|
||||||
|
[`set_annotations`](/api/textcategorizer#set_annotations).
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -217,7 +218,6 @@ Delegates to [`predict`](/api/textcategorizer#predict) and
|
||||||
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `drop` | The dropout rate. ~~float~~ |
|
| `drop` | The dropout rate. ~~float~~ |
|
||||||
| `set_annotations` | Whether or not to update the `Example` objects with the predictions, delegating to [`set_annotations`](#set_annotations). ~~bool~~ |
|
|
||||||
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
||||||
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
||||||
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
||||||
|
|
|
@ -186,7 +186,8 @@ Modify a batch of [`Doc`](/api/doc) objects, using pre-computed scores.
|
||||||
|
|
||||||
Learn from a batch of [`Example`](/api/example) objects containing the
|
Learn from a batch of [`Example`](/api/example) objects containing the
|
||||||
predictions and gold-standard annotations, and update the component's model.
|
predictions and gold-standard annotations, and update the component's model.
|
||||||
Delegates to [`predict`](/api/tok2vec#predict).
|
Delegates to [`predict`](/api/tok2vec#predict) and
|
||||||
|
[`set_annotations`](/api/tok2vec#set_annotations).
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -201,7 +202,6 @@ Delegates to [`predict`](/api/tok2vec#predict).
|
||||||
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `drop` | The dropout rate. ~~float~~ |
|
| `drop` | The dropout rate. ~~float~~ |
|
||||||
| `set_annotations` | Whether or not to update the `Example` objects with the predictions, delegating to [`set_annotations`](#set_annotations). ~~bool~~ |
|
|
||||||
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
||||||
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
||||||
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
||||||
|
|
|
@ -249,7 +249,6 @@ and call the optimizer, while the others simply increment the gradients.
|
||||||
| `examples` | A batch of [`Example`](/api/example) objects. Only the [`Example.predicted`](/api/example#predicted) `Doc` object is used, the reference `Doc` is ignored. ~~Iterable[Example]~~ |
|
| `examples` | A batch of [`Example`](/api/example) objects. Only the [`Example.predicted`](/api/example#predicted) `Doc` object is used, the reference `Doc` is ignored. ~~Iterable[Example]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `drop` | The dropout rate. ~~float~~ |
|
| `drop` | The dropout rate. ~~float~~ |
|
||||||
| `set_annotations` | Whether or not to update the `Example` objects with the predictions, delegating to [`set_annotations`](#set_annotations). ~~bool~~ |
|
|
||||||
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
|
||||||
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
||||||
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
||||||
|
|
|
@ -796,7 +796,7 @@ class RelationExtractor(TrainablePipe):
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.name = name
|
self.name = name
|
||||||
|
|
||||||
def update(self, examples, drop=0.0, set_annotations=False, sgd=None, losses=None):
|
def update(self, examples, drop=0.0, sgd=None, losses=None):
|
||||||
"""Learn from a batch of Example objects."""
|
"""Learn from a batch of Example objects."""
|
||||||
...
|
...
|
||||||
|
|
||||||
|
@ -901,7 +901,6 @@ def update(
|
||||||
examples: Iterable[Example],
|
examples: Iterable[Example],
|
||||||
*,
|
*,
|
||||||
drop: float = 0.0,
|
drop: float = 0.0,
|
||||||
set_annotations: bool = False,
|
|
||||||
sgd: Optional[Optimizer] = None,
|
sgd: Optional[Optimizer] = None,
|
||||||
losses: Optional[Dict[str, float]] = None,
|
losses: Optional[Dict[str, float]] = None,
|
||||||
) -> Dict[str, float]:
|
) -> Dict[str, float]:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user