mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-05 22:20:34 +03:00
Throw an informative error when running the pipeline components on the wrong type of objects
This commit is contained in:
parent
6712d0b5db
commit
9f43ba839a
|
@ -579,6 +579,8 @@ class Errors(object):
|
|||
"table, which contains {n_rows} vectors.")
|
||||
|
||||
# TODO: fix numbering after merging develop into master
|
||||
E978 = ("The {method} method of component {name} takes a list of Example objects, "
|
||||
"but found {types} instead.")
|
||||
E979 = ("Cannot convert {type} to an Example object.")
|
||||
E980 = ("Each link annotation should refer to a dictionary with at most one "
|
||||
"identifier mapping to 1.0, and all others to 0.0.")
|
||||
|
|
|
@ -286,14 +286,16 @@ class Tagger(Pipe):
|
|||
doc.is_tagged = True
|
||||
|
||||
def update(self, examples, drop=0., sgd=None, losses=None, set_annotations=False):
|
||||
for eg in examples:
|
||||
assert isinstance(eg, Example)
|
||||
if losses is not None and self.name not in losses:
|
||||
losses[self.name] = 0.
|
||||
|
||||
if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
|
||||
# Handle cases where there are no tokens in any docs.
|
||||
return
|
||||
try:
|
||||
if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
|
||||
# Handle cases where there are no tokens in any docs.
|
||||
return
|
||||
except AttributeError:
|
||||
types = set([type(eg) for eg in examples])
|
||||
raise ValueError(Errors.E978.format(name="Tagger", method="update", types=types))
|
||||
set_dropout_rate(self.model, drop)
|
||||
tag_scores, bp_tag_scores = self.model.begin_update(
|
||||
[eg.predicted for eg in examples])
|
||||
|
@ -315,7 +317,11 @@ class Tagger(Pipe):
|
|||
"""Perform a 'rehearsal' update, where we try to match the output of
|
||||
an initial model.
|
||||
"""
|
||||
docs = [eg.predicted for eg in examples]
|
||||
try:
|
||||
docs = [eg.predicted for eg in examples]
|
||||
except AttributeError:
|
||||
types = set([type(eg) for eg in examples])
|
||||
raise ValueError(Errors.E978.format(name="Tagger", method="rehearse", types=types))
|
||||
if self._rehearsal_model is None:
|
||||
return
|
||||
if not any(len(doc) for doc in docs):
|
||||
|
@ -347,7 +353,11 @@ class Tagger(Pipe):
|
|||
orig_tag_map = dict(self.vocab.morphology.tag_map)
|
||||
new_tag_map = {}
|
||||
for example in get_examples():
|
||||
for token in example.y:
|
||||
try:
|
||||
y = example.y
|
||||
except AttributeError:
|
||||
raise ValueError(Errors.E978.format(name="Tagger", method="begin_training", types=type(example)))
|
||||
for token in y:
|
||||
tag = token.tag_
|
||||
if tag in orig_tag_map:
|
||||
new_tag_map[tag] = orig_tag_map[tag]
|
||||
|
@ -771,10 +781,12 @@ class ClozeMultitask(Pipe):
|
|||
def rehearse(self, examples, drop=0., sgd=None, losses=None):
|
||||
if losses is not None and self.name not in losses:
|
||||
losses[self.name] = 0.
|
||||
docs = [eg.predicted for eg in examples]
|
||||
set_dropout_rate(self.model, drop)
|
||||
predictions, bp_predictions = self.model.begin_update(
|
||||
[eg.predicted for eg in examples])
|
||||
try:
|
||||
predictions, bp_predictions = self.model.begin_update([eg.predicted for eg in examples])
|
||||
except AttributeError:
|
||||
types = set([type(eg) for eg in examples])
|
||||
raise ValueError(Errors.E978.format(name="ClozeMultitask", method="rehearse", types=types))
|
||||
loss, d_predictions = self.get_loss(examples, self.vocab.vectors.data, predictions)
|
||||
bp_predictions(d_predictions)
|
||||
if sgd is not None:
|
||||
|
@ -834,11 +846,13 @@ class TextCategorizer(Pipe):
|
|||
doc.cats[label] = float(scores[i, j])
|
||||
|
||||
def update(self, examples, state=None, drop=0., set_annotations=False, sgd=None, losses=None):
|
||||
for eg in examples:
|
||||
assert isinstance(eg, Example)
|
||||
if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
|
||||
# Handle cases where there are no tokens in any docs.
|
||||
return
|
||||
try:
|
||||
if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
|
||||
# Handle cases where there are no tokens in any docs.
|
||||
return
|
||||
except AttributeError:
|
||||
types = set([type(eg) for eg in examples])
|
||||
raise ValueError(Errors.E978.format(name="TextCategorizer", method="update", types=types))
|
||||
set_dropout_rate(self.model, drop)
|
||||
scores, bp_scores = self.model.begin_update(
|
||||
[eg.predicted for eg in examples]
|
||||
|
@ -857,9 +871,11 @@ class TextCategorizer(Pipe):
|
|||
def rehearse(self, examples, drop=0., sgd=None, losses=None):
|
||||
if self._rehearsal_model is None:
|
||||
return
|
||||
for eg in examples:
|
||||
assert isinstance(eg, Example)
|
||||
docs = [eg.predicted for eg in examples]
|
||||
try:
|
||||
docs = [eg.predicted for eg in examples]
|
||||
except AttributeError:
|
||||
types = set([type(eg) for eg in examples])
|
||||
raise ValueError(Errors.E978.format(name="TextCategorizer", method="rehearse", types=types))
|
||||
if not any(len(doc) for doc in docs):
|
||||
# Handle cases where there are no tokens in any docs.
|
||||
return
|
||||
|
@ -917,7 +933,11 @@ class TextCategorizer(Pipe):
|
|||
# TODO: begin_training is not guaranteed to see all data / labels ?
|
||||
examples = list(get_examples())
|
||||
for example in examples:
|
||||
for cat in example.y.cats:
|
||||
try:
|
||||
y = example.y
|
||||
except AttributeError:
|
||||
raise ValueError(Errors.E978.format(name="TextCategorizer", method="update", types=type(example)))
|
||||
for cat in y.cats:
|
||||
self.add_label(cat)
|
||||
self.require_labels()
|
||||
docs = [Doc(Vocab(), words=["hello"])]
|
||||
|
@ -1074,10 +1094,12 @@ class EntityLinker(Pipe):
|
|||
losses.setdefault(self.name, 0.0)
|
||||
if not examples:
|
||||
return 0
|
||||
for eg in examples:
|
||||
assert isinstance(eg, Example)
|
||||
sentence_docs = []
|
||||
docs = [eg.predicted for eg in examples]
|
||||
try:
|
||||
docs = [eg.predicted for eg in examples]
|
||||
except AttributeError:
|
||||
types = set([type(eg) for eg in examples])
|
||||
raise ValueError(Errors.E978.format(name="EntityLinker", method="update", types=types))
|
||||
if set_annotations:
|
||||
# This seems simpler than other ways to get that exact output -- but
|
||||
# it does run the model twice :(
|
||||
|
|
|
@ -268,7 +268,11 @@ cdef class Parser:
|
|||
for multitask in self._multitasks:
|
||||
multitask.update(examples, drop=drop, sgd=sgd)
|
||||
set_dropout_rate(self.model, drop)
|
||||
states, golds, max_steps = self._init_gold_batch_no_cut(examples)
|
||||
try:
|
||||
states, golds, max_steps = self._init_gold_batch_no_cut(examples)
|
||||
except AttributeError:
|
||||
types = set([type(eg) for eg in examples])
|
||||
raise ValueError(Errors.E978.format(name="Parser", method="update", types=types))
|
||||
states_golds = [(s, g) for (s, g) in zip(states, golds)
|
||||
if not s.is_final() and g is not None]
|
||||
# Prepare the stepwise model, and get the callback for finishing the batch
|
||||
|
|
|
@ -3,6 +3,7 @@ from thinc.api import Adam, NumpyOps
|
|||
from spacy.attrs import NORM
|
||||
from spacy.vocab import Vocab
|
||||
|
||||
from spacy.gold import Example
|
||||
from spacy.pipeline.defaults import default_parser, default_ner
|
||||
from spacy.tokens import Doc
|
||||
from spacy.pipeline import DependencyParser, EntityRecognizer
|
||||
|
@ -38,7 +39,8 @@ def _train_parser(parser):
|
|||
"heads": [1, 1, 3, 3],
|
||||
"deps": ["left", "ROOT", "left", "ROOT"]
|
||||
}
|
||||
parser.update((doc, gold), sgd=sgd, losses=losses)
|
||||
example = Example.from_dict(doc, gold)
|
||||
parser.update([example], sgd=sgd, losses=losses)
|
||||
return parser
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user