mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-11 09:00:36 +03:00
Throw an informative error when components are run with objects of the wrong type
This commit is contained in:
parent
6712d0b5db
commit
9f43ba839a
|
@ -579,6 +579,8 @@ class Errors(object):
|
||||||
"table, which contains {n_rows} vectors.")
|
"table, which contains {n_rows} vectors.")
|
||||||
|
|
||||||
# TODO: fix numbering after merging develop into master
|
# TODO: fix numbering after merging develop into master
|
||||||
|
E978 = ("The {method} method of component {name} takes a list of Example objects, "
|
||||||
|
"but found {types} instead.")
|
||||||
E979 = ("Cannot convert {type} to an Example object.")
|
E979 = ("Cannot convert {type} to an Example object.")
|
||||||
E980 = ("Each link annotation should refer to a dictionary with at most one "
|
E980 = ("Each link annotation should refer to a dictionary with at most one "
|
||||||
"identifier mapping to 1.0, and all others to 0.0.")
|
"identifier mapping to 1.0, and all others to 0.0.")
|
||||||
|
|
|
@ -286,14 +286,16 @@ class Tagger(Pipe):
|
||||||
doc.is_tagged = True
|
doc.is_tagged = True
|
||||||
|
|
||||||
def update(self, examples, drop=0., sgd=None, losses=None, set_annotations=False):
|
def update(self, examples, drop=0., sgd=None, losses=None, set_annotations=False):
|
||||||
for eg in examples:
|
|
||||||
assert isinstance(eg, Example)
|
|
||||||
if losses is not None and self.name not in losses:
|
if losses is not None and self.name not in losses:
|
||||||
losses[self.name] = 0.
|
losses[self.name] = 0.
|
||||||
|
|
||||||
|
try:
|
||||||
if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
|
if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
|
||||||
# Handle cases where there are no tokens in any docs.
|
# Handle cases where there are no tokens in any docs.
|
||||||
return
|
return
|
||||||
|
except AttributeError:
|
||||||
|
types = set([type(eg) for eg in examples])
|
||||||
|
raise ValueError(Errors.E978.format(name="Tagger", method="update", types=types))
|
||||||
set_dropout_rate(self.model, drop)
|
set_dropout_rate(self.model, drop)
|
||||||
tag_scores, bp_tag_scores = self.model.begin_update(
|
tag_scores, bp_tag_scores = self.model.begin_update(
|
||||||
[eg.predicted for eg in examples])
|
[eg.predicted for eg in examples])
|
||||||
|
@ -315,7 +317,11 @@ class Tagger(Pipe):
|
||||||
"""Perform a 'rehearsal' update, where we try to match the output of
|
"""Perform a 'rehearsal' update, where we try to match the output of
|
||||||
an initial model.
|
an initial model.
|
||||||
"""
|
"""
|
||||||
|
try:
|
||||||
docs = [eg.predicted for eg in examples]
|
docs = [eg.predicted for eg in examples]
|
||||||
|
except AttributeError:
|
||||||
|
types = set([type(eg) for eg in examples])
|
||||||
|
raise ValueError(Errors.E978.format(name="Tagger", method="rehearse", types=types))
|
||||||
if self._rehearsal_model is None:
|
if self._rehearsal_model is None:
|
||||||
return
|
return
|
||||||
if not any(len(doc) for doc in docs):
|
if not any(len(doc) for doc in docs):
|
||||||
|
@ -347,7 +353,11 @@ class Tagger(Pipe):
|
||||||
orig_tag_map = dict(self.vocab.morphology.tag_map)
|
orig_tag_map = dict(self.vocab.morphology.tag_map)
|
||||||
new_tag_map = {}
|
new_tag_map = {}
|
||||||
for example in get_examples():
|
for example in get_examples():
|
||||||
for token in example.y:
|
try:
|
||||||
|
y = example.y
|
||||||
|
except AttributeError:
|
||||||
|
raise ValueError(Errors.E978.format(name="Tagger", method="begin_training", types=type(example)))
|
||||||
|
for token in y:
|
||||||
tag = token.tag_
|
tag = token.tag_
|
||||||
if tag in orig_tag_map:
|
if tag in orig_tag_map:
|
||||||
new_tag_map[tag] = orig_tag_map[tag]
|
new_tag_map[tag] = orig_tag_map[tag]
|
||||||
|
@ -771,10 +781,12 @@ class ClozeMultitask(Pipe):
|
||||||
def rehearse(self, examples, drop=0., sgd=None, losses=None):
|
def rehearse(self, examples, drop=0., sgd=None, losses=None):
|
||||||
if losses is not None and self.name not in losses:
|
if losses is not None and self.name not in losses:
|
||||||
losses[self.name] = 0.
|
losses[self.name] = 0.
|
||||||
docs = [eg.predicted for eg in examples]
|
|
||||||
set_dropout_rate(self.model, drop)
|
set_dropout_rate(self.model, drop)
|
||||||
predictions, bp_predictions = self.model.begin_update(
|
try:
|
||||||
[eg.predicted for eg in examples])
|
predictions, bp_predictions = self.model.begin_update([eg.predicted for eg in examples])
|
||||||
|
except AttributeError:
|
||||||
|
types = set([type(eg) for eg in examples])
|
||||||
|
raise ValueError(Errors.E978.format(name="ClozeMultitask", method="rehearse", types=types))
|
||||||
loss, d_predictions = self.get_loss(examples, self.vocab.vectors.data, predictions)
|
loss, d_predictions = self.get_loss(examples, self.vocab.vectors.data, predictions)
|
||||||
bp_predictions(d_predictions)
|
bp_predictions(d_predictions)
|
||||||
if sgd is not None:
|
if sgd is not None:
|
||||||
|
@ -834,11 +846,13 @@ class TextCategorizer(Pipe):
|
||||||
doc.cats[label] = float(scores[i, j])
|
doc.cats[label] = float(scores[i, j])
|
||||||
|
|
||||||
def update(self, examples, state=None, drop=0., set_annotations=False, sgd=None, losses=None):
|
def update(self, examples, state=None, drop=0., set_annotations=False, sgd=None, losses=None):
|
||||||
for eg in examples:
|
try:
|
||||||
assert isinstance(eg, Example)
|
|
||||||
if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
|
if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
|
||||||
# Handle cases where there are no tokens in any docs.
|
# Handle cases where there are no tokens in any docs.
|
||||||
return
|
return
|
||||||
|
except AttributeError:
|
||||||
|
types = set([type(eg) for eg in examples])
|
||||||
|
raise ValueError(Errors.E978.format(name="TextCategorizer", method="update", types=types))
|
||||||
set_dropout_rate(self.model, drop)
|
set_dropout_rate(self.model, drop)
|
||||||
scores, bp_scores = self.model.begin_update(
|
scores, bp_scores = self.model.begin_update(
|
||||||
[eg.predicted for eg in examples]
|
[eg.predicted for eg in examples]
|
||||||
|
@ -857,9 +871,11 @@ class TextCategorizer(Pipe):
|
||||||
def rehearse(self, examples, drop=0., sgd=None, losses=None):
|
def rehearse(self, examples, drop=0., sgd=None, losses=None):
|
||||||
if self._rehearsal_model is None:
|
if self._rehearsal_model is None:
|
||||||
return
|
return
|
||||||
for eg in examples:
|
try:
|
||||||
assert isinstance(eg, Example)
|
|
||||||
docs = [eg.predicted for eg in examples]
|
docs = [eg.predicted for eg in examples]
|
||||||
|
except AttributeError:
|
||||||
|
types = set([type(eg) for eg in examples])
|
||||||
|
raise ValueError(Errors.E978.format(name="TextCategorizer", method="rehearse", types=types))
|
||||||
if not any(len(doc) for doc in docs):
|
if not any(len(doc) for doc in docs):
|
||||||
# Handle cases where there are no tokens in any docs.
|
# Handle cases where there are no tokens in any docs.
|
||||||
return
|
return
|
||||||
|
@ -917,7 +933,11 @@ class TextCategorizer(Pipe):
|
||||||
# TODO: begin_training is not guaranteed to see all data / labels ?
|
# TODO: begin_training is not guaranteed to see all data / labels ?
|
||||||
examples = list(get_examples())
|
examples = list(get_examples())
|
||||||
for example in examples:
|
for example in examples:
|
||||||
for cat in example.y.cats:
|
try:
|
||||||
|
y = example.y
|
||||||
|
except AttributeError:
|
||||||
|
raise ValueError(Errors.E978.format(name="TextCategorizer", method="update", types=type(example)))
|
||||||
|
for cat in y.cats:
|
||||||
self.add_label(cat)
|
self.add_label(cat)
|
||||||
self.require_labels()
|
self.require_labels()
|
||||||
docs = [Doc(Vocab(), words=["hello"])]
|
docs = [Doc(Vocab(), words=["hello"])]
|
||||||
|
@ -1074,10 +1094,12 @@ class EntityLinker(Pipe):
|
||||||
losses.setdefault(self.name, 0.0)
|
losses.setdefault(self.name, 0.0)
|
||||||
if not examples:
|
if not examples:
|
||||||
return 0
|
return 0
|
||||||
for eg in examples:
|
|
||||||
assert isinstance(eg, Example)
|
|
||||||
sentence_docs = []
|
sentence_docs = []
|
||||||
|
try:
|
||||||
docs = [eg.predicted for eg in examples]
|
docs = [eg.predicted for eg in examples]
|
||||||
|
except AttributeError:
|
||||||
|
types = set([type(eg) for eg in examples])
|
||||||
|
raise ValueError(Errors.E978.format(name="EntityLinker", method="update", types=types))
|
||||||
if set_annotations:
|
if set_annotations:
|
||||||
# This seems simpler than other ways to get that exact output -- but
|
# This seems simpler than other ways to get that exact output -- but
|
||||||
# it does run the model twice :(
|
# it does run the model twice :(
|
||||||
|
|
|
@ -268,7 +268,11 @@ cdef class Parser:
|
||||||
for multitask in self._multitasks:
|
for multitask in self._multitasks:
|
||||||
multitask.update(examples, drop=drop, sgd=sgd)
|
multitask.update(examples, drop=drop, sgd=sgd)
|
||||||
set_dropout_rate(self.model, drop)
|
set_dropout_rate(self.model, drop)
|
||||||
|
try:
|
||||||
states, golds, max_steps = self._init_gold_batch_no_cut(examples)
|
states, golds, max_steps = self._init_gold_batch_no_cut(examples)
|
||||||
|
except AttributeError:
|
||||||
|
types = set([type(eg) for eg in examples])
|
||||||
|
raise ValueError(Errors.E978.format(name="Parser", method="update", types=types))
|
||||||
states_golds = [(s, g) for (s, g) in zip(states, golds)
|
states_golds = [(s, g) for (s, g) in zip(states, golds)
|
||||||
if not s.is_final() and g is not None]
|
if not s.is_final() and g is not None]
|
||||||
# Prepare the stepwise model, and get the callback for finishing the batch
|
# Prepare the stepwise model, and get the callback for finishing the batch
|
||||||
|
|
|
@ -3,6 +3,7 @@ from thinc.api import Adam, NumpyOps
|
||||||
from spacy.attrs import NORM
|
from spacy.attrs import NORM
|
||||||
from spacy.vocab import Vocab
|
from spacy.vocab import Vocab
|
||||||
|
|
||||||
|
from spacy.gold import Example
|
||||||
from spacy.pipeline.defaults import default_parser, default_ner
|
from spacy.pipeline.defaults import default_parser, default_ner
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc
|
||||||
from spacy.pipeline import DependencyParser, EntityRecognizer
|
from spacy.pipeline import DependencyParser, EntityRecognizer
|
||||||
|
@ -38,7 +39,8 @@ def _train_parser(parser):
|
||||||
"heads": [1, 1, 3, 3],
|
"heads": [1, 1, 3, 3],
|
||||||
"deps": ["left", "ROOT", "left", "ROOT"]
|
"deps": ["left", "ROOT", "left", "ROOT"]
|
||||||
}
|
}
|
||||||
parser.update((doc, gold), sgd=sgd, losses=losses)
|
example = Example.from_dict(doc, gold)
|
||||||
|
parser.update([example], sgd=sgd, losses=losses)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user