From c4c21cb4281133890d0b59c4b5a847d1ef9bff30 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Wed, 10 Jul 2019 19:39:38 +0200
Subject: [PATCH] more friendly textcat errors (#3946)

* more friendly textcat errors with require_model and require_labels

* update thinc version with recent bugfix
---
 requirements.txt         | 2 +-
 spacy/errors.py          | 1 +
 spacy/pipeline/pipes.pyx | 7 +++++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 8cc52dfe4..58761b95c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 # Our libraries
 cymem>=2.0.2,<2.1.0
 preshed>=2.0.1,<2.1.0
-thinc>=7.0.2,<7.1.0
+thinc>=7.0.5,<7.1.0
 blis>=0.2.2,<0.3.0
 murmurhash>=0.28.0,<1.1.0
 wasabi>=0.2.0,<1.1.0
diff --git a/spacy/errors.py b/spacy/errors.py
index 8f2eab3a1..347ad1fca 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -403,6 +403,7 @@ class Errors(object):
     E140 = ("The list of entities, prior probabilities and entity vectors should be of equal length.")
     E141 = ("Entity vectors should be of length {required} instead of the provided {found}.")
     E142 = ("Unsupported loss_function '{loss_func}'. Use either 'L2' or 'cosine'")
+    E143 = ("Labels for component '{name}' not initialized. Did you forget to call add_label()?")
 
 
 @add_codes
diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx
index d99a1f73e..891e8d4e3 100644
--- a/spacy/pipeline/pipes.pyx
+++ b/spacy/pipeline/pipes.pyx
@@ -902,6 +902,11 @@ class TextCategorizer(Pipe):
     def labels(self):
         return tuple(self.cfg.setdefault("labels", []))
 
+    def require_labels(self):
+        """Raise an error if the component's model has no labels defined."""
+        if not self.labels:
+            raise ValueError(Errors.E143.format(name=self.name))
+
     @labels.setter
     def labels(self, value):
         self.cfg["labels"] = tuple(value)
@@ -931,6 +936,7 @@ class TextCategorizer(Pipe):
                 doc.cats[label] = float(scores[i, j])
 
     def update(self, docs, golds, state=None, drop=0., sgd=None, losses=None):
+        self.require_model()
         scores, bp_scores = self.model.begin_update(docs, drop=drop)
         loss, d_scores = self.get_loss(docs, golds, scores)
         bp_scores(d_scores, sgd=sgd)
@@ -985,6 +991,7 @@ class TextCategorizer(Pipe):
     def begin_training(self, get_gold_tuples=lambda: [], pipeline=None, sgd=None, **kwargs):
         if self.model is True:
             self.cfg["pretrained_vectors"] = kwargs.get("pretrained_vectors")
+            self.require_labels()
             self.model = self.Model(len(self.labels), **self.cfg)
             link_vectors_to_models(self.vocab)
         if sgd is None: