From 164d90878e54b483fd7a84c12d585e5f358ebf2a Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 12 Sep 2020 16:05:26 +0200
Subject: [PATCH] Fix tagger training when some tags are missing

---
Review notes:
 * Tagger.update: in addition to the existing "no tokens" guard, return
   early when no reference doc in the batch reports `is_tagged`.
 * Tagger.get_loss: the loss is now built with `missing_value=""`;
   presumably this makes tokens whose aligned TAG is the empty string
   count as unannotated -- confirm against the thinc loss documentation.
 * `names=` keeps using `self.labels`, the same attribute the removed
   line used.

 spacy/pipeline/tagger.pyx | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index 1f8b4eb7a..9baf83958 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -192,6 +192,9 @@ class Tagger(Pipe):
         if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
             # Handle cases where there are no tokens in any docs.
             return
+        if not any(eg.reference.is_tagged for eg in examples):
+            # Handle cases where there are no tagged tokens in any docs.
+            return
         set_dropout_rate(self.model, drop)
         tag_scores, bp_tag_scores = self.model.begin_update([eg.predicted for eg in examples])
         for sc in tag_scores:
@@ -251,7 +254,11 @@ class Tagger(Pipe):
         DOCS: https://nightly.spacy.io/api/tagger#get_loss
         """
         validate_examples(examples, "Tagger.get_loss")
-        loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
+        loss_func = SequenceCategoricalCrossentropy(
+            names=self.labels,
+            normalize=False,
+            missing_value=""
+        )
         truths = [eg.get_aligned("TAG", as_string=True) for eg in examples]
         d_scores, loss = loss_func(scores, truths)
         if self.model.ops.xp.isnan(loss):