From 164d90878e54b483fd7a84c12d585e5f358ebf2a Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 12 Sep 2020 16:05:26 +0200
Subject: [PATCH] Fix tagger training when some tags are missing

---
 spacy/pipeline/tagger.pyx | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index 1f8b4eb7a..9baf83958 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -192,6 +192,9 @@ class Tagger(Pipe):
         if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
             # Handle cases where there are no tokens in any docs.
             return
+        if not any(eg.reference.is_tagged for eg in examples):
+            # Handle cases where there are no tagged tokens in any docs.
+            return
         set_dropout_rate(self.model, drop)
         tag_scores, bp_tag_scores = self.model.begin_update([eg.predicted for eg in examples])
         for sc in tag_scores:
@@ -251,7 +254,11 @@ class Tagger(Pipe):
         DOCS: https://nightly.spacy.io/api/tagger#get_loss
         """
         validate_examples(examples, "Tagger.get_loss")
-        loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
+        loss_func = SequenceCategoricalCrossentropy(
+            names=self.labels,
+            normalize=False,
+            missing_value=""  # "" marks tokens without a gold tag
+        )
         truths = [eg.get_aligned("TAG", as_string=True) for eg in examples]
         d_scores, loss = loss_func(scores, truths)
         if self.model.ops.xp.isnan(loss):