Allow Scorer.score_spans to handle pred docs with missing annotation (#9701)

If `has_annotation` reports that a predicted doc is missing the annotation, treat that doc as having no predictions instead of raising an error. The motivation for this is a combined tokenization+sents scorer for a component where the sents annotation is optional. To provide a single scorer in the component factory, the scorer needs to be able to continue when the component is not annotating sents and the sents annotation is therefore missing.
2025-07-15 18:52:29 +03:00 · 2021-11-23 15:17:19 +01:00 · 2021-11-23 15:17:19 +01:00 · a77f50baa4
commit a77f50baa4
parent 36c7047946
1 changed files with 21 additions and 17 deletions
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -359,14 +359,15 @@ class Scorer:
            pred_doc = example.predicted
            gold_doc = example.reference
            # Option to handle docs without annotation for this attribute
-            if has_annotation is not None:
-                if not has_annotation(gold_doc):
+            if has_annotation is not None and not has_annotation(gold_doc):
                continue
-            # Find all labels in gold and doc
-            labels = set(
-                [k.label_ for k in getter(gold_doc, attr)]
-                + [k.label_ for k in getter(pred_doc, attr)]
-            )
+            # Find all labels in gold
+            labels = set([k.label_ for k in getter(gold_doc, attr)])
+            # If labeled, find all labels in pred
+            if has_annotation is None or (
+                has_annotation is not None and has_annotation(pred_doc)
+            ):
+                labels |= set([k.label_ for k in getter(pred_doc, attr)])
            # Set up all labels for per type scoring and prepare gold per type
            gold_per_type: Dict[str, Set] = {label: set() for label in labels}
            for label in labels:
@@ -384,6 +385,9 @@ class Scorer:
                gold_spans.add(gold_span)
                gold_per_type[span.label_].add(gold_span)
            pred_per_type: Dict[str, Set] = {label: set() for label in labels}
+            if has_annotation is None or (
+                has_annotation is not None and has_annotation(pred_doc)
+            ):
                for span in example.get_aligned_spans_x2y(
                    getter(pred_doc, attr), allow_overlap
                ):