Allow Scorer.score_spans to handle pred docs with missing annotation (#9701)

If a predicted doc is missing the annotation according to
`has_annotation`, treat that doc as having no predictions rather than
raising an error.

The motivation for this is a combined tokenization+sents scorer for a
component where the sents annotation is optional. To provide a single
scorer in the component factory, the scorer must be able to continue
when the sents annotation is missing, i.e. when the component is not
annotating sents.
This commit is contained in:
Adriane Boyd 2021-11-23 15:17:19 +01:00 committed by GitHub
parent 36c7047946
commit a77f50baa4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -359,14 +359,15 @@ class Scorer:
pred_doc = example.predicted
gold_doc = example.reference
# Option to handle docs without annotation for this attribute
if has_annotation is not None:
if not has_annotation(gold_doc):
if has_annotation is not None and not has_annotation(gold_doc):
continue
# Find all labels in gold and doc
labels = set(
[k.label_ for k in getter(gold_doc, attr)]
+ [k.label_ for k in getter(pred_doc, attr)]
)
# Find all labels in gold
labels = set([k.label_ for k in getter(gold_doc, attr)])
# If labeled, find all labels in pred
if has_annotation is None or (
has_annotation is not None and has_annotation(pred_doc)
):
labels |= set([k.label_ for k in getter(pred_doc, attr)])
# Set up all labels for per type scoring and prepare gold per type
gold_per_type: Dict[str, Set] = {label: set() for label in labels}
for label in labels:
@ -384,6 +385,9 @@ class Scorer:
gold_spans.add(gold_span)
gold_per_type[span.label_].add(gold_span)
pred_per_type: Dict[str, Set] = {label: set() for label in labels}
if has_annotation is None or (
has_annotation is not None and has_annotation(pred_doc)
):
for span in example.get_aligned_spans_x2y(
getter(pred_doc, attr), allow_overlap
):