mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 02:36:32 +03:00
Allow Scorer.score_spans to handle pred docs with missing annotation (#9701)
If the predicted docs are missing annotation according to `has_annotation`, treat them as having no predictions instead of raising an error. The motivation for this is a combined tokenization+sents scorer for a component where the sents annotation is optional: to provide a single scorer in the component factory, the scorer must be able to continue when the component is not annotating sents and the sents annotation is therefore missing from the predicted docs.
This commit is contained in:
parent
36c7047946
commit
a77f50baa4
|
@ -359,14 +359,15 @@ class Scorer:
|
||||||
pred_doc = example.predicted
|
pred_doc = example.predicted
|
||||||
gold_doc = example.reference
|
gold_doc = example.reference
|
||||||
# Option to handle docs without annotation for this attribute
|
# Option to handle docs without annotation for this attribute
|
||||||
if has_annotation is not None:
|
if has_annotation is not None and not has_annotation(gold_doc):
|
||||||
if not has_annotation(gold_doc):
|
continue
|
||||||
continue
|
# Find all labels in gold
|
||||||
# Find all labels in gold and doc
|
labels = set([k.label_ for k in getter(gold_doc, attr)])
|
||||||
labels = set(
|
# If labeled, find all labels in pred
|
||||||
[k.label_ for k in getter(gold_doc, attr)]
|
if has_annotation is None or (
|
||||||
+ [k.label_ for k in getter(pred_doc, attr)]
|
has_annotation is not None and has_annotation(pred_doc)
|
||||||
)
|
):
|
||||||
|
labels |= set([k.label_ for k in getter(pred_doc, attr)])
|
||||||
# Set up all labels for per type scoring and prepare gold per type
|
# Set up all labels for per type scoring and prepare gold per type
|
||||||
gold_per_type: Dict[str, Set] = {label: set() for label in labels}
|
gold_per_type: Dict[str, Set] = {label: set() for label in labels}
|
||||||
for label in labels:
|
for label in labels:
|
||||||
|
@ -384,16 +385,19 @@ class Scorer:
|
||||||
gold_spans.add(gold_span)
|
gold_spans.add(gold_span)
|
||||||
gold_per_type[span.label_].add(gold_span)
|
gold_per_type[span.label_].add(gold_span)
|
||||||
pred_per_type: Dict[str, Set] = {label: set() for label in labels}
|
pred_per_type: Dict[str, Set] = {label: set() for label in labels}
|
||||||
for span in example.get_aligned_spans_x2y(
|
if has_annotation is None or (
|
||||||
getter(pred_doc, attr), allow_overlap
|
has_annotation is not None and has_annotation(pred_doc)
|
||||||
):
|
):
|
||||||
pred_span: Tuple
|
for span in example.get_aligned_spans_x2y(
|
||||||
if labeled:
|
getter(pred_doc, attr), allow_overlap
|
||||||
pred_span = (span.label_, span.start, span.end - 1)
|
):
|
||||||
else:
|
pred_span: Tuple
|
||||||
pred_span = (span.start, span.end - 1)
|
if labeled:
|
||||||
pred_spans.add(pred_span)
|
pred_span = (span.label_, span.start, span.end - 1)
|
||||||
pred_per_type[span.label_].add(pred_span)
|
else:
|
||||||
|
pred_span = (span.start, span.end - 1)
|
||||||
|
pred_spans.add(pred_span)
|
||||||
|
pred_per_type[span.label_].add(pred_span)
|
||||||
# Scores per label
|
# Scores per label
|
||||||
if labeled:
|
if labeled:
|
||||||
for k, v in score_per_type.items():
|
for k, v in score_per_type.items():
|
||||||
|
|
Loading…
Reference in New Issue
Block a user