mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Handle missing reference values in scorer (#6286)
* Handle missing reference values in scorer Handle missing values in reference doc during scoring where it is possible to detect an unset state for the attribute. If no reference docs contain annotation, `None` is returned instead of a score. `spacy evaluate` displays `-` for missing scores and the missing scores are saved as `None`/`null` in the metrics. Attributes without unset states: * `token.head`: relies on `token.dep` to recognize unset values * `doc.cats`: unable to handle missing annotation Additional changes: * add optional `has_annotation` check to `score_scans` to replace `doc.sents` hack * update `score_token_attr_per_feat` to handle missing and empty morph representations * fix bug in `Doc.has_annotation` for normalization of `IS_SENT_START` vs. `SENT_START` * Fix import * Update return types
This commit is contained in:
parent
5d2cb86c34
commit
a4b32b9552
|
@ -98,10 +98,13 @@ def evaluate(
|
||||||
if key in scores:
|
if key in scores:
|
||||||
if key == "cats_score":
|
if key == "cats_score":
|
||||||
metric = metric + " (" + scores.get("cats_score_desc", "unk") + ")"
|
metric = metric + " (" + scores.get("cats_score_desc", "unk") + ")"
|
||||||
if key == "speed":
|
if isinstance(scores[key], (int, float)):
|
||||||
results[metric] = f"{scores[key]:.0f}"
|
if key == "speed":
|
||||||
|
results[metric] = f"{scores[key]:.0f}"
|
||||||
|
else:
|
||||||
|
results[metric] = f"{scores[key]*100:.2f}"
|
||||||
else:
|
else:
|
||||||
results[metric] = f"{scores[key]*100:.2f}"
|
results[metric] = "-"
|
||||||
data[re.sub(r"[\s/]", "_", key.lower())] = scores[key]
|
data[re.sub(r"[\s/]", "_", key.lower())] = scores[key]
|
||||||
|
|
||||||
msg.table(results, title="Results")
|
msg.table(results, title="Results")
|
||||||
|
|
|
@ -226,6 +226,9 @@ class AttributeRuler(Pipe):
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/tagger#score
|
DOCS: https://nightly.spacy.io/api/tagger#score
|
||||||
"""
|
"""
|
||||||
|
def morph_key_getter(token, attr):
|
||||||
|
return getattr(token, attr).key
|
||||||
|
|
||||||
validate_examples(examples, "AttributeRuler.score")
|
validate_examples(examples, "AttributeRuler.score")
|
||||||
results = {}
|
results = {}
|
||||||
attrs = set()
|
attrs = set()
|
||||||
|
@ -237,7 +240,8 @@ class AttributeRuler(Pipe):
|
||||||
elif attr == POS:
|
elif attr == POS:
|
||||||
results.update(Scorer.score_token_attr(examples, "pos", **kwargs))
|
results.update(Scorer.score_token_attr(examples, "pos", **kwargs))
|
||||||
elif attr == MORPH:
|
elif attr == MORPH:
|
||||||
results.update(Scorer.score_token_attr(examples, "morph", **kwargs))
|
results.update(Scorer.score_token_attr(examples, "morph", getter=morph_key_getter, **kwargs))
|
||||||
|
results.update(Scorer.score_token_attr_per_feat(examples, "morph", getter=morph_key_getter, **kwargs))
|
||||||
elif attr == LEMMA:
|
elif attr == LEMMA:
|
||||||
results.update(Scorer.score_token_attr(examples, "lemma", **kwargs))
|
results.update(Scorer.score_token_attr(examples, "lemma", **kwargs))
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -155,13 +155,16 @@ cdef class DependencyParser(Parser):
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/dependencyparser#score
|
DOCS: https://nightly.spacy.io/api/dependencyparser#score
|
||||||
"""
|
"""
|
||||||
|
def has_sents(doc):
|
||||||
|
return doc.has_annotation("SENT_START")
|
||||||
|
|
||||||
validate_examples(examples, "DependencyParser.score")
|
validate_examples(examples, "DependencyParser.score")
|
||||||
def dep_getter(token, attr):
|
def dep_getter(token, attr):
|
||||||
dep = getattr(token, attr)
|
dep = getattr(token, attr)
|
||||||
dep = token.vocab.strings.as_string(dep).lower()
|
dep = token.vocab.strings.as_string(dep).lower()
|
||||||
return dep
|
return dep
|
||||||
results = {}
|
results = {}
|
||||||
results.update(Scorer.score_spans(examples, "sents", **kwargs))
|
results.update(Scorer.score_spans(examples, "sents", has_annotation=has_sents, **kwargs))
|
||||||
kwargs.setdefault("getter", dep_getter)
|
kwargs.setdefault("getter", dep_getter)
|
||||||
kwargs.setdefault("ignore_labels", ("p", "punct"))
|
kwargs.setdefault("ignore_labels", ("p", "punct"))
|
||||||
results.update(Scorer.score_deps(examples, "dep", **kwargs))
|
results.update(Scorer.score_deps(examples, "dep", **kwargs))
|
||||||
|
|
|
@ -10,7 +10,7 @@ from ..errors import Errors
|
||||||
from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList
|
from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList
|
||||||
from ..tokens import Doc, Span
|
from ..tokens import Doc, Span
|
||||||
from ..matcher import Matcher, PhraseMatcher
|
from ..matcher import Matcher, PhraseMatcher
|
||||||
from ..scorer import Scorer
|
from ..scorer import get_ner_prf
|
||||||
from ..training import validate_examples
|
from ..training import validate_examples
|
||||||
|
|
||||||
|
|
||||||
|
@ -340,7 +340,7 @@ class EntityRuler(Pipe):
|
||||||
|
|
||||||
def score(self, examples, **kwargs):
|
def score(self, examples, **kwargs):
|
||||||
validate_examples(examples, "EntityRuler.score")
|
validate_examples(examples, "EntityRuler.score")
|
||||||
return Scorer.score_spans(examples, "ents", **kwargs)
|
return get_ner_prf(examples)
|
||||||
|
|
||||||
def from_bytes(
|
def from_bytes(
|
||||||
self, patterns_bytes: bytes, *, exclude: Iterable[str] = SimpleFrozenList()
|
self, patterns_bytes: bytes, *, exclude: Iterable[str] = SimpleFrozenList()
|
||||||
|
|
|
@ -251,10 +251,13 @@ class Morphologizer(Tagger):
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/morphologizer#score
|
DOCS: https://nightly.spacy.io/api/morphologizer#score
|
||||||
"""
|
"""
|
||||||
|
def morph_key_getter(token, attr):
|
||||||
|
return getattr(token, attr).key
|
||||||
|
|
||||||
validate_examples(examples, "Morphologizer.score")
|
validate_examples(examples, "Morphologizer.score")
|
||||||
results = {}
|
results = {}
|
||||||
results.update(Scorer.score_token_attr(examples, "pos", **kwargs))
|
results.update(Scorer.score_token_attr(examples, "pos", **kwargs))
|
||||||
results.update(Scorer.score_token_attr(examples, "morph", **kwargs))
|
results.update(Scorer.score_token_attr(examples, "morph", getter=morph_key_getter, **kwargs))
|
||||||
results.update(Scorer.score_token_attr_per_feat(examples,
|
results.update(Scorer.score_token_attr_per_feat(examples,
|
||||||
"morph", **kwargs))
|
"morph", getter=morph_key_getter, **kwargs))
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -122,13 +122,4 @@ cdef class EntityRecognizer(Parser):
|
||||||
DOCS: https://nightly.spacy.io/api/entityrecognizer#score
|
DOCS: https://nightly.spacy.io/api/entityrecognizer#score
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "EntityRecognizer.score")
|
validate_examples(examples, "EntityRecognizer.score")
|
||||||
score_per_type = get_ner_prf(examples)
|
return get_ner_prf(examples)
|
||||||
totals = PRFScore()
|
|
||||||
for prf in score_per_type.values():
|
|
||||||
totals += prf
|
|
||||||
return {
|
|
||||||
"ents_p": totals.precision,
|
|
||||||
"ents_r": totals.recall,
|
|
||||||
"ents_f": totals.fscore,
|
|
||||||
"ents_per_type": {k: v.to_dict() for k, v in score_per_type.items()},
|
|
||||||
}
|
|
||||||
|
|
|
@ -155,8 +155,11 @@ class Sentencizer(Pipe):
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/sentencizer#score
|
DOCS: https://nightly.spacy.io/api/sentencizer#score
|
||||||
"""
|
"""
|
||||||
|
def has_sents(doc):
|
||||||
|
return doc.has_annotation("SENT_START")
|
||||||
|
|
||||||
validate_examples(examples, "Sentencizer.score")
|
validate_examples(examples, "Sentencizer.score")
|
||||||
results = Scorer.score_spans(examples, "sents", **kwargs)
|
results = Scorer.score_spans(examples, "sents", has_annotation=has_sents, **kwargs)
|
||||||
del results["sents_per_type"]
|
del results["sents_per_type"]
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
|
@ -160,7 +160,10 @@ class SentenceRecognizer(Tagger):
|
||||||
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
|
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
|
||||||
DOCS: https://nightly.spacy.io/api/sentencerecognizer#score
|
DOCS: https://nightly.spacy.io/api/sentencerecognizer#score
|
||||||
"""
|
"""
|
||||||
|
def has_sents(doc):
|
||||||
|
return doc.has_annotation("SENT_START")
|
||||||
|
|
||||||
validate_examples(examples, "SentenceRecognizer.score")
|
validate_examples(examples, "SentenceRecognizer.score")
|
||||||
results = Scorer.score_spans(examples, "sents", **kwargs)
|
results = Scorer.score_spans(examples, "sents", has_annotation=has_sents, **kwargs)
|
||||||
del results["sents_per_type"]
|
del results["sents_per_type"]
|
||||||
return results
|
return results
|
||||||
|
|
257
spacy/scorer.py
257
spacy/scorer.py
|
@ -1,9 +1,9 @@
|
||||||
from typing import Optional, Iterable, Dict, Any, Callable, TYPE_CHECKING
|
from typing import Optional, Iterable, Dict, Set, Any, Callable, TYPE_CHECKING
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
from .training import Example
|
from .training import Example
|
||||||
from .tokens import Token, Doc, Span
|
from .tokens import Token, Doc, Span, MorphAnalysis
|
||||||
from .errors import Errors
|
from .errors import Errors
|
||||||
from .util import get_lang_class, SimpleFrozenList
|
from .util import get_lang_class, SimpleFrozenList
|
||||||
from .morphology import Morphology
|
from .morphology import Morphology
|
||||||
|
@ -13,7 +13,8 @@ if TYPE_CHECKING:
|
||||||
from .language import Language # noqa: F401
|
from .language import Language # noqa: F401
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_PIPELINE = ["senter", "tagger", "morphologizer", "parser", "ner", "textcat"]
|
DEFAULT_PIPELINE = ("senter", "tagger", "morphologizer", "parser", "ner", "textcat")
|
||||||
|
MISSING_VALUES = frozenset([None, 0, ""])
|
||||||
|
|
||||||
|
|
||||||
class PRFScore:
|
class PRFScore:
|
||||||
|
@ -24,6 +25,9 @@ class PRFScore:
|
||||||
self.fp = 0
|
self.fp = 0
|
||||||
self.fn = 0
|
self.fn = 0
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
return self.tp + self.fp + self.fn
|
||||||
|
|
||||||
def __iadd__(self, other):
|
def __iadd__(self, other):
|
||||||
self.tp += other.tp
|
self.tp += other.tp
|
||||||
self.fp += other.fp
|
self.fp += other.fp
|
||||||
|
@ -94,7 +98,7 @@ class Scorer:
|
||||||
self,
|
self,
|
||||||
nlp: Optional["Language"] = None,
|
nlp: Optional["Language"] = None,
|
||||||
default_lang: str = "xx",
|
default_lang: str = "xx",
|
||||||
default_pipeline=DEFAULT_PIPELINE,
|
default_pipeline: Iterable[str] = DEFAULT_PIPELINE,
|
||||||
**cfg,
|
**cfg,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize the Scorer.
|
"""Initialize the Scorer.
|
||||||
|
@ -126,13 +130,13 @@ class Scorer:
|
||||||
return scores
|
return scores
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def score_tokenization(examples: Iterable[Example], **cfg) -> Dict[str, float]:
|
def score_tokenization(examples: Iterable[Example], **cfg) -> Dict[str, Any]:
|
||||||
"""Returns accuracy and PRF scores for tokenization.
|
"""Returns accuracy and PRF scores for tokenization.
|
||||||
* token_acc: # correct tokens / # gold tokens
|
* token_acc: # correct tokens / # gold tokens
|
||||||
* token_p/r/f: PRF for token character spans
|
* token_p/r/f: PRF for token character spans
|
||||||
|
|
||||||
examples (Iterable[Example]): Examples to score
|
examples (Iterable[Example]): Examples to score
|
||||||
RETURNS (Dict[str, float]): A dictionary containing the scores
|
RETURNS (Dict[str, Any]): A dictionary containing the scores
|
||||||
token_acc/p/r/f.
|
token_acc/p/r/f.
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/scorer#score_tokenization
|
DOCS: https://nightly.spacy.io/api/scorer#score_tokenization
|
||||||
|
@ -142,6 +146,8 @@ class Scorer:
|
||||||
for example in examples:
|
for example in examples:
|
||||||
gold_doc = example.reference
|
gold_doc = example.reference
|
||||||
pred_doc = example.predicted
|
pred_doc = example.predicted
|
||||||
|
if gold_doc.has_unknown_spaces:
|
||||||
|
continue
|
||||||
align = example.alignment
|
align = example.alignment
|
||||||
gold_spans = set()
|
gold_spans = set()
|
||||||
pred_spans = set()
|
pred_spans = set()
|
||||||
|
@ -158,12 +164,20 @@ class Scorer:
|
||||||
else:
|
else:
|
||||||
acc_score.tp += 1
|
acc_score.tp += 1
|
||||||
prf_score.score_set(pred_spans, gold_spans)
|
prf_score.score_set(pred_spans, gold_spans)
|
||||||
return {
|
if len(acc_score) > 0:
|
||||||
"token_acc": acc_score.fscore,
|
return {
|
||||||
"token_p": prf_score.precision,
|
"token_acc": acc_score.fscore,
|
||||||
"token_r": prf_score.recall,
|
"token_p": prf_score.precision,
|
||||||
"token_f": prf_score.fscore,
|
"token_r": prf_score.recall,
|
||||||
}
|
"token_f": prf_score.fscore,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"token_acc": None,
|
||||||
|
"token_p": None,
|
||||||
|
"token_r": None,
|
||||||
|
"token_f": None
|
||||||
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def score_token_attr(
|
def score_token_attr(
|
||||||
|
@ -171,8 +185,9 @@ class Scorer:
|
||||||
attr: str,
|
attr: str,
|
||||||
*,
|
*,
|
||||||
getter: Callable[[Token, str], Any] = getattr,
|
getter: Callable[[Token, str], Any] = getattr,
|
||||||
|
missing_values: Set[Any] = MISSING_VALUES,
|
||||||
**cfg,
|
**cfg,
|
||||||
) -> Dict[str, float]:
|
) -> Dict[str, Any]:
|
||||||
"""Returns an accuracy score for a token-level attribute.
|
"""Returns an accuracy score for a token-level attribute.
|
||||||
|
|
||||||
examples (Iterable[Example]): Examples to score
|
examples (Iterable[Example]): Examples to score
|
||||||
|
@ -180,7 +195,7 @@ class Scorer:
|
||||||
getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
|
getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
|
||||||
getter(token, attr) should return the value of the attribute for an
|
getter(token, attr) should return the value of the attribute for an
|
||||||
individual token.
|
individual token.
|
||||||
RETURNS (Dict[str, float]): A dictionary containing the accuracy score
|
RETURNS (Dict[str, Any]): A dictionary containing the accuracy score
|
||||||
under the key attr_acc.
|
under the key attr_acc.
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/scorer#score_token_attr
|
DOCS: https://nightly.spacy.io/api/scorer#score_token_attr
|
||||||
|
@ -191,17 +206,27 @@ class Scorer:
|
||||||
pred_doc = example.predicted
|
pred_doc = example.predicted
|
||||||
align = example.alignment
|
align = example.alignment
|
||||||
gold_tags = set()
|
gold_tags = set()
|
||||||
|
missing_indices = set()
|
||||||
for gold_i, token in enumerate(gold_doc):
|
for gold_i, token in enumerate(gold_doc):
|
||||||
gold_tags.add((gold_i, getter(token, attr)))
|
value = getter(token, attr)
|
||||||
|
if value not in missing_values:
|
||||||
|
gold_tags.add((gold_i, getter(token, attr)))
|
||||||
|
else:
|
||||||
|
missing_indices.add(gold_i)
|
||||||
pred_tags = set()
|
pred_tags = set()
|
||||||
for token in pred_doc:
|
for token in pred_doc:
|
||||||
if token.orth_.isspace():
|
if token.orth_.isspace():
|
||||||
continue
|
continue
|
||||||
if align.x2y.lengths[token.i] == 1:
|
if align.x2y.lengths[token.i] == 1:
|
||||||
gold_i = align.x2y[token.i].dataXd[0, 0]
|
gold_i = align.x2y[token.i].dataXd[0, 0]
|
||||||
pred_tags.add((gold_i, getter(token, attr)))
|
if gold_i not in missing_indices:
|
||||||
|
pred_tags.add((gold_i, getter(token, attr)))
|
||||||
tag_score.score_set(pred_tags, gold_tags)
|
tag_score.score_set(pred_tags, gold_tags)
|
||||||
return {f"{attr}_acc": tag_score.fscore}
|
score_key = f"{attr}_acc"
|
||||||
|
if len(tag_score) == 0:
|
||||||
|
return {score_key: None}
|
||||||
|
else:
|
||||||
|
return {score_key: tag_score.fscore}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def score_token_attr_per_feat(
|
def score_token_attr_per_feat(
|
||||||
|
@ -209,8 +234,9 @@ class Scorer:
|
||||||
attr: str,
|
attr: str,
|
||||||
*,
|
*,
|
||||||
getter: Callable[[Token, str], Any] = getattr,
|
getter: Callable[[Token, str], Any] = getattr,
|
||||||
|
missing_values: Set[Any] = MISSING_VALUES,
|
||||||
**cfg,
|
**cfg,
|
||||||
):
|
) -> Dict[str, Any]:
|
||||||
"""Return PRF scores per feat for a token attribute in UFEATS format.
|
"""Return PRF scores per feat for a token attribute in UFEATS format.
|
||||||
|
|
||||||
examples (Iterable[Example]): Examples to score
|
examples (Iterable[Example]): Examples to score
|
||||||
|
@ -218,7 +244,7 @@ class Scorer:
|
||||||
getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
|
getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
|
||||||
getter(token, attr) should return the value of the attribute for an
|
getter(token, attr) should return the value of the attribute for an
|
||||||
individual token.
|
individual token.
|
||||||
RETURNS (dict): A dictionary containing the per-feat PRF scores unders
|
RETURNS (dict): A dictionary containing the per-feat PRF scores under
|
||||||
the key attr_per_feat.
|
the key attr_per_feat.
|
||||||
"""
|
"""
|
||||||
per_feat = {}
|
per_feat = {}
|
||||||
|
@ -227,9 +253,11 @@ class Scorer:
|
||||||
gold_doc = example.reference
|
gold_doc = example.reference
|
||||||
align = example.alignment
|
align = example.alignment
|
||||||
gold_per_feat = {}
|
gold_per_feat = {}
|
||||||
|
missing_indices = set()
|
||||||
for gold_i, token in enumerate(gold_doc):
|
for gold_i, token in enumerate(gold_doc):
|
||||||
morph = str(getter(token, attr))
|
value = getter(token, attr)
|
||||||
if morph:
|
morph = gold_doc.vocab.strings[value]
|
||||||
|
if value not in missing_values and morph != Morphology.EMPTY_MORPH:
|
||||||
for feat in morph.split(Morphology.FEATURE_SEP):
|
for feat in morph.split(Morphology.FEATURE_SEP):
|
||||||
field, values = feat.split(Morphology.FIELD_SEP)
|
field, values = feat.split(Morphology.FIELD_SEP)
|
||||||
if field not in per_feat:
|
if field not in per_feat:
|
||||||
|
@ -237,27 +265,35 @@ class Scorer:
|
||||||
if field not in gold_per_feat:
|
if field not in gold_per_feat:
|
||||||
gold_per_feat[field] = set()
|
gold_per_feat[field] = set()
|
||||||
gold_per_feat[field].add((gold_i, feat))
|
gold_per_feat[field].add((gold_i, feat))
|
||||||
|
else:
|
||||||
|
missing_indices.add(gold_i)
|
||||||
pred_per_feat = {}
|
pred_per_feat = {}
|
||||||
for token in pred_doc:
|
for token in pred_doc:
|
||||||
if token.orth_.isspace():
|
if token.orth_.isspace():
|
||||||
continue
|
continue
|
||||||
if align.x2y.lengths[token.i] == 1:
|
if align.x2y.lengths[token.i] == 1:
|
||||||
gold_i = align.x2y[token.i].dataXd[0, 0]
|
gold_i = align.x2y[token.i].dataXd[0, 0]
|
||||||
morph = str(getter(token, attr))
|
if gold_i not in missing_indices:
|
||||||
if morph:
|
value = getter(token, attr)
|
||||||
for feat in morph.split("|"):
|
morph = gold_doc.vocab.strings[value]
|
||||||
field, values = feat.split("=")
|
if value not in missing_values and morph != Morphology.EMPTY_MORPH:
|
||||||
if field not in per_feat:
|
for feat in morph.split(Morphology.FEATURE_SEP):
|
||||||
per_feat[field] = PRFScore()
|
field, values = feat.split(Morphology.FIELD_SEP)
|
||||||
if field not in pred_per_feat:
|
if field not in per_feat:
|
||||||
pred_per_feat[field] = set()
|
per_feat[field] = PRFScore()
|
||||||
pred_per_feat[field].add((gold_i, feat))
|
if field not in pred_per_feat:
|
||||||
|
pred_per_feat[field] = set()
|
||||||
|
pred_per_feat[field].add((gold_i, feat))
|
||||||
for field in per_feat:
|
for field in per_feat:
|
||||||
per_feat[field].score_set(
|
per_feat[field].score_set(
|
||||||
pred_per_feat.get(field, set()), gold_per_feat.get(field, set())
|
pred_per_feat.get(field, set()), gold_per_feat.get(field, set())
|
||||||
)
|
)
|
||||||
result = {k: v.to_dict() for k, v in per_feat.items()}
|
score_key = f"{attr}_per_feat"
|
||||||
return {f"{attr}_per_feat": result}
|
if any([len(v) for v in per_feat.values()]):
|
||||||
|
result = {k: v.to_dict() for k, v in per_feat.items()}
|
||||||
|
return {score_key: result}
|
||||||
|
else:
|
||||||
|
return {score_key: None}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def score_spans(
|
def score_spans(
|
||||||
|
@ -265,6 +301,7 @@ class Scorer:
|
||||||
attr: str,
|
attr: str,
|
||||||
*,
|
*,
|
||||||
getter: Callable[[Doc, str], Iterable[Span]] = getattr,
|
getter: Callable[[Doc, str], Iterable[Span]] = getattr,
|
||||||
|
has_annotation: Optional[Callable[[Doc], bool]] = None,
|
||||||
**cfg,
|
**cfg,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Returns PRF scores for labeled spans.
|
"""Returns PRF scores for labeled spans.
|
||||||
|
@ -284,18 +321,10 @@ class Scorer:
|
||||||
for example in examples:
|
for example in examples:
|
||||||
pred_doc = example.predicted
|
pred_doc = example.predicted
|
||||||
gold_doc = example.reference
|
gold_doc = example.reference
|
||||||
# TODO
|
# Option to handle docs without sents
|
||||||
# This is a temporary hack to work around the problem that the scorer
|
if has_annotation is not None:
|
||||||
# fails if you have examples that are not fully annotated for all
|
if not has_annotation(gold_doc):
|
||||||
# the tasks in your pipeline. For instance, you might have a corpus
|
continue
|
||||||
# of NER annotations that does not set sentence boundaries, but the
|
|
||||||
# pipeline includes a parser or senter, and then the score_weights
|
|
||||||
# are used to evaluate that component. When the scorer attempts
|
|
||||||
# to read the sentences from the gold document, it fails.
|
|
||||||
try:
|
|
||||||
list(getter(gold_doc, attr))
|
|
||||||
except ValueError:
|
|
||||||
continue
|
|
||||||
# Find all labels in gold and doc
|
# Find all labels in gold and doc
|
||||||
labels = set(
|
labels = set(
|
||||||
[k.label_ for k in getter(gold_doc, attr)]
|
[k.label_ for k in getter(gold_doc, attr)]
|
||||||
|
@ -323,13 +352,21 @@ class Scorer:
|
||||||
v.score_set(pred_per_type[k], gold_per_type[k])
|
v.score_set(pred_per_type[k], gold_per_type[k])
|
||||||
# Score for all labels
|
# Score for all labels
|
||||||
score.score_set(pred_spans, gold_spans)
|
score.score_set(pred_spans, gold_spans)
|
||||||
results = {
|
if len(score) > 0:
|
||||||
f"{attr}_p": score.precision,
|
return {
|
||||||
f"{attr}_r": score.recall,
|
f"{attr}_p": score.precision,
|
||||||
f"{attr}_f": score.fscore,
|
f"{attr}_r": score.recall,
|
||||||
f"{attr}_per_type": {k: v.to_dict() for k, v in score_per_type.items()},
|
f"{attr}_f": score.fscore,
|
||||||
}
|
f"{attr}_per_type": {k: v.to_dict() for k, v in score_per_type.items()},
|
||||||
return results
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
f"{attr}_p": None,
|
||||||
|
f"{attr}_r": None,
|
||||||
|
f"{attr}_f": None,
|
||||||
|
f"{attr}_per_type": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def score_cats(
|
def score_cats(
|
||||||
|
@ -390,9 +427,6 @@ class Scorer:
|
||||||
pred_cats = getter(example.predicted, attr)
|
pred_cats = getter(example.predicted, attr)
|
||||||
gold_cats = getter(example.reference, attr)
|
gold_cats = getter(example.reference, attr)
|
||||||
|
|
||||||
# I think the AUC metric is applicable regardless of whether we're
|
|
||||||
# doing multi-label classification? Unsure. If not, move this into
|
|
||||||
# the elif pred_cats and gold_cats block below.
|
|
||||||
for label in labels:
|
for label in labels:
|
||||||
pred_score = pred_cats.get(label, 0.0)
|
pred_score = pred_cats.get(label, 0.0)
|
||||||
gold_score = gold_cats.get(label, 0.0)
|
gold_score = gold_cats.get(label, 0.0)
|
||||||
|
@ -542,6 +576,7 @@ class Scorer:
|
||||||
head_attr: str = "head",
|
head_attr: str = "head",
|
||||||
head_getter: Callable[[Token, str], Token] = getattr,
|
head_getter: Callable[[Token, str], Token] = getattr,
|
||||||
ignore_labels: Iterable[str] = SimpleFrozenList(),
|
ignore_labels: Iterable[str] = SimpleFrozenList(),
|
||||||
|
missing_values: Set[Any] = MISSING_VALUES,
|
||||||
**cfg,
|
**cfg,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Returns the UAS, LAS, and LAS per type scores for dependency
|
"""Returns the UAS, LAS, and LAS per type scores for dependency
|
||||||
|
@ -566,6 +601,7 @@ class Scorer:
|
||||||
unlabelled = PRFScore()
|
unlabelled = PRFScore()
|
||||||
labelled = PRFScore()
|
labelled = PRFScore()
|
||||||
labelled_per_dep = dict()
|
labelled_per_dep = dict()
|
||||||
|
missing_indices = set()
|
||||||
for example in examples:
|
for example in examples:
|
||||||
gold_doc = example.reference
|
gold_doc = example.reference
|
||||||
pred_doc = example.predicted
|
pred_doc = example.predicted
|
||||||
|
@ -575,13 +611,16 @@ class Scorer:
|
||||||
for gold_i, token in enumerate(gold_doc):
|
for gold_i, token in enumerate(gold_doc):
|
||||||
dep = getter(token, attr)
|
dep = getter(token, attr)
|
||||||
head = head_getter(token, head_attr)
|
head = head_getter(token, head_attr)
|
||||||
if dep not in ignore_labels:
|
if dep not in missing_values:
|
||||||
gold_deps.add((gold_i, head.i, dep))
|
if dep not in ignore_labels:
|
||||||
if dep not in labelled_per_dep:
|
gold_deps.add((gold_i, head.i, dep))
|
||||||
labelled_per_dep[dep] = PRFScore()
|
if dep not in labelled_per_dep:
|
||||||
if dep not in gold_deps_per_dep:
|
labelled_per_dep[dep] = PRFScore()
|
||||||
gold_deps_per_dep[dep] = set()
|
if dep not in gold_deps_per_dep:
|
||||||
gold_deps_per_dep[dep].add((gold_i, head.i, dep))
|
gold_deps_per_dep[dep] = set()
|
||||||
|
gold_deps_per_dep[dep].add((gold_i, head.i, dep))
|
||||||
|
else:
|
||||||
|
missing_indices.add(gold_i)
|
||||||
pred_deps = set()
|
pred_deps = set()
|
||||||
pred_deps_per_dep = {}
|
pred_deps_per_dep = {}
|
||||||
for token in pred_doc:
|
for token in pred_doc:
|
||||||
|
@ -591,25 +630,26 @@ class Scorer:
|
||||||
gold_i = None
|
gold_i = None
|
||||||
else:
|
else:
|
||||||
gold_i = align.x2y[token.i].dataXd[0, 0]
|
gold_i = align.x2y[token.i].dataXd[0, 0]
|
||||||
dep = getter(token, attr)
|
if gold_i not in missing_indices:
|
||||||
head = head_getter(token, head_attr)
|
dep = getter(token, attr)
|
||||||
if dep not in ignore_labels and token.orth_.strip():
|
head = head_getter(token, head_attr)
|
||||||
if align.x2y.lengths[head.i] == 1:
|
if dep not in ignore_labels and token.orth_.strip():
|
||||||
gold_head = align.x2y[head.i].dataXd[0, 0]
|
if align.x2y.lengths[head.i] == 1:
|
||||||
else:
|
gold_head = align.x2y[head.i].dataXd[0, 0]
|
||||||
gold_head = None
|
else:
|
||||||
# None is indistinct, so we can't just add it to the set
|
gold_head = None
|
||||||
# Multiple (None, None) deps are possible
|
# None is indistinct, so we can't just add it to the set
|
||||||
if gold_i is None or gold_head is None:
|
# Multiple (None, None) deps are possible
|
||||||
unlabelled.fp += 1
|
if gold_i is None or gold_head is None:
|
||||||
labelled.fp += 1
|
unlabelled.fp += 1
|
||||||
else:
|
labelled.fp += 1
|
||||||
pred_deps.add((gold_i, gold_head, dep))
|
else:
|
||||||
if dep not in labelled_per_dep:
|
pred_deps.add((gold_i, gold_head, dep))
|
||||||
labelled_per_dep[dep] = PRFScore()
|
if dep not in labelled_per_dep:
|
||||||
if dep not in pred_deps_per_dep:
|
labelled_per_dep[dep] = PRFScore()
|
||||||
pred_deps_per_dep[dep] = set()
|
if dep not in pred_deps_per_dep:
|
||||||
pred_deps_per_dep[dep].add((gold_i, gold_head, dep))
|
pred_deps_per_dep[dep] = set()
|
||||||
|
pred_deps_per_dep[dep].add((gold_i, gold_head, dep))
|
||||||
labelled.score_set(pred_deps, gold_deps)
|
labelled.score_set(pred_deps, gold_deps)
|
||||||
for dep in labelled_per_dep:
|
for dep in labelled_per_dep:
|
||||||
labelled_per_dep[dep].score_set(
|
labelled_per_dep[dep].score_set(
|
||||||
|
@ -618,29 +658,34 @@ class Scorer:
|
||||||
unlabelled.score_set(
|
unlabelled.score_set(
|
||||||
set(item[:2] for item in pred_deps), set(item[:2] for item in gold_deps)
|
set(item[:2] for item in pred_deps), set(item[:2] for item in gold_deps)
|
||||||
)
|
)
|
||||||
return {
|
if len(unlabelled) > 0:
|
||||||
f"{attr}_uas": unlabelled.fscore,
|
return {
|
||||||
f"{attr}_las": labelled.fscore,
|
f"{attr}_uas": unlabelled.fscore,
|
||||||
f"{attr}_las_per_type": {
|
f"{attr}_las": labelled.fscore,
|
||||||
k: v.to_dict() for k, v in labelled_per_dep.items()
|
f"{attr}_las_per_type": {
|
||||||
},
|
k: v.to_dict() for k, v in labelled_per_dep.items()
|
||||||
}
|
},
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
f"{attr}_uas": None,
|
||||||
|
f"{attr}_las": None,
|
||||||
|
f"{attr}_las_per_type": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_ner_prf(examples: Iterable[Example]) -> Dict[str, PRFScore]:
|
def get_ner_prf(examples: Iterable[Example]) -> Dict[str, Any]:
|
||||||
"""Compute per-entity PRFScore objects for a sequence of examples. The
|
"""Compute micro-PRF and per-entity PRF scores for a sequence of examples.
|
||||||
results are returned as a dictionary keyed by the entity type. You can
|
|
||||||
add the PRFScore objects to get micro-averaged total.
|
|
||||||
"""
|
"""
|
||||||
scores = defaultdict(PRFScore)
|
score_per_type = defaultdict(PRFScore)
|
||||||
for eg in examples:
|
for eg in examples:
|
||||||
if not eg.y.has_annotation("ENT_IOB"):
|
if not eg.y.has_annotation("ENT_IOB"):
|
||||||
continue
|
continue
|
||||||
golds = {(e.label_, e.start, e.end) for e in eg.y.ents}
|
golds = {(e.label_, e.start, e.end) for e in eg.y.ents}
|
||||||
align_x2y = eg.alignment.x2y
|
align_x2y = eg.alignment.x2y
|
||||||
for pred_ent in eg.x.ents:
|
for pred_ent in eg.x.ents:
|
||||||
if pred_ent.label_ not in scores:
|
if pred_ent.label_ not in score_per_type:
|
||||||
scores[pred_ent.label_] = PRFScore()
|
score_per_type[pred_ent.label_] = PRFScore()
|
||||||
indices = align_x2y[pred_ent.start : pred_ent.end].dataXd.ravel()
|
indices = align_x2y[pred_ent.start : pred_ent.end].dataXd.ravel()
|
||||||
if len(indices):
|
if len(indices):
|
||||||
g_span = eg.y[indices[0] : indices[-1] + 1]
|
g_span = eg.y[indices[0] : indices[-1] + 1]
|
||||||
|
@ -650,13 +695,29 @@ def get_ner_prf(examples: Iterable[Example]) -> Dict[str, PRFScore]:
|
||||||
if all(token.ent_iob != 0 for token in g_span):
|
if all(token.ent_iob != 0 for token in g_span):
|
||||||
key = (pred_ent.label_, indices[0], indices[-1] + 1)
|
key = (pred_ent.label_, indices[0], indices[-1] + 1)
|
||||||
if key in golds:
|
if key in golds:
|
||||||
scores[pred_ent.label_].tp += 1
|
score_per_type[pred_ent.label_].tp += 1
|
||||||
golds.remove(key)
|
golds.remove(key)
|
||||||
else:
|
else:
|
||||||
scores[pred_ent.label_].fp += 1
|
score_per_type[pred_ent.label_].fp += 1
|
||||||
for label, start, end in golds:
|
for label, start, end in golds:
|
||||||
scores[label].fn += 1
|
score_per_type[label].fn += 1
|
||||||
return scores
|
totals = PRFScore()
|
||||||
|
for prf in score_per_type.values():
|
||||||
|
totals += prf
|
||||||
|
if len(totals) > 0:
|
||||||
|
return {
|
||||||
|
"ents_p": totals.precision,
|
||||||
|
"ents_r": totals.recall,
|
||||||
|
"ents_f": totals.fscore,
|
||||||
|
"ents_per_type": {k: v.to_dict() for k, v in score_per_type.items()},
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"ents_p": None,
|
||||||
|
"ents_r": None,
|
||||||
|
"ents_f": None,
|
||||||
|
"ents_per_type": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#############################################################################
|
#############################################################################
|
||||||
|
|
|
@ -160,8 +160,8 @@ def test_attributeruler_score(nlp, pattern_dicts):
|
||||||
scores = nlp.evaluate(dev_examples)
|
scores = nlp.evaluate(dev_examples)
|
||||||
# "cat" is the only correct lemma
|
# "cat" is the only correct lemma
|
||||||
assert scores["lemma_acc"] == pytest.approx(0.2)
|
assert scores["lemma_acc"] == pytest.approx(0.2)
|
||||||
# the empty morphs are correct
|
# no morphs are set
|
||||||
assert scores["morph_acc"] == pytest.approx(0.6)
|
assert scores["morph_acc"] == None
|
||||||
|
|
||||||
|
|
||||||
def test_attributeruler_rule_order(nlp):
|
def test_attributeruler_rule_order(nlp):
|
||||||
|
|
|
@ -277,6 +277,62 @@ def test_tag_score(tagged_doc):
|
||||||
assert results["morph_per_feat"]["Number"]["f"] == approx(0.72727272)
|
assert results["morph_per_feat"]["Number"]["f"] == approx(0.72727272)
|
||||||
|
|
||||||
|
|
||||||
|
def test_partial_annotation(en_tokenizer):
|
||||||
|
pred_doc = en_tokenizer("a b c d e")
|
||||||
|
pred_doc[0].tag_ = "A"
|
||||||
|
pred_doc[0].pos_ = "X"
|
||||||
|
pred_doc[0].set_morph("Feat=Val")
|
||||||
|
pred_doc[0].dep_ = "dep"
|
||||||
|
|
||||||
|
# unannotated reference
|
||||||
|
ref_doc = en_tokenizer("a b c d e")
|
||||||
|
ref_doc.has_unknown_spaces = True
|
||||||
|
example = Example(pred_doc, ref_doc)
|
||||||
|
scorer = Scorer()
|
||||||
|
scores = scorer.score([example])
|
||||||
|
for key in scores:
|
||||||
|
# cats doesn't have an unset state
|
||||||
|
if key.startswith("cats"):
|
||||||
|
continue
|
||||||
|
assert scores[key] == None
|
||||||
|
|
||||||
|
# partially annotated reference, not overlapping with predicted annotation
|
||||||
|
ref_doc = en_tokenizer("a b c d e")
|
||||||
|
ref_doc.has_unknown_spaces = True
|
||||||
|
ref_doc[1].tag_ = "A"
|
||||||
|
ref_doc[1].pos_ = "X"
|
||||||
|
ref_doc[1].set_morph("Feat=Val")
|
||||||
|
ref_doc[1].dep_ = "dep"
|
||||||
|
example = Example(pred_doc, ref_doc)
|
||||||
|
scorer = Scorer()
|
||||||
|
scores = scorer.score([example])
|
||||||
|
assert scores["token_acc"] == None
|
||||||
|
assert scores["tag_acc"] == 0.0
|
||||||
|
assert scores["pos_acc"] == 0.0
|
||||||
|
assert scores["morph_acc"] == 0.0
|
||||||
|
assert scores["dep_uas"] == 1.0
|
||||||
|
assert scores["dep_las"] == 0.0
|
||||||
|
assert scores["sents_f"] == None
|
||||||
|
|
||||||
|
# partially annotated reference, overlapping with predicted annotation
|
||||||
|
ref_doc = en_tokenizer("a b c d e")
|
||||||
|
ref_doc.has_unknown_spaces = True
|
||||||
|
ref_doc[0].tag_ = "A"
|
||||||
|
ref_doc[0].pos_ = "X"
|
||||||
|
ref_doc[1].set_morph("Feat=Val")
|
||||||
|
ref_doc[1].dep_ = "dep"
|
||||||
|
example = Example(pred_doc, ref_doc)
|
||||||
|
scorer = Scorer()
|
||||||
|
scores = scorer.score([example])
|
||||||
|
assert scores["token_acc"] == None
|
||||||
|
assert scores["tag_acc"] == 1.0
|
||||||
|
assert scores["pos_acc"] == 1.0
|
||||||
|
assert scores["morph_acc"] == 0.0
|
||||||
|
assert scores["dep_uas"] == 1.0
|
||||||
|
assert scores["dep_las"] == 0.0
|
||||||
|
assert scores["sents_f"] == None
|
||||||
|
|
||||||
|
|
||||||
def test_roc_auc_score():
|
def test_roc_auc_score():
|
||||||
# Binary classification, toy tests from scikit-learn test suite
|
# Binary classification, toy tests from scikit-learn test suite
|
||||||
y_true = [0, 1]
|
y_true = [0, 1]
|
||||||
|
|
|
@ -399,14 +399,13 @@ cdef class Doc:
|
||||||
return True
|
return True
|
||||||
cdef int i
|
cdef int i
|
||||||
cdef int range_start = 0
|
cdef int range_start = 0
|
||||||
|
if attr == "IS_SENT_START" or attr == self.vocab.strings["IS_SENT_START"]:
|
||||||
|
attr = SENT_START
|
||||||
attr = intify_attr(attr)
|
attr = intify_attr(attr)
|
||||||
# adjust attributes
|
# adjust attributes
|
||||||
if attr == HEAD:
|
if attr == HEAD:
|
||||||
# HEAD does not have an unset state, so rely on DEP
|
# HEAD does not have an unset state, so rely on DEP
|
||||||
attr = DEP
|
attr = DEP
|
||||||
elif attr == self.vocab.strings["IS_SENT_START"]:
|
|
||||||
# as in Matcher, allow IS_SENT_START as an alias of SENT_START
|
|
||||||
attr = SENT_START
|
|
||||||
# special cases for sentence boundaries
|
# special cases for sentence boundaries
|
||||||
if attr == SENT_START:
|
if attr == SENT_START:
|
||||||
if "sents" in self.user_hooks:
|
if "sents" in self.user_hooks:
|
||||||
|
|
|
@ -683,6 +683,7 @@ The L2 norm of the document's vector representation.
|
||||||
| `user_hooks` | A dictionary that allows customization of the `Doc`'s properties. ~~Dict[str, Callable]~~ |
|
| `user_hooks` | A dictionary that allows customization of the `Doc`'s properties. ~~Dict[str, Callable]~~ |
|
||||||
| `user_token_hooks` | A dictionary that allows customization of properties of `Token` children. ~~Dict[str, Callable]~~ |
|
| `user_token_hooks` | A dictionary that allows customization of properties of `Token` children. ~~Dict[str, Callable]~~ |
|
||||||
| `user_span_hooks` | A dictionary that allows customization of properties of `Span` children. ~~Dict[str, Callable]~~ |
|
| `user_span_hooks` | A dictionary that allows customization of properties of `Span` children. ~~Dict[str, Callable]~~ |
|
||||||
|
| `has_unknown_spaces` | Whether the document was constructed without known spacing between tokens (typically when created from gold tokenization). ~~bool~~ |
|
||||||
| `_` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). ~~Underscore~~ |
|
| `_` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). ~~Underscore~~ |
|
||||||
|
|
||||||
## Serialization fields {#serialization-fields}
|
## Serialization fields {#serialization-fields}
|
||||||
|
|
|
@ -68,6 +68,8 @@ Scores the tokenization:
|
||||||
- `token_p`, `token_r`, `token_f`: precision, recall and F-score for token
|
- `token_p`, `token_r`, `token_f`: precision, recall and F-score for token
|
||||||
character spans
|
character spans
|
||||||
|
|
||||||
|
Docs with `has_unknown_spaces` are skipped during scoring.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
> ```python
|
> ```python
|
||||||
|
@ -81,7 +83,8 @@ Scores the tokenization:
|
||||||
|
|
||||||
## Scorer.score_token_attr {#score_token_attr tag="staticmethod" new="3"}
|
## Scorer.score_token_attr {#score_token_attr tag="staticmethod" new="3"}
|
||||||
|
|
||||||
Scores a single token attribute.
|
Scores a single token attribute. Tokens with missing values in the reference doc
|
||||||
|
are skipped during scoring.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -90,20 +93,22 @@ Scores a single token attribute.
|
||||||
> print(scores["pos_acc"])
|
> print(scores["pos_acc"])
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
||||||
| `attr` | The attribute to score. ~~str~~ |
|
| `attr` | The attribute to score. ~~str~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `getter` | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. ~~Callable[[Token, str], Any]~~ |
|
| `getter` | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. ~~Callable[[Token, str], Any]~~ |
|
||||||
| **RETURNS** | A dictionary containing the score `{attr}_acc`. ~~Dict[str, float]~~ |
|
| `missing_values` | Attribute values to treat as missing annotation in the reference annotation. Defaults to `{0, None, ""}`. ~~Set[Any]~~ |
|
||||||
|
| **RETURNS** | A dictionary containing the score `{attr}_acc`. ~~Dict[str, float]~~ |
|
||||||
|
|
||||||
## Scorer.score_token_attr_per_feat {#score_token_attr_per_feat tag="staticmethod" new="3"}
|
## Scorer.score_token_attr_per_feat {#score_token_attr_per_feat tag="staticmethod" new="3"}
|
||||||
|
|
||||||
Scores a single token attribute per feature for a token attribute in the
|
Scores a single token attribute per feature for a token attribute in the
|
||||||
Universal Dependencies
|
Universal Dependencies
|
||||||
[FEATS](https://universaldependencies.org/format.html#morphological-annotation)
|
[FEATS](https://universaldependencies.org/format.html#morphological-annotation)
|
||||||
format.
|
format. Tokens with missing values in the reference doc are skipped during
|
||||||
|
scoring.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -112,13 +117,14 @@ format.
|
||||||
> print(scores["morph_per_feat"])
|
> print(scores["morph_per_feat"])
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
||||||
| `attr` | The attribute to score. ~~str~~ |
|
| `attr` | The attribute to score. ~~str~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `getter` | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. ~~Callable[[Token, str], Any]~~ |
|
| `getter` | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. ~~Callable[[Token, str], Any]~~ |
|
||||||
| **RETURNS** | A dictionary containing the per-feature PRF scores under the key `{attr}_per_feat`. ~~Dict[str, Dict[str, float]]~~ |
|
| `missing_values` | Attribute values to treat as missing annotation in the reference annotation. Defaults to `{0, None, ""}`. ~~Set[Any]~~ |
|
||||||
|
| **RETURNS** | A dictionary containing the per-feature PRF scores under the key `{attr}_per_feat`. ~~Dict[str, Dict[str, float]]~~ |
|
||||||
|
|
||||||
## Scorer.score_spans {#score_spans tag="staticmethod" new="3"}
|
## Scorer.score_spans {#score_spans tag="staticmethod" new="3"}
|
||||||
|
|
||||||
|
@ -131,17 +137,19 @@ Returns PRF scores for labeled or unlabeled spans.
|
||||||
> print(scores["ents_f"])
|
> print(scores["ents_f"])
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
||||||
| `attr` | The attribute to score. ~~str~~ |
|
| `attr` | The attribute to score. ~~str~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `getter` | Defaults to `getattr`. If provided, `getter(doc, attr)` should return the `Span` objects for an individual `Doc`. ~~Callable[[Doc, str], Iterable[Span]]~~ |
|
| `getter` | Defaults to `getattr`. If provided, `getter(doc, attr)` should return the `Span` objects for an individual `Doc`. ~~Callable[[Doc, str], Iterable[Span]]~~ |
|
||||||
| **RETURNS** | A dictionary containing the PRF scores under the keys `{attr}_p`, `{attr}_r`, `{attr}_f` and the per-type PRF scores under `{attr}_per_type`. ~~Dict[str, Union[float, Dict[str, float]]]~~ |
|
| `has_annotation` | Defaults to `None`. If provided, `has_annotation(doc)` should return whether a `Doc` has annotation for this `attr`. Docs without annotation are skipped for scoring purposes. ~~str~~ |
|
||||||
|
| **RETURNS** | A dictionary containing the PRF scores under the keys `{attr}_p`, `{attr}_r`, `{attr}_f` and the per-type PRF scores under `{attr}_per_type`. ~~Dict[str, Union[float, Dict[str, float]]]~~ |
|
||||||
|
|
||||||
## Scorer.score_deps {#score_deps tag="staticmethod" new="3"}
|
## Scorer.score_deps {#score_deps tag="staticmethod" new="3"}
|
||||||
|
|
||||||
Calculate the UAS, LAS, and LAS per type scores for dependency parses.
|
Calculate the UAS, LAS, and LAS per type scores for dependency parses. Tokens
|
||||||
|
with missing values for the `attr` (typically `dep`) are skipped during scoring.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -160,16 +168,17 @@ Calculate the UAS, LAS, and LAS per type scores for dependency parses.
|
||||||
> print(scores["dep_uas"], scores["dep_las"])
|
> print(scores["dep_uas"], scores["dep_las"])
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
||||||
| `attr` | The attribute to score. ~~str~~ |
|
| `attr` | The attribute to score. ~~str~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `getter` | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. ~~Callable[[Token, str], Any]~~ |
|
| `getter` | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. ~~Callable[[Token, str], Any]~~ |
|
||||||
| `head_attr` | The attribute containing the head token. ~~str~~ |
|
| `head_attr` | The attribute containing the head token. ~~str~~ |
|
||||||
| `head_getter` | Defaults to `getattr`. If provided, `head_getter(token, attr)` should return the head for an individual `Token`. ~~Callable[[Doc, str], Token]~~ |
|
| `head_getter` | Defaults to `getattr`. If provided, `head_getter(token, attr)` should return the head for an individual `Token`. ~~Callable[[Doc, str], Token]~~ |
|
||||||
| `ignore_labels` | Labels to ignore while scoring (e.g. `"punct"`). ~~Iterable[str]~~ |
|
| `ignore_labels` | Labels to ignore while scoring (e.g. `"punct"`). ~~Iterable[str]~~ |
|
||||||
| **RETURNS** | A dictionary containing the scores: `{attr}_uas`, `{attr}_las`, and `{attr}_las_per_type`. ~~Dict[str, Union[float, Dict[str, float]]]~~ |
|
| `missing_values` | Attribute values to treat as missing annotation in the reference annotation. Defaults to `{0, None, ""}`. ~~Set[Any]~~ |
|
||||||
|
| **RETURNS** | A dictionary containing the scores: `{attr}_uas`, `{attr}_las`, and `{attr}_las_per_type`. ~~Dict[str, Union[float, Dict[str, float]]]~~ |
|
||||||
|
|
||||||
## Scorer.score_cats {#score_cats tag="staticmethod" new="3"}
|
## Scorer.score_cats {#score_cats tag="staticmethod" new="3"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user