Mirror of https://github.com/explosion/spaCy.git
* Evaluation of NER model per entity type, closes #3490

  Each entity type's score is now tracked individually, so that each type gets its own precision, recall and F1 score.

* Keep track of each entity type individually using dicts
* Improve how the scores are computed for each entity type
* Fix bug when computing scores for entities
* Format with black
* Add key `ents_per_type` to the scores function

  The key `ents_per_type` contains the precision, recall and F1 score for each entity type individually.
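Read back from `Scorer.scores`, the new entry has this shape (the labels and numbers below are illustrative only, not output from a real run):

```python
scorer.scores["ents_per_type"]
# {"PERSON": {"p": 83.3, "r": 90.0, "f": 86.5},
#  "ORG":    {"p": 72.0, "r": 64.3, "f": 67.9}}
```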
This commit is contained in:
parent 2eb925bd05
commit 6d577f0b92
spacy/scorer.py

@@ -52,6 +52,7 @@ class Scorer(object):
         self.labelled = PRFScore()
         self.tags = PRFScore()
         self.ner = PRFScore()
+        self.ner_per_ents = dict()
         self.eval_punct = eval_punct
 
     @property
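The values of `ner_per_ents` are `PRFScore` objects, the same precision/recall/F helper behind `self.ner`, so a rare label's score is no longer drowned out by frequent ones. For reference, a minimal sketch of `PRFScore` as it stood in `spacy/scorer.py` around this commit (paraphrased; the tiny epsilon avoids division by zero on empty counts):

```python
class PRFScore(object):
    """A precision / recall / F score over sets of annotations."""

    def __init__(self):
        self.tp = 0  # true positives
        self.fp = 0  # false positives
        self.fn = 0  # false negatives

    def score_set(self, cand, gold):
        # Compare a set of candidate annotations against the gold set.
        self.tp += len(cand.intersection(gold))
        self.fp += len(cand - gold)
        self.fn += len(gold - cand)

    @property
    def precision(self):
        return self.tp / (self.tp + self.fp + 1e-100)

    @property
    def recall(self):
        return self.tp / (self.tp + self.fn + 1e-100)

    @property
    def fscore(self):
        p = self.precision
        r = self.recall
        return 2 * ((p * r) / (p + r + 1e-100))
```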
@@ -104,6 +105,15 @@ class Scorer(object):
             "ents_f": self.ents_f,
             "tags_acc": self.tags_acc,
             "token_acc": self.token_acc,
+            "ents_per_type": self.__scores_per_ents(),
         }
 
+    def __scores_per_ents(self):
+        """RETURNS (dict): Scores per NER entity
+        """
+        return {
+            k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
+            for k, v in self.ner_per_ents.items()
+        }
+
     def score(self, doc, gold, verbose=False, punct_labels=("p", "punct")):
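A quick way to see what `__scores_per_ents` returns: feed the same comprehension a couple of hand-filled `PRFScore` objects (this reuses the sketch above; all counts are invented):

```python
per_ents = {"PERSON": PRFScore(), "ORG": PRFScore()}
per_ents["PERSON"].tp, per_ents["PERSON"].fp, per_ents["PERSON"].fn = 9, 1, 2
per_ents["ORG"].tp, per_ents["ORG"].fp, per_ents["ORG"].fn = 4, 3, 1

scores = {
    k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
    for k, v in per_ents.items()
}
print(scores)
# {'PERSON': {'p': 90.0, 'r': 81.8..., 'f': 85.7...},
#  'ORG': {'p': 57.1..., 'r': 80.0, 'f': 66.6...}}
```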
@@ -149,13 +159,31 @@ class Scorer(object):
                     cand_deps.add((gold_i, gold_head, token.dep_.lower()))
         if "-" not in [token[-1] for token in gold.orig_annot]:
             cand_ents = set()
+            current_ent = {k.label_: set() for k in doc.ents}
+            current_gold = {k.label_: set() for k in doc.ents}
             for ent in doc.ents:
+                if ent.label_ not in self.ner_per_ents:
+                    self.ner_per_ents[ent.label_] = PRFScore()
                 first = gold.cand_to_gold[ent.start]
                 last = gold.cand_to_gold[ent.end - 1]
                 if first is None or last is None:
                     self.ner.fp += 1
+                    self.ner_per_ents[ent.label_].fp += 1
                 else:
                     cand_ents.add((ent.label_, first, last))
+                    current_ent[ent.label_].add(
+                        tuple(x for x in cand_ents if x[0] == ent.label_)
+                    )
+                    current_gold[ent.label_].add(
+                        tuple(x for x in gold_ents if x[0] == ent.label_)
+                    )
+            # Scores per ent
+            [
+                v.score_set(current_ent[k], current_gold[k])
+                for k, v in self.ner_per_ents.items()
+                if k in current_ent
+            ]
+            # Score for all ents
             self.ner.score_set(cand_ents, gold_ents)
         self.tags.score_set(cand_tags, gold_tags)
         self.labelled.score_set(cand_deps, gold_deps)
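Taken together, per-type scoring is reached through the same `Scorer.score(doc, gold)` entry point as before. A usage sketch against the spaCy 2.x API of this era; the model name, text and gold offsets are placeholders, and `GoldParse` is assumed to accept `(start_char, end_char, label)` entity offsets as it did in 2.x:

```python
import spacy
from spacy.gold import GoldParse
from spacy.scorer import Scorer

nlp = spacy.load("en_core_web_sm")  # placeholder model
scorer = Scorer()

text = "Uber blew through $1 million a week"
annotations = [(0, 4, "ORG"), (18, 28, "MONEY")]  # made-up gold offsets

# Build the gold standard on an unprocessed doc, then score the prediction.
gold = GoldParse(nlp.make_doc(text), entities=annotations)
pred = nlp(text)
scorer.score(pred, gold)

print(scorer.scores["ents_f"])         # overall NER F-score, as before
print(scorer.scores["ents_per_type"])  # new: per-label p/r/f dicts
```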