Mirror of https://github.com/explosion/spaCy.git
Evaluation of NER model per entity type (closes #3490)

* Each entity type's score is now tracked individually, so that each type has its own precision, recall and F1 score
* Keep track of each entity individually using dicts
* Improve how the scores are computed for each entity
* Fix a bug when computing scores for ents
* Format with black
* Add the key `ents_per_type` to the scores dict; it contains the precision, recall and F1 score for each entity type individually
This commit is contained in:
parent
2eb925bd05
commit
6d577f0b92
spacy/scorer.py

@@ -52,6 +52,7 @@ class Scorer(object):
         self.labelled = PRFScore()
         self.tags = PRFScore()
         self.ner = PRFScore()
+        self.ner_per_ents = dict()
         self.eval_punct = eval_punct
 
     @property
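Each value in `ner_per_ents` maps an entity label to its own `PRFScore`, the same true/false-positive counter already used for `self.ner`. For orientation, here is a minimal sketch of such a counter, assuming only the attributes this diff actually uses (`tp`, `fp`, `fn`, `score_set`, and the `precision`/`recall`/`fscore` properties); it is not the real `PRFScore` implementation:

```python
class PRFSketch(object):
    """Minimal precision/recall/F-score counter, sketching the
    interface this diff relies on from PRFScore."""

    def __init__(self):
        self.tp = 0  # true positives
        self.fp = 0  # false positives
        self.fn = 0  # false negatives

    def score_set(self, cand, gold):
        # Compare a set of candidate tuples against the gold set.
        self.tp += len(cand & gold)
        self.fp += len(cand - gold)
        self.fn += len(gold - cand)

    @property
    def precision(self):
        return self.tp / ((self.tp + self.fp) or 1)

    @property
    def recall(self):
        return self.tp / ((self.tp + self.fn) or 1)

    @property
    def fscore(self):
        p, r = self.precision, self.recall
        return 2 * p * r / ((p + r) or 1)
```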
@@ -104,6 +105,15 @@ class Scorer(object):
             "ents_f": self.ents_f,
             "tags_acc": self.tags_acc,
             "token_acc": self.token_acc,
+            "ents_per_type": self.__scores_per_ents(),
         }
 
+    def __scores_per_ents(self):
+        """RETURNS (dict): Scores per NER entity
+        """
+        return {
+            k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
+            for k, v in self.ner_per_ents.items()
+        }
+
     def score(self, doc, gold, verbose=False, punct_labels=("p", "punct")):
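The new `ents_per_type` entry exposes these per-label counters through the `scores` dict. A hypothetical result (labels and percentages invented for illustration; the nesting matches what `__scores_per_ents` returns):

```python
# Illustrative only: invented labels and numbers, real key layout.
scores = {
    "ents_p": 78.2,
    "ents_r": 74.9,
    "ents_f": 76.5,
    "tags_acc": 93.1,
    "token_acc": 99.0,
    "ents_per_type": {
        "PERSON": {"p": 81.0, "r": 79.3, "f": 80.1},
        "ORG": {"p": 70.4, "r": 66.8, "f": 68.6},
    },
}
```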
@@ -149,13 +159,31 @@ class Scorer(object):
                 cand_deps.add((gold_i, gold_head, token.dep_.lower()))
         if "-" not in [token[-1] for token in gold.orig_annot]:
             cand_ents = set()
+            current_ent = {k.label_: set() for k in doc.ents}
+            current_gold = {k.label_: set() for k in doc.ents}
             for ent in doc.ents:
+                if ent.label_ not in self.ner_per_ents:
+                    self.ner_per_ents[ent.label_] = PRFScore()
                 first = gold.cand_to_gold[ent.start]
                 last = gold.cand_to_gold[ent.end - 1]
                 if first is None or last is None:
                     self.ner.fp += 1
+                    self.ner_per_ents[ent.label_].fp += 1
                 else:
                     cand_ents.add((ent.label_, first, last))
+                    current_ent[ent.label_].add(
+                        tuple(x for x in cand_ents if x[0] == ent.label_)
+                    )
+                    current_gold[ent.label_].add(
+                        tuple(x for x in gold_ents if x[0] == ent.label_)
+                    )
+            # Scores per ent
+            [
+                v.score_set(current_ent[k], current_gold[k])
+                for k, v in self.ner_per_ents.items()
+                if k in current_ent
+            ]
+            # Score for all ents
             self.ner.score_set(cand_ents, gold_ents)
         self.tags.score_set(cand_tags, gold_tags)
         self.labelled.score_set(cand_deps, gold_deps)
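Put together, the per-type scores can be read off the scorer after evaluation. A minimal usage sketch, assuming a spaCy v2-era pipeline and `GoldParse`; the model name, text and entity offsets below are made up for illustration:

```python
import spacy
from spacy.gold import GoldParse
from spacy.scorer import Scorer

nlp = spacy.load("en_core_web_sm")  # any NER-capable pipeline
scorer = Scorer()

# (text, [(start_char, end_char, label), ...]) pairs; invented example data.
examples = [
    ("Apple hired Tim Cook.", [(0, 5, "ORG"), (12, 20, "PERSON")]),
]
for text, entity_offsets in examples:
    gold = GoldParse(nlp.make_doc(text), entities=entity_offsets)
    scorer.score(nlp(text), gold)

print(scorer.scores["ents_f"])         # overall NER F-score
print(scorer.scores["ents_per_type"])  # e.g. {"ORG": {"p": ..., "r": ..., "f": ...}}
```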