Hack scorer to skip NER evaluation when the gold doc has no NER annotations (`gold_doc.is_nered` is false)

This commit is contained in:
Matthew Honnibal 2020-06-24 18:02:59 +02:00
parent 70bd7fb611
commit 359e874766

View File

@@ -371,27 +371,28 @@ class Scorer(object):
self.ner_per_ents[ent_label] = PRFScore() self.ner_per_ents[ent_label] = PRFScore()
# Find all candidate labels, for all and per type # Find all candidate labels, for all and per type
gold_ents = set() gold_ents = set()
for ent in gold_doc.ents:
gold_ent = (ent.label_, ent.start, ent.end - 1)
gold_ents.add(gold_ent)
gold_per_ents[ent.label_].add((ent.label_, ent.start, ent.end - 1))
cand_ents = set() cand_ents = set()
cand_per_ents = {ent_label: set() for ent_label in ent_labels} if gold_doc.is_nered:
for ent in doc.ents: for ent in gold_doc.ents:
first = align.cand_to_gold[ent.start] gold_ent = (ent.label_, ent.start, ent.end - 1)
last = align.cand_to_gold[ent.end - 1] gold_ents.add(gold_ent)
if first is None or last is None: gold_per_ents[ent.label_].add((ent.label_, ent.start, ent.end - 1))
self.ner.fp += 1 cand_per_ents = {ent_label: set() for ent_label in ent_labels}
self.ner_per_ents[ent.label_].fp += 1 for ent in doc.ents:
else: first = align.cand_to_gold[ent.start]
cand_ents.add((ent.label_, first, last)) last = align.cand_to_gold[ent.end - 1]
cand_per_ents[ent.label_].add((ent.label_, first, last)) if first is None or last is None:
# Scores per ent self.ner.fp += 1
for k, v in self.ner_per_ents.items(): self.ner_per_ents[ent.label_].fp += 1
if k in cand_per_ents: else:
v.score_set(cand_per_ents[k], gold_per_ents[k]) cand_ents.add((ent.label_, first, last))
# Score for all ents cand_per_ents[ent.label_].add((ent.label_, first, last))
self.ner.score_set(cand_ents, gold_ents) # Scores per ent
for k, v in self.ner_per_ents.items():
if k in cand_per_ents:
v.score_set(cand_per_ents[k], gold_per_ents[k])
# Score for all ents
self.ner.score_set(cand_ents, gold_ents)
self.tags.score_set(cand_tags, gold_tags) self.tags.score_set(cand_tags, gold_tags)
self.pos.score_set(cand_pos, gold_pos) self.pos.score_set(cand_pos, gold_pos)
self.morphs.score_set(cand_morphs, gold_morphs) self.morphs.score_set(cand_morphs, gold_morphs)