mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
* Fix evaluation of NER in scorer.py
This commit is contained in:
parent
6016ee83a6
commit
4c6058baa7
|
@@ -1,5 +1,7 @@
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
|
|
||||||
|
from spacy.munge.read_ner import tags_to_entities
|
||||||
|
|
||||||
|
|
||||||
class PRFScore(object):
|
class PRFScore(object):
|
||||||
"""A precision / recall / F score"""
|
"""A precision / recall / F score"""
|
||||||
|
@@ -56,25 +58,25 @@ class Scorer(object):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def ents_p(self):
|
def ents_p(self):
|
||||||
return self.ner.precision
|
return self.ner.precision * 100
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def ents_r(self):
|
def ents_r(self):
|
||||||
return self.ner.recall
|
return self.ner.recall * 100
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def ents_f(self):
|
def ents_f(self):
|
||||||
return self.ner.fscore
|
return self.ner.fscore * 100
|
||||||
|
|
||||||
def score(self, tokens, gold, verbose=False):
|
def score(self, tokens, gold, verbose=False):
|
||||||
assert len(tokens) == len(gold)
|
assert len(tokens) == len(gold)
|
||||||
|
|
||||||
gold_deps = set()
|
gold_deps = set()
|
||||||
gold_tags = set()
|
gold_tags = set()
|
||||||
gold_tags = set()
|
gold_ents = set(tags_to_entities([annot[-1] for annot in gold.orig_annot]))
|
||||||
for id_, word, tag, head, dep, ner in gold.orig_annot:
|
for id_, word, tag, head, dep, ner in gold.orig_annot:
|
||||||
if dep.lower() not in ('p', 'punct'):
|
if dep.lower() not in ('p', 'punct'):
|
||||||
gold_deps.add((id_, head, dep))
|
gold_deps.add((id_, head, dep.lower()))
|
||||||
gold_tags.add((id_, tag))
|
gold_tags.add((id_, tag))
|
||||||
cand_deps = set()
|
cand_deps = set()
|
||||||
cand_tags = set()
|
cand_tags = set()
|
||||||
|
@@ -88,13 +90,22 @@ class Scorer(object):
|
||||||
self.unlabelled.fp += 1
|
self.unlabelled.fp += 1
|
||||||
self.labelled.fp += 1
|
self.labelled.fp += 1
|
||||||
else:
|
else:
|
||||||
cand_deps.add((gold_i, gold_head, token.dep_))
|
cand_deps.add((gold_i, gold_head, token.dep_.lower()))
|
||||||
if gold_i is None:
|
if gold_i is None:
|
||||||
self.tags.fp += 1
|
self.tags.fp += 1
|
||||||
else:
|
else:
|
||||||
cand_tags.add((gold_i, token.tag_))
|
cand_tags.add((gold_i, token.tag_))
|
||||||
|
cand_ents = set()
|
||||||
|
for ent in tokens.ents:
|
||||||
|
first = gold.cand_to_gold[ent.start]
|
||||||
|
last = gold.cand_to_gold[ent.end-1]
|
||||||
|
if first is None or last is None:
|
||||||
|
self.ner.fp += 1
|
||||||
|
else:
|
||||||
|
cand_ents.add((ent.label_, first, last))
|
||||||
|
|
||||||
self.tags.score_set(cand_tags, cand_deps)
|
self.ner.score_set(cand_ents, gold_ents)
|
||||||
|
self.tags.score_set(cand_tags, gold_tags)
|
||||||
self.labelled.score_set(cand_deps, gold_deps)
|
self.labelled.score_set(cand_deps, gold_deps)
|
||||||
self.unlabelled.score_set(
|
self.unlabelled.score_set(
|
||||||
set(item[:2] for item in cand_deps),
|
set(item[:2] for item in cand_deps),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user