Mirror of https://github.com/explosion/spaCy.git (synced 2024-12-24 17:06:29 +03:00)
only evaluate named entities for NEL if there is a corresponding gold span (#7074)
parent 264862c67a
commit 113e8d082b
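Context for the change below: `Scorer.score_links` looks up each predicted entity in a dict of gold entities keyed by character offsets. When NER predicts a span that has no gold counterpart, that lookup returns `None`, and the old code dereferenced it unconditionally. A minimal sketch of the failure mode, using the same dict-of-offsets idea as the diff (the spans are stood in for by strings, and the offsets come from the regression test's training sentence, so this is illustrative rather than the scorer's actual code):

# Minimal sketch of the failure mode fixed by this commit.
gold_ent_by_offset = {(0, 12): "Russ Cochran [PERSON, Q2146908]"}

# A predicted span with no gold counterpart, e.g. "EC Comics" at (34, 43):
gold_span = gold_ent_by_offset.get((34, 43), None)  # -> None

# Old code: label = gold_span.label_  -> AttributeError on None.
# New code: the whole NEL bookkeeping block is skipped for such spans.
if gold_span is not None:
    print("score the link")  # only reached when gold and pred offsets match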
spacy/scorer.py

@@ -531,27 +531,28 @@ class Scorer:
                 gold_span = gold_ent_by_offset.get(
                     (pred_ent.start_char, pred_ent.end_char), None
                 )
-                label = gold_span.label_
-                if label not in f_per_type:
-                    f_per_type[label] = PRFScore()
-                gold = gold_span.kb_id_
-                # only evaluating entities that overlap between gold and pred,
-                # to disentangle the performance of the NEL from the NER
-                if gold is not None:
-                    pred = pred_ent.kb_id_
-                    if gold in negative_labels and pred in negative_labels:
-                        # ignore true negatives
-                        pass
-                    elif gold == pred:
-                        f_per_type[label].tp += 1
-                    elif gold in negative_labels:
-                        f_per_type[label].fp += 1
-                    elif pred in negative_labels:
-                        f_per_type[label].fn += 1
-                    else:
-                        # a wrong prediction (e.g. Q42 != Q3) counts as both a FP as well as a FN
-                        f_per_type[label].fp += 1
-                        f_per_type[label].fn += 1
+                if gold_span is not None:
+                    label = gold_span.label_
+                    if label not in f_per_type:
+                        f_per_type[label] = PRFScore()
+                    gold = gold_span.kb_id_
+                    # only evaluating entities that overlap between gold and pred,
+                    # to disentangle the performance of the NEL from the NER
+                    if gold is not None:
+                        pred = pred_ent.kb_id_
+                        if gold in negative_labels and pred in negative_labels:
+                            # ignore true negatives
+                            pass
+                        elif gold == pred:
+                            f_per_type[label].tp += 1
+                        elif gold in negative_labels:
+                            f_per_type[label].fp += 1
+                        elif pred in negative_labels:
+                            f_per_type[label].fn += 1
+                        else:
+                            # a wrong prediction (e.g. Q42 != Q3) counts as both a FP as well as a FN
+                            f_per_type[label].fp += 1
+                            f_per_type[label].fn += 1
         micro_prf = PRFScore()
         for label_prf in f_per_type.values():
             micro_prf.tp += label_prf.tp
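A side note on the counters above: a wrong link prediction (gold `Q42`, predicted `Q3`) increments both `fp` and `fn`, so a single mistake lowers precision and recall at once. A rough sketch of the standard arithmetic (not spaCy's `PRFScore` implementation itself):

# One correct link plus one wrong link: the wrong one counts as FP and FN.
tp, fp, fn = 1, 1, 1
precision = tp / (tp + fp)  # 0.5
recall = tp / (tp + fn)     # 0.5
fscore = 2 * precision * recall / (precision + recall)  # 0.5
print(precision, recall, fscore)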
spacy/tests/regression/test_issue7062.py (new file, 54 lines)

@@ -0,0 +1,54 @@
+from spacy.kb import KnowledgeBase
+from spacy.training import Example
+from spacy.lang.en import English
+
+
+# fmt: off
+TRAIN_DATA = [
+    ("Russ Cochran his reprints include EC Comics.",
+        {"links": {(0, 12): {"Q2146908": 1.0}},
+         "entities": [(0, 12, "PERSON")],
+         "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0]})
+]
+# fmt: on
+
+
+def test_partial_links():
+    # Test that having some entities on the doc without gold links, doesn't crash
+    nlp = English()
+    vector_length = 3
+    train_examples = []
+    for text, annotation in TRAIN_DATA:
+        doc = nlp(text)
+        train_examples.append(Example.from_dict(doc, annotation))
+
+    def create_kb(vocab):
+        # create artificial KB
+        mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
+        mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
+        mykb.add_alias("Russ Cochran", ["Q2146908"], [0.9])
+        return mykb
+
+    # Create and train the Entity Linker
+    entity_linker = nlp.add_pipe("entity_linker", last=True)
+    entity_linker.set_kb(create_kb)
+    optimizer = nlp.initialize(get_examples=lambda: train_examples)
+    for i in range(2):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+    # adding additional components that are required for the entity_linker
+    nlp.add_pipe("sentencizer", first=True)
+    patterns = [
+        {"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]},
+        {"label": "ORG", "pattern": [{"LOWER": "ec"}, {"LOWER": "comics"}]}
+    ]
+    ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
+    ruler.add_patterns(patterns)
+
+    # this will run the pipeline on the examples and shouldn't crash
+    results = nlp.evaluate(train_examples)
+    assert "PERSON" in results["ents_per_type"]
+    assert "PERSON" in results["nel_f_per_type"]
+    assert "ORG" in results["ents_per_type"]
+    assert "ORG" not in results["nel_f_per_type"]
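The last two assertions pin down the new behaviour: the gold annotation contains only the `(0, 12)` PERSON entity, so the ORG span the `entity_ruler` predicts for "EC Comics" has no gold counterpart. With this fix the NEL scorer skips it (hence no "ORG" key in `nel_f_per_type`), while NER scoring still reports it under `ents_per_type`. A toy sketch of that per-span decision, with offsets taken from the training sentence above (illustrative, not the scorer's code):

# Which predicted spans get NEL-scored vs. NER-only, per the fix above.
gold_entity_offsets = {(0, 12)}  # "Russ Cochran" is the only gold entity span
for start, end, label in [(0, 12, "PERSON"), (34, 43, "ORG")]:
    if (start, end) in gold_entity_offsets:
        print(label, "-> evaluated for NEL")                # PERSON
    else:
        print(label, "-> NER-only, skipped by NEL scorer")  # ORG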