updates to NEL functionality (#6132)

* NEL: read sentences and ents from reference

* fiddling with sent_start annotations

* add KB serialization test

* KB write additional file with strings.json

* score_links function to calculate NEL P/R/F

* formatting

* documentation
Sofie Van Landeghem authored on 2020-09-24 16:53:59 +02:00, committed by GitHub
commit c7eedd3534 (parent d0ef4a4cf5)
8 changed files with 273 additions and 42 deletions

spacy/errors.py

@@ -517,8 +517,8 @@ class Errors:
             "instead.")
     E927 = ("Can't write to frozen list. Maybe you're trying to modify a computed "
             "property or default function argument?")
-    E928 = ("A 'KnowledgeBase' should be written to / read from a file, but the "
-            "provided argument {loc} is an existing directory.")
+    E928 = ("A 'KnowledgeBase' can only be serialized to/from a directory, "
+            "but the provided argument {loc} points to a file.")
     E929 = ("A 'KnowledgeBase' could not be read from {loc} - the path does "
             "not seem to exist.")
     E930 = ("Received invalid get_examples callback in {name}.begin_training. "

spacy/kb.pyx

@@ -10,6 +10,8 @@ from libcpp.vector cimport vector
 from pathlib import Path
 import warnings
+from spacy.strings import StringStore
+from spacy import util

 from .typedefs cimport hash_t
@@ -83,6 +85,9 @@ cdef class KnowledgeBase:
     DOCS: https://nightly.spacy.io/api/kb
     """
+    contents_loc = "contents"
+    strings_loc = "strings.json"
+
     def __init__(self, Vocab vocab, entity_vector_length):
         """Create a KnowledgeBase."""
         self.mem = Pool()
@@ -319,15 +324,29 @@ cdef class KnowledgeBase:
         return 0.0

     def to_disk(self, path):
         path = util.ensure_path(path)
-        if path.is_dir():
+        if not path.exists():
+            path.mkdir(parents=True)
+        if not path.is_dir():
             raise ValueError(Errors.E928.format(loc=path))
-        if not path.parent.exists():
-            path.parent.mkdir(parents=True)
+        self.write_contents(path / self.contents_loc)
+        self.vocab.strings.to_disk(path / self.strings_loc)

-        cdef Writer writer = Writer(path)
+    def from_disk(self, path):
+        path = util.ensure_path(path)
+        if not path.exists():
+            raise ValueError(Errors.E929.format(loc=path))
+        if not path.is_dir():
+            raise ValueError(Errors.E928.format(loc=path))
+        self.read_contents(path / self.contents_loc)
+        kb_strings = StringStore()
+        kb_strings.from_disk(path / self.strings_loc)
+        for string in kb_strings:
+            self.vocab.strings.add(string)
+
+    def write_contents(self, file_path):
+        cdef Writer writer = Writer(file_path)
         writer.write_header(self.get_size_entities(), self.entity_vector_length)
         # dumping the entity vectors in their original order
@@ -366,13 +385,7 @@ cdef class KnowledgeBase:
         writer.close()

-    def from_disk(self, path):
-        path = util.ensure_path(path)
-        if path.is_dir():
-            raise ValueError(Errors.E928.format(loc=path))
-        if not path.exists():
-            raise ValueError(Errors.E929.format(loc=path))
+    def read_contents(self, file_path):
         cdef hash_t entity_hash
         cdef hash_t alias_hash
         cdef int64_t entry_index
@@ -382,7 +395,7 @@ cdef class KnowledgeBase:
         cdef AliasC alias
         cdef float vector_element

-        cdef Reader reader = Reader(path)
+        cdef Reader reader = Reader(file_path)
         # STEP 0: load header and initialize KB
         cdef int64_t nr_entities
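The net effect of the kb.pyx changes above: a `KnowledgeBase` is now serialized to a directory (holding a `contents` file with the binary KB data and a `strings.json` file with the KB's strings) rather than to a single file. A minimal round-trip sketch, assuming this patch is applied; `my_kb` is a hypothetical output directory:

```python
# Sketch of the new directory-based KB round-trip.
from spacy.kb import KnowledgeBase
from spacy.vocab import Vocab

kb = KnowledgeBase(Vocab(), entity_vector_length=3)
kb.add_entity(entity="Q42", freq=12, entity_vector=[1, 2, 3])
kb.add_alias(alias="douglas", entities=["Q42"], probabilities=[0.8])

# to_disk creates the directory if needed and writes two files into it:
# "contents" (the binary KB data) and "strings.json" (the KB's strings).
kb.to_disk("my_kb")

# from_disk restores the contents and merges the stored strings into the
# target vocab, so hashes resolve even in a freshly created Vocab.
kb2 = KnowledgeBase(Vocab(), entity_vector_length=3)
kb2.from_disk("my_kb")
assert kb2.get_alias_candidates("douglas")[0].entity_ == "Q42"
```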

spacy/pipeline/entity_linker.py

@@ -16,6 +16,7 @@ from ..training import Example, validate_examples
 from ..errors import Errors, Warnings
 from ..util import SimpleFrozenList
 from .. import util
+from ..scorer import Scorer


 default_model_config = """
@@ -47,6 +48,8 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]
         "incl_context": True,
         "get_candidates": {"@misc": "spacy.CandidateGenerator.v1"},
     },
+    scores=["nel_micro_p", "nel_micro_r", "nel_micro_f"],
+    default_score_weights={"nel_micro_f": 1.0},
 )
 def make_entity_linker(
     nlp: Language,
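The new `scores` / `default_score_weights` entries register the NEL metrics with the factory meta. A small sketch of the weighting mechanism, with hypothetical metric values (an illustration of the idea, not spaCy internals verbatim): the overall score is a weighted combination of the listed metrics, and with `{"nel_micro_f": 1.0}` the micro-F alone ranks checkpoints.

```python
# Illustration of how default_score_weights reduces per-metric scores
# to a single overall score (metric values are made up).
score_weights = {"nel_micro_f": 1.0}
scores = {"nel_micro_p": 0.75, "nel_micro_r": 0.60, "nel_micro_f": 0.667}
overall = sum(scores[key] * weight for key, weight in score_weights.items())
print(overall)  # 0.667 - only nel_micro_f carries any weight here
```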
@@ -209,12 +212,11 @@ class EntityLinker(Pipe):
             # it does run the model twice :(
             predictions = self.model.predict(docs)
         for eg in examples:
-            sentences = [s for s in eg.predicted.sents]
+            sentences = [s for s in eg.reference.sents]
             kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
-            for ent in eg.predicted.ents:
-                kb_id = kb_ids[
-                    ent.start
-                ]  # KB ID of the first token is the same as the whole span
+            for ent in eg.reference.ents:
+                # KB ID of the first token is the same as the whole span
+                kb_id = kb_ids[ent.start]
                 if kb_id:
                     try:
                         # find the sentence in the list of sentences.
@@ -253,7 +255,7 @@ class EntityLinker(Pipe):
         entity_encodings = []
         for eg in examples:
             kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
-            for ent in eg.predicted.ents:
+            for ent in eg.reference.ents:
                 kb_id = kb_ids[ent.start]
                 if kb_id:
                     entity_encoding = self.kb.get_vector(kb_id)
@@ -415,6 +417,18 @@ class EntityLinker(Pipe):
             for token in ent:
                 token.ent_kb_id_ = kb_id

+    def score(self, examples, **kwargs):
+        """Score a batch of examples.
+
+        examples (Iterable[Example]): The examples to score.
+        RETURNS (Dict[str, Any]): The scores.
+
+        DOCS TODO: https://nightly.spacy.io/api/entity_linker#score
+        """
+        validate_examples(examples, "EntityLinker.score")
+        return Scorer.score_links(examples, negative_labels=[self.NIL])
+
     def to_disk(
         self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
     ) -> None:
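A self-contained sketch of the new `score` method, assuming the component can be created with its default config (the text and KB IDs are borrowed from the test data below):

```python
# Sketch: scoring gold vs. predicted links via the new EntityLinker.score,
# which delegates to Scorer.score_links with negative_labels=[self.NIL].
from spacy.lang.en import English
from spacy.tokens import Span
from spacy.training import Example

nlp = English()
entity_linker = nlp.add_pipe("entity_linker")

ref = nlp.make_doc("Russ Cochran published EC Comics.")
ref.ents = [Span(ref, 0, 2, label="PERSON", kb_id="Q7381115")]
pred = nlp.make_doc("Russ Cochran published EC Comics.")
pred.ents = [Span(pred, 0, 2, label="PERSON", kb_id="Q7381115")]

scores = entity_linker.score([Example(pred, ref)])
print(scores["nel_micro_f"])  # 1.0 - the single link is predicted correctly
```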

spacy/scorer.py

@@ -451,6 +451,74 @@ class Scorer:
             results[f"{attr}_score_desc"] = "macro AUC"
         return results

+    @staticmethod
+    def score_links(
+        examples: Iterable[Example], *, negative_labels: Iterable[str]
+    ) -> Dict[str, Any]:
+        """Returns PRF for predicted links on the entity level.
+        To disentangle the performance of the NEL from the NER,
+        this method only evaluates NEL links for entities that overlap
+        between the gold reference and the predictions.
+
+        examples (Iterable[Example]): Examples to score
+        negative_labels (Iterable[str]): The string values that refer to no annotation (e.g. "NIL")
+        RETURNS (Dict[str, Any]): A dictionary containing the scores.
+
+        DOCS (TODO): https://nightly.spacy.io/api/scorer#score_links
+        """
+        f_per_type = {}
+        for example in examples:
+            gold_ent_by_offset = {}
+            for gold_ent in example.reference.ents:
+                gold_ent_by_offset[(gold_ent.start_char, gold_ent.end_char)] = gold_ent
+            for pred_ent in example.predicted.ents:
+                # only evaluate entities that overlap between gold and pred,
+                # to disentangle the performance of the NEL from the NER
+                gold_span = gold_ent_by_offset.get(
+                    (pred_ent.start_char, pred_ent.end_char), None
+                )
+                if gold_span is not None:
+                    label = gold_span.label_
+                    if label not in f_per_type:
+                        f_per_type[label] = PRFScore()
+                    gold = gold_span.kb_id_
+                    pred = pred_ent.kb_id_
+                    if gold in negative_labels and pred in negative_labels:
+                        # ignore true negatives
+                        pass
+                    elif gold == pred:
+                        f_per_type[label].tp += 1
+                    elif gold in negative_labels:
+                        f_per_type[label].fp += 1
+                    elif pred in negative_labels:
+                        f_per_type[label].fn += 1
+                    else:
+                        # a wrong prediction (e.g. Q42 != Q3) counts as both a FP and a FN
+                        f_per_type[label].fp += 1
+                        f_per_type[label].fn += 1
+        micro_prf = PRFScore()
+        for label_prf in f_per_type.values():
+            micro_prf.tp += label_prf.tp
+            micro_prf.fn += label_prf.fn
+            micro_prf.fp += label_prf.fp
+        n_labels = len(f_per_type) + 1e-100
+        macro_p = sum(prf.precision for prf in f_per_type.values()) / n_labels
+        macro_r = sum(prf.recall for prf in f_per_type.values()) / n_labels
+        macro_f = sum(prf.fscore for prf in f_per_type.values()) / n_labels
+        results = {
+            "nel_score": micro_prf.fscore,
+            "nel_score_desc": "micro F",
+            "nel_micro_p": micro_prf.precision,
+            "nel_micro_r": micro_prf.recall,
+            "nel_micro_f": micro_prf.fscore,
+            "nel_macro_p": macro_p,
+            "nel_macro_r": macro_r,
+            "nel_macro_f": macro_f,
+            "nel_f_per_type": {k: v.to_dict() for k, v in f_per_type.items()},
+        }
+        return results
+
     @staticmethod
     def score_deps(
         examples: Iterable[Example],
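To make the bookkeeping above concrete, here is the arithmetic traced by hand on the three-example batch from the new test_scorer_links test further down (a plain-Python sketch, not spaCy API):

```python
# Entity-level tallies: PERSON gold Q2 vs pred Q70 (wrong link: FP + FN),
# PERSON Q2 vs Q2 (TP); LOC Q3 vs Q3 (TP), LOC Q13 vs NIL (missed link: FN),
# LOC NIL vs NIL (true negative: ignored).
per_type = {"PERSON": {"tp": 1, "fp": 1, "fn": 1}, "LOC": {"tp": 1, "fp": 0, "fn": 1}}
tp = sum(c["tp"] for c in per_type.values())  # 2
fp = sum(c["fp"] for c in per_type.values())  # 1
fn = sum(c["fn"] for c in per_type.values())  # 2
print(tp / (tp + fp))  # nel_micro_p = 2/3
print(tp / (tp + fn))  # nel_micro_r = 2/4
```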

spacy/tests/pipeline/test_entity_linker.py

@@ -2,8 +2,10 @@ from typing import Callable, Iterable
 import pytest

 from spacy.kb import KnowledgeBase, get_candidates, Candidate
+from spacy.vocab import Vocab
 from spacy import util, registry
+from spacy.scorer import Scorer
 from spacy.training import Example
 from spacy.lang.en import English
 from spacy.tests.util import make_tempdir
@@ -151,22 +153,15 @@ def test_kb_serialize(nlp):
         # normal read-write behaviour
         mykb.to_disk(d / "kb")
         mykb.from_disk(d / "kb")
-        mykb.to_disk(d / "kb.file")
-        mykb.from_disk(d / "kb.file")
+        mykb.to_disk(d / "new" / "kb")
+        mykb.from_disk(d / "new" / "kb")
         # allow overwriting an existing file
-        mykb.to_disk(d / "kb.file")
-        with pytest.raises(ValueError):
-            # can not write to a directory
-            mykb.to_disk(d)
-        with pytest.raises(ValueError):
-            # can not read from a directory
-            mykb.from_disk(d)
+        mykb.to_disk(d / "kb")
         with pytest.raises(ValueError):
             # can not read from an unknown file
             mykb.from_disk(d / "unknown" / "kb")


 def test_candidate_generation(nlp):
     """Test correct candidate generation"""
     mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
@@ -254,6 +249,41 @@ def test_el_pipe_configuration(nlp):
     assert doc[2].ent_kb_id_ == "Q2"

+
+def test_vocab_serialization(nlp):
+    """Test that string information is retained across storage"""
+    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
+
+    # adding entities
+    q1_hash = mykb.add_entity(entity="Q1", freq=27, entity_vector=[1])
+    q2_hash = mykb.add_entity(entity="Q2", freq=12, entity_vector=[2])
+    q3_hash = mykb.add_entity(entity="Q3", freq=5, entity_vector=[3])
+
+    # adding aliases
+    douglas_hash = mykb.add_alias(
+        alias="douglas", entities=["Q2", "Q3"], probabilities=[0.4, 0.1]
+    )
+    adam_hash = mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])
+
+    candidates = mykb.get_alias_candidates("adam")
+    assert len(candidates) == 1
+    assert candidates[0].entity == q2_hash
+    assert candidates[0].entity_ == "Q2"
+    assert candidates[0].alias == adam_hash
+    assert candidates[0].alias_ == "adam"
+
+    with make_tempdir() as d:
+        mykb.to_disk(d / "kb")
+        kb_new_vocab = KnowledgeBase(Vocab(), entity_vector_length=1)
+        kb_new_vocab.from_disk(d / "kb")
+
+        candidates = kb_new_vocab.get_alias_candidates("adam")
+        assert len(candidates) == 1
+        assert candidates[0].entity == q2_hash
+        assert candidates[0].entity_ == "Q2"
+        assert candidates[0].alias == adam_hash
+        assert candidates[0].alias_ == "adam"
+

 def test_append_alias(nlp):
     """Test that we can append additional alias-entity pairs"""
     mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
@@ -377,16 +407,20 @@ def test_preserving_links_ents_2(nlp):
 TRAIN_DATA = [
     ("Russ Cochran captured his first major title with his son as caddie.",
         {"links": {(0, 12): {"Q7381115": 0.0, "Q2146908": 1.0}},
-        "entities": [(0, 12, "PERSON")]}),
+        "entities": [(0, 12, "PERSON")],
+        "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}),
     ("Russ Cochran his reprints include EC Comics.",
         {"links": {(0, 12): {"Q7381115": 1.0, "Q2146908": 0.0}},
-        "entities": [(0, 12, "PERSON")]}),
+        "entities": [(0, 12, "PERSON")],
+        "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0]}),
     ("Russ Cochran has been publishing comic art.",
         {"links": {(0, 12): {"Q7381115": 1.0, "Q2146908": 0.0}},
-        "entities": [(0, 12, "PERSON")]}),
+        "entities": [(0, 12, "PERSON")],
+        "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0]}),
     ("Russ Cochran was a member of University of Kentucky's golf team.",
         {"links": {(0, 12): {"Q7381115": 0.0, "Q2146908": 1.0}},
-        "entities": [(0, 12, "PERSON"), (43, 51, "LOC")]}),
+        "entities": [(0, 12, "PERSON"), (43, 51, "LOC")],
+        "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]})
 ]
 GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
 # fmt: on
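On the `sent_starts` values used here: per token, 1 marks a sentence start, -1 explicitly marks a non-start, and 0 leaves it unspecified (our reading of these tests, not an official reference). A sketch of how one TRAIN_DATA entry becomes an Example:

```python
# Sketch: building an Example from the second TRAIN_DATA entry,
# assuming TRAIN_DATA as defined above.
from spacy.lang.en import English
from spacy.training import Example

nlp = English()
text, annotation = TRAIN_DATA[1]  # "Russ Cochran his reprints include EC Comics."
example = Example.from_dict(nlp.make_doc(text), annotation)
# sent_starts [1, -1, 0, ...] pins the whole text into a single sentence
assert len(list(example.reference.sents)) == 1
# the (0, 12) span carries the gold link annotated with probability 1.0
assert example.reference.ents[0].kb_id_ == "Q7381115"
```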
@@ -395,16 +429,8 @@ GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
 def test_overfitting_IO():
     # Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly
     nlp = English()
-    nlp.add_pipe("sentencizer")
     vector_length = 3

-    # Add a custom component to recognize "Russ Cochran" as an entity for the example training data
-    patterns = [
-        {"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}
-    ]
-    ruler = nlp.add_pipe("entity_ruler")
-    ruler.add_patterns(patterns)
-
-    # Convert the texts to docs to make sure we have doc.ents set for the training examples
     train_examples = []
     for text, annotation in TRAIN_DATA:
@@ -446,6 +472,16 @@ def test_overfitting_IO():
         nlp.update(train_examples, sgd=optimizer, losses=losses)
     assert losses["entity_linker"] < 0.001

+    # adding additional components that are required for the entity_linker
+    nlp.add_pipe("sentencizer", first=True)
+
+    # Add a custom component to recognize "Russ Cochran" as an entity for the example training data
+    patterns = [
+        {"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}
+    ]
+    ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
+    ruler.add_patterns(patterns)
+
     # test the trained model
     predictions = []
     for text, annotation in TRAIN_DATA:
@@ -465,3 +501,46 @@ def test_overfitting_IO():
         for ent in doc2.ents:
             predictions.append(ent.kb_id_)
     assert predictions == GOLD_entities
+
+
+def test_scorer_links():
+    train_examples = []
+    nlp = English()
+    ref1 = nlp("Julia lives in London happily.")
+    ref1.ents = [
+        Span(ref1, 0, 1, label="PERSON", kb_id="Q2"),
+        Span(ref1, 3, 4, label="LOC", kb_id="Q3"),
+    ]
+    pred1 = nlp("Julia lives in London happily.")
+    pred1.ents = [
+        Span(pred1, 0, 1, label="PERSON", kb_id="Q70"),
+        Span(pred1, 3, 4, label="LOC", kb_id="Q3"),
+    ]
+    train_examples.append(Example(pred1, ref1))
+
+    ref2 = nlp("She loves London.")
+    ref2.ents = [
+        Span(ref2, 0, 1, label="PERSON", kb_id="Q2"),
+        Span(ref2, 2, 3, label="LOC", kb_id="Q13"),
+    ]
+    pred2 = nlp("She loves London.")
+    pred2.ents = [
+        Span(pred2, 0, 1, label="PERSON", kb_id="Q2"),
+        Span(pred2, 2, 3, label="LOC", kb_id="NIL"),
+    ]
+    train_examples.append(Example(pred2, ref2))
+
+    ref3 = nlp("London is great.")
+    ref3.ents = [Span(ref3, 0, 1, label="LOC", kb_id="NIL")]
+    pred3 = nlp("London is great.")
+    pred3.ents = [Span(pred3, 0, 1, label="LOC", kb_id="NIL")]
+    train_examples.append(Example(pred3, ref3))
+
+    scores = Scorer().score_links(train_examples, negative_labels=["NIL"])
+    assert scores["nel_f_per_type"]["PERSON"]["p"] == 1 / 2
+    assert scores["nel_f_per_type"]["PERSON"]["r"] == 1 / 2
+    assert scores["nel_f_per_type"]["LOC"]["p"] == 1 / 1
+    assert scores["nel_f_per_type"]["LOC"]["r"] == 1 / 2
+    assert scores["nel_micro_p"] == 2 / 3
+    assert scores["nel_micro_r"] == 2 / 4

spacy/tests/training/test_new_example.py

@@ -244,3 +244,22 @@ def test_Example_from_dict_with_links_invalid(annots):
     predicted = Doc(vocab, words=annots["words"])
     with pytest.raises(ValueError):
         Example.from_dict(predicted, annots)
+
+
+def test_Example_from_dict_sentences():
+    vocab = Vocab()
+    predicted = Doc(vocab, words=["One", "sentence", ".", "one", "more"])
+    annots = {"sent_starts": [1, 0, 0, 1, 0]}
+    ex = Example.from_dict(predicted, annots)
+    assert len(list(ex.reference.sents)) == 2
+
+    # this currently throws an error - bug or feature?
+    # predicted = Doc(vocab, words=["One", "sentence", "not", "one", "more"])
+    # annots = {"sent_starts": [1, 0, 0, 0, 0]}
+    # ex = Example.from_dict(predicted, annots)
+    # assert len(list(ex.reference.sents)) == 1
+
+    predicted = Doc(vocab, words=["One", "sentence", "not", "one", "more"])
+    annots = {"sent_starts": [1, -1, 0, 0, 0]}
+    ex = Example.from_dict(predicted, annots)
+    assert len(list(ex.reference.sents)) == 1

website/docs/api/entitylinker.md

@@ -225,6 +225,21 @@ pipe's entity linking model and context encoder. Delegates to
 | `losses`    | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
 | **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~                                                                    |

+## EntityLinker.score {#score tag="method" new="3"}
+
+Score a batch of examples.
+
+> #### Example
+>
+> ```python
+> scores = entity_linker.score(examples)
+> ```
+
+| Name        | Description                                                                                   |
+| ----------- | --------------------------------------------------------------------------------------------- |
+| `examples`  | The examples to score. ~~Iterable[Example]~~                                                   |
+| **RETURNS** | The scores, produced by [`Scorer.score_links`](/api/scorer#score_links). ~~Dict[str, float]~~  |
+
 ## EntityLinker.create_optimizer {#create_optimizer tag="method"}

 Create an optimizer for the pipeline component.

website/docs/api/scorer.md

@@ -206,3 +206,26 @@ depends on the scorer settings:
 | `multi_label`    | Whether the attribute allows multiple labels. Defaults to `True`. ~~bool~~                             |
 | `positive_label` | The positive label for a binary task with exclusive classes. Defaults to `None`. ~~Optional[str]~~     |
 | **RETURNS**      | A dictionary containing the scores, with inapplicable scores as `None`. ~~Dict[str, Optional[float]]~~ |
+
+## Scorer.score_links {#score_links tag="staticmethod" new="3"}
+
+Returns PRF for predicted links on the entity level. To disentangle the
+performance of the NEL from the NER, this method only evaluates NEL links for
+entities that overlap between the gold reference and the predictions.
+
+> #### Example
+>
+> ```python
+> scores = Scorer.score_links(
+>     examples,
+>     negative_labels=["NIL", ""]
+> )
+> print(scores["nel_micro_f"])
+> ```
+
+| Name              | Description                                                                                                         |
+| ----------------- | -------------------------------------------------------------------------------------------------------------------- |
+| `examples`        | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~   |
+| _keyword-only_    |                                                                                                                        |
+| `negative_labels` | The string values that refer to no annotation (e.g. "NIL"). ~~Iterable[str]~~                                          |
+| **RETURNS**       | A dictionary containing the scores. ~~Dict[str, Optional[float]]~~                                                    |