updates to NEL functionality (#6132)

* NEL: read sentences and ents from reference

* fiddling with sent_start annotations

* add KB serialization test

* KB write additional file with strings.json

* score_links function to calculate NEL P/R/F

* formatting

* documentation
Sofie Van Landeghem 2020-09-24 16:53:59 +02:00 committed by GitHub
parent d0ef4a4cf5
commit c7eedd3534
8 changed files with 273 additions and 42 deletions


@@ -517,8 +517,8 @@ class Errors:
            "instead.")
    E927 = ("Can't write to frozen list. Maybe you're trying to modify a computed "
            "property or default function argument?")
-    E928 = ("A 'KnowledgeBase' should be written to / read from a file, but the "
-            "provided argument {loc} is an existing directory.")
+    E928 = ("A 'KnowledgeBase' can only be serialized to/from a directory, "
+            "but the provided argument {loc} points to a file.")
    E929 = ("A 'KnowledgeBase' could not be read from {loc} - the path does "
            "not seem to exist.")
    E930 = ("Received invalid get_examples callback in {name}.begin_training. "


@@ -10,6 +10,8 @@ from libcpp.vector cimport vector
from pathlib import Path
import warnings

+from spacy.strings import StringStore
+
from spacy import util
from .typedefs cimport hash_t
@@ -83,6 +85,9 @@ cdef class KnowledgeBase:
    DOCS: https://nightly.spacy.io/api/kb
    """

+    contents_loc = "contents"
+    strings_loc = "strings.json"
+
    def __init__(self, Vocab vocab, entity_vector_length):
        """Create a KnowledgeBase."""
        self.mem = Pool()
@@ -319,15 +324,29 @@ cdef class KnowledgeBase:
        return 0.0

    def to_disk(self, path):
        path = util.ensure_path(path)
-        if path.is_dir():
+        if not path.exists():
+            path.mkdir(parents=True)
+        if not path.is_dir():
            raise ValueError(Errors.E928.format(loc=path))
-        if not path.parent.exists():
-            path.parent.mkdir(parents=True)
-        cdef Writer writer = Writer(path)
+        self.write_contents(path / self.contents_loc)
+        self.vocab.strings.to_disk(path / self.strings_loc)
+
+    def from_disk(self, path):
+        path = util.ensure_path(path)
+        if not path.exists():
+            raise ValueError(Errors.E929.format(loc=path))
+        if not path.is_dir():
+            raise ValueError(Errors.E928.format(loc=path))
+        self.read_contents(path / self.contents_loc)
+        kb_strings = StringStore()
+        kb_strings.from_disk(path / self.strings_loc)
+        for string in kb_strings:
+            self.vocab.strings.add(string)
+
+    def write_contents(self, file_path):
+        cdef Writer writer = Writer(file_path)
        writer.write_header(self.get_size_entities(), self.entity_vector_length)

        # dumping the entity vectors in their original order
@@ -366,13 +385,7 @@ cdef class KnowledgeBase:
        writer.close()

-    def from_disk(self, path):
-        path = util.ensure_path(path)
-        if path.is_dir():
-            raise ValueError(Errors.E928.format(loc=path))
-        if not path.exists():
-            raise ValueError(Errors.E929.format(loc=path))
+    def read_contents(self, file_path):
        cdef hash_t entity_hash
        cdef hash_t alias_hash
        cdef int64_t entry_index
@@ -382,7 +395,7 @@ cdef class KnowledgeBase:
        cdef AliasC alias
        cdef float vector_element
-        cdef Reader reader = Reader(path)
+        cdef Reader reader = Reader(file_path)

        # STEP 0: load header and initialize KB
        cdef int64_t nr_entities
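
A minimal usage sketch of the reworked serialization from the caller's side (not part of the diff; the entity, alias, and "my_kb" path are illustrative): `to_disk` now expects, and if necessary creates, a directory and writes the binary contents file alongside `strings.json`, so a KB loaded into a brand-new `Vocab` can still resolve its entity and alias strings.

```python
# Hypothetical round-trip through the new directory-based KB serialization.
from spacy.kb import KnowledgeBase
from spacy.lang.en import English
from spacy.vocab import Vocab

nlp = English()
kb = KnowledgeBase(nlp.vocab, entity_vector_length=3)
kb.add_entity(entity="Q42", freq=12, entity_vector=[1.0, 2.0, 3.0])
kb.add_alias(alias="douglas", entities=["Q42"], probabilities=[0.8])

# Writes <dir>/contents (binary entries) and <dir>/strings.json (KB strings)
kb.to_disk("my_kb")

# Loading into a KB backed by a fresh Vocab still resolves the strings,
# because strings.json is merged back into that vocab on from_disk()
kb2 = KnowledgeBase(Vocab(), entity_vector_length=3)
kb2.from_disk("my_kb")
print([c.entity_ for c in kb2.get_alias_candidates("douglas")])  # ['Q42']
```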


@@ -16,6 +16,7 @@ from ..training import Example, validate_examples
from ..errors import Errors, Warnings
from ..util import SimpleFrozenList
from .. import util
+from ..scorer import Scorer

default_model_config = """
@@ -47,6 +48,8 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]
        "incl_context": True,
        "get_candidates": {"@misc": "spacy.CandidateGenerator.v1"},
    },
+    scores=["nel_micro_p", "nel_micro_r", "nel_micro_f"],
+    default_score_weights={"nel_micro_f": 1.0},
)
def make_entity_linker(
    nlp: Language,
@@ -209,12 +212,11 @@ class EntityLinker(Pipe):
        # it does run the model twice :(
        predictions = self.model.predict(docs)
        for eg in examples:
-            sentences = [s for s in eg.predicted.sents]
+            sentences = [s for s in eg.reference.sents]
            kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
-            for ent in eg.predicted.ents:
-                kb_id = kb_ids[
-                    ent.start
-                ]  # KB ID of the first token is the same as the whole span
+            for ent in eg.reference.ents:
+                # KB ID of the first token is the same as the whole span
+                kb_id = kb_ids[ent.start]
                if kb_id:
                    try:
                        # find the sentence in the list of sentences.
@@ -253,7 +255,7 @@ class EntityLinker(Pipe):
        entity_encodings = []
        for eg in examples:
            kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
-            for ent in eg.predicted.ents:
+            for ent in eg.reference.ents:
                kb_id = kb_ids[ent.start]
                if kb_id:
                    entity_encoding = self.kb.get_vector(kb_id)
@@ -415,6 +417,18 @@ class EntityLinker(Pipe):
                for token in ent:
                    token.ent_kb_id_ = kb_id

+    def score(self, examples, **kwargs):
+        """Score a batch of examples.
+
+        examples (Iterable[Example]): The examples to score.
+        RETURNS (Dict[str, Any]): The scores.
+
+        DOCS TODO: https://nightly.spacy.io/api/entity_linker#score
+        """
+        validate_examples(examples, "EntityLinker.score")
+        return Scorer.score_links(examples, negative_labels=[self.NIL])
+
    def to_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
    ) -> None:
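
Because the new `score` method only wraps `Scorer.score_links` with the component's `NIL` label, it can be exercised without a trained model. A sketch under that assumption (the pipeline, sentence, and KB ID below are illustrative, not taken from the diff):

```python
# Assumed usage of EntityLinker.score() on hand-built gold/predicted docs.
import spacy
from spacy.tokens import Span
from spacy.training import Example

nlp = spacy.blank("en")
entity_linker = nlp.add_pipe("entity_linker")

ref = nlp.make_doc("Russ Cochran published EC Comics.")
ref.ents = [Span(ref, 0, 2, label="PERSON", kb_id="Q7381115")]
pred = nlp.make_doc("Russ Cochran published EC Comics.")
pred.ents = [Span(pred, 0, 2, label="PERSON", kb_id="Q7381115")]

scores = entity_linker.score([Example(pred, ref)])
print(scores["nel_micro_f"])  # 1.0 for this single correct link
```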


@@ -451,6 +451,74 @@ class Scorer:
            results[f"{attr}_score_desc"] = "macro AUC"
        return results

+    @staticmethod
+    def score_links(
+        examples: Iterable[Example], *, negative_labels: Iterable[str]
+    ) -> Dict[str, Any]:
+        """Returns PRF for predicted links on the entity level.
+        To disentangle the performance of the NEL from the NER,
+        this method only evaluates NEL links for entities that overlap
+        between the gold reference and the predictions.
+
+        examples (Iterable[Example]): Examples to score
+        negative_labels (Iterable[str]): The string values that refer to no annotation (e.g. "NIL")
+        RETURNS (Dict[str, Any]): A dictionary containing the scores.
+
+        DOCS (TODO): https://nightly.spacy.io/api/scorer#score_links
+        """
+        f_per_type = {}
+        for example in examples:
+            gold_ent_by_offset = {}
+            for gold_ent in example.reference.ents:
+                gold_ent_by_offset[(gold_ent.start_char, gold_ent.end_char)] = gold_ent
+            for pred_ent in example.predicted.ents:
+                gold_span = gold_ent_by_offset.get(
+                    (pred_ent.start_char, pred_ent.end_char), None
+                )
+                label = gold_span.label_
+                if label not in f_per_type:
+                    f_per_type[label] = PRFScore()
+                gold = gold_span.kb_id_
+                # only evaluating entities that overlap between gold and pred,
+                # to disentangle the performance of the NEL from the NER
+                if gold is not None:
+                    pred = pred_ent.kb_id_
+                    if gold in negative_labels and pred in negative_labels:
+                        # ignore true negatives
+                        pass
+                    elif gold == pred:
+                        f_per_type[label].tp += 1
+                    elif gold in negative_labels:
+                        f_per_type[label].fp += 1
+                    elif pred in negative_labels:
+                        f_per_type[label].fn += 1
+                    else:
+                        # a wrong prediction (e.g. Q42 != Q3) counts as both a FP as well as a FN
+                        f_per_type[label].fp += 1
+                        f_per_type[label].fn += 1
+        micro_prf = PRFScore()
+        for label_prf in f_per_type.values():
+            micro_prf.tp += label_prf.tp
+            micro_prf.fn += label_prf.fn
+            micro_prf.fp += label_prf.fp
+        n_labels = len(f_per_type) + 1e-100
+        macro_p = sum(prf.precision for prf in f_per_type.values()) / n_labels
+        macro_r = sum(prf.recall for prf in f_per_type.values()) / n_labels
+        macro_f = sum(prf.fscore for prf in f_per_type.values()) / n_labels
+        results = {
+            "nel_score": micro_prf.fscore,
+            "nel_score_desc": "micro F",
+            "nel_micro_p": micro_prf.precision,
+            "nel_micro_r": micro_prf.recall,
+            "nel_micro_f": micro_prf.fscore,
+            "nel_macro_p": macro_p,
+            "nel_macro_r": macro_r,
+            "nel_macro_f": macro_f,
+            "nel_f_per_type": {k: v.to_dict() for k, v in f_per_type.items()},
+        }
+        return results
+
    @staticmethod
    def score_deps(
        examples: Iterable[Example],
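
To make the counting rules above concrete, a small self-contained sketch (sentence, spans, and KB IDs are made up, not from the diff): a correct link is a true positive, while a wrong link counts as both a false positive and a false negative, so one of each gives micro precision and recall of 0.5.

```python
# Illustrative call to the new Scorer.score_links static method.
import spacy
from spacy.scorer import Scorer
from spacy.tokens import Span
from spacy.training import Example

nlp = spacy.blank("en")
ref = nlp("Ada Lovelace was born in London.")
ref.ents = [
    Span(ref, 0, 2, label="PERSON", kb_id="Q7259"),
    Span(ref, 5, 6, label="LOC", kb_id="Q84"),
]
pred = nlp("Ada Lovelace was born in London.")
pred.ents = [
    Span(pred, 0, 2, label="PERSON", kb_id="Q7259"),  # correct link -> TP
    Span(pred, 5, 6, label="LOC", kb_id="Q60"),       # wrong link   -> FP + FN
]

scores = Scorer.score_links([Example(pred, ref)], negative_labels=["NIL"])
print(scores["nel_micro_p"], scores["nel_micro_r"])  # 0.5 0.5
```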


@@ -2,8 +2,10 @@ from typing import Callable, Iterable
import pytest

from spacy.kb import KnowledgeBase, get_candidates, Candidate
+from spacy.vocab import Vocab
from spacy import util, registry
+from spacy.scorer import Scorer
from spacy.training import Example
from spacy.lang.en import English
from spacy.tests.util import make_tempdir
@@ -151,22 +153,15 @@ def test_kb_serialize(nlp):
        # normal read-write behaviour
        mykb.to_disk(d / "kb")
        mykb.from_disk(d / "kb")
-        mykb.to_disk(d / "kb.file")
-        mykb.from_disk(d / "kb.file")
        mykb.to_disk(d / "new" / "kb")
        mykb.from_disk(d / "new" / "kb")
        # allow overwriting an existing file
-        mykb.to_disk(d / "kb.file")
-        with pytest.raises(ValueError):
-            # can not write to a directory
-            mykb.to_disk(d)
-        with pytest.raises(ValueError):
-            # can not read from a directory
-            mykb.from_disk(d)
+        mykb.to_disk(d / "kb")
        with pytest.raises(ValueError):
            # can not read from an unknown file
            mykb.from_disk(d / "unknown" / "kb")


def test_candidate_generation(nlp):
    """Test correct candidate generation"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
@@ -254,6 +249,41 @@ def test_el_pipe_configuration(nlp):
    assert doc[2].ent_kb_id_ == "Q2"


+def test_vocab_serialization(nlp):
+    """Test that string information is retained across storage"""
+    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
+
+    # adding entities
+    q1_hash = mykb.add_entity(entity="Q1", freq=27, entity_vector=[1])
+    q2_hash = mykb.add_entity(entity="Q2", freq=12, entity_vector=[2])
+    q3_hash = mykb.add_entity(entity="Q3", freq=5, entity_vector=[3])
+
+    # adding aliases
+    douglas_hash = mykb.add_alias(
+        alias="douglas", entities=["Q2", "Q3"], probabilities=[0.4, 0.1]
+    )
+    adam_hash = mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])
+
+    candidates = mykb.get_alias_candidates("adam")
+    assert len(candidates) == 1
+    assert candidates[0].entity == q2_hash
+    assert candidates[0].entity_ == "Q2"
+    assert candidates[0].alias == adam_hash
+    assert candidates[0].alias_ == "adam"
+
+    with make_tempdir() as d:
+        mykb.to_disk(d / "kb")
+        kb_new_vocab = KnowledgeBase(Vocab(), entity_vector_length=1)
+        kb_new_vocab.from_disk(d / "kb")
+
+        candidates = kb_new_vocab.get_alias_candidates("adam")
+        assert len(candidates) == 1
+        assert candidates[0].entity == q2_hash
+        assert candidates[0].entity_ == "Q2"
+        assert candidates[0].alias == adam_hash
+        assert candidates[0].alias_ == "adam"
+
+
def test_append_alias(nlp):
    """Test that we can append additional alias-entity pairs"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
@@ -377,16 +407,20 @@ def test_preserving_links_ents_2(nlp):
TRAIN_DATA = [
    ("Russ Cochran captured his first major title with his son as caddie.",
        {"links": {(0, 12): {"Q7381115": 0.0, "Q2146908": 1.0}},
-         "entities": [(0, 12, "PERSON")]}),
+         "entities": [(0, 12, "PERSON")],
+         "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}),
    ("Russ Cochran his reprints include EC Comics.",
        {"links": {(0, 12): {"Q7381115": 1.0, "Q2146908": 0.0}},
-         "entities": [(0, 12, "PERSON")]}),
+         "entities": [(0, 12, "PERSON")],
+         "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0]}),
    ("Russ Cochran has been publishing comic art.",
        {"links": {(0, 12): {"Q7381115": 1.0, "Q2146908": 0.0}},
-         "entities": [(0, 12, "PERSON")]}),
+         "entities": [(0, 12, "PERSON")],
+         "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0]}),
    ("Russ Cochran was a member of University of Kentucky's golf team.",
        {"links": {(0, 12): {"Q7381115": 0.0, "Q2146908": 1.0}},
-         "entities": [(0, 12, "PERSON"), (43, 51, "LOC")]}),
+         "entities": [(0, 12, "PERSON"), (43, 51, "LOC")],
+         "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]})
]
GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
# fmt: on
@@ -395,16 +429,8 @@ GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
def test_overfitting_IO():
    # Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly
    nlp = English()
-    nlp.add_pipe("sentencizer")
    vector_length = 3
-    # Add a custom component to recognize "Russ Cochran" as an entity for the example training data
-    patterns = [
-        {"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}
-    ]
-    ruler = nlp.add_pipe("entity_ruler")
-    ruler.add_patterns(patterns)

    # Convert the texts to docs to make sure we have doc.ents set for the training examples
    train_examples = []
    for text, annotation in TRAIN_DATA:
@@ -446,6 +472,16 @@ def test_overfitting_IO():
        nlp.update(train_examples, sgd=optimizer, losses=losses)
    assert losses["entity_linker"] < 0.001

+    # adding additional components that are required for the entity_linker
+    nlp.add_pipe("sentencizer", first=True)
+
+    # Add a custom component to recognize "Russ Cochran" as an entity for the example training data
+    patterns = [
+        {"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}
+    ]
+    ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
+    ruler.add_patterns(patterns)
+
    # test the trained model
    predictions = []
    for text, annotation in TRAIN_DATA:
@@ -465,3 +501,46 @@ def test_overfitting_IO():
        for ent in doc2.ents:
            predictions.append(ent.kb_id_)
    assert predictions == GOLD_entities
+
+
+def test_scorer_links():
+    train_examples = []
+    nlp = English()
+    ref1 = nlp("Julia lives in London happily.")
+    ref1.ents = [
+        Span(ref1, 0, 1, label="PERSON", kb_id="Q2"),
+        Span(ref1, 3, 4, label="LOC", kb_id="Q3"),
+    ]
+    pred1 = nlp("Julia lives in London happily.")
+    pred1.ents = [
+        Span(pred1, 0, 1, label="PERSON", kb_id="Q70"),
+        Span(pred1, 3, 4, label="LOC", kb_id="Q3"),
+    ]
+    train_examples.append(Example(pred1, ref1))
+
+    ref2 = nlp("She loves London.")
+    ref2.ents = [
+        Span(ref2, 0, 1, label="PERSON", kb_id="Q2"),
+        Span(ref2, 2, 3, label="LOC", kb_id="Q13"),
+    ]
+    pred2 = nlp("She loves London.")
+    pred2.ents = [
+        Span(pred2, 0, 1, label="PERSON", kb_id="Q2"),
+        Span(pred2, 2, 3, label="LOC", kb_id="NIL"),
+    ]
+    train_examples.append(Example(pred2, ref2))
+
+    ref3 = nlp("London is great.")
+    ref3.ents = [Span(ref3, 0, 1, label="LOC", kb_id="NIL")]
+    pred3 = nlp("London is great.")
+    pred3.ents = [Span(pred3, 0, 1, label="LOC", kb_id="NIL")]
+    train_examples.append(Example(pred3, ref3))
+
+    scores = Scorer().score_links(train_examples, negative_labels=["NIL"])
+    assert scores["nel_f_per_type"]["PERSON"]["p"] == 1 / 2
+    assert scores["nel_f_per_type"]["PERSON"]["r"] == 1 / 2
+    assert scores["nel_f_per_type"]["LOC"]["p"] == 1 / 1
+    assert scores["nel_f_per_type"]["LOC"]["r"] == 1 / 2
+    assert scores["nel_micro_p"] == 2 / 3
+    assert scores["nel_micro_r"] == 2 / 4


@@ -244,3 +244,22 @@ def test_Example_from_dict_with_links_invalid(annots):
    predicted = Doc(vocab, words=annots["words"])
    with pytest.raises(ValueError):
        Example.from_dict(predicted, annots)
+
+
+def test_Example_from_dict_sentences():
+    vocab = Vocab()
+    predicted = Doc(vocab, words=["One", "sentence", ".", "one", "more"])
+    annots = {"sent_starts": [1, 0, 0, 1, 0]}
+    ex = Example.from_dict(predicted, annots)
+    assert len(list(ex.reference.sents)) == 2
+
+    # this currently throws an error - bug or feature?
+    # predicted = Doc(vocab, words=["One", "sentence", "not", "one", "more"])
+    # annots = {"sent_starts": [1, 0, 0, 0, 0]}
+    # ex = Example.from_dict(predicted, annots)
+    # assert len(list(ex.reference.sents)) == 1
+
+    predicted = Doc(vocab, words=["One", "sentence", "not", "one", "more"])
+    annots = {"sent_starts": [1, -1, 0, 0, 0]}
+    ex = Example.from_dict(predicted, annots)
+    assert len(list(ex.reference.sents)) == 1


@@ -225,6 +225,21 @@ pipe's entity linking model and context encoder. Delegates to
| `losses`    | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~                                                                     |

+## EntityLinker.score {#score tag="method" new="3"}
+
+Score a batch of examples.
+
+> #### Example
+>
+> ```python
+> scores = entity_linker.score(examples)
+> ```
+
+| Name        | Description                                                                                    |
+| ----------- | ---------------------------------------------------------------------------------------------- |
+| `examples`  | The examples to score. ~~Iterable[Example]~~                                                   |
+| **RETURNS** | The scores, produced by [`Scorer.score_links`](/api/scorer#score_links). ~~Dict[str, float]~~  |
+
## EntityLinker.create_optimizer {#create_optimizer tag="method"}

Create an optimizer for the pipeline component.


@@ -206,3 +206,26 @@ depends on the scorer settings:
| `multi_label`    | Whether the attribute allows multiple labels. Defaults to `True`. ~~bool~~                             |
| `positive_label` | The positive label for a binary task with exclusive classes. Defaults to `None`. ~~Optional[str]~~     |
| **RETURNS**      | A dictionary containing the scores, with inapplicable scores as `None`. ~~Dict[str, Optional[float]]~~ |
+
+## Scorer.score_links {#score_links tag="staticmethod" new="3"}
+
+Returns PRF for predicted links on the entity level. To disentangle the
+performance of the NEL from the NER, this method only evaluates NEL links for
+entities that overlap between the gold reference and the predictions.
+
+> #### Example
+>
+> ```python
+> scores = Scorer.score_links(
+>     examples,
+>     negative_labels=["NIL", ""]
+> )
+> print(scores["nel_micro_f"])
+> ```
+
+| Name              | Description                                                                                                          |
+| ----------------- | -------------------------------------------------------------------------------------------------------------------- |
+| `examples`        | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~  |
+| _keyword-only_    |                                                                                                                      |
+| `negative_labels` | The string values that refer to no annotation (e.g. "NIL"). ~~Iterable[str]~~                                        |
+| **RETURNS**       | A dictionary containing the scores. ~~Dict[str, Optional[float]]~~                                                   |