NEL confidence threshold (#11016)

* Add base for NEL abstention threshold mechanism. * Add abstention threshold to entity linker. Add test. * Fix entity linking tests. * Changed abstention default threshold from 0 to None. * Fix default values for abstention thresholds. * Fix mypy errors. * Replace assertion with raise of proper error code. * Simplify threshold check. Remove thresholding from EntityLinker_v1. * Rename test. * Update spacy/pipeline/entity_linker.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Update spacy/pipeline/entity_linker.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Make E1043 configurable. * Update docs. * Rephrase description in docs. Adjusting error code message. Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
2025-07-05 20:33:10 +03:00 · 2022-07-04 17:05:21 +02:00 · 2022-07-04 17:05:21 +02:00 · e9eb59699f
commit e9eb59699f
parent 59c763eec1
5 changed files with 122 additions and 43 deletions
--- a/spacy/errors.py
+++ b/spacy/errors.py
@ -937,6 +937,8 @@ class Errors(metaclass=ErrorsWithCodes):
    E1041 = ("Expected a string, Doc, or bytes as input, but got: {type}")
    E1042 = ("Function was called with `{arg1}`={arg1_values} and "
             "`{arg2}`={arg2_values} but these arguments are conflicting.")
+    E1043 = ("Expected None or a value in range [{range_start}, {range_end}] for entity linker threshold, but got "
+             "{value}.")


 # Deprecated model shortcuts, only used in errors and warnings
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@ -56,6 +56,7 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]
        "overwrite": True,
        "scorer": {"@scorers": "spacy.entity_linker_scorer.v1"},
        "use_gold_ents": True,
+        "threshold": None,
    },
    default_score_weights={
        "nel_micro_f": 1.0,
@ -77,6 +78,7 @@ def make_entity_linker(
    overwrite: bool,
    scorer: Optional[Callable],
    use_gold_ents: bool,
+    threshold: Optional[float] = None,
 ):
    """Construct an EntityLinker component.

@ -91,6 +93,10 @@ def make_entity_linker(
    get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that
        produces a list of candidates, given a certain knowledge base and a textual mention.
    scorer (Optional[Callable]): The scoring method.
+    use_gold_ents (bool): Whether to copy entities from gold docs or not. If false, another
+        component must provide entity annotations.
+    threshold (Optional[float]): Confidence threshold for entity predictions. If confidence is below the threshold,
+        prediction is discarded. If None, predictions are not filtered by any threshold.
    """

    if not model.attrs.get("include_span_maker", False):
@ -121,6 +127,7 @@ def make_entity_linker(
        overwrite=overwrite,
        scorer=scorer,
        use_gold_ents=use_gold_ents,
+        threshold=threshold,
    )


@ -156,6 +163,7 @@ class EntityLinker(TrainablePipe):
        overwrite: bool = BACKWARD_OVERWRITE,
        scorer: Optional[Callable] = entity_linker_score,
        use_gold_ents: bool,
+        threshold: Optional[float] = None,
    ) -> None:
        """Initialize an entity linker.

@ -174,9 +182,20 @@ class EntityLinker(TrainablePipe):
            Scorer.score_links.
        use_gold_ents (bool): Whether to copy entities from gold docs or not. If false, another
            component must provide entity annotations.
-
+        threshold (Optional[float]): Confidence threshold for entity predictions. If confidence is below the
+            threshold, prediction is discarded. If None, predictions are not filtered by any threshold.
        DOCS: https://spacy.io/api/entitylinker#init
        """
+
+        if threshold is not None and not (0 <= threshold <= 1):
+            raise ValueError(
+                Errors.E1043.format(
+                    range_start=0,
+                    range_end=1,
+                    value=threshold,
+                )
+            )
+
        self.vocab = vocab
        self.model = model
        self.name = name
@ -192,6 +211,7 @@ class EntityLinker(TrainablePipe):
        self.kb = empty_kb(entity_vector_length)(self.vocab)
        self.scorer = scorer
        self.use_gold_ents = use_gold_ents
+        self.threshold = threshold

    def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]):
        """Define the KB of this pipe by providing a function that will
@ -424,9 +444,8 @@ class EntityLinker(TrainablePipe):
                    if not candidates:
                        # no prediction possible for this entity - setting to NIL
                        final_kb_ids.append(self.NIL)
-                    elif len(candidates) == 1:
+                    elif len(candidates) == 1 and self.threshold is None:
                        # shortcut for efficiency reasons: take the 1 candidate
-                        # TODO: thresholding
                        final_kb_ids.append(candidates[0].entity_)
                    else:
                        random.shuffle(candidates)
@ -455,10 +474,11 @@ class EntityLinker(TrainablePipe):
                            if sims.shape != prior_probs.shape:
                                raise ValueError(Errors.E161)
                            scores = prior_probs + sims - (prior_probs * sims)
-                        # TODO: thresholding
-                        best_index = scores.argmax().item()
-                        best_candidate = candidates[best_index]
-                        final_kb_ids.append(best_candidate.entity_)
+                        final_kb_ids.append(
+                            candidates[scores.argmax().item()].entity_
+                            if self.threshold is None or scores.max() >= self.threshold
+                            else EntityLinker.NIL
+                        )
        if not (len(final_kb_ids) == entity_count):
            err = Errors.E147.format(
                method="predict", msg="result variables not of equal length"
--- a/spacy/pipeline/legacy/entity_linker.py
+++ b/spacy/pipeline/legacy/entity_linker.py
@ -7,7 +7,7 @@ from pathlib import Path
 from itertools import islice
 import srsly
 import random
-from thinc.api import CosineDistance, Model, Optimizer, Config
+from thinc.api import CosineDistance, Model, Optimizer
 from thinc.api import set_dropout_rate
 import warnings

@ -20,7 +20,7 @@ from ...language import Language
 from ...vocab import Vocab
 from ...training import Example, validate_examples, validate_get_examples
 from ...errors import Errors, Warnings
-from ...util import SimpleFrozenList, registry
+from ...util import SimpleFrozenList
 from ... import util
 from ...scorer import Scorer

@ -70,7 +70,6 @@ class EntityLinker_v1(TrainablePipe):
            produces a list of candidates, given a certain knowledge base and a textual mention.
        scorer (Optional[Callable]): The scoring method. Defaults to
            Scorer.score_links.
-
        DOCS: https://spacy.io/api/entitylinker#init
        """
        self.vocab = vocab
@ -272,7 +271,6 @@ class EntityLinker_v1(TrainablePipe):
                            final_kb_ids.append(self.NIL)
                        elif len(candidates) == 1:
                            # shortcut for efficiency reasons: take the 1 candidate
-                            # TODO: thresholding
                            final_kb_ids.append(candidates[0].entity_)
                        else:
                            random.shuffle(candidates)
@ -301,7 +299,6 @@ class EntityLinker_v1(TrainablePipe):
                                if sims.shape != prior_probs.shape:
                                    raise ValueError(Errors.E161)
                                scores = prior_probs + sims - (prior_probs * sims)
-                            # TODO: thresholding
                            best_index = scores.argmax().item()
                            best_candidate = candidates[best_index]
                            final_kb_ids.append(best_candidate.entity_)
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@ -1,4 +1,4 @@
-from typing import Callable, Iterable
+from typing import Callable, Iterable, Dict, Any

 import pytest
 from numpy.testing import assert_equal
@ -207,7 +207,7 @@ def test_no_entities():
    nlp.add_pipe("sentencizer", first=True)

    # this will run the pipeline on the examples and shouldn't crash
-    results = nlp.evaluate(train_examples)
+    nlp.evaluate(train_examples)


 def test_partial_links():
@ -1063,7 +1063,7 @@ def test_no_gold_ents(patterns):
        "entity_linker", config={"use_gold_ents": False}, last=True
    )
    entity_linker.set_kb(create_kb)
-    assert entity_linker.use_gold_ents == False
+    assert entity_linker.use_gold_ents is False

    optimizer = nlp.initialize(get_examples=lambda: train_examples)
    for i in range(2):
@ -1074,7 +1074,7 @@ def test_no_gold_ents(patterns):
    nlp.add_pipe("sentencizer", first=True)

    # this will run the pipeline on the examples and shouldn't crash
-    results = nlp.evaluate(train_examples)
+    nlp.evaluate(train_examples)


@pytest.mark.issue(9575)
@ -1114,4 +1114,61 @@ def test_tokenization_mismatch():
        nlp.update(train_examples, sgd=optimizer, losses=losses)

    nlp.add_pipe("sentencizer", first=True)
-    results = nlp.evaluate(train_examples)
+    nlp.evaluate(train_examples)
+
+
+# fmt: off
+@pytest.mark.parametrize(
+    "meet_threshold,config",
+    [
+        (False, {"@architectures": "spacy.EntityLinker.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL}),
+        (True, {"@architectures": "spacy.EntityLinker.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL}),
+    ],
+)
+# fmt: on
+def test_threshold(meet_threshold: bool, config: Dict[str, Any]):
+    """Tests abstention threshold.
+
+    The KB holds a single candidate for "Mahler" whose prior probability is
+    either 1 (threshold met) or 0.01 (threshold not met), while the entity
+    linker is configured with a threshold of 0.99.
+
+    meet_threshold (bool): Whether to configure NEL setup so that confidence threshold is met.
+    config (Dict[str, Any]): NEL architecture config.
+    """
+    nlp = English()
+    nlp.add_pipe("sentencizer")
+    text = "Mahler's Symphony No. 8 was beautiful."
+    entities = [(0, 6, "PERSON")]
+    links = {(0, 6): {"Q7304": 1.0}}
+    sent_starts = [1, -1, 0, 0, 0, 0, 0, 0, 0]
+    entity_id = "Q7304"
+    doc = nlp(text)
+    train_examples = [
+        Example.from_dict(
+            doc, {"entities": entities, "links": links, "sent_starts": sent_starts}
+        )
+    ]
+
+    def create_kb(vocab):
+        # create artificial KB
+        mykb = KnowledgeBase(vocab, entity_vector_length=3)
+        mykb.add_entity(entity=entity_id, freq=12, entity_vector=[6, -4, 3])
+        mykb.add_alias(
+            alias="Mahler",
+            entities=[entity_id],
+            # The prior probability decides whether the 0.99 threshold can be met.
+            probabilities=[1 if meet_threshold else 0.01],
+        )
+        return mykb
+
+    # Create the Entity Linker component and add it to the pipeline
+    entity_linker = nlp.add_pipe(
+        "entity_linker",
+        last=True,
+        config={"threshold": 0.99, "model": config},
+    )
+    entity_linker.set_kb(create_kb)  # type: ignore
+    nlp.initialize(get_examples=lambda: train_examples)
+
+    # Add a custom rule-based component to mimic NER
+    ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
+    ruler.add_patterns([{"label": "PERSON", "pattern": [{"LOWER": "mahler"}]}])  # type: ignore
+    doc = nlp(text)
+
+    assert len(doc.ents) == 1
+    # Parenthesize the expected value: without the parentheses the conditional
+    # expression wraps the whole comparison, so the False case would only
+    # assert the (truthy) NIL constant and never check the prediction.
+    expected_kb_id = entity_id if meet_threshold else EntityLinker.NIL
+    assert doc.ents[0].kb_id_ == expected_kb_id
--- a/website/docs/api/entitylinker.md
+++ b/website/docs/api/entitylinker.md
@ -47,22 +47,24 @@ architectures and their arguments and hyperparameters.
 >    "model": DEFAULT_NEL_MODEL,
 >    "entity_vector_length": 64,
 >    "get_candidates": {'@misc': 'spacy.CandidateGenerator.v1'},
+>    "threshold": None,
 > }
 > nlp.add_pipe("entity_linker", config=config)
 > ```

-| Setting                                  | Description                                                                                                                                                                                                                                                              |
-| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `labels_discard`                         | NER labels that will automatically get a "NIL" prediction. Defaults to `[]`. ~~Iterable[str]~~                                                                                                                                                                           |
-| `n_sents`                                | The number of neighbouring sentences to take into account. Defaults to 0. ~~int~~                                                                                                                                                                                        |
-| `incl_prior`                             | Whether or not to include prior probabilities from the KB in the model. Defaults to `True`. ~~bool~~                                                                                                                                                                     |
-| `incl_context`                           | Whether or not to include the local context in the model. Defaults to `True`. ~~bool~~                                                                                                                                                                                   |
-| `model`                                  | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [EntityLinker](/api/architectures#EntityLinker). ~~Model~~                                                                                                                   |
-| `entity_vector_length`                   | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~                                                                                                                                                                                                            |
-| `use_gold_ents`                          | Whether to copy entities from the gold docs or not. Defaults to `True`. If `False`, entities must be set in the training data or by an annotating component in the pipeline. ~~int~~                                                                                     |
-| `get_candidates`                         | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
-| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                                 |
-| `scorer` <Tag variant="new">3.2</Tag>    | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~                                                                                                                                                                  |
+| Setting                                  | Description                                                                                                                                                                                                                                                                                 |
+| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `labels_discard`                         | NER labels that will automatically get a "NIL" prediction. Defaults to `[]`. ~~Iterable[str]~~                                                                                                                                                                                              |
+| `n_sents`                                | The number of neighbouring sentences to take into account. Defaults to 0. ~~int~~                                                                                                                                                                                                           |
+| `incl_prior`                             | Whether or not to include prior probabilities from the KB in the model. Defaults to `True`. ~~bool~~                                                                                                                                                                                        |
+| `incl_context`                           | Whether or not to include the local context in the model. Defaults to `True`. ~~bool~~                                                                                                                                                                                                      |
+| `model`                                  | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [EntityLinker](/api/architectures#EntityLinker). ~~Model~~                                                                                                                                      |
+| `entity_vector_length`                   | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~                                                                                                                                                                                                                               |
+| `use_gold_ents`                          | Whether to copy entities from the gold docs or not. Defaults to `True`. If `False`, entities must be set in the training data or by an annotating component in the pipeline. ~~bool~~                                                                                                       |
+| `get_candidates`                         | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~                    |
+| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                                                    |
+| `scorer` <Tag variant="new">3.2</Tag>    | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~                                                                                                                                                                                     |
+| `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |

 ```python
 %%GITHUB_SPACY/spacy/pipeline/entity_linker.py
@ -95,20 +97,21 @@ custom knowledge base, you should either call
 [`set_kb`](/api/entitylinker#set_kb) or provide a `kb_loader` in the
 [`initialize`](/api/entitylinker#initialize) call.

-| Name                                     | Description                                                                                                                      |
-| ---------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------- |
-| `vocab`                                  | The shared vocabulary. ~~Vocab~~                                                                                                 |
-| `model`                                  | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model~~                                        |
-| `name`                                   | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                              |
-| _keyword-only_                           |                                                                                                                                  |
-| `entity_vector_length`                   | Size of encoding vectors in the KB. ~~int~~                                                                                      |
-| `get_candidates`                         | Function that generates plausible candidates for a given `Span` object. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
-| `labels_discard`                         | NER labels that will automatically get a `"NIL"` prediction. ~~Iterable[str]~~                                                   |
-| `n_sents`                                | The number of neighbouring sentences to take into account. ~~int~~                                                               |
-| `incl_prior`                             | Whether or not to include prior probabilities from the KB in the model. ~~bool~~                                                 |
-| `incl_context`                           | Whether or not to include the local context in the model. ~~bool~~                                                               |
-| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~                                                         |
-| `scorer` <Tag variant="new">3.2</Tag>    | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~                          |
+| Name                                     | Description                                                                                                                                                                                                                                                                                 |
+| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `vocab`                                  | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                                                                            |
+| `model`                                  | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model~~                                                                                                                                                                                                   |
+| `name`                                   | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                                                                         |
+| _keyword-only_                           |                                                                                                                                                                                                                                                                                             |
+| `entity_vector_length`                   | Size of encoding vectors in the KB. ~~int~~                                                                                                                                                                                                                                                 |
+| `get_candidates`                         | Function that generates plausible candidates for a given `Span` object. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~                                                                                                                                                            |
+| `labels_discard`                         | NER labels that will automatically get a `"NIL"` prediction. ~~Iterable[str]~~                                                                                                                                                                                                              |
+| `n_sents`                                | The number of neighbouring sentences to take into account. ~~int~~                                                                                                                                                                                                                          |
+| `incl_prior`                             | Whether or not to include prior probabilities from the KB in the model. ~~bool~~                                                                                                                                                                                                            |
+| `incl_context`                           | Whether or not to include the local context in the model. ~~bool~~                                                                                                                                                                                                                          |
+| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                                                    |
+| `scorer` <Tag variant="new">3.2</Tag>    | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~                                                                                                                                                                                     |
+| `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |

 ## EntityLinker.\_\_call\_\_ {#call tag="method"}