mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
NEL confidence threshold (#11016)
* Add base for NEL abstention threshold mechanism. * Add abstention threshold to entity linker. Add test. * Fix entity linking tests. * Changed abstention default threshold from 0 to None. * Fix default values for abstention thresholds. * Fix mypy errors. * Replace assertion with raise of proper error code. * Simplify threshold check. Remove thresholding from EntityLinker_v1. * Rename test. * Update spacy/pipeline/entity_linker.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Update spacy/pipeline/entity_linker.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Make E1043 configurable. * Update docs. * Rephrase description in docs. Adjusting error code message. Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
parent
59c763eec1
commit
e9eb59699f
|
@ -937,6 +937,8 @@ class Errors(metaclass=ErrorsWithCodes):
|
||||||
E1041 = ("Expected a string, Doc, or bytes as input, but got: {type}")
|
E1041 = ("Expected a string, Doc, or bytes as input, but got: {type}")
|
||||||
E1042 = ("Function was called with `{arg1}`={arg1_values} and "
|
E1042 = ("Function was called with `{arg1}`={arg1_values} and "
|
||||||
"`{arg2}`={arg2_values} but these arguments are conflicting.")
|
"`{arg2}`={arg2_values} but these arguments are conflicting.")
|
||||||
|
E1043 = ("Expected None or a value in range [{range_start}, {range_end}] for entity linker threshold, but got "
|
||||||
|
"{value}.")
|
||||||
|
|
||||||
|
|
||||||
# Deprecated model shortcuts, only used in errors and warnings
|
# Deprecated model shortcuts, only used in errors and warnings
|
||||||
|
|
|
@ -56,6 +56,7 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]
|
||||||
"overwrite": True,
|
"overwrite": True,
|
||||||
"scorer": {"@scorers": "spacy.entity_linker_scorer.v1"},
|
"scorer": {"@scorers": "spacy.entity_linker_scorer.v1"},
|
||||||
"use_gold_ents": True,
|
"use_gold_ents": True,
|
||||||
|
"threshold": None,
|
||||||
},
|
},
|
||||||
default_score_weights={
|
default_score_weights={
|
||||||
"nel_micro_f": 1.0,
|
"nel_micro_f": 1.0,
|
||||||
|
@ -77,6 +78,7 @@ def make_entity_linker(
|
||||||
overwrite: bool,
|
overwrite: bool,
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
use_gold_ents: bool,
|
use_gold_ents: bool,
|
||||||
|
threshold: Optional[float] = None,
|
||||||
):
|
):
|
||||||
"""Construct an EntityLinker component.
|
"""Construct an EntityLinker component.
|
||||||
|
|
||||||
|
@ -91,6 +93,10 @@ def make_entity_linker(
|
||||||
get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that
|
get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that
|
||||||
produces a list of candidates, given a certain knowledge base and a textual mention.
|
produces a list of candidates, given a certain knowledge base and a textual mention.
|
||||||
scorer (Optional[Callable]): The scoring method.
|
scorer (Optional[Callable]): The scoring method.
|
||||||
|
use_gold_ents (bool): Whether to copy entities from gold docs or not. If false, another
|
||||||
|
component must provide entity annotations.
|
||||||
|
threshold (Optional[float]): Confidence threshold for entity predictions. If confidence is below the threshold,
|
||||||
|
prediction is discarded. If None, predictions are not filtered by any threshold.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if not model.attrs.get("include_span_maker", False):
|
if not model.attrs.get("include_span_maker", False):
|
||||||
|
@ -121,6 +127,7 @@ def make_entity_linker(
|
||||||
overwrite=overwrite,
|
overwrite=overwrite,
|
||||||
scorer=scorer,
|
scorer=scorer,
|
||||||
use_gold_ents=use_gold_ents,
|
use_gold_ents=use_gold_ents,
|
||||||
|
threshold=threshold,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -156,6 +163,7 @@ class EntityLinker(TrainablePipe):
|
||||||
overwrite: bool = BACKWARD_OVERWRITE,
|
overwrite: bool = BACKWARD_OVERWRITE,
|
||||||
scorer: Optional[Callable] = entity_linker_score,
|
scorer: Optional[Callable] = entity_linker_score,
|
||||||
use_gold_ents: bool,
|
use_gold_ents: bool,
|
||||||
|
threshold: Optional[float] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize an entity linker.
|
"""Initialize an entity linker.
|
||||||
|
|
||||||
|
@ -174,9 +182,20 @@ class EntityLinker(TrainablePipe):
|
||||||
Scorer.score_links.
|
Scorer.score_links.
|
||||||
use_gold_ents (bool): Whether to copy entities from gold docs or not. If false, another
|
use_gold_ents (bool): Whether to copy entities from gold docs or not. If false, another
|
||||||
component must provide entity annotations.
|
component must provide entity annotations.
|
||||||
|
threshold (Optional[float]): Confidence threshold for entity predictions. If confidence is below the
|
||||||
|
threshold, prediction is discarded. If None, predictions are not filtered by any threshold.
|
||||||
DOCS: https://spacy.io/api/entitylinker#init
|
DOCS: https://spacy.io/api/entitylinker#init
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
if threshold is not None and not (0 <= threshold <= 1):
|
||||||
|
raise ValueError(
|
||||||
|
Errors.E1043.format(
|
||||||
|
range_start=0,
|
||||||
|
range_end=1,
|
||||||
|
value=threshold,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.model = model
|
self.model = model
|
||||||
self.name = name
|
self.name = name
|
||||||
|
@ -192,6 +211,7 @@ class EntityLinker(TrainablePipe):
|
||||||
self.kb = empty_kb(entity_vector_length)(self.vocab)
|
self.kb = empty_kb(entity_vector_length)(self.vocab)
|
||||||
self.scorer = scorer
|
self.scorer = scorer
|
||||||
self.use_gold_ents = use_gold_ents
|
self.use_gold_ents = use_gold_ents
|
||||||
|
self.threshold = threshold
|
||||||
|
|
||||||
def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]):
|
def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]):
|
||||||
"""Define the KB of this pipe by providing a function that will
|
"""Define the KB of this pipe by providing a function that will
|
||||||
|
@ -424,9 +444,8 @@ class EntityLinker(TrainablePipe):
|
||||||
if not candidates:
|
if not candidates:
|
||||||
# no prediction possible for this entity - setting to NIL
|
# no prediction possible for this entity - setting to NIL
|
||||||
final_kb_ids.append(self.NIL)
|
final_kb_ids.append(self.NIL)
|
||||||
elif len(candidates) == 1:
|
elif len(candidates) == 1 and self.threshold is None:
|
||||||
# shortcut for efficiency reasons: take the 1 candidate
|
# shortcut for efficiency reasons: take the 1 candidate
|
||||||
# TODO: thresholding
|
|
||||||
final_kb_ids.append(candidates[0].entity_)
|
final_kb_ids.append(candidates[0].entity_)
|
||||||
else:
|
else:
|
||||||
random.shuffle(candidates)
|
random.shuffle(candidates)
|
||||||
|
@ -455,10 +474,11 @@ class EntityLinker(TrainablePipe):
|
||||||
if sims.shape != prior_probs.shape:
|
if sims.shape != prior_probs.shape:
|
||||||
raise ValueError(Errors.E161)
|
raise ValueError(Errors.E161)
|
||||||
scores = prior_probs + sims - (prior_probs * sims)
|
scores = prior_probs + sims - (prior_probs * sims)
|
||||||
# TODO: thresholding
|
final_kb_ids.append(
|
||||||
best_index = scores.argmax().item()
|
candidates[scores.argmax().item()].entity_
|
||||||
best_candidate = candidates[best_index]
|
if self.threshold is None or scores.max() >= self.threshold
|
||||||
final_kb_ids.append(best_candidate.entity_)
|
else EntityLinker.NIL
|
||||||
|
)
|
||||||
if not (len(final_kb_ids) == entity_count):
|
if not (len(final_kb_ids) == entity_count):
|
||||||
err = Errors.E147.format(
|
err = Errors.E147.format(
|
||||||
method="predict", msg="result variables not of equal length"
|
method="predict", msg="result variables not of equal length"
|
||||||
|
|
|
@ -7,7 +7,7 @@ from pathlib import Path
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
import srsly
|
import srsly
|
||||||
import random
|
import random
|
||||||
from thinc.api import CosineDistance, Model, Optimizer, Config
|
from thinc.api import CosineDistance, Model, Optimizer
|
||||||
from thinc.api import set_dropout_rate
|
from thinc.api import set_dropout_rate
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ from ...language import Language
|
||||||
from ...vocab import Vocab
|
from ...vocab import Vocab
|
||||||
from ...training import Example, validate_examples, validate_get_examples
|
from ...training import Example, validate_examples, validate_get_examples
|
||||||
from ...errors import Errors, Warnings
|
from ...errors import Errors, Warnings
|
||||||
from ...util import SimpleFrozenList, registry
|
from ...util import SimpleFrozenList
|
||||||
from ... import util
|
from ... import util
|
||||||
from ...scorer import Scorer
|
from ...scorer import Scorer
|
||||||
|
|
||||||
|
@ -70,7 +70,6 @@ class EntityLinker_v1(TrainablePipe):
|
||||||
produces a list of candidates, given a certain knowledge base and a textual mention.
|
produces a list of candidates, given a certain knowledge base and a textual mention.
|
||||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||||
Scorer.score_links.
|
Scorer.score_links.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entitylinker#init
|
DOCS: https://spacy.io/api/entitylinker#init
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
|
@ -272,7 +271,6 @@ class EntityLinker_v1(TrainablePipe):
|
||||||
final_kb_ids.append(self.NIL)
|
final_kb_ids.append(self.NIL)
|
||||||
elif len(candidates) == 1:
|
elif len(candidates) == 1:
|
||||||
# shortcut for efficiency reasons: take the 1 candidate
|
# shortcut for efficiency reasons: take the 1 candidate
|
||||||
# TODO: thresholding
|
|
||||||
final_kb_ids.append(candidates[0].entity_)
|
final_kb_ids.append(candidates[0].entity_)
|
||||||
else:
|
else:
|
||||||
random.shuffle(candidates)
|
random.shuffle(candidates)
|
||||||
|
@ -301,7 +299,6 @@ class EntityLinker_v1(TrainablePipe):
|
||||||
if sims.shape != prior_probs.shape:
|
if sims.shape != prior_probs.shape:
|
||||||
raise ValueError(Errors.E161)
|
raise ValueError(Errors.E161)
|
||||||
scores = prior_probs + sims - (prior_probs * sims)
|
scores = prior_probs + sims - (prior_probs * sims)
|
||||||
# TODO: thresholding
|
|
||||||
best_index = scores.argmax().item()
|
best_index = scores.argmax().item()
|
||||||
best_candidate = candidates[best_index]
|
best_candidate = candidates[best_index]
|
||||||
final_kb_ids.append(best_candidate.entity_)
|
final_kb_ids.append(best_candidate.entity_)
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import Callable, Iterable
|
from typing import Callable, Iterable, Dict, Any
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from numpy.testing import assert_equal
|
from numpy.testing import assert_equal
|
||||||
|
@ -207,7 +207,7 @@ def test_no_entities():
|
||||||
nlp.add_pipe("sentencizer", first=True)
|
nlp.add_pipe("sentencizer", first=True)
|
||||||
|
|
||||||
# this will run the pipeline on the examples and shouldn't crash
|
# this will run the pipeline on the examples and shouldn't crash
|
||||||
results = nlp.evaluate(train_examples)
|
nlp.evaluate(train_examples)
|
||||||
|
|
||||||
|
|
||||||
def test_partial_links():
|
def test_partial_links():
|
||||||
|
@ -1063,7 +1063,7 @@ def test_no_gold_ents(patterns):
|
||||||
"entity_linker", config={"use_gold_ents": False}, last=True
|
"entity_linker", config={"use_gold_ents": False}, last=True
|
||||||
)
|
)
|
||||||
entity_linker.set_kb(create_kb)
|
entity_linker.set_kb(create_kb)
|
||||||
assert entity_linker.use_gold_ents == False
|
assert entity_linker.use_gold_ents is False
|
||||||
|
|
||||||
optimizer = nlp.initialize(get_examples=lambda: train_examples)
|
optimizer = nlp.initialize(get_examples=lambda: train_examples)
|
||||||
for i in range(2):
|
for i in range(2):
|
||||||
|
@ -1074,7 +1074,7 @@ def test_no_gold_ents(patterns):
|
||||||
nlp.add_pipe("sentencizer", first=True)
|
nlp.add_pipe("sentencizer", first=True)
|
||||||
|
|
||||||
# this will run the pipeline on the examples and shouldn't crash
|
# this will run the pipeline on the examples and shouldn't crash
|
||||||
results = nlp.evaluate(train_examples)
|
nlp.evaluate(train_examples)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.issue(9575)
|
@pytest.mark.issue(9575)
|
||||||
|
@ -1114,4 +1114,61 @@ def test_tokenization_mismatch():
|
||||||
nlp.update(train_examples, sgd=optimizer, losses=losses)
|
nlp.update(train_examples, sgd=optimizer, losses=losses)
|
||||||
|
|
||||||
nlp.add_pipe("sentencizer", first=True)
|
nlp.add_pipe("sentencizer", first=True)
|
||||||
results = nlp.evaluate(train_examples)
|
nlp.evaluate(train_examples)
|
||||||
|
|
||||||
|
|
||||||
|
# fmt: off
@pytest.mark.parametrize(
    "meet_threshold,config",
    [
        (False, {"@architectures": "spacy.EntityLinker.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL}),
        (True, {"@architectures": "spacy.EntityLinker.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL}),
    ],
)
# fmt: on
def test_threshold(meet_threshold: bool, config: Dict[str, Any]):
    """Tests abstention threshold.

    The KB holds a single candidate for the mention "Mahler". Its prior
    probability is set to either 1 or 0.01, and the linker is configured with
    a threshold of 0.99, so the prediction is expected to be kept in the first
    case and replaced by NIL in the second.

    meet_threshold (bool): Whether to configure NEL setup so that confidence threshold is met.
    config (Dict[str, Any]): NEL architecture config.
    """
    nlp = English()
    nlp.add_pipe("sentencizer")
    text = "Mahler's Symphony No. 8 was beautiful."
    entities = [(0, 6, "PERSON")]
    links = {(0, 6): {"Q7304": 1.0}}
    sent_starts = [1, -1, 0, 0, 0, 0, 0, 0, 0]
    entity_id = "Q7304"
    doc = nlp(text)
    train_examples = [
        Example.from_dict(
            doc, {"entities": entities, "links": links, "sent_starts": sent_starts}
        )
    ]

    def create_kb(vocab):
        # create artificial KB
        mykb = KnowledgeBase(vocab, entity_vector_length=3)
        mykb.add_entity(entity=entity_id, freq=12, entity_vector=[6, -4, 3])
        mykb.add_alias(
            alias="Mahler",
            entities=[entity_id],
            # The prior probability decides whether the 0.99 threshold is met.
            probabilities=[1 if meet_threshold else 0.01],
        )
        return mykb

    # Create the Entity Linker component and add it to the pipeline
    entity_linker = nlp.add_pipe(
        "entity_linker",
        last=True,
        config={"threshold": 0.99, "model": config},
    )
    entity_linker.set_kb(create_kb)  # type: ignore
    nlp.initialize(get_examples=lambda: train_examples)

    # Add a custom rule-based component to mimic NER
    ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
    ruler.add_patterns([{"label": "PERSON", "pattern": [{"LOWER": "mahler"}]}])  # type: ignore
    doc = nlp(text)

    assert len(doc.ents) == 1
    # Resolve the expected ID first: `assert a == b if cond else c` parses as
    # `assert ((a == b) if cond else c)`, which would assert a non-empty string
    # (always truthy) in the below-threshold case and verify nothing.
    expected_kb_id = entity_id if meet_threshold else EntityLinker.NIL
    assert doc.ents[0].kb_id_ == expected_kb_id
|
||||||
|
|
|
@ -47,22 +47,24 @@ architectures and their arguments and hyperparameters.
|
||||||
> "model": DEFAULT_NEL_MODEL,
|
> "model": DEFAULT_NEL_MODEL,
|
||||||
> "entity_vector_length": 64,
|
> "entity_vector_length": 64,
|
||||||
> "get_candidates": {'@misc': 'spacy.CandidateGenerator.v1'},
|
> "get_candidates": {'@misc': 'spacy.CandidateGenerator.v1'},
|
||||||
|
> "threshold": None,
|
||||||
> }
|
> }
|
||||||
> nlp.add_pipe("entity_linker", config=config)
|
> nlp.add_pipe("entity_linker", config=config)
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Setting | Description |
|
| Setting | Description |
|
||||||
| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `labels_discard` | NER labels that will automatically get a "NIL" prediction. Defaults to `[]`. ~~Iterable[str]~~ |
|
| `labels_discard` | NER labels that will automatically get a "NIL" prediction. Defaults to `[]`. ~~Iterable[str]~~ |
|
||||||
| `n_sents` | The number of neighbouring sentences to take into account. Defaults to 0. ~~int~~ |
|
| `n_sents` | The number of neighbouring sentences to take into account. Defaults to 0. ~~int~~ |
|
||||||
| `incl_prior` | Whether or not to include prior probabilities from the KB in the model. Defaults to `True`. ~~bool~~ |
|
| `incl_prior` | Whether or not to include prior probabilities from the KB in the model. Defaults to `True`. ~~bool~~ |
|
||||||
| `incl_context` | Whether or not to include the local context in the model. Defaults to `True`. ~~bool~~ |
|
| `incl_context` | Whether or not to include the local context in the model. Defaults to `True`. ~~bool~~ |
|
||||||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [EntityLinker](/api/architectures#EntityLinker). ~~Model~~ |
|
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [EntityLinker](/api/architectures#EntityLinker). ~~Model~~ |
|
||||||
| `entity_vector_length` | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~ |
|
| `entity_vector_length` | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~ |
|
||||||
| `use_gold_ents` | Whether to copy entities from the gold docs or not. Defaults to `True`. If `False`, entities must be set in the training data or by an annotating component in the pipeline. ~~int~~ |
|
| `use_gold_ents` | Whether to copy entities from the gold docs or not. Defaults to `True`. If `False`, entities must be set in the training data or by an annotating component in the pipeline. ~~bool~~ |
|
||||||
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
||||||
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ |
|
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ |
|
||||||
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ |
|
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ |
|
||||||
|
| `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores at or above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |
|
||||||
|
|
||||||
```python
|
```python
|
||||||
%%GITHUB_SPACY/spacy/pipeline/entity_linker.py
|
%%GITHUB_SPACY/spacy/pipeline/entity_linker.py
|
||||||
|
@ -95,20 +97,21 @@ custom knowledge base, you should either call
|
||||||
[`set_kb`](/api/entitylinker#set_kb) or provide a `kb_loader` in the
|
[`set_kb`](/api/entitylinker#set_kb) or provide a `kb_loader` in the
|
||||||
[`initialize`](/api/entitylinker#initialize) call.
|
[`initialize`](/api/entitylinker#initialize) call.
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ---------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model~~ |
|
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model~~ |
|
||||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `entity_vector_length` | Size of encoding vectors in the KB. ~~int~~ |
|
| `entity_vector_length` | Size of encoding vectors in the KB. ~~int~~ |
|
||||||
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
||||||
| `labels_discard` | NER labels that will automatically get a `"NIL"` prediction. ~~Iterable[str]~~ |
|
| `labels_discard` | NER labels that will automatically get a `"NIL"` prediction. ~~Iterable[str]~~ |
|
||||||
| `n_sents` | The number of neighbouring sentences to take into account. ~~int~~ |
|
| `n_sents` | The number of neighbouring sentences to take into account. ~~int~~ |
|
||||||
| `incl_prior` | Whether or not to include prior probabilities from the KB in the model. ~~bool~~ |
|
| `incl_prior` | Whether or not to include prior probabilities from the KB in the model. ~~bool~~ |
|
||||||
| `incl_context` | Whether or not to include the local context in the model. ~~bool~~ |
|
| `incl_context` | Whether or not to include the local context in the model. ~~bool~~ |
|
||||||
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ |
|
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ |
|
||||||
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ |
|
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ |
|
||||||
|
| `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores at or above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |
|
||||||
|
|
||||||
## EntityLinker.\_\_call\_\_ {#call tag="method"}
|
## EntityLinker.\_\_call\_\_ {#call tag="method"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user