Rename Candidate to InMemoryCandidate, BaseCandidate to Candidate.

This commit is contained in:
Raphael Mitsch 2023-03-01 14:27:50 +01:00
parent 417e8fea8b
commit 49abf4fb3a
8 changed files with 58 additions and 56 deletions

View File

@ -1,5 +1,5 @@
from .kb import KnowledgeBase
from .kb_in_memory import InMemoryLookupKB
from .candidate import Candidate
from .candidate import Candidate, InMemoryCandidate
__all__ = ["KnowledgeBase", "InMemoryLookupKB", "Candidate"]
__all__ = ["KnowledgeBase", "InMemoryLookupKB", "Candidate", "InMemoryCandidate"]

View File

@ -2,8 +2,8 @@ import abc
from typing import List, Union, Callable
class BaseCandidate(abc.ABC):
"""A `BaseCandidate` object refers to a textual mention (`alias`) that may or may not be resolved
class Candidate(abc.ABC):
"""A `Candidate` object refers to a textual mention (`alias`) that may or may not be resolved
to a specific `entity_id` from a Knowledge Base. This will be used as input for the entity linking
algorithm which will disambiguate the various candidates to the correct one.
Each candidate (alias, entity_id) pair is assigned a certain prior probability.
@ -14,7 +14,7 @@ class BaseCandidate(abc.ABC):
def __init__(
self, mention: str, entity_id: Union[int, str], entity_vector: List[float]
):
"""Initializes properties of `BaseCandidate`.
"""Initializes properties of `Candidate` instance.
mention (str): Mention text for this candidate.
entity_id (Union[int, str]): Unique entity ID.
entity_vector (List[float]): Entity embedding.
@ -44,8 +44,8 @@ class BaseCandidate(abc.ABC):
return self._entity_vector
class Candidate(BaseCandidate):
"""`Candidate` for InMemoryLookupKB."""
class InMemoryCandidate(Candidate):
"""Candidate for InMemoryLookupKB."""
def __init__(
self,

View File

@ -36,7 +36,7 @@ cdef class KnowledgeBase:
and the prior probability of that alias resolving to that entity.
If no candidate is found for a given text, an empty list is returned.
mentions (Iterable[Span]): Mentions for which to get candidates.
RETURNS (Iterable[Iterable[Candidate]]): Identified candidates.
RETURNS (Iterable[Iterable[InMemoryCandidate]]): Identified candidates.
"""
return [self.get_candidates(span) for span in mentions]
@ -46,7 +46,7 @@ cdef class KnowledgeBase:
and the prior probability of that alias resolving to that entity.
If no candidate is found for a given text, an empty list is returned.
mention (Span): Mention for which to get candidates.
RETURNS (Iterable[Candidate]): Identified candidates.
RETURNS (Iterable[InMemoryCandidate]): Identified candidates.
"""
raise NotImplementedError(
Errors.E1045.format(parent="KnowledgeBase", method="get_candidates", name=self.__name__)

View File

@ -18,7 +18,7 @@ from .. import util
from ..util import SimpleFrozenList, ensure_path
from ..vocab cimport Vocab
from .kb cimport KnowledgeBase
from .candidate import Candidate as Candidate
from .candidate import InMemoryCandidate
cdef class InMemoryLookupKB(KnowledgeBase):
@ -223,10 +223,10 @@ cdef class InMemoryLookupKB(KnowledgeBase):
alias_entry.probs = probs
self._aliases_table[alias_index] = alias_entry
def get_candidates(self, mention: Span) -> Iterable[Candidate]:
def get_candidates(self, mention: Span) -> Iterable[InMemoryCandidate]:
return self.get_alias_candidates(mention.text) # type: ignore
def get_alias_candidates(self, str alias) -> Iterable[Candidate]:
def get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]:
"""
Return candidate entities for an alias. Each candidate defines the entity, the original alias,
and the prior probability of that alias resolving to that entity.
@ -239,7 +239,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
alias_entry = self._aliases_table[alias_index]
return [
Candidate(
InMemoryCandidate(
retrieve_string_from_hash=self.vocab.strings.__getitem__,
entity_hash=self._entries[entry_index].entity_hash,
entity_freq=self._entries[entry_index].freq,

View File

@ -116,7 +116,7 @@ def get_candidates(kb: KnowledgeBase, mention: Span) -> Iterable[Candidate]:
Return candidate entities for a given mention by fetching appropriate entries from the index.
kb (KnowledgeBase): Knowledge base to query.
mention (Span): Entity mention for which to identify candidates.
RETURNS (Iterable[Candidate]): Identified candidates.
RETURNS (Iterable[InMemoryCandidate]): Identified candidates.
"""
return kb.get_candidates(mention)
@ -128,6 +128,6 @@ def get_candidates_batch(
Return candidate entities for the given mentions by fetching appropriate entries from the index.
kb (KnowledgeBase): Knowledge base to query.
mentions (Iterable[Span]): Entity mentions for which to identify candidates.
RETURNS (Iterable[Iterable[Candidate]]): Identified candidates.
RETURNS (Iterable[Iterable[InMemoryCandidate]]): Identified candidates.
"""
return kb.get_candidates_batch(mentions)

View File

@ -7,7 +7,7 @@ from thinc.types import Ragged
from spacy import registry, util
from spacy.attrs import ENT_KB_ID
from spacy.compat import pickle
from spacy.kb import Candidate, InMemoryLookupKB, KnowledgeBase
from spacy.kb import InMemoryCandidate, InMemoryLookupKB, KnowledgeBase
from spacy.lang.en import English
from spacy.ml import load_kb
from spacy.ml.models.entity_linker import build_span_maker, get_candidates
@ -506,13 +506,13 @@ def test_el_pipe_configuration(nlp):
@registry.misc("spacy.LowercaseCandidateGenerator.v1")
def create_candidates() -> Callable[
[InMemoryLookupKB, "Span"], Iterable[Candidate]
[InMemoryLookupKB, "Span"], Iterable[InMemoryCandidate]
]:
return get_lowercased_candidates
@registry.misc("spacy.LowercaseCandidateBatchGenerator.v1")
def create_candidates_batch() -> Callable[
[InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]]
[InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[InMemoryCandidate]]
]:
return get_lowercased_candidates_batch

View File

@ -10,9 +10,9 @@ version: 3.5
The `InMemoryLookupKB` class inherits from [`KnowledgeBase`](/api/kb) and
implements all of its methods. It stores all KB data in-memory and generates
[`Candidate`](/api/kb#candidate) objects by exactly matching mentions with
entity names. It's highly optimized for both a low memory footprint and speed of
retrieval.
[`InMemoryCandidate`](/api/kb#candidate) objects by exactly matching mentions
with entity names. It's highly optimized for both a low memory footprint and
speed of retrieval.
## InMemoryLookupKB.\_\_init\_\_ {id="init",tag="method"}
@ -156,7 +156,7 @@ Get a list of all aliases in the knowledge base.
## InMemoryLookupKB.get_candidates {id="get_candidates",tag="method"}
Given a certain textual mention as input, retrieve a list of candidate entities
of type [`Candidate`](/api/kb#candidate). Wraps
of type [`InMemoryCandidate`](/api/kb#candidate). Wraps
[`get_alias_candidates()`](/api/inmemorylookupkb#get_alias_candidates).
> #### Example
@ -169,9 +169,9 @@ of type [`Candidate`](/api/kb#candidate). Wraps
> ```
| Name | Description |
| ----------- | -------------------------------------------------------------------- |
| ----------- | ------------------------------------------------------------------------------------ |
| `mention` | The textual mention or alias. ~~Span~~ |
| **RETURNS** | An iterable of relevant `Candidate` objects. ~~Iterable[Candidate]~~ |
| **RETURNS** | An iterable of relevant `InMemoryCandidate` objects. ~~Iterable[InMemoryCandidate]~~ |
## InMemoryLookupKB.get_candidates_batch {id="get_candidates_batch",tag="method"}
@ -195,14 +195,14 @@ to you.
> ```
| Name | Description |
| ----------- | -------------------------------------------------------------------------------------------- |
| ----------- | ------------------------------------------------------------------------------------------------------------ |
| `mentions` | The textual mention or alias. ~~Iterable[Span]~~ |
| **RETURNS** | An iterable of iterable with relevant `Candidate` objects. ~~Iterable[Iterable[Candidate]]~~ |
| **RETURNS** | An iterable of iterables with relevant `InMemoryCandidate` objects. ~~Iterable[Iterable[InMemoryCandidate]]~~ |
## InMemoryLookupKB.get_alias_candidates {id="get_alias_candidates",tag="method"}
Given a certain textual mention as input, retrieve a list of candidate entities
of type [`Candidate`](/api/kb#candidate).
of type [`InMemoryCandidate`](/api/kb#candidate).
> #### Example
>
@ -211,9 +211,9 @@ of type [`Candidate`](/api/kb#candidate).
> ```
| Name | Description |
| ----------- | ------------------------------------------------------------- |
| ----------- | ----------------------------------------------------------------------------- |
| `alias` | The textual mention or alias. ~~str~~ |
| **RETURNS** | The list of relevant `Candidate` objects. ~~List[Candidate]~~ |
| **RETURNS** | The list of relevant `InMemoryCandidate` objects. ~~List[InMemoryCandidate]~~ |
## InMemoryLookupKB.get_vector {id="get_vector",tag="method"}

View File

@ -9,8 +9,8 @@ version: 2.2
---
The `KnowledgeBase` object is an abstract class providing a method to generate
[`Candidate`](/api/kb#candidate) objects, which are plausible external
identifiers given a certain textual mention. Each such `Candidate` holds
[`InMemoryCandidate`](/api/kb#candidate) objects, which are plausible external
identifiers given a certain textual mention. Each such `InMemoryCandidate` holds
information from the relevant KB entities, such as its frequency in text and
possible aliases. Each entity in the knowledge base also has a pretrained entity
vector of a fixed size.
@ -73,9 +73,9 @@ of type [`Candidate`](/api/kb#candidate).
> ```
| Name | Description |
| ----------- | -------------------------------------------------------------------- |
| ----------- | ---------------------------------------------------------------------------- |
| `mention` | The textual mention or alias. ~~Span~~ |
| **RETURNS** | An iterable of relevant `Candidate` objects. ~~Iterable[Candidate]~~ |
| **RETURNS** | An iterable of relevant `InMemoryCandidate` objects. ~~Iterable[InMemoryCandidate]~~ |
## KnowledgeBase.get_candidates_batch {id="get_candidates_batch",tag="method"}
@ -190,25 +190,27 @@ Restore the state of the knowledge base from a given directory. Note that the
| `exclude` | List of components to exclude. ~~Iterable[str]~~ |
| **RETURNS** | The modified `KnowledgeBase` object. ~~KnowledgeBase~~ |
## Candidate {id="candidate",tag="class"}
## InMemoryCandidate {id="candidate",tag="class"}
A `Candidate` object refers to a textual mention (alias) that may or may not be
resolved to a specific entity from a `KnowledgeBase`. This will be used as input
for the entity linking algorithm which will disambiguate the various candidates
to the correct one. Each candidate `(alias, entity)` pair is assigned to a
certain prior probability.
An `InMemoryCandidate` object refers to a textual mention (alias) that may or may
not be resolved to a specific entity from a `KnowledgeBase`. This will be used
as input for the entity linking algorithm which will disambiguate the various
candidates to the correct one. Each candidate `(alias, entity)` pair is assigned
a certain prior probability.
### Candidate.\_\_init\_\_ {id="candidate-init",tag="method"}
### InMemoryCandidate.\_\_init\_\_ {id="candidate-init",tag="method"}
Construct a `Candidate` object. Usually this constructor is not called directly,
but instead these objects are returned by the `get_candidates` method of the
[`entity_linker`](/api/entitylinker) pipe.
Construct an `InMemoryCandidate` object. Usually this constructor is not called
directly, but instead these objects are returned by the `get_candidates` method
of the [`entity_linker`](/api/entitylinker) pipe.
> #### Example
> #### Example
>
> ```python
> from spacy.kb import InMemoryCandidate
> candidate = InMemoryCandidate(kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob)
> ```
>
> ```python
> from spacy.kb import Candidate
> candidate = Candidate(kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob)
> ```
| Name | Description |
@ -219,7 +221,7 @@ but instead these objects are returned by the `get_candidates` method of the
| `alias_hash` | The hash of the textual mention or alias. ~~int~~ |
| `prior_prob` | The prior probability of the `alias` referring to the `entity`. ~~float~~ |
## Candidate attributes {id="candidate-attributes"}
## InMemoryCandidate attributes {id="candidate-attributes"}
| Name | Description |
| --------------- | ------------------------------------------------------------------------ |