Rename Candidate to InMemoryCandidate, BaseCandidate to Candidate.

This commit is contained in:
Raphael Mitsch 2023-03-01 14:27:50 +01:00
parent 417e8fea8b
commit 49abf4fb3a
8 changed files with 58 additions and 56 deletions

View File

@ -1,5 +1,5 @@
from .kb import KnowledgeBase from .kb import KnowledgeBase
from .kb_in_memory import InMemoryLookupKB from .kb_in_memory import InMemoryLookupKB
from .candidate import Candidate from .candidate import Candidate, InMemoryCandidate
__all__ = ["KnowledgeBase", "InMemoryLookupKB", "Candidate"] __all__ = ["KnowledgeBase", "InMemoryLookupKB", "Candidate", "InMemoryCandidate"]

View File

@ -2,8 +2,8 @@ import abc
from typing import List, Union, Callable from typing import List, Union, Callable
class BaseCandidate(abc.ABC): class Candidate(abc.ABC):
"""A `BaseCandidate` object refers to a textual mention (`alias`) that may or may not be resolved """A `Candidate` object refers to a textual mention (`alias`) that may or may not be resolved
to a specific `entity_id` from a Knowledge Base. This will be used as input for the entity_id linking to a specific `entity_id` from a Knowledge Base. This will be used as input for the entity_id linking
algorithm which will disambiguate the various candidates to the correct one. algorithm which will disambiguate the various candidates to the correct one.
Each candidate (alias, entity_id) pair is assigned a certain prior probability. Each candidate (alias, entity_id) pair is assigned a certain prior probability.
@ -14,7 +14,7 @@ class BaseCandidate(abc.ABC):
def __init__( def __init__(
self, mention: str, entity_id: Union[int, str], entity_vector: List[float] self, mention: str, entity_id: Union[int, str], entity_vector: List[float]
): ):
"""Initializes properties of `BaseCandidate`. """Initializes properties of `Candidate` instance.
mention (str): Mention text for this candidate. mention (str): Mention text for this candidate.
entity_id (Union[int, str]): Unique entity ID. entity_id (Union[int, str]): Unique entity ID.
entity_vector (List[float]): Entity embedding. entity_vector (List[float]): Entity embedding.
@ -44,8 +44,8 @@ class BaseCandidate(abc.ABC):
return self._entity_vector return self._entity_vector
class Candidate(BaseCandidate): class InMemoryCandidate(Candidate):
"""`Candidate` for InMemoryLookupKB.""" """Candidate for InMemoryLookupKB."""
def __init__( def __init__(
self, self,

View File

@ -36,7 +36,7 @@ cdef class KnowledgeBase:
and the prior probability of that alias resolving to that entity. and the prior probability of that alias resolving to that entity.
If no candidate is found for a given text, an empty list is returned. If no candidate is found for a given text, an empty list is returned.
mentions (Iterable[Span]): Mentions for which to get candidates. mentions (Iterable[Span]): Mentions for which to get candidates.
RETURNS (Iterable[Iterable[Candidate]]): Identified candidates. RETURNS (Iterable[Iterable[InMemoryCandidate]]): Identified candidates.
""" """
return [self.get_candidates(span) for span in mentions] return [self.get_candidates(span) for span in mentions]
@ -46,7 +46,7 @@ cdef class KnowledgeBase:
and the prior probability of that alias resolving to that entity. and the prior probability of that alias resolving to that entity.
If no candidate is found for a given text, an empty list is returned. If no candidate is found for a given text, an empty list is returned.
mention (Span): Mention for which to get candidates. mention (Span): Mention for which to get candidates.
RETURNS (Iterable[Candidate]): Identified candidates. RETURNS (Iterable[InMemoryCandidate]): Identified candidates.
""" """
raise NotImplementedError( raise NotImplementedError(
Errors.E1045.format(parent="KnowledgeBase", method="get_candidates", name=self.__name__) Errors.E1045.format(parent="KnowledgeBase", method="get_candidates", name=self.__name__)

View File

@ -18,7 +18,7 @@ from .. import util
from ..util import SimpleFrozenList, ensure_path from ..util import SimpleFrozenList, ensure_path
from ..vocab cimport Vocab from ..vocab cimport Vocab
from .kb cimport KnowledgeBase from .kb cimport KnowledgeBase
from .candidate import Candidate as Candidate from .candidate import InMemoryCandidate
cdef class InMemoryLookupKB(KnowledgeBase): cdef class InMemoryLookupKB(KnowledgeBase):
@ -223,10 +223,10 @@ cdef class InMemoryLookupKB(KnowledgeBase):
alias_entry.probs = probs alias_entry.probs = probs
self._aliases_table[alias_index] = alias_entry self._aliases_table[alias_index] = alias_entry
def get_candidates(self, mention: Span) -> Iterable[Candidate]: def get_candidates(self, mention: Span) -> Iterable[InMemoryCandidate]:
return self.get_alias_candidates(mention.text) # type: ignore return self.get_alias_candidates(mention.text) # type: ignore
def get_alias_candidates(self, str alias) -> Iterable[Candidate]: def get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]:
""" """
Return candidate entities for an alias. Each candidate defines the entity, the original alias, Return candidate entities for an alias. Each candidate defines the entity, the original alias,
and the prior probability of that alias resolving to that entity. and the prior probability of that alias resolving to that entity.
@ -239,7 +239,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
alias_entry = self._aliases_table[alias_index] alias_entry = self._aliases_table[alias_index]
return [ return [
Candidate( InMemoryCandidate(
retrieve_string_from_hash=self.vocab.strings.__getitem__, retrieve_string_from_hash=self.vocab.strings.__getitem__,
entity_hash=self._entries[entry_index].entity_hash, entity_hash=self._entries[entry_index].entity_hash,
entity_freq=self._entries[entry_index].freq, entity_freq=self._entries[entry_index].freq,

View File

@ -116,7 +116,7 @@ def get_candidates(kb: KnowledgeBase, mention: Span) -> Iterable[Candidate]:
Return candidate entities for a given mention and fetching appropriate entries from the index. Return candidate entities for a given mention and fetching appropriate entries from the index.
kb (KnowledgeBase): Knowledge base to query. kb (KnowledgeBase): Knowledge base to query.
mention (Span): Entity mention for which to identify candidates. mention (Span): Entity mention for which to identify candidates.
RETURNS (Iterable[Candidate]): Identified candidates. RETURNS (Iterable[Candidate]): Identified candidates.
""" """
return kb.get_candidates(mention) return kb.get_candidates(mention)
@ -128,6 +128,6 @@ def get_candidates_batch(
Return candidate entities for the given mentions and fetching appropriate entries from the index. Return candidate entities for the given mentions and fetching appropriate entries from the index.
kb (KnowledgeBase): Knowledge base to query. kb (KnowledgeBase): Knowledge base to query.
mention (Iterable[Span]): Entity mentions for which to identify candidates. mention (Iterable[Span]): Entity mentions for which to identify candidates.
RETURNS (Iterable[Iterable[Candidate]]): Identified candidates. RETURNS (Iterable[Iterable[InMemoryCandidate]]): Identified candidates.
""" """
return kb.get_candidates_batch(mentions) return kb.get_candidates_batch(mentions)

View File

@ -7,7 +7,7 @@ from thinc.types import Ragged
from spacy import registry, util from spacy import registry, util
from spacy.attrs import ENT_KB_ID from spacy.attrs import ENT_KB_ID
from spacy.compat import pickle from spacy.compat import pickle
from spacy.kb import Candidate, InMemoryLookupKB, KnowledgeBase from spacy.kb import InMemoryCandidate, InMemoryLookupKB, KnowledgeBase
from spacy.lang.en import English from spacy.lang.en import English
from spacy.ml import load_kb from spacy.ml import load_kb
from spacy.ml.models.entity_linker import build_span_maker, get_candidates from spacy.ml.models.entity_linker import build_span_maker, get_candidates
@ -506,13 +506,13 @@ def test_el_pipe_configuration(nlp):
@registry.misc("spacy.LowercaseCandidateGenerator.v1") @registry.misc("spacy.LowercaseCandidateGenerator.v1")
def create_candidates() -> Callable[ def create_candidates() -> Callable[
[InMemoryLookupKB, "Span"], Iterable[Candidate] [InMemoryLookupKB, "Span"], Iterable[InMemoryCandidate]
]: ]:
return get_lowercased_candidates return get_lowercased_candidates
@registry.misc("spacy.LowercaseCandidateBatchGenerator.v1") @registry.misc("spacy.LowercaseCandidateBatchGenerator.v1")
def create_candidates_batch() -> Callable[ def create_candidates_batch() -> Callable[
[InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]] [InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[InMemoryCandidate]]
]: ]:
return get_lowercased_candidates_batch return get_lowercased_candidates_batch

View File

@ -10,9 +10,9 @@ version: 3.5
The `InMemoryLookupKB` class inherits from [`KnowledgeBase`](/api/kb) and The `InMemoryLookupKB` class inherits from [`KnowledgeBase`](/api/kb) and
implements all of its methods. It stores all KB data in-memory and generates implements all of its methods. It stores all KB data in-memory and generates
[`Candidate`](/api/kb#candidate) objects by exactly matching mentions with [`InMemoryCandidate`](/api/kb#candidate) objects by exactly matching mentions
entity names. It's highly optimized for both a low memory footprint and speed of with entity names. It's highly optimized for both a low memory footprint and
retrieval. speed of retrieval.
## InMemoryLookupKB.\_\_init\_\_ {id="init",tag="method"} ## InMemoryLookupKB.\_\_init\_\_ {id="init",tag="method"}
@ -156,7 +156,7 @@ Get a list of all aliases in the knowledge base.
## InMemoryLookupKB.get_candidates {id="get_candidates",tag="method"} ## InMemoryLookupKB.get_candidates {id="get_candidates",tag="method"}
Given a certain textual mention as input, retrieve a list of candidate entities Given a certain textual mention as input, retrieve a list of candidate entities
of type [`Candidate`](/api/kb#candidate). Wraps of type [`InMemoryCandidate`](/api/kb#candidate). Wraps
[`get_alias_candidates()`](/api/inmemorylookupkb#get_alias_candidates). [`get_alias_candidates()`](/api/inmemorylookupkb#get_alias_candidates).
> #### Example > #### Example
@ -168,10 +168,10 @@ of type [`Candidate`](/api/kb#candidate). Wraps
> candidates = kb.get_candidates(doc[0:2]) > candidates = kb.get_candidates(doc[0:2])
> ``` > ```
| Name | Description | | Name | Description |
| ----------- | -------------------------------------------------------------------- | | ----------- | ------------------------------------------------------------------------------------ |
| `mention` | The textual mention or alias. ~~Span~~ | | `mention` | The textual mention or alias. ~~Span~~ |
| **RETURNS** | An iterable of relevant `Candidate` objects. ~~Iterable[Candidate]~~ | | **RETURNS** | An iterable of relevant `InMemoryCandidate` objects. ~~Iterable[InMemoryCandidate]~~ |
## InMemoryLookupKB.get_candidates_batch {id="get_candidates_batch",tag="method"} ## InMemoryLookupKB.get_candidates_batch {id="get_candidates_batch",tag="method"}
@ -194,15 +194,15 @@ to you.
> candidates = kb.get_candidates((doc[0:2], doc[3:])) > candidates = kb.get_candidates((doc[0:2], doc[3:]))
> ``` > ```
| Name | Description | | Name | Description |
| ----------- | -------------------------------------------------------------------------------------------- | | ----------- | ------------------------------------------------------------------------------------------------------------ |
| `mentions` | The textual mention or alias. ~~Iterable[Span]~~ | | `mentions` | The textual mention or alias. ~~Iterable[Span]~~ |
| **RETURNS** | An iterable of iterable with relevant `Candidate` objects. ~~Iterable[Iterable[Candidate]]~~ | | **RETURNS** | An iterable of iterable with relevant `InMemoryCandidate` objects. ~~Iterable[Iterable[InMemoryCandidate]]~~ |
## InMemoryLookupKB.get_alias_candidates {id="get_alias_candidates",tag="method"} ## InMemoryLookupKB.get_alias_candidates {id="get_alias_candidates",tag="method"}
Given a certain textual mention as input, retrieve a list of candidate entities Given a certain textual mention as input, retrieve a list of candidate entities
of type [`Candidate`](/api/kb#candidate). of type [`InMemoryCandidate`](/api/kb#candidate).
> #### Example > #### Example
> >
@ -210,10 +210,10 @@ of type [`Candidate`](/api/kb#candidate).
> candidates = kb.get_alias_candidates("Douglas") > candidates = kb.get_alias_candidates("Douglas")
> ``` > ```
| Name | Description | | Name | Description |
| ----------- | ------------------------------------------------------------- | | ----------- | ----------------------------------------------------------------------------- |
| `alias` | The textual mention or alias. ~~str~~ | | `alias` | The textual mention or alias. ~~str~~ |
| **RETURNS** | The list of relevant `Candidate` objects. ~~List[Candidate]~~ | | **RETURNS** | The list of relevant `InMemoryCandidate` objects. ~~List[InMemoryCandidate]~~ |
## InMemoryLookupKB.get_vector {id="get_vector",tag="method"} ## InMemoryLookupKB.get_vector {id="get_vector",tag="method"}

View File

@ -9,8 +9,8 @@ version: 2.2
--- ---
The `KnowledgeBase` object is an abstract class providing a method to generate The `KnowledgeBase` object is an abstract class providing a method to generate
[`Candidate`](/api/kb#candidate) objects, which are plausible external [`InMemoryCandidate`](/api/kb#candidate) objects, which are plausible external
identifiers given a certain textual mention. Each such `Candidate` holds identifiers given a certain textual mention. Each such `InMemoryCandidate` holds
information from the relevant KB entities, such as its frequency in text and information from the relevant KB entities, such as its frequency in text and
possible aliases. Each entity in the knowledge base also has a pretrained entity possible aliases. Each entity in the knowledge base also has a pretrained entity
vector of a fixed size. vector of a fixed size.
@ -72,10 +72,10 @@ of type [`Candidate`](/api/kb#candidate).
> candidates = kb.get_candidates(doc[0:2]) > candidates = kb.get_candidates(doc[0:2])
> ``` > ```
| Name | Description | | Name | Description |
| ----------- | -------------------------------------------------------------------- | | ----------- | ---------------------------------------------------------------------------- |
| `mention` | The textual mention or alias. ~~Span~~ | | `mention` | The textual mention or alias. ~~Span~~ |
| **RETURNS** | An iterable of relevant `Candidate` objects. ~~Iterable[Candidate]~~ | | **RETURNS** | An iterable of relevant `Candidate` objects. ~~Iterable[Candidate]~~ |
## KnowledgeBase.get_candidates_batch {id="get_candidates_batch",tag="method"} ## KnowledgeBase.get_candidates_batch {id="get_candidates_batch",tag="method"}
@ -190,25 +190,27 @@ Restore the state of the knowledge base from a given directory. Note that the
| `exclude` | List of components to exclude. ~~Iterable[str]~~ | | `exclude` | List of components to exclude. ~~Iterable[str]~~ |
| **RETURNS** | The modified `KnowledgeBase` object. ~~KnowledgeBase~~ | | **RETURNS** | The modified `KnowledgeBase` object. ~~KnowledgeBase~~ |
## Candidate {id="candidate",tag="class"} ## InMemoryCandidate {id="candidate",tag="class"}
A `Candidate` object refers to a textual mention (alias) that may or may not be An `InMemoryCandidate` object refers to a textual mention (alias) that may or may
resolved to a specific entity from a `KnowledgeBase`. This will be used as input not be resolved to a specific entity from a `KnowledgeBase`. This will be used
for the entity linking algorithm which will disambiguate the various candidates as input for the entity linking algorithm which will disambiguate the various
to the correct one. Each candidate `(alias, entity)` pair is assigned to a candidates to the correct one. Each candidate `(alias, entity)` pair is assigned
certain prior probability. to a certain prior probability.
### Candidate.\_\_init\_\_ {id="candidate-init",tag="method"} ### InMemoryCandidate.\_\_init\_\_ {id="candidate-init",tag="method"}
Construct a `Candidate` object. Usually this constructor is not called directly, Construct an `InMemoryCandidate` object. Usually this constructor is not called
but instead these objects are returned by the `get_candidates` method of the directly, but instead these objects are returned by the `get_candidates` method
[`entity_linker`](/api/entitylinker) pipe. of the [`entity_linker`](/api/entitylinker) pipe.
> #### Example > #### Example
> >
> ```python > ```python
> from spacy.kb import Candidate > from spacy.kb import InMemoryCandidate
> candidate = Candidate(kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob) > candidate = InMemoryCandidate(kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob)
> ``` > ```
| Name | Description | | Name | Description |
@ -219,7 +221,7 @@ but instead these objects are returned by the `get_candidates` method of the
| `alias_hash` | The hash of the textual mention or alias. ~~int~~ | | `alias_hash` | The hash of the textual mention or alias. ~~int~~ |
| `prior_prob` | The prior probability of the `alias` referring to the `entity`. ~~float~~ | | `prior_prob` | The prior probability of the `alias` referring to the `entity`. ~~float~~ |
## Candidate attributes {id="candidate-attributes"} ## InMemoryCandidate attributes {id="candidate-attributes"}
| Name | Description | | Name | Description |
| --------------- | ------------------------------------------------------------------------ | | --------------- | ------------------------------------------------------------------------ |