mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-10 08:12:24 +03:00
Rename Candidate to InMemoryCandidate, BaseCandidate to Candidate.
This commit is contained in:
parent
417e8fea8b
commit
49abf4fb3a
|
@ -1,5 +1,5 @@
|
|||
from .kb import KnowledgeBase
|
||||
from .kb_in_memory import InMemoryLookupKB
|
||||
from .candidate import Candidate
|
||||
from .candidate import Candidate, InMemoryCandidate
|
||||
|
||||
__all__ = ["KnowledgeBase", "InMemoryLookupKB", "Candidate"]
|
||||
__all__ = ["KnowledgeBase", "InMemoryLookupKB", "Candidate", "InMemoryCandidate"]
|
||||
|
|
|
@ -2,8 +2,8 @@ import abc
|
|||
from typing import List, Union, Callable
|
||||
|
||||
|
||||
class BaseCandidate(abc.ABC):
|
||||
"""A `BaseCandidate` object refers to a textual mention (`alias`) that may or may not be resolved
|
||||
class Candidate(abc.ABC):
|
||||
"""A `Candidate` object refers to a textual mention (`alias`) that may or may not be resolved
|
||||
to a specific `entity_id` from a Knowledge Base. This will be used as input for the entity linking
|
||||
algorithm which will disambiguate the various candidates to the correct one.
|
||||
Each candidate (alias, entity_id) pair is assigned a certain prior probability.
|
||||
|
@ -14,7 +14,7 @@ class BaseCandidate(abc.ABC):
|
|||
def __init__(
|
||||
self, mention: str, entity_id: Union[int, str], entity_vector: List[float]
|
||||
):
|
||||
"""Initializes properties of `BaseCandidate`.
|
||||
"""Initializes properties of `Candidate` instance.
|
||||
mention (str): Mention text for this candidate.
|
||||
entity_id (Union[int, str]): Unique entity ID.
|
||||
entity_vector (List[float]): Entity embedding.
|
||||
|
@ -44,8 +44,8 @@ class BaseCandidate(abc.ABC):
|
|||
return self._entity_vector
|
||||
|
||||
|
||||
class Candidate(BaseCandidate):
|
||||
"""`Candidate` for InMemoryLookupKB."""
|
||||
class InMemoryCandidate(Candidate):
|
||||
"""Candidate for InMemoryLookupKB."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
|
@ -36,7 +36,7 @@ cdef class KnowledgeBase:
|
|||
and the prior probability of that alias resolving to that entity.
|
||||
If no candidate is found for a given text, an empty list is returned.
|
||||
mentions (Iterable[Span]): Mentions for which to get candidates.
|
||||
RETURNS (Iterable[Iterable[Candidate]]): Identified candidates.
|
||||
RETURNS (Iterable[Iterable[InMemoryCandidate]]): Identified candidates.
|
||||
"""
|
||||
return [self.get_candidates(span) for span in mentions]
|
||||
|
||||
|
@ -46,7 +46,7 @@ cdef class KnowledgeBase:
|
|||
and the prior probability of that alias resolving to that entity.
|
||||
If no candidate is found for a given text, an empty list is returned.
|
||||
mention (Span): Mention for which to get candidates.
|
||||
RETURNS (Iterable[Candidate]): Identified candidates.
|
||||
RETURNS (Iterable[InMemoryCandidate]): Identified candidates.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
Errors.E1045.format(parent="KnowledgeBase", method="get_candidates", name=self.__name__)
|
||||
|
|
|
@ -18,7 +18,7 @@ from .. import util
|
|||
from ..util import SimpleFrozenList, ensure_path
|
||||
from ..vocab cimport Vocab
|
||||
from .kb cimport KnowledgeBase
|
||||
from .candidate import Candidate as Candidate
|
||||
from .candidate import InMemoryCandidate
|
||||
|
||||
|
||||
cdef class InMemoryLookupKB(KnowledgeBase):
|
||||
|
@ -223,10 +223,10 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
alias_entry.probs = probs
|
||||
self._aliases_table[alias_index] = alias_entry
|
||||
|
||||
def get_candidates(self, mention: Span) -> Iterable[Candidate]:
|
||||
def get_candidates(self, mention: Span) -> Iterable[InMemoryCandidate]:
|
||||
return self.get_alias_candidates(mention.text) # type: ignore
|
||||
|
||||
def get_alias_candidates(self, str alias) -> Iterable[Candidate]:
|
||||
def get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]:
|
||||
"""
|
||||
Return candidate entities for an alias. Each candidate defines the entity, the original alias,
|
||||
and the prior probability of that alias resolving to that entity.
|
||||
|
@ -239,7 +239,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
alias_entry = self._aliases_table[alias_index]
|
||||
|
||||
return [
|
||||
Candidate(
|
||||
InMemoryCandidate(
|
||||
retrieve_string_from_hash=self.vocab.strings.__getitem__,
|
||||
entity_hash=self._entries[entry_index].entity_hash,
|
||||
entity_freq=self._entries[entry_index].freq,
|
||||
|
|
|
@ -116,7 +116,7 @@ def get_candidates(kb: KnowledgeBase, mention: Span) -> Iterable[Candidate]:
|
|||
Return candidate entities for a given mention and fetching appropriate entries from the index.
|
||||
kb (KnowledgeBase): Knowledge base to query.
|
||||
mention (Span): Entity mention for which to identify candidates.
|
||||
RETURNS (Iterable[Candidate]): Identified candidates.
|
||||
RETURNS (Iterable[InMemoryCandidate]): Identified candidates.
|
||||
"""
|
||||
return kb.get_candidates(mention)
|
||||
|
||||
|
@ -128,6 +128,6 @@ def get_candidates_batch(
|
|||
Return candidate entities for the given mentions and fetching appropriate entries from the index.
|
||||
kb (KnowledgeBase): Knowledge base to query.
|
||||
mentions (Iterable[Span]): Entity mentions for which to identify candidates.
|
||||
RETURNS (Iterable[Iterable[Candidate]]): Identified candidates.
|
||||
RETURNS (Iterable[Iterable[InMemoryCandidate]]): Identified candidates.
|
||||
"""
|
||||
return kb.get_candidates_batch(mentions)
|
||||
|
|
|
@ -7,7 +7,7 @@ from thinc.types import Ragged
|
|||
from spacy import registry, util
|
||||
from spacy.attrs import ENT_KB_ID
|
||||
from spacy.compat import pickle
|
||||
from spacy.kb import Candidate, InMemoryLookupKB, KnowledgeBase
|
||||
from spacy.kb import InMemoryCandidate, InMemoryLookupKB, KnowledgeBase
|
||||
from spacy.lang.en import English
|
||||
from spacy.ml import load_kb
|
||||
from spacy.ml.models.entity_linker import build_span_maker, get_candidates
|
||||
|
@ -506,13 +506,13 @@ def test_el_pipe_configuration(nlp):
|
|||
|
||||
@registry.misc("spacy.LowercaseCandidateGenerator.v1")
|
||||
def create_candidates() -> Callable[
|
||||
[InMemoryLookupKB, "Span"], Iterable[Candidate]
|
||||
[InMemoryLookupKB, "Span"], Iterable[InMemoryCandidate]
|
||||
]:
|
||||
return get_lowercased_candidates
|
||||
|
||||
@registry.misc("spacy.LowercaseCandidateBatchGenerator.v1")
|
||||
def create_candidates_batch() -> Callable[
|
||||
[InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]]
|
||||
[InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[InMemoryCandidate]]
|
||||
]:
|
||||
return get_lowercased_candidates_batch
|
||||
|
||||
|
|
|
@ -10,9 +10,9 @@ version: 3.5
|
|||
|
||||
The `InMemoryLookupKB` class inherits from [`KnowledgeBase`](/api/kb) and
|
||||
implements all of its methods. It stores all KB data in-memory and generates
|
||||
[`Candidate`](/api/kb#candidate) objects by exactly matching mentions with
|
||||
entity names. It's highly optimized for both a low memory footprint and speed of
|
||||
retrieval.
|
||||
[`InMemoryCandidate`](/api/kb#candidate) objects by exactly matching mentions
|
||||
with entity names. It's highly optimized for both a low memory footprint and
|
||||
speed of retrieval.
|
||||
|
||||
## InMemoryLookupKB.\_\_init\_\_ {id="init",tag="method"}
|
||||
|
||||
|
@ -156,7 +156,7 @@ Get a list of all aliases in the knowledge base.
|
|||
## InMemoryLookupKB.get_candidates {id="get_candidates",tag="method"}
|
||||
|
||||
Given a certain textual mention as input, retrieve a list of candidate entities
|
||||
of type [`Candidate`](/api/kb#candidate). Wraps
|
||||
of type [`InMemoryCandidate`](/api/kb#candidate). Wraps
|
||||
[`get_alias_candidates()`](/api/inmemorylookupkb#get_alias_candidates).
|
||||
|
||||
> #### Example
|
||||
|
@ -168,10 +168,10 @@ of type [`Candidate`](/api/kb#candidate). Wraps
|
|||
> candidates = kb.get_candidates(doc[0:2])
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | -------------------------------------------------------------------- |
|
||||
| `mention` | The textual mention or alias. ~~Span~~ |
|
||||
| **RETURNS** | An iterable of relevant `Candidate` objects. ~~Iterable[Candidate]~~ |
|
||||
| Name | Description |
|
||||
| ----------- | ------------------------------------------------------------------------------------ |
|
||||
| `mention` | The textual mention or alias. ~~Span~~ |
|
||||
| **RETURNS** | An iterable of relevant `InMemoryCandidate` objects. ~~Iterable[InMemoryCandidate]~~ |
|
||||
|
||||
## InMemoryLookupKB.get_candidates_batch {id="get_candidates_batch",tag="method"}
|
||||
|
||||
|
@ -194,15 +194,15 @@ to you.
|
|||
> candidates = kb.get_candidates_batch((doc[0:2], doc[3:]))
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | -------------------------------------------------------------------------------------------- |
|
||||
| `mentions` | The textual mention or alias. ~~Iterable[Span]~~ |
|
||||
| **RETURNS** | An iterable of iterable with relevant `Candidate` objects. ~~Iterable[Iterable[Candidate]]~~ |
|
||||
| Name | Description |
|
||||
| ----------- | ------------------------------------------------------------------------------------------------------------ |
|
||||
| `mentions` | The textual mention or alias. ~~Iterable[Span]~~ |
|
||||
| **RETURNS** | An iterable of iterable with relevant `InMemoryCandidate` objects. ~~Iterable[Iterable[InMemoryCandidate]]~~ |
|
||||
|
||||
## InMemoryLookupKB.get_alias_candidates {id="get_alias_candidates",tag="method"}
|
||||
|
||||
Given a certain textual mention as input, retrieve a list of candidate entities
|
||||
of type [`Candidate`](/api/kb#candidate).
|
||||
of type [`InMemoryCandidate`](/api/kb#candidate).
|
||||
|
||||
> #### Example
|
||||
>
|
||||
|
@ -210,10 +210,10 @@ of type [`Candidate`](/api/kb#candidate).
|
|||
> candidates = kb.get_alias_candidates("Douglas")
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | ------------------------------------------------------------- |
|
||||
| `alias` | The textual mention or alias. ~~str~~ |
|
||||
| **RETURNS** | The list of relevant `Candidate` objects. ~~List[Candidate]~~ |
|
||||
| Name | Description |
|
||||
| ----------- | ----------------------------------------------------------------------------- |
|
||||
| `alias` | The textual mention or alias. ~~str~~ |
|
||||
| **RETURNS** | The list of relevant `InMemoryCandidate` objects. ~~List[InMemoryCandidate]~~ |
|
||||
|
||||
## InMemoryLookupKB.get_vector {id="get_vector",tag="method"}
|
||||
|
||||
|
|
|
@ -9,8 +9,8 @@ version: 2.2
|
|||
---
|
||||
|
||||
The `KnowledgeBase` object is an abstract class providing a method to generate
|
||||
[`Candidate`](/api/kb#candidate) objects, which are plausible external
|
||||
identifiers given a certain textual mention. Each such `Candidate` holds
|
||||
[`InMemoryCandidate`](/api/kb#candidate) objects, which are plausible external
|
||||
identifiers given a certain textual mention. Each such `InMemoryCandidate` holds
|
||||
information from the relevant KB entities, such as its frequency in text and
|
||||
possible aliases. Each entity in the knowledge base also has a pretrained entity
|
||||
vector of a fixed size.
|
||||
|
@ -72,10 +72,10 @@ of type [`Candidate`](/api/kb#candidate).
|
|||
> candidates = kb.get_candidates(doc[0:2])
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | -------------------------------------------------------------------- |
|
||||
| `mention` | The textual mention or alias. ~~Span~~ |
|
||||
| **RETURNS** | An iterable of relevant `Candidate` objects. ~~Iterable[Candidate]~~ |
|
||||
| Name | Description |
|
||||
| ----------- | ---------------------------------------------------------------------------- |
|
||||
| `mention` | The textual mention or alias. ~~Span~~ |
|
||||
| **RETURNS** | An iterable of relevant `InMemoryCandidate` objects. ~~Iterable[InMemoryCandidate]~~ |
|
||||
|
||||
## KnowledgeBase.get_candidates_batch {id="get_candidates_batch",tag="method"}
|
||||
|
||||
|
@ -190,25 +190,27 @@ Restore the state of the knowledge base from a given directory. Note that the
|
|||
| `exclude` | List of components to exclude. ~~Iterable[str]~~ |
|
||||
| **RETURNS** | The modified `KnowledgeBase` object. ~~KnowledgeBase~~ |
|
||||
|
||||
## Candidate {id="candidate",tag="class"}
|
||||
## InMemoryCandidate {id="candidate",tag="class"}
|
||||
|
||||
A `Candidate` object refers to a textual mention (alias) that may or may not be
|
||||
resolved to a specific entity from a `KnowledgeBase`. This will be used as input
|
||||
for the entity linking algorithm which will disambiguate the various candidates
|
||||
to the correct one. Each candidate `(alias, entity)` pair is assigned to a
|
||||
certain prior probability.
|
||||
An `InMemoryCandidate` object refers to a textual mention (alias) that may or may
|
||||
not be resolved to a specific entity from a `KnowledgeBase`. This will be used
|
||||
as input for the entity linking algorithm which will disambiguate the various
|
||||
candidates to the correct one. Each candidate `(alias, entity)` pair is assigned
|
||||
to a certain prior probability.
|
||||
|
||||
### Candidate.\_\_init\_\_ {id="candidate-init",tag="method"}
|
||||
### InMemoryCandidate.\_\_init\_\_ {id="candidate-init",tag="method"}
|
||||
|
||||
Construct a `Candidate` object. Usually this constructor is not called directly,
|
||||
but instead these objects are returned by the `get_candidates` method of the
|
||||
[`entity_linker`](/api/entitylinker) pipe.
|
||||
Construct an `InMemoryCandidate` object. Usually this constructor is not called
|
||||
directly, but instead these objects are returned by the `get_candidates` method
|
||||
of the [`entity_linker`](/api/entitylinker) pipe.
|
||||
|
||||
> #### Example
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> from spacy.kb import InMemoryCandidate
|
||||
> candidate = InMemoryCandidate(kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob)
|
||||
> ```
|
||||
>
|
||||
> ```python
|
||||
> from spacy.kb import Candidate
|
||||
> candidate = Candidate(kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
|
@ -219,7 +221,7 @@ but instead these objects are returned by the `get_candidates` method of the
|
|||
| `alias_hash` | The hash of the textual mention or alias. ~~int~~ |
|
||||
| `prior_prob` | The prior probability of the `alias` referring to the `entity`. ~~float~~ |
|
||||
|
||||
## Candidate attributes {id="candidate-attributes"}
|
||||
## InMemoryCandidate attributes {id="candidate-attributes"}
|
||||
|
||||
| Name | Description |
|
||||
| --------------- | ------------------------------------------------------------------------ |
|
||||
|
|
Loading…
Reference in New Issue
Block a user