mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-29 17:33:10 +03:00
Drop storing mention.
This commit is contained in:
parent
b0ee34185d
commit
c61654eef8
|
@ -5,7 +5,6 @@ from ..typedefs cimport hash_t
|
||||||
cdef class Candidate:
|
cdef class Candidate:
|
||||||
cdef readonly str _entity_id_
|
cdef readonly str _entity_id_
|
||||||
cdef readonly hash_t _entity_id
|
cdef readonly hash_t _entity_id
|
||||||
cdef readonly str _mention
|
|
||||||
cpdef vector[float] _entity_vector
|
cpdef vector[float] _entity_vector
|
||||||
cdef float _prior_prob
|
cdef float _prior_prob
|
||||||
|
|
||||||
|
@ -14,4 +13,4 @@ cdef class InMemoryCandidate(Candidate):
|
||||||
cdef readonly InMemoryLookupKB _kb
|
cdef readonly InMemoryLookupKB _kb
|
||||||
cdef hash_t _entity_hash
|
cdef hash_t _entity_hash
|
||||||
cdef float _entity_freq
|
cdef float _entity_freq
|
||||||
cdef hash_t _alias_hash
|
cdef hash_t _mention
|
|
@ -17,13 +17,11 @@ cdef class Candidate:
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
mention: str,
|
|
||||||
entity_id: str,
|
entity_id: str,
|
||||||
entity_vector: vector[float],
|
entity_vector: vector[float],
|
||||||
prior_prob: float,
|
prior_prob: float,
|
||||||
):
|
):
|
||||||
"""Initializes properties of abstract base class `Candidate`.
|
"""Initializes properties of abstract base class `Candidate`.
|
||||||
mention (str): Mention text for this candidate.
|
|
||||||
entity_id (Union[str, int]): Unique entity ID.
|
entity_id (Union[str, int]): Unique entity ID.
|
||||||
entity_vector (List[float]): Entity embedding.
|
entity_vector (List[float]): Entity embedding.
|
||||||
prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
|
prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
|
||||||
|
@ -37,7 +35,6 @@ cdef class Candidate:
|
||||||
Errors.E1046.format(cls_name=self.__class__.__name__)
|
Errors.E1046.format(cls_name=self.__class__.__name__)
|
||||||
)
|
)
|
||||||
|
|
||||||
self._mention = mention
|
|
||||||
self._entity_id_ = entity_id
|
self._entity_id_ = entity_id
|
||||||
# Note that hashing an int value yields the same int value.
|
# Note that hashing an int value yields the same int value.
|
||||||
self._entity_id = hash(entity_id)
|
self._entity_id = hash(entity_id)
|
||||||
|
@ -58,7 +55,7 @@ cdef class Candidate:
|
||||||
@property
|
@property
|
||||||
def mention(self) -> str:
|
def mention(self) -> str:
|
||||||
"""RETURNS (str): Mention."""
|
"""RETURNS (str): Mention."""
|
||||||
return self._mention
|
raise NotImplementedError
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def entity_vector(self) -> vector[float]:
|
def entity_vector(self) -> vector[float]:
|
||||||
|
@ -78,7 +75,7 @@ cdef class InMemoryCandidate(Candidate):
|
||||||
self,
|
self,
|
||||||
kb: InMemoryLookupKB,
|
kb: InMemoryLookupKB,
|
||||||
entity_hash: int,
|
entity_hash: int,
|
||||||
mention: str,
|
mention_hash: int,
|
||||||
entity_vector: vector[float],
|
entity_vector: vector[float],
|
||||||
prior_prob: float,
|
prior_prob: float,
|
||||||
entity_freq: float
|
entity_freq: float
|
||||||
|
@ -88,22 +85,27 @@ cdef class InMemoryCandidate(Candidate):
|
||||||
entity_id (int): Entity ID as hash that can be looked up with InMemoryKB.vocab.strings.__getitem__().
|
entity_id (int): Entity ID as hash that can be looked up with InMemoryKB.vocab.strings.__getitem__().
|
||||||
entity_freq (int): Entity frequency in KB corpus.
|
entity_freq (int): Entity frequency in KB corpus.
|
||||||
entity_vector (List[float]): Entity embedding.
|
entity_vector (List[float]): Entity embedding.
|
||||||
mention (str): Mention.
|
mention_hash (int): Mention hash.
|
||||||
prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
|
prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
|
||||||
the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In
|
the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In
|
||||||
cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
|
cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
|
||||||
doesn't) it might be better to eschew this information and always supply the same value.
|
doesn't) it might be better to eschew this information and always supply the same value.
|
||||||
"""
|
"""
|
||||||
super().__init__(
|
super().__init__(
|
||||||
mention=mention,
|
|
||||||
entity_id=kb.vocab.strings[entity_hash],
|
entity_id=kb.vocab.strings[entity_hash],
|
||||||
entity_vector=entity_vector,
|
entity_vector=entity_vector,
|
||||||
prior_prob=prior_prob,
|
prior_prob=prior_prob,
|
||||||
)
|
)
|
||||||
self._kb = kb
|
self._kb = kb
|
||||||
|
self._mention = mention_hash
|
||||||
self._entity_id = entity_hash
|
self._entity_id = entity_hash
|
||||||
self._entity_freq = entity_freq
|
self._entity_freq = entity_freq
|
||||||
|
|
||||||
|
@property
|
||||||
|
def mention(self) -> str:
|
||||||
|
"""RETURNS (str): Mention text, resolved from the stored mention hash via the KB's vocab strings."""
|
||||||
|
return self._kb.vocab.strings[self._mention]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def entity_id_(self) -> str:
|
def entity_id_(self) -> str:
|
||||||
"""RETURNS (str): ID/name of this entity in the KB"""
|
"""RETURNS (str): ID/name of this entity in the KB"""
|
||||||
|
|
|
@ -245,7 +245,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
||||||
InMemoryCandidate(
|
InMemoryCandidate(
|
||||||
kb=self,
|
kb=self,
|
||||||
entity_hash=self._entries[entry_index].entity_hash,
|
entity_hash=self._entries[entry_index].entity_hash,
|
||||||
mention=alias,
|
mention_hash=alias_hash,
|
||||||
entity_vector=self._vectors_table[self._entries[entry_index].vector_index],
|
entity_vector=self._vectors_table[self._entries[entry_index].vector_index],
|
||||||
prior_prob=prior_prob,
|
prior_prob=prior_prob,
|
||||||
entity_freq=self._entries[entry_index].freq
|
entity_freq=self._entries[entry_index].freq
|
||||||
|
|
Loading…
Reference in New Issue
Block a user