mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-22 10:02:01 +03:00
Refactor Candidate attribute names. Update docs and tests accordingly.
This commit is contained in:
parent
46fe069f87
commit
94e57d0ed5
|
@ -29,26 +29,26 @@ class Candidate(abc.ABC):
|
|||
cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
|
||||
doesn't) it might be better to eschew this information and always supply the same value.
|
||||
"""
|
||||
self._mention = mention
|
||||
self._entity_id = entity_id
|
||||
self._entity_name = entity_name
|
||||
self._mention_ = mention
|
||||
self._entity = entity_id
|
||||
self._entity_ = entity_name
|
||||
self._entity_vector = entity_vector
|
||||
self._prior_prob = prior_prob
|
||||
|
||||
@property
|
||||
def entity(self) -> int:
|
||||
"""RETURNS (int): Unique entity ID."""
|
||||
return self._entity_id
|
||||
return self._entity
|
||||
|
||||
@property
|
||||
def entity_(self) -> str:
|
||||
"""RETURNS (str): Entity name."""
|
||||
return self._entity_name
|
||||
return self._entity_
|
||||
|
||||
@property
|
||||
def mention(self) -> str:
|
||||
def mention_(self) -> str:
|
||||
"""RETURNS (str): Mention."""
|
||||
return self._mention
|
||||
return self._mention_
|
||||
|
||||
@property
|
||||
def entity_vector(self) -> List[float]:
|
||||
|
@ -93,20 +93,20 @@ class InMemoryCandidate(Candidate):
|
|||
prior_prob=prior_prob,
|
||||
)
|
||||
self._retrieve_string_from_hash = retrieve_string_from_hash
|
||||
self._entity_hash = entity_hash
|
||||
self._entity = entity_hash
|
||||
self._entity_freq = entity_freq
|
||||
self._mention_hash = mention_hash
|
||||
self._mention = mention_hash
|
||||
self._prior_prob = prior_prob
|
||||
|
||||
@property
|
||||
def entity(self) -> int:
|
||||
"""RETURNS (int): hash of the entity_id's KB ID/name"""
|
||||
return self._entity_hash
|
||||
return self._entity
|
||||
|
||||
@property
|
||||
def mention_hash(self) -> int:
|
||||
def mention(self) -> int:
|
||||
"""RETURNS (int): Mention hash."""
|
||||
return self._mention_hash
|
||||
return self._mention
|
||||
|
||||
@property
|
||||
def entity_freq(self) -> float:
|
||||
|
|
|
@ -224,9 +224,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
self._aliases_table[alias_index] = alias_entry
|
||||
|
||||
def get_candidates(self, mention: Span) -> Iterable[InMemoryCandidate]:
|
||||
return self.get_alias_candidates(mention.text) # type: ignore
|
||||
return self._get_alias_candidates(mention.text) # type: ignore
|
||||
|
||||
def get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]:
|
||||
def _get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]:
|
||||
"""
|
||||
Return candidate entities for an alias. Each candidate defines the entity, the original alias,
|
||||
and the prior probability of that alias resolving to that entity.
|
||||
|
@ -244,7 +244,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
entity_hash=self._entries[entry_index].entity_hash,
|
||||
entity_freq=self._entries[entry_index].freq,
|
||||
entity_vector=self._vectors_table[self._entries[entry_index].vector_index],
|
||||
alias_hash=alias_hash,
|
||||
mention_hash=alias_hash,
|
||||
prior_prob=prior_prob
|
||||
)
|
||||
for (entry_index, prior_prob) in zip(alias_entry.entry_indices, alias_entry.probs)
|
||||
|
|
|
@ -469,7 +469,7 @@ def test_candidate_generation(nlp):
|
|||
|
||||
# test the content of the candidates
|
||||
assert get_candidates(mykb, adam_ent)[0].entity_ == "Q2"
|
||||
assert get_candidates(mykb, adam_ent)[0].alias_ == "adam"
|
||||
assert get_candidates(mykb, adam_ent)[0].mention_ == "adam"
|
||||
assert_almost_equal(get_candidates(mykb, adam_ent)[0].entity_freq, 12)
|
||||
assert_almost_equal(get_candidates(mykb, adam_ent)[0].prior_prob, 0.9)
|
||||
|
||||
|
@ -499,7 +499,7 @@ def test_el_pipe_configuration(nlp):
|
|||
assert doc[2].ent_kb_id_ == "Q2"
|
||||
|
||||
def get_lowercased_candidates(kb, span):
|
||||
return kb.get_alias_candidates(span.text.lower())
|
||||
return kb._get_alias_candidates(span.text.lower())
|
||||
|
||||
def get_lowercased_candidates_batch(kb, spans):
|
||||
return [get_lowercased_candidates(kb, span) for span in spans]
|
||||
|
@ -558,24 +558,24 @@ def test_vocab_serialization(nlp):
|
|||
mykb.add_alias(alias="douglas", entities=["Q2", "Q3"], probabilities=[0.4, 0.1])
|
||||
adam_hash = mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])
|
||||
|
||||
candidates = mykb.get_alias_candidates("adam")
|
||||
candidates = mykb._get_alias_candidates("adam")
|
||||
assert len(candidates) == 1
|
||||
assert candidates[0].entity == q2_hash
|
||||
assert candidates[0].entity_ == "Q2"
|
||||
assert candidates[0].alias == adam_hash
|
||||
assert candidates[0].alias_ == "adam"
|
||||
assert candidates[0].mention == adam_hash
|
||||
assert candidates[0].mention_ == "adam"
|
||||
|
||||
with make_tempdir() as d:
|
||||
mykb.to_disk(d / "kb")
|
||||
kb_new_vocab = InMemoryLookupKB(Vocab(), entity_vector_length=1)
|
||||
kb_new_vocab.from_disk(d / "kb")
|
||||
|
||||
candidates = kb_new_vocab.get_alias_candidates("adam")
|
||||
candidates = kb_new_vocab._get_alias_candidates("adam")
|
||||
assert len(candidates) == 1
|
||||
assert candidates[0].entity == q2_hash
|
||||
assert candidates[0].entity_ == "Q2"
|
||||
assert candidates[0].alias == adam_hash
|
||||
assert candidates[0].alias_ == "adam"
|
||||
assert candidates[0].mention == adam_hash
|
||||
assert candidates[0].mention_ == "adam"
|
||||
|
||||
assert kb_new_vocab.get_vector("Q2") == [2]
|
||||
assert_almost_equal(kb_new_vocab.get_prior_prob("Q2", "douglas"), 0.4)
|
||||
|
@ -595,20 +595,20 @@ def test_append_alias(nlp):
|
|||
mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])
|
||||
|
||||
# test the size of the relevant candidates
|
||||
assert len(mykb.get_alias_candidates("douglas")) == 2
|
||||
assert len(mykb._get_alias_candidates("douglas")) == 2
|
||||
|
||||
# append an alias
|
||||
mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.2)
|
||||
|
||||
# test the size of the relevant candidates has been incremented
|
||||
assert len(mykb.get_alias_candidates("douglas")) == 3
|
||||
assert len(mykb._get_alias_candidates("douglas")) == 3
|
||||
|
||||
# append the same alias-entity pair again should not work (will throw a warning)
|
||||
with pytest.warns(UserWarning):
|
||||
mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.3)
|
||||
|
||||
# test the size of the relevant candidates remained unchanged
|
||||
assert len(mykb.get_alias_candidates("douglas")) == 3
|
||||
assert len(mykb._get_alias_candidates("douglas")) == 3
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\[W036")
|
||||
|
@ -905,11 +905,11 @@ def test_kb_to_bytes():
|
|||
assert kb_2.contains_alias("Russ Cochran")
|
||||
assert kb_1.get_size_aliases() == kb_2.get_size_aliases()
|
||||
assert kb_1.get_alias_strings() == kb_2.get_alias_strings()
|
||||
assert len(kb_1.get_alias_candidates("Russ Cochran")) == len(
|
||||
kb_2.get_alias_candidates("Russ Cochran")
|
||||
assert len(kb_1._get_alias_candidates("Russ Cochran")) == len(
|
||||
kb_2._get_alias_candidates("Russ Cochran")
|
||||
)
|
||||
assert len(kb_1.get_alias_candidates("Randomness")) == len(
|
||||
kb_2.get_alias_candidates("Randomness")
|
||||
assert len(kb_1._get_alias_candidates("Randomness")) == len(
|
||||
kb_2._get_alias_candidates("Randomness")
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -63,19 +63,19 @@ def _check_kb(kb):
|
|||
assert alias_string not in kb.get_alias_strings()
|
||||
|
||||
# check candidates & probabilities
|
||||
candidates = sorted(kb.get_alias_candidates("double07"), key=lambda x: x.entity_)
|
||||
candidates = sorted(kb._get_alias_candidates("double07"), key=lambda x: x.entity_)
|
||||
assert len(candidates) == 2
|
||||
|
||||
assert candidates[0].entity_ == "Q007"
|
||||
assert 6.999 < candidates[0].entity_freq < 7.01
|
||||
assert candidates[0].entity_vector == [0, 0, 7]
|
||||
assert candidates[0].alias_ == "double07"
|
||||
assert candidates[0].mention_ == "double07"
|
||||
assert 0.899 < candidates[0].prior_prob < 0.901
|
||||
|
||||
assert candidates[1].entity_ == "Q17"
|
||||
assert 1.99 < candidates[1].entity_freq < 2.01
|
||||
assert candidates[1].entity_vector == [7, 1, 0]
|
||||
assert candidates[1].alias_ == "double07"
|
||||
assert candidates[1].mention_ == "double07"
|
||||
assert 0.099 < candidates[1].prior_prob < 0.101
|
||||
|
||||
|
||||
|
|
|
@ -103,23 +103,6 @@ to you.
|
|||
| `mentions` | The textual mention or alias. ~~Iterable[Span]~~ |
|
||||
| **RETURNS** | An iterable of iterables with relevant `Candidate` objects. ~~Iterable[Iterable[Candidate]]~~ |
|
||||
|
||||
## KnowledgeBase.get_alias_candidates {id="get_alias_candidates",tag="method"}
|
||||
|
||||
<Infobox variant="warning">
|
||||
This method is _not_ available from spaCy 3.5 onwards.
|
||||
</Infobox>
|
||||
|
||||
From spaCy 3.5 on `KnowledgeBase` is an abstract class (with
|
||||
[`InMemoryLookupKB`](/api/inmemorylookupkb) being a drop-in replacement) to
|
||||
allow more flexibility in customizing knowledge bases. Some of its methods were
|
||||
moved to [`InMemoryLookupKB`](/api/inmemorylookupkb) during this refactoring,
|
||||
one of those being `get_alias_candidates()`. This method is now available as
|
||||
[`InMemoryLookupKB.get_alias_candidates()`](/api/inmemorylookupkb#get_alias_candidates).
|
||||
Note:
|
||||
[`InMemoryLookupKB.get_candidates()`](/api/inmemorylookupkb#get_candidates)
|
||||
defaults to
|
||||
[`InMemoryLookupKB.get_alias_candidates()`](/api/inmemorylookupkb#get_alias_candidates).
|
||||
|
||||
## KnowledgeBase.get_vector {id="get_vector",tag="method"}
|
||||
|
||||
Given a certain entity ID, retrieve its pretrained entity vector.
|
||||
|
@ -207,19 +190,19 @@ of the [`entity_linker`](/api/entitylinker) pipe.
|
|||
> #### Example```python
|
||||
>
|
||||
> from spacy.kb import InMemoryCandidate
> candidate = InMemoryCandidate(kb,
|
||||
> entity_hash, entity_freq, entity_vector, alias_hash, prior_prob)
|
||||
> entity_hash, entity_freq, entity_vector, mention_hash, prior_prob)
|
||||
>
|
||||
> ```
|
||||
>
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ------------- | ------------------------------------------------------------------------- |
|
||||
| `kb` | The knowledge base that defined this candidate. ~~KnowledgeBase~~ |
|
||||
| `entity_hash` | The hash of the entity's KB ID. ~~int~~ |
|
||||
| `entity_freq` | The entity frequency as recorded in the KB. ~~float~~ |
|
||||
| `alias_hash` | The hash of the textual mention or alias. ~~int~~ |
|
||||
| `prior_prob` | The prior probability of the `alias` referring to the `entity`. ~~float~~ |
|
||||
| Name | Description |
|
||||
| -------------- | ------------------------------------------------------------------------- |
|
||||
| `kb` | The knowledge base that defined this candidate. ~~KnowledgeBase~~ |
|
||||
| `entity_hash` | The hash of the entity's KB ID. ~~int~~ |
|
||||
| `entity_freq` | The entity frequency as recorded in the KB. ~~float~~ |
|
||||
| `mention_hash` | The hash of the textual mention. ~~int~~ |
|
||||
| `prior_prob` | The prior probability of the `mention` referring to the `entity`. ~~float~~ |
|
||||
|
||||
## InMemoryCandidate attributes {id="candidate-attributes"}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user