diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.py index af691b415..b8d26832a 100644 --- a/spacy/kb/candidate.py +++ b/spacy/kb/candidate.py @@ -29,26 +29,26 @@ class Candidate(abc.ABC): cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus doesn't) it might be better to eschew this information and always supply the same value. """ - self._mention = mention - self._entity_id = entity_id - self._entity_name = entity_name + self._mention_ = mention + self._entity = entity_id + self._entity_ = entity_name self._entity_vector = entity_vector self._prior_prob = prior_prob @property def entity(self) -> int: """RETURNS (int): Unique entity ID.""" - return self._entity_id + return self._entity @property def entity_(self) -> str: """RETURNS (int): Entity name.""" - return self._entity_name + return self._entity_ @property - def mention(self) -> str: + def mention_(self) -> str: """RETURNS (str): Mention.""" - return self._mention + return self._mention_ @property def entity_vector(self) -> List[float]: @@ -93,20 +93,20 @@ class InMemoryCandidate(Candidate): prior_prob=prior_prob, ) self._retrieve_string_from_hash = retrieve_string_from_hash - self._entity_hash = entity_hash + self._entity = entity_hash self._entity_freq = entity_freq - self._mention_hash = mention_hash + self._mention = mention_hash self._prior_prob = prior_prob @property def entity(self) -> int: """RETURNS (int): hash of the entity_id's KB ID/name""" - return self._entity_hash + return self._entity @property - def mention_hash(self) -> int: + def mention(self) -> int: """RETURNS (int): Mention hash.""" - return self._mention_hash + return self._mention @property def entity_freq(self) -> float: diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx index f39432f5e..d7a986320 100644 --- a/spacy/kb/kb_in_memory.pyx +++ b/spacy/kb/kb_in_memory.pyx @@ -224,9 +224,9 @@ cdef class InMemoryLookupKB(KnowledgeBase): 
self._aliases_table[alias_index] = alias_entry def get_candidates(self, mention: Span) -> Iterable[InMemoryCandidate]: - return self.get_alias_candidates(mention.text) # type: ignore + return self._get_alias_candidates(mention.text) # type: ignore - def get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]: + def _get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]: """ Return candidate entities for an alias. Each candidate defines the entity, the original alias, and the prior probability of that alias resolving to that entity. @@ -244,7 +244,7 @@ cdef class InMemoryLookupKB(KnowledgeBase): entity_hash=self._entries[entry_index].entity_hash, entity_freq=self._entries[entry_index].freq, entity_vector=self._vectors_table[self._entries[entry_index].vector_index], - alias_hash=alias_hash, + mention_hash=alias_hash, prior_prob=prior_prob ) for (entry_index, prior_prob) in zip(alias_entry.entry_indices, alias_entry.probs) diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index cb1e4a733..23eb5e205 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -469,7 +469,7 @@ def test_candidate_generation(nlp): # test the content of the candidates assert get_candidates(mykb, adam_ent)[0].entity_ == "Q2" - assert get_candidates(mykb, adam_ent)[0].alias_ == "adam" + assert get_candidates(mykb, adam_ent)[0].mention_ == "adam" assert_almost_equal(get_candidates(mykb, adam_ent)[0].entity_freq, 12) assert_almost_equal(get_candidates(mykb, adam_ent)[0].prior_prob, 0.9) @@ -499,7 +499,7 @@ def test_el_pipe_configuration(nlp): assert doc[2].ent_kb_id_ == "Q2" def get_lowercased_candidates(kb, span): - return kb.get_alias_candidates(span.text.lower()) + return kb._get_alias_candidates(span.text.lower()) def get_lowercased_candidates_batch(kb, spans): return [get_lowercased_candidates(kb, span) for span in spans] @@ -558,24 +558,24 @@ def 
test_vocab_serialization(nlp): mykb.add_alias(alias="douglas", entities=["Q2", "Q3"], probabilities=[0.4, 0.1]) adam_hash = mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9]) - candidates = mykb.get_alias_candidates("adam") + candidates = mykb._get_alias_candidates("adam") assert len(candidates) == 1 assert candidates[0].entity == q2_hash assert candidates[0].entity_ == "Q2" - assert candidates[0].alias == adam_hash - assert candidates[0].alias_ == "adam" + assert candidates[0].mention == adam_hash + assert candidates[0].mention_ == "adam" with make_tempdir() as d: mykb.to_disk(d / "kb") kb_new_vocab = InMemoryLookupKB(Vocab(), entity_vector_length=1) kb_new_vocab.from_disk(d / "kb") - candidates = kb_new_vocab.get_alias_candidates("adam") + candidates = kb_new_vocab._get_alias_candidates("adam") assert len(candidates) == 1 assert candidates[0].entity == q2_hash assert candidates[0].entity_ == "Q2" - assert candidates[0].alias == adam_hash - assert candidates[0].alias_ == "adam" + assert candidates[0].mention == adam_hash + assert candidates[0].mention_ == "adam" assert kb_new_vocab.get_vector("Q2") == [2] assert_almost_equal(kb_new_vocab.get_prior_prob("Q2", "douglas"), 0.4) @@ -595,20 +595,20 @@ def test_append_alias(nlp): mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9]) # test the size of the relevant candidates - assert len(mykb.get_alias_candidates("douglas")) == 2 + assert len(mykb._get_alias_candidates("douglas")) == 2 # append an alias mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.2) # test the size of the relevant candidates has been incremented - assert len(mykb.get_alias_candidates("douglas")) == 3 + assert len(mykb._get_alias_candidates("douglas")) == 3 # append the same alias-entity pair again should not work (will throw a warning) with pytest.warns(UserWarning): mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.3) # test the size of the relevant candidates remained unchanged - assert 
len(mykb.get_alias_candidates("douglas")) == 3 + assert len(mykb._get_alias_candidates("douglas")) == 3 @pytest.mark.filterwarnings("ignore:\\[W036") @@ -905,11 +905,11 @@ def test_kb_to_bytes(): assert kb_2.contains_alias("Russ Cochran") assert kb_1.get_size_aliases() == kb_2.get_size_aliases() assert kb_1.get_alias_strings() == kb_2.get_alias_strings() - assert len(kb_1.get_alias_candidates("Russ Cochran")) == len( - kb_2.get_alias_candidates("Russ Cochran") + assert len(kb_1._get_alias_candidates("Russ Cochran")) == len( + kb_2._get_alias_candidates("Russ Cochran") ) - assert len(kb_1.get_alias_candidates("Randomness")) == len( - kb_2.get_alias_candidates("Randomness") + assert len(kb_1._get_alias_candidates("Randomness")) == len( + kb_2._get_alias_candidates("Randomness") ) diff --git a/spacy/tests/serialize/test_serialize_kb.py b/spacy/tests/serialize/test_serialize_kb.py index 8d3653ab1..9e501c32f 100644 --- a/spacy/tests/serialize/test_serialize_kb.py +++ b/spacy/tests/serialize/test_serialize_kb.py @@ -63,19 +63,19 @@ def _check_kb(kb): assert alias_string not in kb.get_alias_strings() # check candidates & probabilities - candidates = sorted(kb.get_alias_candidates("double07"), key=lambda x: x.entity_) + candidates = sorted(kb._get_alias_candidates("double07"), key=lambda x: x.entity_) assert len(candidates) == 2 assert candidates[0].entity_ == "Q007" assert 6.999 < candidates[0].entity_freq < 7.01 assert candidates[0].entity_vector == [0, 0, 7] - assert candidates[0].alias_ == "double07" + assert candidates[0].mention_ == "double07" assert 0.899 < candidates[0].prior_prob < 0.901 assert candidates[1].entity_ == "Q17" assert 1.99 < candidates[1].entity_freq < 2.01 assert candidates[1].entity_vector == [7, 1, 0] - assert candidates[1].alias_ == "double07" + assert candidates[1].mention_ == "double07" assert 0.099 < candidates[1].prior_prob < 0.101 diff --git a/website/docs/api/kb.mdx b/website/docs/api/kb.mdx index 3a1cefe8d..4d51dbc16 100644 --- 
a/website/docs/api/kb.mdx +++ b/website/docs/api/kb.mdx @@ -103,23 +103,6 @@ to you. | `mentions` | The textual mention or alias. ~~Iterable[Span]~~ | | **RETURNS** | An iterable of iterable with relevant `Candidate` objects. ~~Iterable[Iterable[Candidate]]~~ | -## KnowledgeBase.get_alias_candidates {id="get_alias_candidates",tag="method"} - - - This method is _not_ available from spaCy 3.5 onwards. - - -From spaCy 3.5 on `KnowledgeBase` is an abstract class (with -[`InMemoryLookupKB`](/api/inmemorylookupkb) being a drop-in replacement) to -allow more flexibility in customizing knowledge bases. Some of its methods were -moved to [`InMemoryLookupKB`](/api/inmemorylookupkb) during this refactoring, -one of those being `get_alias_candidates()`. This method is now available as -[`InMemoryLookupKB.get_alias_candidates()`](/api/inmemorylookupkb#get_alias_candidates). -Note: -[`InMemoryLookupKB.get_candidates()`](/api/inmemorylookupkb#get_candidates) -defaults to -[`InMemoryLookupKB.get_alias_candidates()`](/api/inmemorylookupkb#get_alias_candidates). - ## KnowledgeBase.get_vector {id="get_vector",tag="method"} Given a certain entity ID, retrieve its pretrained entity vector. @@ -207,19 +190,19 @@ of the [`entity_linker`](/api/entitylinker) pipe. > #### Example```python > > from spacy.kb import InMemoryCandidate candidate = InMemoryCandidate(kb, -> entity_hash, entity_freq, entity_vector, alias_hash, prior_prob) +> entity_hash, entity_freq, entity_vector, mention_hash, prior_prob) > > ``` > > ``` -| Name | Description | -| ------------- | ------------------------------------------------------------------------- | -| `kb` | The knowledge base that defined this candidate. ~~KnowledgeBase~~ | -| `entity_hash` | The hash of the entity's KB ID. ~~int~~ | -| `entity_freq` | The entity frequency as recorded in the KB. ~~float~~ | -| `alias_hash` | The hash of the textual mention or alias. ~~int~~ | -| `prior_prob` | The prior probability of the `alias` referring to the `entity`. 
~~float~~ | +| Name | Description | +| -------------- | ------------------------------------------------------------------------- | +| `kb` | The knowledge base that defined this candidate. ~~KnowledgeBase~~ | +| `entity_hash` | The hash of the entity's KB ID. ~~int~~ | +| `entity_freq` | The entity frequency as recorded in the KB. ~~float~~ | +| `mention_hash` | The hash of the textual mention. ~~int~~ | +| `prior_prob` | The prior probability of the `mention` referring to the `entity`. ~~float~~ | ## InMemoryCandidate attributes {id="candidate-attributes"}