diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.py index 911af1127..b1c188e09 100644 --- a/spacy/kb/candidate.py +++ b/spacy/kb/candidate.py @@ -37,18 +37,13 @@ class Candidate(abc.ABC): self._prior_prob = prior_prob @property - def entity_id(self) -> Union[str, int]: - """RETURNS (Union[str, int]): Unique entity ID.""" - return self._entity_id - - @property - def entity_id_int(self) -> int: + def entity_id(self) -> int: """RETURNS (int): Numerical representation of entity ID (if entity ID is numerical, this is just the entity ID, otherwise the hash of the entity ID string).""" return self._entity_id_hash @property - def entity_id_str(self) -> str: + def entity_id_(self) -> str: """RETURNS (str): String representation of entity ID.""" return str(self._entity_id) @@ -111,6 +106,6 @@ class InMemoryCandidate(Candidate): return self._entity_freq @property - def entity_id_str(self) -> str: + def entity_id_(self) -> str: """RETURNS (str): String representation of entity ID.""" return self._entity_id_str diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py index 4d8370fd3..39cff218a 100644 --- a/spacy/pipeline/entity_linker.py +++ b/spacy/pipeline/entity_linker.py @@ -522,12 +522,12 @@ class EntityLinker(TrainablePipe): ) elif len(candidates) == 1 and self.threshold is None: # shortcut for efficiency reasons: take the 1 candidate - final_kb_ids.append(candidates[0].entity_id_str) + final_kb_ids.append(candidates[0].entity_id_) self._add_activations( doc_scores=doc_scores, doc_ents=doc_ents, scores=[1.0], - ents=[candidates[0].entity_id_int], + ents=[candidates[0].entity_id], ) else: random.shuffle(candidates) @@ -557,7 +557,7 @@ class EntityLinker(TrainablePipe): raise ValueError(Errors.E161) scores = prior_probs + sims - (prior_probs * sims) final_kb_ids.append( - candidates[scores.argmax().item()].entity_id_str + candidates[scores.argmax().item()].entity_id_ if self.threshold is None or scores.max() >= self.threshold else EntityLinker.NIL @@ -566,7 +566,7 @@ class EntityLinker(TrainablePipe): doc_scores=doc_scores, doc_ents=doc_ents, scores=scores, - ents=[c.entity_id_int for c in candidates], + ents=[c.entity_id for c in candidates], ) self._add_doc_activations( docs_scores=docs_scores, diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index 1b5117947..d0cfdd3c7 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -471,7 +471,7 @@ def test_candidate_generation(nlp): assert len(get_candidates(mykb, shrubbery_ent)) == 0 # test the content of the candidates - assert get_candidates(mykb, adam_ent)[0].entity_id_str == "Q2" + assert get_candidates(mykb, adam_ent)[0].entity_id_ == "Q2" assert get_candidates(mykb, adam_ent)[0].mention == "adam" assert_almost_equal(get_candidates(mykb, adam_ent)[0].entity_freq, 12) assert_almost_equal(get_candidates(mykb, adam_ent)[0].prior_prob, 0.9) @@ -563,8 +563,8 @@ def test_vocab_serialization(nlp): candidates = mykb._get_alias_candidates("adam") assert len(candidates) == 1 - assert candidates[0].entity_id_int == q2_hash - assert candidates[0].entity_id_str == "Q2" + assert candidates[0].entity_id == q2_hash + assert candidates[0].entity_id_ == "Q2" assert candidates[0].mention == "adam" with make_tempdir() as d: @@ -574,8 +574,8 @@ def test_vocab_serialization(nlp): candidates = kb_new_vocab._get_alias_candidates("adam") assert len(candidates) == 1 - assert candidates[0].entity_id_int == q2_hash - assert candidates[0].entity_id_str == "Q2" + assert candidates[0].entity_id == q2_hash + assert candidates[0].entity_id_ == "Q2" assert candidates[0].mention == "adam" assert kb_new_vocab.get_vector("Q2") == [2] diff --git a/spacy/tests/serialize/test_serialize_kb.py b/spacy/tests/serialize/test_serialize_kb.py index 7365765cb..336fd16fe 100644 --- a/spacy/tests/serialize/test_serialize_kb.py +++ b/spacy/tests/serialize/test_serialize_kb.py @@ -67,17 +67,17 @@ def _check_kb(kb): # check candidates & probabilities candidates = sorted( - kb._get_alias_candidates("double07"), key=lambda x: x.entity_id_str + kb._get_alias_candidates("double07"), key=lambda x: x.entity_id_ ) assert len(candidates) == 2 - assert candidates[0].entity_id_str == "Q007" + assert candidates[0].entity_id_ == "Q007" assert 6.999 < candidates[0].entity_freq < 7.01 assert candidates[0].entity_vector == [0, 0, 7] assert candidates[0].mention == "double07" assert 0.899 < candidates[0].prior_prob < 0.901 - assert candidates[1].entity_id_str == "Q17" + assert candidates[1].entity_id_ == "Q17" assert 1.99 < candidates[1].entity_freq < 2.01 assert candidates[1].entity_vector == [7, 1, 0] assert candidates[1].mention == "double07"