From c71123dd0c91766c4d8f890c3d2c6660f6deee16 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 21 Mar 2019 15:24:40 +0100 Subject: [PATCH] ensure no candidates are returned for unknown aliases --- spacy/kb.pyx | 3 ++- spacy/sandbox_test_sofie/testing_el.py | 19 +++++-------------- spacy/tests/pipeline/test_el.py | 1 + 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/spacy/kb.pyx b/spacy/kb.pyx index 4776e9d34..62080e1be 100644 --- a/spacy/kb.pyx +++ b/spacy/kb.pyx @@ -149,5 +149,6 @@ cdef class KnowledgeBase: entity_hash=self._entries[entry_index].entity_hash, alias_hash=alias_hash, prior_prob=prob) - for (entry_index, prob) in zip(alias_entry.entry_indices, alias_entry.probs)] + for (entry_index, prob) in zip(alias_entry.entry_indices, alias_entry.probs) + if entry_index != 0] diff --git a/spacy/sandbox_test_sofie/testing_el.py b/spacy/sandbox_test_sofie/testing_el.py index 3a81effbc..03261806b 100644 --- a/spacy/sandbox_test_sofie/testing_el.py +++ b/spacy/sandbox_test_sofie/testing_el.py @@ -7,6 +7,7 @@ def create_kb(): mykb = KnowledgeBase() print("kb size", len(mykb), mykb.get_size_entities(), mykb.get_size_aliases()) + print() # adding entities entity_0 = "Q0" # douglas adams @@ -22,33 +23,23 @@ def create_kb(): mykb.add_entity(entity_id=entity_5301561, prob=0.5) print("kb size", len(mykb), mykb.get_size_entities(), mykb.get_size_aliases()) + print() # adding aliases alias1 = "douglassss" print(" adding alias", alias1, "to Q42 and Q5301561") mykb.add_alias(alias=alias1, entities=["Q42", "Q5301561"], probabilities=[0.8, 0.2]) - alias2 = "johny" - print(" adding alias", alias2, "to Q0, Q42 and Q5301561") - mykb.add_alias(alias=alias2, entities=["Q0", "Q42", "Q5301561"], probabilities=[0.3, 0.1, 0.4]) - alias3 = "adam" print(" adding alias", alias3, "to Q42") mykb.add_alias(alias=alias3, entities=["Q42"], probabilities=[0.9]) print("kb size", len(mykb), mykb.get_size_entities(), mykb.get_size_aliases()) + print() - for alias in [alias1, alias2, alias3]: - print() - print("candidates for", alias) + for alias in [alias1, "rubbish", alias3]: candidates = mykb.get_candidates(alias) - for candidate in candidates: - print(" candidate") - print(" kb_id", candidate.kb_id) - print(" kb_id_", candidate.kb_id_) - print(" alias", candidate.alias) - print(" alias_", candidate.alias_) - print(" prior_prob", candidate.prior_prob) + print(len(candidates), "candidates for", alias) def add_el(): diff --git a/spacy/tests/pipeline/test_el.py b/spacy/tests/pipeline/test_el.py index 78ee0f358..295b35cce 100644 --- a/spacy/tests/pipeline/test_el.py +++ b/spacy/tests/pipeline/test_el.py @@ -80,3 +80,4 @@ def test_candidate_generation(): # test the size of the relevant candidates assert(len(mykb.get_candidates("douglas")) == 2) assert(len(mykb.get_candidates("adam")) == 1) + assert(len(mykb.get_candidates("shrubbery")) == 0)