# coding: utf-8
"""Sandbox script that exercises spaCy's ``KnowledgeBase`` and an ``el`` pipe.

``create_kb`` fills a knowledge base with three entities and three aliases and
prints the candidates returned for every alias.  ``add_el`` appends a pipe
created under the name ``'el'`` to the ``en_core_web_sm`` pipeline and prints
the KB ids found on tokens and entities of a sample sentence.

Running the file as a script calls ``create_kb`` only.
"""
import spacy
from spacy.kb import KnowledgeBase


def _print_kb_size(kb):
    """Print ``len(kb)`` followed by the KB's entity and alias counts."""
    print("kb size", len(kb), kb.get_size_entities(), kb.get_size_aliases())


def create_kb():
    """Build a small knowledge base and print its candidates per alias.

    The function takes no arguments and returns ``None``; everything it
    produces is written to stdout.
    """
    mykb = KnowledgeBase()
    _print_kb_size(mykb)

    # adding entities
    # NOTE(review): the original also tagged Q0 as "douglas adams"; that looks
    # copy-pasted from Q42 -- confirm what Q0 is meant to represent.
    entity_0 = "Q0"  # douglas adams (?)
    entity_42 = "Q42"  # douglas adams
    entity_5301561 = "Q5301561"

    # Same order and same prior (0.5) for every entity as in the original.
    for entity_id in (entity_0, entity_42, entity_5301561):
        print(" adding entity", entity_id)
        mykb.add_entity(entity_id=entity_id, prob=0.5)

    _print_kb_size(mykb)

    # adding aliases
    alias1 = "douglassss"
    print(" adding alias", alias1, "to Q42 and Q5301561")
    mykb.add_alias(
        alias=alias1,
        entities=[entity_42, entity_5301561],
        probabilities=[0.8, 0.2],
    )

    alias2 = "johny"
    print(" adding alias", alias2, "to Q0, Q42 and Q5301561")
    mykb.add_alias(
        alias=alias2,
        entities=[entity_0, entity_42, entity_5301561],
        probabilities=[0.3, 0.1, 0.4],
    )

    alias3 = "adam"
    print(" adding alias", alias3, "to Q42")
    mykb.add_alias(alias=alias3, entities=[entity_42], probabilities=[0.9])

    _print_kb_size(mykb)

    # Dump every candidate the KB returns for each alias that was just added.
    for alias in [alias1, alias2, alias3]:
        print()
        print("candidates for", alias)
        candidates = mykb.get_candidates(alias)
        for candidate in candidates:
            print(" candidate")
            print(" kb_id", candidate.kb_id)
            print(" kb_id_", candidate.kb_id_)
            print(" alias", candidate.alias)
            print(" alias_", candidate.alias_)
            print(" prior_prob", candidate.prior_prob)


def add_el():
    """Append an ``'el'`` pipe to ``en_core_web_sm`` and print KB ids.

    Loads the model, adds the pipe as the last component, runs a sample
    sentence through the pipeline and prints, for every token and every
    entity, its text, entity label and KB id.  Returns ``None``.
    """
    nlp = spacy.load('en_core_web_sm')
    print("pipes before:", nlp.pipe_names)

    # presumably 'el' is the entity-linking component -- verify against the
    # spaCy build this sandbox targets.
    el_pipe = nlp.create_pipe(name='el')
    nlp.add_pipe(el_pipe, last=True)

    print("pipes after:", nlp.pipe_names)
    print()

    text = (
        "The Hitchhiker's Guide to the Galaxy, written by Douglas Adams, "
        "reminds us to always bring our towel."
    )
    doc = nlp(text)

    for token in doc:
        print("token", token.text, token.ent_type_, token.ent_kb_id_)

    print()

    for ent in doc.ents:
        print("ent", ent.text, ent.label_, ent.kb_id_)


if __name__ == "__main__":
    # The pipeline demo is switched off; uncomment to run it as well.
    # add_el()
    create_kb()