2019-03-19 23:35:24 +03:00
|
|
|
# coding: utf-8
|
2019-03-06 21:34:18 +03:00
|
|
|
import spacy
|
2019-03-18 19:27:51 +03:00
|
|
|
from spacy.kb import KnowledgeBase
|
|
|
|
|
|
|
|
|
|
|
|
def create_kb():
    """Build a small demo knowledge base and return it.

    Three entities ("Q0", "Q42", "Q5301561") and two aliases ("douglassss",
    "adam") are registered. The size of the knowledge base is printed before
    any additions, after the entities, and after the aliases.

    Returns:
        The populated ``KnowledgeBase`` instance.
    """
    kb = KnowledgeBase()

    def report_size():
        # One summary line followed by an empty line, as a visual separator.
        print("kb size", len(kb), kb.get_size_entities(), kb.get_size_aliases())
        print()

    report_size()

    # adding entities: (entity id, entity name); every entity gets prob=0.5
    entity_specs = (
        ("Q0", "queZero"),
        ("Q42", "que42"),
        ("Q5301561", "queMore"),
    )
    for entity_id, entity_name in entity_specs:
        print(" adding entity", entity_id)
        kb.add_entity(entity_id=entity_id, entity_name=entity_name, prob=0.5)

    report_size()

    # adding aliases: (alias, candidate entity ids, one probability per entity)
    alias_specs = (
        ("douglassss", ["Q42", "Q5301561"], [0.8, 0.2]),
        ("adam", ["Q42"], [0.9]),
    )
    for alias, entity_ids, probabilities in alias_specs:
        print(" adding alias", alias, "to " + " and ".join(entity_ids))
        kb.add_alias(alias=alias, entities=entity_ids, probabilities=probabilities)

    report_size()

    return kb
|
2019-03-19 23:35:24 +03:00
|
|
|
|
2019-03-06 21:34:18 +03:00
|
|
|
|
2019-03-21 19:33:25 +03:00
|
|
|
def add_el(kb):
    """Add an 'el' pipe configured with ``kb`` to a spaCy pipeline and print a demo run.

    Loads the 'en_core_web_sm' model, appends the 'el' pipe as the last
    component, processes one example sentence, and prints:

    * the pipe names before and after the addition,
    * every token with its entity type and KB id,
    * every entity span with its label and KB id,
    * the candidates the linker's knowledge base returns for a few aliases.

    Args:
        kb: The knowledge base handed to the 'el' pipe through its config.
    """
    pipeline = spacy.load('en_core_web_sm')
    print("pipes before:", pipeline.pipe_names)

    # Create the pipe and append it at the end of the pipeline in one step.
    pipeline.add_pipe(pipeline.create_pipe(name='el', config={"kb": kb}), last=True)
    print("pipes after:", pipeline.pipe_names)
    print()

    sentence = "The Hitchhiker's Guide to the Galaxy, written by Douglas Adams, reminds us to always bring our towel."
    parsed = pipeline(sentence)

    for tok in parsed:
        print("token", tok.text, tok.ent_type_, tok.ent_kb_id_)
    print()

    for span in parsed.ents:
        print("ent", span.text, span.label_, span.kb_id_)
    print()

    # "rubbish" was never added as an alias in create_kb, unlike the other two.
    for surface_form in ("douglassss", "rubbish", "adam"):
        found = pipeline.linker.kb.get_candidates(surface_form)
        print(len(found), "candidates for", surface_form, ":")
        for candidate in found:
            print(" ", candidate.entity_id_, candidate.entity_name_, candidate.alias_)
|
2019-03-21 19:33:25 +03:00
|
|
|
|
2019-03-06 21:34:18 +03:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the demo knowledge base, then run the
    # entity-linking demo on top of it.
    knowledge_base = create_kb()
    add_el(knowledge_base)
|