2019-03-06 21:34:18 +03:00
|
|
|
import spacy
|
2019-03-18 19:27:51 +03:00
|
|
|
from spacy.kb import KnowledgeBase
|
|
|
|
|
|
|
|
|
|
|
|
def create_kb():
    """Build an empty knowledge base, add one entity, and print its size.

    The size of the knowledge base is printed before and after the single
    entity is inserted so the effect of ``add_entity`` is visible on stdout.
    Nothing is returned; the knowledge base is discarded when the function
    exits.
    """
    kb = KnowledgeBase()
    size_label = "kb size"

    # Size of the freshly constructed knowledge base.
    print(size_label, len(kb))

    # Register a single entity with a fixed prior probability.
    qid = "Q42"
    kb.add_entity(entity_id=qid, prob=0.5)
    print("adding entity", qid)

    # Size again, now that one entity has been added.
    print(size_label, len(kb))
|
2019-03-06 21:34:18 +03:00
|
|
|
|
|
|
|
|
|
|
|
def add_el():
    """Append an ``el`` pipe to the small English model and print annotations.

    Loads ``en_core_web_sm``, prints the pipe names before and after adding
    a pipe created under the name ``'el'`` at the end of the pipeline, runs
    the pipeline over a sample sentence, and prints entity type and
    knowledge-base ID information per token and per entity span.
    Nothing is returned.
    """
    pipeline = spacy.load('en_core_web_sm')
    print("pipes before:", pipeline.pipe_names)

    # Create the 'el' component and place it last in the pipeline.
    linker = pipeline.create_pipe(name='el')
    pipeline.add_pipe(linker, last=True)
    print("pipes after:", pipeline.pipe_names)
    print()

    sample = "The Hitchhiker's Guide to the Galaxy, written by Douglas Adams, reminds us to always bring our towel."
    parsed = pipeline(sample)

    # Token-level view: text, entity type and KB identifier of each token.
    for tok in parsed:
        print("token", tok.text, tok.ent_type_, tok.ent_kb_id_)
    print()

    # Span-level view: text, label and KB identifier of each entity.
    for span in parsed.ents:
        print("ent", span.text, span.label_, span.kb_id_)
|
2019-03-06 21:34:18 +03:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point. Only the knowledge-base demo is run; the add_el()
    # pipeline demo is intentionally left disabled here.
    create_kb()
|