spaCy/spacy/sandbox_test_sofie/testing_el.py

78 lines
2.3 KiB
Python
Raw Normal View History

# coding: utf-8
import spacy
2019-03-18 19:27:51 +03:00
from spacy.kb import KnowledgeBase
def _print_kb_size(kb):
    """Print len(kb) followed by the KB's entity count and alias count."""
    print("kb size", len(kb), kb.get_size_entities(), kb.get_size_aliases())


def create_kb():
    """Build a small demo KnowledgeBase, print its contents and return it.

    Three entities are registered (each with prob=0.5), then three aliases
    are attached to subsets of those entities. The KB size is printed when
    the KB is created, after the entities are added and after the aliases
    are added. Finally, every candidate returned by ``get_candidates`` is
    printed for each alias.

    Returns:
        The populated KnowledgeBase, so callers can keep working with it.
        Callers that ignore the return value are unaffected.
    """
    mykb = KnowledgeBase()
    _print_kb_size(mykb)

    # adding entities
    entity_0 = "Q0"  # placeholder ID -- TODO confirm which entity is intended
    entity_42 = "Q42"  # douglas adams
    entity_5301561 = "Q5301561"
    for entity_id in (entity_0, entity_42, entity_5301561):
        print(" adding entity", entity_id)
        mykb.add_entity(entity_id=entity_id, prob=0.5)
    _print_kb_size(mykb)

    # adding aliases
    # The entity variables are reused so the IDs passed here cannot drift
    # from the ones registered above.
    alias1 = "douglassss"
    print(" adding alias", alias1, "to Q42 and Q5301561")
    mykb.add_alias(
        alias=alias1,
        entities=[entity_42, entity_5301561],
        probabilities=[0.8, 0.2],
    )

    alias2 = "johny"
    print(" adding alias", alias2, "to Q0, Q42 and Q5301561")
    mykb.add_alias(
        alias=alias2,
        entities=[entity_0, entity_42, entity_5301561],
        probabilities=[0.3, 0.1, 0.4],
    )

    alias3 = "adam"
    print(" adding alias", alias3, "to Q42")
    mykb.add_alias(alias=alias3, entities=[entity_42], probabilities=[0.9])
    _print_kb_size(mykb)

    # Dump every candidate the KB proposes for each alias.
    for alias in [alias1, alias2, alias3]:
        print()
        print("candidates for", alias)
        candidates = mykb.get_candidates(alias)
        for candidate in candidates:
            print(" candidate")
            print(" kb_id", candidate.kb_id)
            print(" kb_id_", candidate.kb_id_)
            print(" alias", candidate.alias)
            print(" alias_", candidate.alias_)
            print(" prior_prob", candidate.prior_prob)

    return mykb
def add_el():
    """Append an 'el' pipe to the small English model and print its output.

    Loads ``en_core_web_sm``, prints the pipe names before and after adding
    an 'el' component as the last pipe, runs the pipeline on one sample
    sentence, and prints per-token and per-entity annotations, including
    the ``ent_kb_id_`` / ``kb_id_`` attributes.
    """
    nlp = spacy.load('en_core_web_sm')
    print("pipes before:", nlp.pipe_names)

    # Create the 'el' component and append it to the end of the pipeline.
    el_pipe = nlp.create_pipe(name='el')
    nlp.add_pipe(el_pipe, last=True)
    print("pipes after:", nlp.pipe_names)
    print()

    text = "The Hitchhiker's Guide to the Galaxy, written by Douglas Adams, reminds us to always bring our towel."
    doc = nlp(text)

    for token in doc:
        print("token", token.text, token.ent_type_, token.ent_kb_id_)

    # NOTE(review): this blank line is assumed to separate the token listing
    # from the entity listing (printed once, not once per token) -- confirm.
    print()

    for ent in doc.ents:
        print("ent", ent.text, ent.label_, ent.kb_id_)
if __name__ == "__main__":
    # Only the knowledge-base demo runs by default; uncomment the next line
    # to also run the pipeline demo.
    # add_el()
    create_kb()