spaCy/spacy/tests/regression/test_issue7062.py

55 lines
1.9 KiB
Python
Raw Normal View History

from spacy.kb import KnowledgeBase
from spacy.training import Example
from spacy.lang.en import English
# fmt: off
TRAIN_DATA = [
("Russ Cochran his reprints include EC Comics.",
{"links": {(0, 12): {"Q2146908": 1.0}},
"entities": [(0, 12, "PERSON")],
"sent_starts": [1, -1, 0, 0, 0, 0, 0, 0]})
]
# fmt: on
def test_partial_links():
# Test that having some entities on the doc without gold links, doesn't crash
nlp = English()
vector_length = 3
train_examples = []
for text, annotation in TRAIN_DATA:
doc = nlp(text)
train_examples.append(Example.from_dict(doc, annotation))
def create_kb(vocab):
# create artificial KB
mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
mykb.add_alias("Russ Cochran", ["Q2146908"], [0.9])
return mykb
# Create and train the Entity Linker
entity_linker = nlp.add_pipe("entity_linker", last=True)
entity_linker.set_kb(create_kb)
optimizer = nlp.initialize(get_examples=lambda: train_examples)
for i in range(2):
losses = {}
nlp.update(train_examples, sgd=optimizer, losses=losses)
# adding additional components that are required for the entity_linker
nlp.add_pipe("sentencizer", first=True)
patterns = [
{"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]},
{"label": "ORG", "pattern": [{"LOWER": "ec"}, {"LOWER": "comics"}]}
]
ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
ruler.add_patterns(patterns)
# this will run the pipeline on the examples and shouldn't crash
results = nlp.evaluate(train_examples)
assert "PERSON" in results["ents_per_type"]
assert "PERSON" in results["nel_f_per_type"]
assert "ORG" in results["ents_per_type"]
assert "ORG" not in results["nel_f_per_type"]