mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 21:00:19 +03:00
Add test reproducing EL failure in sentence-crossing entities.
This commit is contained in:
parent
f27bce67fd
commit
25676bb595
|
@ -1219,3 +1219,46 @@ def test_span_maker_forward_with_empty():
|
|||
# just to get a model
|
||||
span_maker = build_span_maker()
|
||||
span_maker([doc1, doc2], False)
|
||||
|
||||
@pytest.mark.skip(reason="Not fixed yet, expected to fail")
def test_sentence_crossing_ents():
    """Check that NEL does not crash on an entity that crosses a sentence
    boundary when the first sentence it is associated with holds no entity.
    """
    kb_vector_width = 3

    nlp = English()
    # The sentencizer has to be in the pipeline before the text is processed.
    nlp.add_pipe("sentencizer")
    doc = nlp("Mahler 's Symphony No. 8 was beautiful.")

    # Gold annotations: a single entity span, its KB links, and sentence
    # starts (intended to put a sentence boundary inside the entity).
    annotations = {
        "entities": [(10, 24, "WORK")],
        "links": {(10, 24): {"Q7304": 0.0, "Q270853": 1.0}},
        "sent_starts": [1, -1, 0, 0, 0, 1, 0, 0, 0],
    }
    example = Example.from_dict(doc, annotations)
    train_examples = [example]

    def create_kb(vocab):
        # Artificial KB: one entity, reachable through a single alias.
        kb = InMemoryLookupKB(vocab, entity_vector_length=kb_vector_width)
        kb.add_entity(entity="Q270853", freq=12, entity_vector=[9, 1, -7])
        kb.add_alias(alias="No. 8", entities=["Q270853"], probabilities=[1.0])
        return kb

    # Append the entity linker to the pipeline and hand it the KB factory.
    entity_linker = nlp.add_pipe("entity_linker", last=True)
    entity_linker.set_kb(create_kb)

    # Train the NEL pipe for two update steps, each with a fresh losses dict.
    optimizer = nlp.initialize(get_examples=lambda: train_examples)
    for _ in range(2):
        nlp.update(train_examples, sgd=optimizer, losses={})

    # This shouldn't crash.
    entity_linker.predict([example.reference])
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user