Simplify ent.sents handling.

This commit is contained in:
Raphael Mitsch 2023-03-14 13:15:11 +01:00
parent 66f4d67dfa
commit 9b6f47f1ac

View File

@@ -476,26 +476,7 @@ class EntityLinker(TrainablePipe):
for j, ent in enumerate(ent_batch):
assert hasattr(ent, "sents")
sents = list(ent.sents)
# Note: the last sentence associated with a sentence-crossing entity isn't complete. E.g. if you
# have "Mahler's Symphony No. 8 was beautiful", the entity being "No. 8", ent.sents would be:
# 1. "Mahler's Symphony No."
# 2. "8"
# whereas doc.sents would be:
# 1. "Mahler's Symphony No."
# 2. "8 was beautiful"
# This makes it tricky to retrieve the last sentence by indexing doc.sents - hence we use an offset
# to determine sent_indices[1].
sent_indices = (
(
sentences.index(sents[0]),
sentences.index(sents[0]) + len(sents) - 1,
)
if len(sents) > 1
else (
sentences.index(ent.sent),
sentences.index(ent.sent),
)
)
sent_indices = (sentences.index(sents[0]), sentences.index(sents[-1]))
assert all([si >= 0 for si in sent_indices])
if self.incl_context: