From 6b2e8363fc7c73e154debaea276bdf840c389b90 Mon Sep 17 00:00:00 2001
From: kadarakos
Date: Wed, 3 May 2023 13:08:44 +0000
Subject: [PATCH] avoid two for loops over all docs by not precomputing

---
 spacy/pipeline/span_finder.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/spacy/pipeline/span_finder.py b/spacy/pipeline/span_finder.py
index 6ad27bfb7..bdfa055f3 100644
--- a/spacy/pipeline/span_finder.py
+++ b/spacy/pipeline/span_finder.py
@@ -226,20 +226,14 @@ class SpanFinder(TrainablePipe):
         docs (Iterable[Doc]): The documents to modify.
         scores: The scores to set, produced by SpanFinder predict method.
         """
-        lengths = [len(doc) for doc in docs]
-        offset = 0
-        scores_per_doc = []
-        # XXX Isn't this really inefficient that we are creating these
-        # slices ahead of time? Couldn't we just do this in the next loop?
-        for length in lengths:
-            scores_per_doc.append(scores[offset : offset + length])
-            offset += length
-
-        for doc, doc_scores in zip(docs, scores_per_doc):
+        offset = 0
+        for doc in docs:
             doc.spans[self.predicted_key] = []
             starts = []
             ends = []
+            doc_scores = scores[offset : offset + len(doc)]
+            offset += len(doc)
             for token, token_score in zip(doc, doc_scores):
                 if token_score[0] >= self.threshold: