From 6b2e8363fc7c73e154debaea276bdf840c389b90 Mon Sep 17 00:00:00 2001
From: kadarakos
Date: Wed, 3 May 2023 13:08:44 +0000
Subject: [PATCH] avoid two for loops over all docs by not precomputing

---
 spacy/pipeline/span_finder.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/spacy/pipeline/span_finder.py b/spacy/pipeline/span_finder.py
index 6ad27bfb7..bdfa055f3 100644
--- a/spacy/pipeline/span_finder.py
+++ b/spacy/pipeline/span_finder.py
@@ -226,20 +226,14 @@ class SpanFinder(TrainablePipe):
         docs (Iterable[Doc]): The documents to modify.
         scores: The scores to set, produced by SpanFinder predict method.
         """
-        lengths = [len(doc) for doc in docs]
-        offset = 0
-        scores_per_doc = []
-        # XXX Isn't this really inefficient that we are creating these
-        # slices ahead of time? Couldn't we just do this in the next loop?
-        for length in lengths:
-            scores_per_doc.append(scores[offset : offset + length])
-            offset += length
-
-        for doc, doc_scores in zip(docs, scores_per_doc):
+        offset = 0
+        for doc in docs:
             doc.spans[self.predicted_key] = []
             starts = []
             ends = []
+            doc_scores = scores[offset : offset + len(doc)]
+            offset += len(doc)
             for token, token_score in zip(doc, doc_scores):
                 if token_score[0] >= self.threshold: