Avoid two loops over all docs by slicing each doc's scores inside the main loop instead of precomputing the slices

2025-09-12 23:22:38 +03:00 · 2023-05-03 13:08:44 +00:00 · 2023-05-03 13:08:44 +00:00 · 6b2e8363fc
commit 6b2e8363fc
parent fe4c094d86
1 changed file with 2 additions and 10 deletions
--- a/spacy/pipeline/span_finder.py
+++ b/spacy/pipeline/span_finder.py
@@ -226,20 +226,12 @@ class SpanFinder(TrainablePipe):
        docs (Iterable[Doc]): The documents to modify.
        scores: The scores to set, produced by SpanFinder predict method.
        """
-        lengths = [len(doc) for doc in docs]
-
        offset = 0
-        scores_per_doc = []
-        # XXX Isn't this really inefficient that we are creating these
-        # slices ahead of time? Couldn't we just do this in the next loop?
-        for length in lengths:
-            scores_per_doc.append(scores[offset : offset + length])
-            offset += length
-
-        for doc, doc_scores in zip(docs, scores_per_doc):
+        for i, doc in enumerate(docs):
            doc.spans[self.predicted_key] = []
            starts = []
            ends = []
+            doc_scores = scores[offset:offset + len(doc)]

            for token, token_score in zip(doc, doc_scores):
                if token_score[0] >= self.threshold: