Mirror of https://github.com/explosion/spaCy.git (synced 2025-07-10 16:22:29 +03:00)
avoid two for loops over all docs by not precomputing the per-doc score slices
parent fe4c094d86
commit 6b2e8363fc
@@ -226,20 +226,12 @@ class SpanFinder(TrainablePipe):
         docs (Iterable[Doc]): The documents to modify.
         scores: The scores to set, produced by SpanFinder predict method.
         """
-        lengths = [len(doc) for doc in docs]
-
         offset = 0
-        scores_per_doc = []
-        # XXX Isn't this really inefficient that we are creating these
-        # slices ahead of time? Couldn't we just do this in the next loop?
-        for length in lengths:
-            scores_per_doc.append(scores[offset : offset + length])
-            offset += length
-
-        for doc, doc_scores in zip(docs, scores_per_doc):
+        for i, doc in enumerate(docs):
             doc.spans[self.predicted_key] = []
             starts = []
             ends = []
+            doc_scores = scores[offset:offset + len(doc)]

             for token, token_score in zip(doc, doc_scores):
                 if token_score[0] >= self.threshold:
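For reference, the new version walks the batch once and slices the flat score matrix on the fly, instead of materialising every per-doc slice in a separate loop first. Below is a minimal standalone sketch of that single-pass pattern, not the component itself: it assumes scores is a (total_tokens, 2) array of start/end probabilities, and the helper name slice_scores_per_doc and the 0.5 threshold are illustrative placeholders rather than part of SpanFinder's API.

import numpy as np

def slice_scores_per_doc(doc_lengths, scores, threshold=0.5):
    """Single pass over the batch: slice the flat (n_tokens, 2) score
    matrix per document while iterating, instead of precomputing all
    per-doc slices in an extra loop beforehand."""
    offset = 0
    results = []
    for length in doc_lengths:
        doc_scores = scores[offset:offset + length]
        # Column 0 ~ "token starts a span", column 1 ~ "token ends a span",
        # mirroring the token_score[0] / token_score[1] checks in the diff.
        starts = [i for i, row in enumerate(doc_scores) if row[0] >= threshold]
        ends = [i for i, row in enumerate(doc_scores) if row[1] >= threshold]
        results.append((starts, ends))
        offset += length
    return results

# Example: a batch of two "docs" with 3 and 2 tokens and random scores.
scores = np.random.rand(5, 2)
print(slice_scores_per_doc([3, 2], scores))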