mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Fix memory issues in Language.evaluate (#6386)
* Fix memory issues in Language.evaluate

  Reset annotation in predicted docs before evaluating and store all data in `examples`.
* Minor refactor to docs generator init
* Fix generator expression
* Fix final generator check
* Refactor pipeline loop
* Handle examples generator in Language.evaluate
* Add test with generator
* Use make_doc
This commit is contained in:
parent
5ca57d8221
commit
b57be94c78
|
@ -1290,6 +1290,7 @@ class Language:
|
|||
|
||||
DOCS: https://nightly.spacy.io/api/language#evaluate
|
||||
"""
|
||||
examples = list(examples)
|
||||
validate_examples(examples, "Language.evaluate")
|
||||
if batch_size is None:
|
||||
batch_size = self.batch_size
|
||||
|
@ -1301,27 +1302,21 @@ class Language:
|
|||
kwargs = dict(scorer_cfg)
|
||||
kwargs.setdefault("nlp", self)
|
||||
scorer = Scorer(**kwargs)
|
||||
texts = [eg.reference.text for eg in examples]
|
||||
docs = [eg.predicted for eg in examples]
|
||||
# reset annotation in predicted docs and time tokenization
|
||||
start_time = timer()
|
||||
# tokenize the texts only for timing purposes
|
||||
if not hasattr(self.tokenizer, "pipe"):
|
||||
_ = [self.tokenizer(text) for text in texts] # noqa: F841
|
||||
else:
|
||||
_ = list(self.tokenizer.pipe(texts)) # noqa: F841
|
||||
for eg in examples:
|
||||
eg.predicted = self.make_doc(eg.reference.text)
|
||||
# apply all pipeline components
|
||||
for name, pipe in self.pipeline:
|
||||
kwargs = component_cfg.get(name, {})
|
||||
kwargs.setdefault("batch_size", batch_size)
|
||||
docs = _pipe(docs, pipe, kwargs)
|
||||
# iterate over the final generator
|
||||
if len(self.pipeline):
|
||||
docs = list(docs)
|
||||
end_time = timer()
|
||||
for i, (doc, eg) in enumerate(zip(docs, examples)):
|
||||
util.logger.debug(doc)
|
||||
for doc, eg in zip(
|
||||
_pipe((eg.predicted for eg in examples), pipe, kwargs), examples
|
||||
):
|
||||
eg.predicted = doc
|
||||
end_time = timer()
|
||||
results = scorer.score(examples)
|
||||
n_words = sum(len(doc) for doc in docs)
|
||||
n_words = sum(len(eg.predicted) for eg in examples)
|
||||
results["speed"] = n_words / (end_time - start_time)
|
||||
return results
|
||||
|
||||
|
|
|
@ -53,7 +53,12 @@ def test_language_evaluate(nlp):
|
|||
annots = {"doc_annotation": {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}}
|
||||
doc = Doc(nlp.vocab, words=text.split(" "))
|
||||
example = Example.from_dict(doc, annots)
|
||||
nlp.evaluate([example])
|
||||
scores = nlp.evaluate([example])
|
||||
assert scores["speed"] > 0
|
||||
|
||||
# test with generator
|
||||
scores = nlp.evaluate(eg for eg in [example])
|
||||
assert scores["speed"] > 0
|
||||
|
||||
# Not allowed to call with just one Example
|
||||
with pytest.raises(TypeError):
|
||||
|
|
|
@ -249,9 +249,8 @@ def create_evaluation_callback(
|
|||
weights = {key: value for key, value in weights.items() if value is not None}
|
||||
|
||||
def evaluate() -> Tuple[float, Dict[str, float]]:
|
||||
dev_examples = list(dev_corpus(nlp))
|
||||
try:
|
||||
scores = nlp.evaluate(dev_examples)
|
||||
scores = nlp.evaluate(dev_corpus(nlp))
|
||||
except KeyError as e:
|
||||
raise KeyError(Errors.E900.format(pipeline=nlp.pipe_names)) from e
|
||||
# Calculate a weighted sum based on score_weights for the main score.
|
||||
|
|
Loading…
Reference in New Issue
Block a user