Fix memory issues in Language.evaluate (#6386)

* Fix memory issues in Language.evaluate

Reset annotation in predicted docs before evaluating and store all data
in `examples`.

* Minor refactor to docs generator init

* Fix generator expression

* Fix final generator check

* Refactor pipeline loop

* Handle examples generator in Language.evaluate

* Add test with generator

* Use make_doc
Adriane Boyd 2020-12-31 00:45:50 +01:00 committed by GitHub
parent 5ca57d8221
commit b57be94c78
3 changed files with 17 additions and 18 deletions
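
Before the diffs, a short usage sketch of what the change means for callers (the text, annotations, and blank pipeline below are illustrative, not taken from this commit): `Language.evaluate` now accepts any iterable of `Example` objects, including a generator, because it materializes its input with `list()` up front, and it rebuilds each `eg.predicted` from `eg.reference.text` with `make_doc` before running the pipeline.

    import spacy
    from spacy.training import Example

    nlp = spacy.blank("en")
    annots = {"doc_annotation": {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}}
    example = Example.from_dict(nlp.make_doc("I like green eggs"), annots)

    # a plain list works as before ...
    scores = nlp.evaluate([example])
    assert scores["speed"] > 0

    # ... and a generator now works too, since evaluate() calls list() on its
    # input before validating and scoring it
    scores = nlp.evaluate(eg for eg in [example])
    assert scores["speed"] > 0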

View File

@@ -1290,6 +1290,7 @@ class Language:
         DOCS: https://nightly.spacy.io/api/language#evaluate
         """
+        examples = list(examples)
         validate_examples(examples, "Language.evaluate")
         if batch_size is None:
             batch_size = self.batch_size
@@ -1301,27 +1302,21 @@ class Language:
         kwargs = dict(scorer_cfg)
         kwargs.setdefault("nlp", self)
         scorer = Scorer(**kwargs)
-        texts = [eg.reference.text for eg in examples]
-        docs = [eg.predicted for eg in examples]
+        # reset annotation in predicted docs and time tokenization
         start_time = timer()
-        # tokenize the texts only for timing purposes
-        if not hasattr(self.tokenizer, "pipe"):
-            _ = [self.tokenizer(text) for text in texts]  # noqa: F841
-        else:
-            _ = list(self.tokenizer.pipe(texts))  # noqa: F841
+        for eg in examples:
+            eg.predicted = self.make_doc(eg.reference.text)
+        # apply all pipeline components
         for name, pipe in self.pipeline:
             kwargs = component_cfg.get(name, {})
             kwargs.setdefault("batch_size", batch_size)
-            docs = _pipe(docs, pipe, kwargs)
-        # iterate over the final generator
-        if len(self.pipeline):
-            docs = list(docs)
+            for doc, eg in zip(
+                _pipe((eg.predicted for eg in examples), pipe, kwargs), examples
+            ):
+                eg.predicted = doc
         end_time = timer()
-        for i, (doc, eg) in enumerate(zip(docs, examples)):
-            util.logger.debug(doc)
-            eg.predicted = doc
         results = scorer.score(examples)
-        n_words = sum(len(doc) for doc in docs)
+        n_words = sum(len(eg.predicted) for eg in examples)
         results["speed"] = n_words / (end_time - start_time)
         return results
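
To make the data flow in the new loop easier to follow, here is a minimal, framework-free sketch of the same pattern: materialize the inputs once, then for every processing step rebuild a fresh generator over that one list and write each result back onto its holder, so no parallel `texts`/`docs` lists are kept alive. `Holder`, `pipe_one`, and `steps` are illustrative stand-ins, not spaCy API.

    from dataclasses import dataclass
    from typing import Callable, Iterable, Iterator, List

    @dataclass
    class Holder:
        predicted: str

    def pipe_one(items: Iterable[str], step: Callable[[str], str]) -> Iterator[str]:
        # stand-in for a batched component call: lazily apply one step to a stream
        for item in items:
            yield step(item)

    holders: List[Holder] = [Holder("a"), Holder("b")]  # like the materialized examples
    steps: List[Callable[[str], str]] = [str.upper, lambda s: s + "!"]  # like nlp.pipeline

    for step in steps:
        # build a fresh generator over the same list for every step and write each
        # result back onto its holder, mirroring `eg.predicted = doc` above
        for result, holder in zip(pipe_one((h.predicted for h in holders), step), holders):
            holder.predicted = result

    assert [h.predicted for h in holders] == ["A!", "B!"]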

View File

@@ -53,7 +53,12 @@ def test_language_evaluate(nlp):
     annots = {"doc_annotation": {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}}
     doc = Doc(nlp.vocab, words=text.split(" "))
     example = Example.from_dict(doc, annots)
-    nlp.evaluate([example])
+    scores = nlp.evaluate([example])
+    assert scores["speed"] > 0
+
+    # test with generator
+    scores = nlp.evaluate(eg for eg in [example])
+    assert scores["speed"] > 0
     # Not allowed to call with just one Example
     with pytest.raises(TypeError):

View File

@@ -249,9 +249,8 @@ def create_evaluation_callback(
     weights = {key: value for key, value in weights.items() if value is not None}

     def evaluate() -> Tuple[float, Dict[str, float]]:
-        dev_examples = list(dev_corpus(nlp))
         try:
-            scores = nlp.evaluate(dev_examples)
+            scores = nlp.evaluate(dev_corpus(nlp))
         except KeyError as e:
             raise KeyError(Errors.E900.format(pipeline=nlp.pipe_names)) from e
         # Calculate a weighted sum based on score_weights for the main score.
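
The effect on the training loop, sketched with illustrative data (the `dev_corpus` function below is a toy stand-in for the configured corpus reader, normally a `spacy.training.Corpus`): the evaluation callback can hand the Example stream straight to `nlp.evaluate`, which now materializes it exactly once itself.

    import spacy
    from spacy.training import Example

    nlp = spacy.blank("en")

    def dev_corpus(nlp):
        # toy corpus callable: it only has to yield Example objects
        data = [
            ("I like green eggs", {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}),
            ("I do not like rain", {"cats": {"POSITIVE": 0.0, "NEGATIVE": 1.0}}),
        ]
        for text, annots in data:
            yield Example.from_dict(nlp.make_doc(text), annots)

    # no intermediate list(...) in the callback any more; evaluate() owns the
    # materialization of the stream
    scores = nlp.evaluate(dev_corpus(nlp))
    assert scores["speed"] > 0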