Address random results in slow readers tests (#9544)

* Set random seed for dataset shuffling
* Use more dev examples for non-zero scores
This commit is contained in:
Adriane Boyd 2021-10-26 16:53:10 +02:00 committed by GitHub
parent fa70837f28
commit 386dcada1c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -1,6 +1,6 @@
from typing import Dict, Iterable, Callable
import pytest
-from thinc.api import Config
+from thinc.api import Config, fix_random_seed
from spacy import Language
from spacy.util import load_model_from_config, registry, resolve_dot_names
from spacy.schemas import ConfigSchemaTraining
@@ -64,8 +64,8 @@ def test_readers():
@pytest.mark.parametrize(
"reader,additional_config",
[
-("ml_datasets.imdb_sentiment.v1", {"train_limit": 10, "dev_limit": 2}),
-("ml_datasets.dbpedia.v1", {"train_limit": 10, "dev_limit": 2}),
+("ml_datasets.imdb_sentiment.v1", {"train_limit": 10, "dev_limit": 10}),
+("ml_datasets.dbpedia.v1", {"train_limit": 10, "dev_limit": 10}),
("ml_datasets.cmu_movies.v1", {"limit": 10, "freq_cutoff": 200, "split": 0.8}),
],
)
@@ -93,6 +93,7 @@ def test_cat_readers(reader, additional_config):
factory = "textcat_multilabel"
"""
config = Config().from_str(nlp_config_string)
+fix_random_seed(config["training"]["seed"])
config["corpora"]["@readers"] = reader
config["corpora"].update(additional_config)
nlp = load_model_from_config(config, auto_fill=True)