spaCy/spacy/tests/regression/test_issue5551.py
Sofie Van Landeghem c1ea55307b
Fixing reproducible training (#5735)
* Add initial reproducibility tests

* failing test for default_text_classifier (WIP)

* track trouble to underlying tok2vec layer

* add regression test for Issue 5551

* tests go green with https://github.com/explosion/thinc/pull/359

* update test

* adding fixed seeds to HashEmbed layers, seems to fix the reproducility issue

Co-authored-by: Matthew Honnibal <honnibal+gh@gmail.com>
2020-07-09 19:39:31 +02:00

32 lines
1.1 KiB
Python

from spacy.lang.en import English
from spacy.util import fix_random_seed
def test_issue5551():
"""Test that after fixing the random seed, the results of the pipeline are truly identical"""
component = "textcat"
pipe_cfg = {"exclusive_classes": False}
results = []
for i in range(3):
fix_random_seed(0)
nlp = English()
example = (
"Once hot, form ping-pong-ball-sized balls of the mixture, each weighing roughly 25 g.",
{"cats": {"Labe1": 1.0, "Label2": 0.0, "Label3": 0.0}},
)
nlp.add_pipe(nlp.create_pipe(component, config=pipe_cfg), last=True)
pipe = nlp.get_pipe(component)
for label in set(example[1]["cats"]):
pipe.add_label(label)
nlp.begin_training(component_cfg={component: pipe_cfg})
# Store the result of each iteration
result = pipe.model.predict([nlp.make_doc(example[0])])
results.append(list(result[0]))
# All results should be the same because of the fixed seed
assert len(results) == 3
assert results[0] == results[1]
assert results[0] == results[2]