From cfc72c29959b1ab715d4b647c66e0c2e0b5f4979 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Tue, 8 Dec 2020 23:29:15 +0100
Subject: [PATCH] Bugfix multi-label textcat reproducibility (#6481)

* add test for multi-label textcat reproducibility

* remove positive_label

* fix lengths dtype

* fix comments

* remove comment that we should not have forgotten :-)
---
 spacy/ml/extract_ngrams.py           |  5 +--
 spacy/tests/pipeline/test_textcat.py | 59 +++++++++++++++++++++++++---
 2 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/spacy/ml/extract_ngrams.py b/spacy/ml/extract_ngrams.py
index bdc297232..93878c81b 100644
--- a/spacy/ml/extract_ngrams.py
+++ b/spacy/ml/extract_ngrams.py
@@ -23,10 +23,7 @@ def forward(model: Model, docs, is_train: bool):
         keys, vals = model.ops.xp.unique(keys, return_counts=True)
         batch_keys.append(keys)
         batch_vals.append(vals)
-    # The dtype here matches what thinc is expecting -- which differs per
-    # platform (by int definition). This should be fixed once the problem
-    # is fixed on Thinc's side.
-    lengths = model.ops.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_)
+    lengths = model.ops.asarray([arr.shape[0] for arr in batch_keys], dtype="int32")
     batch_keys = model.ops.xp.concatenate(batch_keys)
     batch_vals = model.ops.asarray(model.ops.xp.concatenate(batch_vals), dtype="f")
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 06d512a32..733535b32 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -135,7 +135,7 @@ def test_initialize_examples():
 
 
 def test_overfitting_IO():
-    # Simple test to try and quickly overfit the textcat component - ensuring the ML models work correctly
+    # Simple test to try and quickly overfit the single-label textcat component - ensuring the ML models work correctly
     fix_random_seed(0)
     nlp = English()
     nlp.config["initialize"]["components"]["textcat"] = {"positive_label": "POSITIVE"}
@@ -177,11 +177,58 @@
 
     # Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
     texts = ["Just a sentence.", "I like green eggs.", "I am happy.", "I eat ham."]
-    batch_deps_1 = [doc.cats for doc in nlp.pipe(texts)]
-    batch_deps_2 = [doc.cats for doc in nlp.pipe(texts)]
-    no_batch_deps = [doc.cats for doc in [nlp(text) for text in texts]]
-    assert_equal(batch_deps_1, batch_deps_2)
-    assert_equal(batch_deps_1, no_batch_deps)
+    batch_cats_1 = [doc.cats for doc in nlp.pipe(texts)]
+    batch_cats_2 = [doc.cats for doc in nlp.pipe(texts)]
+    no_batch_cats = [doc.cats for doc in [nlp(text) for text in texts]]
+    assert_equal(batch_cats_1, batch_cats_2)
+    assert_equal(batch_cats_1, no_batch_cats)
+
+
+def test_overfitting_IO_multi():
+    # Simple test to try and quickly overfit the multi-label textcat component - ensuring the ML models work correctly
+    fix_random_seed(0)
+    nlp = English()
+    # Set exclusive labels to False
+    config = {"model": {"linear_model": {"exclusive_classes": False}}}
+    textcat = nlp.add_pipe("textcat", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+    optimizer = nlp.initialize(get_examples=lambda: train_examples)
+    assert textcat.model.get_dim("nO") == 2
+
+    for i in range(50):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+    assert losses["textcat"] < 0.01
+
+    # test the trained model
+    test_text = "I am happy."
+    doc = nlp(test_text)
+    cats = doc.cats
+    assert cats["POSITIVE"] > 0.9
+
+    # Also test the results are still the same after IO
+    with make_tempdir() as tmp_dir:
+        nlp.to_disk(tmp_dir)
+        nlp2 = util.load_model_from_path(tmp_dir)
+        doc2 = nlp2(test_text)
+        cats2 = doc2.cats
+        assert cats2["POSITIVE"] > 0.9
+
+    # Test scoring
+    scores = nlp.evaluate(train_examples)
+    assert scores["cats_micro_f"] == 1.0
+    assert scores["cats_score"] == 1.0
+    assert "cats_score_desc" in scores
+
+    # Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
+    texts = ["Just a sentence.", "I like green eggs.", "I am happy.", "I eat ham."]
+    batch_cats_1 = [doc.cats for doc in nlp.pipe(texts)]
+    batch_cats_2 = [doc.cats for doc in nlp.pipe(texts)]
+    no_batch_cats = [doc.cats for doc in [nlp(text) for text in texts]]
+    assert_equal(batch_cats_1, batch_cats_2)
+    assert_equal(batch_cats_1, no_batch_cats)
 
 
 # fmt: off