Pull out parametrized test example data into shared module-level constants

2026-01-11 11:11:13 +03:00 · 2023-02-23 09:56:07 -05:00 · 2023-02-23 09:56:07 -05:00 · d37b2094f7
commit d37b2094f7
parent 35f22ba211
1 changed file with 71 additions and 52 deletions
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@ -46,6 +46,47 @@ def test_benchmark_accuracy_alias():
    )


+example_words_1 = ["I", "like", "cats"]  # tokens of the first ("cats") example
+example_words_2 = ["I", "like", "dogs"]  # tokens of the second ("dogs") example
+example_lemmas_1 = ["I", "like", "cat"]  # one lemma per entry of example_words_1
+example_lemmas_2 = ["I", "like", "dog"]  # one lemma per entry of example_words_2
+example_tags = ["PRP", "VBP", "NNS"]  # shared by both examples
+example_morphs = [  # one feature string per token; shared by both examples
+    "Case=Nom|Number=Sing|Person=1|PronType=Prs",
+    "Tense=Pres|VerbForm=Fin",
+    "Number=Plur",
+]
+example_deps = ["nsubj", "ROOT", "dobj"]  # shared by both examples
+example_pos = ["PRON", "VERB", "NOUN"]  # shared by both examples
+example_ents = ["O", "O", "I-ANIMAL"]  # only the last token carries an entity tag
+example_spans = [(2, 3, "ANIMAL")]  # presumably (start, end, label) -- confirm against the spancat branch of the test
+
+TRAIN_EXAMPLE_1 = dict(  # every annotation for the "cats" example in one dict
+    words=example_words_1,
+    lemmas=example_lemmas_1,
+    tags=example_tags,
+    morphs=example_morphs,
+    deps=example_deps,
+    heads=[1, 1, 1],  # each entry is 1, the index of "ROOT" in example_deps
+    pos=example_pos,
+    ents=example_ents,
+    spans=example_spans,
+    cats={"CAT": 1.0, "DOG": 0.0},
+)
+TRAIN_EXAMPLE_2 = dict(  # same annotations as TRAIN_EXAMPLE_1 except words, lemmas and cats
+    words=example_words_2,
+    lemmas=example_lemmas_2,
+    tags=example_tags,
+    morphs=example_morphs,
+    deps=example_deps,
+    heads=[1, 1, 1],  # each entry is 1, the index of "ROOT" in example_deps
+    pos=example_pos,
+    ents=example_ents,
+    spans=example_spans,
+    cats={"CAT": 0.0, "DOG": 1.0},  # scores swapped relative to TRAIN_EXAMPLE_1
+)
+
+
@pytest.mark.slow
@pytest.mark.parametrize(
    "component,examples",
@ -53,69 +94,50 @@ def test_benchmark_accuracy_alias():
        (
            "tagger",
            [
-                dict(words=["I", "like", "cats"], tags=["PRP", "VBP", "NNS"]),
-                dict(words=["I", "like", "dogs"], tags=["PRP", "VBP", "NNS"]),
+                TRAIN_EXAMPLE_1,
+                TRAIN_EXAMPLE_2,
            ],
        ),
        (
            "morphologizer",
            [
-                dict(
-                    words=["I", "like", "cats"],
-                    morphs=[
-                        "Case=Nom|Number=Sing|Person=1|PronType=Prs",
-                        "Tense=Pres|VerbForm=Fin",
-                        "Number=Plur",
-                    ],
-                ),
-                dict(
-                    words=["I", "like", "dogs"],
-                    morphs=[
-                        "Case=Nom|Number=Sing|Person=1|PronType=Prs",
-                        "Tense=Pres|VerbForm=Fin",
-                        "Number=Plur",
-                    ],
-                ),
+                TRAIN_EXAMPLE_1,
+                TRAIN_EXAMPLE_2,
            ],
        ),
        (
            "trainable_lemmatizer",
            [
-                dict(words=["I", "like", "cats"], lemmas=["I", "like", "cat"]),
-                dict(words=["I", "like", "dogs"], lemmas=["I", "like", "dog"]),
+                TRAIN_EXAMPLE_1,
+                TRAIN_EXAMPLE_2,
            ],
        ),
        (
            "parser",
            [
-                dict(
-                    words=["I", "like", "cats", "."],
-                    deps=["nsubj", "ROOT", "dobj", "punct"],
-                    heads=[1, 1, 1, 1],
-                    pos=["PRON", "VERB", "NOUN", "PUNCT"],
-                ),
+                TRAIN_EXAMPLE_1,
            ]
            * 30,
        ),
        (
            "ner",
            [
-                dict(words=["I", "like", "cats"], ents=["O", "O", "I-ANIMAL"]),
-                dict(words=["I", "like", "dogs"], ents=["O", "O", "I-ANIMAL"]),
+                TRAIN_EXAMPLE_1,
+                TRAIN_EXAMPLE_2,
            ],
        ),
        (
            "spancat",
            [
-                dict(words=["I", "like", "cats"], spans=[(2, 3, "ANIMAL")]),
-                dict(words=["I", "like", "dogs"], spans=[(2, 3, "ANIMAL")]),
+                TRAIN_EXAMPLE_1,
+                TRAIN_EXAMPLE_2,
            ],
        ),
        (
            "textcat",
            [
-                dict(words=["I", "like", "cats"], cats={"CAT": 1.0, "DOG": 0.0}),
-                dict(words=["I", "like", "dogs"], cats={"CAT": 0.0, "DOG": 1.0}),
+                TRAIN_EXAMPLE_1,
+                TRAIN_EXAMPLE_2,
            ],
        ),
    ],
@ -136,7 +158,12 @@ def test_init_config_trainable(component, examples, en_vocab):
            ]
            train_docs.append(doc)
    else:
-        train_docs = [Doc(en_vocab, **example) for example in examples]
+        train_docs = []
+        for example in examples:
+            # cats, spans are not valid kwargs for instantiating a Doc
+            example = {k: v for k, v in example.items() if k not in ("cats", "spans")}
+            doc = Doc(en_vocab, **example)
+            train_docs.append(doc)

    with make_tempdir() as d_in:
        train_bin = DocBin(docs=train_docs)
@ -173,7 +200,7 @@ def test_init_config_trainable(component, examples, en_vocab):
        assert Path(d_in / "model" / "model-last").exists()


-# @pytest.mark.slow
+@pytest.mark.slow
@pytest.mark.parametrize(
    "component,examples",
    [
@ -181,28 +208,20 @@ def test_init_config_trainable(component, examples, en_vocab):
            "tagger,parser,morphologizer",
            [
                dict(
-                    words=["I", "like", "cats"],
-                    tags=["PRP", "VBP", "NNS"],
-                    morphs=[
-                        "Case=Nom|Number=Sing|Person=1|PronType=Prs",
-                        "Tense=Pres|VerbForm=Fin",
-                        "Number=Plur",
-                    ],
-                    deps=["nsubj", "ROOT", "dobj"],
+                    words=example_words_1,
+                    tags=example_tags,
+                    morphs=example_morphs,
+                    deps=example_deps,
                    heads=[1, 1, 1],
-                    pos=["PRON", "VERB", "NOUN"],
+                    pos=example_pos,
                ),
                dict(
-                    words=["I", "like", "dogs"],
-                    tags=["PRP", "VBP", "NNS"],
-                    morphs=[
-                        "Case=Nom|Number=Sing|Person=1|PronType=Prs",
-                        "Tense=Pres|VerbForm=Fin",
-                        "Number=Plur",
-                    ],
-                    deps=["nsubj", "ROOT", "dobj"],
+                    words=example_words_2,
+                    tags=example_tags,
+                    morphs=example_morphs,
+                    deps=example_deps,
                    heads=[1, 1, 1],
-                    pos=["PRON", "VERB", "NOUN"],
+                    pos=example_pos,
                ),
            ]
            * 15,