initial test commit

2025-07-08 22:03:24 +03:00 · 2023-01-23 15:28:42 -05:00 · 2023-01-23 15:28:42 -05:00 · 17c4bfc181
commit 17c4bfc181
parent f6e39a3072
1 changed file with 132 additions and 0 deletions
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -1,6 +1,10 @@
 import os
 from pathlib import Path
 import pytest
 from typer.testing import CliRunner
 from spacy.tokens import DocBin, Doc, Span
 from spacy.lang.en import English
 from spacy.cli._util import app
 from .util import make_tempdir
@@ -40,3 +44,131 @@ def test_benchmark_accuracy_alias():
    assert result_benchmark.stdout == result_evaluate.stdout.replace(
        "spacy evaluate", "spacy benchmark accuracy"
    )
@pytest.mark.slow
@pytest.mark.parametrize(
    "component,examples",
    [
        (
            "tagger",
            [
                {"words": ["I", "like", "cats"], "tags": ["PRP", "VBP", "NNS"]},
                {"words": ["I", "like", "dogs"], "tags": ["PRP", "VBP", "NNS"]},
            ],
        ),
        (
            "morphologizer",
            [
                {
                    "words": ["I", "like", "cats"],
                    "morphs": [
                        "Case=Nom|Number=Sing|Person=1|PronType=Prs",
                        "Tense=Pres|VerbForm=Fin",
                        "Number=Plur",
                    ],
                },
                {
                    "words": ["I", "like", "dogs"],
                    "morphs": [
                        "Case=Nom|Number=Sing|Person=1|PronType=Prs",
                        "Tense=Pres|VerbForm=Fin",
                        "Number=Plur",
                    ],
                },
            ],
        ),
        (
            "trainable_lemmatizer",
            [
                {"words": ["I", "like", "cats"], "lemmas": ["I", "like", "cat"]},
                {"words": ["I", "like", "dogs"], "lemmas": ["I", "like", "dog"]},
            ],
        ),
        (
            "parser",
            # The single annotated sentence is repeated 30 times to form the corpus.
            [
                {
                    "words": ["I", "like", "cats", "."],
                    "deps": ["nsubj", "ROOT", "dobj", "punct"],
                    "heads": [1, 1, 1, 1],
                    "pos": ["PRON", "VERB", "NOUN", "PUNCT"],
                },
            ]
            * 30,
        ),
        (
            "ner",
            [
                {"words": ["I", "like", "cats"], "ents": ["O", "O", "I-ANIMAL"]},
                {"words": ["I", "like", "dogs"], "ents": ["O", "O", "I-ANIMAL"]},
            ],
        ),
        (
            "spancat",
            [
                {"words": ["I", "like", "cats"], "spans": [(2, 3, "ANIMAL")]},
                {"words": ["I", "like", "dogs"], "spans": [(2, 3, "ANIMAL")]},
            ],
        ),
        (
            "textcat",
            [
                {"words": ["I", "like", "cats"], "cats": {"CAT": 1.0, "DOG": 0.0}},
                {"words": ["I", "like", "dogs"], "cats": {"CAT": 0.0, "DOG": 1.0}},
            ],
        ),
    ],
)
def test_init_config_trainable(component, examples):
    """Run `init config` followed by `train` for a single pipeline component.

    The parametrized ``examples`` are turned into annotated ``Doc`` objects,
    written to disk as both the training and the development corpus, and fed
    to the CLI. The test passes when both commands exit with code 0 and a
    ``model-last`` directory exists under the output path.
    """
    vocab = English().vocab

    def run_cli(cli_args):
        # A fresh runner is used for every invocation.
        return CliRunner().invoke(app, cli_args)

    corpus = []
    for annotation in examples:
        if component == "textcat":
            # Categories are assigned on the doc after construction.
            doc = Doc(vocab, words=annotation["words"])
            doc.cats = annotation["cats"]
        elif component == "spancat":
            # Span annotations are stored under the "sc" spans key.
            doc = Doc(vocab, words=annotation["words"])
            doc.spans["sc"] = [
                Span(doc, start, end, label)
                for start, end, label in annotation["spans"]
            ]
        else:
            # Every other annotation dict is passed straight to Doc as keywords.
            doc = Doc(vocab, **annotation)
        corpus.append(doc)

    with make_tempdir() as d_in:
        # The same docs serve as training and development data.
        for corpus_file in ("train.spacy", "dev.spacy"):
            DocBin(docs=corpus).to_disk(d_in / corpus_file)

        config_path = f"{d_in}/config.cfg"
        init_args = ["init", "config", config_path]
        init_args += ["--lang", "en"]
        init_args += ["--pipeline", component]
        init_config_result = run_cli(init_args)
        assert init_config_result.exit_code == 0

        train_args = ["train", config_path]
        train_args += ["--paths.train", f"{d_in}/train.spacy"]
        train_args += ["--paths.dev", f"{d_in}/dev.spacy"]
        train_args += ["--output", f"{d_in}/model"]
        train_result = run_cli(train_args)
        assert train_result.exit_code == 0

        last_model_dir = d_in / "model" / "model-last"
        assert Path(last_model_dir).exists()