From 17c4bfc181b459307d1aaf175fa40f5e2fb1b9e8 Mon Sep 17 00:00:00 2001
From: Peter Baumgartner <5107405+pmbaumgartner@users.noreply.github.com>
Date: Mon, 23 Jan 2023 15:28:42 -0500
Subject: [PATCH] initial test commit

---
Notes (not part of the commit message):
- CLI paths are built with pathlib and passed to the commands via str().
- The exit-code assertions carry the captured CLI output as their message.
- The post-image blob id on the index line predates these review changes.

 spacy/tests/test_cli_app.py | 141 ++++++++++++++++++++++++++++++++++++
 1 file changed, 141 insertions(+)

diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 84b2b8d4d..c9fcc99c0 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -1,6 +1,10 @@
 import os
 from pathlib import Path
+
+import pytest
 from typer.testing import CliRunner
+from spacy.tokens import DocBin, Doc, Span
+from spacy.lang.en import English
 
 from spacy.cli._util import app
 from .util import make_tempdir
@@ -40,3 +44,140 @@ def test_benchmark_accuracy_alias():
     assert result_benchmark.stdout == result_evaluate.stdout.replace(
         "spacy evaluate", "spacy benchmark accuracy"
     )
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize(
+    "component,examples",
+    [
+        (
+            "tagger",
+            [
+                dict(words=["I", "like", "cats"], tags=["PRP", "VBP", "NNS"]),
+                dict(words=["I", "like", "dogs"], tags=["PRP", "VBP", "NNS"]),
+            ],
+        ),
+        (
+            "morphologizer",
+            [
+                dict(
+                    words=["I", "like", "cats"],
+                    morphs=[
+                        "Case=Nom|Number=Sing|Person=1|PronType=Prs",
+                        "Tense=Pres|VerbForm=Fin",
+                        "Number=Plur",
+                    ],
+                ),
+                dict(
+                    words=["I", "like", "dogs"],
+                    morphs=[
+                        "Case=Nom|Number=Sing|Person=1|PronType=Prs",
+                        "Tense=Pres|VerbForm=Fin",
+                        "Number=Plur",
+                    ],
+                ),
+            ],
+        ),
+        (
+            "trainable_lemmatizer",
+            [
+                dict(words=["I", "like", "cats"], lemmas=["I", "like", "cat"]),
+                dict(words=["I", "like", "dogs"], lemmas=["I", "like", "dog"]),
+            ],
+        ),
+        (
+            "parser",
+            [
+                dict(
+                    words=["I", "like", "cats", "."],
+                    deps=["nsubj", "ROOT", "dobj", "punct"],
+                    heads=[1, 1, 1, 1],
+                    pos=["PRON", "VERB", "NOUN", "PUNCT"],
+                ),
+            ]
+            * 30,
+        ),
+        (
+            "ner",
+            [
+                dict(words=["I", "like", "cats"], ents=["O", "O", "I-ANIMAL"]),
+                dict(words=["I", "like", "dogs"], ents=["O", "O", "I-ANIMAL"]),
+            ],
+        ),
+        (
+            "spancat",
+            [
+                dict(words=["I", "like", "cats"], spans=[(2, 3, "ANIMAL")]),
+                dict(words=["I", "like", "dogs"], spans=[(2, 3, "ANIMAL")]),
+            ],
+        ),
+        (
+            "textcat",
+            [
+                dict(words=["I", "like", "cats"], cats={"CAT": 1.0, "DOG": 0.0}),
+                dict(words=["I", "like", "dogs"], cats={"CAT": 0.0, "DOG": 1.0}),
+            ],
+        ),
+    ],
+)
+def test_init_config_trainable(component, examples):
+    """Run `init config` then `train` through the CLI for one trainable component.
+
+    component: Pipeline component name passed to `--pipeline`.
+    examples: Dicts of annotations used to build the training docs.
+    """
+    nlp = English()
+    train_docs = []
+    for example in examples:
+        if component == "textcat":
+            # Categories are assigned on the doc after it has been constructed.
+            doc = Doc(nlp.vocab, words=example["words"])
+            doc.cats = example["cats"]
+        elif component == "spancat":
+            # NOTE(review): "sc" should match the spans_key in the generated config.
+            doc = Doc(nlp.vocab, words=example["words"])
+            doc.spans["sc"] = [
+                Span(doc, start, end, label) for start, end, label in example["spans"]
+            ]
+        else:
+            doc = Doc(nlp.vocab, **example)
+        train_docs.append(doc)
+
+    with make_tempdir() as d_in:
+        config_path = d_in / "config.cfg"
+        train_path = d_in / "train.spacy"
+        dev_path = d_in / "dev.spacy"
+        output_path = d_in / "model"
+        # The same docs are written out as both the training and the dev corpus.
+        DocBin(docs=train_docs).to_disk(train_path)
+        DocBin(docs=train_docs).to_disk(dev_path)
+        runner = CliRunner()
+        init_config_result = runner.invoke(
+            app,
+            [
+                "init",
+                "config",
+                str(config_path),
+                "--lang",
+                "en",
+                "--pipeline",
+                component,
+            ],
+        )
+        # Attach the captured CLI output so a failure shows what went wrong.
+        assert init_config_result.exit_code == 0, init_config_result.stdout
+        train_result = runner.invoke(
+            app,
+            [
+                "train",
+                str(config_path),
+                "--paths.train",
+                str(train_path),
+                "--paths.dev",
+                str(dev_path),
+                "--output",
+                str(output_path),
+            ],
+        )
+        assert train_result.exit_code == 0, train_result.stdout
+        assert (output_path / "model-last").exists()