initial test commit

This commit is contained in:
Peter Baumgartner 2023-01-23 15:28:42 -05:00
parent f6e39a3072
commit 17c4bfc181

View File

@ -1,6 +1,10 @@
import os import os
from pathlib import Path from pathlib import Path
import pytest
from typer.testing import CliRunner from typer.testing import CliRunner
from spacy.tokens import DocBin, Doc, Span
from spacy.lang.en import English
from spacy.cli._util import app from spacy.cli._util import app
from .util import make_tempdir from .util import make_tempdir
@ -40,3 +44,131 @@ def test_benchmark_accuracy_alias():
assert result_benchmark.stdout == result_evaluate.stdout.replace( assert result_benchmark.stdout == result_evaluate.stdout.replace(
"spacy evaluate", "spacy benchmark accuracy" "spacy evaluate", "spacy benchmark accuracy"
) )
def _build_training_doc(vocab, component, example):
    """Create one annotated ``Doc`` for ``component`` from an ``example`` dict.

    ``textcat`` and ``spancat`` annotations are attached after construction
    (``doc.cats`` / ``doc.spans``); for every other component the example's
    keys are forwarded to the ``Doc`` constructor as keyword arguments.
    """
    if component == "textcat":
        doc = Doc(vocab, words=example["words"])
        doc.cats = example["cats"]
        return doc
    if component == "spancat":
        doc = Doc(vocab, words=example["words"])
        # "sc" is the span group written here; it is expected to match the
        # spans key used by the generated spancat config (spaCy's default).
        doc.spans["sc"] = [
            Span(doc, start, end, label) for start, end, label in example["spans"]
        ]
        return doc
    return Doc(vocab, **example)


@pytest.mark.slow
@pytest.mark.parametrize(
    "component,examples",
    [
        (
            "tagger",
            [
                dict(words=["I", "like", "cats"], tags=["PRP", "VBP", "NNS"]),
                dict(words=["I", "like", "dogs"], tags=["PRP", "VBP", "NNS"]),
            ],
        ),
        (
            "morphologizer",
            [
                dict(
                    words=["I", "like", "cats"],
                    morphs=[
                        "Case=Nom|Number=Sing|Person=1|PronType=Prs",
                        "Tense=Pres|VerbForm=Fin",
                        "Number=Plur",
                    ],
                ),
                dict(
                    words=["I", "like", "dogs"],
                    morphs=[
                        "Case=Nom|Number=Sing|Person=1|PronType=Prs",
                        "Tense=Pres|VerbForm=Fin",
                        "Number=Plur",
                    ],
                ),
            ],
        ),
        (
            "trainable_lemmatizer",
            [
                dict(words=["I", "like", "cats"], lemmas=["I", "like", "cat"]),
                dict(words=["I", "like", "dogs"], lemmas=["I", "like", "dog"]),
            ],
        ),
        (
            "parser",
            # The single parse is repeated 30 times.
            # NOTE(review): presumably the parser needs more examples than the
            # other components to train on -- confirm.
            [
                dict(
                    words=["I", "like", "cats", "."],
                    deps=["nsubj", "ROOT", "dobj", "punct"],
                    heads=[1, 1, 1, 1],
                    pos=["PRON", "VERB", "NOUN", "PUNCT"],
                ),
            ]
            * 30,
        ),
        (
            "ner",
            [
                dict(words=["I", "like", "cats"], ents=["O", "O", "I-ANIMAL"]),
                dict(words=["I", "like", "dogs"], ents=["O", "O", "I-ANIMAL"]),
            ],
        ),
        (
            "spancat",
            [
                dict(words=["I", "like", "cats"], spans=[(2, 3, "ANIMAL")]),
                dict(words=["I", "like", "dogs"], spans=[(2, 3, "ANIMAL")]),
            ],
        ),
        (
            "textcat",
            [
                dict(words=["I", "like", "cats"], cats={"CAT": 1.0, "DOG": 0.0}),
                dict(words=["I", "like", "dogs"], cats={"CAT": 0.0, "DOG": 1.0}),
            ],
        ),
    ],
)
def test_init_config_trainable(component, examples):
    """Check that ``init config`` output is directly trainable per component.

    For each parametrized component the test writes a tiny corpus, generates
    a config with ``spacy init config``, runs ``spacy train`` on it, and
    asserts that both commands exit with code 0 and that a ``model-last``
    directory is produced.
    """
    nlp = English()
    train_docs = [
        _build_training_doc(nlp.vocab, component, example) for example in examples
    ]
    with make_tempdir() as d_in:
        config_path = str(d_in / "config.cfg")
        train_path = d_in / "train.spacy"
        dev_path = d_in / "dev.spacy"
        output_path = d_in / "model"

        # The same documents serve as both the training and the dev corpus.
        corpus = DocBin(docs=train_docs)
        corpus.to_disk(train_path)
        corpus.to_disk(dev_path)

        runner = CliRunner()
        init_config_result = runner.invoke(
            app,
            [
                "init",
                "config",
                config_path,
                "--lang",
                "en",
                "--pipeline",
                component,
            ],
        )
        # Attach the CLI output so a failure shows why the command exited
        # with a non-zero code instead of a bare integer comparison.
        assert init_config_result.exit_code == 0, init_config_result.stdout

        train_result = runner.invoke(
            app,
            [
                "train",
                config_path,
                "--paths.train",
                str(train_path),
                "--paths.dev",
                str(dev_path),
                "--output",
                str(output_path),
            ],
        )
        assert train_result.exit_code == 0, train_result.stdout
        assert (output_path / "model-last").exists()