initial test commit

This commit is contained in:
Peter Baumgartner 2023-01-23 15:28:42 -05:00
parent f6e39a3072
commit 17c4bfc181

View File

@@ -1,6 +1,10 @@
import os
from pathlib import Path
import pytest
from typer.testing import CliRunner
from spacy.tokens import DocBin, Doc, Span
from spacy.lang.en import English
from spacy.cli._util import app
from .util import make_tempdir
@@ -40,3 +44,131 @@ def test_benchmark_accuracy_alias():
assert result_benchmark.stdout == result_evaluate.stdout.replace(
"spacy evaluate", "spacy benchmark accuracy"
)
@pytest.mark.slow
@pytest.mark.parametrize(
    "component,examples",
    [
        # Each entry pairs a pipeline component name with the annotated
        # examples used as both its training and its dev data.
        (
            "tagger",
            [
                {"words": ["I", "like", "cats"], "tags": ["PRP", "VBP", "NNS"]},
                {"words": ["I", "like", "dogs"], "tags": ["PRP", "VBP", "NNS"]},
            ],
        ),
        (
            "morphologizer",
            [
                {
                    "words": ["I", "like", "cats"],
                    "morphs": [
                        "Case=Nom|Number=Sing|Person=1|PronType=Prs",
                        "Tense=Pres|VerbForm=Fin",
                        "Number=Plur",
                    ],
                },
                {
                    "words": ["I", "like", "dogs"],
                    "morphs": [
                        "Case=Nom|Number=Sing|Person=1|PronType=Prs",
                        "Tense=Pres|VerbForm=Fin",
                        "Number=Plur",
                    ],
                },
            ],
        ),
        (
            "trainable_lemmatizer",
            [
                {"words": ["I", "like", "cats"], "lemmas": ["I", "like", "cat"]},
                {"words": ["I", "like", "dogs"], "lemmas": ["I", "like", "dog"]},
            ],
        ),
        (
            "parser",
            # A single annotated parse, repeated 30 times.
            [
                {
                    "words": ["I", "like", "cats", "."],
                    "deps": ["nsubj", "ROOT", "dobj", "punct"],
                    "heads": [1, 1, 1, 1],
                    "pos": ["PRON", "VERB", "NOUN", "PUNCT"],
                },
            ]
            * 30,
        ),
        (
            "ner",
            [
                {"words": ["I", "like", "cats"], "ents": ["O", "O", "I-ANIMAL"]},
                {"words": ["I", "like", "dogs"], "ents": ["O", "O", "I-ANIMAL"]},
            ],
        ),
        (
            "spancat",
            [
                {"words": ["I", "like", "cats"], "spans": [(2, 3, "ANIMAL")]},
                {"words": ["I", "like", "dogs"], "spans": [(2, 3, "ANIMAL")]},
            ],
        ),
        (
            "textcat",
            [
                {"words": ["I", "like", "cats"], "cats": {"CAT": 1.0, "DOG": 0.0}},
                {"words": ["I", "like", "dogs"], "cats": {"CAT": 0.0, "DOG": 1.0}},
            ],
        ),
    ],
)
def test_init_config_trainable(component, examples):
    """Run `init config` then `train` through the CLI for one component.

    The examples are turned into `Doc` objects, written out as both the
    training and the dev corpus in a temporary directory, and the test
    asserts that both CLI invocations exit with code 0 and that a
    `model/model-last` path exists afterwards.
    """
    nlp = English()

    corpus = []
    for annotation in examples:
        if component == "textcat":
            # Categories are assigned after construction rather than
            # passed to the Doc constructor.
            doc = Doc(nlp.vocab, words=annotation["words"])
            doc.cats = annotation["cats"]
        elif component == "spancat":
            # Spans need the Doc they belong to, so they are attached
            # afterwards under the "sc" span group.
            doc = Doc(nlp.vocab, words=annotation["words"])
            doc.spans["sc"] = [
                Span(doc, begin, stop, tag)
                for begin, stop, tag in annotation["spans"]
            ]
        else:
            # Every other component takes its annotations directly as
            # Doc keyword arguments.
            doc = Doc(nlp.vocab, **annotation)
        corpus.append(doc)

    with make_tempdir() as d_in:
        # The same documents serve as both the training and the dev set.
        for corpus_name in ("train.spacy", "dev.spacy"):
            DocBin(docs=corpus).to_disk(d_in / corpus_name)

        config_path = f"{d_in}/config.cfg"

        init_args = [
            "init",
            "config",
            config_path,
            "--lang",
            "en",
            "--pipeline",
            component,
        ]
        init_config_result = CliRunner().invoke(app, init_args)
        assert init_config_result.exit_code == 0

        train_args = [
            "train",
            config_path,
            "--paths.train",
            f"{d_in}/train.spacy",
            "--paths.dev",
            f"{d_in}/dev.spacy",
            "--output",
            f"{d_in}/model",
        ]
        train_result = CliRunner().invoke(app, train_args)
        assert train_result.exit_code == 0

        model_last = Path(d_in / "model" / "model-last")
        assert model_last.exists()