Add tests for evaluate

This commit is contained in:
Lj Miranda 2023-04-27 11:34:00 +08:00
parent 77371ae8c2
commit b3c77e3e43

View File

@ -12,6 +12,7 @@ import srsly
from click import NoSuchOption from click import NoSuchOption
from packaging.specifiers import SpecifierSet from packaging.specifiers import SpecifierSet
from thinc.api import Config, ConfigValidationError from thinc.api import Config, ConfigValidationError
from spacy.tokens import DocBin
from spacy import about from spacy import about
from spacy.cli import info from spacy.cli import info
@ -27,6 +28,7 @@ from spacy.cli.debug_data import _get_span_characteristics
from spacy.cli.debug_data import _print_span_characteristics from spacy.cli.debug_data import _print_span_characteristics
from spacy.cli.debug_data import _get_spans_length_freq_dist from spacy.cli.debug_data import _get_spans_length_freq_dist
from spacy.cli.download import get_compatibility, get_version from spacy.cli.download import get_compatibility, get_version
from spacy.cli.evaluate import evaluate
from spacy.cli.init_config import RECOMMENDATIONS, init_config, fill_config from spacy.cli.init_config import RECOMMENDATIONS, init_config, fill_config
from spacy.cli.init_pipeline import _init_labels from spacy.cli.init_pipeline import _init_labels
from spacy.cli.package import get_third_party_dependencies from spacy.cli.package import get_third_party_dependencies
@ -144,6 +146,81 @@ def test_issue11235():
assert cfg["commands"][0]["script"][0] == f"hello {lang_var}" assert cfg["commands"][0]["script"][0] == f"hello {lang_var}"
@pytest.mark.issue(12566)
@pytest.mark.parametrize(
    "displacy_type,output_file",
    [("parser", "parsers.html"), ("ner", "entities.html"), ("spancat", "spans.html")],
)
def test_issue12566(displacy_type: str, output_file: str):
    """
    Regression test for issue 12566: running `evaluate` with a displaCy
    output directory should leave the expected HTML file behind.

    displacy_type (str): Parametrized case name; only the "spancat" case
        changes the setup (a `spancat` pipe is added to the saved model).
    output_file (str): Name of the HTML file expected in the temp directory
        once `evaluate` has run.
    """
    # Character offsets (start, end) of every token in the sample sentence.
    token_offsets = [
        (0, 8),
        (9, 15),
        (16, 18),
        (19, 23),
        (24, 31),
        (32, 43),
        (44, 54),
        (55, 66),
        (67, 68),
        (69, 75),
        (76, 83),
        (84, 85),
        (86, 94),
        (95, 100),
        (101, 102),
    ]
    # (start, end, label) triples, used both as entities and as "sc" spans.
    annotations = [
        (44, 54, "nam_adj_country"),
        (69, 83, "nam_liv_person"),
        (86, 100, "nam_pro_title_book"),
    ]
    sample = {
        "ents": [
            {"end": end, "label": label, "start": start}
            for start, end, label in annotations
        ],
        "spans": {
            "sc": [
                {"end": end, "kb_id": "", "label": label, "start": start}
                for start, end, label in annotations
            ]
        },
        "text": "Niedawno czytał em nową książkę znakomitego szkockiego medioznawcy , "
        "Briana McNaira - Cultural Chaos .",
        "tokens": [
            {"end": end, "id": index, "start": start}
            for index, (start, end) in enumerate(token_offsets)
        ],
    }

    with make_tempdir() as tmp_dir:
        # Serialize the sample document into a .spacy file
        corpus_path = tmp_dir / "test.spacy"
        polish = spacy.blank("pl")
        sample_doc = Doc(polish.vocab).from_json(sample)
        DocBin(docs=[sample_doc]).to_disk(corpus_path)

        # Add 'spancat' to en_core_web_sm so that it shows up in the
        # factory_names, then store the pipeline where evaluate can load it
        model_dir = tmp_dir / "test-model"
        pipeline = spacy.load("en_core_web_sm")
        if displacy_type == "spancat":
            pipeline.add_pipe("spancat")
        pipeline.to_disk(model_dir)

        # Run the evaluate command and check that the HTML file was written
        evaluate(
            model=str(model_dir),
            data_path=corpus_path,
            displacy_path=tmp_dir,
        )
        expected_html = tmp_dir / output_file
        assert expected_html.is_file()
def test_cli_info(): def test_cli_info():
nlp = Dutch() nlp = Dutch()
nlp.add_pipe("textcat") nlp.add_pipe("textcat")