Switch tests to separate scorer keys rather than merged dicts

Adriane Boyd 2022-12-15 16:40:36 +01:00
parent 3ca590020b
commit 3c6ad16f71


@@ -3,6 +3,7 @@ import logging
 from unittest import mock
 import pytest
 from spacy.language import Language
+from spacy.scorer import Scorer
 from spacy.tokens import Doc, Span
 from spacy.vocab import Vocab
 from spacy.training import Example
@@ -58,29 +59,6 @@ def nlp():
     return nlp
 
 
-@pytest.fixture
-def nlp_multi():
-    nlp = Language(Vocab())
-    textcat_multilabel = nlp.add_pipe("textcat_multilabel")
-    for label in ("FEATURE", "REQUEST", "BUG", "QUESTION"):
-        textcat_multilabel.add_label(label)
-    nlp.initialize()
-    return nlp
-
-
-@pytest.fixture
-def nlp_both():
-    nlp = Language(Vocab())
-    textcat = nlp.add_pipe("textcat")
-    for label in ("POSITIVE", "NEGATIVE"):
-        textcat.add_label(label)
-    textcat_multilabel = nlp.add_pipe("textcat_multilabel")
-    for label in ("FEATURE", "REQUEST", "BUG", "QUESTION"):
-        textcat_multilabel.add_label(label)
-    nlp.initialize()
-    return nlp
-
-
 def test_language_update(nlp):
     text = "hello world"
     annots = {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
@@ -114,9 +92,6 @@ def test_language_evaluate(nlp):
     example = Example.from_dict(doc, annots)
     scores = nlp.evaluate([example])
     assert scores["speed"] > 0
-    assert scores["cats_f_per_type"].get("POSITIVE") is not None
-    assert scores["cats_f_per_type"].get("NEGATIVE") is not None
-    assert scores["cats_f_per_type"].get("BUG") is None
 
     # test with generator
     scores = nlp.evaluate(eg for eg in [example])
@@ -152,33 +127,106 @@ def test_evaluate_no_pipe(nlp):
     nlp.evaluate([Example.from_dict(doc, annots)])
 
 
-def test_evaluate_textcat(nlp_multi):
-    """Test that evaluate works with a multilabel textcat pipe."""
-    text = "hello world"
-    annots = {"doc_annotation": {"cats": {"FEATURE": 1.0, "QUESTION": 1.0}}}
-    doc = Doc(nlp_multi.vocab, words=text.split(" "))
-    example = Example.from_dict(doc, annots)
-    scores = nlp_multi.evaluate([example])
-    assert scores["cats_f_per_type"].get("FEATURE") is not None
-    assert scores["cats_f_per_type"].get("QUESTION") is not None
-    assert scores["cats_f_per_type"].get("REQUEST") is not None
-    assert scores["cats_f_per_type"].get("BUG") is not None
-    assert scores["cats_f_per_type"].get("POSITIVE") is None
-    assert scores["cats_f_per_type"].get("NEGATIVE") is None
-
-
-def test_evaluate_both(nlp_both):
-    """Test that evaluate works with two textcat pipes."""
-    text = "hello world"
-    annots = {"doc_annotation": {"cats": {"FEATURE": 1.0, "QUESTION": 1.0, "POSITIVE": 1.0, "NEGATIVE": 0.0}}}
-    doc = Doc(nlp_both.vocab, words=text.split(" "))
-    example = Example.from_dict(doc, annots)
-    scores = nlp_both.evaluate([example])
-    assert scores["cats_f_per_type"].get("FEATURE") is not None
-    assert scores["cats_f_per_type"].get("QUESTION") is not None
-    assert scores["cats_f_per_type"].get("BUG") is not None
-    assert scores["cats_f_per_type"].get("POSITIVE") is not None
-    assert scores["cats_f_per_type"].get("NEGATIVE") is not None
+def test_evaluate_textcat_multilabel(en_vocab):
+    """Test that evaluate works with a multilabel textcat pipe."""
+    nlp = Language(en_vocab)
+    textcat_multilabel = nlp.add_pipe("textcat_multilabel")
+    for label in ("FEATURE", "REQUEST", "BUG", "QUESTION"):
+        textcat_multilabel.add_label(label)
+    nlp.initialize()
+
+    annots = {"cats": {"FEATURE": 1.0, "QUESTION": 1.0}}
+    doc = nlp.make_doc("hello world")
+    example = Example.from_dict(doc, annots)
+    scores = nlp.evaluate([example])
+    labels = nlp.get_pipe("textcat_multilabel").labels
+    for label in labels:
+        assert scores["cats_f_per_type"].get(label) is not None
+    for key in example.reference.cats.keys():
+        if key not in labels:
+            assert scores["cats_f_per_type"].get(key) is None
+
+
+def test_evaluate_multiple_textcat_final(en_vocab):
+    """Test that evaluate evaluates the final textcat component in a pipeline
+    with more than one textcat or textcat_multilabel."""
+    nlp = Language(en_vocab)
+    textcat = nlp.add_pipe("textcat")
+    for label in ("POSITIVE", "NEGATIVE"):
+        textcat.add_label(label)
+    textcat_multilabel = nlp.add_pipe("textcat_multilabel")
+    for label in ("FEATURE", "REQUEST", "BUG", "QUESTION"):
+        textcat_multilabel.add_label(label)
+    nlp.initialize()
+
+    annots = {
+        "cats": {
+            "POSITIVE": 1.0,
+            "NEGATIVE": 0.0,
+            "FEATURE": 1.0,
+            "QUESTION": 1.0,
+        }
+    }
+    doc = nlp.make_doc("hello world")
+    example = Example.from_dict(doc, annots)
+    scores = nlp.evaluate([example])
+    # get the labels from the final pipe
+    labels = nlp.get_pipe(nlp.pipe_names[-1]).labels
+    for label in labels:
+        assert scores["cats_f_per_type"].get(label) is not None
+    for key in example.reference.cats.keys():
+        if key not in labels:
+            assert scores["cats_f_per_type"].get(key) is None
+
+
+def test_evaluate_multiple_textcat_separate(en_vocab):
+    """Test that evaluate can evaluate multiple textcat components separately
+    with custom scorers."""
+
+    def custom_textcat_score(examples, **kwargs):
+        scores = Scorer.score_cats(
+            examples,
+            "cats",
+            multi_label=False,
+            **kwargs,
+        )
+        return {f"custom_{k}": v for k, v in scores.items()}
+
+    @spacy.registry.scorers("test_custom_textcat_scorer")
+    def make_custom_textcat_scorer():
+        return custom_textcat_score
+
+    nlp = Language(en_vocab)
+    textcat = nlp.add_pipe(
+        "textcat",
+        config={"scorer": {"@scorers": "test_custom_textcat_scorer"}},
+    )
+    for label in ("POSITIVE", "NEGATIVE"):
+        textcat.add_label(label)
+    textcat_multilabel = nlp.add_pipe("textcat_multilabel")
+    for label in ("FEATURE", "REQUEST", "BUG", "QUESTION"):
+        textcat_multilabel.add_label(label)
+    nlp.initialize()
+
+    annots = {
+        "cats": {
+            "POSITIVE": 1.0,
+            "NEGATIVE": 0.0,
+            "FEATURE": 1.0,
+            "QUESTION": 1.0,
+        }
+    }
+    doc = nlp.make_doc("hello world")
+    example = Example.from_dict(doc, annots)
+    scores = nlp.evaluate([example])
+    # check custom scores for the textcat pipe
+    assert "custom_cats_f_per_type" in scores
+    labels = nlp.get_pipe("textcat").labels
+    assert set(scores["custom_cats_f_per_type"].keys()) == set(labels)
+    # check default scores for the textcat_multilabel pipe
+    assert "cats_f_per_type" in scores
+    labels = nlp.get_pipe("textcat_multilabel").labels
+    assert set(scores["cats_f_per_type"].keys()) == set(labels)
 
 
 def vector_modification_pipe(doc):
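
For reference, the pattern the new test_evaluate_multiple_textcat_separate exercises also works outside the test suite. The following is a minimal standalone sketch, assuming spaCy v3.x; the registry name "my_textcat_scorer" and the label sets are illustrative, not part of the commit. The custom scorer wraps Scorer.score_cats and prefixes every key it returns, so the first textcat's scores no longer collide with the default "cats_*" keys emitted by the second component when nlp.evaluate collects results.

import spacy
from spacy.scorer import Scorer
from spacy.training import Example

# Custom scorer: same metrics as the default textcat scorer, but
# reported under prefixed keys ("custom_cats_*" instead of "cats_*").
def prefixed_textcat_score(examples, **kwargs):
    scores = Scorer.score_cats(examples, "cats", multi_label=False, **kwargs)
    return {f"custom_{k}": v for k, v in scores.items()}

@spacy.registry.scorers("my_textcat_scorer")  # illustrative name
def make_prefixed_textcat_scorer():
    return prefixed_textcat_score

nlp = spacy.blank("en")
# This component reports under "custom_cats_*" ...
textcat = nlp.add_pipe(
    "textcat", config={"scorer": {"@scorers": "my_textcat_scorer"}}
)
for label in ("POSITIVE", "NEGATIVE"):
    textcat.add_label(label)
# ... while this one keeps the default "cats_*" keys.
textcat_multilabel = nlp.add_pipe("textcat_multilabel")
for label in ("FEATURE", "BUG"):
    textcat_multilabel.add_label(label)
nlp.initialize()

example = Example.from_dict(
    nlp.make_doc("hello world"),
    {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0, "FEATURE": 1.0, "BUG": 0.0}},
)
scores = nlp.evaluate([example])
# Both per-type dicts coexist instead of one overwriting the other:
assert "cats_f_per_type" in scores
assert "custom_cats_f_per_type" in scores

Without the prefix, both components would report under the same "cats_*" keys and only the final component's values would survive, which is exactly the behavior test_evaluate_multiple_textcat_final pins down.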