Mirror of https://github.com/explosion/spaCy.git
Switch tests to separate scorer keys rather than merged dicts

commit 3c6ad16f71 (parent 3ca590020b)
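The change is easiest to see in miniature. Below is a minimal sketch of the "separate scorer keys" idea in plain Python, with no spaCy dependency: when two components would otherwise report scores under the same names, one of them gets a wrapping scorer that prefixes its keys, so both results can coexist in one dict instead of being merged and overwritten. The names score_textcat and make_prefixed_scorer and the score values are illustrative stand-ins, not spaCy's API.

def score_textcat(examples, **kwargs):
    # Stand-in for a component's default scorer (cf. Scorer.score_cats).
    return {"cats_score": 0.75, "cats_f_per_type": {"POSITIVE": {"f": 0.8}}}


def make_prefixed_scorer(prefix, scorer):
    # Wrap a scorer so its keys cannot collide with another component's.
    def prefixed_scorer(examples, **kwargs):
        return {f"{prefix}_{k}": v for k, v in scorer(examples, **kwargs).items()}

    return prefixed_scorer


custom_score = make_prefixed_scorer("custom", score_textcat)
scores = {}
scores.update(score_textcat([]))  # default keys: cats_score, cats_f_per_type
scores.update(custom_score([]))   # prefixed keys: custom_cats_score, ...
assert "cats_score" in scores and "custom_cats_score" in scores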
@@ -3,6 +3,7 @@ import logging
 from unittest import mock
 import pytest
 from spacy.language import Language
+from spacy.scorer import Scorer
 from spacy.tokens import Doc, Span
 from spacy.vocab import Vocab
 from spacy.training import Example
@@ -58,29 +59,6 @@ def nlp():
     return nlp


-@pytest.fixture
-def nlp_multi():
-    nlp = Language(Vocab())
-    textcat_multilabel = nlp.add_pipe("textcat_multilabel")
-    for label in ("FEATURE", "REQUEST", "BUG", "QUESTION"):
-        textcat_multilabel.add_label(label)
-    nlp.initialize()
-    return nlp
-
-
-@pytest.fixture
-def nlp_both():
-    nlp = Language(Vocab())
-    textcat = nlp.add_pipe("textcat")
-    for label in ("POSITIVE", "NEGATIVE"):
-        textcat.add_label(label)
-    textcat_multilabel = nlp.add_pipe("textcat_multilabel")
-    for label in ("FEATURE", "REQUEST", "BUG", "QUESTION"):
-        textcat_multilabel.add_label(label)
-    nlp.initialize()
-    return nlp
-
-
 def test_language_update(nlp):
     text = "hello world"
     annots = {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
@@ -114,9 +92,6 @@ def test_language_evaluate(nlp):
     example = Example.from_dict(doc, annots)
     scores = nlp.evaluate([example])
     assert scores["speed"] > 0
-    assert scores["cats_f_per_type"].get("POSITIVE") is not None
-    assert scores["cats_f_per_type"].get("NEGATIVE") is not None
-    assert scores["cats_f_per_type"].get("BUG") is None

     # test with generator
     scores = nlp.evaluate(eg for eg in [example])
@@ -152,33 +127,110 @@ def test_evaluate_no_pipe(nlp):
     nlp.evaluate([Example.from_dict(doc, annots)])


-def test_evaluate_textcat(nlp_multi):
+def test_evaluate_textcat_multilabel(en_vocab):
     """Test that evaluate works with a multilabel textcat pipe."""
-    text = "hello world"
-    annots = {"doc_annotation": {"cats": {"FEATURE": 1.0, "QUESTION": 1.0}}}
-    doc = Doc(nlp_multi.vocab, words=text.split(" "))
+    nlp = Language(en_vocab)
+    textcat_multilabel = nlp.add_pipe("textcat_multilabel")
+    for label in ("FEATURE", "REQUEST", "BUG", "QUESTION"):
+        textcat_multilabel.add_label(label)
+    nlp.initialize()
+
+    annots = {"cats": {"FEATURE": 1.0, "QUESTION": 1.0}}
+    doc = nlp.make_doc("hello world")
     example = Example.from_dict(doc, annots)
-    scores = nlp_multi.evaluate([example])
-    assert scores["cats_f_per_type"].get("FEATURE") is not None
-    assert scores["cats_f_per_type"].get("QUESTION") is not None
-    assert scores["cats_f_per_type"].get("REQUEST") is not None
-    assert scores["cats_f_per_type"].get("BUG") is not None
-    assert scores["cats_f_per_type"].get("POSITIVE") is None
-    assert scores["cats_f_per_type"].get("NEGATIVE") is None
+    scores = nlp.evaluate([example])
+    labels = nlp.get_pipe("textcat_multilabel").labels
+    for label in labels:
+        assert scores["cats_f_per_type"].get(label) is not None
+    for key in example.reference.cats.keys():
+        if key not in labels:
+            assert scores["cats_f_per_type"].get(key) is None


-def test_evaluate_both(nlp_both):
-    """Test that evaluate works with two textcat pipes."""
-    text = "hello world"
-    annots = {"doc_annotation": {"cats": {"FEATURE": 1.0, "QUESTION": 1.0, "POSITIVE": 1.0, "NEGATIVE": 0.0}}}
-    doc = Doc(nlp_both.vocab, words=text.split(" "))
+def test_evaluate_multiple_textcat_final(en_vocab):
+    """Test that evaluate evaluates the final textcat component in a pipeline
+    with more than one textcat or textcat_multilabel."""
+    nlp = Language(en_vocab)
+    textcat = nlp.add_pipe("textcat")
+    for label in ("POSITIVE", "NEGATIVE"):
+        textcat.add_label(label)
+    textcat_multilabel = nlp.add_pipe("textcat_multilabel")
+    for label in ("FEATURE", "REQUEST", "BUG", "QUESTION"):
+        textcat_multilabel.add_label(label)
+    nlp.initialize()
+
+    annots = {
+        "cats": {
+            "POSITIVE": 1.0,
+            "NEGATIVE": 0.0,
+            "FEATURE": 1.0,
+            "QUESTION": 1.0,
+            "POSITIVE": 1.0,
+            "NEGATIVE": 0.0,
+        }
+    }
+    doc = nlp.make_doc("hello world")
     example = Example.from_dict(doc, annots)
-    scores = nlp_both.evaluate([example])
-    assert scores["cats_f_per_type"].get("FEATURE") is not None
-    assert scores["cats_f_per_type"].get("QUESTION") is not None
-    assert scores["cats_f_per_type"].get("BUG") is not None
-    assert scores["cats_f_per_type"].get("POSITIVE") is not None
-    assert scores["cats_f_per_type"].get("NEGATIVE") is not None
+    scores = nlp.evaluate([example])
+    # get the labels from the final pipe
+    labels = nlp.get_pipe(nlp.pipe_names[-1]).labels
+    for label in labels:
+        assert scores["cats_f_per_type"].get(label) is not None
+    for key in example.reference.cats.keys():
+        if key not in labels:
+            assert scores["cats_f_per_type"].get(key) is None
+
+
+def test_evaluate_multiple_textcat_separate(en_vocab):
+    """Test that evaluate can evaluate multiple textcat components separately
+    with custom scorers."""
+
+    def custom_textcat_score(examples, **kwargs):
+        scores = Scorer.score_cats(
+            examples,
+            "cats",
+            multi_label=False,
+            **kwargs,
+        )
+        return {f"custom_{k}": v for k, v in scores.items()}
+
+    @spacy.registry.scorers("test_custom_textcat_scorer")
+    def make_custom_textcat_scorer():
+        return custom_textcat_score
+
+    nlp = Language(en_vocab)
+    textcat = nlp.add_pipe(
+        "textcat",
+        config={"scorer": {"@scorers": "test_custom_textcat_scorer"}},
+    )
+    for label in ("POSITIVE", "NEGATIVE"):
+        textcat.add_label(label)
+    textcat_multilabel = nlp.add_pipe("textcat_multilabel")
+    for label in ("FEATURE", "REQUEST", "BUG", "QUESTION"):
+        textcat_multilabel.add_label(label)
+    nlp.initialize()
+
+    annots = {
+        "cats": {
+            "POSITIVE": 1.0,
+            "NEGATIVE": 0.0,
+            "FEATURE": 1.0,
+            "QUESTION": 1.0,
+            "POSITIVE": 1.0,
+            "NEGATIVE": 0.0,
+        }
+    }
+    doc = nlp.make_doc("hello world")
+    example = Example.from_dict(doc, annots)
+    scores = nlp.evaluate([example])
+    # check custom scores for the textcat pipe
+    assert "custom_cats_f_per_type" in scores
+    labels = nlp.get_pipe("textcat").labels
+    assert set(scores["custom_cats_f_per_type"].keys()) == set(labels)
+    # check default scores for the textcat_multilabel pipe
+    assert "cats_f_per_type" in scores
+    labels = nlp.get_pipe("textcat_multilabel").labels
+    assert set(scores["cats_f_per_type"].keys()) == set(labels)


 def vector_modification_pipe(doc):
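For reference, the registration pattern exercised by test_evaluate_multiple_textcat_separate above can also be used outside a test. A hedged sketch, assuming spaCy v3.x; the name "my_custom_textcat_scorer" and the blank English pipeline are illustrative choices, not part of this commit:

import spacy
from spacy.scorer import Scorer


@spacy.registry.scorers("my_custom_textcat_scorer")
def make_custom_textcat_scorer():
    def custom_textcat_score(examples, **kwargs):
        scores = Scorer.score_cats(examples, "cats", multi_label=False, **kwargs)
        # Prefix every key so these scores stay separate from other components'.
        return {f"custom_{k}": v for k, v in scores.items()}

    return custom_textcat_score


# Point the component's config at the registered scorer by name.
nlp = spacy.blank("en")
textcat = nlp.add_pipe(
    "textcat",
    config={"scorer": {"@scorers": "my_custom_textcat_scorer"}},
)
for label in ("POSITIVE", "NEGATIVE"):
    textcat.add_label(label)
nlp.initialize()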