Mirror of https://github.com/explosion/spaCy.git
Add and update score methods and score weights
Add and update `score` methods, the provided `scores`, and the default weights `default_score_weights` for pipeline components.

* `scores` provides all top-level keys returned by `score` (merely informative, similar to `assigns`).
* `default_score_weights` provides the default weights for a default config.
* The keys from `default_score_weights` determine which values are shown in the `spacy train` output, so keys with weight `0.0` are displayed but not counted toward the overall score.
This commit is contained in:
parent baf19fd652
commit 8bb0507777
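For illustration, a minimal sketch of how a stateless component registers its score keys and default weights under the new API. The component name, score key, and weights here are hypothetical; the registration call mirrors the test added in this commit.

```python
from spacy.language import Language

# Hypothetical stateless component registered with the new arguments.
Language.component(
    "my_component",
    scores=["my_acc"],                      # top-level keys returned by score()
    default_score_weights={"my_acc": 1.0},  # default weights for the generated config
    func=lambda doc: doc,
)

nlp = Language()
nlp.add_pipe("my_component")
# The component's default weights are merged into the generated training config:
print(nlp.config["training"]["score_weights"])  # expected to include {"my_acc": 1.0}
```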
@@ -395,7 +395,7 @@ def subdivide_batch(batch, accumulate_gradient):
 def setup_printer(
     training: Union[Dict[str, Any], Config], nlp: Language
 ) -> Callable[[Dict[str, Any]], None]:
-    score_cols = training["scores"]
+    score_cols = list(training["score_weights"])
     score_widths = [max(len(col), 6) for col in score_cols]
     loss_cols = [f"Loss {pipe}" for pipe in nlp.pipe_names]
     loss_widths = [max(len(col), 8) for col in loss_cols]

@@ -230,11 +230,12 @@ class Language:
             pipe_config = self.get_pipe_config(pipe_name)
             pipeline[pipe_name] = {"factory": pipe_meta.factory, **pipe_config}
             scores.extend(pipe_meta.scores)
-            if pipe_meta.score_weights:
-                score_weights.append(pipe_meta.score_weights)
+            if pipe_meta.default_score_weights:
+                score_weights.append(pipe_meta.default_score_weights)
         self._config["nlp"]["pipeline"] = self.pipe_names
         self._config["components"] = pipeline
-        self._config["training"]["scores"] = list(scores)
+        self._config["training"]["scores"] = sorted(set(scores))
+        combined_score_weights = combine_score_weights(score_weights)
         self._config["training"]["score_weights"] = combine_score_weights(score_weights)
         if not srsly.is_json_serializable(self._config):
             raise ValueError(Errors.E961.format(config=self._config))

@@ -357,7 +358,7 @@ class Language:
         requires: Iterable[str] = tuple(),
         retokenizes: bool = False,
         scores: Iterable[str] = tuple(),
-        score_weights: Dict[str, float] = SimpleFrozenDict(),
+        default_score_weights: Dict[str, float] = SimpleFrozenDict(),
         func: Optional[Callable] = None,
     ) -> Callable:
         """Register a new pipeline component factory. Can be used as a decorator

@@ -404,7 +405,7 @@ class Language:
                 assigns=validate_attrs(assigns),
                 requires=validate_attrs(requires),
                 scores=scores,
-                score_weights=score_weights,
+                default_score_weights=default_score_weights,
                 retokenizes=retokenizes,
             )
             cls.set_factory_meta(name, factory_meta)

@@ -430,7 +431,7 @@ class Language:
         requires: Iterable[str] = tuple(),
         retokenizes: bool = False,
         scores: Iterable[str] = tuple(),
-        score_weights: Dict[str, float] = SimpleFrozenDict(),
+        default_score_weights: Dict[str, float] = SimpleFrozenDict(),
         func: Optional[Callable[[Doc], Doc]] = None,
     ) -> Callable:
         """Register a new pipeline component. Can be used for stateless function

@@ -465,7 +466,7 @@ class Language:
                 requires=requires,
                 retokenizes=retokenizes,
                 scores=scores,
-                score_weights=score_weights,
+                default_score_weights=default_score_weights,
                 func=factory_func,
             )
             return component_func

@@ -1501,7 +1502,7 @@ class FactoryMeta:
     requires: Iterable[str] = tuple()
     retokenizes: bool = False
     scores: Iterable[str] = tuple()
-    score_weights: Dict[str, float] = None
+    default_score_weights: Dict[str, float] = None


 def _get_config_overrides(

@@ -43,8 +43,8 @@ DEFAULT_PARSER_MODEL = Config().from_str(default_model_config)["model"]
         "min_action_freq": 30,
         "model": DEFAULT_PARSER_MODEL,
     },
-    scores=["dep_uas", "dep_las", "sents_f"],
-    score_weights={"dep_uas": 0.5, "dep_las": 0.5, "sents_f": 0.0},
+    scores=["dep_uas", "dep_las", "dep_las_per_type", "sents_p", "sents_r", "sents_f"],
+    default_score_weights={"dep_uas": 0.5, "dep_las": 0.5, "sents_f": 0.0},
 )
 def make_parser(
     nlp: Language,

@@ -115,4 +115,5 @@ cdef class DependencyParser(Parser):
         results.update(Scorer.score_spans(examples, "sents", **kwargs))
         results.update(Scorer.score_deps(examples, "dep", getter=dep_getter,
                                          ignore_labels=("p", "punct"), **kwargs))
+        del results["sents_per_type"]
         return results

@@ -23,6 +23,8 @@ PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
         "overwrite_ents": False,
         "ent_id_sep": DEFAULT_ENT_ID_SEP,
     },
+    scores=["ents_p", "ents_r", "ents_f", "ents_per_type"],
+    default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0},
 )
 def make_entity_ruler(
     nlp: Language,

@@ -305,6 +307,9 @@ class EntityRuler:
             label = f"{label}{self.ent_id_sep}{ent_id}"
         return label

+    def score(self, examples, **kwargs):
+        return Scorer.score_spans(examples, "ents", **kwargs)
+
     def from_bytes(
         self, patterns_bytes: bytes, exclude: Iterable[str] = tuple()
     ) -> "EntityRuler":

@@ -39,7 +39,9 @@ DEFAULT_MORPH_MODEL = Config().from_str(default_model_config)["model"]
 @Language.factory(
     "morphologizer",
     assigns=["token.morph", "token.pos"],
-    default_config={"model": DEFAULT_MORPH_MODEL}
+    default_config={"model": DEFAULT_MORPH_MODEL},
+    scores=["pos_acc", "morph_acc", "morph_per_feat"],
+    default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5},
 )
 def make_morphologizer(
     nlp: Language,

@@ -41,8 +41,9 @@ DEFAULT_NER_MODEL = Config().from_str(default_model_config)["model"]
         "min_action_freq": 30,
         "model": DEFAULT_NER_MODEL,
     },
-    scores=["ents_f", "ents_r", "ents_p"],
-    score_weights={"ents_f": 1.0, "ents_r": 0.0, "ents_p": 0.0},
+    scores=["ents_p", "ents_r", "ents_f", "ents_per_type"],
+    default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0},
+
 )
 def make_ner(
     nlp: Language,

@@ -13,7 +13,9 @@ from .. import util
 @Language.factory(
     "sentencizer",
     assigns=["token.is_sent_start", "doc.sents"],
-    default_config={"punct_chars": None}
+    default_config={"punct_chars": None},
+    scores=["sents_p", "sents_r", "sents_f"],
+    default_score_weights={"sents_f": 1.0, "sents_p": 0.0, "sents_r": 0.0},
 )
 def make_sentencizer(
     nlp: Language,

@@ -132,7 +134,9 @@ class Sentencizer(Pipe):
                     doc.c[j].sent_start = -1

     def score(self, examples, **kwargs):
-        return Scorer.score_spans(examples, "sents", **kwargs)
+        results = Scorer.score_spans(examples, "sents", **kwargs)
+        del results["sents_per_type"]
+        return results

     def to_bytes(self, exclude=tuple()):
         """Serialize the sentencizer to a bytestring.

@@ -35,7 +35,7 @@ DEFAULT_SENTER_MODEL = Config().from_str(default_model_config)["model"]
     assigns=["token.is_sent_start"],
     default_config={"model": DEFAULT_SENTER_MODEL},
     scores=["sents_p", "sents_r", "sents_f"],
-    score_weights={"sents_p": 0.0, "sents_r": 0.0, "sents_f": 1.0},
+    default_score_weights={"sents_f": 1.0, "sents_p": 0.0, "sents_r": 0.0},
 )
 def make_senter(nlp: Language, name: str, model: Model):
     return SentenceRecognizer(nlp.vocab, model, name)

@@ -108,7 +108,9 @@ class SentenceRecognizer(Tagger):
         raise NotImplementedError

     def score(self, examples, **kwargs):
-        return Scorer.score_spans(examples, "sents", **kwargs)
+        results = Scorer.score_spans(examples, "sents", **kwargs)
+        del results["sents_per_type"]
+        return results

     def to_bytes(self, exclude=tuple()):
         serialize = {}

@@ -34,6 +34,9 @@ DEFAULT_SIMPLE_NER_MODEL = Config().from_str(default_model_config)["model"]
     "simple_ner",
     assigns=["doc.ents"],
     default_config={"labels": [], "model": DEFAULT_SIMPLE_NER_MODEL},
+    scores=["ents_p", "ents_r", "ents_f", "ents_per_type"],
+    default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0},
+
 )
 def make_simple_ner(
     nlp: Language, name: str, model: Model, labels: Iterable[str]

@@ -173,6 +176,9 @@ class SimpleNER(Pipe):
     def init_multitask_objectives(self, *args, **kwargs):
         pass

+    def score(self, examples, **kwargs):
+        return Scorer.score_spans(examples, "ents", **kwargs)
+

 def _has_ner(example: Example) -> bool:
     for ner_tag in example.get_aligned_ner():

@@ -40,8 +40,8 @@ DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"]
     "tagger",
     assigns=["token.tag"],
     default_config={"model": DEFAULT_TAGGER_MODEL, "set_morphology": False},
-    scores=["tag_acc", "pos_acc"],
-    score_weights={"tag_acc": 0.5, "pos_acc": 0.5},
+    scores=["tag_acc", "pos_acc", "lemma_acc"],
+    default_score_weights={"tag_acc": 1.0},
 )
 def make_tagger(nlp: Language, name: str, model: Model, set_morphology: bool):
     return Tagger(nlp.vocab, model, name, set_morphology=set_morphology)

@@ -56,6 +56,8 @@ dropout = null
     "textcat",
     assigns=["doc.cats"],
     default_config={"labels": [], "model": DEFAULT_TEXTCAT_MODEL},
+    scores=["cats_score", "cats_score_desc", "cats_p", "cats_r", "cats_f", "cats_macro_f", "cats_macro_auc", "cats_f_per_type", "cats_macro_auc_per_type"],
+    default_score_weights={"cats_score": 1.0},
 )
 def make_textcat(
     nlp: Language, name: str, model: Model, labels: Iterable[str]

@@ -343,6 +343,10 @@ def test_language_factories_invalid():
            [{"a": 100, "b": 400}, {"c": 0.5, "d": 0.5}],
            {"a": 0.1, "b": 0.4, "c": 0.25, "d": 0.25},
        ),
+        (
+            [{"a": 0.5, "b": 0.5}, {"b": 1.0}],
+            {"a": 0.25, "b": 0.75},
+        ),
     ],
 )
 def test_language_factories_combine_score_weights(weights, expected):

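In the new case above, each weight dict is first normalized to sum to 1.0 and then divided by the number of components (here 2): `{"a": 0.5, "b": 0.5}` contributes `a = 0.25, b = 0.25`, `{"b": 1.0}` contributes `b = 0.5`, and the sums give the expected `{"a": 0.25, "b": 0.75}`.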
@@ -354,28 +358,24 @@ def test_language_factories_combine_score_weights(weights, expected):
 def test_language_factories_scores():
     name = "test_language_factories_scores"
     func = lambda doc: doc
-    scores1 = ["a1", "a2"]
     weights1 = {"a1": 0.5, "a2": 0.5}
-    scores2 = ["b1", "b2", "b3"]
     weights2 = {"b1": 0.2, "b2": 0.7, "b3": 0.1}
     Language.component(
-        f"{name}1", scores=scores1, score_weights=weights1, func=func,
+        f"{name}1", scores=list(weights1), default_score_weights=weights1, func=func,
     )
     Language.component(
-        f"{name}2", scores=scores2, score_weights=weights2, func=func,
+        f"{name}2", scores=list(weights2), default_score_weights=weights2, func=func,
     )
     meta1 = Language.get_factory_meta(f"{name}1")
-    assert meta1.scores == scores1
-    assert meta1.score_weights == weights1
+    assert meta1.default_score_weights == weights1
     meta2 = Language.get_factory_meta(f"{name}2")
-    assert meta2.scores == scores2
-    assert meta2.score_weights == weights2
+    assert meta2.default_score_weights == weights2
     nlp = Language()
     nlp._config["training"]["scores"] = ["speed"]
     nlp._config["training"]["score_weights"] = {}
     nlp.add_pipe(f"{name}1")
     nlp.add_pipe(f"{name}2")
     cfg = nlp.config["training"]
-    assert cfg["scores"] == ["speed", *scores1, *scores2]
+    assert cfg["scores"] == sorted(["speed", *list(weights1.keys()), *list(weights2.keys())])
     expected_weights = {"a1": 0.25, "a2": 0.25, "b1": 0.1, "b2": 0.35, "b3": 0.05}
     assert cfg["score_weights"] == expected_weights

@@ -1139,9 +1139,10 @@ def combine_score_weights(weights: List[Dict[str, float]]) -> Dict[str, float]:
     """
     result = {}
     for w_dict in weights:
-        # We need to account for weights that don't sum to 1.0 and normalize the
-        # score weights accordingly, then divide score by the number of components
-        total = sum([w for w in w_dict.values()])
+        # We need to account for weights that don't sum to 1.0 and normalize
+        # the score weights accordingly, then divide score by the number of
+        # components.
+        total = sum(w_dict.values())
         for key, value in w_dict.items():
             weight = round(value / total / len(weights), 2)
             result[key] = result.get(key, 0.0) + weight
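To make the combination rule concrete, here is a self-contained sketch of the logic shown in the hunk above (assuming the function simply returns `result` after the loop, which the diff does not show), applied to the default weights used in the test added in this commit:

```python
from typing import Dict, List


def combine_score_weights(weights: List[Dict[str, float]]) -> Dict[str, float]:
    # Normalize each component's weights so they sum to 1.0, then divide by the
    # number of components so the combined weights also sum to roughly 1.0.
    result: Dict[str, float] = {}
    for w_dict in weights:
        total = sum(w_dict.values())
        for key, value in w_dict.items():
            weight = round(value / total / len(weights), 2)
            result[key] = result.get(key, 0.0) + weight
    return result


# Default weights of two components, as in test_language_factories_scores:
print(combine_score_weights([{"a1": 0.5, "a2": 0.5}, {"b1": 0.2, "b2": 0.7, "b3": 0.1}]))
# {'a1': 0.25, 'a2': 0.25, 'b1': 0.1, 'b2': 0.35, 'b3': 0.05}
```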