Fix handling of score_weights

Ines Montani 2020-09-24 10:27:33 +02:00
parent e2ffe51fb5
commit ae51f580c1
28 changed files with 95 additions and 92 deletions

View File

@@ -317,21 +317,3 @@ start = 100
stop = 1000
compound = 1.001
{% endif %}
[training.score_weights]
{%- if "tagger" in components %}
tag_acc = {{ (1.0 / components|length)|round(2) }}
{%- endif -%}
{%- if "parser" in components %}
dep_uas = 0.0
dep_las = {{ (1.0 / components|length)|round(2) }}
sents_f = 0.0
{%- endif %}
{%- if "ner" in components %}
ents_f = {{ (1.0 / components|length)|round(2) }}
ents_p = 0.0
ents_r = 0.0
{%- endif %}
{%- if "textcat" in components %}
cats_score = {{ (1.0 / components|length)|round(2) }}
{%- endif -%}

View File

@@ -209,6 +209,8 @@ def create_train_batches(iterator, batcher, max_epochs: int):
def create_evaluation_callback(
nlp: Language, dev_corpus: Callable, weights: Dict[str, float]
) -> Callable[[], Tuple[float, Dict[str, float]]]:
weights = {key: value for key, value in weights.items() if value is not None}
def evaluate() -> Tuple[float, Dict[str, float]]:
dev_examples = list(dev_corpus(nlp))
scores = nlp.evaluate(dev_examples)
@@ -368,6 +370,7 @@ def update_meta(
) -> None:
nlp.meta["performance"] = {}
for metric in training["score_weights"]:
if metric is not None:
nlp.meta["performance"][metric] = info["other_scores"].get(metric, 0.0)
for pipe_name in nlp.pipe_names:
nlp.meta["performance"][f"{pipe_name}_loss"] = info["losses"][pipe_name]

View File

@@ -25,7 +25,6 @@ class Bengali(Language):
"lemmatizer",
assigns=["token.lemma"],
default_config={"model": None, "mode": "rule", "lookups": None},
scores=["lemma_acc"],
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(

View File

@@ -30,7 +30,6 @@ class Greek(Language):
"lemmatizer",
assigns=["token.lemma"],
default_config={"model": None, "mode": "rule", "lookups": None},
scores=["lemma_acc"],
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(

View File

@@ -29,7 +29,6 @@ class English(Language):
"lemmatizer",
assigns=["token.lemma"],
default_config={"model": None, "mode": "rule", "lookups": None},
scores=["lemma_acc"],
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(

View File

@@ -28,7 +28,6 @@ class Persian(Language):
"lemmatizer",
assigns=["token.lemma"],
default_config={"model": None, "mode": "rule", "lookups": None},
scores=["lemma_acc"],
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(

View File

@@ -33,7 +33,6 @@ class French(Language):
"lemmatizer",
assigns=["token.lemma"],
default_config={"model": None, "mode": "rule", "lookups": None},
scores=["lemma_acc"],
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(

View File

@@ -28,7 +28,6 @@ class Norwegian(Language):
"lemmatizer",
assigns=["token.lemma"],
default_config={"model": None, "mode": "rule", "lookups": None},
scores=["lemma_acc"],
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(

View File

@@ -30,7 +30,6 @@ class Dutch(Language):
"lemmatizer",
assigns=["token.lemma"],
default_config={"model": None, "mode": "rule", "lookups": None},
scores=["lemma_acc"],
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(

View File

@@ -35,7 +35,6 @@ class Polish(Language):
"lemmatizer",
assigns=["token.lemma"],
default_config={"model": None, "mode": "pos_lookup", "lookups": None},
scores=["lemma_acc"],
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(

View File

@@ -25,7 +25,6 @@ class Russian(Language):
"lemmatizer",
assigns=["token.lemma"],
default_config={"model": None, "mode": "pymorphy2", "lookups": None},
scores=["lemma_acc"],
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(

View File

@@ -31,7 +31,6 @@ class Swedish(Language):
"lemmatizer",
assigns=["token.lemma"],
default_config={"model": None, "mode": "rule", "lookups": None},
scores=["lemma_acc"],
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(

View File

@@ -25,7 +25,6 @@ class Ukrainian(Language):
"lemmatizer",
assigns=["token.lemma"],
default_config={"model": None, "mode": "pymorphy2", "lookups": None},
scores=["lemma_acc"],
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(

View File

@@ -248,8 +248,14 @@ class Language:
self._config["nlp"]["pipeline"] = list(self.component_names)
self._config["nlp"]["disabled"] = list(self.disabled)
self._config["components"] = pipeline
if not self._config["training"].get("score_weights"):
# We're merging the existing score weights back into the combined
# weights to make sure we're preserving custom settings in the config
# while also reflecting updates (e.g. new components added)
prev_score_weights = self._config["training"].get("score_weights", {})
combined_score_weights = combine_score_weights(score_weights)
combined_score_weights.update(prev_score_weights)
# Combine the scores a second time to normalize them
combined_score_weights = combine_score_weights([combined_score_weights])
self._config["training"]["score_weights"] = combined_score_weights
if not srsly.is_json_serializable(self._config):
raise ValueError(Errors.E961.format(config=self._config))
@@ -412,7 +418,6 @@ class Language:
assigns: Iterable[str] = SimpleFrozenList(),
requires: Iterable[str] = SimpleFrozenList(),
retokenizes: bool = False,
scores: Iterable[str] = SimpleFrozenList(),
default_score_weights: Dict[str, float] = SimpleFrozenDict(),
func: Optional[Callable] = None,
) -> Callable:
@@ -430,12 +435,11 @@
e.g. "token.ent_id". Used for pipeline analyis.
retokenizes (bool): Whether the component changes the tokenization.
Used for pipeline analysis.
scores (Iterable[str]): All scores set by the component if it's trainable,
e.g. ["ents_f", "ents_r", "ents_p"].
default_score_weights (Dict[str, float]): The scores to report during
training, and their default weight towards the final score used to
select the best model. Weights should sum to 1.0 per component and
will be combined and normalized for the whole pipeline.
will be combined and normalized for the whole pipeline. If None,
the score won't be shown in the logs or be weighted.
func (Optional[Callable]): Factory function if not used as a decorator.
DOCS: https://nightly.spacy.io/api/language#factory
@@ -475,7 +479,7 @@
default_config=default_config,
assigns=validate_attrs(assigns),
requires=validate_attrs(requires),
scores=scores,
scores=list(default_score_weights.keys()),
default_score_weights=default_score_weights,
retokenizes=retokenizes,
)
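With `scores` now derived from the keys of `default_score_weights`, a custom factory only has to declare its weights; a `None` weight registers the score name without logging or weighting it. A minimal sketch along the lines of the updated tests further down, using a hypothetical component name:

```python
from spacy.language import Language

# Hypothetical component: the score names and weights are made up.
# A None weight means "don't show in the logs, don't count towards the score".
@Language.factory(
    "my_scorer",
    default_score_weights={"my_acc": 1.0, "my_per_type": None},
)
def make_my_scorer(nlp: Language, name: str):
    def my_scorer(doc):
        return doc
    return my_scorer

meta = Language.get_factory_meta("my_scorer")
assert list(meta.scores) == ["my_acc", "my_per_type"]
assert meta.default_score_weights == {"my_acc": 1.0, "my_per_type": None}
```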

View File

@@ -43,8 +43,14 @@ DEFAULT_PARSER_MODEL = Config().from_str(default_model_config)["model"]
"min_action_freq": 30,
"model": DEFAULT_PARSER_MODEL,
},
scores=["dep_uas", "dep_las", "dep_las_per_type", "sents_p", "sents_r", "sents_f"],
default_score_weights={"dep_uas": 0.5, "dep_las": 0.5, "sents_f": 0.0},
default_score_weights={
"dep_uas": 0.5,
"dep_las": 0.5,
"dep_las_per_type": None,
"sents_p": None,
"sents_r": None,
"sents_f": 0.0,
},
)
def make_parser(
nlp: Language,

View File

@@ -25,8 +25,12 @@ PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
"overwrite_ents": False,
"ent_id_sep": DEFAULT_ENT_ID_SEP,
},
scores=["ents_p", "ents_r", "ents_f", "ents_per_type"],
default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0},
default_score_weights={
"ents_f": 1.0,
"ents_p": 0.0,
"ents_r": 0.0,
"ents_per_type": None,
},
)
def make_entity_ruler(
nlp: Language,

View File

@@ -21,7 +21,6 @@ from .. import util
"lookups": None,
"overwrite": False,
},
scores=["lemma_acc"],
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(

View File

@@ -49,8 +49,7 @@ DEFAULT_MORPH_MODEL = Config().from_str(default_model_config)["model"]
"morphologizer",
assigns=["token.morph", "token.pos"],
default_config={"model": DEFAULT_MORPH_MODEL},
scores=["pos_acc", "morph_acc", "morph_per_feat"],
default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5},
default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5, "morph_per_feat": None},
)
def make_morphologizer(
nlp: Language,

View File

@@ -39,8 +39,7 @@ DEFAULT_NER_MODEL = Config().from_str(default_model_config)["model"]
"update_with_oracle_cut_size": 100,
"model": DEFAULT_NER_MODEL,
},
scores=["ents_p", "ents_r", "ents_f", "ents_per_type"],
default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0},
default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0, "ents_per_type": None},
)
def make_ner(

View File

@@ -15,7 +15,6 @@ from .. import util
"sentencizer",
assigns=["token.is_sent_start", "doc.sents"],
default_config={"punct_chars": None},
scores=["sents_p", "sents_r", "sents_f"],
default_score_weights={"sents_f": 1.0, "sents_p": 0.0, "sents_r": 0.0},
)
def make_sentencizer(

View File

@@ -36,7 +36,6 @@ DEFAULT_SENTER_MODEL = Config().from_str(default_model_config)["model"]
"senter",
assigns=["token.is_sent_start"],
default_config={"model": DEFAULT_SENTER_MODEL},
scores=["sents_p", "sents_r", "sents_f"],
default_score_weights={"sents_f": 1.0, "sents_p": 0.0, "sents_r": 0.0},
)
def make_senter(nlp: Language, name: str, model: Model):

View File

@@ -42,7 +42,6 @@ DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"]
"tagger",
assigns=["token.tag"],
default_config={"model": DEFAULT_TAGGER_MODEL},
scores=["tag_acc"],
default_score_weights={"tag_acc": 1.0},
)
def make_tagger(nlp: Language, name: str, model: Model):

View File

@@ -62,18 +62,17 @@ subword_features = true
"positive_label": None,
"model": DEFAULT_TEXTCAT_MODEL,
},
scores=[
"cats_score",
"cats_score_desc",
"cats_p",
"cats_r",
"cats_f",
"cats_macro_f",
"cats_macro_auc",
"cats_f_per_type",
"cats_macro_auc_per_type",
],
default_score_weights={"cats_score": 1.0},
default_score_weights={
"cats_score": 1.0,
"cats_score_desc": None,
"cats_p": None,
"cats_r": None,
"cats_f": None,
"cats_macro_f": None,
"cats_macro_auc": None,
"cats_f_per_type": None,
"cats_macro_auc_per_type": None,
},
)
def make_textcat(
nlp: Language,

View File

@@ -211,7 +211,7 @@ class ConfigSchemaTraining(BaseModel):
seed: Optional[StrictInt] = Field(..., title="Random seed")
gpu_allocator: Optional[StrictStr] = Field(..., title="Memory allocator when running on GPU")
accumulate_gradient: StrictInt = Field(..., title="Whether to divide the batch up into substeps")
score_weights: Dict[StrictStr, Union[StrictFloat, StrictInt]] = Field(..., title="Scores to report and their weights for selecting final model")
score_weights: Dict[StrictStr, Optional[Union[StrictFloat, StrictInt]]] = Field(..., title="Scores to report and their weights for selecting final model")
init_tok2vec: Optional[StrictStr] = Field(..., title="Path to pretrained tok2vec weights")
raw_text: Optional[StrictStr] = Field(default=None, title="Raw text")
optimizer: Optimizer = Field(..., title="The optimizer to use")

View File

@@ -359,12 +359,8 @@ def test_language_factories_scores():
func = lambda nlp, name: lambda doc: doc
weights1 = {"a1": 0.5, "a2": 0.5}
weights2 = {"b1": 0.2, "b2": 0.7, "b3": 0.1}
Language.factory(
f"{name}1", scores=list(weights1), default_score_weights=weights1, func=func,
)
Language.factory(
f"{name}2", scores=list(weights2), default_score_weights=weights2, func=func,
)
Language.factory(f"{name}1", default_score_weights=weights1, func=func)
Language.factory(f"{name}2", default_score_weights=weights2, func=func)
meta1 = Language.get_factory_meta(f"{name}1")
assert meta1.default_score_weights == weights1
meta2 = Language.get_factory_meta(f"{name}2")
@@ -376,6 +372,21 @@ def test_language_factories_scores():
cfg = nlp.config["training"]
expected_weights = {"a1": 0.25, "a2": 0.25, "b1": 0.1, "b2": 0.35, "b3": 0.05}
assert cfg["score_weights"] == expected_weights
# Test with custom defaults
config = nlp.config.copy()
config["training"]["score_weights"]["a1"] = 0.0
config["training"]["score_weights"]["b3"] = 1.0
nlp = English.from_config(config)
score_weights = nlp.config["training"]["score_weights"]
expected = {"a1": 0.0, "a2": 0.15, "b1": 0.06, "b2": 0.21, "b3": 0.59}
assert score_weights == expected
# Test with null values
config = nlp.config.copy()
config["training"]["score_weights"]["a1"] = None
nlp = English.from_config(config)
score_weights = nlp.config["training"]["score_weights"]
expected = {"a1": None, "a2": 0.15, "b1": 0.06, "b2": 0.21, "b3": 0.58} # rounding :(
assert score_weights == expected
def test_pipe_factories_from_source():

View File

@@ -1209,8 +1209,19 @@ def combine_score_weights(weights: List[Dict[str, float]]) -> Dict[str, float]:
weights (List[dict]): The weights defined by the components.
RETURNS (Dict[str, float]): The combined and normalized weights.
"""
# We first need to extract all None/null values for score weights that
# shouldn't be shown in the table *or* be weighted
result = {}
all_weights = []
for w_dict in weights:
filtered_weights = {}
for key, value in w_dict.items():
if value is None:
result[key] = None
else:
filtered_weights[key] = value
all_weights.append(filtered_weights)
for w_dict in all_weights:
# We need to account for weights that don't sum to 1.0 and normalize
# the score weights accordingly, then divide score by the number of
# components.
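As a quick illustration of how the filtered and unfiltered weights end up being combined — a sketch that assumes the behavior exercised by the test changes above, with hypothetical component defaults:

```python
from spacy.util import combine_score_weights

tagger = {"tag_acc": 1.0}
parser = {"dep_uas": 0.5, "dep_las": 0.5, "sents_f": 0.0, "dep_las_per_type": None}

# Each component's weights are normalized to sum to 1.0, then divided by the
# number of components; None-valued weights are passed through unchanged.
combined = combine_score_weights([tagger, parser])
# tag_acc -> 0.5, dep_uas -> 0.25, dep_las -> 0.25, sents_f -> 0.0,
# dep_las_per_type -> None
print(combined)
```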

View File

@@ -146,15 +146,14 @@ examples, see the
> ```
| Name | Description |
| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| ----------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `name` | The name of the component factory. ~~str~~ |
| _keyword-only_ | |
| `default_config` | The default config, describing the default values of the factory arguments. ~~Dict[str, Any]~~ |
| `assigns` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). ~~Iterable[str]~~ |
| `requires` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). ~~Iterable[str]~~ |
| `retokenizes` | Whether the component changes tokenization. Used for [pipe analysis](/usage/processing-pipelines#analysis). ~~bool~~ |
| `scores` | All scores set by the component if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). ~~Iterable[str]~~ |
| `default_score_weights` | The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to `1.0` per component and will be combined and normalized for the whole pipeline. ~~Dict[str, float]~~ |
| `default_score_weights` | The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to `1.0` per component and will be combined and normalized for the whole pipeline. If a weight is set to `None`, the score will not be logged or weighted. ~~Dict[str, Optional[float]]~~ |
| `func` | Optional function if not used as a decorator. ~~Optional[Callable[[...], Callable[[Doc], Doc]]]~~ |
## Language.\_\_call\_\_ {#call tag="method"}
@@ -1037,11 +1036,11 @@ component is defined and stored on the `Language` class for each component
instance and factory instance.
| Name | Description |
| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| ----------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `factory` | The name of the registered component factory. ~~str~~ |
| `default_config` | The default config, describing the default values of the factory arguments. ~~Dict[str, Any]~~ |
| `assigns` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). ~~Iterable[str]~~ |
| `requires` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). ~~Iterable[str]~~  |
| `retokenizes` | Whether the component changes tokenization. Used for [pipe analysis](/usage/processing-pipelines#analysis). ~~bool~~  |
| `scores` | All scores set by the component if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). ~~Iterable[str]~~ |
| `default_score_weights` | The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to `1.0` per component and will be combined and normalized for the whole pipeline. ~~Dict[str, float]~~ |
| `default_score_weights` | The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to `1.0` per component and will be combined and normalized for the whole pipeline. If a weight is set to `None`, the score will not be logged or weighted. ~~Dict[str, Optional[float]]~~ |
| `scores` | All scores set by the component if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. Based on the `default_score_weights` and used for [pipe analysis](/usage/processing-pipelines#analysis). ~~Iterable[str]~~ |

View File

@@ -470,6 +470,7 @@ score.
```ini
[training.score_weights]
dep_las = 0.4
dep_uas = null
ents_f = 0.4
tag_acc = 0.2
token_acc = 0.0
@@ -481,9 +482,9 @@ you generate a config for a given pipeline, the score weights are generated by
combining and normalizing the default score weights of the pipeline components.
The default score weights are defined by each pipeline component via the
`default_score_weights` setting on the
[`@Language.component`](/api/language#component) or
[`@Language.factory`](/api/language#factory). By default, all pipeline
components are weighted equally.
[`@Language.factory`](/api/language#factory) decorator. By default, all pipeline
components are weighted equally. If a score weight is set to `null`, it will be
excluded from the logs and the score won't be weighted.
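To make the weighting concrete, here is a small sketch (with made-up evaluation scores) of how the example block above translates into a final score: `dep_uas` is skipped entirely because its weight is `null`, and `token_acc` is still reported but contributes nothing because its weight is `0.0`.

```python
# Hypothetical dev scores, for illustration only
scores = {"dep_las": 0.85, "dep_uas": 0.87, "ents_f": 0.80, "tag_acc": 0.92, "token_acc": 0.99}
weights = {"dep_las": 0.4, "dep_uas": None, "ents_f": 0.4, "tag_acc": 0.2, "token_acc": 0.0}

final = sum(score * weights[name] for name, score in scores.items() if weights[name] is not None)
print(round(final, 3))  # 0.4*0.85 + 0.4*0.80 + 0.2*0.92 + 0.0*0.99 = 0.844
```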
<Accordion title="Understanding the training output and score types" spaced>