From 110850f095c9c328be3b4559b78733198b8e40e6 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 2 Sep 2022 12:35:46 +0200
Subject: [PATCH] Make beta a component scorer setting.

---
 spacy/cli/find_threshold.py          | 9 ++++++---
 spacy/errors.py                      | 3 ++-
 spacy/pipeline/spancat.py            | 5 +++--
 spacy/pipeline/textcat_multilabel.py | 5 +++--
 spacy/scorer.py                      | 8 +++++---
 spacy/tests/test_cli.py              | 4 ++--
 6 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/spacy/cli/find_threshold.py b/spacy/cli/find_threshold.py
index 1641c2d04..6ce4f7321 100644
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@@ -1,4 +1,5 @@
 import functools
+from functools import partial
 import operator
 from pathlib import Path
 import logging
@@ -115,8 +116,12 @@ def find_threshold(
         pipe = nlp.get_pipe(pipe_name)
     except KeyError as err:
         wasabi.msg.fail(title=str(err), exits=1)
+
     if not isinstance(pipe, TrainablePipe):
         raise TypeError(Errors.E1044)
+    if not callable(getattr(pipe, "scorer", None)):
+        raise AttributeError(Errors.E1045)
+    setattr(pipe, "scorer", partial(pipe.scorer, beta=beta))  # keeps bound args
 
     if not silent:
         wasabi.msg.info(
@@ -145,9 +150,7 @@ def find_threshold(
     scores: Dict[float, float] = {}
     for threshold in numpy.linspace(0, 1, n_trials):
         pipe.cfg = set_nested_item(pipe.cfg, config_keys, threshold)
-        scores[threshold] = nlp.evaluate(dev_dataset, scorer_cfg={"beta": beta})[
-            scores_key
-        ]
+        scores[threshold] = nlp.evaluate(dev_dataset)[scores_key]
         if not (
             isinstance(scores[threshold], float) or isinstance(scores[threshold], int)
         ):
diff --git a/spacy/errors.py b/spacy/errors.py
index 18d3cd5f2..08ab40987 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -939,7 +939,8 @@ class Errors(metaclass=ErrorsWithCodes):
              "`{arg2}`={arg2_values} but these arguments are conflicting.")
     E1043 = ("Expected None or a value in range [{range_start}, {range_end}] for entity linker threshold, but got "
              "{value}.")
-    E1044 = ("Only components of type `TrainablePipe` are supported by `find_threshold()`.")
+    E1044 = ("`find_threshold()` only supports components of type `TrainablePipe`.")
+    E1045 = ("`find_threshold()` only supports components with a `scorer` attribute.")
 
 
 # Deprecated model shortcuts, only used in errors and warnings
diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py
index 1b7a9eecb..ce34a20d0 100644
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@@ -1,3 +1,4 @@
+from functools import partial
 from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any, cast
 from thinc.api import Config, Model, get_current_ops, set_dropout_rate, Ops
 from thinc.api import Optimizer
@@ -165,8 +166,8 @@ def spancat_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
 
 
 @registry.scorers("spacy.spancat_scorer.v1")
-def make_spancat_scorer():
-    return spancat_score
+def make_spancat_scorer(beta: float = 1.0):
+    return partial(spancat_score, beta=beta)
 
 
 class SpanCategorizer(TrainablePipe):
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index e33a885f8..cb9ae7719 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -1,3 +1,4 @@
+from functools import partial
 from typing import Iterable, Optional, Dict, List, Callable, Any
 from thinc.types import Floats2d
 from thinc.api import Model, Config
@@ -121,8 +122,8 @@ def textcat_multilabel_score(examples: Iterable[Example], **kwargs) -> Dict[str,
 
 
 @registry.scorers("spacy.textcat_multilabel_scorer.v1")
-def make_textcat_multilabel_scorer():
-    return textcat_multilabel_score
+def make_textcat_multilabel_scorer(beta: float = 1.0):
+    return partial(textcat_multilabel_score, beta=beta)
 
 
 class MultiLabel_TextCategorizer(TextCategorizer):
diff --git a/spacy/scorer.py b/spacy/scorer.py
index 3bb3c5cab..0a893fcce 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -102,7 +102,7 @@ class ROCAUCScore:
 class Scorer:
     """Compute evaluation scores."""
 
-    BETA = 1
+    BETA = 1.0
 
     def __init__(
         self,
@@ -336,6 +336,7 @@ class Scorer:
         has_annotation: Optional[Callable[[Doc], bool]] = None,
         labeled: bool = True,
         allow_overlap: bool = False,
+        beta: float = 1.0,
         **cfg,
     ) -> Dict[str, Any]:
         """Returns PRF scores for labeled spans.
@@ -353,12 +354,12 @@ class Scorer:
             equal if their start and end match, irrespective of their label.
         allow_overlap (bool): Whether or not to allow overlapping spans.
             If set to 'False', the alignment will automatically resolve conflicts.
+        beta (float): Beta coefficient for F-score calculation. Defaults to 1.0.
         RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under
             the keys attr_p/r/f and the per-type PRF scores under attr_per_type.
 
         DOCS: https://spacy.io/api/scorer#score_spans
         """
-        beta = cfg.get("beta", Scorer.BETA)
         score = PRFScore(beta=beta)
         score_per_type = dict()
         for example in examples:
@@ -439,6 +440,7 @@ class Scorer:
         multi_label: bool = True,
         positive_label: Optional[str] = None,
         threshold: Optional[float] = None,
+        beta: float = 1.0,
         **cfg,
     ) -> Dict[str, Any]:
         """Returns PRF and ROC AUC scores for a doc-level attribute with a
@@ -458,6 +460,7 @@ class Scorer:
         threshold (float): Cutoff to consider a prediction "positive". Defaults
             to 0.5 for multi-label, and 0.0 (i.e. whatever's highest scoring)
             otherwise.
+        beta (float): Beta coefficient for F-score calculation. Defaults to 1.0.
         RETURNS (Dict[str, Any]): A dictionary containing the scores, with
             inapplicable scores as None:
             for all:
@@ -475,7 +478,6 @@ class Scorer:
 
         DOCS: https://spacy.io/api/scorer#score_cats
         """
-        beta = cfg.get("beta", Scorer.BETA)
         if threshold is None:
             threshold = 0.5 if multi_label else 0.0
         f_per_type = {label: PRFScore(beta=beta) for label in labels}
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index ed16ea37b..733c7c876 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -928,12 +928,12 @@ def test_cli_find_threshold(capsys):
                     pipe_name="tc_multi",
                     threshold_key="threshold",
                     scores_key="cats_macro_f",
-                    silent=True,
+                    silent=False,
                 )[0]
                 == numpy.linspace(0, 1, 10)[1]
             )
 
-        # Specifying name of non-MultiLabel_TextCategorizer component should fail.
+        # Test with spancat.
         nlp, _ = init_nlp((("spancat", {}),))
         with make_tempdir() as nlp_dir:
             nlp.to_disk(nlp_dir)