Remove beta references. Delete universe.json.

2025-08-08 06:04:57 +03:00 · 2022-09-12 10:44:57 +02:00 · 2022-09-12 10:44:57 +02:00 · 9c00b287c1
commit 9c00b287c1
parent b61cf873b3
3 changed files with 3 additions and 3845 deletions
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@@ -15,7 +15,6 @@ from .. import util
 _DEFAULTS = {
    "average": "micro",
    "n_trials": 10,
-    "beta": 1,
    "use_gpu": -1,
    "gold_preproc": False,
 }
@@ -33,7 +32,6 @@ def find_threshold_cli(
    threshold_key: str = Arg(..., help="Key of threshold attribute in component's configuration"),
    scores_key: str = Arg(..., help="Name of score to metric to optimize"),
    n_trials: int = Opt(_DEFAULTS["n_trials"], "--n_trials", "-n", help="Number of trials to determine optimal thresholds"),
-    beta: float = Opt(_DEFAULTS["beta"], "--beta", help="Beta for F1 calculation. Ignored if different metric is used"),
    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
    use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
    gold_preproc: bool = Opt(_DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"),
@@ -48,7 +46,6 @@ def find_threshold_cli(
    threshold_key (str): Key of threshold attribute in component's configuration.
    scores_key (str): Name of score to metric to optimize.
    n_trials (int): Number of trials to determine optimal thresholds
-    beta (float): Beta for F-score calculation.
    code_path (Optional[Path]): Path to Python file with additional code (registered functions) to be imported.
    use_gpu (int): GPU ID or -1 for CPU.
    gold_preproc (bool): Whether to use gold preprocessing. Gold preprocessing helps the annotations align to the
@@ -66,7 +63,6 @@ def find_threshold_cli(
        threshold_key=threshold_key,
        scores_key=scores_key,
        n_trials=n_trials,
-        beta=beta,
        use_gpu=use_gpu,
        gold_preproc=gold_preproc,
        silent=False,
@@ -81,7 +77,6 @@ def find_threshold(
    scores_key: str,
    *,
    n_trials: int = _DEFAULTS["n_trials"],  # type: ignore
-    beta: float = _DEFAULTS["beta"],  # type: ignore
    use_gpu: int = _DEFAULTS["use_gpu"],  # type: ignore
    gold_preproc: bool = _DEFAULTS["gold_preproc"],  # type: ignore
    silent: bool = True,
@@ -94,7 +89,6 @@ def find_threshold(
    threshold_key (str): Key of threshold attribute in component's configuration.
    scores_key (str): Name of score to metric to optimize.
    n_trials (int): Number of trials to determine optimal thresholds.
-    beta (float): Beta for F-score calculation.
    use_gpu (int): GPU ID or -1 for CPU.
    gold_preproc (bool): Whether to use gold preprocessing. Gold preprocessing helps the annotations align to the
        tokenization, and may result in sequences of more consistent length. However, it may reduce runtime accuracy due
@@ -121,7 +115,7 @@ def find_threshold(
    if not silent:
        wasabi.msg.info(
            title=f"Optimizing for {scores_key} for component '{pipe_name}' with {n_trials} "
-            f"trials and beta = {beta}."
+            f"trials."
        )

    # Load evaluation corpus.
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -21,13 +21,10 @@ MISSING_VALUES = frozenset([None, 0, ""])
 class PRFScore:
    """A precision / recall / F score."""

-    def __init__(
-        self, *, tp: int = 0, fp: int = 0, fn: int = 0, beta: float = 1
-    ) -> None:
+    def __init__(self, *, tp: int = 0, fp: int = 0, fn: int = 0) -> None:
        self.tp = tp
        self.fp = fp
        self.fn = fn
-        self.beta = beta

    def __len__(self) -> int:
        return self.tp + self.fp + self.fn
@@ -39,12 +36,10 @@ class PRFScore:
        return self

    def __add__(self, other):
-        assert self.beta == other.beta
        return PRFScore(
            tp=self.tp + other.tp,
            fp=self.fp + other.fp,
            fn=self.fn + other.fn,
-            beta=self.beta,
        )

    def score_set(self, cand: set, gold: set) -> None:
@@ -64,7 +59,7 @@ class PRFScore:
    def fscore(self) -> float:
        p = self.precision
        r = self.recall
-        return (1 + self.beta**2) * ((p * r) / ((self.beta**2 * p) + r + 1e-100))
+        return 2 * ((p * r) / (p + r + 1e-100))

    def to_dict(self) -> Dict[str, float]:
        return {"p": self.precision, "r": self.recall, "f": self.fscore}
--- a/spacy/tests/universe/universe.json
+++ b/spacy/tests/universe/universe.json