Remove beta references. Delete universe.json.

This commit is contained in:
Raphael Mitsch 2022-09-12 10:44:57 +02:00
parent b61cf873b3
commit 9c00b287c1
3 changed files with 3 additions and 3845 deletions

View File

@@ -15,7 +15,6 @@ from .. import util
_DEFAULTS = { _DEFAULTS = {
"average": "micro", "average": "micro",
"n_trials": 10, "n_trials": 10,
"beta": 1,
"use_gpu": -1, "use_gpu": -1,
"gold_preproc": False, "gold_preproc": False,
} }
@@ -33,7 +32,6 @@ def find_threshold_cli(
threshold_key: str = Arg(..., help="Key of threshold attribute in component's configuration"), threshold_key: str = Arg(..., help="Key of threshold attribute in component's configuration"),
scores_key: str = Arg(..., help="Name of score to metric to optimize"), scores_key: str = Arg(..., help="Name of score to metric to optimize"),
n_trials: int = Opt(_DEFAULTS["n_trials"], "--n_trials", "-n", help="Number of trials to determine optimal thresholds"), n_trials: int = Opt(_DEFAULTS["n_trials"], "--n_trials", "-n", help="Number of trials to determine optimal thresholds"),
beta: float = Opt(_DEFAULTS["beta"], "--beta", help="Beta for F1 calculation. Ignored if different metric is used"),
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"), use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
gold_preproc: bool = Opt(_DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"), gold_preproc: bool = Opt(_DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"),
@@ -48,7 +46,6 @@ def find_threshold_cli(
threshold_key (str): Key of threshold attribute in component's configuration. threshold_key (str): Key of threshold attribute in component's configuration.
scores_key (str): Name of score to metric to optimize. scores_key (str): Name of score to metric to optimize.
n_trials (int): Number of trials to determine optimal thresholds n_trials (int): Number of trials to determine optimal thresholds
beta (float): Beta for F-score calculation.
code_path (Optional[Path]): Path to Python file with additional code (registered functions) to be imported. code_path (Optional[Path]): Path to Python file with additional code (registered functions) to be imported.
use_gpu (int): GPU ID or -1 for CPU. use_gpu (int): GPU ID or -1 for CPU.
gold_preproc (bool): Whether to use gold preprocessing. Gold preprocessing helps the annotations align to the gold_preproc (bool): Whether to use gold preprocessing. Gold preprocessing helps the annotations align to the
@@ -66,7 +63,6 @@ def find_threshold_cli(
threshold_key=threshold_key, threshold_key=threshold_key,
scores_key=scores_key, scores_key=scores_key,
n_trials=n_trials, n_trials=n_trials,
beta=beta,
use_gpu=use_gpu, use_gpu=use_gpu,
gold_preproc=gold_preproc, gold_preproc=gold_preproc,
silent=False, silent=False,
@@ -81,7 +77,6 @@ def find_threshold(
scores_key: str, scores_key: str,
*, *,
n_trials: int = _DEFAULTS["n_trials"], # type: ignore n_trials: int = _DEFAULTS["n_trials"], # type: ignore
beta: float = _DEFAULTS["beta"], # type: ignore
use_gpu: int = _DEFAULTS["use_gpu"], # type: ignore use_gpu: int = _DEFAULTS["use_gpu"], # type: ignore
gold_preproc: bool = _DEFAULTS["gold_preproc"], # type: ignore gold_preproc: bool = _DEFAULTS["gold_preproc"], # type: ignore
silent: bool = True, silent: bool = True,
@@ -94,7 +89,6 @@ def find_threshold(
threshold_key (str): Key of threshold attribute in component's configuration. threshold_key (str): Key of threshold attribute in component's configuration.
scores_key (str): Name of score to metric to optimize. scores_key (str): Name of score to metric to optimize.
n_trials (int): Number of trials to determine optimal thresholds. n_trials (int): Number of trials to determine optimal thresholds.
beta (float): Beta for F-score calculation.
use_gpu (int): GPU ID or -1 for CPU. use_gpu (int): GPU ID or -1 for CPU.
gold_preproc (bool): Whether to use gold preprocessing. Gold preprocessing helps the annotations align to the gold_preproc (bool): Whether to use gold preprocessing. Gold preprocessing helps the annotations align to the
tokenization, and may result in sequences of more consistent length. However, it may reduce runtime accuracy due tokenization, and may result in sequences of more consistent length. However, it may reduce runtime accuracy due
@@ -121,7 +115,7 @@ def find_threshold(
if not silent: if not silent:
wasabi.msg.info( wasabi.msg.info(
title=f"Optimizing for {scores_key} for component '{pipe_name}' with {n_trials} " title=f"Optimizing for {scores_key} for component '{pipe_name}' with {n_trials} "
f"trials and beta = {beta}." f"trials."
) )
# Load evaluation corpus. # Load evaluation corpus.

View File

@@ -21,13 +21,10 @@ MISSING_VALUES = frozenset([None, 0, ""])
class PRFScore: class PRFScore:
"""A precision / recall / F score.""" """A precision / recall / F score."""
def __init__( def __init__(self, *, tp: int = 0, fp: int = 0, fn: int = 0) -> None:
self, *, tp: int = 0, fp: int = 0, fn: int = 0, beta: float = 1
) -> None:
self.tp = tp self.tp = tp
self.fp = fp self.fp = fp
self.fn = fn self.fn = fn
self.beta = beta
def __len__(self) -> int: def __len__(self) -> int:
return self.tp + self.fp + self.fn return self.tp + self.fp + self.fn
@@ -39,12 +36,10 @@ class PRFScore:
return self return self
def __add__(self, other): def __add__(self, other):
assert self.beta == other.beta
return PRFScore( return PRFScore(
tp=self.tp + other.tp, tp=self.tp + other.tp,
fp=self.fp + other.fp, fp=self.fp + other.fp,
fn=self.fn + other.fn, fn=self.fn + other.fn,
beta=self.beta,
) )
def score_set(self, cand: set, gold: set) -> None: def score_set(self, cand: set, gold: set) -> None:
@@ -64,7 +59,7 @@ class PRFScore:
def fscore(self) -> float: def fscore(self) -> float:
p = self.precision p = self.precision
r = self.recall r = self.recall
return (1 + self.beta**2) * ((p * r) / ((self.beta**2 * p) + r + 1e-100)) return 2 * ((p * r) / (p + r + 1e-100))
def to_dict(self) -> Dict[str, float]: def to_dict(self) -> Dict[str, float]:
return {"p": self.precision, "r": self.recall, "f": self.fscore} return {"p": self.precision, "r": self.recall, "f": self.fscore}

File diff suppressed because it is too large Load Diff