mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-08 14:14:57 +03:00
Remove beta references. Delete universe.json.
This commit is contained in:
parent
b61cf873b3
commit
9c00b287c1
|
@ -15,7 +15,6 @@ from .. import util
|
||||||
_DEFAULTS = {
|
_DEFAULTS = {
|
||||||
"average": "micro",
|
"average": "micro",
|
||||||
"n_trials": 10,
|
"n_trials": 10,
|
||||||
"beta": 1,
|
|
||||||
"use_gpu": -1,
|
"use_gpu": -1,
|
||||||
"gold_preproc": False,
|
"gold_preproc": False,
|
||||||
}
|
}
|
||||||
|
@ -33,7 +32,6 @@ def find_threshold_cli(
|
||||||
threshold_key: str = Arg(..., help="Key of threshold attribute in component's configuration"),
|
threshold_key: str = Arg(..., help="Key of threshold attribute in component's configuration"),
|
||||||
scores_key: str = Arg(..., help="Name of score to metric to optimize"),
|
scores_key: str = Arg(..., help="Name of score to metric to optimize"),
|
||||||
n_trials: int = Opt(_DEFAULTS["n_trials"], "--n_trials", "-n", help="Number of trials to determine optimal thresholds"),
|
n_trials: int = Opt(_DEFAULTS["n_trials"], "--n_trials", "-n", help="Number of trials to determine optimal thresholds"),
|
||||||
beta: float = Opt(_DEFAULTS["beta"], "--beta", help="Beta for F1 calculation. Ignored if different metric is used"),
|
|
||||||
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
|
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
|
||||||
use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
|
use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
|
||||||
gold_preproc: bool = Opt(_DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"),
|
gold_preproc: bool = Opt(_DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"),
|
||||||
|
@ -48,7 +46,6 @@ def find_threshold_cli(
|
||||||
threshold_key (str): Key of threshold attribute in component's configuration.
|
threshold_key (str): Key of threshold attribute in component's configuration.
|
||||||
scores_key (str): Name of score to metric to optimize.
|
scores_key (str): Name of score to metric to optimize.
|
||||||
n_trials (int): Number of trials to determine optimal thresholds
|
n_trials (int): Number of trials to determine optimal thresholds
|
||||||
beta (float): Beta for F-score calculation.
|
|
||||||
code_path (Optional[Path]): Path to Python file with additional code (registered functions) to be imported.
|
code_path (Optional[Path]): Path to Python file with additional code (registered functions) to be imported.
|
||||||
use_gpu (int): GPU ID or -1 for CPU.
|
use_gpu (int): GPU ID or -1 for CPU.
|
||||||
gold_preproc (bool): Whether to use gold preprocessing. Gold preprocessing helps the annotations align to the
|
gold_preproc (bool): Whether to use gold preprocessing. Gold preprocessing helps the annotations align to the
|
||||||
|
@ -66,7 +63,6 @@ def find_threshold_cli(
|
||||||
threshold_key=threshold_key,
|
threshold_key=threshold_key,
|
||||||
scores_key=scores_key,
|
scores_key=scores_key,
|
||||||
n_trials=n_trials,
|
n_trials=n_trials,
|
||||||
beta=beta,
|
|
||||||
use_gpu=use_gpu,
|
use_gpu=use_gpu,
|
||||||
gold_preproc=gold_preproc,
|
gold_preproc=gold_preproc,
|
||||||
silent=False,
|
silent=False,
|
||||||
|
@ -81,7 +77,6 @@ def find_threshold(
|
||||||
scores_key: str,
|
scores_key: str,
|
||||||
*,
|
*,
|
||||||
n_trials: int = _DEFAULTS["n_trials"], # type: ignore
|
n_trials: int = _DEFAULTS["n_trials"], # type: ignore
|
||||||
beta: float = _DEFAULTS["beta"], # type: ignore
|
|
||||||
use_gpu: int = _DEFAULTS["use_gpu"], # type: ignore
|
use_gpu: int = _DEFAULTS["use_gpu"], # type: ignore
|
||||||
gold_preproc: bool = _DEFAULTS["gold_preproc"], # type: ignore
|
gold_preproc: bool = _DEFAULTS["gold_preproc"], # type: ignore
|
||||||
silent: bool = True,
|
silent: bool = True,
|
||||||
|
@ -94,7 +89,6 @@ def find_threshold(
|
||||||
threshold_key (str): Key of threshold attribute in component's configuration.
|
threshold_key (str): Key of threshold attribute in component's configuration.
|
||||||
scores_key (str): Name of score to metric to optimize.
|
scores_key (str): Name of score to metric to optimize.
|
||||||
n_trials (int): Number of trials to determine optimal thresholds.
|
n_trials (int): Number of trials to determine optimal thresholds.
|
||||||
beta (float): Beta for F-score calculation.
|
|
||||||
use_gpu (int): GPU ID or -1 for CPU.
|
use_gpu (int): GPU ID or -1 for CPU.
|
||||||
gold_preproc (bool): Whether to use gold preprocessing. Gold preprocessing helps the annotations align to the
|
gold_preproc (bool): Whether to use gold preprocessing. Gold preprocessing helps the annotations align to the
|
||||||
tokenization, and may result in sequences of more consistent length. However, it may reduce runtime accuracy due
|
tokenization, and may result in sequences of more consistent length. However, it may reduce runtime accuracy due
|
||||||
|
@ -121,7 +115,7 @@ def find_threshold(
|
||||||
if not silent:
|
if not silent:
|
||||||
wasabi.msg.info(
|
wasabi.msg.info(
|
||||||
title=f"Optimizing for {scores_key} for component '{pipe_name}' with {n_trials} "
|
title=f"Optimizing for {scores_key} for component '{pipe_name}' with {n_trials} "
|
||||||
f"trials and beta = {beta}."
|
f"trials."
|
||||||
)
|
)
|
||||||
|
|
||||||
# Load evaluation corpus.
|
# Load evaluation corpus.
|
||||||
|
|
|
@ -21,13 +21,10 @@ MISSING_VALUES = frozenset([None, 0, ""])
|
||||||
class PRFScore:
|
class PRFScore:
|
||||||
"""A precision / recall / F score."""
|
"""A precision / recall / F score."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(self, *, tp: int = 0, fp: int = 0, fn: int = 0) -> None:
|
||||||
self, *, tp: int = 0, fp: int = 0, fn: int = 0, beta: float = 1
|
|
||||||
) -> None:
|
|
||||||
self.tp = tp
|
self.tp = tp
|
||||||
self.fp = fp
|
self.fp = fp
|
||||||
self.fn = fn
|
self.fn = fn
|
||||||
self.beta = beta
|
|
||||||
|
|
||||||
def __len__(self) -> int:
|
def __len__(self) -> int:
|
||||||
return self.tp + self.fp + self.fn
|
return self.tp + self.fp + self.fn
|
||||||
|
@ -39,12 +36,10 @@ class PRFScore:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def __add__(self, other):
|
def __add__(self, other):
|
||||||
assert self.beta == other.beta
|
|
||||||
return PRFScore(
|
return PRFScore(
|
||||||
tp=self.tp + other.tp,
|
tp=self.tp + other.tp,
|
||||||
fp=self.fp + other.fp,
|
fp=self.fp + other.fp,
|
||||||
fn=self.fn + other.fn,
|
fn=self.fn + other.fn,
|
||||||
beta=self.beta,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def score_set(self, cand: set, gold: set) -> None:
|
def score_set(self, cand: set, gold: set) -> None:
|
||||||
|
@ -64,7 +59,7 @@ class PRFScore:
|
||||||
def fscore(self) -> float:
|
def fscore(self) -> float:
|
||||||
p = self.precision
|
p = self.precision
|
||||||
r = self.recall
|
r = self.recall
|
||||||
return (1 + self.beta**2) * ((p * r) / ((self.beta**2 * p) + r + 1e-100))
|
return 2 * ((p * r) / (p + r + 1e-100))
|
||||||
|
|
||||||
def to_dict(self) -> Dict[str, float]:
|
def to_dict(self) -> Dict[str, float]:
|
||||||
return {"p": self.precision, "r": self.recall, "f": self.fscore}
|
return {"p": self.precision, "r": self.recall, "f": self.fscore}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user