mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-08 14:14:57 +03:00
Harmonize arguments with spacy evaluate command.
This commit is contained in:
parent
6c3ae8dfcc
commit
63c80288ef
|
@ -1,21 +1,16 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import logging
|
import logging
|
||||||
from typing import Optional, Tuple, Union
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
import wasabi.tables
|
import wasabi.tables
|
||||||
|
|
||||||
from ._util import app, Arg, Opt
|
from ._util import app, Arg, Opt, import_code, setup_gpu
|
||||||
from .. import util
|
from .. import util
|
||||||
from ..pipeline import MultiLabel_TextCategorizer, Pipe
|
from ..pipeline import MultiLabel_TextCategorizer, Pipe
|
||||||
from ..tokens import DocBin
|
from ..tokens import DocBin
|
||||||
|
|
||||||
_DEFAULTS = {
|
_DEFAULTS = {"average": "micro", "n_trials": 10, "beta": 1, "use_gpu": -1}
|
||||||
"average": "micro",
|
|
||||||
"pipe_name": None,
|
|
||||||
"n_trials": 10,
|
|
||||||
"beta": 1,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@app.command(
|
@app.command(
|
||||||
|
@ -24,62 +19,73 @@ _DEFAULTS = {
|
||||||
)
|
)
|
||||||
def find_threshold_cli(
|
def find_threshold_cli(
|
||||||
# fmt: off
|
# fmt: off
|
||||||
model_path: Path = Arg(..., help="Path to model file", exists=True, allow_dash=True),
|
model: str = Arg(..., help="Model name or path"),
|
||||||
doc_path: Path = Arg(..., help="Path to doc bin file", exists=True, allow_dash=True),
|
data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
|
||||||
|
pipe_name: str = Opt(..., "--pipe_name", "-p", help="Name of pipe to examine thresholds for"),
|
||||||
average: str = Arg(_DEFAULTS["average"], help="How to aggregate F-scores over labels. One of ('micro', 'macro')", exists=True, allow_dash=True),
|
average: str = Arg(_DEFAULTS["average"], help="How to aggregate F-scores over labels. One of ('micro', 'macro')", exists=True, allow_dash=True),
|
||||||
pipe_name: Optional[str] = Opt(_DEFAULTS["pipe_name"], "--pipe_name", "-p", help="Name of pipe to examine thresholds for"),
|
|
||||||
n_trials: int = Opt(_DEFAULTS["n_trials"], "--n_trials", "-n", help="Number of trials to determine optimal thresholds"),
|
n_trials: int = Opt(_DEFAULTS["n_trials"], "--n_trials", "-n", help="Number of trials to determine optimal thresholds"),
|
||||||
beta: float = Opt(_DEFAULTS["beta"], "--beta", help="Beta for F1 calculation. Ignored if different metric is used"),
|
beta: float = Opt(_DEFAULTS["beta"], "--beta", help="Beta for F1 calculation. Ignored if different metric is used"),
|
||||||
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
|
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
|
||||||
|
use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
|
||||||
|
verbose: bool = Opt(False, "--silent", "-V", "-VV", help="Display more information for debugging purposes"),
|
||||||
# fmt: on
|
# fmt: on
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Runs prediction trials for `textcat` models with varying thresholds to maximize the specified metric from CLI.
|
Runs prediction trials for `textcat` models with varying thresholds to maximize the specified metric from CLI.
|
||||||
model_path (Path): Path to file with trained model.
|
model (str): Model name or path.
|
||||||
doc_path (Path): Path to file with DocBin with docs to use for threshold search.
|
data_path (Path): Path to file with DocBin with docs to use for threshold search.
|
||||||
|
pipe_name (str): Name of pipe to examine thresholds for.
|
||||||
average (str): How to average F-scores across labels. One of ('micro', 'macro').
|
average (str): How to average F-scores across labels. One of ('micro', 'macro').
|
||||||
pipe_name (Optional[str]): Name of pipe to examine thresholds for. If None, pipe of type MultiLabel_TextCategorizer
|
|
||||||
is selected. If there are multiple, an error is raised.
|
|
||||||
n_trials (int): Number of trials to determine optimal thresholds
|
n_trials (int): Number of trials to determine optimal thresholds
|
||||||
beta (float): Beta for F1 calculation. Ignored if different metric is used.
|
beta (float): Beta for F1 calculation.
|
||||||
verbose (bool): Display more information for debugging purposes
|
code_path (Optional[Path]): Path to Python file with additional code (registered functions) to be imported.
|
||||||
|
use_gpu (int): GPU ID or -1 for CPU.
|
||||||
|
verbose (bool): Display more information for debugging purposes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
|
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
|
||||||
|
import_code(code_path)
|
||||||
find_threshold(
|
find_threshold(
|
||||||
model_path,
|
model,
|
||||||
doc_path,
|
data_path,
|
||||||
average=average,
|
|
||||||
pipe_name=pipe_name,
|
pipe_name=pipe_name,
|
||||||
|
average=average,
|
||||||
n_trials=n_trials,
|
n_trials=n_trials,
|
||||||
beta=beta,
|
beta=beta,
|
||||||
|
use_gpu=use_gpu,
|
||||||
|
silent=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def find_threshold(
|
def find_threshold(
|
||||||
model_path: Union[str, Path],
|
model: str,
|
||||||
doc_path: Union[str, Path],
|
data_path: Path,
|
||||||
*,
|
*,
|
||||||
|
pipe_name: str, # type: ignore
|
||||||
average: str = _DEFAULTS["average"], # type: ignore
|
average: str = _DEFAULTS["average"], # type: ignore
|
||||||
pipe_name: Optional[str] = _DEFAULTS["pipe_name"], # type: ignore
|
|
||||||
n_trials: int = _DEFAULTS["n_trials"], # type: ignore
|
n_trials: int = _DEFAULTS["n_trials"], # type: ignore
|
||||||
beta: float = _DEFAULTS["beta"], # type: ignore
|
beta: float = _DEFAULTS["beta"], # type: ignore,
|
||||||
verbose: bool = True,
|
use_gpu: int = _DEFAULTS["use_gpu"],
|
||||||
|
silent: bool = True,
|
||||||
) -> Tuple[float, float]:
|
) -> Tuple[float, float]:
|
||||||
"""
|
"""
|
||||||
Runs prediction trials for `textcat` models with varying thresholds to maximize the specified metric.
|
Runs prediction trials for `textcat` models with varying thresholds to maximize the specified metric.
|
||||||
model_path (Union[str, Path]): Path to file with trained model.
|
model (Union[str, Path]): Path to file with trained model.
|
||||||
doc_path (Union[str, Path]): Path to file with DocBin with docs to use for threshold search.
|
data_path (Union[str, Path]): Path to file with DocBin with docs to use for threshold search.
|
||||||
|
pipe_name (str): Name of pipe to examine thresholds for.
|
||||||
average (str): How to average F-scores across labels. One of ('micro', 'macro').
|
average (str): How to average F-scores across labels. One of ('micro', 'macro').
|
||||||
pipe_name (Optional[str]): Name of pipe to examine thresholds for. If None, pipe of type MultiLabel_TextCategorizer
|
n_trials (int): Number of trials to determine optimal thresholds.
|
||||||
is selected. If there are multiple, an error is raised.
|
beta (float): Beta for F1 calculation.
|
||||||
n_trials (int): Number of trials to determine optimal thresholds
|
use_gpu (int): GPU ID or -1 for CPU.
|
||||||
beta (float): Beta for F1 calculation. Ignored if different metric is used.
|
silent (bool): Whether to print non-error-related output to stdout.
|
||||||
verbose (bool): Whether to print non-error-related output to stdout.
|
|
||||||
RETURNS (Tuple[float, float]): Best found threshold with corresponding F-score.
|
RETURNS (Tuple[float, float]): Best found threshold with corresponding F-score.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
nlp = util.load_model(model_path)
|
setup_gpu(use_gpu, silent=silent)
|
||||||
|
data_path = util.ensure_path(data_path)
|
||||||
|
if not data_path.exists():
|
||||||
|
wasabi.msg.fail("Evaluation data not found", data_path, exits=1)
|
||||||
|
nlp = util.load_model(model)
|
||||||
pipe: Optional[Pipe] = None
|
pipe: Optional[Pipe] = None
|
||||||
selected_pipe_name: Optional[str] = pipe_name
|
selected_pipe_name: Optional[str] = pipe_name
|
||||||
|
|
||||||
|
@ -90,7 +96,9 @@ def find_threshold(
|
||||||
)
|
)
|
||||||
|
|
||||||
for _pipe_name, _pipe in nlp.pipeline:
|
for _pipe_name, _pipe in nlp.pipeline:
|
||||||
if pipe_name and _pipe_name == pipe_name:
|
# todo instead of instance check, assert _pipe has a .threshold arg
|
||||||
|
# won't work, actually. e.g. spancat doesn't .threshold.
|
||||||
|
if _pipe_name == pipe_name:
|
||||||
if not isinstance(_pipe, MultiLabel_TextCategorizer):
|
if not isinstance(_pipe, MultiLabel_TextCategorizer):
|
||||||
wasabi.msg.fail(
|
wasabi.msg.fail(
|
||||||
"Specified component '{component}' is not of type `MultiLabel_TextCategorizer`.".format(
|
"Specified component '{component}' is not of type `MultiLabel_TextCategorizer`.".format(
|
||||||
|
@ -100,36 +108,22 @@ def find_threshold(
|
||||||
)
|
)
|
||||||
pipe = _pipe
|
pipe = _pipe
|
||||||
break
|
break
|
||||||
elif pipe_name is None:
|
|
||||||
if isinstance(_pipe, MultiLabel_TextCategorizer):
|
|
||||||
if pipe:
|
|
||||||
wasabi.msg.fail(
|
|
||||||
"Multiple components of type `MultiLabel_TextCategorizer` exist in pipeline. Specify name of "
|
|
||||||
"component to evaluate.",
|
|
||||||
exits=1,
|
|
||||||
)
|
|
||||||
pipe = _pipe
|
|
||||||
selected_pipe_name = _pipe_name
|
|
||||||
|
|
||||||
if pipe is None:
|
if pipe is None:
|
||||||
if pipe_name:
|
|
||||||
wasabi.msg.fail(
|
wasabi.msg.fail(
|
||||||
f"No component with name {pipe_name} found in pipeline.", exits=1
|
f"No component with name {pipe_name} found in pipeline.", exits=1
|
||||||
)
|
)
|
||||||
wasabi.msg.fail(
|
|
||||||
"No component of type `MultiLabel_TextCategorizer` found in pipeline.",
|
|
||||||
exits=1,
|
|
||||||
)
|
|
||||||
# This is purely for MyPy. Type checking is done in loop above already.
|
# This is purely for MyPy. Type checking is done in loop above already.
|
||||||
assert isinstance(pipe, MultiLabel_TextCategorizer)
|
assert isinstance(pipe, MultiLabel_TextCategorizer)
|
||||||
|
|
||||||
if verbose:
|
if silent:
|
||||||
print(
|
print(
|
||||||
f"Searching threshold with the best {average} F-score for component '{selected_pipe_name}' with {n_trials} "
|
f"Searching threshold with the best {average} F-score for component '{selected_pipe_name}' with {n_trials} "
|
||||||
f"trials and beta = {beta}."
|
f"trials and beta = {beta}."
|
||||||
)
|
)
|
||||||
|
|
||||||
thresholds = numpy.linspace(0, 1, n_trials)
|
thresholds = numpy.linspace(0, 1, n_trials)
|
||||||
|
# todo use Scorer.score_cats. possibly to be extended?
|
||||||
ref_pos_counts = {label: 0 for label in pipe.labels}
|
ref_pos_counts = {label: 0 for label in pipe.labels}
|
||||||
pred_pos_counts = {
|
pred_pos_counts = {
|
||||||
t: {True: ref_pos_counts.copy(), False: ref_pos_counts.copy()}
|
t: {True: ref_pos_counts.copy(), False: ref_pos_counts.copy()}
|
||||||
|
@ -140,7 +134,7 @@ def find_threshold(
|
||||||
|
|
||||||
# Count true/false positives for provided docs.
|
# Count true/false positives for provided docs.
|
||||||
doc_bin = DocBin()
|
doc_bin = DocBin()
|
||||||
doc_bin.from_disk(doc_path)
|
doc_bin.from_disk(data_path)
|
||||||
for ref_doc in doc_bin.get_docs(nlp.vocab):
|
for ref_doc in doc_bin.get_docs(nlp.vocab):
|
||||||
for label, score in ref_doc.cats.items():
|
for label, score in ref_doc.cats.items():
|
||||||
if score not in (0, 1):
|
if score not in (0, 1):
|
||||||
|
@ -198,7 +192,7 @@ def find_threshold(
|
||||||
) / len(ref_pos_counts)
|
) / len(ref_pos_counts)
|
||||||
|
|
||||||
best_threshold = max(f_scores.keys(), key=(lambda key: f_scores[key]))
|
best_threshold = max(f_scores.keys(), key=(lambda key: f_scores[key]))
|
||||||
if verbose:
|
if silent:
|
||||||
print(
|
print(
|
||||||
f"Best threshold: {round(best_threshold, ndigits=4)} with F-score of {f_scores[best_threshold]}.",
|
f"Best threshold: {round(best_threshold, ndigits=4)} with F-score of {f_scores[best_threshold]}.",
|
||||||
wasabi.tables.table(
|
wasabi.tables.table(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user