mirror of
https://github.com/explosion/spaCy.git
synced 2025-05-03 15:23:41 +03:00
add custom code support to CLI speed benchmark (#13247)
* add custom code support to CLI speed benchmark
* sort imports
* better copying for warmup docs
This commit is contained in:
parent
68b85ea950
commit
00e938a7c3
|
@ -13,7 +13,7 @@ from .. import util
|
||||||
from ..language import Language
|
from ..language import Language
|
||||||
from ..tokens import Doc
|
from ..tokens import Doc
|
||||||
from ..training import Corpus
|
from ..training import Corpus
|
||||||
from ._util import Arg, Opt, benchmark_cli, setup_gpu
|
from ._util import Arg, Opt, benchmark_cli, import_code, setup_gpu
|
||||||
|
|
||||||
|
|
||||||
@benchmark_cli.command(
|
@benchmark_cli.command(
|
||||||
|
@ -30,12 +30,14 @@ def benchmark_speed_cli(
|
||||||
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
|
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
|
||||||
n_batches: int = Opt(50, "--batches", help="Minimum number of batches to benchmark", min=30,),
|
n_batches: int = Opt(50, "--batches", help="Minimum number of batches to benchmark", min=30,),
|
||||||
warmup_epochs: int = Opt(3, "--warmup", "-w", min=0, help="Number of iterations over the data for warmup"),
|
warmup_epochs: int = Opt(3, "--warmup", "-w", min=0, help="Number of iterations over the data for warmup"),
|
||||||
|
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
|
||||||
# fmt: on
|
# fmt: on
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark
|
Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark
|
||||||
data in the binary .spacy format.
|
data in the binary .spacy format.
|
||||||
"""
|
"""
|
||||||
|
import_code(code_path)
|
||||||
setup_gpu(use_gpu=use_gpu, silent=False)
|
setup_gpu(use_gpu=use_gpu, silent=False)
|
||||||
|
|
||||||
nlp = util.load_model(model)
|
nlp = util.load_model(model)
|
||||||
|
@ -171,5 +173,5 @@ def print_outliers(sample: numpy.ndarray):
|
||||||
def warmup(
|
def warmup(
|
||||||
nlp: Language, docs: List[Doc], warmup_epochs: int, batch_size: Optional[int]
|
nlp: Language, docs: List[Doc], warmup_epochs: int, batch_size: Optional[int]
|
||||||
) -> numpy.ndarray:
|
) -> numpy.ndarray:
|
||||||
docs = warmup_epochs * docs
|
docs = [doc.copy() for doc in docs * warmup_epochs]
|
||||||
return annotate(nlp, docs, batch_size)
|
return annotate(nlp, docs, batch_size)
|
||||||
|
|
|
@ -1268,13 +1268,14 @@ the [binary `.spacy` format](/api/data-formats#binary-training). The pipeline is
|
||||||
warmed up before any measurements are taken.
|
warmed up before any measurements are taken.
|
||||||
|
|
||||||
```cli
|
```cli
|
||||||
$ python -m spacy benchmark speed [model] [data_path] [--batch_size] [--no-shuffle] [--gpu-id] [--batches] [--warmup]
|
$ python -m spacy benchmark speed [model] [data_path] [--code] [--batch-size] [--no-shuffle] [--gpu-id] [--batches] [--warmup]
|
||||||
```
|
```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| -------------------- | -------------------------------------------------------------------------------------------------------- |
|
| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||||
| `model` | Pipeline to benchmark the speed of. Can be a package or a path to a data directory. ~~str (positional)~~ |
|
| `model` | Pipeline to benchmark the speed of. Can be a package or a path to a data directory. ~~str (positional)~~ |
|
||||||
| `data_path` | Location of benchmark data in spaCy's [binary format](/api/data-formats#training). ~~Path (positional)~~ |
|
| `data_path` | Location of benchmark data in spaCy's [binary format](/api/data-formats#training). ~~Path (positional)~~ |
|
||||||
|
| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
|
||||||
| `--batch-size`, `-b` | Set the batch size. If not set, the pipeline's batch size is used. ~~Optional[int] \(option)~~ |
|
| `--batch-size`, `-b` | Set the batch size. If not set, the pipeline's batch size is used. ~~Optional[int] \(option)~~ |
|
||||||
| `--no-shuffle` | Do not shuffle documents in the benchmark data. ~~bool (flag)~~ |
|
| `--no-shuffle` | Do not shuffle documents in the benchmark data. ~~bool (flag)~~ |
|
||||||
| `--gpu-id`, `-g` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~ |
|
| `--gpu-id`, `-g` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~ |
|
||||||
|
|
Loading…
Reference in New Issue
Block a user