mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-29 23:17:59 +03:00 
			
		
		
		
	add custom code support to CLI speed benchmark
This commit is contained in:
		
							parent
							
								
									e2a3952de5
								
							
						
					
					
						commit
						5f32a5a82e
					
				|  | @ -13,7 +13,7 @@ from .. import util | ||||||
| from ..language import Language | from ..language import Language | ||||||
| from ..tokens import Doc | from ..tokens import Doc | ||||||
| from ..training import Corpus | from ..training import Corpus | ||||||
| from ._util import Arg, Opt, benchmark_cli, setup_gpu | from ._util import Arg, Opt, benchmark_cli, setup_gpu, import_code | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @benchmark_cli.command( | @benchmark_cli.command( | ||||||
|  | @ -30,12 +30,14 @@ def benchmark_speed_cli( | ||||||
|     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"), |     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"), | ||||||
|     n_batches: int = Opt(50, "--batches", help="Minimum number of batches to benchmark", min=30,), |     n_batches: int = Opt(50, "--batches", help="Minimum number of batches to benchmark", min=30,), | ||||||
|     warmup_epochs: int = Opt(3, "--warmup", "-w", min=0, help="Number of iterations over the data for warmup"), |     warmup_epochs: int = Opt(3, "--warmup", "-w", min=0, help="Number of iterations over the data for warmup"), | ||||||
|  |     code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), | ||||||
|     # fmt: on |     # fmt: on | ||||||
| ): | ): | ||||||
|     """ |     """ | ||||||
|     Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark |     Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark | ||||||
|     data in the binary .spacy format. |     data in the binary .spacy format. | ||||||
|     """ |     """ | ||||||
|  |     import_code(code_path) | ||||||
|     setup_gpu(use_gpu=use_gpu, silent=False) |     setup_gpu(use_gpu=use_gpu, silent=False) | ||||||
| 
 | 
 | ||||||
|     nlp = util.load_model(model) |     nlp = util.load_model(model) | ||||||
|  | @ -171,5 +173,7 @@ def print_outliers(sample: numpy.ndarray): | ||||||
| def warmup( | def warmup( | ||||||
|     nlp: Language, docs: List[Doc], warmup_epochs: int, batch_size: Optional[int] |     nlp: Language, docs: List[Doc], warmup_epochs: int, batch_size: Optional[int] | ||||||
| ) -> numpy.ndarray: | ) -> numpy.ndarray: | ||||||
|     docs = warmup_epochs * docs |     warmup_docs = [] | ||||||
|     return annotate(nlp, docs, batch_size) |     for _ in range(warmup_epochs): | ||||||
|  |         warmup_docs += [doc.from_docs([doc]) for doc in docs] | ||||||
|  |     return annotate(nlp, warmup_docs, batch_size) | ||||||
|  |  | ||||||
|  | @ -1268,20 +1268,21 @@ the [binary `.spacy` format](/api/data-formats#binary-training). The pipeline is | ||||||
| warmed up before any measurements are taken. | warmed up before any measurements are taken. | ||||||
| 
 | 
 | ||||||
| ```cli | ```cli | ||||||
| $ python -m spacy benchmark speed [model] [data_path] [--batch_size] [--no-shuffle] [--gpu-id] [--batches] [--warmup] | $ python -m spacy benchmark speed [model] [data_path] [--code] [--batch_size] [--no-shuffle] [--gpu-id] [--batches] [--warmup] | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| | Name                 | Description                                                                                              | | | Name                 | Description                                                                                                                                                                          | | ||||||
| | -------------------- | -------------------------------------------------------------------------------------------------------- | | | -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ||||||
| | `model`              | Pipeline to benchmark the speed of. Can be a package or a path to a data directory. ~~str (positional)~~ | | | `model`              | Pipeline to benchmark the speed of. Can be a package or a path to a data directory. ~~str (positional)~~                                                                             | | ||||||
| | `data_path`          | Location of benchmark data in spaCy's [binary format](/api/data-formats#training). ~~Path (positional)~~ | | | `data_path`          | Location of benchmark data in spaCy's [binary format](/api/data-formats#training). ~~Path (positional)~~                                                                             | | ||||||
| | `--batch-size`, `-b` | Set the batch size. If not set, the pipeline's batch size is used. ~~Optional[int] \(option)~~           | | | `--code`, `-c`       | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ | | ||||||
| | `--no-shuffle`       | Do not shuffle documents in the benchmark data. ~~bool (flag)~~                                          | | | `--batch-size`, `-b` | Set the batch size. If not set, the pipeline's batch size is used. ~~Optional[int] \(option)~~                                                                                       | | ||||||
| | `--gpu-id`, `-g`     | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                           | | | `--no-shuffle`       | Do not shuffle documents in the benchmark data. ~~bool (flag)~~                                                                                                                      | | ||||||
| | `--batches`          | Number of batches to benchmark on. Defaults to `50`. ~~Optional[int] \(option)~~                         | | | `--gpu-id`, `-g`     | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       | | ||||||
| | `--warmup`, `-w`     | Iterations over the benchmark data for warmup. Defaults to `3`. ~~Optional[int] \(option)~~              | | | `--batches`          | Number of batches to benchmark on. Defaults to `50`. ~~Optional[int] \(option)~~                                                                                                     | | ||||||
| | `--help`, `-h`       | Show help message and available arguments. ~~bool (flag)~~                                               | | | `--warmup`, `-w`     | Iterations over the benchmark data for warmup. Defaults to `3`. ~~Optional[int] \(option)~~                                                                                          | | ||||||
| | **PRINTS**           | Pipeline speed in words per second with a 95% confidence interval.                                       | | | `--help`, `-h`       | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           | | ||||||
|  | | **PRINTS**           | Pipeline speed in words per second with a 95% confidence interval.                                                                                                                   | | ||||||
| 
 | 
 | ||||||
| ## apply {id="apply", version="3.5", tag="command"} | ## apply {id="apply", version="3.5", tag="command"} | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user