mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	Refactor CLI
This commit is contained in:
		
							parent
							
								
									cc569a348d
								
							
						
					
					
						commit
						7811d97339
					
				|  | @ -13,122 +13,112 @@ from spacy.cli import model as cli_model | |||
| from spacy.cli import convert as cli_convert | ||||
| 
 | ||||
| 
 | ||||
| class CLI(object): | ||||
| @plac.annotations( | ||||
|     model=("model to download (shortcut or model name)", "positional", None, str), | ||||
|     direct=("force direct download. Needs model name with version and won't " | ||||
|             "perform compatibility check", "flag", "d", bool) | ||||
| ) | ||||
| def download(model, direct=False): | ||||
|     """ | ||||
|     Command-line interface for spaCy | ||||
|     Download compatible model from default download path using pip. Model | ||||
|     can be shortcut, model name or, if --direct flag is set, full model name | ||||
|     with version. | ||||
|     """ | ||||
|     commands = ('download', 'link', 'info', 'package', 'train', 'model', 'convert') | ||||
| 
 | ||||
|     @plac.annotations( | ||||
|         model=("model to download (shortcut or model name)", "positional", None, str), | ||||
|         direct=("force direct download. Needs model name with version and won't " | ||||
|                 "perform compatibility check", "flag", "d", bool) | ||||
|     ) | ||||
|     def download(self, model, direct=False): | ||||
|         """ | ||||
|         Download compatible model from default download path using pip. Model | ||||
|         can be shortcut, model name or, if --direct flag is set, full model name | ||||
|         with version. | ||||
|         """ | ||||
|         cli_download(model, direct) | ||||
|     cli_download(model, direct) | ||||
| 
 | ||||
| 
 | ||||
|     @plac.annotations( | ||||
|         origin=("package name or local path to model", "positional", None, str), | ||||
|         link_name=("name of shortuct link to create", "positional", None, str), | ||||
|         force=("force overwriting of existing link", "flag", "f", bool) | ||||
|     ) | ||||
|     def link(self, origin, link_name, force=False): | ||||
|         """ | ||||
|         Create a symlink for models within the spacy/data directory. Accepts | ||||
|         either the name of a pip package, or the local path to the model data | ||||
|         directory. Linking models allows loading them via spacy.load(link_name). | ||||
|         """ | ||||
|         cli_link(origin, link_name, force) | ||||
| @plac.annotations( | ||||
|     origin=("package name or local path to model", "positional", None, str), | ||||
|     link_name=("name of shortuct link to create", "positional", None, str), | ||||
|     force=("force overwriting of existing link", "flag", "f", bool) | ||||
| ) | ||||
| def link(origin, link_name, force=False): | ||||
|     """ | ||||
|     Create a symlink for models within the spacy/data directory. Accepts | ||||
|     either the name of a pip package, or the local path to the model data | ||||
|     directory. Linking models allows loading them via spacy.load(link_name). | ||||
|     """ | ||||
|     cli_link(origin, link_name, force) | ||||
| 
 | ||||
| 
 | ||||
|     @plac.annotations( | ||||
|         model=("optional: shortcut link of model", "positional", None, str), | ||||
|         markdown=("generate Markdown for GitHub issues", "flag", "md", str) | ||||
|     ) | ||||
|     def info(self, model=None, markdown=False): | ||||
|         """ | ||||
|         Print info about spaCy installation. If a model shortcut link is | ||||
|         specified as an argument, print model information. Flag --markdown | ||||
|         prints details in Markdown for easy copy-pasting to GitHub issues. | ||||
|         """ | ||||
|         cli_info(model, markdown) | ||||
| @plac.annotations( | ||||
|     model=("optional: shortcut link of model", "positional", None, str), | ||||
|     markdown=("generate Markdown for GitHub issues", "flag", "md", str) | ||||
| ) | ||||
| def info(model=None, markdown=False): | ||||
|     """ | ||||
|     Print info about spaCy installation. If a model shortcut link is | ||||
|     specified as an argument, print model information. Flag --markdown | ||||
|     prints details in Markdown for easy copy-pasting to GitHub issues. | ||||
|     """ | ||||
|     cli_info(model, markdown) | ||||
| 
 | ||||
| 
 | ||||
|     @plac.annotations( | ||||
|         input_dir=("directory with model data", "positional", None, str), | ||||
|         output_dir=("output parent directory", "positional", None, str), | ||||
|         meta=("path to meta.json", "option", "m", str), | ||||
|         force=("force overwriting of existing folder in output directory", "flag", "f", bool) | ||||
|     ) | ||||
|     def package(self, input_dir, output_dir, meta=None, force=False): | ||||
|         """ | ||||
|         Generate Python package for model data, including meta and required | ||||
|         installation files. A new directory will be created in the specified | ||||
|         output directory, and model data will be copied over. | ||||
|         """ | ||||
|         cli_package(input_dir, output_dir, meta, force) | ||||
| @plac.annotations( | ||||
|     input_dir=("directory with model data", "positional", None, str), | ||||
|     output_dir=("output parent directory", "positional", None, str), | ||||
|     meta=("path to meta.json", "option", "m", str), | ||||
|     force=("force overwriting of existing folder in output directory", "flag", "f", bool) | ||||
| ) | ||||
| def package(input_dir, output_dir, meta=None, force=False): | ||||
|     """ | ||||
|     Generate Python package for model data, including meta and required | ||||
|     installation files. A new directory will be created in the specified | ||||
|     output directory, and model data will be copied over. | ||||
|     """ | ||||
|     cli_package(input_dir, output_dir, meta, force) | ||||
| 
 | ||||
| 
 | ||||
|     @plac.annotations( | ||||
|         lang=("model language", "positional", None, str), | ||||
|         output_dir=("output directory to store model in", "positional", None, str), | ||||
|         train_data=("location of JSON-formatted training data", "positional", None, str), | ||||
|         dev_data=("location of JSON-formatted development data (optional)", "positional", None, str), | ||||
|         n_iter=("number of iterations", "option", "n", int), | ||||
|         nsents=("number of sentences", "option", None, int), | ||||
|         parser_L1=("L1 regularization penalty for parser", "option", "L", float), | ||||
|         use_gpu=("Use GPU", "flag", "g", bool), | ||||
|         no_tagger=("Don't train tagger", "flag", "T", bool), | ||||
|         no_parser=("Don't train parser", "flag", "P", bool), | ||||
|         no_entities=("Don't train NER", "flag", "N", bool) | ||||
|     ) | ||||
|     def train(self, lang, output_dir, train_data, dev_data=None, n_iter=15, | ||||
|               nsents=0, parser_L1=0.0, use_gpu=False, | ||||
|               no_tagger=False, no_parser=False, no_entities=False): | ||||
|         """ | ||||
|         Train a model. Expects data in spaCy's JSON format. | ||||
|         """ | ||||
|         nsents = nsents or None | ||||
|         cli_train(lang, output_dir, train_data, dev_data, n_iter, nsents, | ||||
|                   use_gpu, no_tagger, no_parser, no_entities, parser_L1) | ||||
| @plac.annotations( | ||||
|     lang=("model language", "positional", None, str), | ||||
|     output_dir=("output directory to store model in", "positional", None, str), | ||||
|     train_data=("location of JSON-formatted training data", "positional", None, str), | ||||
|     dev_data=("location of JSON-formatted development data (optional)", "positional", None, str), | ||||
|     n_iter=("number of iterations", "option", "n", int), | ||||
|     nsents=("number of sentences", "option", None, int), | ||||
|     parser_L1=("L1 regularization penalty for parser", "option", "L", float), | ||||
|     use_gpu=("Use GPU", "flag", "g", bool), | ||||
|     no_tagger=("Don't train tagger", "flag", "T", bool), | ||||
|     no_parser=("Don't train parser", "flag", "P", bool), | ||||
|     no_entities=("Don't train NER", "flag", "N", bool) | ||||
| ) | ||||
| def train(lang, output_dir, train_data, dev_data=None, n_iter=15, | ||||
|           nsents=0, parser_L1=0.0, use_gpu=False, | ||||
|           no_tagger=False, no_parser=False, no_entities=False): | ||||
|     """ | ||||
|     Train a model. Expects data in spaCy's JSON format. | ||||
|     """ | ||||
|     nsents = nsents or None | ||||
|     cli_train(lang, output_dir, train_data, dev_data, n_iter, nsents, | ||||
|               use_gpu, no_tagger, no_parser, no_entities, parser_L1) | ||||
| 
 | ||||
|     @plac.annotations( | ||||
|         lang=("model language", "positional", None, str), | ||||
|         model_dir=("output directory to store model in", "positional", None, str), | ||||
|         freqs_data=("tab-separated frequencies file", "positional", None, str), | ||||
|         clusters_data=("Brown clusters file", "positional", None, str), | ||||
|         vectors_data=("word vectors file", "positional", None, str) | ||||
|     ) | ||||
|     def model(self, lang, model_dir, freqs_data, clusters_data=None, vectors_data=None): | ||||
|         """ | ||||
|         Initialize a new model and its data directory. | ||||
|         """ | ||||
|         cli_model(lang, model_dir, freqs_data, clusters_data, vectors_data) | ||||
| @plac.annotations( | ||||
|     input_file=("input file", "positional", None, str), | ||||
|     output_dir=("output directory for converted file", "positional", None, str), | ||||
|     n_sents=("Number of sentences per doc", "option", "n", float), | ||||
|     morphology=("Enable appending morphology to tags", "flag", "m", bool) | ||||
| ) | ||||
| def convert(input_file, output_dir, n_sents=10, morphology=False): | ||||
|     """ | ||||
|     Convert files into JSON format for use with train command and other | ||||
|     experiment management functions. | ||||
|     """ | ||||
|     cli_convert(input_file, output_dir, n_sents, morphology) | ||||
| 
 | ||||
|     @plac.annotations( | ||||
|         input_file=("input file", "positional", None, str), | ||||
|         output_dir=("output directory for converted file", "positional", None, str), | ||||
|         n_sents=("Number of sentences per doc", "option", "n", float), | ||||
|         morphology=("Enable appending morphology to tags", "flag", "m", bool) | ||||
|     ) | ||||
|     def convert(self, input_file, output_dir, n_sents=10, morphology=False): | ||||
|         """ | ||||
|         Convert files into JSON format for use with train command and other | ||||
|         experiment management functions. | ||||
|         """ | ||||
|         cli_convert(input_file, output_dir, n_sents, morphology) | ||||
| @plac.annotations( | ||||
|     lang=("model language", "positional", None, str), | ||||
|     model_dir=("output directory to store model in", "positional", None, str), | ||||
|     freqs_data=("tab-separated frequencies file", "positional", None, str), | ||||
|     clusters_data=("Brown clusters file", "positional", None, str), | ||||
|     vectors_data=("word vectors file", "positional", None, str) | ||||
| ) | ||||
| def model(lang, model_dir, freqs_data, clusters_data=None, vectors_data=None): | ||||
|     """ | ||||
|     Initialize a new model and its data directory. | ||||
|     """ | ||||
|     cli_model(lang, model_dir, freqs_data, clusters_data, vectors_data) | ||||
| 
 | ||||
| 
 | ||||
|     def __missing__(self, name): | ||||
|         print("\n   Command %r does not exist." | ||||
|               "\n   Use the --help flag for a list of available commands.\n" % name) | ||||
| 
 | ||||
| @plac.annotations( | ||||
|     lang=("model language", "positional", None, str), | ||||
|     output_dir=("output directory to store model in", "positional", None, str), | ||||
|  | @ -147,6 +137,7 @@ def train(self, lang, output_dir, train_data, dev_data=None, n_iter=15, | |||
|     """ | ||||
|     Train a model. Expects data in spaCy's JSON format. | ||||
|     """ | ||||
|     print(train_data, dev_data) | ||||
|     nsents = nsents or None | ||||
|     cli_train(lang, output_dir, train_data, dev_data, n_iter, nsents, | ||||
|               use_gpu, no_tagger, no_parser, no_entities) | ||||
|  | @ -157,3 +148,5 @@ if __name__ == '__main__': | |||
|     import sys | ||||
|     if sys.argv[1] == 'train': | ||||
|         plac.call(train) | ||||
|     if sys.argv[1] == 'convert': | ||||
|         plac.call(convert) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user