Refactor CLI

This commit is contained in:
Matthew Honnibal 2017-05-21 17:49:10 -05:00
parent cc569a348d
commit 7811d97339

View File

@ -13,18 +13,12 @@ from spacy.cli import model as cli_model
from spacy.cli import convert as cli_convert from spacy.cli import convert as cli_convert
class CLI(object):
"""
Command-line interface for spaCy
"""
commands = ('download', 'link', 'info', 'package', 'train', 'model', 'convert')
@plac.annotations( @plac.annotations(
model=("model to download (shortcut or model name)", "positional", None, str), model=("model to download (shortcut or model name)", "positional", None, str),
direct=("force direct download. Needs model name with version and won't " direct=("force direct download. Needs model name with version and won't "
"perform compatibility check", "flag", "d", bool) "perform compatibility check", "flag", "d", bool)
) )
def download(self, model, direct=False): def download(model, direct=False):
""" """
Download compatible model from default download path using pip. Model Download compatible model from default download path using pip. Model
can be shortcut, model name or, if --direct flag is set, full model name can be shortcut, model name or, if --direct flag is set, full model name
@ -38,7 +32,7 @@ class CLI(object):
link_name=("name of shortuct link to create", "positional", None, str), link_name=("name of shortuct link to create", "positional", None, str),
force=("force overwriting of existing link", "flag", "f", bool) force=("force overwriting of existing link", "flag", "f", bool)
) )
def link(self, origin, link_name, force=False): def link(origin, link_name, force=False):
""" """
Create a symlink for models within the spacy/data directory. Accepts Create a symlink for models within the spacy/data directory. Accepts
either the name of a pip package, or the local path to the model data either the name of a pip package, or the local path to the model data
@ -51,7 +45,7 @@ class CLI(object):
model=("optional: shortcut link of model", "positional", None, str), model=("optional: shortcut link of model", "positional", None, str),
markdown=("generate Markdown for GitHub issues", "flag", "md", str) markdown=("generate Markdown for GitHub issues", "flag", "md", str)
) )
def info(self, model=None, markdown=False): def info(model=None, markdown=False):
""" """
Print info about spaCy installation. If a model shortcut link is Print info about spaCy installation. If a model shortcut link is
speficied as an argument, print model information. Flag --markdown speficied as an argument, print model information. Flag --markdown
@ -66,7 +60,7 @@ class CLI(object):
meta=("path to meta.json", "option", "m", str), meta=("path to meta.json", "option", "m", str),
force=("force overwriting of existing folder in output directory", "flag", "f", bool) force=("force overwriting of existing folder in output directory", "flag", "f", bool)
) )
def package(self, input_dir, output_dir, meta=None, force=False): def package(input_dir, output_dir, meta=None, force=False):
""" """
Generate Python package for model data, including meta and required Generate Python package for model data, including meta and required
installation files. A new directory will be created in the specified installation files. A new directory will be created in the specified
@ -88,7 +82,7 @@ class CLI(object):
no_parser=("Don't train parser", "flag", "P", bool), no_parser=("Don't train parser", "flag", "P", bool),
no_entities=("Don't train NER", "flag", "N", bool) no_entities=("Don't train NER", "flag", "N", bool)
) )
def train(self, lang, output_dir, train_data, dev_data=None, n_iter=15, def train(lang, output_dir, train_data, dev_data=None, n_iter=15,
nsents=0, parser_L1=0.0, use_gpu=False, nsents=0, parser_L1=0.0, use_gpu=False,
no_tagger=False, no_parser=False, no_entities=False): no_tagger=False, no_parser=False, no_entities=False):
""" """
@ -98,36 +92,32 @@ class CLI(object):
cli_train(lang, output_dir, train_data, dev_data, n_iter, nsents, cli_train(lang, output_dir, train_data, dev_data, n_iter, nsents,
use_gpu, no_tagger, no_parser, no_entities, parser_L1) use_gpu, no_tagger, no_parser, no_entities, parser_L1)
# plac command "model" (method form on the CLI class). Each annotation below
# is a plac tuple of (help text, argument kind, abbreviation, type).
@plac.annotations(
    lang=("model language", "positional", None, str),
    model_dir=("output directory to store model in", "positional", None, str),
    freqs_data=("tab-separated frequencies file", "positional", None, str),
    # The next two parameters default to None in the signature below.
    clusters_data=("Brown clusters file", "positional", None, str),
    vectors_data=("word vectors file", "positional", None, str)
)
def model(self, lang, model_dir, freqs_data, clusters_data=None, vectors_data=None):
    """
    Initialize a new model and its data directory.
    """
    # Thin wrapper: `self` is unused; every other argument is forwarded
    # positionally, in order, to cli_model. The result is not returned.
    cli_model(lang, model_dir, freqs_data, clusters_data, vectors_data)
@plac.annotations( @plac.annotations(
input_file=("input file", "positional", None, str), input_file=("input file", "positional", None, str),
output_dir=("output directory for converted file", "positional", None, str), output_dir=("output directory for converted file", "positional", None, str),
n_sents=("Number of sentences per doc", "option", "n", float), n_sents=("Number of sentences per doc", "option", "n", float),
morphology=("Enable appending morphology to tags", "flag", "m", bool) morphology=("Enable appending morphology to tags", "flag", "m", bool)
) )
def convert(self, input_file, output_dir, n_sents=10, morphology=False): def convert(input_file, output_dir, n_sents=10, morphology=False):
""" """
Convert files into JSON format for use with train command and other Convert files into JSON format for use with train command and other
experiment management functions. experiment management functions.
""" """
cli_convert(input_file, output_dir, n_sents, morphology) cli_convert(input_file, output_dir, n_sents, morphology)
# plac command "model" (module-level function form). Each annotation below
# is a plac tuple of (help text, argument kind, abbreviation, type).
@plac.annotations(
    lang=("model language", "positional", None, str),
    model_dir=("output directory to store model in", "positional", None, str),
    freqs_data=("tab-separated frequencies file", "positional", None, str),
    # The next two parameters default to None in the signature below.
    clusters_data=("Brown clusters file", "positional", None, str),
    vectors_data=("word vectors file", "positional", None, str)
)
def model(lang, model_dir, freqs_data, clusters_data=None, vectors_data=None):
    """
    Initialize a new model and its data directory.
    """
    # Thin wrapper: every argument is forwarded positionally, in order, to
    # cli_model. The result is not returned.
    cli_model(lang, model_dir, freqs_data, clusters_data, vectors_data)
def __missing__(self, name):
    """
    Print a notice that the command `name` does not exist, followed by a
    hint to use the --help flag.
    """
    # NOTE(review): presumably invoked by plac's command dispatch when the
    # requested command is not listed in `commands` -- confirm against plac.
    notice = ("\n Command %r does not exist."
              "\n Use the --help flag for a list of available commands.\n")
    print(notice % name)
@plac.annotations( @plac.annotations(
lang=("model language", "positional", None, str), lang=("model language", "positional", None, str),
@ -147,6 +137,7 @@ def train(self, lang, output_dir, train_data, dev_data=None, n_iter=15,
""" """
Train a model. Expects data in spaCy's JSON format. Train a model. Expects data in spaCy's JSON format.
""" """
print(train_data, dev_data)
nsents = nsents or None nsents = nsents or None
cli_train(lang, output_dir, train_data, dev_data, n_iter, nsents, cli_train(lang, output_dir, train_data, dev_data, n_iter, nsents,
use_gpu, no_tagger, no_parser, no_entities) use_gpu, no_tagger, no_parser, no_entities)
@ -157,3 +148,5 @@ if __name__ == '__main__':
import sys import sys
if sys.argv[1] == 'train': if sys.argv[1] == 'train':
plac.call(train) plac.call(train)
if sys.argv[1] == 'convert':
plac.call(convert)