2017-03-18 17:14:48 +03:00
|
|
|
# coding: utf8
|
2017-03-18 20:14:03 +03:00
|
|
|
from __future__ import print_function
|
|
|
|
# NB! This breaks in plac on Python 2!!
|
2017-03-23 13:08:30 +03:00
|
|
|
#from __future__ import unicode_literals
|
2017-03-18 17:14:48 +03:00
|
|
|
|
|
|
|
import plac
|
|
|
|
from spacy.cli import download as cli_download
|
|
|
|
from spacy.cli import link as cli_link
|
|
|
|
from spacy.cli import info as cli_info
|
2017-03-21 00:50:13 +03:00
|
|
|
from spacy.cli import package as cli_package
|
2017-03-23 13:08:41 +03:00
|
|
|
from spacy.cli import train as cli_train
|
|
|
|
from spacy.cli import train_config as cli_train_config
|
2017-03-18 17:14:48 +03:00
|
|
|
|
|
|
|
|
|
|
|
class CLI(object):
    """Command-line interface for spaCy"""

    # Names of the methods exposed as sub-commands. The class is handed to
    # plac.Interpreter.call() at the bottom of this file, which uses this
    # tuple to discover the available commands.
    commands = ('download', 'link', 'info', 'package', 'train', 'train_config')

    # NOTE: each plac annotation is a tuple of
    # (help text, kind, abbreviation, type). The method docstrings double as
    # the help text of the corresponding sub-command, so they are kept short
    # and user-facing.

    @plac.annotations(
        model=("model to download (shortcut or model name)", "positional", None, str),
        direct=("force direct download. Needs model name with version and won't "
                "perform compatibility check", "flag", "d", bool)
    )
    def download(self, model=None, direct=False):
        """
        Download compatible model from default download path using pip. Model
        can be shortcut, model name or, if --direct flag is set, full model name
        with version.
        """
        # Thin wrapper: all work is delegated to spacy.cli.download.
        cli_download(model, direct)

    @plac.annotations(
        origin=("package name or local path to model", "positional", None, str),
        link_name=("name of shortcut link to create", "positional", None, str),
        force=("force overwriting of existing link", "flag", "f", bool)
    )
    def link(self, origin, link_name, force=False):
        """
        Create a symlink for models within the spacy/data directory. Accepts
        either the name of a pip package, or the local path to the model data
        directory. Linking models allows loading them via spacy.load(link_name).
        """
        # Thin wrapper: all work is delegated to spacy.cli.link.
        cli_link(origin, link_name, force)

    @plac.annotations(
        model=("optional: shortcut link of model", "positional", None, str),
        # A flag is a boolean switch; declared as bool for consistency with
        # every other flag in this class (it was previously declared as str).
        markdown=("generate Markdown for GitHub issues", "flag", "md", bool)
    )
    def info(self, model=None, markdown=False):
        """
        Print info about spaCy installation. If a model shortcut link is
        specified as an argument, print model information. Flag --markdown
        prints details in Markdown for easy copy-pasting to GitHub issues.
        """
        # Thin wrapper: all work is delegated to spacy.cli.info.
        cli_info(model, markdown)

    @plac.annotations(
        input_dir=("directory with model data", "positional", None, str),
        output_dir=("output parent directory", "positional", None, str),
        force=("force overwriting of existing folder in output directory",
               "flag", "f", bool)
    )
    def package(self, input_dir, output_dir, force=False):
        """
        Generate Python package for model data, including meta and required
        installation files. A new directory will be created in the specified
        output directory, and model data will be copied over.
        """
        # Thin wrapper: all work is delegated to spacy.cli.package.
        cli_package(input_dir, output_dir, force)

    @plac.annotations(
        lang=("language", "positional", None, str),
        output_dir=("output directory", "positional", None, str),
        train_data=("training data", "positional", None, str),
        dev_data=("development data", "positional", None, str),
        n_iter=("number of iterations", "option", "n", int),
        parser_L1=("L1 regularization penalty for parser", "option", "L", float),
        no_tagger=("Don't train tagger", "flag", "T", bool),
        no_parser=("Don't train parser", "flag", "P", bool),
        no_ner=("Don't train NER", "flag", "N", bool)
    )
    def train(self, lang, output_dir, train_data, dev_data, n_iter=15,
              parser_L1=0.0,
              no_tagger=False, no_parser=False, no_ner=False):
        """Train a model."""
        # The command line exposes negative switches (no_tagger, no_parser,
        # no_ner); each one is inverted before being handed to cli_train.
        # parser_L1 is passed last, after the three inverted switches, which
        # differs from its position in this method's own signature.
        cli_train(lang, output_dir, train_data, dev_data, n_iter,
                  not no_tagger, not no_parser, not no_ner,
                  parser_L1)

    @plac.annotations(
        config=("config", "positional", None, str),
    )
    def train_config(self, config):
        """Train a model from config file."""
        # Thin wrapper: all work is delegated to spacy.cli.train_config.
        cli_train_config(config)

    def __missing__(self, name):
        # Fallback for a command name that is not listed in `commands`
        # (presumably invoked by plac's Interpreter -- see the plac docs).
        # Only reports the problem on stdout; it does not raise or exit.
        print("\n   Command %r does not exist\n" % name)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: dispatch the command line to the CLI class.
    # `plac` is already imported at the top of the file, so it is not
    # re-imported here.
    import sys

    # Replace the script path in argv[0] with 'spacy' before plac parses the
    # command line (presumably so usage/help output shows 'spacy' as the
    # program name -- confirm against plac/argparse behaviour).
    sys.argv[0] = 'spacy'

    # plac is given the CLI class itself, so no instance is created here.
    plac.Interpreter.call(CLI)
|