Merge branch 'develop'

This commit is contained in:
ines 2017-03-26 15:57:00 +02:00
commit c00d997924
3 changed files with 104 additions and 21 deletions

View File

@ -9,7 +9,6 @@ from spacy.cli import link as cli_link
from spacy.cli import info as cli_info
from spacy.cli import package as cli_package
from spacy.cli import train as cli_train
from spacy.cli import train_config as cli_train_config
class CLI(object):
@ -77,36 +76,29 @@ class CLI(object):
@plac.annotations(
lang=("language", "positional", None, str),
output_dir=("output directory", "positional", None, str),
train_data=("training data", "positional", None, str),
dev_data=("development data", "positional", None, str),
lang=("model language", "positional", None, str),
output_dir=("output directory to store model in", "positional", None, str),
train_data=("location of JSON-formatted training data", "positional", None, str),
dev_data=("location of JSON-formatted development data (optional)", "positional", None, str),
n_iter=("number of iterations", "option", "n", int),
parser_L1=("L1 regularization penalty for parser", "option", "L", float),
no_tagger=("Don't train tagger", "flag", "T", bool),
no_parser=("Don't train parser", "flag", "P", bool),
no_ner=("Don't train NER", "flag", "N", bool)
)
def train(self, lang, output_dir, train_data, dev_data, n_iter=15,
parser_L1=0.0,
no_tagger=False, no_parser=False, no_ner=False):
"""Train a model."""
cli_train(lang, output_dir, train_data, dev_data, n_iter,
not no_tagger, not no_parser, not no_ner,
parser_L1)
def train(self, lang, output_dir, train_data, dev_data=None, n_iter=15,
parser_L1=0.0, no_tagger=False, no_parser=False, no_ner=False):
"""
Train a model. Expects data in spaCy's JSON format.
"""
@plac.annotations(
config=("config", "positional", None, str),
)
def train_config(self, config):
"""Train a model from config file."""
cli_train_config(config)
cli_train(lang, output_dir, train_data, dev_data, n_iter, not no_tagger,
not no_parser, not no_ner, parser_L1)
def __missing__(self, name):
print("\n Command %r does not exist\n" % name)
print("\n Command %r does not exist."
"\n Use the --help flag for a list of available commands.\n" % name)
if __name__ == '__main__':

View File

@ -79,3 +79,33 @@ p
+h(2, "named-entities") Named Entity Recognition
include _annotation/_named-entities
+h(2, "json-input") JSON input format for training
p
| spaCy takes training data in the following format:
+code("Example structure").
doc: {
id: string,
paragraphs: [{
raw: string,
sents: [int],
tokens: [{
start: int,
tag: string,
head: int,
dep: string
}],
ner: [{
start: int,
end: int,
label: string
}],
brackets: [{
start: int,
end: int,
label: string
}]
}]
}

View File

@ -138,3 +138,64 @@ p
+cell #[code --help], #[code -h]
+cell flag
+cell Show help message and available arguments.
+h(2, "train") Train
+tag experimental
p
| Train a model. Expects data in spaCy's
| #[+a("/docs/api/annotation#json-input") JSON format].
+code(false, "bash").
python -m spacy train [lang] [output_dir] [train_data] [dev_data] [--n_iter] [--parser_L1] [--no_tagger] [--no_parser] [--no_ner]
+table(["Argument", "Type", "Description"])
+row
+cell #[code lang]
+cell positional
+cell Model language.
+row
+cell #[code output_dir]
+cell positional
+cell Directory to store the model in.
+row
+cell #[code train_data]
+cell positional
+cell Location of JSON-formatted training data.
+row
+cell #[code dev_data]
+cell positional
+cell Location of JSON-formatted development data (optional).
+row
+cell #[code --n_iter], #[code -n]
+cell option
+cell Number of iterations (default: #[code 15]).
+row
+cell #[code --parser_L1], #[code -L]
+cell option
+cell L1 regularization penalty for parser (default: #[code 0.0]).
+row
+cell #[code --no_tagger], #[code -T]
+cell flag
+cell Don't train tagger.
+row
+cell #[code --no_parser], #[code -P]
+cell flag
+cell Don't train parser.
+row
+cell #[code --no_ner], #[code -N]
+cell flag
+cell Don't train NER.
+row
+cell #[code --help], #[code -h]
+cell flag
+cell Show help message and available arguments.