diff --git a/spacy/__main__.py b/spacy/__main__.py index fedf3166b..6e96893ea 100644 --- a/spacy/__main__.py +++ b/spacy/__main__.py @@ -9,7 +9,6 @@ from spacy.cli import link as cli_link from spacy.cli import info as cli_info from spacy.cli import package as cli_package from spacy.cli import train as cli_train -from spacy.cli import train_config as cli_train_config class CLI(object): @@ -77,36 +76,29 @@ class CLI(object): @plac.annotations( - lang=("language", "positional", None, str), - output_dir=("output directory", "positional", None, str), - train_data=("training data", "positional", None, str), - dev_data=("development data", "positional", None, str), + lang=("model language", "positional", None, str), + output_dir=("output directory to store model in", "positional", None, str), + train_data=("location of JSON-formatted training data", "positional", None, str), + dev_data=("location of JSON-formatted development data (optional)", "positional", None, str), n_iter=("number of iterations", "option", "n", int), parser_L1=("L1 regularization penalty for parser", "option", "L", float), no_tagger=("Don't train tagger", "flag", "T", bool), no_parser=("Don't train parser", "flag", "P", bool), no_ner=("Don't train NER", "flag", "N", bool) ) - def train(self, lang, output_dir, train_data, dev_data, n_iter=15, - parser_L1=0.0, - no_tagger=False, no_parser=False, no_ner=False): - """Train a model.""" - cli_train(lang, output_dir, train_data, dev_data, n_iter, - not no_tagger, not no_parser, not no_ner, - parser_L1) + def train(self, lang, output_dir, train_data, dev_data=None, n_iter=15, + parser_L1=0.0, no_tagger=False, no_parser=False, no_ner=False): + """ + Train a model. Expects data in spaCy's JSON format. + """ - - @plac.annotations( - config=("config", "positional", None, str), - ) - def train_config(self, config): - """Train a model from config file.""" - - cli_train_config(config) + cli_train(lang, output_dir, train_data, dev_data, n_iter, not no_tagger, + not no_parser, not no_ner, parser_L1) def __missing__(self, name): - print("\n Command %r does not exist\n" % name) + print("\n Command %r does not exist." + "\n Use the --help flag for a list of available commands.\n" % name) if __name__ == '__main__': diff --git a/website/docs/api/annotation.jade b/website/docs/api/annotation.jade index 93511899b..adc6b28f7 100644 --- a/website/docs/api/annotation.jade +++ b/website/docs/api/annotation.jade @@ -79,3 +79,33 @@ p +h(2, "named-entities") Named Entity Recognition include _annotation/_named-entities + ++h(2, "json-input") JSON input format for training + +p + | spaCy takes training data in the following format: + ++code("Example structure"). + doc: { + id: string, + paragraphs: [{ + raw: string, + sents: [int], + tokens: [{ + start: int, + tag: string, + head: int, + dep: string + }], + ner: [{ + start: int, + end: int, + label: string + }], + brackets: [{ + start: int, + end: int, + label: string + }] + }] + } diff --git a/website/docs/usage/cli.jade b/website/docs/usage/cli.jade index cc07c18ea..74d6554b0 100644 --- a/website/docs/usage/cli.jade +++ b/website/docs/usage/cli.jade @@ -138,3 +138,64 @@ p +cell #[code --help], #[code -h] +cell flag +cell Show help message and available arguments. + ++h(2, "train") Train + +tag experimental + +p + | Train a model. Expects data in spaCy's + | #[+a("/docs/api/annotation#json-input") JSON format]. + ++code(false, "bash"). + python -m spacy train [lang] [output_dir] [train_data] [dev_data] [--n_iter] [--parser_L1] [--no_tagger] [--no_parser] [--no_ner] + ++table(["Argument", "Type", "Description"]) + +row + +cell #[code lang] + +cell positional + +cell Model language. + + +row + +cell #[code output_dir] + +cell positional + +cell Directory to store model in. + + +row + +cell #[code train_data] + +cell positional + +cell Location of JSON-formatted training data. + + +row + +cell #[code dev_data] + +cell positional + +cell Location of JSON-formatted dev data (optional). + + +row + +cell #[code --n_iter], #[code -n] + +cell option + +cell Number of iterations (default: #[code 15]). + + +row + +cell #[code --parser_L1], #[code -L] + +cell option + +cell L1 regularization penalty for parser (default: #[code 0.0]). + + +row + +cell #[code --no_tagger], #[code -T] + +cell flag + +cell Don't train tagger. + + +row + +cell #[code --no_parser], #[code -P] + +cell flag + +cell Don't train parser. + + +row + +cell #[code --no_ner], #[code -N] + +cell flag + +cell Don't train NER. + + +row + +cell #[code --help], #[code -h] + +cell flag + +cell Show help message and available arguments.