diff --git a/website/docs/api/annotation.jade b/website/docs/api/annotation.jade index 93511899b..adc6b28f7 100644 --- a/website/docs/api/annotation.jade +++ b/website/docs/api/annotation.jade @@ -79,3 +79,33 @@ p +h(2, "named-entities") Named Entity Recognition include _annotation/_named-entities + ++h(2, "json-input") JSON input format for training + +p + | spaCy takes training data in the following format: + ++code("Example structure"). + doc: { + id: string, + paragraphs: [{ + raw: string, + sents: [int], + tokens: [{ + start: int, + tag: string, + head: int, + dep: string + }], + ner: [{ + start: int, + end: int, + label: string + }], + brackets: [{ + start: int, + end: int, + label: string + }] + }] + } diff --git a/website/docs/usage/cli.jade b/website/docs/usage/cli.jade index ca33e4e40..74d6554b0 100644 --- a/website/docs/usage/cli.jade +++ b/website/docs/usage/cli.jade @@ -143,7 +143,8 @@ p +tag experimental p - | Train a model. Expects data in spaCy's JSON format. + | Train a model. Expects data in spaCy's + | #[+a("/docs/api/annotation#json-input") JSON format]. +code(false, "bash"). python -m spacy train [lang] [output_dir] [train_data] [dev_data] [--n_iter] [--parser_L1] [--no_tagger] [--no_parser] [--no_ner]