From 24e80c51b8e78f5d9f5a8f183d4e7121cc58e445 Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 7 Dec 2017 10:14:37 +0100 Subject: [PATCH] Document init-model command --- spacy/cli/init_model.py | 2 ++ website/api/cli.jade | 60 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py index e124b6926..9be1d2fc1 100644 --- a/spacy/cli/init_model.py +++ b/spacy/cli/init_model.py @@ -27,6 +27,8 @@ from ..util import prints, ensure_path, get_lang_class "option", "V", int) ) def init_model(lang, output_dir, freqs_loc, clusters_loc=None, vectors_loc=None, prune_vectors=-1): + """Create a new model from raw data, like word frequencies, Brown clusters + and word vectors.""" if not freqs_loc.exists(): prints(freqs_loc, title="Can't find words frequencies file", exits=1) clusters_loc = ensure_path(clusters_loc) diff --git a/website/api/cli.jade b/website/api/cli.jade index cb98ec279..fdb8e4efe 100644 --- a/website/api/cli.jade +++ b/website/api/cli.jade @@ -478,6 +478,66 @@ p +cell model +cell A spaCy model containing the vocab and vectors. ++h(3, "init-model") Init Model + +tag-new(2) + +p + | Create a new model directory from raw data, like word frequencies, Brown + | clusters and word vectors. This command is similar to the + | #[code spacy model] command in v1.x. + ++code(false, "bash", "$", false, false, true). + python -m spacy init-model [lang] [output_dir] [freqs_loc] [--clusters-loc] [--vectors-loc] [--prune-vectors] + ++table(["Argument", "Type", "Description"]) + +row + +cell #[code lang] + +cell positional + +cell + | Model language + | #[+a("https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes") ISO code], + | e.g. #[code en]. + + +row + +cell #[code output_dir] + +cell positional + +cell Model output directory. Will be created if it doesn't exist. + + +row + +cell #[code freqs_loc] + +cell positional + +cell + | Location of word frequencies file. 
Should be a tab-separated + | file with three columns: frequency, document frequency and + | frequency count. + + +row + +cell #[code --clusters-loc], #[code -c] + +cell option + +cell + | Optional location of clusters file. Should be a tab-separated + | file with three columns: cluster, word and frequency. + + +row + +cell #[code --vectors-loc], #[code -v] + +cell option + +cell + | Optional location of vectors file. Should be a tab-separated + | file where the first column contains the word and the remaining + | columns the values. + + +row + +cell #[code --prune-vectors], #[code -V] + +cell option + +cell + | Number of vectors to prune the vocabulary to. Defaults to + | #[code -1] for no pruning. + + +row("foot") + +cell creates + +cell model + +cell A spaCy model containing the vocab and vectors. + +h(3, "evaluate") Evaluate +tag-new(2)