Document init-model command

2025-12-23 10:03:15 +03:00 · 2017-12-07 10:14:37 +01:00 · 2017-12-07 10:14:37 +01:00 · 24e80c51b8
commit 24e80c51b8
parent c91f451b0f
2 changed files with 62 additions and 0 deletions
--- a/spacy/cli/init_model.py
+++ b/spacy/cli/init_model.py
@@ -27,6 +27,8 @@ from ..util import prints, ensure_path, get_lang_class
                   "option", "V", int)
 )
 def init_model(lang, output_dir, freqs_loc, clusters_loc=None, vectors_loc=None, prune_vectors=-1):
    """Create a new model from raw data, like word frequencies, Brown clusters
    and word vectors."""
    if not freqs_loc.exists():
        prints(freqs_loc, title="Can't find words frequencies file", exits=1)
    clusters_loc = ensure_path(clusters_loc)
--- a/website/api/cli.jade
+++ b/website/api/cli.jade
@@ -478,6 +478,66 @@ p
        +cell model
        +cell A spaCy model containing the vocab and vectors.
 +h(3, "init-model") Init Model
    +tag-new(2)
 p
    |  Create a new model directory from raw data, like word frequencies, Brown
    |  clusters and word vectors. This command is similar to the
    |  #[code spacy model] command in v1.x.
 +code(false, "bash", "$", false, false, true).
    python -m spacy init-model [lang] [output_dir] [freqs_loc] [--clusters-loc] [--vectors-loc] [--prune-vectors]
 +table(["Argument", "Type", "Description"])
    +row
        +cell #[code lang]
        +cell positional
        +cell
            |  Model language
            |  #[+a("https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes") ISO code],
            |  e.g. #[code en].
    +row
        +cell #[code output_dir]
        +cell positional
        +cell Model output directory. Will be created if it doesn't exist.
    +row
        +cell #[code freqs_loc]
        +cell positional
        +cell
            |  Location of word frequencies file. Should be a tab-separated
            |  file with three columns: frequency, document frequency and
            |  frequency count.
    +row
        +cell #[code --clusters-loc], #[code -c]
        +cell option
        +cell
            |  Optional location of clusters file. Should be a tab-separated
            |  file with three columns: cluster, word and frequency.
    +row
        +cell #[code --vectors-loc], #[code -v]
        +cell option
        +cell
            |  Optional location of vectors file. Should be a tab-separated
            |  file where the first column contains the word and the remaining
            |  columns the values.
    +row
        +cell #[code --prune-vectors], #[code -V]
        +cell option
        +cell
            |  Number of vectors to prune the vocabulary to. Defaults to
            |  #[code -1] for no pruning.
    +row("foot")
        +cell creates
        +cell model
        +cell A spaCy model containing the vocab and vectors.
 +h(3, "evaluate") Evaluate
    +tag-new(2)