This commit is contained in:
Matthew Honnibal 2017-12-07 10:39:32 +01:00
commit 3b17eb7c49
2 changed files with 64 additions and 0 deletions

View File

@ -27,6 +27,10 @@ from ..util import prints, ensure_path, get_lang_class
"option", "V", int)
)
def init_model(lang, output_dir, freqs_loc, clusters_loc=None, vectors_loc=None, prune_vectors=-1):
"""
Create a new model from raw data, like word frequencies, Brown clusters
and word vectors.
"""
if not freqs_loc.exists():
prints(freqs_loc, title="Can't find words frequencies file", exits=1)
clusters_loc = ensure_path(clusters_loc)

View File

@ -478,6 +478,66 @@ p
+cell model
+cell A spaCy model containing the vocab and vectors.
+h(3, "init-model") Init Model
+tag-new(2)
p
| Create a new model directory from raw data, like word frequencies, Brown
| clusters and word vectors. This command is similar to the
| #[code spacy model] command in v1.x.
+code(false, "bash", "$", false, false, true).
python -m spacy init-model [lang] [output_dir] [freqs_loc] [--clusters-loc] [--vectors-loc] [--prune-vectors]
+table(["Argument", "Type", "Description"])
+row
+cell #[code lang]
+cell positional
+cell
| Model language
| #[+a("https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes") ISO code],
| e.g. #[code en].
+row
+cell #[code output_dir]
+cell positional
+cell Model output directory. Will be created if it doesn't exist.
+row
+cell #[code freqs_loc]
+cell positional
+cell
| Location of word frequencies file. Should be a tab-separated
| file with three columns: frequency, document frequency and
| frequency count.
+row
+cell #[code --clusters-loc], #[code -c]
+cell option
+cell
| Optional location of clusters file. Should be a tab-separated
| file with three columns: cluster, word and frequency.
+row
+cell #[code --vectors-loc], #[code -v]
+cell option
+cell
| Optional location of vectors file. Should be a tab-separated
| file where the first column contains the word and the remaining
| columns the values.
+row
+cell #[code --prune-vectors], #[code -V]
+cell option
+cell
| Number of vectors to prune the vocabulary to. Defaults to
| #[code -1] for no pruning.
+row("foot")
+cell creates
+cell model
+cell A spaCy model containing the vocab and vectors.
+h(3, "evaluate") Evaluate
+tag-new(2)