mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Merge branch 'master' of https://github.com/explosion/spaCy
This commit is contained in:
commit
3b17eb7c49
|
@ -27,6 +27,10 @@ from ..util import prints, ensure_path, get_lang_class
|
|||
"option", "V", int)
|
||||
)
|
||||
def init_model(lang, output_dir, freqs_loc, clusters_loc=None, vectors_loc=None, prune_vectors=-1):
|
||||
"""
|
||||
Create a new model from raw data, like word frequencies, Brown clusters
|
||||
and word vectors.
|
||||
"""
|
||||
if not freqs_loc.exists():
|
||||
prints(freqs_loc, title="Can't find words frequencies file", exits=1)
|
||||
clusters_loc = ensure_path(clusters_loc)
|
||||
|
|
|
@ -478,6 +478,66 @@ p
|
|||
+cell model
|
||||
+cell A spaCy model containing the vocab and vectors.
|
||||
|
||||
+h(3, "init-model") Init Model
|
||||
+tag-new(2)
|
||||
|
||||
p
|
||||
| Create a new model directory from raw data, like word frequencies, Brown
|
||||
| clusters and word vectors. This command is similar to the
|
||||
| #[code spacy model] command in v1.x.
|
||||
|
||||
+code(false, "bash", "$", false, false, true).
|
||||
python -m spacy init-model [lang] [output_dir] [freqs_loc] [--clusters-loc] [--vectors-loc] [--prune-vectors]
|
||||
|
||||
+table(["Argument", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code lang]
|
||||
+cell positional
|
||||
+cell
|
||||
| Model language
|
||||
| #[+a("https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes") ISO code],
|
||||
| e.g. #[code en].
|
||||
|
||||
+row
|
||||
+cell #[code output_dir]
|
||||
+cell positional
|
||||
+cell Model output directory. Will be created if it doesn't exist.
|
||||
|
||||
+row
|
||||
+cell #[code freqs_loc]
|
||||
+cell positional
|
||||
+cell
|
||||
| Location of word frequencies file. Should be a tab-separated
|
||||
| file with three columns: frequency, document frequency and
|
||||
| the word itself.
|
||||
|
||||
+row
|
||||
+cell #[code --clusters-loc], #[code -c]
|
||||
+cell option
|
||||
+cell
|
||||
| Optional location of clusters file. Should be a tab-separated
|
||||
| file with three columns: cluster, word and frequency.
|
||||
|
||||
+row
|
||||
+cell #[code --vectors-loc], #[code -v]
|
||||
+cell option
|
||||
+cell
|
||||
| Optional location of vectors file. Should be a tab-separated
|
||||
| file where the first column contains the word and the remaining
|
||||
| columns the values.
|
||||
|
||||
+row
|
||||
+cell #[code --prune-vectors], #[code -V]
|
||||
+cell option
|
||||
+cell
|
||||
| Number of vectors to prune the vocabulary to. Defaults to
|
||||
| #[code -1] for no pruning.
|
||||
|
||||
+row("foot")
|
||||
+cell creates
|
||||
+cell model
|
||||
+cell A spaCy model containing the vocab and vectors.
|
||||
|
||||
+h(3, "evaluate") Evaluate
|
||||
+tag-new(2)
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user