From 70052e46e908d9d11c2f705187c7cbdad56c8ec5 Mon Sep 17 00:00:00 2001 From: ines Date: Tue, 10 Apr 2018 21:42:46 +0200 Subject: [PATCH 1/4] Fix formatting [ci skip] --- spacy/cli/_messages.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spacy/cli/_messages.py b/spacy/cli/_messages.py index d55ff8d10..c3c9e496f 100644 --- a/spacy/cli/_messages.py +++ b/spacy/cli/_messages.py @@ -1,6 +1,7 @@ # coding: utf8 from __future__ import unicode_literals + class Messages(object): M001 = ("Download successful but linking failed") M002 = ("Creating a shortcut link for 'en' didn't work (maybe you " @@ -64,7 +65,7 @@ class Messages(object): "flag to overwrite existing directories.") M046 = ("Generating meta.json") M047 = ("Enter the package settings for your model. The following " - "information will be read from your model data: pipeline, vectors.") + "information will be read from your model data: pipeline, vectors.") M048 = ("No '{key}' setting found in meta.json") M049 = ("This setting is required to build your package.") M050 = ("Training data not found") From ce63f8997b71a0f4939a95a86b2e5e14abb96fbf Mon Sep 17 00:00:00 2001 From: ines Date: Tue, 10 Apr 2018 21:42:54 +0200 Subject: [PATCH 2/4] Update init-model docs --- website/api/cli.jade | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/website/api/cli.jade b/website/api/cli.jade index 85a13c3c0..a34271d81 100644 --- a/website/api/cli.jade +++ b/website/api/cli.jade @@ -533,8 +533,10 @@ p +cell option +cell | Optional location of vectors file. Should be a tab-separated - | file where the first column contains the word and the remaining - | columns the values. + | file in Word2Vec format where the first column contains the word + | and the remaining columns the values. File can be provided in + | #[code .txt] format or as a zipped text file in #[code .zip] or + | #[code .tar.gz] format. +row +cell #[code --prune-vectors], #[code -V] From 49b1e48bf556dc291c0144b6384e7b4c702a3072 Mon Sep 17 00:00:00 2001 From: ines Date: Tue, 10 Apr 2018 21:44:59 +0200 Subject: [PATCH 3/4] Fix syntax error --- spacy/cli/init_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py index 5dbe39901..8fc4c5645 100644 --- a/spacy/cli/init_model.py +++ b/spacy/cli/init_model.py @@ -114,7 +114,7 @@ def read_vectors(vectors_loc): pieces = line.rsplit(' ', vectors_data.shape[1]+1) word = pieces.pop(0) if len(pieces) != vectors_data.shape[1]: - raise ValueError(Errors.E094.format(line_num=i, loc=vectors_loc) + raise ValueError(Errors.E094.format(line_num=i, loc=vectors_loc)) vectors_data[i] = numpy.asarray(pieces, dtype='f') vectors_keys.append(word) return vectors_data, vectors_keys From 0299d5fac82cf1fb8f8d1caaa4d6fb5048bb5520 Mon Sep 17 00:00:00 2001 From: ines Date: Tue, 10 Apr 2018 21:45:11 +0200 Subject: [PATCH 4/4] Update argument annotations and formatting --- spacy/cli/init_model.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py index 8fc4c5645..87c3033ad 100644 --- a/spacy/cli/init_model.py +++ b/spacy/cli/init_model.py @@ -29,12 +29,14 @@ except ImportError: freqs_loc=("location of words frequencies file", "positional", None, Path), clusters_loc=("optional: location of brown clusters data", "option", "c", str), - vectors_loc=("optional: location of vectors file in GenSim text format", - "option", "v", str), + vectors_loc=("optional: location of vectors file in Word2Vec format " + "(either as .txt or zipped as .zip or .tar.gz)", "option", + "v", str), prune_vectors=("optional: number of vectors to prune to", "option", "V", int) ) -def init_model(lang, output_dir, freqs_loc=None, clusters_loc=None, vectors_loc=None, prune_vectors=-1): +def init_model(lang, output_dir, freqs_loc=None, clusters_loc=None, + vectors_loc=None, prune_vectors=-1): """ Create a new model from raw data, like word frequencies, Brown clusters and word vectors.