Allow vectors name to be set in init-model (#4321)

* Allow vectors name to be specified in init-model
* Document --vectors-name argument to init-model
* Update website/docs/api/cli.md

Co-Authored-By: Ines Montani <ines@ines.io>
2025-08-07 21:54:54 +03:00 · 2019-09-25 13:11:00 +02:00 · 2019-09-25 13:11:00 +02:00 · 92ed4dc5e0
commit 92ed4dc5e0
parent 09816f8323
2 changed files with 9 additions and 3 deletions
--- a/spacy/cli/init_model.py
+++ b/spacy/cli/init_model.py
@ -35,6 +35,7 @@ msg = Printer()
    clusters_loc=("Optional location of brown clusters data", "option", "c", str),
    vectors_loc=("Optional vectors file in Word2Vec format", "option", "v", str),
    prune_vectors=("Optional number of vectors to prune to", "option", "V", int),
+    vectors_name=("Optional name for the word vectors, e.g. en_core_web_lg.vectors", "vn", str)
 )
 def init_model(
    lang,
@ -44,6 +45,7 @@ def init_model(
    jsonl_loc=None,
    vectors_loc=None,
    prune_vectors=-1,
+    vectors_name=None
 ):
    """
    Create a new model from raw data, like word frequencies, Brown clusters
@ -78,7 +80,7 @@ def init_model(
        nlp = create_model(lang, lex_attrs)
    msg.good("Successfully created model")
    if vectors_loc is not None:
-        add_vectors(nlp, vectors_loc, prune_vectors)
+        add_vectors(nlp, vectors_loc, prune_vectors, vectors_name)
    vec_added = len(nlp.vocab.vectors)
    lex_added = len(nlp.vocab)
    msg.good(
@ -160,7 +162,7 @@ def create_model(lang, lex_attrs):
    return nlp


-def add_vectors(nlp, vectors_loc, prune_vectors):
+def add_vectors(nlp, vectors_loc, prune_vectors, name=None):
    vectors_loc = ensure_path(vectors_loc)
    if vectors_loc and vectors_loc.parts[-1].endswith(".npz"):
        nlp.vocab.vectors = Vectors(data=numpy.load(vectors_loc.open("rb")))
@ -181,7 +183,10 @@ def add_vectors(nlp, vectors_loc, prune_vectors):
                    lexeme.is_oov = False
        if vectors_data is not None:
            nlp.vocab.vectors = Vectors(data=vectors_data, keys=vector_keys)
-    nlp.vocab.vectors.name = "%s_model.vectors" % nlp.meta["lang"]
+    if name is None:
+        nlp.vocab.vectors.name = "%s_model.vectors" % nlp.meta["lang"]
+    else:
+        nlp.vocab.vectors.name = name
    nlp.meta["vectors"]["name"] = nlp.vocab.vectors.name
    if prune_vectors >= 1:
        nlp.vocab.prune_vectors(prune_vectors)
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@ -538,6 +538,7 @@ $ python -m spacy init-model [lang] [output_dir] [--jsonl-loc] [--vectors-loc]
 | `--jsonl-loc`, `-j`     | option     | Optional location of JSONL-formatted [vocabulary file](/api/annotation#vocab-jsonl) with lexical attributes.                                                                                                                                           |
 | `--vectors-loc`, `-v`   | option     | Optional location of vectors. Should be a file where the first row contains the dimensions of the vectors, followed by a space-separated Word2Vec table. File can be provided in `.txt` format or as a zipped text file in `.zip` or `.tar.gz` format. |
 | `--prune-vectors`, `-V` | option     | Number of vectors to prune the vocabulary to. Defaults to `-1` for no pruning.                                                                                                                                                                         |
+| `--vectors-name`, `-vn` | option     | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`. |
 | **CREATES**             | model      | A spaCy model containing the vocab and vectors.                                                                                                                                                                                                        |

 ## Evaluate {#evaluate new="2"}