From a62337b3f381b061b2ec27e6d9e9ba718276131b Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Mon, 28 Sep 2020 12:53:06 +0200
Subject: [PATCH] Tidy up vocab init

Move the vector loading and the "Created vocabulary" / "Added vectors"
status messages out of init_pipeline and into init_vocab, which gains a
keyword-only `vectors` argument. The "Added vectors" message reports the
`vectors` argument, because init_pipeline's local `V` is not in scope
inside init_vocab.

---
 spacy/cli/init_pipeline.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/spacy/cli/init_pipeline.py b/spacy/cli/init_pipeline.py
index e64683fe1..28e314d0a 100644
--- a/spacy/cli/init_pipeline.py
+++ b/spacy/cli/init_pipeline.py
@@ -63,11 +63,7 @@ def init_pipeline(config: Config, use_gpu: int = -1) -> Language:
     train_corpus, dev_corpus = resolve_dot_names(config, dot_names)
     I = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
     V = I["vocab"]
-    init_vocab(nlp, data=V["data"], lookups=V["lookups"])
-    msg.good("Created vocabulary")
-    if V["vectors"] is not None:
-        add_vectors(nlp, V["vectors"])
-        msg.good(f"Added vectors: {V['vectors']}")
+    init_vocab(nlp, data=V["data"], lookups=V["lookups"], vectors=V["vectors"])
     optimizer = T["optimizer"]
     before_to_disk = create_before_to_disk_callback(T["before_to_disk"])
     # Components that shouldn't be updated during training
@@ -94,7 +90,11 @@ def init_pipeline(config: Config, use_gpu: int = -1) -> Language:
 
 
 def init_vocab(
-    nlp: Language, *, data: Optional[Path] = None, lookups: Optional[Lookups] = None,
+    nlp: Language,
+    *,
+    data: Optional[Path] = None,
+    lookups: Optional[Lookups] = None,
+    vectors: Optional[str] = None,
 ) -> Language:
     if lookups:
         nlp.vocab.lookups = lookups
@@ -115,6 +115,10 @@ def init_vocab(
         oov_prob = DEFAULT_OOV_PROB
     nlp.vocab.cfg.update({"oov_prob": oov_prob})
     msg.good(f"Added {len(nlp.vocab)} lexical entries to the vocab")
+    msg.good("Created vocabulary")
+    if vectors is not None:
+        add_vectors(nlp, vectors)
+        msg.good(f"Added vectors: {vectors}")
 
 
 def add_tok2vec_weights(