mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-11 17:56:30 +03:00
Add -prune-vectors argument to spacy.cly.train
This commit is contained in:
parent
e026b29ea9
commit
e98451b5f7
|
@ -32,6 +32,7 @@ numpy.random.seed(0)
|
||||||
n_sents=("number of sentences", "option", "ns", int),
|
n_sents=("number of sentences", "option", "ns", int),
|
||||||
use_gpu=("Use GPU", "option", "g", int),
|
use_gpu=("Use GPU", "option", "g", int),
|
||||||
vectors=("Model to load vectors from", "option", "v"),
|
vectors=("Model to load vectors from", "option", "v"),
|
||||||
|
vectors_limit=("Truncate to N vectors (requires -v)", "option", None, int),
|
||||||
no_tagger=("Don't train tagger", "flag", "T", bool),
|
no_tagger=("Don't train tagger", "flag", "T", bool),
|
||||||
no_parser=("Don't train parser", "flag", "P", bool),
|
no_parser=("Don't train parser", "flag", "P", bool),
|
||||||
no_entities=("Don't train NER", "flag", "N", bool),
|
no_entities=("Don't train NER", "flag", "N", bool),
|
||||||
|
@ -40,9 +41,9 @@ numpy.random.seed(0)
|
||||||
meta_path=("Optional path to meta.json. All relevant properties will be "
|
meta_path=("Optional path to meta.json. All relevant properties will be "
|
||||||
"overwritten.", "option", "m", Path))
|
"overwritten.", "option", "m", Path))
|
||||||
def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
|
def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
|
||||||
use_gpu=-1, vectors=None, no_tagger=False, no_parser=False,
|
use_gpu=-1, vectors=None, vectors_limit=None, no_tagger=False,
|
||||||
no_entities=False, gold_preproc=False, version="0.0.0",
|
no_parser=False, no_entities=False, gold_preproc=False,
|
||||||
meta_path=None):
|
version="0.0.0", meta_path=None):
|
||||||
"""
|
"""
|
||||||
Train a model. Expects data in spaCy's JSON format.
|
Train a model. Expects data in spaCy's JSON format.
|
||||||
"""
|
"""
|
||||||
|
@ -94,6 +95,8 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
|
||||||
nlp.meta.update(meta)
|
nlp.meta.update(meta)
|
||||||
if vectors:
|
if vectors:
|
||||||
util.load_model(vectors, vocab=nlp.vocab)
|
util.load_model(vectors, vocab=nlp.vocab)
|
||||||
|
if vectors_limit is not None:
|
||||||
|
nlp.vocab.prune_vectors(vectors_limit)
|
||||||
for name in pipeline:
|
for name in pipeline:
|
||||||
nlp.add_pipe(nlp.create_pipe(name), name=name)
|
nlp.add_pipe(nlp.create_pipe(name), name=name)
|
||||||
optimizer = nlp.begin_training(lambda: corpus.train_tuples, device=use_gpu)
|
optimizer = nlp.begin_training(lambda: corpus.train_tuples, device=use_gpu)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user