mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 20:28:20 +03:00
Set data augmentation by default
This commit is contained in:
parent
34a2eecb17
commit
4c1b6a4c81
|
@@ -68,6 +68,8 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
|
||||||
batch_sizes = util.compounding(util.env_opt('batch_from', 1),
|
batch_sizes = util.compounding(util.env_opt('batch_from', 1),
|
||||||
util.env_opt('batch_to', 64),
|
util.env_opt('batch_to', 64),
|
||||||
util.env_opt('batch_compound', 1.001))
|
util.env_opt('batch_compound', 1.001))
|
||||||
|
gold_preproc = util.env_opt('gold_preproc', False)
|
||||||
|
noise_level = util.env_opt('noise_level', 0.25)
|
||||||
|
|
||||||
if resume:
|
if resume:
|
||||||
prints(output_path / 'model19.pickle', title="Resuming training")
|
prints(output_path / 'model19.pickle', title="Resuming training")
|
||||||
|
@@ -86,7 +88,9 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
|
||||||
i += 20
|
i += 20
|
||||||
with tqdm.tqdm(total=n_train_words, leave=False) as pbar:
|
with tqdm.tqdm(total=n_train_words, leave=False) as pbar:
|
||||||
train_docs = corpus.train_docs(nlp, projectivize=True,
|
train_docs = corpus.train_docs(nlp, projectivize=True,
|
||||||
gold_preproc=False, max_length=0)
|
gold_preproc=gold_preproc,
|
||||||
|
noise_level=noise_level,
|
||||||
|
max_length=0)
|
||||||
losses = {}
|
losses = {}
|
||||||
for batch in minibatch(train_docs, size=batch_sizes):
|
for batch in minibatch(train_docs, size=batch_sizes):
|
||||||
docs, golds = zip(*batch)
|
docs, golds = zip(*batch)
|
||||||
|
@@ -105,7 +109,7 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
|
||||||
scorer = nlp_loaded.evaluate(
|
scorer = nlp_loaded.evaluate(
|
||||||
corpus.dev_docs(
|
corpus.dev_docs(
|
||||||
nlp_loaded,
|
nlp_loaded,
|
||||||
gold_preproc=False))
|
gold_preproc=gold_preproc))
|
||||||
acc_loc =(output_path / ('model%d' % i) / 'accuracy.json')
|
acc_loc =(output_path / ('model%d' % i) / 'accuracy.json')
|
||||||
with acc_loc.open('w') as file_:
|
with acc_loc.open('w') as file_:
|
||||||
file_.write(json_dumps(scorer.scores))
|
file_.write(json_dumps(scorer.scores))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user