Mirror of https://github.com/explosion/spaCy.git (synced 2025-10-24 20:51:30 +03:00)
			
		
		
		
	Set data augmentation by default
This commit is contained in:
		
Parent: 34a2eecb17
Commit: 4c1b6a4c81
					
				|  | @ -68,6 +68,8 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0, | ||||||
|     batch_sizes = util.compounding(util.env_opt('batch_from', 1), |     batch_sizes = util.compounding(util.env_opt('batch_from', 1), | ||||||
|                                    util.env_opt('batch_to', 64), |                                    util.env_opt('batch_to', 64), | ||||||
|                                    util.env_opt('batch_compound', 1.001)) |                                    util.env_opt('batch_compound', 1.001)) | ||||||
|  |     gold_preproc = util.env_opt('gold_preproc', False) | ||||||
|  |     noise_level = util.env_opt('noise_level', 0.25) | ||||||
| 
 | 
 | ||||||
|     if resume: |     if resume: | ||||||
|         prints(output_path / 'model19.pickle', title="Resuming training") |         prints(output_path / 'model19.pickle', title="Resuming training") | ||||||
|  | @ -86,7 +88,9 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0, | ||||||
|                 i += 20 |                 i += 20 | ||||||
|             with tqdm.tqdm(total=n_train_words, leave=False) as pbar: |             with tqdm.tqdm(total=n_train_words, leave=False) as pbar: | ||||||
|                 train_docs = corpus.train_docs(nlp, projectivize=True, |                 train_docs = corpus.train_docs(nlp, projectivize=True, | ||||||
|                                                gold_preproc=False, max_length=0) |                                                gold_preproc=gold_preproc, | ||||||
|  |                                                noise_level=noise_level, | ||||||
|  |                                                max_length=0) | ||||||
|                 losses = {} |                 losses = {} | ||||||
|                 for batch in minibatch(train_docs, size=batch_sizes): |                 for batch in minibatch(train_docs, size=batch_sizes): | ||||||
|                     docs, golds = zip(*batch) |                     docs, golds = zip(*batch) | ||||||
|  | @ -105,7 +109,7 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0, | ||||||
|                 scorer = nlp_loaded.evaluate( |                 scorer = nlp_loaded.evaluate( | ||||||
|                             corpus.dev_docs( |                             corpus.dev_docs( | ||||||
|                                 nlp_loaded, |                                 nlp_loaded, | ||||||
|                                 gold_preproc=False)) |                                 gold_preproc=gold_preproc)) | ||||||
|                 acc_loc =(output_path / ('model%d' % i) / 'accuracy.json') |                 acc_loc =(output_path / ('model%d' % i) / 'accuracy.json') | ||||||
|                 with acc_loc.open('w') as file_: |                 with acc_loc.open('w') as file_: | ||||||
|                     file_.write(json_dumps(scorer.scores)) |                     file_.write(json_dumps(scorer.scores)) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user