Mirror of https://github.com/explosion/spaCy.git
* define new architectures for the pretraining objective
* add loss function as attr of the model
* cleanup
* cleanup
* shorten name
* fix typo
* remove unused error
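For illustration only, here is a rough sketch (not the actual spaCy source) of the pattern the commit message describes: a pretraining-objective architecture is registered in the architectures registry, and its loss function is attached as an attribute of the returned Thinc model so the pretraining loop can look it up. The registry name, the layer choice, and the loss below are placeholders.

import spacy
from thinc.api import Maxout, Model


@spacy.registry.architectures("custom.PretrainObjectiveSketch.v1")  # hypothetical name
def create_objective(maxout_pieces: int, hidden_size: int) -> Model:
    # Placeholder output layer mapping tok2vec states to the objective's targets.
    model = Maxout(nO=hidden_size, nP=maxout_pieces)

    def get_loss(predictions, targets):
        # Placeholder squared-error loss; the real character objective compares
        # predicted and actual leading/trailing characters of each token.
        d_predictions = predictions - targets
        loss = (d_predictions ** 2).sum()
        return loss, d_predictions

    # The loss travels with the model as an attribute instead of being
    # hard-coded in the pretraining loop.
    model.attrs["loss"] = get_loss
    return model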
		
			
				
	
	
		
44 lines · 718 B · INI
[paths]
raw_text = null

[pretraining]
max_epochs = 1000
dropout = 0.2
n_save_every = null
component = "tok2vec"
layer = ""
corpus = "corpora.pretrain"

[pretraining.batcher]
@batchers = "spacy.batch_by_words.v1"
size = 3000
discard_oversize = false
tolerance = 0.2
get_length = null

[pretraining.objective]
@architectures = "spacy.PretrainCharacters.v1"
maxout_pieces = 3
hidden_size = 300
n_characters = 4

[pretraining.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = true
eps = 1e-8
learn_rate = 0.001

[corpora]

[corpora.pretrain]
@readers = "spacy.JsonlCorpus.v1"
path = ${paths.raw_text}
min_length = 5
max_length = 500
limit = 0
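As a usage note (a minimal sketch under assumptions, not part of this commit): paths.raw_text is null in the config, so it has to be supplied as an override when pretraining is launched, and spacy.JsonlCorpus.v1 reads one JSON object per line with a "text" key. The file names raw_text.jsonl, pretrain.cfg, and ./pretrain-output below are illustrative.

import subprocess

import srsly  # serialization helpers shipped as a spaCy dependency

# One JSON object per line; documents outside the min_length/max_length token
# bounds in [corpora.pretrain] are skipped by the reader.
texts = [
    {"text": "Raw, unannotated text for the tok2vec pretraining objective."},
    {"text": "Each line becomes one example for the character-prediction task."},
]
srsly.write_jsonl("raw_text.jsonl", texts)

# Launch pretraining, overriding the null paths.raw_text from the config.
subprocess.run(
    [
        "python", "-m", "spacy", "pretrain",
        "pretrain.cfg", "./pretrain-output",
        "--paths.raw_text", "raw_text.jsonl",
    ],
    check=True,
)

With spacy.PretrainCharacters.v1, the tok2vec layer is trained to predict the leading and trailing characters of each word (n_characters = 4 here), so only raw text is required rather than annotated examples.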