# spaCy training config (thinc INI-style format; registers as "INI", 66 lines, 1.4 KiB).
# Mirror of https://github.com/explosion/spaCy.git — synced 2025-11-04 01:48:04 +03:00
#
# Upstream commit message:
# * avoid changing original config
# * fix elif structure, batch with just int crashes otherwise
# * tok2vec example with doc2feats, encode and embed architectures
# * further clean up MultiHashEmbed
# * further generalize Tok2Vec to work with extract-embed-encode parts
# * avoid initializing the charembed layer with Docs (for now ?)
# * small fixes for bilstm config (still does not run)
# * rename to core layer
# * move new configs
# * walk model to set nI instead of using core ref
# * fix senter overfitting test to be more similar to the training data
#   (avoid flakey behaviour)
# Training loop settings: schedule, scoring, and batching.
[training]
use_gpu = -1
limit = 0
dropout = 0.2
patience = 10000
eval_frequency = 200
# NER F-score is the only tracked metric and carries full weight.
scores = ["ents_f"]
score_weights = {"ents_f": 1}
orth_variant_level = 0.0
gold_preproc = true
max_length = 0
batch_size = 25

# Optimizer resolved from the registry.
[optimizer]
@optimizers = "Adam.v1"
learn_rate = 0.001
beta1 = 0.9
beta2 = 0.999

[nlp]
lang = "en"
vectors = null

# Shared tok2vec component; downstream components reuse its output.
[nlp.pipeline.tok2vec]
factory = "tok2vec"

# Tok2Vec composed from extract -> embed -> encode sub-layers below.
[nlp.pipeline.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1"

[nlp.pipeline.tok2vec.model.extract]
@architectures = "spacy.Doc2Feats.v1"
columns = ["ID", "NORM", "PREFIX", "SUFFIX", "SHAPE", "ORTH"]

[nlp.pipeline.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1"
# Reuses the extract layer's column list via interpolation.
columns = ${nlp.pipeline.tok2vec.model.extract:columns}
width = 96
rows = 2000
use_subwords = true
pretrained_vectors = null

[nlp.pipeline.tok2vec.model.embed.mix]
@architectures = "spacy.LayerNormalizedMaxout.v1"
width = ${nlp.pipeline.tok2vec.model.embed:width}
maxout_pieces = 3

[nlp.pipeline.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
# Width is kept in sync with the embed layer via interpolation.
width = ${nlp.pipeline.tok2vec.model.embed:width}
window_size = 1
maxout_pieces = 3
depth = 2

# NER component consuming the shared tok2vec tensors.
[nlp.pipeline.ner]
factory = "ner"

[nlp.pipeline.ner.model]
@architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 6
hidden_width = 64
maxout_pieces = 2

[nlp.pipeline.ner.model.tok2vec]
@architectures = "spacy.Tok2VecTensors.v1"
width = ${nlp.pipeline.tok2vec.model.embed:width}