mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	* bring back default build_text_classifier method * remove _set_dims_ hack in favor of proper dim inference * add tok2vec initialize to unit test * small fixes * add unit test for various textcat config settings * logistic output layer does not have nO * fix window_size setting * proper fix * fix W initialization * Update textcat training example * Use ml_datasets * Convert training data to `Example` format * Use `n_texts` to set proportionate dev size * fix _init renaming on latest thinc * avoid setting a non-existing dim * update to thinc==8.0.0a2 * add BOW and CNN defaults for easy testing * various experiments with train_textcat script, fix softmax activation in textcat bow * allow textcat train script to work on other datasets as well * have dataset as a parameter * train textcat from config, with example config * add config for training textcat * formatting * fix exclusive_classes * fixing BOW for GPU * bump thinc to 8.0.0a3 (not published yet so CI will fail) * add in link_vectors_to_models which got deleted Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
		
			
				
	
	
		
			28 lines
		
	
	
		
			743 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			28 lines
		
	
	
		
			743 B
		
	
	
	
		
			Python
		
	
	
	
	
	
| import numpy
 | |
| from thinc.api import Model, Unserializable
 | |
| 
 | |
| 
 | |
def SpacyVectors(vectors) -> Model:
    """Build the "spacy_vectors" layer around an existing vectors table.

    The table is wrapped in ``Unserializable`` and stored under the
    ``"vectors"`` attr, where ``forward`` reads it back through ``.obj``.

    vectors: The vectors table to look rows up in.
    RETURNS (Model): A model named "spacy_vectors" whose forward function
        is ``forward``.
    """
    return Model(
        "spacy_vectors",
        forward,
        attrs={"vectors": Unserializable(vectors)},
    )
 | |
| 
 | |
| 
 | |
def forward(model, docs, is_train: bool):
    """Look up one array of vector rows per doc from the stored table.

    For every word in a doc, ``word.orth`` is mapped to a row index via
    ``vectors.key2row``; words without an entry fall back to row 0. The
    selected rows are gathered with ``vectors.data[indices]``.

    model: The layer; ``model.attrs["vectors"].obj`` must expose ``key2row``
        and ``data``.
    docs: Iterable of sized sequences whose items have an ``orth`` attribute
        (presumably spaCy ``Doc`` objects -- confirm against callers).
    is_train (bool): Unused by this layer.
    RETURNS (tuple): ``(batch, backprop)`` where ``batch`` is a list with one
        array per doc and ``backprop`` is a callback that always returns
        ``None``.
    """
    vectors = model.attrs["vectors"].obj
    key2row = vectors.key2row
    batch = []
    for doc in docs:
        indices = numpy.zeros((len(doc),), dtype="i")
        for i, word in enumerate(doc):
            # Keys missing from the table fall back to row 0.
            indices[i] = key2row.get(word.orth, 0)
        batch.append(vectors.data[indices])

    # Defined at function scope so the callback also exists when ``docs`` is
    # empty; defining it inside the loop left the name unbound in that case.
    def backprop(dY):
        return None

    return batch, backprop
 |