mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	* make disable_pipes deprecated in favour of the new toggle_pipes
	* rewrite disable_pipes statements
	* update documentation
	* remove bin/wiki_entity_linking folder
	* one more fix
	* remove deprecated link to documentation
	* few more doc fixes
	* add note about name change to the docs
	* restore original disable_pipes
	* small fixes
	* fix typo
	* fix error number to W096
	* rename to select_pipes
	* also make changes to the documentation

	Co-authored-by: Matthew Honnibal <honnibal+gh@gmail.com>
		
			
				
	
	
		
			44 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			44 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import spacy
 | |
| from spacy.util import minibatch, compounding
 | |
| 
 | |
| 
 | |
def test_issue3611():
    """Check that textcat n-grams work when n exceeds a doc's token count.

    Trains a bag-of-words text categorizer configured with ``ngram_size=2``
    on a tiny dataset that includes the very short text ``"inoff"``. The
    visible body makes no assertions; it only runs the training loop.
    """
    labels = ["offensive", "inoffensive"]
    texts = [
        "This is an offensive text",
        "This is the second offensive text",
        "inoff",
    ]
    gold_labels = ["offensive", "offensive", "inoffensive"]

    # One {"cats": {label: bool}} annotation per text; exactly one label is True.
    annotations = [
        {"cats": {label: label == gold for label in labels}}
        for gold in gold_labels
    ]
    train_data = list(zip(texts, annotations))

    # Blank English pipeline holding only the text categorizer.
    nlp = spacy.blank("en")
    textcat_config = {"exclusive_classes": True, "architecture": "bow", "ngram_size": 2}
    textcat = nlp.create_pipe("textcat", config=textcat_config)
    for label in labels:
        textcat.add_label(label)
    nlp.add_pipe(textcat, last=True)

    # Train with only the textcat component enabled.
    with nlp.select_pipes(enable="textcat"):
        optimizer = nlp.begin_training(X=texts, Y=gold_labels)
        for _ in range(3):
            losses = {}
            batch_sizes = compounding(4.0, 32.0, 1.001)
            for batch in minibatch(train_data, size=batch_sizes):
                nlp.update(examples=batch, sgd=optimizer, drop=0.1, losses=losses)
 |