mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-26 05:31:15 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			84 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			84 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| include ../../_includes/_mixins
 | |
| 
 | |
| p
 | |
|     |  This workflow describes how to train new statistical models for spaCy's
 | |
|     |  part-of-speech tagger, named entity recognizer and dependency parser.
 | |
|     |  Once the model is trained, you can then
 | |
|     |  #[+a("/docs/usage/saving-loading") save and load] it.
 | |
| 
 | |
| +h(2, "101") Training 101
 | |
| 
 | |
| include _spacy-101/_training
 | |
| 
 | |
| +h(2, "train-pos-tagger") Training the part-of-speech tagger
 | |
| 
 | |
| +code.
 | |
|     from spacy.vocab import Vocab
 | |
|     from spacy.tagger import Tagger
 | |
|     from spacy.tokens import Doc
 | |
|     from spacy.gold import GoldParse
 | |
| 
 | |
| 
 | |
|     vocab = Vocab(tag_map={'N': {'pos': 'NOUN'}, 'V': {'pos': 'VERB'}})
 | |
|     tagger = Tagger(vocab)
 | |
| 
 | |
|     doc = Doc(vocab, words=['I', 'like', 'stuff'])
 | |
|     gold = GoldParse(doc, tags=['N', 'V', 'N'])
 | |
|     tagger.update(doc, gold)
 | |
| 
 | |
|     tagger.model.end_training()
 | |
| 
 | |
| p
 | |
|     +button(gh("spaCy", "examples/training/train_tagger.py"), false, "secondary") Full example
 | |
| 
 | |
| +h(2, "train-entity") Training the named entity recognizer
 | |
| 
 | |
| +code.
 | |
|     from spacy.vocab import Vocab
 | |
|     from spacy.pipeline import EntityRecognizer
 | |
|     from spacy.tokens import Doc
 | |
| 
 | |
|     vocab = Vocab()
 | |
|     entity = EntityRecognizer(vocab, entity_types=['PERSON', 'LOC'])
 | |
| 
 | |
|     doc = Doc(vocab, words=['Who', 'is', 'Shaka', 'Khan', '?'])
 | |
|     entity.update(doc, ['O', 'O', 'B-PERSON', 'L-PERSON', 'O'])
 | |
| 
 | |
|     entity.model.end_training()
 | |
| 
 | |
| p
 | |
|     +button(gh("spaCy", "examples/training/train_ner.py"), false, "secondary") Full example
 | |
| 
 | |
| +h(2, "extend-entity") Extending the named entity recognizer
 | |
| 
 | |
| p
 | |
|     |  All #[+a("/docs/usage/models") spaCy models] support online learning, so
 | |
|     |  you can update a pre-trained model with new examples. You can even add
 | |
|     |  new classes to an existing model, to recognize a new entity type,
 | |
|     |  part-of-speech tag, or syntactic relation. Updating an existing model is
 | |
|     |  particularly useful as a "quick and dirty" solution if you have only a
 | |
|     |  few corrections or annotations.
 | |
| 
 | |
| p.o-inline-list
 | |
|     +button(gh("spaCy", "examples/training/train_new_entity_type.py"), true, "secondary") Full example
 | |
|     +button("/docs/usage/training-ner", false, "secondary") Usage Workflow
 | |
| 
 | |
| +h(2, "train-dependency") Training the dependency parser
 | |
| 
 | |
| +code.
 | |
|     from spacy.vocab import Vocab
 | |
|     from spacy.pipeline import DependencyParser
 | |
|     from spacy.tokens import Doc
 | |
| 
 | |
|     vocab = Vocab()
 | |
|     parser = DependencyParser(vocab, labels=['nsubj', 'compound', 'dobj', 'punct'])
 | |
| 
 | |
|     doc = Doc(vocab, words=['Who', 'is', 'Shaka', 'Khan', '?'])
 | |
|     parser.update(doc, [(1, 'nsubj'), (1, 'ROOT'), (3, 'compound'), (1, 'dobj'),
 | |
|                         (1, 'punct')])
 | |
| 
 | |
|     parser.model.end_training()
 | |
| 
 | |
| p
 | |
|     +button(gh("spaCy", "examples/training/train_parser.py"), false, "secondary") Full example
 |