2016-11-05 22:40:11 +03:00
|
|
|
include ../../_includes/_mixins
|
|
|
|
|
|
|
|
p
|
2017-04-16 21:35:56 +03:00
|
|
|
| This workflow describes how to train new statistical models for spaCy's
|
2016-11-05 22:40:11 +03:00
|
|
|
| part-of-speech tagger, named entity recognizer and dependency parser.
|
2017-04-16 21:35:56 +03:00
|
|
|
| Once the model is trained, you can
|
|
|
|
| #[+a("/docs/usage/saving-loading") save and load] it.
|
2016-11-05 22:40:11 +03:00
|
|
|
|
2017-05-25 12:18:02 +03:00
|
|
|
+h(2, "101") Training 101
|
|
|
|
|
|
|
|
include _spacy-101/_training
|
|
|
|
|
2016-11-05 22:40:11 +03:00
|
|
|
+h(2, "train-pos-tagger") Training the part-of-speech tagger
|
|
|
|
|
|
|
|
+code.
|
|
|
|
from spacy.vocab import Vocab
|
2016-12-20 23:01:16 +03:00
|
|
|
from spacy.tagger import Tagger
|
2016-11-05 22:40:11 +03:00
|
|
|
from spacy.tokens import Doc
|
2016-12-20 23:01:16 +03:00
|
|
|
from spacy.gold import GoldParse
|
|
|
|
|
2016-11-05 22:40:11 +03:00
|
|
|
|
|
|
|
vocab = Vocab(tag_map={'N': {'pos': 'NOUN'}, 'V': {'pos': 'VERB'}})
|
|
|
|
tagger = Tagger(vocab)
|
|
|
|
|
|
|
|
doc = Doc(vocab, words=['I', 'like', 'stuff'])
|
2016-12-20 23:01:16 +03:00
|
|
|
gold = GoldParse(doc, tags=['N', 'V', 'N'])
|
|
|
|
tagger.update(doc, gold)
|
2016-11-05 22:40:11 +03:00
|
|
|
|
|
|
|
tagger.model.end_training()
|
|
|
|
|
|
|
|
p
|
|
|
|
+button(gh("spaCy", "examples/training/train_tagger.py"), false, "secondary") Full example
|
|
|
|
|
|
|
|
+h(2, "train-entity") Training the named entity recognizer
|
|
|
|
|
|
|
|
+code.
|
|
|
|
from spacy.vocab import Vocab
|
|
|
|
from spacy.pipeline import EntityRecognizer
|
|
|
|
from spacy.tokens import Doc
|
|
|
|
|
|
|
|
vocab = Vocab()
|
|
|
|
entity = EntityRecognizer(vocab, entity_types=['PERSON', 'LOC'])
|
|
|
|
|
|
|
|
doc = Doc(vocab, words=['Who', 'is', 'Shaka', 'Khan', '?'])
|
|
|
|
entity.update(doc, ['O', 'O', 'B-PERSON', 'L-PERSON', 'O'])
|
|
|
|
|
|
|
|
entity.model.end_training()
|
|
|
|
|
|
|
|
p
|
|
|
|
+button(gh("spaCy", "examples/training/train_ner.py"), false, "secondary") Full example
|
|
|
|
|
2017-04-16 21:35:56 +03:00
|
|
|
+h(2, "extend-entity") Extending the named entity recognizer
|
|
|
|
|
|
|
|
p
|
|
|
|
| All #[+a("/docs/usage/models") spaCy models] support online learning, so
|
|
|
|
| you can update a pre-trained model with new examples. You can even add
|
|
|
|
| new classes to an existing model, to recognize a new entity type,
|
|
|
|
| part-of-speech, or syntactic relation. Updating an existing model is
|
|
|
|
| particularly useful as a "quick and dirty solution" if you have only a
|
|
|
|
| few corrections or annotations.
|
|
|
|
|
|
|
|
p.o-inline-list
|
|
|
|
+button(gh("spaCy", "examples/training/train_new_entity_type.py"), true, "secondary") Full example
|
|
|
|
+button("/docs/usage/training-ner", false, "secondary") Usage Workflow
|
|
|
|
|
|
|
|
+h(2, "train-dependency") Training the dependency parser
|
2016-11-05 22:40:11 +03:00
|
|
|
|
|
|
|
+code.
|
|
|
|
from spacy.vocab import Vocab
|
|
|
|
from spacy.pipeline import DependencyParser
|
|
|
|
from spacy.tokens import Doc
|
|
|
|
|
|
|
|
vocab = Vocab()
|
|
|
|
parser = DependencyParser(vocab, labels=['nsubj', 'compound', 'dobj', 'punct'])
|
|
|
|
|
|
|
|
doc = Doc(vocab, words=['Who', 'is', 'Shaka', 'Khan', '?'])
|
|
|
|
parser.update(doc, [(1, 'nsubj'), (1, 'ROOT'), (3, 'compound'), (1, 'dobj'),
|
|
|
|
(1, 'punct')])
|
|
|
|
|
|
|
|
parser.model.end_training()
|
|
|
|
|
|
|
|
p
|
|
|
|
+button(gh("spaCy", "examples/training/train_parser.py"), false, "secondary") Full example
|