mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 09:44:36 +03:00
Update train_ner_standalone example
This commit is contained in:
parent
e514d6aa0a
commit
cbb1fbef80
|
@ -20,9 +20,10 @@ import plac
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import random
|
import random
|
||||||
import json
|
import json
|
||||||
|
import tqdm
|
||||||
|
|
||||||
from thinc.neural.optimizers import Adam
|
from thinc.neural.optimizers import Adam
|
||||||
from thinc.neural.ops import NumpyOps
|
from thinc.neural.ops import NumpyOps
|
||||||
import tqdm
|
|
||||||
|
|
||||||
from spacy.vocab import Vocab
|
from spacy.vocab import Vocab
|
||||||
from spacy.pipeline import TokenVectorEncoder, NeuralEntityRecognizer
|
from spacy.pipeline import TokenVectorEncoder, NeuralEntityRecognizer
|
||||||
|
@ -35,6 +36,7 @@ from spacy.gold import minibatch
|
||||||
from spacy.scorer import Scorer
|
from spacy.scorer import Scorer
|
||||||
import spacy.util
|
import spacy.util
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
unicode
|
unicode
|
||||||
except NameError:
|
except NameError:
|
||||||
|
@ -55,20 +57,17 @@ def init_vocab():
|
||||||
|
|
||||||
|
|
||||||
class Pipeline(object):
|
class Pipeline(object):
|
||||||
def __init__(self, vocab=None, tokenizer=None, tensorizer=None, entity=None):
|
def __init__(self, vocab=None, tokenizer=None, entity=None):
|
||||||
if vocab is None:
|
if vocab is None:
|
||||||
vocab = init_vocab()
|
vocab = init_vocab()
|
||||||
if tokenizer is None:
|
if tokenizer is None:
|
||||||
tokenizer = Tokenizer(vocab, {}, None, None, None)
|
tokenizer = Tokenizer(vocab, {}, None, None, None)
|
||||||
if tensorizer is None:
|
|
||||||
tensorizer = TokenVectorEncoder(vocab)
|
|
||||||
if entity is None:
|
if entity is None:
|
||||||
entity = NeuralEntityRecognizer(vocab)
|
entity = NeuralEntityRecognizer(vocab)
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.tokenizer = tokenizer
|
self.tokenizer = tokenizer
|
||||||
self.tensorizer = tensorizer
|
|
||||||
self.entity = entity
|
self.entity = entity
|
||||||
self.pipeline = [tensorizer, self.entity]
|
self.pipeline = [self.entity]
|
||||||
|
|
||||||
def begin_training(self):
|
def begin_training(self):
|
||||||
for model in self.pipeline:
|
for model in self.pipeline:
|
||||||
|
@ -102,10 +101,8 @@ class Pipeline(object):
|
||||||
golds = [self.make_gold(input_, annot) for input_, annot in
|
golds = [self.make_gold(input_, annot) for input_, annot in
|
||||||
zip(inputs, annots)]
|
zip(inputs, annots)]
|
||||||
|
|
||||||
tensors, bp_tensors = self.tensorizer.update(docs, golds, drop=drop)
|
self.entity.update(docs, golds, drop=drop,
|
||||||
d_tensors = self.entity.update((docs, tensors), golds, drop=drop,
|
sgd=sgd, losses=losses)
|
||||||
sgd=sgd, losses=losses)
|
|
||||||
bp_tensors(d_tensors, sgd=sgd)
|
|
||||||
return losses
|
return losses
|
||||||
|
|
||||||
def evaluate(self, examples):
|
def evaluate(self, examples):
|
||||||
|
@ -123,7 +120,6 @@ class Pipeline(object):
|
||||||
elif not path.is_dir():
|
elif not path.is_dir():
|
||||||
raise IOError("Can't save pipeline to %s\nNot a directory" % path)
|
raise IOError("Can't save pipeline to %s\nNot a directory" % path)
|
||||||
self.vocab.to_disk(path / 'vocab')
|
self.vocab.to_disk(path / 'vocab')
|
||||||
self.tensorizer.to_disk(path / 'tensorizer')
|
|
||||||
self.entity.to_disk(path / 'ner')
|
self.entity.to_disk(path / 'ner')
|
||||||
|
|
||||||
def from_disk(self, path):
|
def from_disk(self, path):
|
||||||
|
@ -133,7 +129,6 @@ class Pipeline(object):
|
||||||
if not path.is_dir():
|
if not path.is_dir():
|
||||||
raise IOError("Cannot load pipeline from %s\nNot a directory" % path)
|
raise IOError("Cannot load pipeline from %s\nNot a directory" % path)
|
||||||
self.vocab = self.vocab.from_disk(path / 'vocab')
|
self.vocab = self.vocab.from_disk(path / 'vocab')
|
||||||
self.tensorizer = self.tensorizer.from_disk(path / 'tensorizer')
|
|
||||||
self.entity = self.entity.from_disk(path / 'ner')
|
self.entity = self.entity.from_disk(path / 'ner')
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user