Return optimizer from begin_training, creating if necessary

Matthew Honnibal 2017-11-06 14:26:26 +01:00
parent 465adfee94
commit 25859dbb48
2 changed files with 46 additions and 14 deletions
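
In short: every component's begin_training now accepts an optional `sgd` argument and returns an optimizer, creating a default one via the new `create_optimizer` hook when none is supplied. A minimal sketch of the resulting calling convention (the `tagger`, `gold_tuples` and `train_batches` names here are illustrative, not taken from this commit):

    # The component builds a default optimizer when none is passed in...
    optimizer = tagger.begin_training(gold_tuples, pipeline=pipeline)
    # ...and the returned optimizer can then drive the update loop.
    for docs, golds in train_batches:
        tagger.update(docs, golds, sgd=optimizer)

Passing an existing optimizer with `sgd=...` hands that same object back, so several components can share one optimizer.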

spacy/pipeline.pyx

@@ -30,6 +30,7 @@ from .attrs import POS
 from .parts_of_speech import X
 from ._ml import Tok2Vec, build_text_classifier, build_tagger_model
 from ._ml import link_vectors_to_models, zero_init, flatten
+from ._ml import create_default_optimizer
 from . import util
@@ -139,12 +140,19 @@ class Pipe(object):
         """
         raise NotImplementedError
 
-    def begin_training(self, gold_tuples=tuple(), pipeline=None):
+    def create_optimizer(self):
+        return create_default_optimizer(self.model.ops,
+                                        **self.cfg.get('optimizer', {}))
+
+    def begin_training(self, gold_tuples=tuple(), pipeline=None, sgd=None):
         """Initialize the pipe for training, using data examples if available.
         If no model has been initialized yet, the model is added."""
         if self.model is True:
             self.model = self.Model(**self.cfg)
         link_vectors_to_models(self.vocab)
+        if sgd is None:
+            sgd = self.create_optimizer()
+        return sgd
 
     def use_params(self, params):
         """Modify the pipe's model, to use the given parameter values."""
@@ -336,8 +344,8 @@ class Tensorizer(Pipe):
         loss = (d_scores**2).sum()
         return loss, d_scores
 
-    def begin_training(self, gold_tuples=tuple(), pipeline=None):
-        """Allocate models, pre-process training data and acquire a trainer and
+    def begin_training(self, gold_tuples=tuple(), pipeline=None, sgd=None):
+        """Allocate models, pre-process training data and acquire an
         optimizer.
 
         gold_tuples (iterable): Gold-standard training data.
@@ -349,9 +357,11 @@ class Tensorizer(Pipe):
         if self.model is True:
             self.cfg['input_size'] = 384
             self.cfg['output_size'] = 300
-            #self.cfg['pretrained_dims'] = self.vocab.vectors_length
             self.model = self.Model(**self.cfg)
         link_vectors_to_models(self.vocab)
+        if sgd is None:
+            sgd = self.create_optimizer()
+        return sgd
 
 
 class Tagger(Pipe):
@@ -457,7 +467,7 @@ class Tagger(Pipe):
         d_scores = self.model.ops.unflatten(d_scores, [len(d) for d in docs])
         return float(loss), d_scores
 
-    def begin_training(self, gold_tuples=tuple(), pipeline=None):
+    def begin_training(self, gold_tuples=tuple(), pipeline=None, sgd=None):
         orig_tag_map = dict(self.vocab.morphology.tag_map)
         new_tag_map = {}
         for raw_text, annots_brackets in gold_tuples:
@@ -477,6 +487,9 @@ class Tagger(Pipe):
             self.cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1]
         self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
         link_vectors_to_models(self.vocab)
+        if sgd is None:
+            sgd = self.create_optimizer()
+        return sgd
 
     @classmethod
     def Model(cls, n_tags, **cfg):
@@ -627,7 +640,8 @@ class MultitaskObjective(Tagger):
     def set_annotations(self, docs, dep_ids, tensors=None):
         pass
 
-    def begin_training(self, gold_tuples=tuple(), pipeline=None, tok2vec=None):
+    def begin_training(self, gold_tuples=tuple(), pipeline=None, tok2vec=None,
+                       sgd=None):
         gold_tuples = nonproj.preprocess_training_data(gold_tuples)
         for raw_text, annots_brackets in gold_tuples:
             for annots, brackets in annots_brackets:
@@ -643,6 +657,9 @@ class MultitaskObjective(Tagger):
                 Softmax(len(self.labels), token_vector_width)
             )
         link_vectors_to_models(self.vocab)
+        if sgd is None:
+            sgd = self.create_optimizer()
+        return sgd
 
     @classmethod
     def Model(cls, n_tags, tok2vec=None, **cfg):
@@ -739,7 +756,7 @@ class SimilarityHook(Pipe):
     def update(self, doc1_doc2, golds, sgd=None, drop=0.):
         sims, bp_sims = self.model.begin_update(doc1_doc2, drop=drop)
 
-    def begin_training(self, _=tuple(), pipeline=None):
+    def begin_training(self, _=tuple(), pipeline=None, sgd=None):
         """Allocate model, using width from tensorizer in pipeline.
 
         gold_tuples (iterable): Gold-standard training data.
@@ -748,6 +765,9 @@ class SimilarityHook(Pipe):
         if self.model is True:
             self.model = self.Model(pipeline[0].model.nO)
         link_vectors_to_models(self.vocab)
+        if sgd is None:
+            sgd = self.create_optimizer()
+        return sgd
 
 
 class TextCategorizer(Pipe):
@@ -831,7 +851,7 @@ class TextCategorizer(Pipe):
         self.labels.append(label)
         return 1
 
-    def begin_training(self, gold_tuples=tuple(), pipeline=None):
+    def begin_training(self, gold_tuples=tuple(), pipeline=None, sgd=None):
         if pipeline and getattr(pipeline[0], 'name', None) == 'tensorizer':
             token_vector_width = pipeline[0].model.nO
         else:
@@ -841,6 +861,9 @@ class TextCategorizer(Pipe):
             self.model = self.Model(len(self.labels), token_vector_width,
                                     **self.cfg)
         link_vectors_to_models(self.vocab)
+        if sgd is None:
+            sgd = self.create_optimizer()
+        return sgd
 
 
 cdef class DependencyParser(Parser):
@@ -851,12 +874,12 @@ cdef class DependencyParser(Parser):
     def postprocesses(self):
         return [nonproj.deprojectivize]
 
-    def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
+    def init_multitask_objectives(self, gold_tuples, pipeline, sgd=None, **cfg):
         for target in []:
             labeller = MultitaskObjective(self.vocab, target=target)
             tok2vec = self.model[0]
             labeller.begin_training(gold_tuples, pipeline=pipeline,
-                                    tok2vec=tok2vec)
+                                    tok2vec=tok2vec, sgd=sgd)
             pipeline.append(labeller)
             self._multitasks.append(labeller)
@@ -871,7 +894,7 @@ cdef class EntityRecognizer(Parser):
     nr_feature = 6
 
-    def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
+    def init_multitask_objectives(self, gold_tuples, pipeline, sgd=None, **cfg):
         for target in []:
             labeller = MultitaskObjective(self.vocab, target=target)
             tok2vec = self.model[0]
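
Before the parser file below, note that the two `create_optimizer` hooks added in this commit differ only in where they find the model's `ops`. A condensed restatement of both, pulled together from the hunks above and below:

    # Pipe: self.model is a single Thinc model, so ops comes straight off it.
    def create_optimizer(self):
        return create_default_optimizer(self.model.ops,
                                        **self.cfg.get('optimizer', {}))

    # Parser: self.model is the (tok2vec, lower, upper) tuple built by
    # Parser.Model, so the first element supplies ops.
    def create_optimizer(self):
        return create_default_optimizer(self.model[0].ops,
                                        **self.cfg.get('optimizer', {}))

Both forward `self.cfg.get('optimizer', {})` as keyword arguments, so per-component optimizer settings can in principle ride along in a component's `cfg`.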

spacy/syntax/nn_parser.pyx

@@ -30,7 +30,7 @@ from thinc.neural.util import get_array_module
 from thinc.linalg cimport Vec, VecVec
 
 from .._ml import zero_init, PrecomputableAffine, Tok2Vec, flatten
-from .._ml import link_vectors_to_models
+from .._ml import link_vectors_to_models, create_default_optimizer
 from ..compat import json_dumps, copy_array
 from ..tokens.doc cimport Doc
 from ..gold cimport GoldParse
@@ -273,6 +273,10 @@ cdef class Parser:
         }
         return (tok2vec, lower, upper), cfg
 
+    def create_optimizer(self):
+        return create_default_optimizer(self.model[0].ops,
+                                        **self.cfg.get('optimizer', {}))
+
     def __init__(self, Vocab vocab, moves=True, model=True, **cfg):
         """Create a Parser.
@@ -793,7 +797,7 @@ cdef class Parser:
             copy_array(larger.b[:smaller.nO], smaller.b)
             self.model[-1]._layers[-1] = larger
 
-    def begin_training(self, gold_tuples, pipeline=None, **cfg):
+    def begin_training(self, gold_tuples, pipeline=None, sgd=None, **cfg):
         if 'model' in cfg:
             self.model = cfg['model']
         gold_tuples = nonproj.preprocess_training_data(gold_tuples,
@@ -805,9 +809,14 @@ cdef class Parser:
         if self.model is True:
             cfg['pretrained_dims'] = self.vocab.vectors_length
             self.model, cfg = self.Model(self.moves.n_moves, **cfg)
-            self.init_multitask_objectives(gold_tuples, pipeline, **cfg)
+            if sgd is None:
+                sgd = self.create_optimizer()
+            self.init_multitask_objectives(gold_tuples, pipeline, sgd=sgd, **cfg)
             link_vectors_to_models(self.vocab)
             self.cfg.update(cfg)
+        elif sgd is None:
+            sgd = self.create_optimizer()
+        return sgd
 
     def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
         '''Setup models for secondary objectives, to benefit from multi-task
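
The parser's branching deserves a gloss: the optimizer must exist before `init_multitask_objectives` runs, so the multitask labellers can be initialized with the same `sgd`, but it must also be created when the model is already built and the first branch never executes. A condensed sketch of the resulting flow, with the model-building details elided:

    def begin_training(self, gold_tuples, pipeline=None, sgd=None, **cfg):
        if self.model is True:
            # Fresh model: build it, then ensure sgd exists before the
            # multitask objectives are initialized with it.
            self.model, cfg = self.Model(self.moves.n_moves, **cfg)
            if sgd is None:
                sgd = self.create_optimizer()
            self.init_multitask_objectives(gold_tuples, pipeline, sgd=sgd, **cfg)
        elif sgd is None:
            # Model already set up: still honour the contract of
            # returning an optimizer.
            sgd = self.create_optimizer()
        return sgd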