Trigger on_data hooks in parser model

This commit is contained in:
Matthew Honnibal 2018-09-14 20:50:59 +02:00
parent 5afd98dff5
commit c046392317
2 changed files with 13 additions and 2 deletions

View File

@ -211,6 +211,10 @@ class ParserModel(Model):
copy_array(larger.W[:smaller.nO], smaller.W)
copy_array(larger.b[:smaller.nO], smaller.b)
self._layers[-1]._layers[-1] = larger
def begin_training(self, X, y=None):
    """Forward the training sample to every sub-layer of this model.

    Calls ``begin_training(X, y=y)`` on each entry of ``self._layers``,
    in order, and returns ``None``.

    X: Sample inputs, passed through unchanged to each layer.
    y: Optional sample targets, passed through as the ``y`` keyword.
    """
    # NOTE(review): presumably this exists so each layer's on-data hooks
    # fire with real sample data -- confirm against the layer classes.
    for sublayer in self._layers:
        sublayer.begin_training(X, y=y)
@property
def tok2vec(self):

View File

@ -510,8 +510,15 @@ cdef class Parser:
self.model, cfg = self.Model(self.moves.n_moves, **cfg)
if sgd is None:
sgd = self.create_optimizer()
self.model.begin_training(
self.model.ops.allocate((5, cfg['token_vector_width'])))
doc_sample = []
gold_sample = []
for raw_text, annots_brackets in cytoolz.take(1000, get_gold_tuples()):
for annots, brackets in annots_brackets:
ids, words, tags, heads, deps, ents = annots
doc_sample.append(Doc(self.vocab, words=words))
gold_sample.append(GoldParse(doc_sample[-1], words=words, tags=tags,
heads=heads, deps=deps, ents=ents))
self.model.begin_training(doc_sample, gold_sample)
if pipeline is not None:
self.init_multitask_objectives(get_gold_tuples, pipeline, sgd=sgd, **cfg)
link_vectors_to_models(self.vocab)