Revert "Remove peeking from Parser.begin_training (#5456)"

This reverts commit 9393253b66.

The model shouldn't need to see all examples, and in v3 there is actually
no equivalent step. All examples are provided to the component, so that the
component can do things such as figuring out the labels. The model itself
only needs a sample of the examples, for things such as shape inference.
This commit is contained in:
Matthew Honnibal 2020-05-29 23:21:55 +02:00
parent 85f1acfaa0
commit 64adda3202

View File

@ -9,6 +9,7 @@ import numpy
cimport cython.parallel cimport cython.parallel
import numpy.random import numpy.random
cimport numpy as np cimport numpy as np
from itertools import islice
from cpython.ref cimport PyObject, Py_XDECREF from cpython.ref cimport PyObject, Py_XDECREF
from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
from libc.math cimport exp from libc.math cimport exp
@ -620,15 +621,15 @@ cdef class Parser:
self.model, cfg = self.Model(self.moves.n_moves, **cfg) self.model, cfg = self.Model(self.moves.n_moves, **cfg)
if sgd is None: if sgd is None:
sgd = self.create_optimizer() sgd = self.create_optimizer()
docs = [] doc_sample = []
golds = [] gold_sample = []
for raw_text, annots_brackets in get_gold_tuples(): for raw_text, annots_brackets in islice(get_gold_tuples(), 1000):
for annots, brackets in annots_brackets: for annots, brackets in annots_brackets:
ids, words, tags, heads, deps, ents = annots ids, words, tags, heads, deps, ents = annots
docs.append(Doc(self.vocab, words=words)) doc_sample.append(Doc(self.vocab, words=words))
golds.append(GoldParse(docs[-1], words=words, tags=tags, gold_sample.append(GoldParse(doc_sample[-1], words=words, tags=tags,
heads=heads, deps=deps, entities=ents)) heads=heads, deps=deps, entities=ents))
self.model.begin_training(docs, golds) self.model.begin_training(doc_sample, gold_sample)
if pipeline is not None: if pipeline is not None:
self.init_multitask_objectives(get_gold_tuples, pipeline, sgd=sgd, **cfg) self.init_multitask_objectives(get_gold_tuples, pipeline, sgd=sgd, **cfg)
link_vectors_to_models(self.vocab) link_vectors_to_models(self.vocab)