mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Remove peeking from Parser.begin_training (#5456)
Inspect all instances in `Parser.begin_training` rather than only the first 1000.
This commit is contained in:
parent
40e65d6f63
commit
9393253b66
|
@@ -9,7 +9,6 @@ import numpy
|
||||||
cimport cython.parallel
|
cimport cython.parallel
|
||||||
import numpy.random
|
import numpy.random
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
from itertools import islice
|
|
||||||
from cpython.ref cimport PyObject, Py_XDECREF
|
from cpython.ref cimport PyObject, Py_XDECREF
|
||||||
from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
|
from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
|
||||||
from libc.math cimport exp
|
from libc.math cimport exp
|
||||||
|
@@ -621,15 +620,15 @@ cdef class Parser:
|
||||||
self.model, cfg = self.Model(self.moves.n_moves, **cfg)
|
self.model, cfg = self.Model(self.moves.n_moves, **cfg)
|
||||||
if sgd is None:
|
if sgd is None:
|
||||||
sgd = self.create_optimizer()
|
sgd = self.create_optimizer()
|
||||||
doc_sample = []
|
docs = []
|
||||||
gold_sample = []
|
golds = []
|
||||||
for raw_text, annots_brackets in islice(get_gold_tuples(), 1000):
|
for raw_text, annots_brackets in get_gold_tuples():
|
||||||
for annots, brackets in annots_brackets:
|
for annots, brackets in annots_brackets:
|
||||||
ids, words, tags, heads, deps, ents = annots
|
ids, words, tags, heads, deps, ents = annots
|
||||||
doc_sample.append(Doc(self.vocab, words=words))
|
docs.append(Doc(self.vocab, words=words))
|
||||||
gold_sample.append(GoldParse(doc_sample[-1], words=words, tags=tags,
|
golds.append(GoldParse(docs[-1], words=words, tags=tags,
|
||||||
heads=heads, deps=deps, entities=ents))
|
heads=heads, deps=deps, entities=ents))
|
||||||
self.model.begin_training(doc_sample, gold_sample)
|
self.model.begin_training(docs, golds)
|
||||||
if pipeline is not None:
|
if pipeline is not None:
|
||||||
self.init_multitask_objectives(get_gold_tuples, pipeline, sgd=sgd, **cfg)
|
self.init_multitask_objectives(get_gold_tuples, pipeline, sgd=sgd, **cfg)
|
||||||
link_vectors_to_models(self.vocab)
|
link_vectors_to_models(self.vocab)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user