mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Support making updates periodically during training
This commit is contained in:
parent
3f725ff7b3
commit
6b918cc58e
|
@ -29,6 +29,7 @@ from thinc.linear.avgtron cimport AveragedPerceptron
|
||||||
from thinc.linalg cimport VecVec
|
from thinc.linalg cimport VecVec
|
||||||
from thinc.structs cimport SparseArrayC, FeatureC, ExampleC
|
from thinc.structs cimport SparseArrayC, FeatureC, ExampleC
|
||||||
from thinc.extra.eg cimport Example
|
from thinc.extra.eg cimport Example
|
||||||
|
|
||||||
from cymem.cymem cimport Pool, Address
|
from cymem.cymem cimport Pool, Address
|
||||||
from murmurhash.mrmr cimport hash64
|
from murmurhash.mrmr cimport hash64
|
||||||
from preshed.maps cimport MapStruct
|
from preshed.maps cimport MapStruct
|
||||||
|
@ -37,6 +38,7 @@ from preshed.maps cimport map_get
|
||||||
from thinc.api import layerize, chain, noop, clone
|
from thinc.api import layerize, chain, noop, clone
|
||||||
from thinc.neural import Model, Affine, ELU, ReLu, Maxout
|
from thinc.neural import Model, Affine, ELU, ReLu, Maxout
|
||||||
from thinc.neural.ops import NumpyOps, CupyOps
|
from thinc.neural.ops import NumpyOps, CupyOps
|
||||||
|
from thinc.neural.util import get_array_module
|
||||||
|
|
||||||
from .. import util
|
from .. import util
|
||||||
from ..util import get_async, get_cuda_stream
|
from ..util import get_async, get_cuda_stream
|
||||||
|
@ -381,6 +383,7 @@ cdef class Parser:
|
||||||
if not s.is_final() and g is not None]
|
if not s.is_final() and g is not None]
|
||||||
|
|
||||||
backprops = []
|
backprops = []
|
||||||
|
d_tokvecs = state2vec.ops.allocate(tokvecs.shape)
|
||||||
cdef float loss = 0.
|
cdef float loss = 0.
|
||||||
while len(todo) >= 3:
|
while len(todo) >= 3:
|
||||||
states, golds = zip(*todo)
|
states, golds = zip(*todo)
|
||||||
|
@ -404,22 +407,30 @@ cdef class Parser:
|
||||||
backprops.append((token_ids, d_vector, bp_vector))
|
backprops.append((token_ids, d_vector, bp_vector))
|
||||||
self.transition_batch(states, scores)
|
self.transition_batch(states, scores)
|
||||||
todo = [st for st in todo if not st[0].is_final()]
|
todo = [st for st in todo if not st[0].is_final()]
|
||||||
|
if len(backprops) >= 50:
|
||||||
|
self._make_updates(d_tokvecs,
|
||||||
|
backprops, sgd, cuda_stream)
|
||||||
|
backprops = []
|
||||||
|
if backprops:
|
||||||
|
self._make_updates(d_tokvecs,
|
||||||
|
backprops, sgd, cuda_stream)
|
||||||
|
return self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs])
|
||||||
|
|
||||||
|
def _make_updates(self, d_tokvecs, backprops, sgd, cuda_stream=None):
|
||||||
# Tells CUDA to block, so our async copies complete.
|
# Tells CUDA to block, so our async copies complete.
|
||||||
if cuda_stream is not None:
|
if cuda_stream is not None:
|
||||||
cuda_stream.synchronize()
|
cuda_stream.synchronize()
|
||||||
d_tokvecs = state2vec.ops.allocate(tokvecs.shape)
|
xp = get_array_module(d_tokvecs)
|
||||||
xp = state2vec.ops.xp # Handle for numpy/cupy
|
for ids, d_vector, bp_vector in backprops:
|
||||||
for token_ids, d_vector, bp_vector in backprops:
|
|
||||||
d_state_features = bp_vector(d_vector, sgd=sgd)
|
d_state_features = bp_vector(d_vector, sgd=sgd)
|
||||||
active_feats = token_ids * (token_ids >= 0)
|
active_feats = ids * (ids >= 0)
|
||||||
active_feats = active_feats.reshape((token_ids.shape[0], token_ids.shape[1], 1))
|
active_feats = active_feats.reshape((ids.shape[0], ids.shape[1], 1))
|
||||||
if hasattr(xp, 'scatter_add'):
|
if hasattr(xp, 'scatter_add'):
|
||||||
xp.scatter_add(d_tokvecs,
|
xp.scatter_add(d_tokvecs,
|
||||||
token_ids, d_state_features * active_feats)
|
ids, d_state_features * active_feats)
|
||||||
else:
|
else:
|
||||||
xp.add.at(d_tokvecs,
|
xp.add.at(d_tokvecs,
|
||||||
token_ids, d_state_features * active_feats)
|
ids, d_state_features * active_feats)
|
||||||
return self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs])
|
|
||||||
|
|
||||||
def get_batch_model(self, batch_size, tokvecs, stream, dropout):
|
def get_batch_model(self, batch_size, tokvecs, stream, dropout):
|
||||||
lower, upper = self.model
|
lower, upper = self.model
|
||||||
|
|
Loading…
Reference in New Issue
Block a user