mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
Fix fine-tuning
This commit is contained in:
parent
3fe0d76e6d
commit
8a59718fd6
23
spacy/_ml.py
23
spacy/_ml.py
|
@ -359,8 +359,6 @@ def get_token_vectors(tokens_attrs_vectors, drop=0.):
|
|||
def backward(d_output, sgd=None):
|
||||
return (tokens, d_output)
|
||||
return vectors, backward
|
||||
|
||||
|
||||
def fine_tune(embedding, combine=None):
|
||||
if combine is not None:
|
||||
raise NotImplementedError(
|
||||
|
@ -372,22 +370,25 @@ def fine_tune(embedding, combine=None):
|
|||
vecs, bp_vecs = embedding.begin_update(docs, drop=drop)
|
||||
flat_tokvecs = embedding.ops.flatten(tokvecs)
|
||||
flat_vecs = embedding.ops.flatten(vecs)
|
||||
alpha = model.mix
|
||||
minus = 1-model.mix
|
||||
output = embedding.ops.unflatten(
|
||||
(model.mix[0] * flat_vecs + model.mix[1] * flat_tokvecs),
|
||||
lengths)
|
||||
(alpha * flat_tokvecs + minus * flat_vecs), lengths)
|
||||
|
||||
def fine_tune_bwd(d_output, sgd=None):
|
||||
bp_vecs(d_output, sgd=sgd)
|
||||
flat_grad = model.ops.flatten(d_output)
|
||||
model.d_mix[1] += flat_tokvecs.dot(flat_grad.T).sum()
|
||||
model.d_mix[0] += flat_vecs.dot(flat_grad.T).sum()
|
||||
if sgd is not None:
|
||||
sgd(model._mem.weights, model._mem.gradient, key=model.id)
|
||||
model.d_mix += flat_tokvecs.dot(flat_grad.T).sum()
|
||||
model.d_mix += 1-flat_vecs.dot(flat_grad.T).sum()
|
||||
|
||||
bp_vecs([d_o * minus for d_o in d_output], sgd=sgd)
|
||||
d_output = [d_o * alpha for d_o in d_output]
|
||||
sgd(model._mem.weights, model._mem.gradient, key=model.id)
|
||||
model.mix = model.ops.xp.minimum(model.mix, 1.0)
|
||||
return d_output
|
||||
return output, fine_tune_bwd
|
||||
model = wrap(fine_tune_fwd, embedding)
|
||||
model.mix = model._mem.add((model.id, 'mix'), (2,))
|
||||
model.mix.fill(1.)
|
||||
model.mix = model._mem.add((model.id, 'mix'), (1,))
|
||||
model.mix.fill(0.0)
|
||||
model.d_mix = model._mem.add_gradient((model.id, 'd_mix'), (model.id, 'mix'))
|
||||
return model
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user