mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
Fiddle with nll loss in parser
This commit is contained in:
parent
ed23476c82
commit
2bfe184692
|
@ -2,6 +2,7 @@
|
||||||
# cython: profile=True
|
# cython: profile=True
|
||||||
from libc.stdint cimport uint64_t
|
from libc.stdint cimport uint64_t
|
||||||
from libc.string cimport memcpy, memset
|
from libc.string cimport memcpy, memset
|
||||||
|
from libc.math cimport sqrt
|
||||||
|
|
||||||
from cymem.cymem cimport Pool, Address
|
from cymem.cymem cimport Pool, Address
|
||||||
from murmurhash.mrmr cimport hash64
|
from murmurhash.mrmr cimport hash64
|
||||||
|
@ -12,6 +13,7 @@ from thinc.linalg cimport VecVec
|
||||||
from thinc.structs cimport NeuralNetC, SparseArrayC, ExampleC
|
from thinc.structs cimport NeuralNetC, SparseArrayC, ExampleC
|
||||||
from thinc.structs cimport FeatureC
|
from thinc.structs cimport FeatureC
|
||||||
from thinc.extra.eg cimport Example
|
from thinc.extra.eg cimport Example
|
||||||
|
from thinc.neural.forward cimport softmax
|
||||||
|
|
||||||
from preshed.maps cimport map_get
|
from preshed.maps cimport map_get
|
||||||
from preshed.maps cimport MapStruct
|
from preshed.maps cimport MapStruct
|
||||||
|
@ -31,21 +33,48 @@ cdef class ParserPerceptron(AveragedPerceptron):
|
||||||
def widths(self):
|
def widths(self):
|
||||||
return (self.extracter.nr_templ,)
|
return (self.extracter.nr_templ,)
|
||||||
|
|
||||||
def update(self, Example eg):
|
def update(self, Example eg, loss='regression'):
|
||||||
'''Does regression on negative cost. Sort of cute?'''
|
|
||||||
self.time += 1
|
self.time += 1
|
||||||
cdef weight_t loss = 0.0
|
|
||||||
best = eg.best
|
best = eg.best
|
||||||
for clas in range(eg.c.nr_class):
|
guess = eg.guess
|
||||||
if not eg.c.is_valid[clas]:
|
assert best >= 0, best
|
||||||
continue
|
assert guess >= 0, guess
|
||||||
if eg.c.scores[clas] < eg.c.scores[best]:
|
d_losses = {}
|
||||||
continue
|
if loss == 'regression':
|
||||||
loss += (-eg.c.costs[clas] - eg.c.scores[clas]) ** 2
|
# Does regression on negative cost. Sort of cute?
|
||||||
d_loss = 2 * (-eg.c.costs[clas] - eg.c.scores[clas])
|
# Clip to guess and best, to keep gradient sparse.
|
||||||
|
d_losses[guess] = -2 * (-eg.c.costs[guess] - eg.c.scores[guess])
|
||||||
|
d_losses[best] = -2 * (-eg.c.costs[best] - eg.c.scores[best])
|
||||||
|
elif loss == 'nll':
|
||||||
|
# Clip to guess and best, to keep gradient sparse.
|
||||||
|
if eg.c.scores[guess] == 0.0:
|
||||||
|
d_losses[guess] = 1.0
|
||||||
|
d_losses[best] = -1.0
|
||||||
|
else:
|
||||||
|
softmax(eg.c.scores, eg.c.nr_class)
|
||||||
|
for i in range(eg.c.nr_class):
|
||||||
|
if eg.c.is_valid[i] \
|
||||||
|
and eg.c.scores[i] >= eg.c.scores[best]:
|
||||||
|
d_losses[i] = eg.c.scores[i] - (eg.c.costs[i] <= 0)
|
||||||
|
elif loss == 'hinge':
|
||||||
|
for i in range(eg.c.nr_class):
|
||||||
|
if eg.c.is_valid[i] \
|
||||||
|
and eg.c.costs[i] > 0 \
|
||||||
|
and eg.c.scores[i] > (eg.c.scores[best]-1):
|
||||||
|
margin = eg.c.scores[i] - (eg.c.scores[best] - 1)
|
||||||
|
d_losses[i] = margin
|
||||||
|
d_losses[best] = min(-margin, d_losses.get(best, 0.0))
|
||||||
|
elif loss == 'perceptron':
|
||||||
|
if guess != best:
|
||||||
|
d_losses = {best: -1.0, guess: 1.0}
|
||||||
|
step = 0.0
|
||||||
|
i = 0
|
||||||
|
for clas, d_loss in d_losses.items():
|
||||||
for feat in eg.c.features[:eg.c.nr_feat]:
|
for feat in eg.c.features[:eg.c.nr_feat]:
|
||||||
self.update_weight(feat.key, clas, feat.value * -d_loss)
|
step += abs(self.update_weight(feat.key, clas, feat.value * d_loss))
|
||||||
return int(loss)
|
i += 1
|
||||||
|
self.total_L1 += self.l1_penalty * self.learn_rate
|
||||||
|
return sum(map(abs, d_losses.values()))
|
||||||
|
|
||||||
cdef int set_featuresC(self, FeatureC* feats, const void* _state) nogil:
|
cdef int set_featuresC(self, FeatureC* feats, const void* _state) nogil:
|
||||||
cdef atom_t[CONTEXT_SIZE] context
|
cdef atom_t[CONTEXT_SIZE] context
|
||||||
|
|
|
@ -21,6 +21,7 @@ import json
|
||||||
import sys
|
import sys
|
||||||
from .nonproj import PseudoProjectivity
|
from .nonproj import PseudoProjectivity
|
||||||
import random
|
import random
|
||||||
|
import numpy.random
|
||||||
|
|
||||||
from cymem.cymem cimport Pool, Address
|
from cymem.cymem cimport Pool, Address
|
||||||
from murmurhash.mrmr cimport hash64
|
from murmurhash.mrmr cimport hash64
|
||||||
|
@ -203,15 +204,18 @@ cdef class Parser:
|
||||||
cdef Transition action
|
cdef Transition action
|
||||||
while not stcls.is_final():
|
while not stcls.is_final():
|
||||||
eg.c.nr_feat = self.model.set_featuresC(eg.c.features, stcls.c)
|
eg.c.nr_feat = self.model.set_featuresC(eg.c.features, stcls.c)
|
||||||
|
self.model.dropoutC(eg.c.features,
|
||||||
|
0.5, eg.c.nr_feat)
|
||||||
|
if eg.c.features[0].i == 1:
|
||||||
|
eg.c.features[0].value = 1.0
|
||||||
|
#for i in range(eg.c.nr_feat):
|
||||||
|
# if eg.c.features[i].value != 0:
|
||||||
|
# self.model.apply_L1(eg.c.features[i].key)
|
||||||
self.model.set_scoresC(eg.c.scores, eg.c.features, eg.c.nr_feat)
|
self.model.set_scoresC(eg.c.scores, eg.c.features, eg.c.nr_feat)
|
||||||
self.moves.set_costs(eg.c.is_valid, eg.c.costs, stcls, gold)
|
self.moves.set_costs(eg.c.is_valid, eg.c.costs, stcls, gold)
|
||||||
for i in range(self.moves.n_moves):
|
|
||||||
if eg.c.costs[i] < 0:
|
|
||||||
eg.c.costs[i] = 0
|
|
||||||
action = self.moves.c[eg.guess]
|
action = self.moves.c[eg.guess]
|
||||||
action.do(stcls.c, action.label)
|
action.do(stcls.c, action.label)
|
||||||
|
loss += self.model.update(eg, loss='nll')
|
||||||
loss += self.model.update(eg)
|
|
||||||
eg.reset()
|
eg.reset()
|
||||||
return loss
|
return loss
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user