Play with hash kernel class
commit 1f292bfd17
parent 6c31a7222f
@@ -12,10 +12,13 @@ cdef class LinearModel:
     cdef readonly int nr_class
     cdef readonly uint32_t nr_weight
     cdef public weight_t learn_rate
+    cdef public weight_t momentum
     cdef Pool mem
+    cdef weight_t time
     cdef weight_t* W
-    cdef weight_t* d_W
-    cdef vector[uint64_t]* _indices
+    cdef weight_t* mom
+    cdef weight_t* averages
+    cdef weight_t* last_upd

    cdef void hinge_lossC(self, weight_t* d_scores,
            const weight_t* scores, const weight_t* costs) nogil
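The first hunk edits the `.pxd` declarations; the hunks below are the `.pyx` implementation. The explicit gradient buffer `d_W` and the index vector give way to buffers for momentum (`mom`), a running sum of weights (`averages`), and per-weight update timestamps (`last_upd`), plus a `time` counter. As a rough pure-Python sketch of that storage layout (the class name and numpy types here are illustrative, not spaCy's API):

import numpy as np

class LinearModelState:
    # Sketch of the buffers declared above (not spaCy's actual API).
    def __init__(self, nr_class, nr_weight=2**18, learn_rate=0.001, momentum=0.9):
        n = nr_weight * nr_class
        self.nr_class = nr_class
        self.nr_weight = nr_weight
        self.learn_rate = learn_rate
        self.momentum = momentum
        self.time = 0.0
        self.W = np.zeros(n, dtype='f')         # weights, flat: nr_class per bucket
        self.mom = np.zeros(n, dtype='f')       # momentum buffer (replaces d_W)
        self.averages = np.zeros(n, dtype='f')  # running sum of weights, for averaging
        self.last_upd = np.zeros(n, dtype='f')  # time each weight was last updated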
@@ -20,21 +20,23 @@ from thinc.neural.ops import NumpyOps


 cdef class LinearModel:
-    def __init__(self, int nr_class, templates, weight_t learn_rate=0.001,
-                 size=2**18):
+    def __init__(self, int nr_class, templates,
+                 weight_t momentum=0.9, weight_t learn_rate=0.001, size=2**18):
         self.extracter = ConjunctionExtracter(templates)
         self.nr_weight = size
         self.nr_class = nr_class
         self.learn_rate = learn_rate
+        self.momentum = momentum
         self.mem = Pool()
+        self.time = 0
         self.W = <weight_t*>self.mem.alloc(self.nr_weight * self.nr_class,
                                            sizeof(weight_t))
-        self.d_W = <weight_t*>self.mem.alloc(self.nr_weight * self.nr_class,
-                                             sizeof(weight_t))
-        self._indices = new vector[uint64_t]()
-
-    def __dealloc__(self):
-        del self._indices
+        self.mom = <weight_t*>self.mem.alloc(self.nr_weight * self.nr_class,
+                                             sizeof(weight_t))
+        self.averages = <weight_t*>self.mem.alloc(self.nr_weight * self.nr_class,
+                                                  sizeof(weight_t))
+        self.last_upd = <weight_t*>self.mem.alloc(self.nr_weight * self.nr_class,
+                                                  sizeof(weight_t))

    cdef void hinge_lossC(self, weight_t* d_scores,
            const weight_t* scores, const weight_t* costs) nogil:
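The model is a hash kernel: a feature is hashed straight into the flat weight table (2**18 buckets by default), so no feature-to-index dictionary is needed and collisions are simply tolerated. A minimal sketch of the scoring idea, with a stand-in for the hash function (the real code uses a 64-bit hash split into two 32-bit indices, hence the `uint32_t[2] hashed` below):

import numpy as np

def score_features(W, nr_class, nr_weight, feature_keys):
    # Hash-kernel scoring sketch: each feature's hash picks an offset into
    # the flat weight table; the nr_class weights there are summed per class.
    scores = np.zeros(nr_class, dtype='f')
    for key in feature_keys:
        idx = hash(key) % (nr_weight * nr_class - nr_class)  # stand-in hash
        scores += W[idx:idx + nr_class]
    return scores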
@@ -97,8 +99,8 @@ cdef class LinearModel:

    cdef void set_scoresC(self, weight_t* scores,
            const FeatureC* features, int nr_feat) nogil:
-        cdef uint64_t nr_weight = self.nr_weight
         cdef int nr_class = self.nr_class
+        cdef uint64_t nr_weight = self.nr_weight * nr_class - nr_class
         cdef vector[uint64_t] indices
         # Collect all feature indices
         cdef uint32_t[2] hashed
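Note the changed bound: `idx` is now a direct offset into the flat `W` array (the next hunk reads `&self.W[idx]` instead of `&self.W[idx * nr_class]`), and subtracting `nr_class` from the modulus keeps the `nr_class`-wide read that follows in bounds. A quick check of that invariant:

def check_bound(nr_weight, nr_class):
    # idx = hash % (nr_weight * nr_class - nr_class), per the diff; the
    # largest possible idx still leaves nr_class weights readable.
    table_size = nr_weight * nr_class
    max_idx = (table_size - nr_class) - 1
    assert max_idx + nr_class < table_size

check_bound(nr_weight=2**18, nr_class=4)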
@@ -114,16 +116,23 @@ cdef class LinearModel:
         # Sort them, to improve memory access pattern
         libcpp.algorithm.sort(indices.begin(), indices.end())
         for idx in indices:
-            W = &self.W[idx * nr_class]
+            W = &self.W[idx]
             for clas in range(nr_class):
                 scores[clas] += W[clas]

    cdef void set_gradientC(self, const weight_t* d_scores, const FeatureC*
            features, int nr_feat) nogil:
-        cdef uint64_t nr_weight = self.nr_weight
+        self.time += 1
         cdef int nr_class = self.nr_class
+        cdef weight_t abs_grad = 0
+        for i in range(nr_class):
+            abs_grad += d_scores[i] if d_scores[i] > 0 else -d_scores[i]
+        if abs_grad < 0.1:
+            return
+        cdef uint64_t nr_weight = self.nr_weight * nr_class - nr_class
         cdef vector[uint64_t] indices
         # Collect all feature indices
+        indices.reserve(nr_feat * 2)
         cdef uint32_t[2] hashed
         cdef uint64_t hash2
         for feat in features[:nr_feat]:
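`set_gradientC` now advances the clock once per call and bails out when the L1 norm of the class gradient is under 0.1, skipping the hashing and weight touches for examples that are already nearly correct. The guard, in sketch form:

import numpy as np

def worth_updating(d_scores, threshold=0.1):
    # Early exit added in the diff: ignore near-zero gradients entirely.
    return np.abs(np.asarray(d_scores)).sum() >= threshold

worth_updating([0.02, -0.03, 0.0])  # False: skip the update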
@@ -136,19 +145,24 @@ cdef class LinearModel:
         # Sort them, to improve memory access pattern
         libcpp.algorithm.sort(indices.begin(), indices.end())
         for idx in indices:
-            d_W = &self.d_W[idx * nr_class]
-            for clas in range(nr_class):
-                if d_scores[clas] < 0:
-                    d_W[clas] += max(-10., d_scores[clas])
-                else:
-                    d_W[clas] += min(10., d_scores[clas])
+            #avg = &self.averages[idx]
+            #last_upd = &self.last_upd[idx]
+            W = &self.W[idx]
+            #mom = &self.mom[idx]
+            for i in range(nr_class):
+                if d_scores[i] == 0:
+                    continue
+                d = d_scores[i]
+                W[i] -= self.learn_rate * d
+                #unchanged = self.time - last_upd[i]
+                #avg[i] += unchanged * W[i]
+                #mom[i] *= self.momentum ** unchanged
+                #mom[i] += self.learn_rate * d
+                #W[i] -= mom[i]
+                #last_upd[i] = self.time

    def finish_update(self, optimizer):
-        cdef np.npy_intp[1] shape
-        shape[0] = self.nr_weight * self.nr_class
-        W_arr = np.PyArray_SimpleNewFromData(1, shape, np.NPY_FLOAT, self.W)
-        dW_arr = np.PyArray_SimpleNewFromData(1, shape, np.NPY_FLOAT, self.d_W)
-        optimizer(W_arr, dW_arr, key=1)
+        pass

    @property
    def nr_active_feat(self):
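The live update is plain SGD on each hashed row, and with updates applied inline, `finish_update` becomes a no-op. The commented-out lines mark where lazy momentum and averaging would slot in: a weight is first "caught up" for the `unchanged` ticks since it was last touched, so neither buffer needs a full pass per example. My reading of that commented code as a sketch (using the hypothetical `LinearModelState` buffers from above, not the shipped implementation):

def lazy_update(state, idx, d_scores):
    # SGD step on one hashed row, with the lazy momentum/averaging
    # bookkeeping that the diff leaves commented out.
    for i, d in enumerate(d_scores):
        if d == 0:
            continue
        j = idx + i
        unchanged = state.time - state.last_upd[j]
        state.averages[j] += unchanged * state.W[j]   # catch up the running sum
        state.mom[j] *= state.momentum ** unchanged   # decay stale momentum
        state.mom[j] += state.learn_rate * d
        state.W[j] -= state.mom[j]
        state.last_upd[j] = state.time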
@@ -159,7 +173,13 @@ cdef class LinearModel:
         return self.extracter.nr_templ

    def end_training(self, *args, **kwargs):
-        pass
+        # Average weights
+        for i in range(self.nr_weight * self.nr_class):
+            unchanged = self.time - self.last_upd[i]
+            self.averages[i] += self.W[i] * unchanged
+            self.W[i], self.averages[i] = self.averages[i], self.W[i]
+            self.W[i] /= self.time
+            self.last_upd[i] = self.time

    def dump(self, *args, **kwargs):
        pass
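`end_training` now performs the classic averaged-weights finalization: each weight is caught up to the final clock, swapped with its running sum, and the sum divided by total time, so inference uses time-averaged weights. The same loop vectorized over the sketch buffers above (illustrative, and assuming `time > 0`):

def finish_averaging(state):
    # end_training, vectorized: replace each weight with its time average.
    unchanged = state.time - state.last_upd       # ticks since each last update
    state.averages += state.W * unchanged         # catch the running sums up
    state.W, state.averages = state.averages, state.W
    state.W = state.W / state.time                # running sum -> average
    state.last_upd[:] = state.time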