Play with hash kernel class

This commit is contained in:
Matthew Honnibal 2017-03-30 02:35:36 +02:00
parent 6c31a7222f
commit 1f292bfd17
2 changed files with 47 additions and 24 deletions

View File

@ -12,10 +12,13 @@ cdef class LinearModel:
cdef readonly int nr_class cdef readonly int nr_class
cdef readonly uint32_t nr_weight cdef readonly uint32_t nr_weight
cdef public weight_t learn_rate cdef public weight_t learn_rate
cdef public weight_t momentum
cdef Pool mem cdef Pool mem
cdef weight_t time
cdef weight_t* W cdef weight_t* W
cdef weight_t* d_W cdef weight_t* mom
cdef vector[uint64_t]* _indices cdef weight_t* averages
cdef weight_t* last_upd
cdef void hinge_lossC(self, weight_t* d_scores, cdef void hinge_lossC(self, weight_t* d_scores,
const weight_t* scores, const weight_t* costs) nogil const weight_t* scores, const weight_t* costs) nogil

View File

@ -20,21 +20,23 @@ from thinc.neural.ops import NumpyOps
cdef class LinearModel: cdef class LinearModel:
def __init__(self, int nr_class, templates, weight_t learn_rate=0.001, def __init__(self, int nr_class, templates,
size=2**18): weight_t momentum=0.9, weight_t learn_rate=0.001, size=2**18):
self.extracter = ConjunctionExtracter(templates) self.extracter = ConjunctionExtracter(templates)
self.nr_weight = size self.nr_weight = size
self.nr_class = nr_class self.nr_class = nr_class
self.learn_rate = learn_rate self.learn_rate = learn_rate
self.momentum = momentum
self.mem = Pool() self.mem = Pool()
self.time = 0
self.W = <weight_t*>self.mem.alloc(self.nr_weight * self.nr_class, self.W = <weight_t*>self.mem.alloc(self.nr_weight * self.nr_class,
sizeof(weight_t)) sizeof(weight_t))
self.d_W = <weight_t*>self.mem.alloc(self.nr_weight * self.nr_class, self.mom = <weight_t*>self.mem.alloc(self.nr_weight * self.nr_class,
sizeof(weight_t))
self.averages = <weight_t*>self.mem.alloc(self.nr_weight * self.nr_class,
sizeof(weight_t))
self.last_upd = <weight_t*>self.mem.alloc(self.nr_weight * self.nr_class,
sizeof(weight_t)) sizeof(weight_t))
self._indices = new vector[uint64_t]()
def __dealloc__(self):
del self._indices
cdef void hinge_lossC(self, weight_t* d_scores, cdef void hinge_lossC(self, weight_t* d_scores,
const weight_t* scores, const weight_t* costs) nogil: const weight_t* scores, const weight_t* costs) nogil:
@ -97,8 +99,8 @@ cdef class LinearModel:
cdef void set_scoresC(self, weight_t* scores, cdef void set_scoresC(self, weight_t* scores,
const FeatureC* features, int nr_feat) nogil: const FeatureC* features, int nr_feat) nogil:
cdef uint64_t nr_weight = self.nr_weight
cdef int nr_class = self.nr_class cdef int nr_class = self.nr_class
cdef uint64_t nr_weight = self.nr_weight * nr_class - nr_class
cdef vector[uint64_t] indices cdef vector[uint64_t] indices
# Collect all feature indices # Collect all feature indices
cdef uint32_t[2] hashed cdef uint32_t[2] hashed
@ -114,16 +116,23 @@ cdef class LinearModel:
# Sort them, to improve memory access pattern # Sort them, to improve memory access pattern
libcpp.algorithm.sort(indices.begin(), indices.end()) libcpp.algorithm.sort(indices.begin(), indices.end())
for idx in indices: for idx in indices:
W = &self.W[idx * nr_class] W = &self.W[idx]
for clas in range(nr_class): for clas in range(nr_class):
scores[clas] += W[clas] scores[clas] += W[clas]
cdef void set_gradientC(self, const weight_t* d_scores, const FeatureC* cdef void set_gradientC(self, const weight_t* d_scores, const FeatureC*
features, int nr_feat) nogil: features, int nr_feat) nogil:
cdef uint64_t nr_weight = self.nr_weight self.time += 1
cdef int nr_class = self.nr_class cdef int nr_class = self.nr_class
cdef weight_t abs_grad = 0
for i in range(nr_class):
abs_grad += d_scores[i] if d_scores[i] > 0 else -d_scores[i]
if abs_grad < 0.1:
return
cdef uint64_t nr_weight = self.nr_weight * nr_class - nr_class
cdef vector[uint64_t] indices cdef vector[uint64_t] indices
# Collect all feature indices # Collect all feature indices
indices.reserve(nr_feat * 2)
cdef uint32_t[2] hashed cdef uint32_t[2] hashed
cdef uint64_t hash2 cdef uint64_t hash2
for feat in features[:nr_feat]: for feat in features[:nr_feat]:
@ -136,19 +145,24 @@ cdef class LinearModel:
# Sort them, to improve memory access pattern # Sort them, to improve memory access pattern
libcpp.algorithm.sort(indices.begin(), indices.end()) libcpp.algorithm.sort(indices.begin(), indices.end())
for idx in indices: for idx in indices:
d_W = &self.d_W[idx * nr_class] #avg = &self.averages[idx]
for clas in range(nr_class): #last_upd = &self.last_upd[idx]
if d_scores[clas] < 0: W = &self.W[idx]
d_W[clas] += max(-10., d_scores[clas]) #mom = &self.mom[idx]
else: for i in range(nr_class):
d_W[clas] += min(10., d_scores[clas]) if d_scores[i] == 0:
continue
d = d_scores[i]
W[i] -= self.learn_rate * d
#unchanged = self.time - last_upd[i]
#avg[i] += unchanged * W[i]
#mom[i] *= self.momentum ** unchanged
#mom[i] += self.learn_rate * d
#W[i] -= mom[i]
#last_upd[i] = self.time
def finish_update(self, optimizer): def finish_update(self, optimizer):
cdef np.npy_intp[1] shape pass
shape[0] = self.nr_weight * self.nr_class
W_arr = np.PyArray_SimpleNewFromData(1, shape, np.NPY_FLOAT, self.W)
dW_arr = np.PyArray_SimpleNewFromData(1, shape, np.NPY_FLOAT, self.d_W)
optimizer(W_arr, dW_arr, key=1)
@property @property
def nr_active_feat(self): def nr_active_feat(self):
@ -159,7 +173,13 @@ cdef class LinearModel:
return self.extracter.nr_templ return self.extracter.nr_templ
def end_training(self, *args, **kwargs): def end_training(self, *args, **kwargs):
pass # Average weights
for i in range(self.nr_weight * self.nr_class):
unchanged = self.time - self.last_upd[i]
self.averages[i] += self.W[i] * unchanged
self.W[i], self.averages[i] = self.averages[i], self.W[i]
self.W[i] /= self.time
self.last_upd[i] = self.time
def dump(self, *args, **kwargs): def dump(self, *args, **kwargs):
pass pass