WIP on beam parser. Currently segfaults.

This commit is contained in:
Matthew Honnibal 2017-03-11 06:19:52 -06:00
parent b0d80dc9ae
commit 318b9e32ff
3 changed files with 67 additions and 36 deletions

View File

@ -1,6 +1,9 @@
from libc.string cimport memcpy, memset from libc.string cimport memcpy, memset
from libc.stdlib cimport malloc, calloc, free from libc.stdlib cimport malloc, calloc, free
from libc.stdint cimport uint32_t from libc.stdint cimport uint32_t, uint64_t
from murmurhash.mrmr cimport hash64
from ..vocab cimport EMPTY_LEXEME from ..vocab cimport EMPTY_LEXEME
from ..structs cimport TokenC, Entity from ..structs cimport TokenC, Entity
from ..lexeme cimport Lexeme from ..lexeme cimport Lexeme
@ -201,6 +204,21 @@ cdef cppclass StateC:
else: else:
return this.length - this._b_i return this.length - this._b_i
uint64_t hash() nogil const:
    # Return a 64-bit hash of a fixed-size signature of this state.
    #
    # The signature is 11 TokenC structs copied by value from the S_, R_,
    # L_, B_ and E_ accessors, in the order below. The raw bytes of the
    # signature are passed to hash64, with this._s_i as the seed.
    #
    # NOTE(review): S_/R_/L_/B_/E_ are defined outside this hunk; they
    # presumably address stack, right-child, left-child, buffer and entity
    # tokens respectively, and this._s_i is presumably the stack size --
    # confirm against their definitions.
    cdef TokenC[11] sig
    # The hash covers every byte of `sig`, not just the struct fields. Zero
    # the buffer first so that any bytes the struct assignments below do not
    # write (e.g. padding) are deterministic rather than stack garbage;
    # otherwise identical states could hash differently.
    memset(sig, 0, sizeof(sig))
    sig[0] = this.S_(2)[0]
    sig[1] = this.S_(1)[0]
    sig[2] = this.R_(this.S(1), 1)[0]
    sig[3] = this.L_(this.S(0), 1)[0]
    sig[4] = this.L_(this.S(0), 2)[0]
    sig[5] = this.S_(0)[0]
    sig[6] = this.R_(this.S(0), 2)[0]
    sig[7] = this.R_(this.S(0), 1)[0]
    sig[8] = this.B_(0)[0]
    sig[9] = this.E_(0)[0]
    sig[10] = this.E_(1)[0]
    return hash64(sig, sizeof(sig), this._s_i)
void push() nogil: void push() nogil:
if this.B(0) != -1: if this.B(0) != -1:
this._stack[this._s_i] = this.B(0) this._stack[this._s_i] = this.B(0)
@ -290,6 +308,8 @@ cdef cppclass StateC:
memcpy(this._stack, src._stack, this.length * sizeof(int)) memcpy(this._stack, src._stack, this.length * sizeof(int))
memcpy(this._buffer, src._buffer, this.length * sizeof(int)) memcpy(this._buffer, src._buffer, this.length * sizeof(int))
memcpy(this._ents, src._ents, this.length * sizeof(Entity)) memcpy(this._ents, src._ents, this.length * sizeof(Entity))
memcpy(this.shifted, src.shifted, this.length * sizeof(this.shifted[0]))
this.length = src.length
this._b_i = src._b_i this._b_i = src._b_i
this._s_i = src._s_i this._s_i = src._s_i
this._e_i = src._e_i this._e_i = src._e_i

View File

@ -126,14 +126,15 @@ cdef class BeamParser(Parser):
violn.check_crf(pred, gold) violn.check_crf(pred, gold)
assert pred.size >= 1 assert pred.size >= 1
assert gold.size >= 1 assert gold.size >= 1
if pred.loss == 0:
self.model.update_from_histories(self.moves, tokens, [(0.0, [])])
elif True:
#_check_train_integrity(pred, gold, gold_parse, self.moves) #_check_train_integrity(pred, gold, gold_parse, self.moves)
histories = zip(violn.p_probs, violn.p_hist) + zip(violn.g_probs, violn.g_hist) histories = zip(violn.p_probs, violn.p_hist) + zip(violn.g_probs, violn.g_hist)
min_grad = 0.001 ** (itn+1) self.model.update_from_histories(self.moves, tokens, histories, min_grad=0.001**(itn+1))
histories = [(grad, hist) for grad, hist in histories if abs(grad) >= min_grad] else:
random.shuffle(histories) self.model.update_from_histories(self.moves, tokens,
for grad, hist in histories: [(1.0, violn.p_hist[0]), (-1.0, violn.g_hist[0])])
assert not math.isnan(grad) and not math.isinf(grad), hist
self.model.update_from_history(self.moves, tokens, hist, grad)
_cleanup(pred) _cleanup(pred)
_cleanup(gold) _cleanup(gold)
return pred.loss return pred.loss
@ -173,7 +174,7 @@ cdef class BeamParser(Parser):
if follow_gold: if follow_gold:
beam.advance(_transition_state, NULL, <void*>self.moves.c) beam.advance(_transition_state, NULL, <void*>self.moves.c)
else: else:
beam.advance(_transition_state, NULL, <void*>self.moves.c) beam.advance(_transition_state, _hash_state, <void*>self.moves.c)
beam.check_done(_check_final_state, NULL) beam.check_done(_check_final_state, NULL)

View File

@ -12,7 +12,9 @@ from cpython.exc cimport PyErr_CheckSignals
from libc.stdint cimport uint32_t, uint64_t from libc.stdint cimport uint32_t, uint64_t
from libc.string cimport memset, memcpy from libc.string cimport memset, memcpy
from libc.stdlib cimport malloc, calloc, free from libc.stdlib cimport malloc, calloc, free
import os.path import os.path
from collections import Counter
from os import path from os import path
import shutil import shutil
import json import json
@ -80,34 +82,46 @@ cdef class ParserModel(AveragedPerceptron):
def update(self, Example eg): def update(self, Example eg):
'''Does regression on negative cost. Sort of cute?''' '''Does regression on negative cost. Sort of cute?'''
self.time += 1 self.time += 1
cdef weight_t loss = 0.0
best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class) best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class)
for clas in range(eg.c.nr_class): guess = eg.guess
if not eg.c.is_valid[clas]: cdef weight_t loss = 0.0
continue if guess == best:
if eg.c.scores[clas] < eg.c.scores[best]: return loss
continue for clas in [guess, best]:
loss += (-eg.c.costs[clas] - eg.c.scores[clas]) ** 2 loss += (-eg.c.costs[clas] - eg.c.scores[clas]) ** 2
d_loss = -2 * (-eg.c.costs[clas] - eg.c.scores[clas]) d_loss = eg.c.scores[clas] - -eg.c.costs[clas]
for feat in eg.c.features[:eg.c.nr_feat]: for feat in eg.c.features[:eg.c.nr_feat]:
self.update_weight_ftrl(feat.key, clas, feat.value * d_loss) self.update_weight_ftrl(feat.key, clas, feat.value * d_loss)
return int(loss) return loss
def update_from_history(self, TransitionSystem moves, Doc doc, history, weight_t grad): def update_from_histories(self, TransitionSystem moves, Doc doc, histories, weight_t min_grad=0.0):
cdef Pool mem = Pool() cdef Pool mem = Pool()
features = <FeatureC*>mem.alloc(self.nr_feat, sizeof(FeatureC)) features = <FeatureC*>mem.alloc(self.nr_feat, sizeof(FeatureC))
cdef StateClass stcls = StateClass.init(doc.c, doc.length) cdef StateClass stcls
moves.initialize_state(stcls.c)
cdef class_t clas cdef class_t clas
self.time += 1 self.time += 1
cdef atom_t[CONTEXT_SIZE] atoms cdef atom_t[CONTEXT_SIZE] atoms
histories = [(grad, hist) for grad, hist in histories if abs(grad) >= min_grad and hist]
if not histories:
return None
gradient = [Counter() for _ in range(max([max(h)+1 for _, h in histories]))]
for d_loss, history in histories:
stcls = StateClass.init(doc.c, doc.length)
moves.initialize_state(stcls.c)
for clas in history: for clas in history:
nr_feat = self.set_featuresC(atoms, features, stcls.c) nr_feat = self.set_featuresC(atoms, features, stcls.c)
clas_grad = gradient[clas]
for feat in features[:nr_feat]: for feat in features[:nr_feat]:
self.update_weight(feat.key, clas, feat.value * grad) clas_grad[feat.key] += d_loss * feat.value
moves.c[clas].do(stcls.c, moves.c[clas].label) moves.c[clas].do(stcls.c, moves.c[clas].label)
cdef feat_t key
cdef weight_t d_feat
for clas, clas_grad in enumerate(gradient):
for key, d_feat in clas_grad.items():
if d_feat != 0:
self.update_weight_ftrl(key, clas, d_feat)
cdef class Parser: cdef class Parser:
@ -161,7 +175,8 @@ cdef class Parser:
elif 'features' not in cfg: elif 'features' not in cfg:
cfg['features'] = self.feature_templates cfg['features'] = self.feature_templates
self.model = ParserModel(cfg['features']) self.model = ParserModel(cfg['features'])
self.model.l1_penalty = cfg.get('L1', 0.0) self.model.l1_penalty = cfg.get('L1', 1e-8)
self.model.learn_rate = cfg.get('learn_rate', 0.001)
self.cfg = cfg self.cfg = cfg
@ -298,12 +313,7 @@ cdef class Parser:
self.moves.set_costs(eg.c.is_valid, eg.c.costs, stcls, gold) self.moves.set_costs(eg.c.is_valid, eg.c.costs, stcls, gold)
self.model.set_scoresC(eg.c.scores, eg.c.features, eg.c.nr_feat) self.model.set_scoresC(eg.c.scores, eg.c.features, eg.c.nr_feat)
guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class) guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class)
if eg.c.costs[guess] > 0:
self.model.update(eg) self.model.update(eg)
#best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class)
#for feat in eg.c.features[:eg.c.nr_feat]:
# self.model.update_weight_ftrl(feat.key, best, -feat.value * eg.c.costs[guess])
# self.model.update_weight_ftrl(feat.key, guess, feat.value * eg.c.costs[guess])
action = self.moves.c[guess] action = self.moves.c[guess]
action.do(stcls.c, action.label) action.do(stcls.c, action.label)