mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-27 20:33:42 +03:00
WIP on beam parser. Currently segfaults.
This commit is contained in:
parent
b0d80dc9ae
commit
318b9e32ff
|
@ -1,6 +1,9 @@
|
||||||
from libc.string cimport memcpy, memset
|
from libc.string cimport memcpy, memset
|
||||||
from libc.stdlib cimport malloc, calloc, free
|
from libc.stdlib cimport malloc, calloc, free
|
||||||
from libc.stdint cimport uint32_t
|
from libc.stdint cimport uint32_t, uint64_t
|
||||||
|
|
||||||
|
from murmurhash.mrmr cimport hash64
|
||||||
|
|
||||||
from ..vocab cimport EMPTY_LEXEME
|
from ..vocab cimport EMPTY_LEXEME
|
||||||
from ..structs cimport TokenC, Entity
|
from ..structs cimport TokenC, Entity
|
||||||
from ..lexeme cimport Lexeme
|
from ..lexeme cimport Lexeme
|
||||||
|
@ -201,6 +204,21 @@ cdef cppclass StateC:
|
||||||
else:
|
else:
|
||||||
return this.length - this._b_i
|
return this.length - this._b_i
|
||||||
|
|
||||||
|
uint64_t hash() nogil const:
|
||||||
|
cdef TokenC[11] sig
|
||||||
|
sig[0] = this.S_(2)[0]
|
||||||
|
sig[1] = this.S_(1)[0]
|
||||||
|
sig[2] = this.R_(this.S(1), 1)[0]
|
||||||
|
sig[3] = this.L_(this.S(0), 1)[0]
|
||||||
|
sig[4] = this.L_(this.S(0), 2)[0]
|
||||||
|
sig[5] = this.S_(0)[0]
|
||||||
|
sig[6] = this.R_(this.S(0), 2)[0]
|
||||||
|
sig[7] = this.R_(this.S(0), 1)[0]
|
||||||
|
sig[8] = this.B_(0)[0]
|
||||||
|
sig[9] = this.E_(0)[0]
|
||||||
|
sig[10] = this.E_(1)[0]
|
||||||
|
return hash64(sig, sizeof(sig), this._s_i)
|
||||||
|
|
||||||
void push() nogil:
|
void push() nogil:
|
||||||
if this.B(0) != -1:
|
if this.B(0) != -1:
|
||||||
this._stack[this._s_i] = this.B(0)
|
this._stack[this._s_i] = this.B(0)
|
||||||
|
@ -290,6 +308,8 @@ cdef cppclass StateC:
|
||||||
memcpy(this._stack, src._stack, this.length * sizeof(int))
|
memcpy(this._stack, src._stack, this.length * sizeof(int))
|
||||||
memcpy(this._buffer, src._buffer, this.length * sizeof(int))
|
memcpy(this._buffer, src._buffer, this.length * sizeof(int))
|
||||||
memcpy(this._ents, src._ents, this.length * sizeof(Entity))
|
memcpy(this._ents, src._ents, this.length * sizeof(Entity))
|
||||||
|
memcpy(this.shifted, src.shifted, this.length * sizeof(this.shifted[0]))
|
||||||
|
this.length = src.length
|
||||||
this._b_i = src._b_i
|
this._b_i = src._b_i
|
||||||
this._s_i = src._s_i
|
this._s_i = src._s_i
|
||||||
this._e_i = src._e_i
|
this._e_i = src._e_i
|
||||||
|
|
|
@ -126,14 +126,15 @@ cdef class BeamParser(Parser):
|
||||||
violn.check_crf(pred, gold)
|
violn.check_crf(pred, gold)
|
||||||
assert pred.size >= 1
|
assert pred.size >= 1
|
||||||
assert gold.size >= 1
|
assert gold.size >= 1
|
||||||
|
if pred.loss == 0:
|
||||||
|
self.model.update_from_histories(self.moves, tokens, [(0.0, [])])
|
||||||
|
elif True:
|
||||||
#_check_train_integrity(pred, gold, gold_parse, self.moves)
|
#_check_train_integrity(pred, gold, gold_parse, self.moves)
|
||||||
histories = zip(violn.p_probs, violn.p_hist) + zip(violn.g_probs, violn.g_hist)
|
histories = zip(violn.p_probs, violn.p_hist) + zip(violn.g_probs, violn.g_hist)
|
||||||
min_grad = 0.001 ** (itn+1)
|
self.model.update_from_histories(self.moves, tokens, histories, min_grad=0.001**(itn+1))
|
||||||
histories = [(grad, hist) for grad, hist in histories if abs(grad) >= min_grad]
|
else:
|
||||||
random.shuffle(histories)
|
self.model.update_from_histories(self.moves, tokens,
|
||||||
for grad, hist in histories:
|
[(1.0, violn.p_hist[0]), (-1.0, violn.g_hist[0])])
|
||||||
assert not math.isnan(grad) and not math.isinf(grad), hist
|
|
||||||
self.model.update_from_history(self.moves, tokens, hist, grad)
|
|
||||||
_cleanup(pred)
|
_cleanup(pred)
|
||||||
_cleanup(gold)
|
_cleanup(gold)
|
||||||
return pred.loss
|
return pred.loss
|
||||||
|
@ -173,7 +174,7 @@ cdef class BeamParser(Parser):
|
||||||
if follow_gold:
|
if follow_gold:
|
||||||
beam.advance(_transition_state, NULL, <void*>self.moves.c)
|
beam.advance(_transition_state, NULL, <void*>self.moves.c)
|
||||||
else:
|
else:
|
||||||
beam.advance(_transition_state, NULL, <void*>self.moves.c)
|
beam.advance(_transition_state, _hash_state, <void*>self.moves.c)
|
||||||
beam.check_done(_check_final_state, NULL)
|
beam.check_done(_check_final_state, NULL)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,9 @@ from cpython.exc cimport PyErr_CheckSignals
|
||||||
from libc.stdint cimport uint32_t, uint64_t
|
from libc.stdint cimport uint32_t, uint64_t
|
||||||
from libc.string cimport memset, memcpy
|
from libc.string cimport memset, memcpy
|
||||||
from libc.stdlib cimport malloc, calloc, free
|
from libc.stdlib cimport malloc, calloc, free
|
||||||
|
|
||||||
import os.path
|
import os.path
|
||||||
|
from collections import Counter
|
||||||
from os import path
|
from os import path
|
||||||
import shutil
|
import shutil
|
||||||
import json
|
import json
|
||||||
|
@ -80,34 +82,46 @@ cdef class ParserModel(AveragedPerceptron):
|
||||||
def update(self, Example eg):
|
def update(self, Example eg):
|
||||||
'''Does regression on negative cost. Sort of cute?'''
|
'''Does regression on negative cost. Sort of cute?'''
|
||||||
self.time += 1
|
self.time += 1
|
||||||
cdef weight_t loss = 0.0
|
|
||||||
best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class)
|
best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class)
|
||||||
for clas in range(eg.c.nr_class):
|
guess = eg.guess
|
||||||
if not eg.c.is_valid[clas]:
|
cdef weight_t loss = 0.0
|
||||||
continue
|
if guess == best:
|
||||||
if eg.c.scores[clas] < eg.c.scores[best]:
|
return loss
|
||||||
continue
|
for clas in [guess, best]:
|
||||||
loss += (-eg.c.costs[clas] - eg.c.scores[clas]) ** 2
|
loss += (-eg.c.costs[clas] - eg.c.scores[clas]) ** 2
|
||||||
d_loss = -2 * (-eg.c.costs[clas] - eg.c.scores[clas])
|
d_loss = eg.c.scores[clas] - -eg.c.costs[clas]
|
||||||
for feat in eg.c.features[:eg.c.nr_feat]:
|
for feat in eg.c.features[:eg.c.nr_feat]:
|
||||||
self.update_weight_ftrl(feat.key, clas, feat.value * d_loss)
|
self.update_weight_ftrl(feat.key, clas, feat.value * d_loss)
|
||||||
return int(loss)
|
return loss
|
||||||
|
|
||||||
def update_from_history(self, TransitionSystem moves, Doc doc, history, weight_t grad):
|
def update_from_histories(self, TransitionSystem moves, Doc doc, histories, weight_t min_grad=0.0):
|
||||||
cdef Pool mem = Pool()
|
cdef Pool mem = Pool()
|
||||||
features = <FeatureC*>mem.alloc(self.nr_feat, sizeof(FeatureC))
|
features = <FeatureC*>mem.alloc(self.nr_feat, sizeof(FeatureC))
|
||||||
|
|
||||||
cdef StateClass stcls = StateClass.init(doc.c, doc.length)
|
cdef StateClass stcls
|
||||||
moves.initialize_state(stcls.c)
|
|
||||||
|
|
||||||
cdef class_t clas
|
cdef class_t clas
|
||||||
self.time += 1
|
self.time += 1
|
||||||
cdef atom_t[CONTEXT_SIZE] atoms
|
cdef atom_t[CONTEXT_SIZE] atoms
|
||||||
|
histories = [(grad, hist) for grad, hist in histories if abs(grad) >= min_grad and hist]
|
||||||
|
if not histories:
|
||||||
|
return None
|
||||||
|
gradient = [Counter() for _ in range(max([max(h)+1 for _, h in histories]))]
|
||||||
|
for d_loss, history in histories:
|
||||||
|
stcls = StateClass.init(doc.c, doc.length)
|
||||||
|
moves.initialize_state(stcls.c)
|
||||||
for clas in history:
|
for clas in history:
|
||||||
nr_feat = self.set_featuresC(atoms, features, stcls.c)
|
nr_feat = self.set_featuresC(atoms, features, stcls.c)
|
||||||
|
clas_grad = gradient[clas]
|
||||||
for feat in features[:nr_feat]:
|
for feat in features[:nr_feat]:
|
||||||
self.update_weight(feat.key, clas, feat.value * grad)
|
clas_grad[feat.key] += d_loss * feat.value
|
||||||
moves.c[clas].do(stcls.c, moves.c[clas].label)
|
moves.c[clas].do(stcls.c, moves.c[clas].label)
|
||||||
|
cdef feat_t key
|
||||||
|
cdef weight_t d_feat
|
||||||
|
for clas, clas_grad in enumerate(gradient):
|
||||||
|
for key, d_feat in clas_grad.items():
|
||||||
|
if d_feat != 0:
|
||||||
|
self.update_weight_ftrl(key, clas, d_feat)
|
||||||
|
|
||||||
|
|
||||||
cdef class Parser:
|
cdef class Parser:
|
||||||
|
@ -161,7 +175,8 @@ cdef class Parser:
|
||||||
elif 'features' not in cfg:
|
elif 'features' not in cfg:
|
||||||
cfg['features'] = self.feature_templates
|
cfg['features'] = self.feature_templates
|
||||||
self.model = ParserModel(cfg['features'])
|
self.model = ParserModel(cfg['features'])
|
||||||
self.model.l1_penalty = cfg.get('L1', 0.0)
|
self.model.l1_penalty = cfg.get('L1', 1e-8)
|
||||||
|
self.model.learn_rate = cfg.get('learn_rate', 0.001)
|
||||||
|
|
||||||
self.cfg = cfg
|
self.cfg = cfg
|
||||||
|
|
||||||
|
@ -298,12 +313,7 @@ cdef class Parser:
|
||||||
self.moves.set_costs(eg.c.is_valid, eg.c.costs, stcls, gold)
|
self.moves.set_costs(eg.c.is_valid, eg.c.costs, stcls, gold)
|
||||||
self.model.set_scoresC(eg.c.scores, eg.c.features, eg.c.nr_feat)
|
self.model.set_scoresC(eg.c.scores, eg.c.features, eg.c.nr_feat)
|
||||||
guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class)
|
guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class)
|
||||||
if eg.c.costs[guess] > 0:
|
|
||||||
self.model.update(eg)
|
self.model.update(eg)
|
||||||
#best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class)
|
|
||||||
#for feat in eg.c.features[:eg.c.nr_feat]:
|
|
||||||
# self.model.update_weight_ftrl(feat.key, best, -feat.value * eg.c.costs[guess])
|
|
||||||
# self.model.update_weight_ftrl(feat.key, guess, feat.value * eg.c.costs[guess])
|
|
||||||
|
|
||||||
action = self.moves.c[guess]
|
action = self.moves.c[guess]
|
||||||
action.do(stcls.c, action.label)
|
action.do(stcls.c, action.label)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user