mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
WIP on beam parser. Currently segfaults.
This commit is contained in:
parent
b0d80dc9ae
commit
318b9e32ff
|
@ -1,6 +1,9 @@
|
|||
from libc.string cimport memcpy, memset
|
||||
from libc.stdlib cimport malloc, calloc, free
|
||||
from libc.stdint cimport uint32_t
|
||||
from libc.stdint cimport uint32_t, uint64_t
|
||||
|
||||
from murmurhash.mrmr cimport hash64
|
||||
|
||||
from ..vocab cimport EMPTY_LEXEME
|
||||
from ..structs cimport TokenC, Entity
|
||||
from ..lexeme cimport Lexeme
|
||||
|
@ -138,7 +141,7 @@ cdef cppclass StateC:
|
|||
else:
|
||||
ptr += 1
|
||||
return -1
|
||||
|
||||
|
||||
int R(int i, int idx) nogil const:
|
||||
if idx < 1:
|
||||
return -1
|
||||
|
@ -201,6 +204,21 @@ cdef cppclass StateC:
|
|||
else:
|
||||
return this.length - this._b_i
|
||||
|
||||
uint64_t hash() nogil const:
    # Fingerprint of the current state: 11 TokenC structs are copied by value
    # into a local array and the raw bytes of that array are hashed.
    # NOTE(review): S_/R_/L_/B_/E_ are presumably the stack, right-child,
    # left-child, buffer and entity token accessors -- confirm against their
    # definitions, which are outside this view.
    cdef TokenC[11] context
    # S(0) and S(1) are each needed more than once below; look them up once.
    cdef int s0 = this.S(0)
    cdef int s1 = this.S(1)
    context[0] = this.S_(2)[0]
    context[1] = this.S_(1)[0]
    context[2] = this.R_(s1, 1)[0]
    context[3] = this.L_(s0, 1)[0]
    context[4] = this.L_(s0, 2)[0]
    context[5] = this.S_(0)[0]
    context[6] = this.R_(s0, 2)[0]
    context[7] = this.R_(s0, 1)[0]
    context[8] = this.B_(0)[0]
    context[9] = this.E_(0)[0]
    context[10] = this.E_(1)[0]
    # this._s_i is passed as the third argument of hash64, so it contributes
    # to the result in addition to the token bytes.
    return hash64(context, sizeof(context), this._s_i)
|
||||
|
||||
void push() nogil:
|
||||
if this.B(0) != -1:
|
||||
this._stack[this._s_i] = this.B(0)
|
||||
|
@ -212,7 +230,7 @@ cdef cppclass StateC:
|
|||
void pop() nogil:
    # Decrement _s_i by one; do nothing when it is already below 1.
    if this._s_i < 1:
        return
    this._s_i -= 1
|
||||
|
||||
|
||||
void unshift() nogil:
|
||||
this._b_i -= 1
|
||||
this._buffer[this._b_i] = this.S(0)
|
||||
|
@ -281,7 +299,7 @@ cdef cppclass StateC:
|
|||
this._sent[i].ent_type = ent_type
|
||||
|
||||
void set_break(int i) nogil:
    # Flag token i as a sentence start and record the current _b_i in _break.
    # Indices outside [0, length) are ignored.
    if i < 0 or i >= this.length:
        return
    this._sent[i].sent_start = True
    this._break = this._b_i
|
||||
|
||||
|
@ -290,6 +308,8 @@ cdef cppclass StateC:
|
|||
memcpy(this._stack, src._stack, this.length * sizeof(int))
|
||||
memcpy(this._buffer, src._buffer, this.length * sizeof(int))
|
||||
memcpy(this._ents, src._ents, this.length * sizeof(Entity))
|
||||
memcpy(this.shifted, src.shifted, this.length * sizeof(this.shifted[0]))
|
||||
this.length = src.length
|
||||
this._b_i = src._b_i
|
||||
this._s_i = src._s_i
|
||||
this._e_i = src._e_i
|
||||
|
|
|
@ -126,14 +126,15 @@ cdef class BeamParser(Parser):
|
|||
violn.check_crf(pred, gold)
|
||||
assert pred.size >= 1
|
||||
assert gold.size >= 1
|
||||
#_check_train_integrity(pred, gold, gold_parse, self.moves)
|
||||
histories = zip(violn.p_probs, violn.p_hist) + zip(violn.g_probs, violn.g_hist)
|
||||
min_grad = 0.001 ** (itn+1)
|
||||
histories = [(grad, hist) for grad, hist in histories if abs(grad) >= min_grad]
|
||||
random.shuffle(histories)
|
||||
for grad, hist in histories:
|
||||
assert not math.isnan(grad) and not math.isinf(grad), hist
|
||||
self.model.update_from_history(self.moves, tokens, hist, grad)
|
||||
if pred.loss == 0:
|
||||
self.model.update_from_histories(self.moves, tokens, [(0.0, [])])
|
||||
elif True:
|
||||
#_check_train_integrity(pred, gold, gold_parse, self.moves)
|
||||
histories = zip(violn.p_probs, violn.p_hist) + zip(violn.g_probs, violn.g_hist)
|
||||
self.model.update_from_histories(self.moves, tokens, histories, min_grad=0.001**(itn+1))
|
||||
else:
|
||||
self.model.update_from_histories(self.moves, tokens,
|
||||
[(1.0, violn.p_hist[0]), (-1.0, violn.g_hist[0])])
|
||||
_cleanup(pred)
|
||||
_cleanup(gold)
|
||||
return pred.loss
|
||||
|
@ -173,7 +174,7 @@ cdef class BeamParser(Parser):
|
|||
if follow_gold:
|
||||
beam.advance(_transition_state, NULL, <void*>self.moves.c)
|
||||
else:
|
||||
beam.advance(_transition_state, NULL, <void*>self.moves.c)
|
||||
beam.advance(_transition_state, _hash_state, <void*>self.moves.c)
|
||||
beam.check_done(_check_final_state, NULL)
|
||||
|
||||
|
||||
|
|
|
@ -12,7 +12,9 @@ from cpython.exc cimport PyErr_CheckSignals
|
|||
from libc.stdint cimport uint32_t, uint64_t
|
||||
from libc.string cimport memset, memcpy
|
||||
from libc.stdlib cimport malloc, calloc, free
|
||||
|
||||
import os.path
|
||||
from collections import Counter
|
||||
from os import path
|
||||
import shutil
|
||||
import json
|
||||
|
@ -80,34 +82,46 @@ cdef class ParserModel(AveragedPerceptron):
|
|||
def update(self, Example eg):
    '''Regress the scores of the guessed and best classes towards their
    negative costs.

    Reads the scores, costs and features from `eg.c` and the predicted class
    from `eg.guess`. When the guess is already the class chosen by
    `arg_max_if_gold`, no weights are touched and 0.0 is returned. Otherwise
    both classes receive an FTRL update and the summed squared error between
    score and negative cost is returned.
    '''
    # The step counter advances even when no weights end up being changed.
    self.time += 1
    cdef weight_t loss = 0.0
    best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class)
    guess = eg.guess
    if guess == best:
        return loss
    for clas in [guess, best]:
        # The regression target for a class is its negated cost.
        loss += (-eg.c.costs[clas] - eg.c.scores[clas]) ** 2
        # Residual of the prediction: score minus target (-cost).
        d_loss = eg.c.scores[clas] - -eg.c.costs[clas]
        for feat in eg.c.features[:eg.c.nr_feat]:
            self.update_weight_ftrl(feat.key, clas, feat.value * d_loss)
    return loss
|
||||
|
||||
def update_from_history(self, TransitionSystem moves, Doc doc, history, weight_t grad):
    '''Replay one action history over `doc` and update weights as it goes.

    moves: transition system whose actions are indexed by the class ids in
        `history`.
    doc: document the parse state is initialised from.
    history: sequence of class ids, applied in order.
    grad: multiplier applied to every feature value before the update.

    At each step the features of the current state are extracted, the weight
    of (feature, class) is updated by `feat.value * grad` via
    `update_weight`, and the action is then applied to advance the state.
    '''
    cdef Pool mem = Pool()
    features = <FeatureC*>mem.alloc(self.nr_feat, sizeof(FeatureC))

    cdef StateClass stcls = StateClass.init(doc.c, doc.length)
    moves.initialize_state(stcls.c)

    cdef class_t clas
    self.time += 1
    cdef atom_t[CONTEXT_SIZE] atoms
    for clas in history:
        nr_feat = self.set_featuresC(atoms, features, stcls.c)
        for feat in features[:nr_feat]:
            self.update_weight(feat.key, clas, feat.value * grad)
        moves.c[clas].do(stcls.c, moves.c[clas].label)

def update_from_histories(self, TransitionSystem moves, Doc doc, histories, weight_t min_grad=0.0):
    '''Accumulate the gradient of several action histories, then apply it.

    moves: transition system whose actions are indexed by the class ids in
        each history.
    doc: document every replay starts from.
    histories: iterable of `(grad, history)` pairs, where `history` is a
        sequence of class ids.
    min_grad: pairs with `abs(grad) < min_grad` are dropped, as are pairs
        whose history is empty.

    Returns None. When no pair survives the filter, no weights are changed
    (the step counter `self.time` is still advanced).
    '''
    cdef Pool mem = Pool()
    features = <FeatureC*>mem.alloc(self.nr_feat, sizeof(FeatureC))

    cdef StateClass stcls

    cdef class_t clas
    cdef feat_t key
    cdef weight_t d_feat
    self.time += 1
    cdef atom_t[CONTEXT_SIZE] atoms
    histories = [(grad, hist) for grad, hist in histories if abs(grad) >= min_grad and hist]
    if not histories:
        return None
    # One feature->gradient table per class id, sized by the largest class id
    # that occurs in any surviving history.
    gradient = [Counter() for _ in range(max([max(h)+1 for _, h in histories]))]
    for d_loss, history in histories:
        # Every history is replayed from a freshly initialised state.
        stcls = StateClass.init(doc.c, doc.length)
        moves.initialize_state(stcls.c)
        for clas in history:
            nr_feat = self.set_featuresC(atoms, features, stcls.c)
            clas_grad = gradient[clas]
            for feat in features[:nr_feat]:
                clas_grad[feat.key] += d_loss * feat.value
            moves.c[clas].do(stcls.c, moves.c[clas].label)
    # Apply the summed gradient; entries that cancelled out to zero are skipped.
    for clas, clas_grad in enumerate(gradient):
        for key, d_feat in clas_grad.items():
            if d_feat != 0:
                self.update_weight_ftrl(key, clas, d_feat)
|
||||
|
||||
|
||||
cdef class Parser:
|
||||
|
@ -161,7 +175,8 @@ cdef class Parser:
|
|||
elif 'features' not in cfg:
|
||||
cfg['features'] = self.feature_templates
|
||||
self.model = ParserModel(cfg['features'])
|
||||
self.model.l1_penalty = cfg.get('L1', 0.0)
|
||||
self.model.l1_penalty = cfg.get('L1', 1e-8)
|
||||
self.model.learn_rate = cfg.get('learn_rate', 0.001)
|
||||
|
||||
self.cfg = cfg
|
||||
|
||||
|
@ -298,12 +313,7 @@ cdef class Parser:
|
|||
self.moves.set_costs(eg.c.is_valid, eg.c.costs, stcls, gold)
|
||||
self.model.set_scoresC(eg.c.scores, eg.c.features, eg.c.nr_feat)
|
||||
guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class)
|
||||
if eg.c.costs[guess] > 0:
|
||||
self.model.update(eg)
|
||||
#best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class)
|
||||
#for feat in eg.c.features[:eg.c.nr_feat]:
|
||||
# self.model.update_weight_ftrl(feat.key, best, -feat.value * eg.c.costs[guess])
|
||||
# self.model.update_weight_ftrl(feat.key, guess, feat.value * eg.c.costs[guess])
|
||||
self.model.update(eg)
|
||||
|
||||
action = self.moves.c[guess]
|
||||
action.do(stcls.c, action.label)
|
||||
|
|
Loading…
Reference in New Issue
Block a user