mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 02:16:32 +03:00
Remove use of ExampleC from beam parser
This commit is contained in:
parent
6b912731f8
commit
25513b8389
|
@ -1,13 +1,13 @@
|
||||||
from thinc.linear.avgtron cimport AveragedPerceptron
|
from thinc.linear.avgtron cimport AveragedPerceptron
|
||||||
from thinc.neural.nn cimport NeuralNet
|
from thinc.neural.nn cimport NeuralNet
|
||||||
from thinc.linear.features cimport ConjunctionExtracter
|
from thinc.linear.features cimport ConjunctionExtracter
|
||||||
from thinc.structs cimport NeuralNetC, ExampleC
|
from thinc.structs cimport NeuralNetC, FeatureC
|
||||||
|
|
||||||
|
|
||||||
cdef class ParserNeuralNet(NeuralNet):
|
cdef class ParserNeuralNet(NeuralNet):
|
||||||
cdef ConjunctionExtracter extracter
|
cdef ConjunctionExtracter extracter
|
||||||
cdef void set_featuresC(self, ExampleC* eg, const void* _state) nogil
|
cdef int _set_featuresC(self, FeatureC* feats, const void* _state) nogil
|
||||||
|
|
||||||
|
|
||||||
cdef class ParserPerceptron(AveragedPerceptron):
|
cdef class ParserPerceptron(AveragedPerceptron):
|
||||||
cdef void set_featuresC(self, ExampleC* eg, const void* _state) nogil
|
cdef int _set_featuresC(self, FeatureC* feats, const void* _state) nogil
|
||||||
|
|
|
@ -48,14 +48,14 @@ cdef class ParserPerceptron(AveragedPerceptron):
|
||||||
self.update_weight(feat.key, clas, feat.value * step)
|
self.update_weight(feat.key, clas, feat.value * step)
|
||||||
return int(loss)
|
return int(loss)
|
||||||
|
|
||||||
cdef void set_featuresC(self, ExampleC* eg, const void* _state) nogil:
|
cdef int _set_featuresC(self, FeatureC* feats, const void* _state) nogil:
|
||||||
|
cdef atom_t[CONTEXT_SIZE] context
|
||||||
state = <const StateC*>_state
|
state = <const StateC*>_state
|
||||||
fill_context(eg.atoms, state)
|
fill_context(context, state)
|
||||||
eg.nr_feat = self.extracter.set_features(eg.features, eg.atoms)
|
return self.extracter.set_features(feats, context)
|
||||||
|
|
||||||
def _update_from_history(self, TransitionSystem moves, Doc doc, history, weight_t grad):
|
def _update_from_history(self, TransitionSystem moves, Doc doc, history, weight_t grad):
|
||||||
cdef Pool mem = Pool()
|
cdef Pool mem = Pool()
|
||||||
cdef atom_t[CONTEXT_SIZE] context
|
|
||||||
features = <FeatureC*>mem.alloc(self.nr_feat, sizeof(FeatureC))
|
features = <FeatureC*>mem.alloc(self.nr_feat, sizeof(FeatureC))
|
||||||
|
|
||||||
cdef StateClass stcls = StateClass.init(doc.c, doc.length)
|
cdef StateClass stcls = StateClass.init(doc.c, doc.length)
|
||||||
|
@ -64,8 +64,7 @@ cdef class ParserPerceptron(AveragedPerceptron):
|
||||||
cdef class_t clas
|
cdef class_t clas
|
||||||
self.time += 1
|
self.time += 1
|
||||||
for clas in history:
|
for clas in history:
|
||||||
fill_context(context, stcls.c)
|
nr_feat = self._set_featuresC(features, stcls.c)
|
||||||
nr_feat = self.extracter.set_features(features, context)
|
|
||||||
for feat in features[:nr_feat]:
|
for feat in features[:nr_feat]:
|
||||||
self.update_weight(feat.key, clas, feat.value * grad)
|
self.update_weight(feat.key, clas, feat.value * grad)
|
||||||
moves.c[clas].do(stcls.c, moves.c[clas].label)
|
moves.c[clas].do(stcls.c, moves.c[clas].label)
|
||||||
|
@ -96,11 +95,10 @@ cdef class ParserNeuralNet(NeuralNet):
|
||||||
def nr_feat(self):
|
def nr_feat(self):
|
||||||
return 2000
|
return 2000
|
||||||
|
|
||||||
cdef void set_featuresC(self, ExampleC* eg, const void* _state) nogil:
|
cdef int _set_featuresC(self, FeatureC* feats, const void* _state) nogil:
|
||||||
memset(eg.features, 0, 2000 * sizeof(FeatureC))
|
memset(feats, 0, 2000 * sizeof(FeatureC))
|
||||||
state = <const StateC*>_state
|
state = <const StateC*>_state
|
||||||
fill_context(eg.atoms, state)
|
start = feats
|
||||||
feats = eg.features
|
|
||||||
|
|
||||||
feats = _add_token(feats, 0, state.S_(0), 1.0)
|
feats = _add_token(feats, 0, state.S_(0), 1.0)
|
||||||
feats = _add_token(feats, 4, state.S_(1), 1.0)
|
feats = _add_token(feats, 4, state.S_(1), 1.0)
|
||||||
|
@ -132,7 +130,7 @@ cdef class ParserNeuralNet(NeuralNet):
|
||||||
state.R_(state.S(0), 2))
|
state.R_(state.S(0), 2))
|
||||||
feats = _add_pos_trigram(feats, 75, state.S_(0), state.L_(state.S(0), 1),
|
feats = _add_pos_trigram(feats, 75, state.S_(0), state.L_(state.S(0), 1),
|
||||||
state.L_(state.S(0), 2))
|
state.L_(state.S(0), 2))
|
||||||
eg.nr_feat = feats - eg.features
|
return feats - start
|
||||||
|
|
||||||
cdef void _set_delta_lossC(self, weight_t* delta_loss,
|
cdef void _set_delta_lossC(self, weight_t* delta_loss,
|
||||||
const weight_t* cost, const weight_t* scores) nogil:
|
const weight_t* cost, const weight_t* scores) nogil:
|
||||||
|
@ -143,8 +141,11 @@ cdef class ParserNeuralNet(NeuralNet):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _update_from_history(self, TransitionSystem moves, Doc doc, history, weight_t grad):
|
def _update_from_history(self, TransitionSystem moves, Doc doc, history, weight_t grad):
|
||||||
cdef Example py_eg = Example(nr_class=moves.n_moves, nr_atom=CONTEXT_SIZE,
|
cdef Pool mem = Pool()
|
||||||
nr_feat=self.nr_feat, widths=self.widths)
|
features = <FeatureC*>mem.alloc(self.nr_feat, sizeof(FeatureC))
|
||||||
|
is_valid = <int*>mem.alloc(self.moves.n_moves, sizeof(int))
|
||||||
|
costs = <weight_t*>mem.alloc(self.moves.n_moves, sizeof(weight_t))
|
||||||
|
|
||||||
stcls = StateClass.init(doc.c, doc.length)
|
stcls = StateClass.init(doc.c, doc.length)
|
||||||
moves.initialize_state(stcls.c)
|
moves.initialize_state(stcls.c)
|
||||||
cdef uint64_t[2] key
|
cdef uint64_t[2] key
|
||||||
|
@ -152,8 +153,8 @@ cdef class ParserNeuralNet(NeuralNet):
|
||||||
key[1] = 0
|
key[1] = 0
|
||||||
cdef uint64_t clas
|
cdef uint64_t clas
|
||||||
for clas in history:
|
for clas in history:
|
||||||
self.set_featuresC(py_eg.c, stcls.c)
|
nr_feat = self._set_featuresC(features, stcls.c)
|
||||||
moves.set_valid(py_eg.c.is_valid, stcls.c)
|
moves.set_valid(is_valid, stcls.c)
|
||||||
# Update with a sparse gradient: everything's 0, except our class.
|
# Update with a sparse gradient: everything's 0, except our class.
|
||||||
# Remember, this is a component of the global update. It's not our
|
# Remember, this is a component of the global update. It's not our
|
||||||
# "job" here to think about the other beam candidates. We just want
|
# "job" here to think about the other beam candidates. We just want
|
||||||
|
@ -162,13 +163,11 @@ cdef class ParserNeuralNet(NeuralNet):
|
||||||
# We therefore have a key that indicates the current sequence, so that
|
# We therefore have a key that indicates the current sequence, so that
|
||||||
# the model can merge updates that refer to the same state together,
|
# the model can merge updates that refer to the same state together,
|
||||||
# by summing their gradients.
|
# by summing their gradients.
|
||||||
memset(py_eg.c.costs, 0, self.moves.n_moves)
|
memset(costs, 0, self.moves.n_moves)
|
||||||
py_eg.c.costs[clas] = grad
|
costs[clas] = grad
|
||||||
self.updateC(
|
self.updateC(features,
|
||||||
py_eg.c.features, py_eg.c.nr_feat, True, py_eg.c.costs, py_eg.c.is_valid,
|
nr_feat, True, costs, is_valid, False, key=key[0])
|
||||||
False, key=key[0])
|
|
||||||
moves.c[clas].do(stcls.c, self.moves.c[clas].label)
|
moves.c[clas].do(stcls.c, self.moves.c[clas].label)
|
||||||
py_eg.c.reset()
|
|
||||||
# Build a hash of the state sequence.
|
# Build a hash of the state sequence.
|
||||||
# Position 0 represents the previous sequence, position 1 the new class.
|
# Position 0 represents the previous sequence, position 1 the new class.
|
||||||
# So we want to do:
|
# So we want to do:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user