mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 01:34:30 +03:00
Fix refactored parser
This commit is contained in:
parent
01c4e13b02
commit
bde3be1ad1
|
@ -57,11 +57,11 @@ cdef WeightsC get_c_weights(model) except *:
|
||||||
cdef SizesC get_c_sizes(model, int batch_size) except *:
|
cdef SizesC get_c_sizes(model, int batch_size) except *:
|
||||||
cdef SizesC output
|
cdef SizesC output
|
||||||
output.states = batch_size
|
output.states = batch_size
|
||||||
output.classes = model.nO
|
output.classes = model.vec2scores.nO
|
||||||
output.hiddens = model.nH
|
output.hiddens = model.state2vec.nO
|
||||||
output.pieces = model.nP
|
output.pieces = model.state2vec.nP
|
||||||
output.feats = model.nF
|
output.feats = model.state2vec.nF
|
||||||
output.embed_width = model.nI
|
output.embed_width = model.tokvecs.shape[1]
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
@ -71,9 +71,10 @@ cdef void resize_activations(ActivationsC* A, SizesC n) nogil:
|
||||||
return
|
return
|
||||||
if A._max_size == 0:
|
if A._max_size == 0:
|
||||||
A.token_ids = <int*>calloc(n.states * n.feats, sizeof(A.token_ids[0]))
|
A.token_ids = <int*>calloc(n.states * n.feats, sizeof(A.token_ids[0]))
|
||||||
A.vectors = <float*>calloc(n.states * n.hiddens, sizeof(A.hiddens[0]))
|
A.vectors = <float*>calloc(n.states * n.embed_width, sizeof(A.vectors[0]))
|
||||||
A.scores = <float*>calloc(n.states * n.classes, sizeof(A.scores[0]))
|
A.scores = <float*>calloc(n.states * n.classes, sizeof(A.scores[0]))
|
||||||
A.unmaxed = <float*>calloc(n.states * n.hiddens * n.feats, sizeof(A.unmaxed[0]))
|
A.unmaxed = <float*>calloc(n.states * n.hiddens * n.pieces, sizeof(A.unmaxed[0]))
|
||||||
|
A.hiddens = <float*>calloc(n.states * n.hiddens, sizeof(A.hiddens[0]))
|
||||||
A.is_valid = <int*>calloc(n.states * n.classes, sizeof(A.is_valid[0]))
|
A.is_valid = <int*>calloc(n.states * n.classes, sizeof(A.is_valid[0]))
|
||||||
A._max_size = n.states
|
A._max_size = n.states
|
||||||
else:
|
else:
|
||||||
|
@ -84,7 +85,9 @@ cdef void resize_activations(ActivationsC* A, SizesC n) nogil:
|
||||||
A.scores = <float*>realloc(A.scores,
|
A.scores = <float*>realloc(A.scores,
|
||||||
n.states * n.classes * sizeof(A.scores[0]))
|
n.states * n.classes * sizeof(A.scores[0]))
|
||||||
A.unmaxed = <float*>realloc(A.unmaxed,
|
A.unmaxed = <float*>realloc(A.unmaxed,
|
||||||
n.states * n.hiddens * n.feats * sizeof(A.unmaxed[0]))
|
n.states * n.hiddens * n.pieces * sizeof(A.unmaxed[0]))
|
||||||
|
A.hiddens = <float*>realloc(A.hiddens,
|
||||||
|
n.states * n.hiddens * sizeof(A.hiddens[0]))
|
||||||
A.is_valid = <int*>realloc(A.is_valid,
|
A.is_valid = <int*>realloc(A.is_valid,
|
||||||
n.states * n.classes * sizeof(A.is_valid[0]))
|
n.states * n.classes * sizeof(A.is_valid[0]))
|
||||||
A._max_size = n.states
|
A._max_size = n.states
|
||||||
|
@ -94,27 +97,28 @@ cdef void resize_activations(ActivationsC* A, SizesC n) nogil:
|
||||||
cdef void predict_states(ActivationsC* A, StateC** states,
|
cdef void predict_states(ActivationsC* A, StateC** states,
|
||||||
const WeightsC* W, SizesC n) nogil:
|
const WeightsC* W, SizesC n) nogil:
|
||||||
resize_activations(A, n)
|
resize_activations(A, n)
|
||||||
|
memset(A.unmaxed, 0, n.states * n.hiddens * n.pieces * sizeof(float))
|
||||||
|
memset(A.hiddens, 0, n.states * n.hiddens * sizeof(float))
|
||||||
for i in range(n.states):
|
for i in range(n.states):
|
||||||
state = states[i]
|
state = states[i]
|
||||||
state.set_context_tokens(A.token_ids, n.feats)
|
state.set_context_tokens(A.token_ids, n.feats)
|
||||||
memset(A.unmaxed, 0, n.hiddens * n.pieces * sizeof(float))
|
|
||||||
sum_state_features(A.unmaxed,
|
sum_state_features(A.unmaxed,
|
||||||
W.feat_weights, A.token_ids, 1, n.feats, n.hiddens * n.pieces)
|
W.feat_weights, A.token_ids, 1, n.feats, n.hiddens * n.pieces)
|
||||||
VecVec.add_i(A.unmaxed,
|
VecVec.add_i(A.unmaxed,
|
||||||
W.feat_bias, 1., n.hiddens * n.pieces)
|
W.feat_bias, 1., n.hiddens * n.pieces)
|
||||||
state_vector = &A.vectors[i*n.hiddens]
|
|
||||||
for j in range(n.hiddens):
|
for j in range(n.hiddens):
|
||||||
index = j * n.pieces
|
index = j * n.pieces
|
||||||
which = Vec.arg_max(&A.unmaxed[index], n.pieces)
|
which = Vec.arg_max(&A.unmaxed[index], n.pieces)
|
||||||
state_vector[j] = A.unmaxed[index + which]
|
A.hiddens[i*n.hiddens + j] = A.unmaxed[index + which]
|
||||||
# Compute hidden-to-output
|
memset(A.scores, 0, n.states * n.classes * sizeof(float))
|
||||||
openblas.simple_gemm(A.scores, n.states, n.classes,
|
# Compute hidden-to-output
|
||||||
A.vectors, n.states, n.hiddens,
|
openblas.simple_gemm(A.scores, n.states, n.classes,
|
||||||
W.hidden_weights, n.hiddens, n.classes, 0, 0)
|
A.hiddens, n.states, n.hiddens,
|
||||||
# Add bias
|
W.hidden_weights, n.classes, n.hiddens, 0, 1)
|
||||||
for i in range(n.states):
|
# Add bias
|
||||||
VecVec.add_i(&A.scores[i*n.classes],
|
for i in range(n.states):
|
||||||
W.hidden_bias, 1., n.classes)
|
VecVec.add_i(&A.scores[i*n.classes],
|
||||||
|
W.hidden_bias, 1., n.classes)
|
||||||
|
|
||||||
|
|
||||||
cdef void sum_state_features(float* output,
|
cdef void sum_state_features(float* output,
|
||||||
|
@ -241,14 +245,22 @@ class ParserStepModel(Model):
|
||||||
self.cuda_stream = util.get_cuda_stream()
|
self.cuda_stream = util.get_cuda_stream()
|
||||||
self.backprops = []
|
self.backprops = []
|
||||||
|
|
||||||
|
@property
|
||||||
|
def nO(self):
|
||||||
|
return self.state2vec.nO
|
||||||
|
|
||||||
def begin_update(self, states, drop=0.):
|
def begin_update(self, states, drop=0.):
|
||||||
token_ids = self.get_token_ids(states)
|
token_ids = self.get_token_ids(states)
|
||||||
vector, get_d_tokvecs = self.state2vec.begin_update(token_ids, drop=0.0)
|
vector, get_d_tokvecs = self.state2vec.begin_update(token_ids, drop=0.0)
|
||||||
vector, bp_dropout = self.ops.dropout(vector, drop)
|
mask = self.ops.get_dropout_mask(vector.shape, drop)
|
||||||
|
if mask is not None:
|
||||||
|
vector *= mask
|
||||||
scores, get_d_vector = self.vec2scores.begin_update(vector, drop=drop)
|
scores, get_d_vector = self.vec2scores.begin_update(vector, drop=drop)
|
||||||
|
|
||||||
def backprop_parser_step(d_scores, sgd=None):
|
def backprop_parser_step(d_scores, sgd=None):
|
||||||
d_vector = bp_dropout(get_d_vector(d_scores, sgd=sgd))
|
d_vector = get_d_vector(d_scores, sgd=sgd)
|
||||||
|
if mask is not None:
|
||||||
|
d_vector *= mask
|
||||||
if isinstance(self.ops, CupyOps) \
|
if isinstance(self.ops, CupyOps) \
|
||||||
and not isinstance(token_ids, self.state2vec.ops.xp.ndarray):
|
and not isinstance(token_ids, self.state2vec.ops.xp.ndarray):
|
||||||
# Move token_ids and d_vector to GPU, asynchronously
|
# Move token_ids and d_vector to GPU, asynchronously
|
||||||
|
|
|
@ -183,7 +183,6 @@ cdef class Parser:
|
||||||
if beam_density is None:
|
if beam_density is None:
|
||||||
beam_density = self.cfg.get('beam_density', 0.0)
|
beam_density = self.cfg.get('beam_density', 0.0)
|
||||||
states = self.predict([doc])
|
states = self.predict([doc])
|
||||||
#beam_width=beam_width, beam_density=beam_density)
|
|
||||||
self.set_annotations([doc], states, tensors=None)
|
self.set_annotations([doc], states, tensors=None)
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
@ -214,7 +213,7 @@ cdef class Parser:
|
||||||
for doc in batch_in_order:
|
for doc in batch_in_order:
|
||||||
yield doc
|
yield doc
|
||||||
|
|
||||||
def predict(self, docs):
|
def predict(self, docs, beam_width=1, beam_density=0.):
|
||||||
if isinstance(docs, Doc):
|
if isinstance(docs, Doc):
|
||||||
docs = [docs]
|
docs = [docs]
|
||||||
|
|
||||||
|
@ -223,9 +222,10 @@ cdef class Parser:
|
||||||
state_objs = self.moves.init_batch(docs)
|
state_objs = self.moves.init_batch(docs)
|
||||||
for state in state_objs:
|
for state in state_objs:
|
||||||
states.push_back(state.c)
|
states.push_back(state.c)
|
||||||
|
# Prepare the stepwise model, and get the callback for finishing the batch
|
||||||
model = self.model(docs)
|
model = self.model(docs)
|
||||||
cdef WeightsC weights = get_c_weights(model)
|
weights = get_c_weights(model)
|
||||||
cdef SizesC sizes = get_c_sizes(self.model, len(state_objs))
|
sizes = get_c_sizes(model, states.size())
|
||||||
with nogil:
|
with nogil:
|
||||||
self._parseC(&states[0],
|
self._parseC(&states[0],
|
||||||
weights, sizes)
|
weights, sizes)
|
||||||
|
@ -305,7 +305,7 @@ cdef class Parser:
|
||||||
states, golds = zip(*states_golds)
|
states, golds = zip(*states_golds)
|
||||||
scores, backprop = model.begin_update(states, drop=drop)
|
scores, backprop = model.begin_update(states, drop=drop)
|
||||||
d_scores = self.get_batch_loss(states, golds, scores, losses)
|
d_scores = self.get_batch_loss(states, golds, scores, losses)
|
||||||
backprop(d_scores)
|
backprop(d_scores, sgd=sgd)
|
||||||
# Follow the predicted action
|
# Follow the predicted action
|
||||||
self.transition_batch(states, scores)
|
self.transition_batch(states, scores)
|
||||||
states_golds = [eg for eg in states_golds if not eg[0].is_final()]
|
states_golds = [eg for eg in states_golds if not eg[0].is_final()]
|
||||||
|
@ -369,7 +369,7 @@ cdef class Parser:
|
||||||
c_d_scores += d_scores.shape[1]
|
c_d_scores += d_scores.shape[1]
|
||||||
if losses is not None:
|
if losses is not None:
|
||||||
losses.setdefault(self.name, 0.)
|
losses.setdefault(self.name, 0.)
|
||||||
losses[self.name] += d_scores.sum()
|
losses[self.name] += (d_scores**2).sum()
|
||||||
return d_scores
|
return d_scores
|
||||||
|
|
||||||
def create_optimizer(self):
|
def create_optimizer(self):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user