Clean up parser multi-threading

This commit is contained in:
Matthew Honnibal 2017-10-19 00:25:21 +02:00
parent f018f2030c
commit bbfd7d8d5d
2 changed files with 53 additions and 62 deletions

View File

@ -15,8 +15,6 @@ cdef class Parser:
cdef readonly object cfg cdef readonly object cfg
cdef public object _multitasks cdef public object _multitasks
cdef void _parse_step(self, StateC* state, cdef void _parseC(self, StateC* state,
const float* feat_weights, const float* hW, const float* hb, const float* feat_weights, const float* hW, const float* hb,
int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil
#cdef int parseC(self, TokenC* tokens, int length, int nr_feat) nogil

View File

@ -395,7 +395,7 @@ cdef class Parser:
for batch in cytoolz.partition_all(batch_size, docs): for batch in cytoolz.partition_all(batch_size, docs):
batch = list(batch) batch = list(batch)
by_length = sorted(list(batch), key=lambda doc: len(doc)) by_length = sorted(list(batch), key=lambda doc: len(doc))
for subbatch in cytoolz.partition_all(32, by_length): for subbatch in cytoolz.partition_all(8, by_length):
subbatch = list(subbatch) subbatch = list(subbatch)
if beam_width == 1: if beam_width == 1:
parse_states = self.parse_batch(subbatch) parse_states = self.parse_batch(subbatch)
@ -412,57 +412,80 @@ cdef class Parser:
def parse_batch(self, docs): def parse_batch(self, docs):
cdef: cdef:
precompute_hiddens state2vec precompute_hiddens state2vec
StateClass state StateClass stcls
Pool mem Pool mem
const float* feat_weights const float* feat_weights
StateC* st StateC* st
vector[StateC*] next_step, this_step vector[StateC*] states
int nr_class, nr_feat, nr_piece, nr_dim, nr_state int guess, nr_class, nr_feat, nr_piece, nr_dim, nr_state, nr_step
int j
if isinstance(docs, Doc): if isinstance(docs, Doc):
docs = [docs] docs = [docs]
cuda_stream = get_cuda_stream() cuda_stream = get_cuda_stream()
(tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(docs, cuda_stream, (tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(docs, cuda_stream,
0.0) 0.0)
nr_state = len(docs) nr_state = len(docs)
nr_class = self.moves.n_moves nr_class = self.moves.n_moves
nr_dim = tokvecs.shape[1] nr_dim = tokvecs.shape[1]
nr_feat = self.nr_feature nr_feat = self.nr_feature
nr_piece = state2vec.nP nr_piece = state2vec.nP
states = self.moves.init_batch(docs) state_objs = self.moves.init_batch(docs)
for state in states: for stcls in state_objs:
if not state.c.is_final(): if not stcls.c.is_final():
next_step.push_back(state.c) states.push_back(stcls.c)
feat_weights = state2vec.get_feat_weights() feat_weights = state2vec.get_feat_weights()
cdef int i cdef int i
cdef np.ndarray token_ids = numpy.zeros((nr_state, nr_feat), dtype='i')
cdef np.ndarray is_valid = numpy.zeros((nr_state, nr_class), dtype='i')
cdef np.ndarray scores
cdef np.ndarray hidden_weights = numpy.ascontiguousarray(vec2scores._layers[-1].W.T) cdef np.ndarray hidden_weights = numpy.ascontiguousarray(vec2scores._layers[-1].W.T)
cdef np.ndarray hidden_bias = vec2scores._layers[-1].b cdef np.ndarray hidden_bias = vec2scores._layers[-1].b
hW = <float*>hidden_weights.data hW = <float*>hidden_weights.data
hb = <float*>hidden_bias.data hb = <float*>hidden_bias.data
cdef int nr_hidden = hidden_weights.shape[0] cdef int nr_hidden = hidden_weights.shape[0]
c_token_ids = <int*>token_ids.data
c_is_valid = <int*>is_valid.data with nogil:
cdef int has_hidden = not getattr(vec2scores, 'is_noop', False) for i in cython.parallel.prange(states.size(), num_threads=2,
cdef int nr_step schedule='guided'):
while not next_step.empty(): self._parseC(states[i],
nr_step = next_step.size() feat_weights, hW, hb,
for i in cython.parallel.prange(nr_step, num_threads=3, nr_class, nr_hidden, nr_feat, nr_piece)
nogil=True): return state_objs
self._parse_step(next_step[i],
feat_weights, hW, hb, nr_class, nr_hidden, nr_feat, nr_piece) cdef void _parseC(self, StateC* state,
this_step, next_step = next_step, this_step const float* feat_weights, const float* hW, const float* hb,
next_step.clear() int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil:
for st in this_step: token_ids = <int*>calloc(nr_feat, sizeof(int))
if not st.is_final(): is_valid = <int*>calloc(nr_class, sizeof(int))
next_step.push_back(st) vectors = <float*>calloc(nr_hidden * nr_piece, sizeof(float))
return states scores = <float*>calloc(nr_class, sizeof(float))
while not state.is_final():
state.set_context_tokens(token_ids, nr_feat)
memset(vectors, 0, nr_hidden * nr_piece * sizeof(float))
memset(scores, 0, nr_class * sizeof(float))
sum_state_features(vectors,
feat_weights, token_ids, 1, nr_feat, nr_hidden * nr_piece)
V = vectors
W = hW
for i in range(nr_hidden):
feature = V[0] if V[0] >= V[1] else V[1]
for j in range(nr_class):
scores[j] += feature * W[j]
W += nr_class
V += nr_piece
for i in range(nr_class):
scores[i] += hb[i]
self.moves.set_valid(is_valid, state)
guess = arg_max_if_valid(scores, is_valid, nr_class)
action = self.moves.c[guess]
action.do(state, action.label)
state.push_hist(guess)
free(token_ids)
free(is_valid)
free(vectors)
free(scores)
def beam_parse(self, docs, int beam_width=3, float beam_density=0.001): def beam_parse(self, docs, int beam_width=3, float beam_density=0.001):
cdef Beam beam cdef Beam beam
@ -515,36 +538,6 @@ cdef class Parser:
beams.append(beam) beams.append(beam)
return beams return beams
cdef void _parse_step(self, StateC* state,
const float* feat_weights, const float* hW, const float* hb,
int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil:
'''This only works with no hidden layers -- fast but inaccurate'''
token_ids = <int*>calloc(nr_feat, sizeof(int))
vector = <float*>calloc(nr_hidden * nr_piece, sizeof(float))
scores = <float*>calloc(nr_class, sizeof(float))
is_valid = <int*>calloc(nr_class, sizeof(int))
state.set_context_tokens(token_ids, nr_feat)
sum_state_features(vector,
feat_weights, token_ids, 1, nr_feat, nr_hidden * nr_piece)
for i in range(nr_hidden):
feature = Vec.max(&vector[i*nr_piece], nr_piece)
for j in range(nr_class):
scores[j] += feature * hW[j]
hW += nr_class
for i in range(nr_class):
scores[i] += hb[i]
self.moves.set_valid(is_valid, state)
guess = arg_max_if_valid(scores, is_valid, nr_class)
action = self.moves.c[guess]
action.do(state, action.label)
state.push_hist(guess)
free(is_valid)
free(scores)
free(vector)
free(token_ids)
def update(self, docs, golds, drop=0., sgd=None, losses=None): def update(self, docs, golds, drop=0., sgd=None, losses=None):
if not any(self.moves.has_gold(gold) for gold in golds): if not any(self.moves.has_gold(gold) for gold in golds):
return None return None