Put the parsing loop in a nogil prange block

This commit is contained in:
Matthew Honnibal 2017-05-22 17:58:12 -05:00
parent 0264447c4d
commit 8a9e318deb
3 changed files with 133 additions and 43 deletions

View File

@ -71,6 +71,36 @@ cdef cppclass StateC:
free(this._stack - PADDING) free(this._stack - PADDING)
free(this.shifted - PADDING) free(this.shifted - PADDING)
void set_context_tokens(int* ids, int n) nogil:
if n == 13:
ids[0] = this.B(0)
ids[1] = this.B(1)
ids[2] = this.S(0)
ids[3] = this.S(1)
ids[4] = this.S(2)
ids[5] = this.L(this.S(0), 1)
ids[6] = this.L(this.S(0), 2)
ids[6] = this.R(this.S(0), 1)
ids[7] = this.L(this.B(0), 1)
ids[8] = this.R(this.S(0), 2)
ids[9] = this.L(this.S(1), 1)
ids[10] = this.L(this.S(1), 2)
ids[11] = this.R(this.S(1), 1)
ids[12] = this.R(this.S(1), 2)
elif n == 6:
ids[0] = this.B(0)-1
ids[1] = this.B(0)
ids[2] = this.B(1)
ids[3] = this.E(0)
ids[4] = this.E(0)-1
ids[5] = this.E(0)+1
else:
# TODO error =/
pass
for i in range(n):
if ids[i] >= 0:
ids[i] += this.offset
int S(int i) nogil const: int S(int i) nogil const:
if i >= this._s_i: if i >= this._s_i:
return -1 return -1

View File

@ -14,4 +14,8 @@ cdef class Parser:
cdef readonly TransitionSystem moves cdef readonly TransitionSystem moves
cdef readonly object cfg cdef readonly object cfg
cdef void _parse_step(self, StateC* state,
const float* feat_weights,
int nr_class, int nr_feat) nogil
#cdef int parseC(self, TokenC* tokens, int length, int nr_feat) nogil #cdef int parseC(self, TokenC* tokens, int length, int nr_feat) nogil

View File

@ -18,6 +18,7 @@ import dill
import numpy.random import numpy.random
cimport numpy as np cimport numpy as np
from libcpp.vector cimport vector
from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
from cpython.exc cimport PyErr_CheckSignals from cpython.exc cimport PyErr_CheckSignals
from libc.stdint cimport uint32_t, uint64_t from libc.stdint cimport uint32_t, uint64_t
@ -33,7 +34,7 @@ from murmurhash.mrmr cimport hash64
from preshed.maps cimport MapStruct from preshed.maps cimport MapStruct
from preshed.maps cimport map_get from preshed.maps cimport map_get
from thinc.api import layerize, chain from thinc.api import layerize, chain, noop, clone
from thinc.neural import Model, Affine, ELU, ReLu, Maxout from thinc.neural import Model, Affine, ELU, ReLu, Maxout
from thinc.neural.ops import NumpyOps, CupyOps from thinc.neural.ops import NumpyOps, CupyOps
@ -111,27 +112,30 @@ cdef class precompute_hiddens:
self._cached = cached self._cached = cached
self._bp_hiddens = bp_features self._bp_hiddens = bp_features
cdef const float* get_feat_weights(self) except NULL:
if not self._is_synchronized \
and self._cuda_stream is not None:
self._cuda_stream.synchronize()
self._is_synchronized = True
return <float*>self._cached.data
def __call__(self, X): def __call__(self, X):
return self.begin_update(X)[0] return self.begin_update(X)[0]
def begin_update(self, token_ids, drop=0.): def begin_update(self, token_ids, drop=0.):
self._features.fill(0) self._features.fill(0)
if not self._is_synchronized \
and self._cuda_stream is not None:
self._cuda_stream.synchronize()
self._is_synchronized = True
# This is tricky, but (assuming GPU available); # This is tricky, but (assuming GPU available);
# - Input to forward on CPU # - Input to forward on CPU
# - Output from forward on CPU # - Output from forward on CPU
# - Input to backward on GPU! # - Input to backward on GPU!
# - Output from backward on GPU # - Output from backward on GPU
cdef np.ndarray state_vector = self._features[:len(token_ids)] cdef np.ndarray state_vector = self._features[:len(token_ids)]
cdef np.ndarray hiddens = self._cached
bp_hiddens = self._bp_hiddens bp_hiddens = self._bp_hiddens
feat_weights = self.get_feat_weights()
cdef int[:, ::1] ids = token_ids cdef int[:, ::1] ids = token_ids
self._sum_features(<float*>state_vector.data, sum_state_features(<float*>state_vector.data,
<float*>hiddens.data, &ids[0,0], feat_weights, &ids[0,0],
token_ids.shape[0], self.nF, self.nO) token_ids.shape[0], self.nF, self.nO)
def backward(d_state_vector, sgd=None): def backward(d_state_vector, sgd=None):
@ -142,20 +146,20 @@ cdef class precompute_hiddens:
return d_tokens return d_tokens
return state_vector, backward return state_vector, backward
cdef void _sum_features(self, float* output, cdef void sum_state_features(float* output,
const float* cached, const int* token_ids, int B, int F, int O) nogil: const float* cached, const int* token_ids, int B, int F, int O) nogil:
cdef int idx, b, f, i cdef int idx, b, f, i
cdef const float* feature cdef const float* feature
for b in range(B): for b in range(B):
for f in range(F): for f in range(F):
if token_ids[f] < 0: if token_ids[f] < 0:
continue continue
idx = token_ids[f] * F * O + f*O idx = token_ids[f] * F * O + f*O
feature = &cached[idx] feature = &cached[idx]
for i in range(O): for i in range(O):
output[i] += feature[i] output[i] += feature[i]
output += O output += O
token_ids += F token_ids += F
cdef void cpu_log_loss(float* d_scores, cdef void cpu_log_loss(float* d_scores,
@ -210,18 +214,22 @@ cdef class Parser:
Base class of the DependencyParser and EntityRecognizer. Base class of the DependencyParser and EntityRecognizer.
""" """
@classmethod @classmethod
def Model(cls, nr_class, token_vector_width=128, hidden_width=128, **cfg): def Model(cls, nr_class, token_vector_width=128, hidden_width=128, depth=1, **cfg):
depth = util.env_opt('parser_hidden_depth', depth)
token_vector_width = util.env_opt('token_vector_width', token_vector_width) token_vector_width = util.env_opt('token_vector_width', token_vector_width)
hidden_width = util.env_opt('hidden_width', hidden_width) hidden_width = util.env_opt('hidden_width', hidden_width)
lower = PrecomputableAffine(hidden_width, lower = PrecomputableAffine(hidden_width if depth >= 1 else nr_class,
nF=cls.nr_feature, nF=cls.nr_feature,
nI=token_vector_width) nI=token_vector_width)
with Model.use_device('cpu'): with Model.use_device('cpu'):
upper = chain( if depth == 0:
Maxout(hidden_width), upper = chain()
zero_init(Affine(nr_class)) else:
) upper = chain(
clone(Maxout(hidden_width), (depth-1)),
zero_init(Affine(nr_class))
)
# TODO: This is an unfortunate hack atm! # TODO: This is an unfortunate hack atm!
# Used to set input dimensions in network. # Used to set input dimensions in network.
lower.begin_training(lower.ops.allocate((500, token_vector_width))) lower.begin_training(lower.ops.allocate((500, token_vector_width)))
@ -271,7 +279,8 @@ cdef class Parser:
Returns: Returns:
None None
""" """
self.parse_batch([doc], doc.tensor) states = self.parse_batch([doc], doc.tensor)
self.set_annotations(doc, states[0])
def pipe(self, docs, int batch_size=1000, int n_threads=2): def pipe(self, docs, int batch_size=1000, int n_threads=2):
""" """
@ -289,27 +298,71 @@ cdef class Parser:
cdef Doc doc cdef Doc doc
queue = [] queue = []
for docs in cytoolz.partition_all(batch_size, docs): for docs in cytoolz.partition_all(batch_size, docs):
tokvecs = self.model[0].ops.flatten([d.tensor for d in docs]) docs = list(docs)
tokvecs = [d.tensor for d in docs]
parse_states = self.parse_batch(docs, tokvecs) parse_states = self.parse_batch(docs, tokvecs)
self.set_annotations(docs, parse_states) self.set_annotations(docs, parse_states)
yield from docs yield from docs
def parse_batch(self, docs, tokvecs): def parse_batch(self, docs, tokvecses):
cuda_stream = get_cuda_stream() cdef:
precompute_hiddens state2vec
StateClass state
Pool mem
const float* feat_weights
StateC* st
vector[StateC*] next_step, this_step
int nr_class, nr_feat, nr_dim, nr_state
if isinstance(docs, Doc):
docs = [docs]
states = self.moves.init_batch(docs) tokvecs = self.model[0].ops.flatten(tokvecses)
state2vec, vec2scores = self.get_batch_model(len(states), tokvecs,
nr_state = len(docs)
nr_class = self.moves.n_moves
nr_dim = tokvecs.shape[1]
nr_feat = self.nr_feature
cuda_stream = get_cuda_stream()
state2vec, vec2scores = self.get_batch_model(nr_state, tokvecs,
cuda_stream, 0.0) cuda_stream, 0.0)
todo = [st for st in states if not st.is_final()] states = self.moves.init_batch(docs)
while todo: for state in states:
token_ids = self.get_token_ids(todo) if not state.c.is_final():
vectors = state2vec(token_ids) next_step.push_back(state.c)
scores = vec2scores(vectors)
self.transition_batch(todo, scores) feat_weights = state2vec.get_feat_weights()
todo = [st for st in todo if not st.is_final()] cdef int i
while not next_step.empty():
for i in cython.parallel.prange(next_step.size(), num_threads=4, nogil=True):
self._parse_step(next_step[i], feat_weights, nr_class, nr_feat)
this_step, next_step = next_step, this_step
next_step.clear()
for st in this_step:
if not st.is_final():
next_step.push_back(st)
return states return states
cdef void _parse_step(self, StateC* state,
const float* feat_weights,
int nr_class, int nr_feat) nogil:
token_ids = <int*>calloc(nr_feat, sizeof(int))
scores = <float*>calloc(nr_class, sizeof(float))
is_valid = <int*>calloc(nr_class, sizeof(int))
state.set_context_tokens(token_ids, nr_feat)
sum_state_features(scores,
feat_weights, token_ids, 1, nr_feat, nr_class)
self.moves.set_valid(is_valid, state)
guess = arg_max_if_valid(scores, is_valid, nr_class)
action = self.moves.c[guess]
action.do(state, action.label)
free(is_valid)
free(scores)
free(token_ids)
def update(self, docs_tokvecs, golds, drop=0., sgd=None): def update(self, docs_tokvecs, golds, drop=0., sgd=None):
docs, tokvec_lists = docs_tokvecs docs, tokvec_lists = docs_tokvecs
tokvecs = self.model[0].ops.flatten(tokvec_lists) tokvecs = self.model[0].ops.flatten(tokvec_lists)
@ -379,9 +432,12 @@ cdef class Parser:
def get_token_ids(self, states): def get_token_ids(self, states):
cdef StateClass state cdef StateClass state
cdef int n_tokens = self.nr_feature cdef int n_tokens = self.nr_feature
ids = numpy.zeros((len(states), n_tokens), dtype='i', order='C') cdef np.ndarray ids = numpy.zeros((len(states), n_tokens),
dtype='i', order='C')
c_ids = <int*>ids.data
for i, state in enumerate(states): for i, state in enumerate(states):
state.set_context_tokens(ids[i]) state.c.set_context_tokens(c_ids, n_tokens)
c_ids += ids.shape[1]
return ids return ids
def transition_batch(self, states, float[:, ::1] scores): def transition_batch(self, states, float[:, ::1] scores):