* Check for errors in parser, and parallelise the left-over batch

This commit is contained in:
Matthew Honnibal 2016-02-06 10:06:13 +01:00
parent 031b00cb91
commit 1b41f868d2
2 changed files with 22 additions and 12 deletions

View File

@ -12,9 +12,8 @@ from ._state cimport StateC
cdef class ParserModel(AveragedPerceptron):
cdef void set_featuresC(self, ExampleC* eg, const StateC* state) nogil
cdef class Parser:
cdef readonly ParserModel model
cdef readonly TransitionSystem moves
cdef void parseC(self, TokenC* tokens, int length, int nr_feat, int nr_class) nogil
cdef int parseC(self, TokenC* tokens, int length, int nr_feat, int nr_class) nogil

View File

@ -123,29 +123,39 @@ cdef class Parser:
cdef int i
cdef int nr_class = self.moves.n_moves
cdef int nr_feat = self.model.nr_feat
cdef int status
queue = []
for doc in stream:
doc_ptr[len(queue)] = doc.c
lengths[len(queue)] = doc.length
queue.append(doc)
if len(queue) == batch_size:
for i in cython.parallel.prange(batch_size, nogil=True,
num_threads=n_threads):
self.parseC(doc_ptr[i], lengths[i], nr_feat, nr_class)
with nogil:
for i in cython.parallel.prange(batch_size, num_threads=n_threads):
status = self.parseC(doc_ptr[i], lengths[i], nr_feat, nr_class)
if status != 0:
with gil:
sent_str = queue[i].text
raise ValueError("Error parsing doc: %s" % sent_str)
PyErr_CheckSignals()
for doc in queue:
doc.is_parsed = True
yield doc
queue = []
batch_size = len(queue)
for i in range(batch_size):
self.parseC(doc_ptr[i], lengths[i], nr_feat, nr_class)
with nogil:
for i in cython.parallel.prange(batch_size, num_threads=n_threads):
status = self.parseC(doc_ptr[i], lengths[i], nr_feat, nr_class)
if status != 0:
with gil:
sent_str = queue[i].text
raise ValueError("Error parsing doc: %s" % sent_str)
for doc in queue:
doc.is_parsed = True
yield doc
PyErr_CheckSignals()
cdef void parseC(self, TokenC* tokens, int length, int nr_feat, int nr_class) nogil:
cdef int parseC(self, TokenC* tokens, int length, int nr_feat, int nr_class) nogil:
cdef ExampleC eg
eg.nr_feat = nr_feat
eg.nr_atom = CONTEXT_SIZE
@ -168,7 +178,7 @@ cdef class Parser:
if not eg.is_valid[guess]:
with gil:
move_name = self.moves.move_name(action.move, action.label)
raise ValueError("Illegal action: %s" % move_name)
return 1
action.do(state, action.label)
memset(eg.scores, 0, sizeof(eg.scores[0]) * eg.nr_class)
for i in range(eg.nr_class):
@ -181,6 +191,7 @@ cdef class Parser:
free(eg.atoms)
free(eg.scores)
free(eg.is_valid)
return 0
def train(self, Doc tokens, GoldParse gold):
self.moves.preprocess_gold(gold)