Mirror of https://github.com/explosion/spaCy.git (synced 2024-12-26 18:06:29 +03:00)
* Check for errors in parser, and parallelise the left-over batch
This commit is contained in:
Parent: 031b00cb91
Commit: 1b41f868d2
|
@ -12,9 +12,8 @@ from ._state cimport StateC
|
||||||
cdef class ParserModel(AveragedPerceptron):
|
cdef class ParserModel(AveragedPerceptron):
|
||||||
cdef void set_featuresC(self, ExampleC* eg, const StateC* state) nogil
|
cdef void set_featuresC(self, ExampleC* eg, const StateC* state) nogil
|
||||||
|
|
||||||
|
|
||||||
cdef class Parser:
|
cdef class Parser:
|
||||||
cdef readonly ParserModel model
|
cdef readonly ParserModel model
|
||||||
cdef readonly TransitionSystem moves
|
cdef readonly TransitionSystem moves
|
||||||
|
|
||||||
cdef void parseC(self, TokenC* tokens, int length, int nr_feat, int nr_class) nogil
|
cdef int parseC(self, TokenC* tokens, int length, int nr_feat, int nr_class) nogil
|
||||||
|
|
|
@ -123,29 +123,39 @@ cdef class Parser:
|
||||||
cdef int i
|
cdef int i
|
||||||
cdef int nr_class = self.moves.n_moves
|
cdef int nr_class = self.moves.n_moves
|
||||||
cdef int nr_feat = self.model.nr_feat
|
cdef int nr_feat = self.model.nr_feat
|
||||||
|
cdef int status
|
||||||
queue = []
|
queue = []
|
||||||
for doc in stream:
|
for doc in stream:
|
||||||
doc_ptr[len(queue)] = doc.c
|
doc_ptr[len(queue)] = doc.c
|
||||||
lengths[len(queue)] = doc.length
|
lengths[len(queue)] = doc.length
|
||||||
queue.append(doc)
|
queue.append(doc)
|
||||||
if len(queue) == batch_size:
|
if len(queue) == batch_size:
|
||||||
for i in cython.parallel.prange(batch_size, nogil=True,
|
with nogil:
|
||||||
num_threads=n_threads):
|
for i in cython.parallel.prange(batch_size, num_threads=n_threads):
|
||||||
self.parseC(doc_ptr[i], lengths[i], nr_feat, nr_class)
|
status = self.parseC(doc_ptr[i], lengths[i], nr_feat, nr_class)
|
||||||
|
if status != 0:
|
||||||
|
with gil:
|
||||||
|
sent_str = queue[i].text
|
||||||
|
raise ValueError("Error parsing doc: %s" % sent_str)
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
for doc in queue:
|
for doc in queue:
|
||||||
doc.is_parsed = True
|
doc.is_parsed = True
|
||||||
yield doc
|
yield doc
|
||||||
queue = []
|
queue = []
|
||||||
batch_size = len(queue)
|
batch_size = len(queue)
|
||||||
for i in range(batch_size):
|
with nogil:
|
||||||
self.parseC(doc_ptr[i], lengths[i], nr_feat, nr_class)
|
for i in cython.parallel.prange(batch_size, num_threads=n_threads):
|
||||||
|
status = self.parseC(doc_ptr[i], lengths[i], nr_feat, nr_class)
|
||||||
|
if status != 0:
|
||||||
|
with gil:
|
||||||
|
sent_str = queue[i].text
|
||||||
|
raise ValueError("Error parsing doc: %s" % sent_str)
|
||||||
for doc in queue:
|
for doc in queue:
|
||||||
doc.is_parsed = True
|
doc.is_parsed = True
|
||||||
yield doc
|
yield doc
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
|
|
||||||
cdef void parseC(self, TokenC* tokens, int length, int nr_feat, int nr_class) nogil:
|
cdef int parseC(self, TokenC* tokens, int length, int nr_feat, int nr_class) nogil:
|
||||||
cdef ExampleC eg
|
cdef ExampleC eg
|
||||||
eg.nr_feat = nr_feat
|
eg.nr_feat = nr_feat
|
||||||
eg.nr_atom = CONTEXT_SIZE
|
eg.nr_atom = CONTEXT_SIZE
|
||||||
|
@ -168,7 +178,7 @@ cdef class Parser:
|
||||||
if not eg.is_valid[guess]:
|
if not eg.is_valid[guess]:
|
||||||
with gil:
|
with gil:
|
||||||
move_name = self.moves.move_name(action.move, action.label)
|
move_name = self.moves.move_name(action.move, action.label)
|
||||||
raise ValueError("Illegal action: %s" % move_name)
|
return 1
|
||||||
action.do(state, action.label)
|
action.do(state, action.label)
|
||||||
memset(eg.scores, 0, sizeof(eg.scores[0]) * eg.nr_class)
|
memset(eg.scores, 0, sizeof(eg.scores[0]) * eg.nr_class)
|
||||||
for i in range(eg.nr_class):
|
for i in range(eg.nr_class):
|
||||||
|
@ -181,6 +191,7 @@ cdef class Parser:
|
||||||
free(eg.atoms)
|
free(eg.atoms)
|
||||||
free(eg.scores)
|
free(eg.scores)
|
||||||
free(eg.is_valid)
|
free(eg.is_valid)
|
||||||
|
return 0
|
||||||
|
|
||||||
def train(self, Doc tokens, GoldParse gold):
|
def train(self, Doc tokens, GoldParse gold):
|
||||||
self.moves.preprocess_gold(gold)
|
self.moves.preprocess_gold(gold)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user