mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 17:54:39 +03:00
Fix beam parsing
This commit is contained in:
parent
99649d114d
commit
c49e44349a
|
@ -59,7 +59,7 @@ cdef class ParserBeam(object):
|
||||||
cdef public object dones
|
cdef public object dones
|
||||||
|
|
||||||
def __init__(self, TransitionSystem moves, states, golds,
|
def __init__(self, TransitionSystem moves, states, golds,
|
||||||
int width, float density):
|
int width, float density=0.):
|
||||||
self.moves = moves
|
self.moves = moves
|
||||||
self.states = states
|
self.states = states
|
||||||
self.golds = golds
|
self.golds = golds
|
||||||
|
@ -133,8 +133,12 @@ cdef class ParserBeam(object):
|
||||||
self.moves.set_costs(beam.is_valid[i], beam.costs[i],
|
self.moves.set_costs(beam.is_valid[i], beam.costs[i],
|
||||||
state, gold)
|
state, gold)
|
||||||
if follow_gold:
|
if follow_gold:
|
||||||
|
min_cost = 0
|
||||||
for j in range(beam.nr_class):
|
for j in range(beam.nr_class):
|
||||||
if beam.costs[i][j] >= 1:
|
if beam.costs[i][j] < min_cost:
|
||||||
|
min_cost = beam.costs[i][j]
|
||||||
|
for j in range(beam.nr_class):
|
||||||
|
if beam.costs[i][j] > min_cost:
|
||||||
beam.is_valid[i][j] = 0
|
beam.is_valid[i][j] = 0
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -266,12 +266,13 @@ class ParserStepModel(Model):
|
||||||
|
|
||||||
def get_token_ids(self, batch):
|
def get_token_ids(self, batch):
|
||||||
states = _beam_utils.collect_states(batch)
|
states = _beam_utils.collect_states(batch)
|
||||||
|
cdef StateClass state
|
||||||
|
states = [state for state in states if not state.is_final()]
|
||||||
cdef np.ndarray ids = numpy.zeros((len(states), self.state2vec.nF),
|
cdef np.ndarray ids = numpy.zeros((len(states), self.state2vec.nF),
|
||||||
dtype='i', order='C')
|
dtype='i', order='C')
|
||||||
|
ids.fill(-1)
|
||||||
c_ids = <int*>ids.data
|
c_ids = <int*>ids.data
|
||||||
cdef StateClass state
|
|
||||||
for state in states:
|
for state in states:
|
||||||
if not state.c.is_final():
|
|
||||||
state.c.set_context_tokens(c_ids, ids.shape[1])
|
state.c.set_context_tokens(c_ids, ids.shape[1])
|
||||||
c_ids += ids.shape[1]
|
c_ids += ids.shape[1]
|
||||||
return ids
|
return ids
|
||||||
|
|
|
@ -208,14 +208,18 @@ cdef class Parser:
|
||||||
for doc in batch_in_order:
|
for doc in batch_in_order:
|
||||||
yield doc
|
yield doc
|
||||||
|
|
||||||
def predict(self, docs, beam_width=1):
|
def predict(self, docs, beam_width=1, drop=0.):
|
||||||
if isinstance(docs, Doc):
|
if isinstance(docs, Doc):
|
||||||
docs = [docs]
|
docs = [docs]
|
||||||
|
if beam_width < 2:
|
||||||
|
return self.greedy_parse(docs, drop=drop)
|
||||||
|
else:
|
||||||
|
return self.beam_parse(docs, beam_width=beam_width, drop=drop)
|
||||||
|
|
||||||
|
def greedy_parse(self, docs, drop=0.):
|
||||||
cdef vector[StateC*] states
|
cdef vector[StateC*] states
|
||||||
cdef StateClass state
|
cdef StateClass state
|
||||||
model = self.model(docs)
|
model = self.model(docs)
|
||||||
if beam_width == 1:
|
|
||||||
batch = self.moves.init_batch(docs)
|
batch = self.moves.init_batch(docs)
|
||||||
weights = get_c_weights(model)
|
weights = get_c_weights(model)
|
||||||
for state in batch:
|
for state in batch:
|
||||||
|
@ -225,14 +229,41 @@ cdef class Parser:
|
||||||
with nogil:
|
with nogil:
|
||||||
self._parseC(&states[0],
|
self._parseC(&states[0],
|
||||||
weights, sizes)
|
weights, sizes)
|
||||||
else:
|
|
||||||
batch = self.moves.init_beams(docs, beam_width)
|
|
||||||
unfinished = list(batch)
|
|
||||||
while unfinished:
|
|
||||||
scores = model.predict(unfinished)
|
|
||||||
unfinished = self.transition_beams(batch, scores)
|
|
||||||
return batch
|
return batch
|
||||||
|
|
||||||
|
def beam_parse(self, docs, int beam_width=3, float drop=0.):
|
||||||
|
cdef Beam beam
|
||||||
|
cdef Doc doc
|
||||||
|
cdef np.ndarray token_ids
|
||||||
|
model = self.model(docs)
|
||||||
|
beams = self.moves.init_beams(docs, beam_width)
|
||||||
|
token_ids = numpy.zeros((len(docs) * beam_width, self.nr_feature),
|
||||||
|
dtype='i', order='C')
|
||||||
|
cdef int* c_ids
|
||||||
|
cdef int nr_feature = self.nr_feature
|
||||||
|
cdef int n_states
|
||||||
|
model = self.model(docs)
|
||||||
|
todo = [beam for beam in beams if not beam.is_done]
|
||||||
|
while todo:
|
||||||
|
token_ids.fill(-1)
|
||||||
|
c_ids = <int*>token_ids.data
|
||||||
|
n_states = 0
|
||||||
|
for beam in todo:
|
||||||
|
for i in range(beam.size):
|
||||||
|
state = <StateC*>beam.at(i)
|
||||||
|
# This way we avoid having to score finalized states
|
||||||
|
# We do have to take care to keep indexes aligned, though
|
||||||
|
if not state.is_final():
|
||||||
|
state.set_context_tokens(c_ids, nr_feature)
|
||||||
|
c_ids += nr_feature
|
||||||
|
n_states += 1
|
||||||
|
if n_states == 0:
|
||||||
|
break
|
||||||
|
vectors = model.state2vec(token_ids[:n_states])
|
||||||
|
scores = model.vec2scores(vectors)
|
||||||
|
todo = self.transition_beams(todo, scores)
|
||||||
|
return beams
|
||||||
|
|
||||||
cdef void _parseC(self, StateC** states,
|
cdef void _parseC(self, StateC** states,
|
||||||
WeightsC weights, SizesC sizes) nogil:
|
WeightsC weights, SizesC sizes) nogil:
|
||||||
cdef int i, j
|
cdef int i, j
|
||||||
|
@ -325,7 +356,7 @@ cdef class Parser:
|
||||||
beam_update_prob = 1-self.cfg.get('beam_update_prob', 0.5)
|
beam_update_prob = 1-self.cfg.get('beam_update_prob', 0.5)
|
||||||
if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= beam_update_prob:
|
if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= beam_update_prob:
|
||||||
return self.update_beam(docs, golds,
|
return self.update_beam(docs, golds,
|
||||||
self.cfg['beam_width'], self.cfg['beam_density'],
|
self.cfg['beam_width'],
|
||||||
drop=drop, sgd=sgd, losses=losses)
|
drop=drop, sgd=sgd, losses=losses)
|
||||||
# Chop sequences into lengths of this many transitions, to make the
|
# Chop sequences into lengths of this many transitions, to make the
|
||||||
# batch uniform length.
|
# batch uniform length.
|
||||||
|
@ -352,12 +383,11 @@ cdef class Parser:
|
||||||
|
|
||||||
def update_beam(self, docs, golds, width, drop=0., sgd=None, losses=None):
|
def update_beam(self, docs, golds, width, drop=0., sgd=None, losses=None):
|
||||||
lengths = [len(d) for d in docs]
|
lengths = [len(d) for d in docs]
|
||||||
states = self.moves.init_batch(docs)
|
cut_gold = numpy.random.choice(range(20, 100))
|
||||||
for gold in golds:
|
states, golds, max_steps = self._init_gold_batch(docs, golds, max_length=cut_gold)
|
||||||
self.moves.preprocess_gold(gold)
|
|
||||||
model, finish_update = self.model.begin_update(docs, drop=drop)
|
model, finish_update = self.model.begin_update(docs, drop=drop)
|
||||||
states_d_scores, backprops, beams = _beam_utils.update_beam(
|
states_d_scores, backprops, beams = _beam_utils.update_beam(
|
||||||
self.moves, self.nr_feature, 500, states, golds, model.state2vec,
|
self.moves, self.nr_feature, max_steps, states, golds, model.state2vec,
|
||||||
model.vec2scores, width, drop=drop, losses=losses)
|
model.vec2scores, width, drop=drop, losses=losses)
|
||||||
for i, d_scores in enumerate(states_d_scores):
|
for i, d_scores in enumerate(states_d_scores):
|
||||||
ids, bp_vectors, bp_scores = backprops[i]
|
ids, bp_vectors, bp_scores = backprops[i]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user