From f75420ae79f67da5091bd9c02622aa3c756d36d9 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 18 Aug 2017 13:31:15 -0500 Subject: [PATCH] Unhack beam parsing, moving it under options instead of global flags --- spacy/syntax/_beam_utils.pyx | 33 +++++++++++++++++--------------- spacy/syntax/nn_parser.pyx | 37 ++++++++++++++++++++++-------------- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/spacy/syntax/_beam_utils.pyx b/spacy/syntax/_beam_utils.pyx index 7afe51d4f..4d90fe23b 100644 --- a/spacy/syntax/_beam_utils.pyx +++ b/spacy/syntax/_beam_utils.pyx @@ -49,7 +49,7 @@ cdef class ParserBeam(object): cdef public object dones def __init__(self, TransitionSystem moves, states, golds, - int width=4, float density=0.001): + int width, float density): self.moves = moves self.states = states self.golds = golds @@ -89,7 +89,10 @@ cdef class ParserBeam(object): self._set_scores(beam, scores[i]) if self.golds is not None: self._set_costs(beam, self.golds[i], follow_gold=follow_gold) - beam.advance(_transition_state, _hash_state, self.moves.c) + if follow_gold: + beam.advance(_transition_state, NULL, self.moves.c) + else: + beam.advance(_transition_state, _hash_state, self.moves.c) beam.check_done(_check_final_state, NULL) if beam.is_done and self.golds is not None: for j in range(beam.size): @@ -145,15 +148,16 @@ def get_token_ids(states, int n_tokens): nr_update = 0 def update_beam(TransitionSystem moves, int nr_feature, int max_steps, states, tokvecs, golds, - state2vec, vec2scores, drop=0., sgd=None, - losses=None, int width=4, float density=0.001): + state2vec, vec2scores, + int width, float density, + sgd=None, losses=None, drop=0.): global nr_update cdef MaxViolation violn nr_update += 1 pbeam = ParserBeam(moves, states, golds, width=width, density=density) gbeam = ParserBeam(moves, states, golds, - width=width, density=density) + width=width, density=0.0) cdef StateClass state beam_maps = [] backprops = [] @@ -194,13 +198,13 @@ def update_beam(TransitionSystem moves, int nr_feature, int max_steps, violn.check_crf(pbeam[i], gbeam[i]) histories = [] losses = [] - for i, violn in enumerate(violns): - if violn.cost < 1: - histories.append([]) - losses.append([]) - else: + for violn in violns: + if violn.p_hist: histories.append(violn.p_hist + violn.g_hist) losses.append(violn.p_probs + violn.g_probs) + else: + histories.append([]) + losses.append([]) states_d_scores = get_gradient(moves.n_moves, beam_maps, histories, losses) return states_d_scores, backprops[:len(states_d_scores)] @@ -215,10 +219,6 @@ def get_states(pbeams, gbeams, beam_map, nr_update): for eg_id, (pbeam, gbeam) in enumerate(zip(pbeams, gbeams)): p_indices.append([]) g_indices.append([]) - if pbeam.loss > 0 and pbeam.min_score > (gbeam.score + numpy.sqrt(nr_update)): - pbeams.dones[eg_id] = True - gbeams.dones[eg_id] = True - continue for i in range(pbeam.size): state = pbeam.at(i) if not state.is_final(): @@ -269,9 +269,12 @@ def get_gradient(nr_class, beam_maps, histories, losses): assert len(histories) == len(losses) for eg_id, hists in enumerate(histories): for loss, hist in zip(losses[eg_id], hists): - if abs(loss) == 0.0 or numpy.isnan(loss): + if loss == 0.0 or numpy.isnan(loss): continue key = tuple([eg_id]) + # Adjust loss for length + avg_loss = loss / len(hist) + loss += avg_loss * (nr_step - len(hist)) for j, clas in enumerate(hist): i = beam_maps[j][key] # In step j, at state i action clas diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 11fc4e742..3e5566705 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -67,7 +67,6 @@ from ..attrs cimport ID, TAG, DEP, ORTH, NORM, PREFIX, SUFFIX, TAG from . import _beam_utils USE_FINE_TUNE = True -BEAM_PARSE = True def get_templates(*args, **kwargs): return [] @@ -299,6 +298,10 @@ cdef class Parser: self.moves = self.TransitionSystem(self.vocab.strings, {}) else: self.moves = moves + if 'beam_width' not in cfg: + cfg['beam_width'] = util.env_opt('beam_width', 1) + if 'beam_density' not in cfg: + cfg['beam_density'] = util.env_opt('beam_density', 0.0) self.cfg = cfg if 'actions' in self.cfg: for action, labels in self.cfg.get('actions', {}).items(): @@ -321,9 +324,7 @@ cdef class Parser: if beam_width is None: beam_width = self.cfg.get('beam_width', 1) if beam_density is None: - beam_density = self.cfg.get('beam_density', 0.001) - if BEAM_PARSE: - beam_width = 16 + beam_density = self.cfg.get('beam_density', 0.0) cdef Beam beam if beam_width == 1: states = self.parse_batch([doc], [doc.tensor]) @@ -339,7 +340,7 @@ cdef class Parser: return output def pipe(self, docs, int batch_size=1000, int n_threads=2, - beam_width=1, beam_density=0.001): + beam_width=None, beam_density=None): """ Process a stream of documents. @@ -351,8 +352,10 @@ cdef class Parser: The number of threads with which to work on the buffer in parallel. Yields (Doc): Documents, in order. """ - if BEAM_PARSE: - beam_width = 16 + if beam_width is None: + beam_width = self.cfg.get('beam_width', 1) + if beam_density is None: + beam_density = self.cfg.get('beam_density', 0.0) cdef Doc doc cdef Beam beam for docs in cytoolz.partition_all(batch_size, docs): @@ -430,7 +433,7 @@ cdef class Parser: next_step.push_back(st) return states - def beam_parse(self, docs, tokvecses, int beam_width=16, float beam_density=0.001): + def beam_parse(self, docs, tokvecses, int beam_width=3, float beam_density=0.001): cdef Beam beam cdef np.ndarray scores cdef Doc doc @@ -480,9 +483,10 @@ cdef class Parser: return beams def update(self, docs_tokvecs, golds, drop=0., sgd=None, losses=None): - if BEAM_PARSE and numpy.random.random() >= 0.5: - return self.update_beam(docs_tokvecs, golds, drop=drop, sgd=sgd, - losses=losses) + if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= 0.5: + return self.update_beam(docs_tokvecs, golds, + self.cfg['beam_width'], self.cfg['beam_density'], + drop=drop, sgd=sgd, losses=losses) if losses is not None and self.name not in losses: losses[self.name] = 0. docs, tokvec_lists = docs_tokvecs @@ -548,7 +552,12 @@ cdef class Parser: bp_my_tokvecs(d_tokvecs, sgd=sgd) return d_tokvecs - def update_beam(self, docs_tokvecs, golds, drop=0., sgd=None, losses=None): + def update_beam(self, docs_tokvecs, golds, width=None, density=None, + drop=0., sgd=None, losses=None): + if width is None: + width = self.cfg.get('beam_width', 2) + if density is None: + density = self.cfg.get('beam_density', 0.0) if losses is not None and self.name not in losses: losses[self.name] = 0. docs, tokvecs = docs_tokvecs @@ -570,8 +579,8 @@ cdef class Parser: states_d_scores, backprops = _beam_utils.update_beam(self.moves, self.nr_feature, 500, states, tokvecs, golds, state2vec, vec2scores, - drop, sgd, losses, - width=16) + width, density, + sgd=sgd, drop=drop, losses=losses) backprop_lower = [] for i, d_scores in enumerate(states_d_scores): if losses is not None: