From 3c0fc10dc4e7c10974fc489494fc8dbcf22bf532 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 14 Jun 2020 19:33:30 +0200
Subject: [PATCH] Remove beam for now (maybe)

Remove beam_utils

Update setup.py

Remove beam
---
 setup.py                           |   1 -
 spacy/syntax/_beam_utils.pxd       |   9 -
 spacy/syntax/_beam_utils.pyx       | 329 -----------------------------
 spacy/syntax/arc_eager.pyx         |  38 ----
 spacy/syntax/ner.pyx               |  35 ---
 spacy/syntax/nn_parser.pyx         | 253 ++--------------------
 spacy/syntax/transition_system.pxd |   2 -
 spacy/syntax/transition_system.pyx |  27 ---
 8 files changed, 14 insertions(+), 680 deletions(-)
 delete mode 100644 spacy/syntax/_beam_utils.pxd
 delete mode 100644 spacy/syntax/_beam_utils.pyx

diff --git a/setup.py b/setup.py
index 89ecb24b5..eacb2d35d 100755
--- a/setup.py
+++ b/setup.py
@@ -39,7 +39,6 @@ MOD_NAMES = [
     "spacy.tokenizer",
     "spacy.syntax.nn_parser",
     "spacy.syntax._parser_model",
-    "spacy.syntax._beam_utils",
     "spacy.syntax.nonproj",
     "spacy.syntax.transition_system",
     "spacy.syntax.arc_eager",
diff --git a/spacy/syntax/_beam_utils.pxd b/spacy/syntax/_beam_utils.pxd
deleted file mode 100644
index cf99ac3d1..000000000
--- a/spacy/syntax/_beam_utils.pxd
+++ /dev/null
@@ -1,9 +0,0 @@
-from ..typedefs cimport hash_t, class_t
-
-# These are passed as callbacks to thinc.search.Beam
-cdef int transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1
-
-cdef int check_final_state(void* _state, void* extra_args) except -1
-
-
-cdef hash_t hash_state(void* _state, void* _) except 0
diff --git a/spacy/syntax/_beam_utils.pyx b/spacy/syntax/_beam_utils.pyx
deleted file mode 100644
index 8f00e263d..000000000
--- a/spacy/syntax/_beam_utils.pyx
+++ /dev/null
@@ -1,329 +0,0 @@
-# cython: infer_types=True, profile=True
-cimport numpy as np
-from cpython.ref cimport PyObject, Py_XDECREF
-from thinc.extra.search cimport Beam
-from thinc.extra.search cimport MaxViolation
-
-from thinc.extra.search import MaxViolation
-import numpy
-
-from ..typedefs cimport hash_t, class_t
-from .transition_system cimport TransitionSystem, Transition
-from .stateclass cimport StateC, StateClass
-from ..gold.example cimport Example
-
-from ..errors import Errors
-
-
-# These are passed as callbacks to thinc.search.Beam
-cdef int transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1:
-    dest = <StateC*>_dest
-    src = <StateC*>_src
-    moves = <const Transition*>_moves
-    dest.clone(src)
-    moves[clas].do(dest, moves[clas].label)
-    dest.push_hist(clas)
-
-
-cdef int check_final_state(void* _state, void* extra_args) except -1:
-    state = <StateC*>_state
-    return state.is_final()
-
-
-cdef hash_t hash_state(void* _state, void* _) except 0:
-    state = <StateC*>_state
-    if state.is_final():
-        return 1
-    else:
-        return state.hash()
-
-
-def collect_states(beams):
-    cdef StateClass state
-    cdef Beam beam
-    states = []
-    for state_or_beam in beams:
-        if isinstance(state_or_beam, StateClass):
-            states.append(state_or_beam)
-        else:
-            beam = state_or_beam
-            state = StateClass.borrow(<StateC*>beam.at(0))
-            states.append(state)
-    return states
-
-
-cdef class ParserBeam(object):
-    cdef public TransitionSystem moves
-    cdef public object states
-    cdef public object golds
-    cdef public object beams
-    cdef public object dones
-
-    def __init__(self, TransitionSystem moves, states, golds,
-                 int width, float density=0.):
-        self.moves = moves
-        self.states = states
-        self.golds = golds
-        self.beams = []
-        cdef Beam beam
-        cdef StateClass state
-        cdef StateC* st
-        for state in states:
-            beam = Beam(self.moves.n_moves, width, min_density=density)
-            beam.initialize(self.moves.init_beam_state,
-                            self.moves.del_beam_state,
-                            state.c.length, state.c._sent)
-            for i in range(beam.width):
-                st = <StateC*>beam.at(i)
-                st.offset = state.c.offset
-            self.beams.append(beam)
-        self.dones = [False] * len(self.beams)
-
-    @property
-    def is_done(self):
-        return all(b.is_done or self.dones[i]
-                   for i, b in enumerate(self.beams))
-
-    def __getitem__(self, i):
-        return self.beams[i]
-
-    def __len__(self):
-        return len(self.beams)
-
-    def advance(self, scores, follow_gold=False):
-        cdef Beam beam
-        for i, beam in enumerate(self.beams):
-            if beam.is_done or not scores[i].size or self.dones[i]:
-                continue
-            self._set_scores(beam, scores[i])
-            if self.golds is not None:
-                self._set_costs(beam, self.golds[i], follow_gold=follow_gold)
-            beam.advance(transition_state, hash_state, <void*>self.moves.c)
-            beam.check_done(check_final_state, NULL)
-            # This handles the non-monotonic stuff for the parser.
-            if beam.is_done and self.golds is not None:
-                for j in range(beam.size):
-                    state = StateClass.borrow(<StateC*>beam.at(j))
-                    if state.is_final():
-                        try:
-                            if self.moves.is_gold_parse(state, self.golds[i]):
-                                beam._states[j].loss = 0.0
-                        except NotImplementedError:
-                            break
-
-    def _set_scores(self, Beam beam, float[:, ::1] scores):
-        cdef float* c_scores = &scores[0, 0]
-        cdef int nr_state = min(scores.shape[0], beam.size)
-        cdef int nr_class = scores.shape[1]
-        for i in range(nr_state):
-            state = <StateC*>beam.at(i)
-            if not state.is_final():
-                for j in range(nr_class):
-                    beam.scores[i][j] = c_scores[i * nr_class + j]
-                self.moves.set_valid(beam.is_valid[i], state)
-            else:
-                for j in range(beam.nr_class):
-                    beam.scores[i][j] = 0
-                    beam.costs[i][j] = 0
-
-    def _set_costs(self, Beam beam, Example example, int follow_gold=False):
-        for i in range(beam.size):
-            state = StateClass.borrow(<StateC*>beam.at(i))
-            if not state.is_final():
-                self.moves.set_costs(beam.is_valid[i], beam.costs[i],
-                                     state, example)
-                if follow_gold:
-                    min_cost = 0
-                    for j in range(beam.nr_class):
-                        if beam.is_valid[i][j] and beam.costs[i][j] < min_cost:
-                            min_cost = beam.costs[i][j]
-                    for j in range(beam.nr_class):
-                        if beam.costs[i][j] > min_cost:
-                            beam.is_valid[i][j] = 0
-
-
-def get_token_ids(states, int n_tokens):
-    cdef StateClass state
-    cdef np.ndarray ids = numpy.zeros((len(states), n_tokens),
-                                      dtype='int32', order='C')
-    c_ids = <int*>ids.data
-    for i, state in enumerate(states):
-        if not state.is_final():
-            state.c.set_context_tokens(c_ids, n_tokens)
-        else:
-            ids[i] = -1
-        c_ids += ids.shape[1]
-    return ids
-
-
-nr_update = 0
-
-
-def update_beam(TransitionSystem moves, int nr_feature, int max_steps,
-                states, golds,
-                state2vec, vec2scores,
-                int width, losses=None, drop=0.,
-                early_update=True, beam_density=0.0):
-    global nr_update
-    cdef MaxViolation violn
-    nr_update += 1
-    pbeam = ParserBeam(moves, states, golds, width=width, density=beam_density)
-    gbeam = ParserBeam(moves, states, golds, width=width, density=beam_density)
-    cdef StateClass state
-    beam_maps = []
-    backprops = []
-    violns = [MaxViolation() for _ in range(len(states))]
-    for t in range(max_steps):
-        if pbeam.is_done and gbeam.is_done:
-            break
-        # The beam maps let us find the right row in the flattened scores
-        # arrays for each state. States are identified by (example id,
-        # history). We keep a different beam map for each step (since we'll
-        # have a flat scores array for each step). The beam map will let us
-        # take the per-state losses, and compute the gradient for each (step,
-        # state, class).
-        beam_maps.append({})
-        # Gather all states from the two beams in a list. Some states may occur
-        # in both beams. To figure out which beam each state belonged to,
-        # we keep two lists of indices, p_indices and g_indices
-        states, p_indices, g_indices = get_states(pbeam, gbeam, beam_maps[-1],
-                                                  nr_update)
-        if not states:
-            break
-        # Now that we have our flat list of states, feed them through the model
-        token_ids = get_token_ids(states, nr_feature)
-        vectors, bp_vectors = state2vec.begin_update(token_ids, drop=drop)
-        scores, bp_scores = vec2scores.begin_update(vectors, drop=drop)
-
-        # Store the callbacks for the backward pass
-        backprops.append((token_ids, bp_vectors, bp_scores))
-
-        # Unpack the flat scores into lists for the two beams. The indices arrays
-        # tell us which example and state the scores-row refers to.
-        p_scores = [numpy.ascontiguousarray(scores[indices], dtype='f')
-                    for indices in p_indices]
-        g_scores = [numpy.ascontiguousarray(scores[indices], dtype='f')
-                    for indices in g_indices]
-        # Now advance the states in the beams. The gold beam is constrained
-        # to follow only gold analyses.
-        pbeam.advance(p_scores)
-        gbeam.advance(g_scores, follow_gold=True)
-        # Track the "maximum violation", to use in the update.
-        for i, violn in enumerate(violns):
-            violn.check_crf(pbeam[i], gbeam[i])
-    histories = []
-    losses = []
-    for violn in violns:
-        if violn.p_hist:
-            histories.append(violn.p_hist + violn.g_hist)
-            losses.append(violn.p_probs + violn.g_probs)
-        else:
-            histories.append([])
-            losses.append([])
-    states_d_scores = get_gradient(moves.n_moves, beam_maps, histories, losses)
-    beams = list(pbeam.beams) + list(gbeam.beams)
-    return states_d_scores, backprops[:len(states_d_scores)], beams
-
-
-def get_states(pbeams, gbeams, beam_map, nr_update):
-    seen = {}
-    states = []
-    p_indices = []
-    g_indices = []
-    cdef Beam pbeam, gbeam
-    if len(pbeams) != len(gbeams):
-        raise ValueError(Errors.E079.format(pbeams=len(pbeams), gbeams=len(gbeams)))
-    for eg_id, (pbeam, gbeam) in enumerate(zip(pbeams, gbeams)):
-        p_indices.append([])
-        g_indices.append([])
-        for i in range(pbeam.size):
-            state = StateClass.borrow(<StateC*>pbeam.at(i))
-            if not state.is_final():
-                key = tuple([eg_id] + pbeam.histories[i])
-                if key in seen:
-                    raise ValueError(Errors.E080.format(key=key))
-                seen[key] = len(states)
-                p_indices[-1].append(len(states))
-                states.append(state)
-        beam_map.update(seen)
-        for i in range(gbeam.size):
-            state = StateClass.borrow(<StateC*>gbeam.at(i))
-            if not state.is_final():
-                key = tuple([eg_id] + gbeam.histories[i])
-                if key in seen:
-                    g_indices[-1].append(seen[key])
-                else:
-                    g_indices[-1].append(len(states))
-                    beam_map[key] = len(states)
-                    states.append(state)
-    p_idx = [numpy.asarray(idx, dtype='i') for idx in p_indices]
-    g_idx = [numpy.asarray(idx, dtype='i') for idx in g_indices]
-    return states, p_idx, g_idx
-
-
-def get_gradient(nr_class, beam_maps, histories, losses):
-    """The global model assigns a loss to each parse. The beam scores
-    are additive, so the same gradient is applied to each action
-    in the history. This gives the gradient of a single *action*
-    for a beam state -- so we have "the gradient of loss for taking
-    action i given history H."
-
-    Histories: Each history is a list of actions
-    Each candidate has a history
-    Each beam has multiple candidates
-    Each batch has multiple beams
-    So history is list of lists of lists of ints
-    """
-    grads = []
-    nr_steps = []
-    for eg_id, hists in enumerate(histories):
-        nr_step = 0
-        for loss, hist in zip(losses[eg_id], hists):
-            if loss != 0.0 and not numpy.isnan(loss):
-                nr_step = max(nr_step, len(hist))
-        nr_steps.append(nr_step)
-    for i in range(max(nr_steps)):
-        grads.append(numpy.zeros((max(beam_maps[i].values())+1, nr_class),
-                                 dtype='f'))
-    if len(histories) != len(losses):
-        raise ValueError(Errors.E081.format(n_hist=len(histories), losses=len(losses)))
-    for eg_id, hists in enumerate(histories):
-        for loss, hist in zip(losses[eg_id], hists):
-            if loss == 0.0 or numpy.isnan(loss):
-                continue
-            key = tuple([eg_id])
-            # Adjust loss for length
-            # We need to do this because each state in a short path is scored
-            # multiple times, as we add in the average cost when we run out
-            # of actions.
-            avg_loss = loss / len(hist)
-            loss += avg_loss * (nr_steps[eg_id] - len(hist))
-            for j, clas in enumerate(hist):
-                i = beam_maps[j][key]
-                # In step j, at state i, taking action clas
-                # resulted in loss
-                grads[j][i, clas] += loss
-                key = key + tuple([clas])
-    return grads
-
-
-def cleanup_beam(Beam beam):
-    cdef StateC* state
-    # Once parsing has finished, states in beam may not be unique. Is this
-    # correct?
-    seen = set()
-    for i in range(beam.width):
-        addr = <size_t>beam._parents[i].content
-        if addr not in seen:
-            state = <StateC*>addr
-            del state
-            seen.add(addr)
-        else:
-            raise ValueError(Errors.E023.format(addr=addr, i=i))
-        addr = <size_t>beam._states[i].content
-        if addr not in seen:
-            state = <StateC*>addr
-            del state
-            seen.add(addr)
-        else:
-            raise ValueError(Errors.E023.format(addr=addr, i=i))
diff --git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx
index 774c8a9b5..787546144 100644
--- a/spacy/syntax/arc_eager.pyx
+++ b/spacy/syntax/arc_eager.pyx
@@ -1,7 +1,6 @@
 # cython: profile=True, cdivision=True, infer_types=True
 from cpython.ref cimport Py_INCREF
 from cymem.cymem cimport Pool
-from thinc.extra.search cimport Beam
 from libc.stdint cimport int32_t
 
 from collections import defaultdict, Counter
@@ -379,8 +378,6 @@ cdef int _del_state(Pool mem, void* state, void* x) except -1:
 cdef class ArcEager(TransitionSystem):
     def __init__(self, *args, **kwargs):
         TransitionSystem.__init__(self, *args, **kwargs)
-        self.init_beam_state = _init_state
-        self.del_beam_state = _del_state
 
     @classmethod
     def get_actions(cls, **kwargs):
@@ -444,22 +441,6 @@ cdef class ArcEager(TransitionSystem):
     def preprocess_gold(self, gold):
         raise NotImplementedError
 
-    def get_beam_parses(self, Beam beam):
-        parses = []
-        probs = beam.probs
-        for i in range(beam.size):
-            state = <StateC*>beam.at(i)
-            if state.is_final():
-                self.finalize_state(state)
-                prob = probs[i]
-                parse = []
-                for j in range(state.length):
-                    head = state.H(j)
-                    label = self.strings[state._sent[j].dep]
-                    parse.append((head, j, label))
-                parses.append((prob, parse))
-        return parses
-
     cdef Transition lookup_transition(self, object name_or_id) except *:
         if isinstance(name_or_id, int):
             return self.c[name_or_id]
@@ -600,22 +581,3 @@ cdef class ArcEager(TransitionSystem):
             failure_state = stcls.print_state([t.text for t in example])
             raise ValueError(Errors.E021.format(n_actions=self.n_moves,
                                                 state=failure_state))
-
-    def get_beam_annot(self, Beam beam):
-        length = (<StateC*>beam.at(0)).length
-        heads = [{} for _ in range(length)]
-        deps = [{} for _ in range(length)]
-        probs = beam.probs
-        for i in range(beam.size):
-            state = <StateC*>beam.at(i)
-            self.finalize_state(state)
-            if state.is_final():
-                prob = probs[i]
-                for j in range(state.length):
-                    head = j + state._sent[j].head
-                    dep = state._sent[j].dep
-                    heads[j].setdefault(head, 0.0)
-                    heads[j][head] += prob
-                    deps[j].setdefault(dep, 0.0)
-                    deps[j][dep] += prob
-        return heads, deps
diff --git a/spacy/syntax/ner.pyx b/spacy/syntax/ner.pyx
index 421210be7..2754d826a 100644
--- a/spacy/syntax/ner.pyx
+++ b/spacy/syntax/ner.pyx
@@ -1,5 +1,3 @@
-from thinc.extra.search cimport Beam
-
 from collections import Counter
 
 from ..typedefs cimport weight_t
@@ -99,39 +97,6 @@ cdef class BiluoPushDown(TransitionSystem):
     def preprocess_gold(self, gold):
         raise NotImplementedError
 
-    def get_beam_annot(self, Beam beam):
-        entities = {}
-        probs = beam.probs
-        for i in range(beam.size):
-            state = <StateC*>beam.at(i)
-            if state.is_final():
-                self.finalize_state(state)
-                prob = probs[i]
-                for j in range(state._e_i):
-                    start = state._ents[j].start
-                    end = state._ents[j].end
-                    label = state._ents[j].label
-                    entities.setdefault((start, end, label), 0.0)
-                    entities[(start, end, label)] += prob
-        return entities
-
-    def get_beam_parses(self, Beam beam):
-        parses = []
-        probs = beam.probs
-        for i in range(beam.size):
-            state = <StateC*>beam.at(i)
-            if state.is_final():
-                self.finalize_state(state)
-                prob = probs[i]
-                parse = []
-                for j in range(state._e_i):
-                    start = state._ents[j].start
-                    end = state._ents[j].end
-                    label = state._ents[j].label
-                    parse.append((start, end, self.strings[label]))
-                parses.append((prob, parse))
-        return parses
-
     cdef Transition lookup_transition(self, object name) except *:
         cdef attr_t label
         if name == '-' or name == '' or name is None:
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 1267bdad8..8c9050351 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -8,7 +8,6 @@ from libcpp.vector cimport vector
 from libc.string cimport memset, memcpy
 from libc.stdlib cimport calloc, free
 from cymem.cymem cimport Pool
-from thinc.extra.search cimport Beam
 from thinc.backends.linalg cimport Vec, VecVec
 
 from thinc.api import chain, clone, Linear, list2array, NumpyOps, CupyOps, use_ops
@@ -28,21 +27,15 @@ from ._parser_model cimport get_c_weights, get_c_sizes
 from .stateclass cimport StateClass
 from ._state cimport StateC
 from .transition_system cimport Transition
-from . cimport _beam_utils
 from ..gold.example cimport Example
 
 from ..util import link_vectors_to_models, create_default_optimizer, registry
 from ..compat import copy_array
 from ..errors import Errors, Warnings
 from .. import util
-from . import _beam_utils
 from . import nonproj
 
 
-def get_parses_from_example(example, merge=False, vocab=None):
-    # TODO: This is just a temporary shim to make the refactor easier.
-    return [(example.predicted, example)]
-
 cdef class Parser:
     """
     Base class of the DependencyParser and EntityRecognizer.
@@ -146,71 +139,47 @@ cdef class Parser:
         '''
         pass
 
-    def preprocess_gold(self, examples):
-        for ex in examples:
-            yield ex
-
     def use_params(self, params):
         # Can't decorate cdef class :(. Workaround.
         with self.model.use_params(params):
             yield
 
-    def __call__(self, Doc doc, beam_width=None):
+    def __call__(self, Doc doc):
         """Apply the parser or entity recognizer, setting the annotations onto
         the `Doc` object.
 
         doc (Doc): The document to be processed.
         """
-        if beam_width is None:
-            beam_width = self.cfg['beam_width']
-        beam_density = self.cfg.get('beam_density', 0.)
-        states = self.predict([doc], beam_width=beam_width,
-                              beam_density=beam_density)
+        states = self.predict([doc])
         self.set_annotations([doc], states, tensors=None)
         return doc
 
-    def pipe(self, docs, int batch_size=256, int n_threads=-1, beam_width=None,
-             as_example=False):
+    def pipe(self, docs, int batch_size=256, int n_threads=-1):
         """Process a stream of documents.
 
         docs (iterable): The sequence of documents to process.
         batch_size (int): Number of documents to accumulate into a working set.
         YIELDS (Doc): Documents, in order.
         """
-        if beam_width is None:
-            beam_width = self.cfg['beam_width']
-        beam_density = self.cfg.get('beam_density', 0.)
         cdef Doc doc
         for batch in util.minibatch(docs, size=batch_size):
             batch_in_order = list(batch)
-            docs = [self._get_doc(ex) for ex in batch_in_order]
+            docs = [ex.predicted for ex in batch_in_order]
             by_length = sorted(docs, key=lambda doc: len(doc))
             for subbatch in util.minibatch(by_length, size=max(batch_size//4, 2)):
                 subbatch = list(subbatch)
-                parse_states = self.predict(subbatch, beam_width=beam_width,
-                                            beam_density=beam_density)
+                parse_states = self.predict(subbatch)
                 self.set_annotations(subbatch, parse_states, tensors=None)
-            if as_example:
-                annotated_examples = []
-                for ex, doc in zip(batch_in_order, docs):
-                    ex.doc = doc
-                    annotated_examples.append(ex)
-                yield from annotated_examples
-            else:
-                yield from batch_in_order
+            yield from batch_in_order
 
-    def predict(self, docs, beam_width=1, beam_density=0.0, drop=0.):
+    def predict(self, docs):
         if isinstance(docs, Doc):
             docs = [docs]
         if not any(len(doc) for doc in docs):
             result = self.moves.init_batch(docs)
             self._resize()
             return result
-        if beam_width < 2:
-            return self.greedy_parse(docs, drop=drop)
-        else:
-            return self.beam_parse(docs, beam_width=beam_width,
-                                   beam_density=beam_density, drop=drop)
+        return self.greedy_parse(docs, drop=0.0)
 
     def greedy_parse(self, docs, drop=0.):
         cdef vector[StateC*] states
@@ -232,44 +201,6 @@ cdef class Parser:
                 weights, sizes)
         return batch
 
-    def beam_parse(self, docs, int beam_width, float drop=0., beam_density=0.):
-        cdef Beam beam
-        cdef Doc doc
-        cdef np.ndarray token_ids
-        set_dropout_rate(self.model, drop)
-        beams = self.moves.init_beams(docs, beam_width, beam_density=beam_density)
-        # This is pretty dirty, but the NER can resize itself in init_batch,
-        # if labels are missing. We therefore have to check whether we need to
-        # expand our model output.
-        self._resize()
-        cdef int nr_feature = self.model.get_ref("lower").get_dim("nF")
-        model = self.model.predict(docs)
-        token_ids = numpy.zeros((len(docs) * beam_width, nr_feature),
-                                dtype='i', order='C')
-        cdef int* c_ids
-        cdef int n_states
-        model = self.model.predict(docs)
-        todo = [beam for beam in beams if not beam.is_done]
-        while todo:
-            token_ids.fill(-1)
-            c_ids = <int*>token_ids.data
-            n_states = 0
-            for beam in todo:
-                for i in range(beam.size):
-                    state = <StateC*>beam.at(i)
-                    # This way we avoid having to score finalized states
-                    # We do have to take care to keep indexes aligned, though
-                    if not state.is_final():
-                        state.set_context_tokens(c_ids, nr_feature)
-                        c_ids += nr_feature
-                        n_states += 1
-            if n_states == 0:
-                break
-            vectors = model.state2vec.predict(token_ids[:n_states])
-            scores = model.vec2scores.predict(vectors)
-            todo = self.transition_beams(todo, scores)
-        return beams
-
     cdef void _parseC(self, StateC** states,
                       WeightsC weights, SizesC sizes) nogil:
         cdef int i, j
@@ -290,20 +221,9 @@ cdef class Parser:
             unfinished.clear()
         free_activations(&activations)
 
-    def set_annotations(self, docs, states_or_beams, tensors=None):
+    def set_annotations(self, docs, states, tensors=None):
         cdef StateClass state
-        cdef Beam beam
         cdef Doc doc
-        states = []
-        beams = []
-        for state_or_beam in states_or_beams:
-            if isinstance(state_or_beam, StateClass):
-                states.append(state_or_beam)
-            else:
-                beam = state_or_beam
-                state = StateClass.borrow(<StateC*>beam.at(0))
-                states.append(state)
-                beams.append(beam)
         for i, (state, doc) in enumerate(zip(states, docs)):
             self.moves.finalize_state(state.c)
             for j in range(doc.length):
@@ -311,8 +231,6 @@ cdef class Parser:
             self.moves.finalize_doc(doc)
             for hook in self.postprocesses:
                 hook(doc)
-        for beam in beams:
-            _beam_utils.cleanup_beam(beam)
 
     def transition_states(self, states, float[:, ::1] scores):
         cdef StateClass state
@@ -344,20 +262,6 @@ cdef class Parser:
             states[i].push_hist(guess)
         free(is_valid)
 
-    def transition_beams(self, beams, float[:, ::1] scores):
-        cdef Beam beam
-        cdef float* c_scores = &scores[0, 0]
-        for beam in beams:
-            for i in range(beam.size):
-                state = <StateC*>beam.at(i)
-                if not state.is_final():
-                    self.moves.set_valid(beam.is_valid[i], state)
-                    memcpy(beam.scores[i], c_scores, scores.shape[1] * sizeof(float))
-                    c_scores += scores.shape[1]
-            beam.advance(_beam_utils.transition_state, _beam_utils.hash_state, <void*>self.moves.c)
-            beam.check_done(_beam_utils.check_final_state, NULL)
-        return [b for b in beams if not b.is_done]
-
     def update(self, examples, drop=0., set_annotations=False, sgd=None, losses=None):
         examples = Example.to_example_objects(examples)
@@ -366,23 +270,8 @@ cdef class Parser:
             losses.setdefault(self.name, 0.)
         for multitask in self._multitasks:
             multitask.update(examples, drop=drop, sgd=sgd)
-        # The probability we use beam update, instead of falling back to
-        # a greedy update
-        beam_update_prob = self.cfg['beam_update_prob']
-        if self.cfg['beam_width'] >= 2 and numpy.random.random() < beam_update_prob:
-            return self.update_beam(examples, self.cfg['beam_width'],
-                drop=drop, sgd=sgd, losses=losses, set_annotations=set_annotations,
-                beam_density=self.cfg.get('beam_density', 0.001))
         set_dropout_rate(self.model, drop)
-        cut_gold = True
-        if cut_gold:
-            # Chop sequences into lengths of this many transitions, to make the
-            # batch uniform length.
-            cut_gold = numpy.random.choice(range(20, 100))
-            states, golds, max_steps = self._init_gold_batch(examples, max_length=cut_gold)
-        else:
-            states, golds, max_steps = self._init_gold_batch_no_cut(examples)
+        states, golds, max_steps = self._init_gold_batch_no_cut(examples)
         states_golds = [(s, g) for (s, g) in zip(states, golds)
                         if not s.is_final() and g is not None]
         # Prepare the stepwise model, and get the callback for finishing the batch
@@ -450,52 +339,6 @@ cdef class Parser:
             losses[self.name] += loss / n_scores
         return losses
 
-    def update_beam(self, examples, width, drop=0., sgd=None, losses=None,
-                    set_annotations=False, beam_density=0.0):
-        examples = Example.to_example_objects(examples)
-        docs = [ex.doc for ex in examples]
-        golds = [ex.gold for ex in examples]
-        new_golds = []
-        lengths = [len(d) for d in docs]
-        states = self.moves.init_batch(docs)
-        for gold in golds:
-            self.moves.preprocess_gold(gold)
-            new_golds.append(gold)
-        set_dropout_rate(self.model, drop)
-        model, backprop_tok2vec = self.model.begin_update(docs)
-        states_d_scores, backprops, beams = _beam_utils.update_beam(
-            self.moves,
-            self.model.get_ref("lower").get_dim("nF"),
-            10000,
-            states,
-            golds,
-            model.state2vec,
-            model.vec2scores,
-            width,
-            losses=losses,
-            beam_density=beam_density
-        )
-        for i, d_scores in enumerate(states_d_scores):
-            losses[self.name] += (d_scores**2).mean()
-            ids, bp_vectors, bp_scores = backprops[i]
-            d_vector = bp_scores(d_scores)
-            if isinstance(model.ops, CupyOps) \
-            and not isinstance(ids, model.state2vec.ops.xp.ndarray):
-                model.backprops.append((
-                    util.get_async(model.cuda_stream, ids),
-                    util.get_async(model.cuda_stream, d_vector),
-                    bp_vectors))
-            else:
-                model.backprops.append((ids, d_vector, bp_vectors))
-        backprop_tok2vec(golds)
-        if sgd is not None:
-            self.model.finish_update(sgd)
-        if set_annotations:
-            self.set_annotations(docs, beams)
-        cdef Beam beam
-        for beam in beams:
-            _beam_utils.cleanup_beam(beam)
-
     def get_gradients(self):
         """Get non-zero gradients of the model's parameters, as a dictionary
         keyed by the parameter ID. The values are (weights, gradients) tuples.
@@ -513,66 +356,9 @@ cdef class Parser:
                 queue.extend(node._layers)
         return gradients
 
-    def _init_gold_batch_no_cut(self, whole_examples):
-        states = self.moves.init_batch([eg.doc for eg in whole_examples])
-        good_docs = []
-        good_golds = []
-        good_states = []
-        for i, eg in enumerate(whole_examples):
-            parses = get_parses_from_example(eg)
-            doc, gold = parses[0]
-            if gold is not None and self.moves.has_gold(gold):
-                good_docs.append(doc)
-                good_golds.append(gold)
-                good_states.append(states[i])
-        n_moves = []
-        for doc, gold in zip(good_docs, good_golds):
-            oracle_actions = self.moves.get_oracle_sequence(doc, gold)
-            n_moves.append(len(oracle_actions))
-        return good_states, good_golds, max(n_moves, default=0) * 2
-
-    def _init_gold_batch(self, whole_examples, min_length=5, max_length=500):
-        """Make a square batch, of length equal to the shortest doc. A long
-        doc will get multiple states. Let's say we have a doc of length 2*N,
-        where N is the shortest doc. We'll make two states, one representing
-        long_doc[:N], and another representing long_doc[N:]."""
-        cdef:
-            StateClass state
-            Transition action
-        whole_docs = []
-        whole_golds = []
-        for eg in whole_examples:
-            for doc, gold in get_parses_from_example(eg):
-                whole_docs.append(doc)
-                whole_golds.append(gold)
-        whole_states = self.moves.init_batch(whole_docs)
-        max_length = max(min_length, min(max_length, min([len(doc) for doc in whole_docs])))
-        max_moves = 0
-        states = []
-        golds = []
-        for doc, state, gold in zip(whole_docs, whole_states, whole_golds):
-            gold = self.moves.preprocess_gold(gold)
-            if gold is None:
-                continue
-            oracle_actions = self.moves.get_oracle_sequence(doc, gold)
-            start = 0
-            while start < len(doc):
-                state = state.copy()
-                n_moves = 0
-                while state.B(0) < start and not state.is_final():
-                    action = self.moves.c[oracle_actions.pop(0)]
-                    action.do(state.c, action.label)
-                    state.c.push_hist(action.clas)
-                    n_moves += 1
-                has_gold = self.moves.has_gold(gold, start=start,
-                                               end=start+max_length)
-                if not state.is_final() and has_gold:
-                    states.append(state)
-                    golds.append(gold)
-                    max_moves = max(max_moves, n_moves)
-                start += min(max_length, len(doc)-start)
-            max_moves = max(max_moves, len(oracle_actions))
-        return states, golds, max_moves
+    def _init_gold_batch_no_cut(self, examples):
+        states = self.moves.init_batch([eg.predicted for eg in examples])
+        golds = list(examples)
+        # update() unpacks three values, so also return a cap on the number
+        # of transitions, mirroring the old no-cut helper.
+        n_moves = [len(self.moves.get_oracle_sequence(eg)) for eg in examples]
+        return states, golds, max(n_moves, default=0) * 2
 
     def get_batch_loss(self, states, examples, float[:, ::1] scores, losses):
         cdef StateClass state
@@ -631,13 +417,8 @@ cdef class Parser:
         if sgd is None:
             sgd = self.create_optimizer()
         doc_sample = []
-        gold_sample = []
         for example in islice(get_examples(), 10):
-            parses = get_parses_from_example(example, merge=False, vocab=self.vocab)
-            for doc, gold in parses:
-                if len(doc):
-                    doc_sample.append(doc)
-                    gold_sample.append(gold)
+            doc_sample.append(example.predicted)
 
         if pipeline is not None:
             for name, component in pipeline:
@@ -656,12 +437,6 @@ cdef class Parser:
         link_vectors_to_models(self.vocab)
         return sgd
 
-    def _get_doc(self, example):
-        """ Use this method if the `example` can be both a Doc or an Example """
-        if isinstance(example, Doc):
-            return example
-        return example.doc
-
     def to_disk(self, path, exclude=tuple(), **kwargs):
         serializers = {
             'model': lambda p: (self.model.to_disk(p) if self.model is not True else True),
diff --git a/spacy/syntax/transition_system.pxd b/spacy/syntax/transition_system.pxd
index 1d26837f4..21752b15f 100644
--- a/spacy/syntax/transition_system.pxd
+++ b/spacy/syntax/transition_system.pxd
@@ -40,8 +40,6 @@ cdef class TransitionSystem:
    cdef int _size
    cdef public attr_t root_label
    cdef public freqs
-    cdef init_state_t init_beam_state
-    cdef del_state_t del_beam_state
    cdef public object labels

    cdef int initialize_state(self, StateC* state) nogil
diff --git a/spacy/syntax/transition_system.pyx b/spacy/syntax/transition_system.pyx
index d74ec5384..b7163ef86 100644
--- a/spacy/syntax/transition_system.pyx
+++ b/spacy/syntax/transition_system.pyx
@@ -1,13 +1,11 @@
 # cython: infer_types=True
 from cpython.ref cimport Py_INCREF
 from cymem.cymem cimport Pool
-from thinc.extra.search cimport Beam
 
 from collections import Counter
 import srsly
 
 from ..typedefs cimport weight_t
-from . cimport _beam_utils
 from ..tokens.doc cimport Doc
 from ..structs cimport TokenC
 from .stateclass cimport StateClass
@@ -47,8 +45,6 @@ cdef class TransitionSystem:
         if labels_by_action:
             self.initialize_actions(labels_by_action, min_freq=min_freq)
         self.root_label = self.strings.add('ROOT')
-        self.init_beam_state = _init_state
-        self.del_beam_state = _del_state
 
     def __reduce__(self):
         return (self.__class__, (self.strings, self.labels), None, None)
@@ -64,29 +60,6 @@ cdef class TransitionSystem:
             offset += len(doc)
         return states
 
-    def init_beams(self, docs, beam_width, beam_density=0.):
-        cdef Doc doc
-        beams = []
-        cdef int offset = 0
-
-        # Doc objects might contain labels that we need to register actions for. We need to check for that
-        # *before* we create any Beam objects, because the Beam object needs the correct number of
-        # actions. It's sort of dumb, but the best way is to just call init_batch() -- that triggers the additions,
-        # and it doesn't matter that we create and discard the state objects.
-        self.init_batch(docs)
-
-        for doc in docs:
-            beam = Beam(self.n_moves, beam_width, min_density=beam_density)
-            beam.initialize(self.init_beam_state, self.del_beam_state,
-                            doc.length, doc.c)
-            for i in range(beam.width):
-                state = <StateC*>beam.at(i)
-                state.offset = offset
-            offset += len(doc)
-            beam.check_done(_beam_utils.check_final_state, NULL)
-            beams.append(beam)
-        return beams
-
     def get_oracle_sequence(self, Example example):
         cdef Pool mem = Pool()
         # n_moves should not be zero at this point, but make sure to avoid zero-length mem alloc
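
Note for reviewers: after this patch, `Parser.__call__`, `Parser.pipe` and `Parser.predict` no longer accept `beam_width`/`beam_density`, and decoding always goes through `greedy_parse`. A minimal sketch of the surviving call path, assuming a trained pipeline is installed (the model name is illustrative, not part of the diff):

    # Greedy-only decoding after this patch; no beam arguments anywhere.
    import spacy

    nlp = spacy.load("en_core_web_sm")  # any pipeline with a trained parser
    parser = nlp.get_pipe("parser")
    doc = nlp.make_doc("Beam search is gone for now.")
    # Parser.__call__ now reduces to:
    #     states = self.predict([doc])   # -> self.greedy_parse(docs, drop=0.0)
    #     self.set_annotations([doc], states, tensors=None)
    doc = parser(doc)
    print([(t.text, t.dep_, t.head.text) for t in doc])

Callers that previously passed `beam_width=k` (k >= 2) to get beam-scored parses will need to stay on the previous commit until a replacement lands.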