diff --git a/spacy/syntax/beam_parser.pyx b/spacy/syntax/beam_parser.pyx
index 8801f6e7f..598a09620 100644
--- a/spacy/syntax/beam_parser.pyx
+++ b/spacy/syntax/beam_parser.pyx
@@ -26,10 +26,11 @@ from thinc.typedefs cimport weight_t, class_t, feat_t, atom_t, hash_t
 from util import Config
 
 from thinc.linear.features cimport ConjunctionExtracter
-from thinc.structs cimport FeatureC
+from thinc.structs cimport FeatureC, ExampleC
 from thinc.extra.search cimport Beam
 from thinc.extra.search cimport MaxViolation
+from thinc.extra.eg cimport Example
 
 from ..structs cimport TokenC
@@ -46,6 +47,7 @@ from ._parse_features cimport fill_context
 from .stateclass cimport StateClass
 from .parser cimport Parser
 from .parser cimport ParserPerceptron
+from .parser cimport ParserNeuralNet
 
 DEBUG = False
 def set_debug(val):
@@ -78,7 +80,6 @@ cdef class BeamParser(Parser):
         self._parseC(tokens, length, nr_feat, nr_class)
 
     cdef int _parseC(self, TokenC* tokens, int length, int nr_feat, int nr_class) except -1:
-
         cdef Beam beam = Beam(self.moves.n_moves, self.beam_width)
         beam.initialize(_init_state, length, tokens)
         beam.check_done(_check_final_state, NULL)
@@ -104,34 +105,39 @@ cdef class BeamParser(Parser):
         while not pred.is_done and not gold.is_done:
             self._advance_beam(pred, gold_parse, False)
             self._advance_beam(gold, gold_parse, True)
-            violn.check(pred, gold)
-        self.model.time += 1
-        if pred.is_done and pred.loss == 0:
-            pass
-        elif pred.is_done and pred.loss > 0:
-            self._update(tokens, pred.histories[0], -1.0)
-            self._update(tokens, gold.histories[0], 1.0)
-        elif violn.cost > 0:
-            self._update(tokens, violn.p_hist, -1.0)
-            self._update(tokens, violn.g_hist, 1.0)
+            if pred.min_score > gold.score:
+                break
+        #print(pred.score, pred.min_score, gold.score)
+        cdef long double Z = 0.0
+        for i in range(pred.size):
+            if pred._states[i].loss > 0:
+                Z += exp(pred._states[i].score)
+        if Z > 0:
+            Z += exp(gold.score)
+            for i, hist in enumerate(pred.histories):
+                if pred._states[i].loss > 0:
+                    self._update_dense(tokens, hist, exp(pred._states[i].score) / Z)
+            self._update_dense(tokens, gold.histories[0], (exp(gold.score) / Z) - 1)
         _cleanup(pred)
         _cleanup(gold)
         return pred.loss
 
     def _advance_beam(self, Beam beam, GoldParse gold, bint follow_gold):
-        cdef atom_t[CONTEXT_SIZE] context
-        cdef Pool mem = Pool()
-        features = <FeatureC*>mem.alloc(self.model.nr_feat, sizeof(FeatureC))
-        cdef ParserPerceptron model = self.model
+        cdef Example py_eg = Example(nr_class=self.moves.n_moves, nr_atom=CONTEXT_SIZE,
+                                     nr_feat=self.model.nr_feat, widths=self.model.widths)
+        cdef ExampleC* eg = py_eg.c
+
+        cdef ParserNeuralNet model = self.model
         for i in range(beam.size):
+            py_eg.reset()
             stcls = <StateClass>beam.at(i)
            if not stcls.c.is_final():
-                fill_context(context, stcls.c)
-                nr_feat = model.extracter.set_features(features, context)
-                self.model.set_scoresC(beam.scores[i], features, nr_feat, 1)
+                model.set_featuresC(eg, stcls.c)
+                model.set_scoresC(beam.scores[i], eg.features, eg.nr_feat, 1)
                 self.moves.set_valid(beam.is_valid[i], stcls.c)
         if gold is not None:
             for i in range(beam.size):
+                py_eg.reset()
                 stcls = <StateClass>beam.at(i)
                 if not stcls.c.is_final():
                     self.moves.set_costs(beam.is_valid[i], beam.costs[i], stcls, gold)
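
Note on the hunk above: it swaps the max-violation perceptron update for a maxent-style one. The finished beam is treated as a log-linear distribution; every state with nonzero loss is pushed down in proportion to its share of the probability mass, and the gold history receives the complementary update, (exp(gold.score) / Z) - 1. A minimal pure-Python sketch of that weighting scheme (the function name and the list-based inputs are illustrative, not part of the patch):

    import math

    def beam_update_weights(pred_scores, pred_losses, gold_score):
        # Partition function over the losing states only, then the gold state.
        Z = sum(math.exp(s) for s, l in zip(pred_scores, pred_losses) if l > 0)
        if Z == 0.0:
            # Beam contains no violating states: no update is made.
            return [0.0] * len(pred_scores), 0.0
        Z += math.exp(gold_score)
        # Each losing history is penalised by its probability mass...
        pred = [math.exp(s) / Z if l > 0 else 0.0
                for s, l in zip(pred_scores, pred_losses)]
        # ...and the gold history gets p(gold) - 1, a negative value that
        # _update_dense (below) turns into a move *toward* the gold actions.
        gold = math.exp(gold_score) / Z - 1.0
        return pred, gold

    # Example: one losing and one clean history on the beam.
    print(beam_update_weights([2.0, 1.0], [1, 0], 1.5))
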
@@ -141,88 +147,24 @@ cdef class BeamParser(Parser):
         beam.advance(_transition_state, _hash_state, self.moves.c)
         beam.check_done(_check_final_state, NULL)
 
-    def _maxent_update_dense(self, doc, pred_scores, pred_hist, gold_scores,
-                             gold_hist, step_size=0.001):
-        for i, history in enumerate(pred_hist):
-            stcls = StateClass.init(doc.c, doc.length)
-            self.moves.initialize_state(stcls.c)
-            for j, clas in enumerate(history):
-                fill_context(context, stcls.c)
-                nr_feat = model.extracter.set_features(features, context)
-
-                self.moves.set_valid(is_valid, stcls)
-                # Move weight away from this outcome
-                for i in range(nr_class):
-                    costs[i] = 0.0
-                costs[clas] = 1.0
-                self.update(features, nr_feat, True, costs, is_valid, False)
-
-                self.moves.c[clas].do(stcls.c, self.moves.c[clas].label)
-        for i, history in enumerate(gold_hist):
-            stcls = StateClass.init(doc.c, doc.length)
-            self.moves.initialize_state(stcls.c)
-            for j, clas in enumerate(history):
-                fill_context(context, stcls.c)
-                nr_feat = model.extracter.set_features(features, context)
-
-                self.moves.set_valid(is_valid, stcls)
-                # Move weight towards this outcome
-                for i in range(nr_class):
-                    costs[i] = 1.0
-                costs[clas] = 0.0
-                self.update(features, nr_feat, True, costs, is_valid, False)
-
-                self.moves.c[clas].do(stcls.c, self.moves.c[clas].label)
-
-    def _maxent_update(self, doc, pred_scores, pred_hist, gold_scores, gold_hist,
-                       step_size=0.001):
-        cdef weight_t Z, gZ, value
-        cdef feat_t feat
-        cdef class_t clas
-        gZ, g_counts = self._maxent_counts(doc, gold_scores, gold_hist)
-        Z, counts = self._maxent_counts(doc, pred_scores, pred_hist)
-        update = {}
-        if gZ > 0:
-            for (clas, feat), value in g_counts.items():
-                update[(clas, feat)] = value / gZ
-        Z += gZ
-        for (clas, feat), value in counts.items():
-            update.setdefault((clas, feat), 0.0)
-            update[(clas, feat)] -= value / Z
-        for (clas, feat), value in update.items():
-            if value < 1000:
-                self.model.update_weight(feat, clas, step_size * value)
-
-    def _maxent_counts(self, Doc doc, scores, history):
-        cdef Pool mem = Pool()
-        cdef atom_t[CONTEXT_SIZE] context
-        features = <FeatureC*>mem.alloc(self.model.nr_feat, sizeof(FeatureC))
-
-        cdef StateClass stcls
-
-        cdef class_t clas
-        cdef ParserPerceptron model = self.model
-
-        cdef weight_t Z = 0.0
-        cdef weight_t score
-        counts = {}
-        for i, (score, history) in enumerate(zip(scores, history)):
-            prob = exp(score)
-            if prob < 1e-6:
-                continue
-            stcls = StateClass.init(doc.c, doc.length)
-            self.moves.initialize_state(stcls.c)
-            for clas in history:
-                fill_context(context, stcls.c)
-                nr_feat = model.extracter.set_features(features, context)
-                for feat in features[:nr_feat]:
-                    key = (clas, feat.key)
-                    counts[key] = counts.get(key, 0.0) + feat.value
-                self.moves.c[clas].do(stcls.c, self.moves.c[clas].label)
-            for key in counts:
-                counts[key] *= prob
-            Z += prob
-        return Z, counts
+    def _update_dense(self, Doc doc, history, weight_t loss):
+        cdef Example py_eg = Example(nr_class=self.moves.n_moves,
+                                     nr_atom=CONTEXT_SIZE,
+                                     nr_feat=self.model.nr_feat,
+                                     widths=self.model.widths)
+        cdef ExampleC* eg = py_eg.c
+        cdef ParserNeuralNet model = self.model
+        stcls = StateClass.init(doc.c, doc.length)
+        self.moves.initialize_state(stcls.c)
+        for clas in history:
+            model.set_featuresC(eg, stcls.c)
+            self.moves.set_valid(eg.is_valid, stcls.c)
+            for i in range(self.moves.n_moves):
+                eg.costs[i] = loss if i == clas else 0
+            model.updateC(
+                eg.features, eg.nr_feat, True, eg.costs, eg.is_valid, False)
+            self.moves.c[clas].do(stcls.c, self.moves.c[clas].label)
+            py_eg.reset()
 
     def _update(self, Doc tokens, list hist, weight_t inc):
         cdef Pool mem = Pool()
@@ -278,7 +220,88 @@ cdef hash_t _hash_state(void* _state, void* _) except 0:
     state = <StateClass>_state
     #return state.c
     return state.c.hash()
 
+#
+# def _maxent_update(self, Doc doc, pred_scores, pred_hist, gold_scores, gold_hist):
+#     Z = 0
+#     for i, (score, history) in enumerate(zip(pred_scores, pred_hist)):
+#         prob = exp(score)
+#         if prob < 1e-6:
+#             continue
+#         stcls = StateClass.init(doc.c, doc.length)
+#         self.moves.initialize_state(stcls.c)
+#         for clas in history:
+#             delta_loss[clas] = prob * 1/Z
+#             gradient = [(input_ * prob) / Z for input_ in hidden]
+#             fill_context(context, stcls.c)
+#             nr_feat = model.extracter.set_features(features, context)
+#             for feat in features[:nr_feat]:
+#                 key = (clas, feat.key)
+#                 counts[key] = counts.get(key, 0.0) + feat.value
+#             self.moves.c[clas].do(stcls.c, self.moves.c[clas].label)
+#         for key in counts:
+#             counts[key] *= prob
+#         Z += prob
+#     gZ, g_counts = self._maxent_counts(doc, gold_scores, gold_hist)
+#     for (clas, feat), value in g_counts.items():
+#         self.model.update_weight(feat, clas, value / gZ)
+#
+#     Z, counts = self._maxent_counts(doc, pred_scores, pred_hist)
+#     for (clas, feat), value in counts.items():
+#         self.model.update_weight(feat, clas, -value / (Z + gZ))
+#
+#
+
+# def _maxent_update(self, doc, pred_scores, pred_hist, gold_scores, gold_hist,
+#                    step_size=0.001):
+#     cdef weight_t Z, gZ, value
+#     cdef feat_t feat
+#     cdef class_t clas
+#     gZ, g_counts = self._maxent_counts(doc, gold_scores, gold_hist)
+#     Z, counts = self._maxent_counts(doc, pred_scores, pred_hist)
+#     update = {}
+#     if gZ > 0:
+#         for (clas, feat), value in g_counts.items():
+#             update[(clas, feat)] = value / gZ
+#     Z += gZ
+#     for (clas, feat), value in counts.items():
+#         update.setdefault((clas, feat), 0.0)
+#         update[(clas, feat)] -= value / Z
+#     for (clas, feat), value in update.items():
+#         if value < 1000:
+#             self.model.update_weight(feat, clas, step_size * value)
+#
+# def _maxent_counts(self, Doc doc, scores, history):
+#     cdef Pool mem = Pool()
+#     cdef atom_t[CONTEXT_SIZE] context
+#     features = <FeatureC*>mem.alloc(self.model.nr_feat, sizeof(FeatureC))
+#
+#     cdef StateClass stcls
+#
+#     cdef class_t clas
+#     cdef ParserPerceptron model = self.model
+#
+#     cdef weight_t Z = 0.0
+#     cdef weight_t score
+#     counts = {}
+#     for i, (score, history) in enumerate(zip(scores, history)):
+#         prob = exp(score)
+#         if prob < 1e-6:
+#             continue
+#         stcls = StateClass.init(doc.c, doc.length)
+#         self.moves.initialize_state(stcls.c)
+#         for clas in history:
+#             fill_context(context, stcls.c)
+#             nr_feat = model.extracter.set_features(features, context)
+#             for feat in features[:nr_feat]:
+#                 key = (clas, feat.key)
+#                 counts[key] = counts.get(key, 0.0) + feat.value
+#             self.moves.c[clas].do(stcls.c, self.moves.c[clas].label)
+#         for key in counts:
+#             counts[key] *= prob
+#         Z += prob
+#     return Z, counts
+#
 #
 # def _advance_beam(self, Beam beam, GoldParse gold, bint follow_gold, words):
 #     cdef atom_t[CONTEXT_SIZE] context
diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx
index 3b1d7a284..8af41c918 100644
--- a/spacy/syntax/parser.pyx
+++ b/spacy/syntax/parser.pyx
@@ -13,6 +13,7 @@ from cpython.exc cimport PyErr_CheckSignals
 from libc.stdint cimport uint32_t, uint64_t
 from libc.string cimport memset, memcpy
 from libc.stdlib cimport malloc, calloc, free
+from libc.math cimport exp
 import os.path
 from os import path
 import shutil
@@ -106,7 +107,7 @@ cdef class ParserPerceptron(AveragedPerceptron):
 
 cdef class ParserNeuralNet(NeuralNet):
     def __init__(self, shape, **kwargs):
-        vector_widths = [4] * 57
+        vector_widths = [4] * 76
         slots =  [0, 1, 2, 3]    # S0
         slots += [4, 5, 6, 7]    # S1
         slots += [8, 9, 10, 11]  # S2
@@ -119,11 +120,10 @@ cdef class ParserNeuralNet(NeuralNet):
         slots += [36, 37, 38, 39] * 2  # B0l, B0r
         slots += [40, 41, 42, 43] * 2  # S1l, S1r
         slots += [44, 45, 46, 47] * 2  # S2l, S2r
-        slots += [48, 49, 50, 51, 52]
+        slots += [48, 49, 50, 51, 52, 53, 54, 55]
         slots += [53, 54, 55, 56]
         input_length = sum(vector_widths[slot] for slot in slots)
-        widths = [input_length] + shape[3:]
-
+        widths = [input_length] + shape
         NeuralNet.__init__(self, widths, embed=(vector_widths, slots), **kwargs)
 
     @property
@@ -156,15 +156,26 @@ cdef class ParserNeuralNet(NeuralNet):
         feats = _add_pos_bigram(feats, 65, state.S_(1), state.S_(0))
         feats = _add_pos_bigram(feats, 66, state.S_(1), state.B_(0))
         feats = _add_pos_bigram(feats, 67, state.S_(0), state.B_(1))
-        feats = _add_pos_bigram(feats, 68, state.B_(0), state.B_(1))
-        feats = _add_pos_trigram(feats, 69, state.S_(1), state.S_(0), state.B_(0))
-        feats = _add_pos_trigram(feats, 70, state.S_(0), state.B_(0), state.B_(1))
-        feats = _add_pos_trigram(feats, 71, state.S_(0), state.R_(state.S(0), 1),
+        feats = _add_pos_bigram(feats, 68, state.S_(0), state.R_(state.S(0), 1))
+        feats = _add_pos_bigram(feats, 69, state.S_(0), state.R_(state.S(0), 2))
+        feats = _add_pos_bigram(feats, 70, state.S_(0), state.L_(state.S(0), 1))
+        feats = _add_pos_bigram(feats, 71, state.S_(0), state.L_(state.S(0), 2))
+        feats = _add_pos_trigram(feats, 72, state.S_(1), state.S_(0), state.B_(0))
+        feats = _add_pos_trigram(feats, 73, state.S_(0), state.B_(0), state.B_(1))
+        feats = _add_pos_trigram(feats, 74, state.S_(0), state.R_(state.S(0), 1),
                                  state.R_(state.S(0), 2))
-        feats = _add_pos_trigram(feats, 72, state.S_(0), state.L_(state.S(0), 1),
+        feats = _add_pos_trigram(feats, 75, state.S_(0), state.L_(state.S(0), 1),
                                  state.L_(state.S(0), 2))
         eg.nr_feat = feats - eg.features
 
+    cdef void _set_delta_lossC(self, weight_t* delta_loss,
+            const weight_t* Zs, const weight_t* scores) nogil:
+        for i in range(self.c.widths[self.c.nr_layer-1]):
+            delta_loss[i] = Zs[i]
+
+    cdef void _softmaxC(self, weight_t* out) nogil:
+        pass
+
 
 cdef inline FeatureC* _add_token(FeatureC* feats,
         int slot, const TokenC* token, weight_t value) nogil:
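
Note on the two overrides just above: `_softmaxC` becomes a no-op and `_set_delta_lossC` copies the `Zs` buffer straight through, apparently so that the gradients computed globally by the beam objective (written into `eg.costs` by `_update_dense`) reach the network unchanged rather than being re-derived from a local per-state softmax. A toy contrast of the two conventions, assuming the base class normally applies softmax plus cross-entropy (pure Python, names illustrative):

    import math

    def local_softmax_delta(scores, gold):
        # Default single-state convention: dL/dscore_i = softmax(scores)_i - [i == gold]
        exps = [math.exp(s) for s in scores]
        total = sum(exps)
        return [e / total - (1.0 if i == gold else 0.0) for i, e in enumerate(exps)]

    def pass_through_delta(costs):
        # Convention used by this patch: probabilities were already normalised
        # over the whole beam, so the per-state delta is the costs vector itself.
        return list(costs)
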
@@ -230,80 +241,6 @@ cdef inline FeatureC* _add_pos_trigram(FeatureC* feat, int slot,
     feat.value = 1.0
     return feat+1
 
-cdef class ParserNeuralNetEnsemble(ParserNeuralNet):
-    def __init__(self, shape, update_step='sgd', eta=0.01, rho=0.0, n=5):
-        ParserNeuralNet.__init__(self, shape, update_step=update_step, eta=eta, rho=rho)
-        self._models_c = <NeuralNetC**>self.mem.alloc(sizeof(NeuralNetC*), n)
-        self._masks = <int**>self.mem.alloc(sizeof(int*), n)
-        self._models = []
-        cdef ParserNeuralNet model
-        threshold = 1.5 / n
-        self._nr_model = n
-        for i in range(n):
-            self._masks[i] = <int*>self.mem.alloc(sizeof(int), self.nr_feat)
-            for j in range(self.nr_feat):
-                self._masks[i][j] = random.random() < threshold
-            # We have to pass our pool here, because the embedding table passes
-            # it around.
-            model = ParserNeuralNet(shape, update_step=update_step, eta=eta, rho=rho)
-            self._models_c[i] = &model.c
-            self._models.append(model)
-
-    property eta:
-        def __get__(self):
-            return self._models[0].eta
-
-        def __set__(self, weight_t value):
-            for model in self._models:
-                model.eta = value
-
-    def sparsify_embeddings(self, penalty):
-        p = 0.0
-        for model in self._models:
-            p += model.sparsify_embeddings(penalty)
-        return p / len(self._models)
-
-    cdef void set_scoresC(self, weight_t* scores, const void* _feats,
-            int nr_feat, int is_sparse) nogil:
-        nr_class = self.c.widths[self.c.nr_layer-1]
-        sub_scores = <weight_t*>calloc(sizeof(weight_t), nr_class)
-        sub_feats = <FeatureC*>calloc(sizeof(FeatureC), nr_feat)
-        feats = <const FeatureC*>_feats
-        for i in range(self._nr_model):
-            for j in range(nr_feat):
-                sub_feats[j] = feats[j]
-                sub_feats[j].value *= self._masks[i][j]
-            self.c = self._models_c[i][0]
-            self.c.weights = self._models_c[i].weights
-            self.c.gradient = self._models_c[i].gradient
-            ParserNeuralNet.set_scoresC(self, sub_scores, sub_feats, nr_feat, 1)
-            for j in range(nr_class):
-                scores[j] += sub_scores[j]
-                sub_scores[j] = 0.0
-        for j in range(nr_class):
-            scores[j] /= self._nr_model
-        free(sub_feats)
-        free(sub_scores)
-
-    def update(self, Example eg):
-        if eg.cost == 0:
-            return 0.0
-        loss = 0.0
-        full_feats = <FeatureC*>calloc(sizeof(FeatureC), eg.nr_feat)
-        memcpy(full_feats, eg.c.features, sizeof(FeatureC) * eg.nr_feat)
-        cdef ParserNeuralNet model
-        for i, model in enumerate(self._models):
-            for j in range(eg.nr_feat):
-                eg.c.features[j].value *= self._masks[i][j]
-            loss += model.update(eg)
-        memcpy(eg.c.features, full_feats, sizeof(FeatureC) * eg.nr_feat)
-        free(full_feats)
-        return loss
-
-    def end_training(self):
-        for model in self._models:
-            model.end_training()
-
 
 cdef class Parser:
     def __init__(self, StringStore strings, transition_system, model):
@@ -320,16 +257,8 @@ cdef class Parser:
             moves = transition_system(strings, cfg.labels)
             if cfg.get('model') == 'neural':
-                shape = [cfg.vector_widths, cfg.slots, cfg.feat_set]
-                shape.extend(cfg.hidden_layers)
-                shape.append(moves.n_moves)
-                if cfg.get('ensemble_size') >= 2:
-                    model = ParserNeuralNetEnsemble(shape, update_step=cfg.update_step,
-                                                    eta=cfg.eta, rho=cfg.rho,
-                                                    n=cfg.ensemble_size)
-                else:
-                    model = ParserNeuralNet(shape, update_step=cfg.update_step,
-                                            eta=cfg.eta, rho=cfg.rho)
+                model = ParserNeuralNet(cfg.hidden_layers + [moves.n_moves],
+                                        update_step=cfg.update_step, eta=cfg.eta, rho=cfg.rho)
             else:
                 model = ParserPerceptron(get_templates(cfg.feat_set))
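
Note on the final hunk: with the ensemble gone, `Parser.__init__` passes only the hidden layer sizes plus the output class count, and the input width is derived inside `ParserNeuralNet.__init__` from the embedding layout, since `embed=(vector_widths, slots)` makes each slot contribute `vector_widths[slot]` input units, with a slot listed twice contributing twice. A self-contained toy check of that arithmetic (values invented, not the patch's real slot table):

    vector_widths = [4, 4, 4]
    slots = [0, 1, 1, 2]      # slot 1 appears twice, so it counts twice
    input_length = sum(vector_widths[slot] for slot in slots)
    assert input_length == 16 # widths become [16] + hidden_layers + [n_moves]
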