diff --git a/spacy/_ml.py b/spacy/_ml.py
index d3bb903e7..f9894cd54 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -93,7 +93,10 @@ class PrecomputableMaxouts(Model):
         # W: (f, o, p, i)
         # b: (o, p)
-        Yfp = numpy.einsum('bi,fopi->fbop', X, self.W)
+        # Yfp = numpy.einsum('bi,fopi->fbop', X, self.W)
+        Yfp = self.ops.xp.tensordot(X, self.W,
+                 axes=[[1], [3]]).transpose((1, 0, 2, 3))
+        Yfp = self.ops.xp.ascontiguousarray(Yfp)
         Yfp += self.b
         Yf = self.ops.allocate((self.nF, X.shape[0], self.nO))
         which = self.ops.allocate((self.nF, X.shape[0], self.nO), dtype='i')
@@ -106,8 +109,11 @@ class PrecomputableMaxouts(Model):
         for i in range(self.nF):
             dYp += self.ops.backprop_maxout(dY, which[i], self.nP)
-        dXf = numpy.einsum('bop,fopi->bfi', dYp, self.W)
-        dW = numpy.einsum('bop,bfi->fopi', dYp, Xf)
+        #dXf = numpy.einsum('bop,fopi->bfi', dYp, self.W)
+        dXf = self.ops.xp.tensordot(dYp, self.W, axes=[[1,2], [1,2]])
+        #dW = numpy.einsum('bfi,bop->fopi', Xf, dYp)
+        dW = self.ops.xp.tensordot(Xf, dYp, axes=[[0], [0]])
+        dW = dW.transpose((0, 2, 3, 1))
         db = dYp.sum(axis=0)
         self.d_W += dW
diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index 61c71c2bb..c357397f8 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -21,7 +21,7 @@ class TokenVectorEncoder(object):
     '''Assign position-sensitive vectors to tokens, using a CNN or RNN.'''
     def __init__(self, vocab, **cfg):
         self.vocab = vocab
-        self.model = build_tok2vec(vocab.lang, 64, **cfg)
+        self.model = build_tok2vec(vocab.lang, **cfg)
         self.tagger = chain(
             self.model,
             Softmax(self.vocab.morphology.n_tags))
diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx
index 4989a1fb3..76f16f881 100644
--- a/spacy/syntax/parser.pyx
+++ b/spacy/syntax/parser.pyx
@@ -89,7 +89,7 @@ def get_greedy_model_for_batch(tokvecs, TransitionSystem moves, upper_model, low
     for i, offset in enumerate(offsets):
         adjusted_ids[i] *= token_ids[i] >= 0
         adjusted_ids[i] += offset
-    features = upper_model.ops.allocate((len(states), 64), dtype='f')
+    features = upper_model.ops.allocate((len(states), lower_model.nO), dtype='f')
     for i in range(len(states)):
         for j, tok_i in enumerate(adjusted_ids[i]):
             if tok_i >= 0:
@@ -222,7 +222,7 @@ cdef class Parser:
         nr_context_tokens = StateClass.nr_context_tokens(nF, nB, nS, nL, nR)
         upper = chain(Maxout(width, width), Maxout(self.moves.n_moves, width))
-        lower = PrecomputableMaxouts(width, nF=nr_context_tokens, nI=width)
+        lower = PrecomputableMaxouts(width, nF=nr_context_tokens, nI=width*2)
         return upper, lower
 
     def __call__(self, Doc tokens):
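For readers checking the einsum-to-tensordot rewrite in `_ml.py`, below is a minimal NumPy sketch (illustrative shapes only, not spaCy's real model dimensions) confirming that each `tensordot` plus `transpose` reproduces the `einsum` it replaces. The patch itself routes these calls through `self.ops.xp`, presumably so the same code path can dispatch to CuPy on GPU, where `einsum` was not available.

```python
import numpy

# Illustrative dimensions: b = batch, f = features, o = outputs,
# p = maxout pieces, i = input width. The real values come from the
# model's nF, nO, nP and nI attributes.
b, f, o, p, i = 3, 5, 7, 2, 4

X = numpy.random.rand(b, i)
W = numpy.random.rand(f, o, p, i)
Xf = numpy.random.rand(b, f, i)
dYp = numpy.random.rand(b, o, p)

# Forward: 'bi,fopi->fbop' as a tensordot over the shared i axis,
# then a transpose from (b, f, o, p) to (f, b, o, p).
Yfp_ein = numpy.einsum('bi,fopi->fbop', X, W)
Yfp_dot = numpy.tensordot(X, W, axes=[[1], [3]]).transpose((1, 0, 2, 3))
assert numpy.allclose(Yfp_ein, Yfp_dot)

# Backward w.r.t. the input features: 'bop,fopi->bfi' contracts the
# shared (o, p) axes of dYp and W, leaving (b, f, i) directly.
dXf_ein = numpy.einsum('bop,fopi->bfi', dYp, W)
dXf_dot = numpy.tensordot(dYp, W, axes=[[1, 2], [1, 2]])
assert numpy.allclose(dXf_ein, dXf_dot)

# Backward w.r.t. the weights: 'bfi,bop->fopi' contracts the batch axis,
# giving (f, i, o, p), which the transpose reorders to (f, o, p, i).
dW_ein = numpy.einsum('bfi,bop->fopi', Xf, dYp)
dW_dot = numpy.tensordot(Xf, dYp, axes=[[0], [0]]).transpose((0, 2, 3, 1))
assert numpy.allclose(dW_ein, dW_dot)
```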