spaCy/spacy/_ml.py

208 lines
7.2 KiB
Python
Raw Normal View History

2017-05-06 21:38:12 +03:00
from thinc.api import add, layerize, chain, clone, concatenate, with_flatten
2017-05-06 17:47:15 +03:00
from thinc.neural import Model, Maxout, Softmax, Affine
2017-05-04 14:31:40 +03:00
from thinc.neural._classes.hash_embed import HashEmbed
2017-05-05 21:12:03 +03:00
from thinc.neural._classes.convolution import ExtractWindow
from thinc.neural._classes.static_vectors import StaticVectors
2017-05-06 19:24:38 +03:00
from thinc.neural._classes.batchnorm import BatchNorm
2017-05-07 04:57:26 +03:00
from thinc.neural._classes.resnet import Residual
2017-05-05 21:12:03 +03:00
2017-05-06 18:37:36 +03:00
from .attrs import ID, LOWER, PREFIX, SUFFIX, SHAPE, TAG, DEP
2017-05-04 14:31:40 +03:00
def get_col(idx):
    '''Build a layer that extracts column ``idx`` from its input array.

    The backward pass scatters the incoming gradient into the ``idx``
    column of a zero array shaped like the forward input.
    '''
    def col_forward(X, drop=0.):
        # Only 2d and 3d inputs are supported.
        assert len(X.shape) <= 3
        selected = Model.ops.xp.ascontiguousarray(X[:, idx])

        def col_backward(d_selected, sgd=None):
            d_X = Model.ops.allocate(X.shape)
            d_X[:, idx] += d_selected
            return d_X

        return selected, col_backward

    return layerize(col_forward)
2017-05-05 20:20:39 +03:00
def build_model(state2vec, width, depth, nr_class):
    '''Assemble the parser model: state vectors -> two Maxout hidden
    layers -> an Affine scoring layer with ``nr_class`` outputs.

    NOTE(review): the 1344 input width is hard-coded and ``depth`` is
    unused — presumably 1344 matches the state2vec output width; confirm
    against callers.
    '''
    with Model.define_operators({'>>': chain, '**': clone}):
        hidden_in = Maxout(width, 1344)
        hidden_mid = Maxout(width, width)
        scorer = Affine(nr_class, width)
        model = state2vec >> hidden_in >> hidden_mid >> scorer
    return model
2017-05-04 14:31:40 +03:00
2017-05-06 17:47:15 +03:00
def build_debug_model(state2vec, width, depth, nr_class):
    '''Minimal debugging model: the state-vector layer followed by a
    single Maxout scorer.

    NOTE(review): ``width`` and ``depth`` are unused here.
    '''
    with Model.define_operators({'>>': chain, '**': clone}):
        scorer = Maxout(nr_class)
        model = state2vec >> scorer
    return model
def build_debug_state2vec(width, nr_vector=1000, nF=1, nB=0, nS=1, nL=2, nR=2):
    '''Debugging state2vec: flatten each state's (tokens, features) token
    vectors into one row per state, ignoring the attribute values.

    NOTE(review): all keyword parameters are currently unused.
    '''
    def forward(tokens_attrs_vectors, drop=0.):
        tokens, attr_vals, tokvecs = tokens_attrs_vectors
        orig_tokvecs_shape = tokvecs.shape
        # Collapse (states, tokens, dim) -> (states, tokens * dim).
        tokvecs = tokvecs.reshape((tokvecs.shape[0],
                                   tokvecs.shape[1] * tokvecs.shape[2]))
        vector = tokvecs

        def backward(d_vector, sgd=None):
            # BUG FIX: the original reshaped ``vector`` (the forward
            # output) instead of the incoming gradient ``d_vector``,
            # so the returned "gradient" was just the activations.
            d_tokvecs = d_vector.reshape(orig_tokvecs_shape)
            return (tokens, d_tokvecs)

        return vector, backward

    model = layerize(forward)
    return model
2017-05-06 21:38:12 +03:00
def build_state2vec(nr_context_tokens, width, nr_vector=1000):
    '''Build a state-vector layer: project each context token's vector
    through its own Maxout, and sum the projections.

    Removed an unused local (``ops = Model.ops``) from the original.
    '''
    with Model.define_operators({'|': concatenate, '+': add, '>>': chain}):
        # One Maxout projection per context-token column.
        hiddens = [get_col(i) >> Maxout(width)
                   for i in range(nr_context_tokens)]
        model = get_token_vectors >> add(*hiddens)
    return model
def print_shape(prefix):
    '''Identity layer: passes input and gradient through unchanged.

    NOTE(review): despite the name, nothing is printed and ``prefix`` is
    unused — this looks like a disabled debugging stub.
    '''
    def passthrough(X, drop=0.):
        return X, lambda dX, **kwargs: dX

    return layerize(passthrough)
2017-05-07 04:57:26 +03:00
2017-05-06 21:38:12 +03:00
@layerize
def get_token_vectors(tokens_attrs_vectors, drop=0.):
    '''Select the vectors element of a (tokens, attrs, vectors) triple.

    The backward pass pairs the gradient with the tokens, mirroring the
    forward input's structure. Removed an unused local
    (``ops = Model.ops``) from the original.
    '''
    tokens, attrs, vectors = tokens_attrs_vectors

    def backward(d_output, sgd=None):
        return (tokens, d_output)

    return vectors, backward
2017-05-05 21:09:50 +03:00
def build_parser_state2vec(width, nr_vector=1000, nF=1, nB=0, nS=1, nL=2, nR=2):
    '''Build a state-vector layer that embeds the tag (column 0) and dep
    (column 1) attribute columns, flattens the per-state token vectors,
    and horizontally concatenates all three.

    NOTE(review): ``width`` and the nF/nB/nS/nL/nR parameters are unused.
    '''
    embed_tags = _reshape(chain(get_col(0), HashEmbed(16, nr_vector)))
    embed_deps = _reshape(chain(get_col(1), HashEmbed(16, nr_vector)))
    ops = embed_tags.ops

    def forward(tokens_attrs_vectors, drop=0.):
        tokens, attr_vals, tokvecs = tokens_attrs_vectors
        # BUG FIX: the original swapped these assignments (tagvecs came
        # from embed_deps and depvecs from embed_tags).
        tagvecs, bp_tagvecs = embed_tags.begin_update(attr_vals, drop=drop)
        depvecs, bp_depvecs = embed_deps.begin_update(attr_vals, drop=drop)
        orig_tokvecs_shape = tokvecs.shape
        # Collapse (states, tokens, dim) -> (states, tokens * dim).
        tokvecs = tokvecs.reshape((tokvecs.shape[0],
                                   tokvecs.shape[1] * tokvecs.shape[2]))

        shapes = (tagvecs.shape, depvecs.shape, tokvecs.shape)
        assert tagvecs.shape[0] == depvecs.shape[0] == tokvecs.shape[0], shapes
        vector = ops.xp.hstack((tagvecs, depvecs, tokvecs))

        def backward(d_vector, sgd=None):
            d_tagvecs, d_depvecs, d_tokvecs = backprop_concatenate(d_vector, shapes)
            assert d_tagvecs.shape == shapes[0], (d_tagvecs.shape, shapes)
            assert d_depvecs.shape == shapes[1], (d_depvecs.shape, shapes)
            assert d_tokvecs.shape == shapes[2], (d_tokvecs.shape, shapes)
            # BUG FIX: forward ``sgd`` to the backprop callbacks; the
            # original dropped it, so the embeddings never updated.
            bp_tagvecs(d_tagvecs, sgd)
            bp_depvecs(d_depvecs, sgd)
            d_tokvecs = d_tokvecs.reshape(orig_tokvecs_shape)
            return (tokens, d_tokvecs)

        return vector, backward

    model = layerize(forward)
    model._layers = [embed_tags, embed_deps]
    return model
2017-05-05 20:20:39 +03:00
def backprop_concatenate(gradient, shapes):
    '''Split a horizontally concatenated gradient back into per-part
    column slices.

    ``shapes`` gives each part's (rows, cols) shape in concatenation
    order; the split happens along axis 1.
    '''
    boundaries = []
    offset = 0
    for shape in shapes:
        boundaries.append((offset, offset + shape[1]))
        offset += shape[1]
    return [gradient[:, lo:hi] for lo, hi in boundaries]
2017-05-05 21:09:50 +03:00
2017-05-05 20:20:39 +03:00
def _reshape(layer):
    '''Transforms input with shape
    (states, tokens, features)
    into input with shape:
    (states * tokens, features)
    So that it can be used with a token-wise feature extraction layer, e.g.
    an embedding layer. The embedding layer outputs:
    (states * tokens, ndim)
    But we want to concatenate the vectors for the tokens, so we produce:
    (states, tokens * ndim)
    We then need to reverse the transforms to do the backward pass. Recall
    the simple rule here: each layer is a map:
    inputs -> (outputs, (d_outputs->d_inputs))
    So the shapes must match like this:
    shape of forward input == shape of backward output
    shape of backward input == shape of forward output

    (Removed an unused local, ``M = f*m``, from the original.)
    '''
    def forward(X__bfm, drop=0.):
        b, f, m = X__bfm.shape
        B = b * f
        # (b, f, m) -> (B, m): one row per token.
        X__Bm = X__bfm.reshape((B, m))
        y__Bn, bp_yBn = layer.begin_update(X__Bm, drop=drop)
        n = y__Bn.shape[1]
        N = f * n
        # (B, n) -> (b, N): concatenate each state's token vectors.
        y__bN = y__Bn.reshape((b, N))

        def backward(dy__bN, sgd=None):
            dy__Bn = dy__bN.reshape((B, n))
            dX__Bm = bp_yBn(dy__Bn, sgd)
            if dX__Bm is None:
                # The wrapped layer produced no input gradient.
                return None
            else:
                return dX__Bm.reshape((b, f, m))

        return y__bN, backward

    model = layerize(forward)
    model._layers.append(layer)
    return model
@layerize
def flatten(seqs, drop=0.):
    '''Stack a list of sequences into one contiguous array.

    The backward pass splits the flat gradient back into per-sequence
    arrays using the recorded lengths.
    '''
    ops = Model.ops
    seq_lengths = [len(seq) for seq in seqs]

    def finish_update(d_flat, sgd=None):
        # Reverse the vstack.
        return ops.unflatten(d_flat, seq_lengths)

    flat = ops.xp.vstack(seqs)
    return flat, finish_update
def build_tok2vec(lang, width, depth=2, embed_size=1000):
    '''Build the doc-to-token-vectors model: hashed attribute embeddings
    are concatenated, mixed by a Maxout, then passed through three
    residual convolutional layers.

    NOTE(review): ``lang`` and ``depth`` are currently unused — the
    static-vectors branch is commented out and the depth is fixed at 3.
    '''
    cols = [ID, LOWER, PREFIX, SUFFIX, SHAPE, TAG]
    with Model.define_operators({'>>': chain, '|': concatenate, '**': clone}):
        #static = get_col(cols.index(ID)) >> StaticVectors(lang, width)
        # One hashed embedding table per lexical attribute.
        embeddings = [
            get_col(cols.index(attr)) >> HashEmbed(width, embed_size)
            for attr in (LOWER, PREFIX, SUFFIX, SHAPE, TAG)
        ]
        res_1 = Residual(ExtractWindow(nW=1) >> Maxout(width, width * 3))
        res_2 = Residual(ExtractWindow(nW=1) >> Maxout(width, width * 3))
        res_3 = Residual(ExtractWindow(nW=1) >> Maxout(width, width * 3))
        tok2vec = (
            doc2feats(cols)
            >> with_flatten(
                concatenate(*embeddings)
                >> Maxout(width, width * 5)
                >> res_1
                >> res_2
                >> res_3
            )
        )
    return tok2vec
def doc2feats(cols):
    '''Layer turning Doc objects into uint64 attribute arrays, one row
    per token and one column per attribute in ``cols``.
    '''
    def forward(docs, drop=0.):
        # ``model`` is late-bound from the enclosing scope: it is
        # assigned below, before the layer can ever be called.
        arrays = [model.ops.asarray(doc.to_array(cols), dtype='uint64')
                  for doc in docs]
        return arrays, None

    model = layerize(forward)
    return model