spaCy/spacy/_nn.pyx

"""Feed-forward neural network, using Thenao."""

import os
import sys
import time

import numpy

import theano
import theano.tensor as T
import plac

from spacy.gold import read_json_file
from spacy.gold import GoldParse
from spacy.en.pos import POS_TEMPLATES, POS_TAGS, setup_model_dir


def build_model(n_classes, n_vocab, n_hidden, n_word_embed, n_tag_embed,
                learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001):
    """Compile the Theano training and evaluation functions.

    The graph looks up embedding rows for the given word and tag indices,
    concatenates them into one input vector, feeds that through a tanh
    hidden layer and scores the classes with a softmax layer.

    n_classes: number of softmax outputs.  Also used as the size of the
        tag-embedding table -- assumes tag features share the output label
        set; TODO confirm.
    n_vocab: size passed to the word-embedding table.
    n_hidden: number of hidden units.
    n_word_embed: width of each word-embedding row.
    n_tag_embed: width of each tag-embedding row.
    learning_rate: step size used in the gradient updates.
    L1_reg, L2_reg: strengths of the L1 and L2 penalties added to the cost.

    The hidden layer is sized for exactly one word index and one tag index
    per example (n_word_embed + n_tag_embed inputs).

    Returns (train_model, evaluate_model).  Both take (words, tags, y):
    train_model applies one gradient step and returns the predicted class,
    evaluate_model returns whether the prediction differs from y.
    """
    # Index inputs must be integer-typed to be usable as subscripts into the
    # embedding tables; y is the index of the gold class.
    words = T.ivector('words')
    tags = T.ivector('tags')
    y = T.iscalar('y')

    word_e = _init_embedding(n_vocab, n_word_embed)
    tag_e = _init_embedding(n_classes, n_tag_embed)
    maxent_W, maxent_b = _init_maxent_weights(n_hidden, n_classes)
    # The hidden layer's input width is the length of the concatenated
    # embedding vector built below (one word row plus one tag row).
    hidden_W, hidden_b = _init_hidden_weights(n_word_embed + n_tag_embed,
                                              n_hidden, T.tanh)
    # Only parameters that actually feed into the cost are listed: T.grad
    # rejects variables that are disconnected from the cost by default.
    params = [hidden_W, hidden_b, maxent_W, maxent_b, word_e, tag_e]

    x = T.concatenate([
          T.flatten(word_e[words], outdim=1),
          T.flatten(tag_e[tags], outdim=1)])

    hidden = feed_layer(T.tanh, hidden_W, hidden_b, x)
    # Row 0 is selected from the softmax result, as in the original graph,
    # leaving a vector of class probabilities.
    p_y_given_x = feed_layer(T.nnet.softmax, maxent_W, maxent_b, hidden)[0]

    guess = T.argmax(p_y_given_x)

    # The penalties are requested pairwise, which any L1/L2 helper accepting
    # two matrices supports.
    cost = (
        -T.log(p_y_given_x[y])
        + L1(L1_reg, maxent_W, hidden_W)
        + L1(L1_reg, word_e, tag_e)
        + L2(L2_reg, maxent_W, hidden_W)
        + L2(L2_reg, word_e, tag_e)
    )

    train_model = theano.function(
        inputs=[words, tags, y],
        outputs=guess,
        updates=[update(learning_rate, param, cost) for param in params]
    )

    # Same inputs as train_model; compares against `guess` because
    # p_y_given_x is already a single row of probabilities.
    evaluate_model = theano.function(
        inputs=[words, tags, y],
        outputs=T.neq(y, guess),
    )
    return train_model, evaluate_model


def _init_embedding(vocab_size, n_dim):
    """Create a shared embedding table with vocab_size + 1 rows of n_dim values.

    Values are drawn uniformly between -0.2 and 0.2 from numpy's global
    random generator, so they differ from run to run unless that generator
    is seeded by the caller.

    Returns a Theano shared variable holding the table in floatX precision.
    """
    embedding = 0.2 * numpy.random.uniform(-1.0, 1.0, (vocab_size + 1, n_dim))
    # Cast the numpy array *before* wrapping it.  Calling .astype() on the
    # shared variable yields a symbolic cast expression whenever floatX is
    # not float64, and such an expression cannot be the target of an entry
    # in theano.function's `updates`.
    return theano.shared(numpy.asarray(embedding, dtype=theano.config.floatX))


def _init_maxent_weights(n_hidden, n_out):
    """Create zero-initialised shared parameters for the softmax layer.

    n_hidden: number of inputs to the layer (rows of the weight matrix).
    n_out: number of output classes (columns of the weight matrix and
        length of the bias vector).

    Returns (W, b) as Theano shared variables in floatX precision.
    """
    # Sized from n_out rather than a fixed 10 so that the layer matches the
    # number of classes the caller asks for.
    weights = numpy.zeros((n_hidden, n_out), dtype=theano.config.floatX)
    bias = numpy.zeros((n_out,), dtype=theano.config.floatX)
    return (
        theano.shared(name='W', borrow=True, value=weights),
        theano.shared(name='b', borrow=True, value=bias)
    )


def _init_hidden_weights(n_in, n_out, activation=T.tanh):
    """Create the shared weight matrix and bias vector of a hidden layer.

    The (n_in, n_out) weight matrix is drawn uniformly between -b and b,
    where b = sqrt(6 / (n_in + n_out)), from a generator seeded with 1234,
    so a given shape always gets the same initial values.  The bias vector
    of length n_out starts at zero.  `activation` is accepted but not used.

    Returns (W, b) as Theano shared variables in floatX precision.
    """
    float_type = theano.config.floatX
    bound = numpy.sqrt(6. / (n_in + n_out))
    seeded = numpy.random.RandomState(1234)

    W_values = numpy.asarray(
        seeded.uniform(low=-bound, high=bound, size=(n_in, n_out)),
        dtype=float_type
    )
    b_values = numpy.zeros((n_out,), dtype=float_type)

    W = theano.shared(value=W_values, name='W', borrow=True)
    b = theano.shared(value=b_values, name='b', borrow=True)
    return (W, b)

           
def feed_layer(activation, weights, bias, input):
    """Apply one layer: activation(dot(input, weights) + bias)."""
    pre_activation = T.dot(input, weights) + bias
    return activation(pre_activation)


def L1(L1_reg, w1, w2, *weights):
    """Return the L1 penalty: L1_reg times the summed absolute values.

    L1_reg: scaling factor of the penalty.
    w1, w2: the first two weight arrays (required, as before).
    *weights: any further weight arrays to include, so that callers can
        regularise more than two matrices in a single call.
    """
    total = abs(w1).sum() + abs(w2).sum()
    for extra in weights:
        total = total + abs(extra).sum()
    return L1_reg * total


def L2(L2_reg, w1, w2, *weights):
    """Return the L2 penalty: L2_reg times the summed squared values.

    L2_reg: scaling factor of the penalty.
    w1, w2: the first two weight arrays (required, as before).
    *weights: any further weight arrays to include, so that callers can
        regularise more than two matrices in a single call.
    """
    total = (w1 ** 2).sum() + (w2 ** 2).sum()
    for extra in weights:
        total = total + (extra ** 2).sum()
    return L2_reg * total


def update(eta, param, cost):
    """Build the (param, new value) pair for one gradient-descent step.

    The new value is param moved against the gradient of cost with respect
    to param, scaled by eta.
    """
    gradient = T.grad(cost, param)
    stepped = param - (eta * gradient)
    return (param, stepped)


def main(train_loc, eval_loc, model_dir):
    """Train the feed-forward tagger on the gold data read from train_loc.

    train_loc: location handed to read_json_file to load the training data.
    eval_loc: accepted on the command line but not read yet; validation is
        still a TODO at the end of the epoch loop.
    model_dir: directory under which the 'pos' sub-directory is recreated
        (any existing one is deleted first).

    Prints the number of wrong guesses made during each epoch.
    """
    # Imported locally so the block does not depend on names the module
    # header never brought into scope.
    import shutil
    from spacy.en import English

    # Intended optimiser settings.  NOTE(review): build_model's positional
    # signature (n_classes, n_vocab, n_hidden, n_word_embed, n_tag_embed)
    # has no slot for these -- the original call passed them where the layer
    # sizes belong -- so they are not forwarded here; wire them through once
    # build_model exposes them.
    learning_rate = 0.01
    L1_reg = 0.00
    L2_reg = 0.0001
    # NOTE(review): the original left these undefined.  The values below are
    # placeholders, not tuned settings.
    n_epochs = 10
    n_hidden = 100
    n_word_embed = 50
    n_tag_embed = 10

    # print(...) with a single argument behaves the same under Python 2's
    # print statement and Python 3's print function.
    print("... reading the data")
    gold_train = list(read_json_file(train_loc))
    print('... building the model')
    pos_model_dir = os.path.join(model_dir, 'pos')
    if os.path.exists(pos_model_dir):
        shutil.rmtree(pos_model_dir)
    os.mkdir(pos_model_dir)

    setup_model_dir(sorted(POS_TAGS.keys()), POS_TAGS, POS_TEMPLATES, pos_model_dir)

    # NOTE(review): `nlp` was never defined in the original; assumed to be
    # the English pipeline loaded from model_dir -- confirm.
    nlp = English(data_dir=model_dir)

    # Tokenise once up front: the (orth, tag) pairs do not change between
    # epochs, and the largest word id is needed to size the embedding table.
    # NOTE(review): assumes t.orth and t.tag are small dense integer ids and
    # that t.tag < len(POS_TAGS) -- confirm.
    examples = []
    for raw_text, sents in gold_train:
        for (ids, words, tags, ner, heads, deps), _ in sents:
            tokens = nlp.tokenizer.tokens_from_list(words)
            examples.extend((t.orth, t.tag) for t in tokens)
    n_vocab = max(orth for orth, _ in examples) if examples else 0

    train_model, evaluate_model = build_model(len(POS_TAGS), n_vocab, n_hidden,
                                              n_word_embed, n_tag_embed)

    print('... training')
    for epoch in range(1, n_epochs + 1):
        loss = 0
        for orth, tag in examples:
            # The loss line treats the token's tag as the gold label, so it
            # is also what gets passed as the training target y.
            # NOTE(review): the same tag is fed in as an input feature, as
            # in the original call -- confirm this is intended.
            guess = train_model([orth], [tag], tag)
            loss += guess != tag
        print(loss)
        # TODO: compute the zero-one loss on the data at eval_loc with
        # evaluate_model and report it per epoch.


if __name__ == '__main__':
    plac.call(main)
* Merge changes, and adjust Example to use memoryview 2015-06-28 12:36:11 +03:00			`"""Feed-forward neural network, using Theano."""`

			`import os`
			`import sys`
			`import time`

			`import numpy`

			`import theano`
			`import theano.tensor as T`
			`import plac`

			`from spacy.gold import read_json_file`
			`from spacy.gold import GoldParse`
			`from spacy.en.pos import POS_TEMPLATES, POS_TAGS, setup_model_dir`


			`def build_model(n_classes, n_vocab, n_hidden, n_word_embed, n_tag_embed):`
			`# allocate symbolic variables for the data`
			`words = T.vector('words')`
			`tags = T.vector('tags')`

			`word_e = _init_embedding(n_words, n_word_embed)`
			`tag_e = _init_embedding(n_tags, n_tag_embed)`
			`label_e = _init_embedding(n_labels, n_label_embed)`
			`maxent_W, maxent_b = _init_maxent_weights(n_hidden, n_classes)`
			`hidden_W, hidden_b = _init_hidden_weights(28*28, n_hidden, T.tanh)`
			`params = [hidden_W, hidden_b, maxent_W, maxent_b, word_e, tag_e, label_e]`

			`x = T.concatenate([`
			`T.flatten(word_e[word_indices], outdim=1),`
			`T.flatten(tag_e[tag_indices], outdim=1)])`

			`p_y_given_x = feed_layer(`
			`T.nnet.softmax,`
			`maxent_W,`
			`maxent_b,`
			`feed_layer(`
			`T.tanh,`
			`hidden_W,`
			`hidden_b,`
			`x))[0]`

			`guess = T.argmax(p_y_given_x)`

			`cost = (`
			`-T.log(p_y_given_x[y])`
			`+ L1(L1_reg, maxent_W, hidden_W, word_e, tag_e)`
			`+ L2(L2_reg, maxent_W, hidden_W, wod_e, tag_e)`
			`)`

			`train_model = theano.function(`
			`inputs=[words, tags, y],`
			`outputs=guess,`
			`updates=[update(learning_rate, param, cost) for param in params]`
			`)`

			`evaluate_model = theano.function(`
			`inputs=[x, y],`
			`outputs=T.neq(y, T.argmax(p_y_given_x[0])),`
			`)`
			`return train_model, evaluate_model`


			`def _init_embedding(vocab_size, n_dim):`
			`embedding = 0.2 * numpy.random.uniform(-1.0, 1.0, (vocab_size+1, n_dim))`
			`return theano.shared(embedding).astype(theano.config.floatX)`


			`def _init_maxent_weights(n_hidden, n_out):`
			`weights = numpy.zeros((n_hidden, 10), dtype=theano.config.floatX)`
			`bias = numpy.zeros((10,), dtype=theano.config.floatX)`
			`return (`
			`theano.shared(name='W', borrow=True, value=weights),`
			`theano.shared(name='b', borrow=True, value=bias)`
			`)`


			`def _init_hidden_weights(n_in, n_out, activation=T.tanh):`
			`rng = numpy.random.RandomState(1234)`
			`weights = numpy.asarray(`
			`rng.uniform(`
			`low=-numpy.sqrt(6. / (n_in + n_out)),`
			`high=numpy.sqrt(6. / (n_in + n_out)),`
			`size=(n_in, n_out)`
			`),`
			`dtype=theano.config.floatX`
			`)`

			`bias = numpy.zeros((n_out,), dtype=theano.config.floatX)`
			`return (`
			`theano.shared(value=weights, name='W', borrow=True),`
			`theano.shared(value=bias, name='b', borrow=True)`
			`)`


			`def feed_layer(activation, weights, bias, input):`
			`return activation(T.dot(input, weights) + bias)`


			`def L1(L1_reg, w1, w2):`
			`return L1_reg * (abs(w1).sum() + abs(w2).sum())`


			`def L2(L2_reg, w1, w2):`
			`return L2_reg * ((w1 ** 2).sum() + (w2 ** 2).sum())`


			`def update(eta, param, cost):`
			`return (param, param - (eta * T.grad(cost, param)))`


			`def main(train_loc, eval_loc, model_dir):`
			`learning_rate = 0.01`
			`L1_reg = 0.00`
			`L2_reg = 0.0001`

			`print "... reading the data"`
			`gold_train = list(read_json_file(train_loc))`
			`print '... building the model'`
			`pos_model_dir = path.join(model_dir, 'pos')`
			`if path.exists(pos_model_dir):`
			`shutil.rmtree(pos_model_dir)`
			`os.mkdir(pos_model_dir)`

			`setup_model_dir(sorted(POS_TAGS.keys()), POS_TAGS, POS_TEMPLATES, pos_model_dir)`

			`train_model, evaluate_model = build_model(n_hidden, len(POS_TAGS), learning_rate,`
			`L1_reg, L2_reg)`

			`print '... training'`
			`for epoch in range(1, n_epochs+1):`
			`for raw_text, sents in gold_tuples:`
			`for (ids, words, tags, ner, heads, deps), _ in sents:`
			`tokens = nlp.tokenizer.tokens_from_list(words)`
			`for t in tokens:`
			`guess = train_model([t.orth], [t.tag])`
			`loss += guess != t.tag`
			`print loss`
			`# compute zero-one loss on validation set`
			`#error = numpy.mean([evaluate_model(x, y) for x, y in dev_examples])`
			`#print('epoch %i, validation error %f %%' % (epoch, error * 100))`


			`if __name__ == '__main__':`
			`plac.call(main)`