"""Feed-forward neural network, using Thenao.""" import os import sys import time import numpy import theano import theano.tensor as T import plac from spacy.gold import read_json_file from spacy.gold import GoldParse from spacy.en.pos import POS_TEMPLATES, POS_TAGS, setup_model_dir def build_model(n_classes, n_vocab, n_hidden, n_word_embed, n_tag_embed): # allocate symbolic variables for the data words = T.vector('words') tags = T.vector('tags') word_e = _init_embedding(n_words, n_word_embed) tag_e = _init_embedding(n_tags, n_tag_embed) label_e = _init_embedding(n_labels, n_label_embed) maxent_W, maxent_b = _init_maxent_weights(n_hidden, n_classes) hidden_W, hidden_b = _init_hidden_weights(28*28, n_hidden, T.tanh) params = [hidden_W, hidden_b, maxent_W, maxent_b, word_e, tag_e, label_e] x = T.concatenate([ T.flatten(word_e[word_indices], outdim=1), T.flatten(tag_e[tag_indices], outdim=1)]) p_y_given_x = feed_layer( T.nnet.softmax, maxent_W, maxent_b, feed_layer( T.tanh, hidden_W, hidden_b, x))[0] guess = T.argmax(p_y_given_x) cost = ( -T.log(p_y_given_x[y]) + L1(L1_reg, maxent_W, hidden_W, word_e, tag_e) + L2(L2_reg, maxent_W, hidden_W, wod_e, tag_e) ) train_model = theano.function( inputs=[words, tags, y], outputs=guess, updates=[update(learning_rate, param, cost) for param in params] ) evaluate_model = theano.function( inputs=[x, y], outputs=T.neq(y, T.argmax(p_y_given_x[0])), ) return train_model, evaluate_model def _init_embedding(vocab_size, n_dim): embedding = 0.2 * numpy.random.uniform(-1.0, 1.0, (vocab_size+1, n_dim)) return theano.shared(embedding).astype(theano.config.floatX) def _init_maxent_weights(n_hidden, n_out): weights = numpy.zeros((n_hidden, 10), dtype=theano.config.floatX) bias = numpy.zeros((10,), dtype=theano.config.floatX) return ( theano.shared(name='W', borrow=True, value=weights), theano.shared(name='b', borrow=True, value=bias) ) def _init_hidden_weights(n_in, n_out, activation=T.tanh): rng = numpy.random.RandomState(1234) weights = numpy.asarray( rng.uniform( low=-numpy.sqrt(6. / (n_in + n_out)), high=numpy.sqrt(6. / (n_in + n_out)), size=(n_in, n_out) ), dtype=theano.config.floatX ) bias = numpy.zeros((n_out,), dtype=theano.config.floatX) return ( theano.shared(value=weights, name='W', borrow=True), theano.shared(value=bias, name='b', borrow=True) ) def feed_layer(activation, weights, bias, input): return activation(T.dot(input, weights) + bias) def L1(L1_reg, w1, w2): return L1_reg * (abs(w1).sum() + abs(w2).sum()) def L2(L2_reg, w1, w2): return L2_reg * ((w1 ** 2).sum() + (w2 ** 2).sum()) def update(eta, param, cost): return (param, param - (eta * T.grad(cost, param))) def main(train_loc, eval_loc, model_dir): learning_rate = 0.01 L1_reg = 0.00 L2_reg = 0.0001 print "... reading the data" gold_train = list(read_json_file(train_loc)) print '... building the model' pos_model_dir = path.join(model_dir, 'pos') if path.exists(pos_model_dir): shutil.rmtree(pos_model_dir) os.mkdir(pos_model_dir) setup_model_dir(sorted(POS_TAGS.keys()), POS_TAGS, POS_TEMPLATES, pos_model_dir) train_model, evaluate_model = build_model(n_hidden, len(POS_TAGS), learning_rate, L1_reg, L2_reg) print '... training' for epoch in range(1, n_epochs+1): for raw_text, sents in gold_tuples: for (ids, words, tags, ner, heads, deps), _ in sents: tokens = nlp.tokenizer.tokens_from_list(words) for t in tokens: guess = train_model([t.orth], [t.tag]) loss += guess != t.tag print loss # compute zero-one loss on validation set #error = numpy.mean([evaluate_model(x, y) for x, y in dev_examples]) #print('epoch %i, validation error %f %%' % (epoch, error * 100)) if __name__ == '__main__': plac.call(main)