Mirror of https://github.com/explosion/spaCy.git
Add partial embedding updates to Parikh model, fix dropout, other corrections.

parent 80f473dfb8
commit ff5ab75f5e
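The "partial embedding updates" keep the large pretrained vector table frozen and add a second, much smaller embedding table that is trainable: every word id is folded into the small table with a modulo, the two lookups are summed, and only the small table (plus the projection) receives gradients. Below is a minimal numpy sketch of that forward pass; the shapes and names are illustrative, not taken from the commit.

import numpy

def partial_embed(ids, pretrained, projection, tune, nr_tune):
    # Frozen pretrained vectors, projected down to the model width.
    static = pretrained[ids].dot(projection)       # (n_words, nr_hidden)
    # Small trainable table; ids are folded into it with a modulo, keeping
    # row 0 free (the same trick as the commit's Lambda layer).
    tuned = tune[ids % (nr_tune - 1) + 1]          # (n_words, nr_hidden)
    # During training only `tune` and `projection` would be updated.
    return static + tuned

# Illustrative sizes: 10,000-word vocab, 300d vectors, 200d model, 5,000 tunable rows.
rng = numpy.random.RandomState(0)
pretrained = rng.normal(size=(10000, 300)).astype('float32')
projection = rng.normal(size=(300, 200)).astype('float32')
tune = numpy.zeros((5000, 200), dtype='float32')
ids = numpy.array([3, 17, 9021])
print(partial_embed(ids, pretrained, projection, tune, 5000).shape)   # (3, 200)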
@@ -93,7 +93,7 @@ def read_snli(path):
     nr_hidden=("Number of hidden units", "option", "H", int),
     dropout=("Dropout level", "option", "d", float),
     learn_rate=("Learning rate", "option", "e", float),
-    batch_size=("Batch size for neural network training", "option", "b", float),
+    batch_size=("Batch size for neural network training", "option", "b", int),
     nr_epoch=("Number of training epochs", "option", "i", int),
     tree_truncate=("Truncate sentences by tree distance", "flag", "T", bool),
     gru_encode=("Encode sentences with bidirectional GRU", "flag", "E", bool),
@@ -3,8 +3,10 @@
 import numpy
 
 from keras.layers import InputSpec, Layer, Input, Dense, merge
-from keras.layers import Activation, Dropout, Embedding, TimeDistributed
-from keras.layers import Bidirectional, GRU
+from keras.layers import Lambda, Activation, Dropout, Embedding, TimeDistributed
+from keras.layers import Bidirectional, GRU, LSTM
+from keras.layers.noise import GaussianNoise
+from keras.layers.advanced_activations import ELU
 import keras.backend as K
 from keras.models import Sequential, Model, model_from_json
 from keras.regularizers import l2
@@ -20,13 +22,13 @@ def build_model(vectors, shape, settings):
     ids2 = Input(shape=(max_length,), dtype='int32', name='words2')
 
     # Construct operations, which we'll chain together.
-    embed = _StaticEmbedding(vectors, max_length, nr_hidden)
+    embed = _StaticEmbedding(vectors, max_length, nr_hidden, dropout=0.2, nr_tune=5000)
     if settings['gru_encode']:
-        encode = _BiRNNEncoding(max_length, nr_hidden)
-    attend = _Attention(max_length, nr_hidden)
+        encode = _BiRNNEncoding(max_length, nr_hidden, dropout=settings['dropout'])
+    attend = _Attention(max_length, nr_hidden, dropout=settings['dropout'])
     align = _SoftAlignment(max_length, nr_hidden)
-    compare = _Comparison(max_length, nr_hidden)
-    entail = _Entailment(nr_hidden, nr_class)
+    compare = _Comparison(max_length, nr_hidden, dropout=settings['dropout'])
+    entail = _Entailment(nr_hidden, nr_class, dropout=settings['dropout'])
 
     # Declare the model as a computational graph.
     sent1 = embed(ids1) # Shape: (i, n)
@@ -59,15 +61,26 @@ def build_model(vectors, shape, settings):
 
 
 class _StaticEmbedding(object):
-    def __init__(self, vectors, max_length, nr_out):
+    def __init__(self, vectors, max_length, nr_out, nr_tune=1000, dropout=0.0):
         self.nr_out = nr_out
         self.max_length = max_length
         self.embed = Embedding(
                         vectors.shape[0],
                         vectors.shape[1],
                         input_length=max_length,
                         weights=[vectors],
                         name='embed',
-                        trainable=False,
-                        dropout=0.0)
+                        trainable=False)
+        self.tune = Embedding(
+                        nr_tune,
+                        nr_out,
+                        input_length=max_length,
+                        weights=None,
+                        name='tune',
+                        trainable=True,
+                        dropout=dropout)
+        self.mod_ids = Lambda(lambda sent: sent % (nr_tune-1)+1,
+                              output_shape=(self.max_length,))
+
         self.project = TimeDistributed(
                             Dense(
@@ -77,23 +90,37 @@ class _StaticEmbedding(object):
                                 name='project'))
 
     def __call__(self, sentence):
-        return self.project(self.embed(sentence))
+        def get_output_shape(shapes):
+            print(shapes)
+            return shapes[0]
+        mod_sent = self.mod_ids(sentence)
+        tuning = self.tune(mod_sent)
+        #tuning = merge([tuning, mod_sent],
+        #    mode=lambda AB: AB[0] * (K.clip(K.cast(AB[1], 'float32'), 0, 1)),
+        #    output_shape=(self.max_length, self.nr_out))
+        pretrained = self.project(self.embed(sentence))
+        vectors = merge([pretrained, tuning], mode='sum')
+        return vectors
 
 
 class _BiRNNEncoding(object):
-    def __init__(self, max_length, nr_out):
+    def __init__(self, max_length, nr_out, dropout=0.0):
         self.model = Sequential()
-        self.model.add(Bidirectional(GRU(int(nr_out/2), return_sequences=True),
+        self.model.add(Bidirectional(LSTM(nr_out, return_sequences=True,
+                                         dropout_W=dropout, dropout_U=dropout),
                                          input_shape=(max_length, nr_out)))
+        self.model.add(TimeDistributed(Dense(nr_out, activation='relu', init='he_normal')))
+        self.model.add(TimeDistributed(Dropout(0.2)))
 
     def __call__(self, sentence):
         return self.model(sentence)
 
 
 class _Attention(object):
-    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=1e-4, activation='relu'):
+    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=0.0, activation='relu'):
         self.max_length = max_length
         self.model = Sequential()
+        self.model.add(Dropout(dropout, input_shape=(nr_hidden,)))
         self.model.add(
             Dense(nr_hidden, name='attend1',
                 init='he_normal', W_regularizer=l2(L2),
@@ -134,18 +161,17 @@ class _SoftAlignment(object):
 
 
 class _Comparison(object):
-    def __init__(self, words, nr_hidden, L2=1e-6, dropout=0.2):
+    def __init__(self, words, nr_hidden, L2=0.0, dropout=0.0):
         self.words = words
         self.model = Sequential()
+        self.model.add(Dropout(dropout, input_shape=(nr_hidden*2,)))
         self.model.add(Dense(nr_hidden, name='compare1',
-            init='he_normal', W_regularizer=l2(L2),
-            input_shape=(nr_hidden*2,)))
+            init='he_normal', W_regularizer=l2(L2)))
         self.model.add(Activation('relu'))
         self.model.add(Dropout(dropout))
         self.model.add(Dense(nr_hidden, name='compare2',
                         W_regularizer=l2(L2), init='he_normal'))
         self.model.add(Activation('relu'))
-        self.model.add(Dropout(dropout))
         self.model = TimeDistributed(self.model)
 
     def __call__(self, sent, align, **kwargs):
@@ -156,13 +182,16 @@ class _Comparison(object):
 
 
 class _Entailment(object):
-    def __init__(self, nr_hidden, nr_out, dropout=0.2, L2=1e-4):
+    def __init__(self, nr_hidden, nr_out, dropout=0.0, L2=0.0):
         self.model = Sequential()
+        self.model.add(Dropout(dropout, input_shape=(nr_hidden*2,)))
         self.model.add(Dense(nr_hidden, name='entail1',
-            init='he_normal', W_regularizer=l2(L2),
-            input_shape=(nr_hidden*2,)))
+            init='he_normal', W_regularizer=l2(L2)))
         self.model.add(Activation('relu'))
+        self.model.add(Dropout(dropout))
         self.model.add(Dense(nr_hidden, name='entail2',
             init='he_normal', W_regularizer=l2(L2)))
         self.model.add(Activation('relu'))
         self.model.add(Dense(nr_out, name='entail_out', activation='softmax',
                         W_regularizer=l2(L2), init='zero'))
 
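The dropout corrections above follow one pattern: each feed-forward block gets a Dropout layer in front of every Dense layer, and the rate comes from the command-line dropout setting instead of a hard-coded default. A rough sketch of that block shape in current Keras, assuming the modern API rather than the Keras 1 init=/W_regularizer= arguments used in the diff:

from keras.models import Sequential
from keras.layers import Input, Dense, Dropout

def feedforward_block(nr_in, nr_hidden, dropout=0.2):
    # Dropout before each Dense, as in the corrected _Attention, _Comparison
    # and _Entailment blocks; `dropout` would come from the training settings.
    model = Sequential()
    model.add(Input(shape=(nr_in,)))
    model.add(Dropout(dropout))
    model.add(Dense(nr_hidden, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(nr_hidden, activation='relu'))
    return model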
@@ -1,5 +1,6 @@
 from keras.models import model_from_json
+import numpy
 import numpy.random
 
 
 class KerasSimilarityShim(object):
@@ -31,16 +32,16 @@ class KerasSimilarityShim(object):
         return scores[0]
 
 
-def get_embeddings(vocab):
-    max_rank = max(lex.rank+1 for lex in vocab if lex.has_vector)
-    vectors = numpy.ndarray((max_rank+1, vocab.vectors_length), dtype='float32')
+def get_embeddings(vocab, nr_unk=100):
+    nr_vector = max(lex.rank for lex in vocab) + 1
+    vectors = numpy.zeros((nr_vector+nr_unk+2, vocab.vectors_length), dtype='float32')
     for lex in vocab:
         if lex.has_vector:
-            vectors[lex.rank + 1] = lex.vector
+            vectors[lex.rank+1] = lex.vector / lex.vector_norm
     return vectors
 
 
-def get_word_ids(docs, rnn_encode=False, tree_truncate=False, max_length=100):
+def get_word_ids(docs, rnn_encode=False, tree_truncate=False, max_length=100, nr_unk=100):
     Xs = numpy.zeros((len(docs), max_length), dtype='int32')
     for i, doc in enumerate(docs):
         if tree_truncate:
@@ -50,17 +51,22 @@ def get_word_ids(docs, rnn_encode=False, tree_truncate=False, max_length=100):
         words = []
         while len(words) <= max_length and queue:
             word = queue.pop(0)
-            if rnn_encode or (word.has_vector and not word.is_punct and not word.is_space):
+            if rnn_encode or (not word.is_punct and not word.is_space):
                 words.append(word)
             if tree_truncate:
                 queue.extend(list(word.lefts))
                 queue.extend(list(word.rights))
         words.sort()
         for j, token in enumerate(words):
+            if token.has_vector:
                 Xs[i, j] = token.rank+1
+            else:
+                Xs[i, j] = (token.shape % (nr_unk-1))+2
             j += 1
             if j >= max_length:
                 break
         else:
             Xs[i, len(words)] = 1
     return Xs
 
 
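The get_embeddings/get_word_ids changes stop discarding out-of-vocabulary words: get_embeddings reserves extra rows after the pretrained vectors, and get_word_ids maps words without a vector into a small shared range of ids derived from their shape feature, with 0 left for padding and 1 written after the last word as a sentence-end marker. A standalone sketch of that id scheme, where shape_hash stands in for spaCy's token.shape attribute:

def word_id(rank, has_vector, shape_hash, nr_unk=100):
    # Known words keep their vocabulary rank, offset by one so that 0 stays
    # free for padding; unknown words share one of nr_unk-1 bucket ids
    # starting at 2, chosen from a hash of the word's surface shape.
    if has_vector:
        return rank + 1
    return (shape_hash % (nr_unk - 1)) + 2

print(word_id(rank=42, has_vector=True, shape_hash=0))        # 43
print(word_id(rank=0, has_vector=False, shape_hash=987654))   # a value in 2..100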