Mirror of https://github.com/explosion/spaCy.git
Add partial embedding updates to Parikh model, fix dropout, other corrections.

parent 80f473dfb8
commit ff5ab75f5e
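The "partial embedding updates" keep the large pretrained vector table frozen and add a second, much smaller embedding table that is trainable: every word id is folded into the small table with a modulo, the two lookups are summed, and only the small table (plus the projection) receives gradients. Below is a minimal numpy sketch of that forward pass; the shapes and names are illustrative, not taken from the commit.

import numpy

def partial_embed(ids, pretrained, projection, tune, nr_tune):
    # Frozen pretrained vectors, projected down to the model width.
    static = pretrained[ids].dot(projection)       # (n_words, nr_hidden)
    # Small trainable table; ids are folded into it with a modulo, keeping
    # row 0 free (the same trick as the commit's Lambda layer).
    tuned = tune[ids % (nr_tune - 1) + 1]          # (n_words, nr_hidden)
    # During training only `tune` and `projection` would be updated.
    return static + tuned

# Illustrative sizes: 10,000-word vocab, 300d vectors, 200d model, 5,000 tunable rows.
rng = numpy.random.RandomState(0)
pretrained = rng.normal(size=(10000, 300)).astype('float32')
projection = rng.normal(size=(300, 200)).astype('float32')
tune = numpy.zeros((5000, 200), dtype='float32')
ids = numpy.array([3, 17, 9021])
print(partial_embed(ids, pretrained, projection, tune, 5000).shape)   # (3, 200)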
@@ -93,7 +93,7 @@ def read_snli(path):
     nr_hidden=("Number of hidden units", "option", "H", int),
     dropout=("Dropout level", "option", "d", float),
     learn_rate=("Learning rate", "option", "e", float),
-    batch_size=("Batch size for neural network training", "option", "b", float),
+    batch_size=("Batch size for neural network training", "option", "b", int),
     nr_epoch=("Number of training epochs", "option", "i", int),
     tree_truncate=("Truncate sentences by tree distance", "flag", "T", bool),
     gru_encode=("Encode sentences with bidirectional GRU", "flag", "E", bool),
@@ -3,8 +3,10 @@
 import numpy
 
 from keras.layers import InputSpec, Layer, Input, Dense, merge
-from keras.layers import Activation, Dropout, Embedding, TimeDistributed
-from keras.layers import Bidirectional, GRU
+from keras.layers import Lambda, Activation, Dropout, Embedding, TimeDistributed
+from keras.layers import Bidirectional, GRU, LSTM
+from keras.layers.noise import GaussianNoise
+from keras.layers.advanced_activations import ELU
 import keras.backend as K
 from keras.models import Sequential, Model, model_from_json
 from keras.regularizers import l2
@@ -20,13 +22,13 @@ def build_model(vectors, shape, settings):
     ids2 = Input(shape=(max_length,), dtype='int32', name='words2')
 
     # Construct operations, which we'll chain together.
-    embed = _StaticEmbedding(vectors, max_length, nr_hidden)
+    embed = _StaticEmbedding(vectors, max_length, nr_hidden, dropout=0.2, nr_tune=5000)
     if settings['gru_encode']:
-        encode = _BiRNNEncoding(max_length, nr_hidden)
-    attend = _Attention(max_length, nr_hidden)
+        encode = _BiRNNEncoding(max_length, nr_hidden, dropout=settings['dropout'])
+    attend = _Attention(max_length, nr_hidden, dropout=settings['dropout'])
     align = _SoftAlignment(max_length, nr_hidden)
-    compare = _Comparison(max_length, nr_hidden)
-    entail = _Entailment(nr_hidden, nr_class)
+    compare = _Comparison(max_length, nr_hidden, dropout=settings['dropout'])
+    entail = _Entailment(nr_hidden, nr_class, dropout=settings['dropout'])
 
     # Declare the model as a computational graph.
     sent1 = embed(ids1) # Shape: (i, n)
@@ -59,15 +61,26 @@ def build_model(vectors, shape, settings):
 
 
 class _StaticEmbedding(object):
-    def __init__(self, vectors, max_length, nr_out):
+    def __init__(self, vectors, max_length, nr_out, nr_tune=1000, dropout=0.0):
         self.nr_out = nr_out
         self.max_length = max_length
         self.embed = Embedding(
                         vectors.shape[0],
                         vectors.shape[1],
                         input_length=max_length,
                         weights=[vectors],
                         name='embed',
-                        trainable=False,
-                        dropout=0.0)
+                        trainable=False)
+        self.tune = Embedding(
+                        nr_tune,
+                        nr_out,
+                        input_length=max_length,
+                        weights=None,
+                        name='tune',
+                        trainable=True,
+                        dropout=dropout)
+        self.mod_ids = Lambda(lambda sent: sent % (nr_tune-1)+1,
+                              output_shape=(self.max_length,))
+
         self.project = TimeDistributed(
                             Dense(
@@ -77,23 +90,37 @@ class _StaticEmbedding(object):
                                 name='project'))
 
     def __call__(self, sentence):
-        return self.project(self.embed(sentence))
+        def get_output_shape(shapes):
+            print(shapes)
+            return shapes[0]
+        mod_sent = self.mod_ids(sentence)
+        tuning = self.tune(mod_sent)
+        #tuning = merge([tuning, mod_sent],
+        #    mode=lambda AB: AB[0] * (K.clip(K.cast(AB[1], 'float32'), 0, 1)),
+        #    output_shape=(self.max_length, self.nr_out))
+        pretrained = self.project(self.embed(sentence))
+        vectors = merge([pretrained, tuning], mode='sum')
+        return vectors
 
 
 class _BiRNNEncoding(object):
-    def __init__(self, max_length, nr_out):
+    def __init__(self, max_length, nr_out, dropout=0.0):
         self.model = Sequential()
-        self.model.add(Bidirectional(GRU(int(nr_out/2), return_sequences=True),
+        self.model.add(Bidirectional(LSTM(nr_out, return_sequences=True,
+                                         dropout_W=dropout, dropout_U=dropout),
                                          input_shape=(max_length, nr_out)))
+        self.model.add(TimeDistributed(Dense(nr_out, activation='relu', init='he_normal')))
+        self.model.add(TimeDistributed(Dropout(0.2)))
 
     def __call__(self, sentence):
         return self.model(sentence)
 
 
 class _Attention(object):
-    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=1e-4, activation='relu'):
+    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=0.0, activation='relu'):
         self.max_length = max_length
         self.model = Sequential()
+        self.model.add(Dropout(dropout, input_shape=(nr_hidden,)))
         self.model.add(
             Dense(nr_hidden, name='attend1',
                 init='he_normal', W_regularizer=l2(L2),
@@ -134,18 +161,17 @@ class _SoftAlignment(object):
 
 
 class _Comparison(object):
-    def __init__(self, words, nr_hidden, L2=1e-6, dropout=0.2):
+    def __init__(self, words, nr_hidden, L2=0.0, dropout=0.0):
         self.words = words
         self.model = Sequential()
+        self.model.add(Dropout(dropout, input_shape=(nr_hidden*2,)))
         self.model.add(Dense(nr_hidden, name='compare1',
-            init='he_normal', W_regularizer=l2(L2),
-            input_shape=(nr_hidden*2,)))
+            init='he_normal', W_regularizer=l2(L2)))
         self.model.add(Activation('relu'))
         self.model.add(Dropout(dropout))
         self.model.add(Dense(nr_hidden, name='compare2',
                         W_regularizer=l2(L2), init='he_normal'))
         self.model.add(Activation('relu'))
-        self.model.add(Dropout(dropout))
         self.model = TimeDistributed(self.model)
 
     def __call__(self, sent, align, **kwargs):
@@ -156,13 +182,16 @@ class _Comparison(object):
 
 
 class _Entailment(object):
-    def __init__(self, nr_hidden, nr_out, dropout=0.2, L2=1e-4):
+    def __init__(self, nr_hidden, nr_out, dropout=0.0, L2=0.0):
         self.model = Sequential()
+        self.model.add(Dropout(dropout, input_shape=(nr_hidden*2,)))
         self.model.add(Dense(nr_hidden, name='entail1',
-            init='he_normal', W_regularizer=l2(L2),
-            input_shape=(nr_hidden*2,)))
+            init='he_normal', W_regularizer=l2(L2)))
         self.model.add(Activation('relu'))
+        self.model.add(Dropout(dropout))
         self.model.add(Dense(nr_hidden, name='entail2',
             init='he_normal', W_regularizer=l2(L2)))
         self.model.add(Activation('relu'))
         self.model.add(Dense(nr_out, name='entail_out', activation='softmax',
                         W_regularizer=l2(L2), init='zero'))
 
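The dropout corrections above follow one pattern: each feed-forward block gets a Dropout layer in front of every Dense layer, and the rate comes from the command-line dropout setting instead of a hard-coded default. A rough sketch of that block shape in current Keras, assuming the modern API rather than the Keras 1 init=/W_regularizer= arguments used in the diff:

from keras.models import Sequential
from keras.layers import Input, Dense, Dropout

def feedforward_block(nr_in, nr_hidden, dropout=0.2):
    # Dropout before each Dense, as in the corrected _Attention, _Comparison
    # and _Entailment blocks; `dropout` would come from the training settings.
    model = Sequential()
    model.add(Input(shape=(nr_in,)))
    model.add(Dropout(dropout))
    model.add(Dense(nr_hidden, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(nr_hidden, activation='relu'))
    return model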
@@ -1,5 +1,6 @@
 from keras.models import model_from_json
+import numpy
 import numpy.random
 
 
 class KerasSimilarityShim(object):
@@ -31,16 +32,16 @@ class KerasSimilarityShim(object):
         return scores[0]
 
 
-def get_embeddings(vocab):
-    max_rank = max(lex.rank+1 for lex in vocab if lex.has_vector)
-    vectors = numpy.ndarray((max_rank+1, vocab.vectors_length), dtype='float32')
+def get_embeddings(vocab, nr_unk=100):
+    nr_vector = max(lex.rank for lex in vocab) + 1
+    vectors = numpy.zeros((nr_vector+nr_unk+2, vocab.vectors_length), dtype='float32')
     for lex in vocab:
         if lex.has_vector:
-            vectors[lex.rank + 1] = lex.vector
+            vectors[lex.rank+1] = lex.vector / lex.vector_norm
     return vectors
 
 
-def get_word_ids(docs, rnn_encode=False, tree_truncate=False, max_length=100):
+def get_word_ids(docs, rnn_encode=False, tree_truncate=False, max_length=100, nr_unk=100):
     Xs = numpy.zeros((len(docs), max_length), dtype='int32')
     for i, doc in enumerate(docs):
         if tree_truncate:
@@ -50,17 +51,22 @@ def get_word_ids(docs, rnn_encode=False, tree_truncate=False, max_length=100):
         words = []
         while len(words) <= max_length and queue:
             word = queue.pop(0)
-            if rnn_encode or (word.has_vector and not word.is_punct and not word.is_space):
+            if rnn_encode or (not word.is_punct and not word.is_space):
                 words.append(word)
             if tree_truncate:
                 queue.extend(list(word.lefts))
                 queue.extend(list(word.rights))
         words.sort()
         for j, token in enumerate(words):
+            if token.has_vector:
                 Xs[i, j] = token.rank+1
+            else:
+                Xs[i, j] = (token.shape % (nr_unk-1))+2
             j += 1
             if j >= max_length:
                 break
         else:
             Xs[i, len(words)] = 1
     return Xs
 
 
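The get_embeddings/get_word_ids changes stop discarding out-of-vocabulary words: get_embeddings reserves extra rows after the pretrained vectors, and get_word_ids maps words without a vector into a small shared range of ids derived from their shape feature, with 0 left for padding and 1 written after the last word as a sentence-end marker. A standalone sketch of that id scheme, where shape_hash stands in for spaCy's token.shape attribute:

def word_id(rank, has_vector, shape_hash, nr_unk=100):
    # Known words keep their vocabulary rank, offset by one so that 0 stays
    # free for padding; unknown words share one of nr_unk-1 bucket ids
    # starting at 2, chosen from a hash of the word's surface shape.
    if has_vector:
        return rank + 1
    return (shape_hash % (nr_unk - 1)) + 2

print(word_id(rank=42, has_vector=True, shape_hash=0))        # 43
print(word_id(rank=0, has_vector=False, shape_hash=987654))   # a value in 2..100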