# Semantic entailment/similarity with decomposable attention (using spaCy and Keras)
# Practical state-of-the-art textual entailment with spaCy and Keras
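#
# The network follows the decomposable attention architecture of Parikh et al.
# (2016), "A Decomposable Attention Model for Natural Language Inference":
# project the word vectors of both sentences, soft-align them with attention
# ("attend"), compare each token with the aligned summary of the other
# sentence ("compare"), and sum the comparison vectors into a fixed-size
# representation ("aggregate").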

import numpy as np
from keras import layers, Model, models, optimizers
from keras import backend as K


def build_model(vectors, shape, settings):
    max_length, nr_hidden, nr_class = shape

    input1 = layers.Input(shape=(max_length,), dtype="int32", name="words1")
    input2 = layers.Input(shape=(max_length,), dtype="int32", name="words2")

    # embeddings (projected): frozen pretrained vectors + learned projection
    embed = create_embedding(vectors, max_length, nr_hidden)

    a = embed(input1)
    b = embed(input2)

    # step 1: attend - score all token pairs with a dot product of the
    # feedforward-transformed embeddings
    F = create_feedforward(nr_hidden)
    att_weights = layers.dot([F(a), F(b)], axes=-1)

    G = create_feedforward(nr_hidden)

    # settings["entail_dir"] selects whether the comparison runs in both
    # directions or only one
    if settings["entail_dir"] == "both":
        norm_weights_a = layers.Lambda(normalizer(1))(att_weights)
        norm_weights_b = layers.Lambda(normalizer(2))(att_weights)
        alpha = layers.dot([norm_weights_a, a], axes=1)
        beta = layers.dot([norm_weights_b, b], axes=1)

        # step 2: compare - pair each token with the attention-weighted
        # summary of the other sentence and feed the pair through G
        comp1 = layers.concatenate([a, beta])
        comp2 = layers.concatenate([b, alpha])
        v1 = layers.TimeDistributed(G)(comp1)
        v2 = layers.TimeDistributed(G)(comp2)

        # step 3: aggregate - sum the comparison vectors over each sentence
        v1_sum = layers.Lambda(sum_word)(v1)
        v2_sum = layers.Lambda(sum_word)(v2)
        concat = layers.concatenate([v1_sum, v2_sum])

    elif settings["entail_dir"] == "left":
        norm_weights_a = layers.Lambda(normalizer(1))(att_weights)
        alpha = layers.dot([norm_weights_a, a], axes=1)
        comp2 = layers.concatenate([b, alpha])
        v2 = layers.TimeDistributed(G)(comp2)
        v2_sum = layers.Lambda(sum_word)(v2)
        concat = v2_sum

    else:
        norm_weights_b = layers.Lambda(normalizer(2))(att_weights)
        beta = layers.dot([norm_weights_b, b], axes=1)
        comp1 = layers.concatenate([a, beta])
        v1 = layers.TimeDistributed(G)(comp1)
        v1_sum = layers.Lambda(sum_word)(v1)
        concat = v1_sum

    H = create_feedforward(nr_hidden)
    out = H(concat)
    out = layers.Dense(nr_class, activation="softmax")(out)

    model = Model([input1, input2], out)

    model.compile(
        optimizer=optimizers.Adam(lr=settings["lr"]),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    return model


def create_embedding(vectors, max_length, projected_dim):
    # frozen pretrained embeddings followed by a learned linear projection
    return models.Sequential(
        [
            layers.Embedding(
                vectors.shape[0],
                vectors.shape[1],
                input_length=max_length,
                weights=[vectors],
                trainable=False,
            ),
            layers.TimeDistributed(
                layers.Dense(projected_dim, activation=None, use_bias=False)
            ),
        ]
    )


def create_feedforward(num_units=200, activation="relu", dropout_rate=0.2):
    return models.Sequential(
        [
            layers.Dense(num_units, activation=activation),
            layers.Dropout(dropout_rate),
            layers.Dense(num_units, activation=activation),
            layers.Dropout(dropout_rate),
        ]
    )


def normalizer(axis):
    # softmax over the given axis of the raw attention scores
    def _normalize(att_weights):
        exp_weights = K.exp(att_weights)
        sum_weights = K.sum(exp_weights, axis=axis, keepdims=True)
        return exp_weights / sum_weights

    return _normalize


def sum_word(x):
    # aggregate by summing over the word (time) axis
    return K.sum(x, axis=1)


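# A minimal sketch of how inputs for this model could be built with spaCy. The
# helper below is illustrative only and assumes that `token.rank` indexes the
# row of the token's vector in the embedding table passed to `build_model`, as
# with a spaCy pipeline that ships pretrained vectors (e.g. en_core_web_md).
def docs_to_ids(docs, max_length, nr_vector):
    ids = np.zeros((len(docs), max_length), dtype="int32")
    for i, doc in enumerate(docs):
        for j, token in enumerate(doc[:max_length]):
            if token.has_vector and token.rank < nr_vector:
                ids[i, j] = token.rank
            else:
                ids[i, j] = 0  # out-of-vocabulary tokens share the padding row
    return ids

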
def test_build_model():
    vectors = np.ndarray((100, 8), dtype="float32")
    shape = (10, 16, 3)
    settings = {"lr": 0.001, "dropout": 0.2, "gru_encode": True, "entail_dir": "both"}
    model = build_model(vectors, shape, settings)


def test_fit_model():
    def _generate_X(nr_example, length, nr_vector):
        # random, in-range word IDs for both sentences
        X1 = np.random.randint(0, nr_vector, (nr_example, length)).astype("int32")
        X2 = np.random.randint(0, nr_vector, (nr_example, length)).astype("int32")
        return [X1, X2]

    def _generate_Y(nr_example, nr_class):
        # one-hot labels, cycling through the classes
        ys = np.zeros((nr_example, nr_class), dtype="int32")
        for i in range(nr_example):
            ys[i, i % nr_class] = 1
        return ys

    # small random embedding table; values must be finite because the
    # embedding layer is frozen
    vectors = np.random.uniform(-1.0, 1.0, (100, 8)).astype("float32")
    shape = (10, 16, 3)
    settings = {"lr": 0.001, "dropout": 0.2, "gru_encode": True, "entail_dir": "both"}
    model = build_model(vectors, shape, settings)

    train_X = _generate_X(20, shape[0], vectors.shape[0])
    train_Y = _generate_Y(20, shape[2])
    dev_X = _generate_X(15, shape[0], vectors.shape[0])
    dev_Y = _generate_Y(15, shape[2])

    model.fit(train_X, train_Y, validation_data=(dev_X, dev_Y), epochs=5, batch_size=4)


__all__ = ["build_model"]
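

# A small smoke test when the file is run directly; assumes Keras with a
# working backend (e.g. TensorFlow) is installed.
if __name__ == "__main__":
    test_build_model()
    test_fit_model()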