various tests, architectures and experiments

svlandeg 2019-05-16 18:25:34 +02:00
parent 9ffe5437ae
commit b5470f3d75
2 changed files with 363 additions and 111 deletions

View File

@@ -6,32 +6,40 @@ import datetime

from os import listdir
from random import shuffle

import numpy as np
import random

from thinc.neural._classes.convolution import ExtractWindow
from thinc.neural._classes.feature_extracter import FeatureExtracter
from examples.pipeline.wiki_entity_linking import run_el, training_set_creator, kb_creator
from spacy._ml import SpacyVectors, create_default_optimizer, zero_init, logistic

from thinc.api import chain, concatenate, flatten_add_lengths, with_getitem, clone, with_flatten
from thinc.neural.util import get_array_module
from thinc.v2v import Model, Softmax, Maxout, Affine, ReLu
from thinc.t2v import Pooling, sum_pool, mean_pool, max_pool
from thinc.t2t import ParametricAttention
from thinc.misc import Residual
from thinc.misc import LayerNorm as LN

from spacy.tokens import Doc

""" TODO: this code needs to be implemented in pipes.pyx"""


class EL_Model:

    PRINT_LOSS = True
    PRINT_F = True
    EPS = 0.0000000005
    CUTOFF = 0.5

    INPUT_DIM = 300
    ENTITY_WIDTH = 64
    ARTICLE_WIDTH = 64
    HIDDEN_1_WIDTH = 256
    HIDDEN_2_WIDTH = 64

    labels = ["MATCH", "NOMATCH"]
    name = "entity_linker"
    def __init__(self, kb, nlp):
@@ -39,58 +47,102 @@ class EL_Model():
        self.nlp = nlp
        self.kb = kb

        self._build_cnn(hidden_entity_width=self.ENTITY_WIDTH, hidden_article_width=self.ARTICLE_WIDTH)
        # self.entity_encoder = self._simple_encoder(in_width=self.INPUT_DIM, out_width=self.OUTPUT_DIM)
        # self.article_encoder = self._simple_encoder(in_width=self.INPUT_DIM, out_width=self.OUTPUT_DIM)

    def train_model(self, training_dir, entity_descr_output, trainlimit=None, devlimit=None, to_print=True):
        # raise errors instead of runtime warnings in case of int/float overflow
        np.seterr(all='raise')

        Doc.set_extension("entity_id", default=None)

        train_instances, train_pos, train_neg, train_doc = self._get_training_data(training_dir,
                                                                                   entity_descr_output,
                                                                                   False,
                                                                                   trainlimit,
                                                                                   to_print=False)

        dev_instances, dev_pos, dev_neg, dev_doc = self._get_training_data(training_dir,
                                                                           entity_descr_output,
                                                                           True,
                                                                           devlimit,
                                                                           to_print=False)

        # self.sgd_entity = self.begin_training(self.entity_encoder)
        # self.sgd_article = self.begin_training(self.article_encoder)
        self._begin_training()

        if self.PRINT_F:
            _, _, f_avg_train = -3.42, -3.42, -3.42  # self._test_dev(train_instances, train_pos, train_neg, train_doc, avg=True)
            _, _, f_nonavg_train = self._test_dev(train_instances, train_pos, train_neg, train_doc, avg=False)
            _, _, f_random_train = self._test_dev(train_instances, train_pos, train_neg, train_doc, calc_random=True)
            _, _, f_avg_dev = -3.42, -3.42, -3.42  # self._test_dev(dev_instances, dev_pos, dev_neg, dev_doc, avg=True)
            _, _, f_nonavg_dev = self._test_dev(dev_instances, dev_pos, dev_neg, dev_doc, avg=False)
            _, _, f_random_dev = self._test_dev(dev_instances, dev_pos, dev_neg, dev_doc, calc_random=True)

            print("random F train", round(f_random_train, 1))
            print("random F dev", round(f_random_dev, 1))
            print()
            print("avg/nonavg F train", round(f_avg_train, 1), round(f_nonavg_train, 1))
            print("avg/nonavg F dev", round(f_avg_dev, 1), round(f_nonavg_dev, 1))
            print()

        instance_pos_count = 0
        instance_neg_count = 0

        if to_print:
            print("Training on", len(train_instances.values()), "articles")
            print("Dev test on", len(dev_instances.values()), "articles")
            print()

        # for article_id, inst_cluster_set in train_instances.items():
        #     article_doc = train_doc[article_id]
        #     print("training on", article_id, inst_cluster_set)
        #     pos_ex_list = list()
        #     neg_exs_list = list()
        #     for inst_cluster in inst_cluster_set:
        #         instance_count += 1
        #         pos_ex_list.append(train_pos.get(inst_cluster))
        #         neg_exs_list.append(train_neg.get(inst_cluster, []))
        #     self.update(article_doc, pos_ex_list, neg_exs_list)

        article_docs = list()
        entities = list()
        golds = list()
        for article_id, inst_cluster_set in train_instances.items():
            for inst_cluster in inst_cluster_set:
                article_docs.append(train_doc[article_id])
                entities.append(train_pos.get(inst_cluster))
                golds.append(float(1.0))
                instance_pos_count += 1
                for neg_entity in train_neg.get(inst_cluster, []):
                    article_docs.append(train_doc[article_id])
                    entities.append(neg_entity)
                    golds.append(float(0.0))
                    instance_neg_count += 1

        for x in range(10):
            print("Updating", x)
            self.update(article_docs=article_docs, entities=entities, golds=golds)

            # eval again
            if self.PRINT_F:
                _, _, f_avg_train = -3.42, -3.42, -3.42  # self._test_dev(train_instances, train_pos, train_neg, train_doc, avg=True)
                _, _, f_nonavg_train = self._test_dev(train_instances, train_pos, train_neg, train_doc, avg=False)
                _, _, f_avg_dev = -3.42, -3.42, -3.42  # self._test_dev(dev_instances, dev_pos, dev_neg, dev_doc, avg=True)
                _, _, f_nonavg_dev = self._test_dev(dev_instances, dev_pos, dev_neg, dev_doc, avg=False)

                print("avg/nonavg F train", round(f_avg_train, 1), round(f_nonavg_train, 1))
                print("avg/nonavg F dev", round(f_avg_dev, 1), round(f_nonavg_dev, 1))
                print()

        if to_print:
            print("Trained on", instance_pos_count, "/", instance_neg_count, "instances pos/neg")
    def _test_dev_depr(self, dev_instances, dev_pos, dev_neg, dev_doc, avg=False, calc_random=False):
        predictions = list()
        golds = list()
@@ -113,23 +165,65 @@ class EL_Model():
            examples.append(pos_ex)
            shuffle(examples)

            best_entity, highest_prob = self._predict_depr(examples, article_doc, avg)
            if calc_random:
                best_entity, highest_prob = self._predict_random_depr(examples)
            predictions.append(ex_to_id[best_entity])
            golds.append(ex_to_id[pos_ex])

        # TODO: use lowest_mse and combine with prior probability
        p, r, f = run_el.evaluate(predictions, golds, to_print=False)
        return p, r, f

    def _test_dev(self, dev_instances, dev_pos, dev_neg, dev_doc, avg=False, calc_random=False):
        predictions = list()
        golds = list()

        for article_id, inst_cluster_set in dev_instances.items():
            for inst_cluster in inst_cluster_set:
                pos_ex = dev_pos.get(inst_cluster)
                neg_exs = dev_neg.get(inst_cluster, [])

                article = inst_cluster.split(sep="_")[0]
                entity_id = inst_cluster.split(sep="_")[1]
                article_doc = dev_doc[article]

                if calc_random:
                    prediction = self._predict_random(entity=pos_ex)
                else:
                    prediction = self._predict(article_doc=article_doc, entity=pos_ex, avg=avg)
                predictions.append(prediction)
                golds.append(float(1.0))

                for neg_ex in neg_exs:
                    if calc_random:
                        prediction = self._predict_random(entity=neg_ex)
                    else:
                        prediction = self._predict(article_doc=article_doc, entity=neg_ex, avg=avg)
                    predictions.append(prediction)
                    golds.append(float(0.0))

        # TODO: use lowest_mse and combine with prior probability
        p, r, f = run_el.evaluate(predictions, golds, to_print=False)
        return p, r, f
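
_test_dev scores each positive and each negative candidate independently, so predictions and golds end up as two equal-length binary lists. Assuming run_el.evaluate computes standard precision/recall/F over such lists (the textbook definitions below are a stand-in, not the actual helper):

def evaluate_binary(predictions, golds, eps=1e-10):
    tp = sum(1 for p, g in zip(predictions, golds) if p == 1.0 and g == 1.0)
    fp = sum(1 for p, g in zip(predictions, golds) if p == 1.0 and g == 0.0)
    fn = sum(1 for p, g in zip(predictions, golds) if p == 0.0 and g == 1.0)
    precision = tp / (tp + fp + eps)
    recall = tp / (tp + fn + eps)
    f = 2 * precision * recall / (precision + recall + eps)
    return precision, recall, f

print(evaluate_binary([1.0, 1.0, 0.0], [1.0, 0.0, 1.0]))  # ~ (0.5, 0.5, 0.5)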
    def _predict_depr(self, entities, article_doc, avg=False):
        if avg:
            with self.article_encoder.use_params(self.sgd_article.averages):
                doc_encoding = self.article_encoder([article_doc])
        else:
            doc_encoding = self.article_encoder([article_doc])

        highest_prob = None
        best_entity = None

        entity_to_vector = dict()
        for entity in entities:
            if avg:
                with self.entity_encoder.use_params(self.sgd_entity.averages):
                    entity_to_vector[entity] = self.entity_encoder([entity])
            else:
                entity_to_vector[entity] = self.entity_encoder([entity])

        for entity in entities:
            entity_encoding = entity_to_vector[entity]
@@ -140,7 +234,97 @@ class EL_Model():
        return best_entity, highest_prob

    def _predict(self, article_doc, entity, avg=False, apply_threshold=True):
        if avg:
            with self.sgd.use_params(self.model.averages):
                doc_encoding = self.article_encoder([article_doc])
                entity_encoding = self.entity_encoder([entity])
                return self.model(np.append(entity_encoding, doc_encoding))  # TODO list

        doc_encoding = self.article_encoder([article_doc])[0]
        entity_encoding = self.entity_encoder([entity])[0]
        concat_encoding = list(entity_encoding) + list(doc_encoding)
        np_array = np.asarray([concat_encoding])

        prediction = self.model(np_array)
        if not apply_threshold:
            return float(prediction)
        if prediction > self.CUTOFF:
            return float(1.0)
        return float(0.0)

    def _predict_random_depr(self, entities):
        highest_prob = 1
        best_entity = random.choice(entities)
        return best_entity, highest_prob

    def _predict_random(self, entity, apply_threshold=True):
        r = random.uniform(0, 1)
        if not apply_threshold:
            return r
        if r > self.CUTOFF:
            return float(1.0)
        return float(0.0)

    def _build_cnn(self, hidden_entity_width, hidden_article_width):
        with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
            self.entity_encoder = self._encoder(in_width=self.INPUT_DIM, hidden_width=hidden_entity_width)    # entity encoding
            self.article_encoder = self._encoder(in_width=self.INPUT_DIM, hidden_width=hidden_article_width)  # doc encoding

            hidden_input_width = hidden_entity_width + hidden_article_width
            hidden_output_width = self.HIDDEN_1_WIDTH

            convolution_2 = Residual((ExtractWindow(nW=1) >> LN(Maxout(hidden_output_width, hidden_output_width * 3))))

            # self.entity_encoder | self.article_encoder \
            # self.model = with_flatten(LN(Maxout(hidden_width, hidden_width)) >> convolution_2 ** 2, pad=2) \
            #     >> flatten_add_lengths \
            #     >> ParametricAttention(hidden_width) \
            #     >> Pooling(sum_pool) \
            #     >> Softmax(nr_class, nr_class)

            self.model = Affine(hidden_output_width, hidden_input_width) \
                >> LN(Maxout(hidden_output_width, hidden_output_width)) \
                >> convolution_2 \
                >> Affine(self.HIDDEN_2_WIDTH, hidden_output_width) \
                >> Affine(1, self.HIDDEN_2_WIDTH) \
                >> logistic
            # >> with_flatten(LN(Maxout(hidden_output_width, hidden_output_width)) >> convolution_2 ** 2, pad=2)
            # >> convolution_2 \
            # >> flatten_add_lengths \
            # >> ParametricAttention(hidden_output_width) \
            # >> Pooling(max_pool) \
            # >> Softmax(nr_class, nr_class)
            # self.model.nO = nr_class
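
The convolution_2 block is the usual window-CNN trick: ExtractWindow(nW=1) concatenates each row with its left and right neighbours, which is why the Maxout takes hidden_output_width * 3 inputs, and the Residual wrapper adds the block's input back onto its output. A rough numpy rendering of the window step, assuming zero padding at the sequence edges:

import numpy as np

def extract_window(X, nW=1):
    # X: (n_rows, width). Zero-pad nW rows on each side, then concatenate
    # each row with its nW neighbours -> (n_rows, width * (2 * nW + 1)).
    pad = np.zeros((nW, X.shape[1]), dtype=X.dtype)
    padded = np.vstack([pad, X, pad])
    return np.hstack([padded[i:i + len(X)] for i in range(2 * nW + 1)])

X = np.arange(12, dtype="f").reshape(4, 3)  # 4 tokens, width 3
print(extract_window(X).shape)              # (4, 9): the "* 3" in Maxout(w, w * 3)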
    @staticmethod
    def _encoder(in_width, hidden_width):
        with Model.define_operators({">>": chain}):
            encoder = SpacyVectors \
                >> flatten_add_lengths \
                >> ParametricAttention(in_width) \
                >> Pooling(mean_pool) \
                >> Residual(zero_init(Maxout(in_width, in_width))) \
                >> zero_init(Affine(hidden_width, in_width, drop_factor=0.0))
        return encoder
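
The encoder ends in Pooling(mean_pool), the step that turns a variable-length sequence of token rows into the single fixed-width vector per document that the rest of the network expects; sum_pool and max_pool (imported above) only swap the reduction. Roughly, in numpy terms:

import numpy as np

tokens = np.random.randn(7, 64).astype("f")  # 7 tokens, hidden_width 64

mean_pooled = tokens.mean(axis=0)  # what Pooling(mean_pool) yields per doc
sum_pooled = tokens.sum(axis=0)    # the Pooling(sum_pool) variant
max_pooled = tokens.max(axis=0)    # the Pooling(max_pool) variant
print(mean_pooled.shape, sum_pooled.shape, max_pooled.shape)  # (64,) (64,) (64,)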
    def begin_training_depr(self, model):
        # TODO ? link_vectors_to_models(self.vocab) depr?
        sgd = create_default_optimizer(model.ops)
        return sgd

    def _begin_training(self):
        # self.sgd_entity = self.begin_training(self.entity_encoder)
        # self.sgd_article = self.begin_training(self.article_encoder)
        self.sgd = create_default_optimizer(self.model.ops)
    # TODO: deprecated ?
    def _simple_encoder_depr(self, in_width, out_width):
        hidden_width = 128
        conv_depth = 1
        cnn_maxout_pieces = 3
        with Model.define_operators({">>": chain, "**": clone}):

@@ -150,21 +334,56 @@ class EL_Model():
            #     >> Pooling(mean_pool) \
            #     >> Residual(zero_init(Maxout(in_width, in_width))) \
            #     >> zero_init(Affine(out_width, in_width, drop_factor=0.0))

            # encoder = SpacyVectors \
            #     >> flatten_add_lengths \
            #     >> with_getitem(0, Affine(in_width, in_width)) \
            #     >> ParametricAttention(in_width) \
            #     >> Pooling(sum_pool) \
            #     >> Residual(ReLu(in_width, in_width)) ** conv_depth \
            #     >> zero_init(Affine(out_width, in_width, drop_factor=0.0))

            # encoder = SpacyVectors \
            #     >> flatten_add_lengths \
            #     >> ParametricAttention(in_width) \
            #     >> Pooling(sum_pool) \
            #     >> Residual(zero_init(Maxout(in_width, in_width))) \
            #     >> zero_init(Affine(out_width, in_width, drop_factor=0.0))

            # >> zero_init(Affine(nr_class, width, drop_factor=0.0))
            # >> logistic

            # convolution = Residual(ExtractWindow(nW=1)
            #     >> LN(Maxout(in_width, in_width * 3, pieces=cnn_maxout_pieces))
            # )

            # encoder = SpacyVectors >> with_flatten(
            #     embed >> convolution ** conv_depth, pad=conv_depth
            # )

            # static_vectors = SpacyVectors >> with_flatten(
            #     Affine(in_width, in_width)
            # )

            convolution_2 = Residual((ExtractWindow(nW=1) >> LN(Maxout(hidden_width, hidden_width * 3))))

            encoder = SpacyVectors >> with_flatten(LN(Maxout(hidden_width, in_width)) >> convolution_2 ** 2, pad=2) \
                >> flatten_add_lengths \
                >> ParametricAttention(hidden_width) \
                >> Pooling(sum_pool) \
                >> Residual(zero_init(Maxout(hidden_width, hidden_width))) \
                >> zero_init(Affine(out_width, hidden_width, drop_factor=0.0)) \
                >> logistic

            # convolution = Residual(ExtractWindow(nW=1) >> ReLu(in_width, in_width * 3))

            # encoder = static_vectors  # >> with_flatten(
            #     ReLu(in_width, in_width)
            #     >> convolution ** conv_depth, pad=conv_depth) \
            #     >> Affine(out_width, in_width, drop_factor=0.0)

            # encoder = SpacyVectors >> with_flatten(
            #     LN(Maxout(in_width, in_width))
            #     >> Residual((ExtractWindow(nW=1) >> LN(Maxout(in_width, in_width * 3, pieces=cnn_maxout_pieces)))) ** conv_depth,
            #     pad=conv_depth,
            # ) >> zero_init(Affine(out_width, in_width, drop_factor=0.0))

            # embed = SpacyVectors >> LN(Maxout(width, width, pieces=3))
@@ -173,75 +392,91 @@ class EL_Model():
        return encoder

    def update_depr(self, article_doc, true_entity_list, false_entities_list, drop=0., losses=None):
        doc_encoding, article_bp = self.article_encoder.begin_update([article_doc], drop=drop)
        doc_encoding = doc_encoding[0]
        # print("doc", doc_encoding)

        for i, true_entity in enumerate(true_entity_list):
            try:
                false_entities = false_entities_list[i]
                if len(false_entities) > 0:
                    # TODO: batch per doc
                    all_entities = [true_entity]
                    all_entities.extend(false_entities)

                    entity_encodings, entity_bp = self.entity_encoder.begin_update(all_entities, drop=drop)
                    true_entity_encoding = entity_encodings[0]
                    false_entity_encodings = entity_encodings[1:]

                    all_vectors = [true_entity_encoding]
                    all_vectors.extend(false_entity_encodings)

                    # consensus_encoding = self._calculate_consensus(doc_encoding, true_entity_encoding)

                    true_prob = self._calculate_probability_depr(doc_encoding, true_entity_encoding, all_vectors)
                    # print("true", true_prob, true_entity_encoding)

                    all_probs = [true_prob]
                    for false_vector in false_entity_encodings:
                        false_prob = self._calculate_probability_depr(doc_encoding, false_vector, all_vectors)
                        # print("false", false_prob, false_vector)
                        all_probs.append(false_prob)

                    loss = self._calculate_loss_depr(true_prob, all_probs).astype(np.float32)
                    if self.PRINT_LOSS:
                        print("loss train", round(loss, 5))

                    # for false_vector in false_vectors:
                    #     false_gradient = -1 * self._calculate_entity_gradient(loss, doc_encoding, false_vector, false_vectors)
                    #     print("false gradient", false_gradient)

                    # doc_gradient = self._calculate_doc_gradient(loss, doc_encoding, true_entity_encoding, false_entity_encodings)
                    true_gradient, doc_gradient = self._calculate_entity_gradient_depr(loss, doc_encoding, true_entity_encoding, false_entity_encodings)
                    # print("true_gradient", true_gradient)
                    # print("doc_gradient", doc_gradient)

                    article_bp([doc_gradient.astype(np.float32)], sgd=self.sgd_article)
                    entity_bp([true_gradient.astype(np.float32)], sgd=self.sgd_entity)
                    # true_entity_bp([true_gradient.astype(np.float32)], sgd=self.sgd_entity)
            except Exception as e:
                pass
    def update(self, article_docs, entities, golds, drop=0.):
        doc_encodings, bp_doc = self.article_encoder.begin_update(article_docs, drop=drop)
        entity_encodings, bp_encoding = self.entity_encoder.begin_update(entities, drop=drop)
        concat_encodings = [list(entity_encodings[i]) + list(doc_encodings[i]) for i in range(len(entities))]

        # TODO: FIX
        predictions, bp_model = self.model.begin_update(np.asarray(concat_encodings), drop=drop)
        predictions = self.model.ops.flatten(predictions)
        golds = self.model.ops.asarray(golds)
        # print("predictions", predictions)
        # print("golds", golds)

        d_scores = (predictions - golds)  # / predictions.shape[0]
        # print("d_scores (1)", d_scores)

        loss = (d_scores ** 2).sum()
        if self.PRINT_LOSS:
            print("loss train", round(loss, 5))

        d_scores = d_scores.reshape((-1, 1))
        d_scores = d_scores.astype(np.float32)
        # print("d_scores (2)", d_scores)

        model_gradient = bp_model(d_scores, sgd=self.sgd)
        doc_gradient = [x[0:self.ARTICLE_WIDTH] for x in model_gradient]
        entity_gradient = [x[self.ARTICLE_WIDTH:] for x in model_gradient]

        bp_doc(doc_gradient)
        bp_encoding(entity_gradient)
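
update() drives all three networks from one squared-error signal: d_scores = predictions - golds is pushed back through the stacked model, and the gradient arriving at the concatenated input is sliced apart again for the two encoders. A small numpy sketch of that bookkeeping (widths as in the class constants; note that concat_encodings puts the entity encoding first, so the doc/entity slice labels above only line up because ENTITY_WIDTH == ARTICLE_WIDTH, and the code uses (p - g) rather than the full 2 * (p - g) MSE derivative, which only rescales the learning rate):

import numpy as np

ENTITY_WIDTH = ARTICLE_WIDTH = 64

predictions = np.asarray([0.9, 0.2], dtype="f")
golds = np.asarray([1.0, 0.0], dtype="f")

d_scores = predictions - golds       # gradient of 0.5 * sum((p - g) ** 2)
loss = float((d_scores ** 2).sum())
print("loss", round(loss, 5))

# stand-in for the gradient bp_model returns at the concatenated input:
model_gradient = np.random.randn(2, ENTITY_WIDTH + ARTICLE_WIDTH).astype("f")

entity_gradient = model_gradient[:, :ENTITY_WIDTH]  # entity encoding came first
doc_gradient = model_gradient[:, ENTITY_WIDTH:]     # article encoding second
print(entity_gradient.shape, doc_gradient.shape)    # (2, 64) (2, 64)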
    def _calculate_probability_depr(self, vector1, vector2, allvectors):
        """ Make sure that vector2 is included in allvectors """
        if len(vector1) != len(vector2):
            raise ValueError("To calculate similarity, both vectors should be of equal length")

@@ -254,12 +489,12 @@ class EL_Model():
        return float(e / (self.EPS + e_sum))

    def _calculate_loss_depr(self, true_prob, all_probs):
        """ all_probs should include true_prob ! """
        return -1 * np.log((self.EPS + true_prob) / (self.EPS + sum(all_probs)))

    @staticmethod
    def _calculate_doc_gradient_depr(loss, doc_vector, true_vector, false_vectors):
        gradient = np.zeros(len(doc_vector))
        for i in range(len(doc_vector)):
            min_false = min(x[i] for x in false_vectors)

@@ -276,21 +511,25 @@ class EL_Model():
                if doc_vector[i] < 0:
                    gradient[i] = 0
                else:
                    # non-distinctive vector positions should converge to 0
                    gradient[i] = doc_vector[i]

        return gradient

    # TODO: delete ? try again ?
    def depr__calculate_true_gradient(self, doc_vector, entity_vector):
        # sum_entity_vector = sum(entity_vector)
        # gradient = [-sum_entity_vector / (self.EPS + np.exp(doc_vector[i] * entity_vector[i])) for i in range(len(doc_vector))]
        gradient = [1 / (self.EPS + np.exp(doc_vector[i] * entity_vector[i])) for i in range(len(doc_vector))]
        return np.asarray(gradient)

    def _calculate_losses_vector_depr(self, doc_vector, true_vector, false_vectors):
        # prob_true = list()
        # prob_false_dict = dict()
        true_losses = list()
        # false_losses_dict = dict()
        for i in range(len(true_vector)):
            doc_i = np.asarray([doc_vector[i]])
            true_i = np.asarray([true_vector[i]])

@@ -299,32 +538,45 @@ class EL_Model():
            all_i.extend(falses_i)

            prob_true_i = self._calculate_probability_depr(doc_i, true_i, all_i)
            # prob_true.append(prob_true_i)

            # false_list = list()
            all_probs_i = [prob_true_i]
            for false_i in falses_i:
                prob_false_i = self._calculate_probability_depr(doc_i, false_i, all_i)
                all_probs_i.append(prob_false_i)
                # false_list.append(prob_false_i)
            # prob_false_dict[i] = false_list

            true_loss_i = self._calculate_loss_depr(prob_true_i, all_probs_i).astype(np.float32)
            if doc_vector[i] > 0:
                true_loss_i = -1 * true_loss_i
            true_losses.append(true_loss_i)

            # false_loss_list = list()
            # for prob_false_i in false_list:
            #     false_loss_i = self._calculate_loss(prob_false_i, all_probs_i).astype(np.float32)
            #     false_loss_list.append(false_loss_i)
            # false_losses_dict[i] = false_loss_list

        return true_losses  # , false_losses_dict

    def _calculate_entity_gradient_depr(self, loss, doc_vector, true_vector, false_vectors):
        true_losses = self._calculate_losses_vector_depr(doc_vector, true_vector, false_vectors)

        # renormalize the gradient so that the total sum of abs values does not exceed the actual loss
        loss_i = sum([abs(x) for x in true_losses])  # sum of absolute values
        entity_gradient = [(x / 2) * (loss / loss_i) for x in true_losses]
        doc_gradient = [(x / 2) * (loss / loss_i) for x in true_losses]
        return np.asarray(entity_gradient), np.asarray(doc_gradient)

    @staticmethod
    def _calculate_dot_exp_depr(vector1, vector2_transposed):
        dot_product = vector1.dot(vector2_transposed)
        dot_product = min(50, dot_product)
        dot_product = max(-10000, dot_product)
        # print("DOT", dot_product)
        e = np.exp(dot_product)
        # print("E", e)

View File

@@ -111,7 +111,7 @@ if __name__ == "__main__":
        print("STEP 6: training ", datetime.datetime.now())
        my_nlp = spacy.load('en_core_web_md')
        trainer = EL_Model(kb=my_kb, nlp=my_nlp)
        trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=1, devlimit=1)
        print()

        # STEP 7: apply the EL algorithm on the dev dataset