simplify architecture and larger-scale test runs

This commit is contained in:
svlandeg 2019-05-17 01:51:18 +02:00
parent d51bffe63b
commit 400b19353d
3 changed files with 88 additions and 84 deletions

View File

@ -81,7 +81,8 @@ def evaluate(predictions, golds, to_print=True):
for pred, gold in zip(predictions, golds):
is_correct = pred == gold
if not pred:
fn += 1
if not is_correct: # we don't care about tn
fn += 1
elif is_correct:
tp += 1
else:

View File

@ -12,10 +12,9 @@ from examples.pipeline.wiki_entity_linking import run_el, training_set_creator,
from spacy._ml import SpacyVectors, create_default_optimizer, zero_init, logistic
from thinc.api import chain, concatenate, flatten_add_lengths, with_getitem, clone, with_flatten
from thinc.neural.util import get_array_module
from thinc.v2v import Model, Softmax, Maxout, Affine, ReLu
from thinc.t2v import Pooling, sum_pool, mean_pool, max_pool
from thinc.api import chain, concatenate, flatten_add_lengths, clone
from thinc.v2v import Model, Maxout, Affine
from thinc.t2v import Pooling, mean_pool
from thinc.t2t import ParametricAttention
from thinc.misc import Residual
from thinc.misc import LayerNorm as LN
@ -27,16 +26,15 @@ from spacy.tokens import Doc
class EL_Model:
PRINT_LOSS = True
PRINT_LOSS = False
PRINT_F = True
EPS = 0.0000000005
CUTOFF = 0.5
INPUT_DIM = 300
ENTITY_WIDTH = 64
ARTICLE_WIDTH = 64
HIDDEN_1_WIDTH = 256
HIDDEN_2_WIDTH = 64
ARTICLE_WIDTH = 128
HIDDEN_WIDTH = 64
name = "entity_linker"
@ -53,46 +51,44 @@ class EL_Model:
Doc.set_extension("entity_id", default=None)
train_instances, train_pos, train_neg, train_doc = self._get_training_data(training_dir,
entity_descr_output,
False,
trainlimit,
to_print=False)
train_inst, train_pos, train_neg, train_doc = self._get_training_data(training_dir,
entity_descr_output,
False,
trainlimit,
to_print=False)
dev_instances, dev_pos, dev_neg, dev_doc = self._get_training_data(training_dir,
entity_descr_output,
True,
devlimit,
to_print=False)
dev_inst, dev_pos, dev_neg, dev_doc = self._get_training_data(training_dir,
entity_descr_output,
True,
devlimit,
to_print=False)
self._begin_training()
if self.PRINT_F:
_, _, f_avg_train = -3.42, -3.42, -3.42 # self._test_dev(train_instances, train_pos, train_neg, train_doc, avg=True)
_, _, f_nonavg_train = self._test_dev(train_instances, train_pos, train_neg, train_doc, avg=False)
_, _, f_random_train = self._test_dev(train_instances, train_pos, train_neg, train_doc, calc_random=True)
_, _, f_avg_dev = -3.42, -3.42, -3.42 # self._test_dev(dev_instances, dev_pos, dev_neg, dev_doc, avg=True)
_, _, f_nonavg_dev = self._test_dev(dev_instances, dev_pos, dev_neg, dev_doc, avg=False)
_, _, f_random_dev = self._test_dev(dev_instances, dev_pos, dev_neg, dev_doc, calc_random=True)
print("random F train", round(f_random_train, 1))
print("random F dev", round(f_random_dev, 1))
print()
print("avg/nonavg F train", round(f_avg_train, 1), round(f_nonavg_train, 1))
print("avg/nonavg F dev", round(f_avg_dev, 1), round(f_nonavg_dev, 1))
print()
print()
self._test_dev(train_inst, train_pos, train_neg, train_doc, print_string="train_random", calc_random=True)
self._test_dev(dev_inst, dev_pos, dev_neg, dev_doc, print_string="dev_random", calc_random=True)
print()
self._test_dev(train_inst, train_pos, train_neg, train_doc, print_string="train_pre", calc_random=False)
self._test_dev(dev_inst, dev_pos, dev_neg, dev_doc, print_string="dev_pre", avg=False)
instance_pos_count = 0
instance_neg_count = 0
if to_print:
print("Training on", len(train_instances.values()), "articles")
print("Dev test on", len(dev_instances.values()), "articles")
print()
print("Training on", len(train_inst.values()), "articles")
print("Dev test on", len(dev_inst.values()), "articles")
article_docs = list()
entities = list()
golds = list()
for article_id, inst_cluster_set in train_instances.items():
# TODO: proper batches. Currently 1 article at a time
article_count = 0
for article_id, inst_cluster_set in train_inst.items():
# if to_print:
# print()
# print(article_count, "Training on article", article_id)
article_count += 1
article_docs = list()
entities = list()
golds = list()
for inst_cluster in inst_cluster_set:
article_docs.append(train_doc[article_id])
entities.append(train_pos.get(inst_cluster))
@ -104,36 +100,31 @@ class EL_Model:
golds.append(float(0.0))
instance_neg_count += 1
for x in range(10):
print("Updating", x)
self.update(article_docs=article_docs, entities=entities, golds=golds)
# eval again
if self.PRINT_F:
_, _, f_avg_train = -3.42, -3.42, -3.42 # self._test_dev(train_instances, train_pos, train_neg, train_doc, avg=True)
_, _, f_nonavg_train = self._test_dev(train_instances, train_pos, train_neg, train_doc, avg=False)
_, _, f_avg_dev = -3.42, -3.42, -3.42 # self._test_dev(dev_instances, dev_pos, dev_neg, dev_doc, avg=True)
_, _, f_nonavg_dev = self._test_dev(dev_instances, dev_pos, dev_neg, dev_doc, avg=False)
print("avg/nonavg F train", round(f_avg_train, 1), round(f_nonavg_train, 1))
print("avg/nonavg F dev", round(f_avg_dev, 1), round(f_nonavg_dev, 1))
print()
# dev eval
self._test_dev(dev_inst, dev_pos, dev_neg, dev_doc, print_string="dev_inter", avg=False)
if to_print:
print()
print("Trained on", instance_pos_count, "/", instance_neg_count, "instances pos/neg")
def _test_dev(self, dev_instances, dev_pos, dev_neg, dev_doc, avg=False, calc_random=False):
print()
self._test_dev(train_inst, train_pos, train_neg, train_doc, print_string="train_post", calc_random=False)
self._test_dev(dev_inst, dev_pos, dev_neg, dev_doc, print_string="dev_post", avg=False)
def _test_dev(self, instances, pos, neg, doc, print_string, avg=False, calc_random=False):
predictions = list()
golds = list()
for article_id, inst_cluster_set in dev_instances.items():
for article_id, inst_cluster_set in instances.items():
for inst_cluster in inst_cluster_set:
pos_ex = dev_pos.get(inst_cluster)
neg_exs = dev_neg.get(inst_cluster, [])
pos_ex = pos.get(inst_cluster)
neg_exs = neg.get(inst_cluster, [])
article = inst_cluster.split(sep="_")[0]
entity_id = inst_cluster.split(sep="_")[1]
article_doc = dev_doc[article]
article_doc = doc[article]
if calc_random:
prediction = self._predict_random(entity=pos_ex)
@ -150,9 +141,17 @@ class EL_Model:
predictions.append(prediction)
golds.append(float(0.0))
# TODO: use lowest_mse and combine with prior probability
# TODO: combine with prior probability
p, r, f = run_el.evaluate(predictions, golds, to_print=False)
return p, r, f
if self.PRINT_F:
# print("p/r/F", print_string, round(p, 1), round(r, 1), round(f, 1))
print("F", print_string, round(f, 1))
loss, d_scores = self.get_loss(self.model.ops.asarray(predictions), self.model.ops.asarray(golds))
if self.PRINT_LOSS:
print("loss", print_string, round(loss, 5))
return loss, p, r, f
def _predict(self, article_doc, entity, avg=False, apply_threshold=True):
if avg:
@ -182,20 +181,16 @@ class EL_Model:
def _build_cnn(self, hidden_entity_width, hidden_article_width):
with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
self.entity_encoder = self._encoder(in_width=self.INPUT_DIM, hidden_width=hidden_entity_width) # entity encoding
self.article_encoder = self._encoder(in_width=self.INPUT_DIM, hidden_width=hidden_article_width) # doc encoding
self.entity_encoder = self._encoder(in_width=self.INPUT_DIM, hidden_width=hidden_entity_width)
self.article_encoder = self._encoder(in_width=self.INPUT_DIM, hidden_width=hidden_article_width)
hidden_input_with = hidden_entity_width + hidden_article_width
hidden_output_with = self.HIDDEN_1_WIDTH
nr_i = hidden_entity_width + hidden_article_width
nr_o = self.HIDDEN_WIDTH
convolution_2 = Residual((ExtractWindow(nW=1) >> LN(Maxout(hidden_output_with, hidden_output_with * 3))))
self.model = Affine(hidden_output_with, hidden_input_with) \
>> LN(Maxout(hidden_output_with, hidden_output_with)) \
>> convolution_2 \
>> Affine(self.HIDDEN_2_WIDTH, hidden_output_with) \
>> Affine(1, self.HIDDEN_2_WIDTH) \
>> logistic
self.model = Affine(nr_o, nr_i) \
>> LN(Maxout(nr_o, nr_o)) \
>> Affine(1, nr_o) \
>> logistic
@staticmethod
def _encoder(in_width, hidden_width):
@ -204,38 +199,46 @@ class EL_Model:
>> flatten_add_lengths \
>> ParametricAttention(in_width)\
>> Pooling(mean_pool) \
>> Residual(zero_init(Maxout(in_width, in_width))) \
>> Residual((ExtractWindow(nW=1) >> LN(Maxout(in_width, in_width * 3)))) \
>> zero_init(Affine(hidden_width, in_width, drop_factor=0.0))
# TODO: ReLu instead of LN(Maxout) ?
return encoder
# Set up the optimizer used during training: build it with
# create_default_optimizer from the ops object of self.model and keep it on
# self.sgd so later update steps can pass it along as the `sgd` argument.
def _begin_training(self):
self.sgd = create_default_optimizer(self.model.ops)
def update(self, article_docs, entities, golds, drop=0.):
@staticmethod
def get_loss(predictions, golds):
    """Compute the summed squared error between predictions and golds.

    Both arguments must support elementwise subtraction, and their
    difference must support ``** 2`` and ``.sum()`` (e.g. array objects).

    Returns a ``(loss, d_scores)`` tuple, where ``d_scores`` is
    ``predictions - golds`` and ``loss`` is the sum of its squared entries.
    """
    residuals = predictions - golds
    squared_error = residuals ** 2
    return squared_error.sum(), residuals
def update(self, article_docs, entities, golds, drop=0., apply_threshold=True):
doc_encodings, bp_doc = self.article_encoder.begin_update(article_docs, drop=drop)
entity_encodings, bp_encoding = self.entity_encoder.begin_update(entities, drop=drop)
concat_encodings = [list(entity_encodings[i]) + list(doc_encodings[i]) for i in range(len(entities))]
predictions, bp_model = self.model.begin_update(np.asarray(concat_encodings), drop=drop)
predictions = self.model.ops.flatten(predictions)
golds = self.model.ops.asarray(golds)
# print("predictions", predictions)
# print("golds", golds)
loss, d_scores = self.get_loss(predictions, golds)
d_scores = (predictions - golds) # / predictions.shape[0]
# print("d_scores (1)", d_scores)
# if self.PRINT_LOSS:
# print("loss train", round(loss, 5))
loss = (d_scores ** 2).sum()
if self.PRINT_LOSS:
print("loss train", round(loss, 5))
# if self.PRINT_F:
# predictions_f = [x for x in predictions]
# if apply_threshold:
# predictions_f = [1.0 if x > self.CUTOFF else 0.0 for x in predictions_f]
# p, r, f = run_el.evaluate(predictions_f, golds, to_print=False)
# print("p/r/F train", round(p, 1), round(r, 1), round(f, 1))
d_scores = d_scores.reshape((-1, 1))
d_scores = d_scores.astype(np.float32)
# print("d_scores (2)", d_scores)
model_gradient = bp_model(d_scores, sgd=self.sgd)

View File

@ -108,10 +108,10 @@ if __name__ == "__main__":
# STEP 6: apply the EL algorithm on the training dataset
if run_training:
print("STEP 6: training ", datetime.datetime.now())
print("STEP 6: training", datetime.datetime.now())
my_nlp = spacy.load('en_core_web_md')
trainer = EL_Model(kb=my_kb, nlp=my_nlp)
trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=1, devlimit=1)
trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=2000, devlimit=200)
print()
# STEP 7: apply the EL algorithm on the dev dataset