experimenting with cosine sim for negative examples (not OK yet)

svlandeg 2019-05-29 16:07:53 +02:00
parent a761929fa5
commit 268a52ead7
2 changed files with 38 additions and 8 deletions


@@ -8,6 +8,7 @@ import numpy as np
 import random
 from random import shuffle
 from thinc.neural._classes.convolution import ExtractWindow
+from thinc.neural.util import get_array_module
 from examples.pipeline.wiki_entity_linking import run_el, training_set_creator, kb_creator
@@ -20,7 +21,7 @@ from thinc.t2t import ParametricAttention
 from thinc.misc import Residual
 from thinc.misc import LayerNorm as LN
-from spacy.cli.pretrain import get_cossim_loss
+# from spacy.cli.pretrain import get_cossim_loss
 from spacy.matcher import PhraseMatcher
 from spacy.tokens import Doc
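
Note: the stock get_cossim_loss helper in spacy.cli.pretrain computes the cosine loss against an implicit target similarity of 1, so it only fits positive examples. The import is therefore commented out and replaced by the local get_cossim_loss method added below, which takes an explicit per-row target of +1 or -1.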
@@ -307,27 +308,56 @@ class EL_Model:
         self.sgd_desc.learn_rate = self.LEARN_RATE
         self.sgd_desc.L2 = self.L2
 
-    @staticmethod
-    def get_loss(predictions, golds):
-        loss, gradients = get_cossim_loss(predictions, golds)
+    def get_loss(self, v1, v2, targets):
+        loss, gradients = self.get_cossim_loss(v1, v2, targets)
         return loss, gradients
 
+    def get_cossim_loss(self, yh, y, t):
+        # Add a small constant to avoid 0 vectors
+        # print()
+        # print("yh", yh)
+        # print("y", y)
+        # print("t", t)
+        yh = yh + 1e-8
+        y = y + 1e-8
+        # https://math.stackexchange.com/questions/1923613/partial-derivative-of-cosine-similarity
+        xp = get_array_module(yh)
+        norm_yh = xp.linalg.norm(yh, axis=1, keepdims=True)
+        norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
+        mul_norms = norm_yh * norm_y
+        cos = (yh * y).sum(axis=1, keepdims=True) / mul_norms
+        # print("cos", cos)
+        d_yh = (y / mul_norms) - (cos * (yh / norm_yh ** 2))
+        # print("abs", xp.abs(cos - t))
+        loss = xp.abs(cos - t).sum()
+        # print("loss", loss)
+        # print("d_yh", d_yh)
+        inverse = np.asarray([int(t[i][0]) * d_yh[i] for i in range(len(t))])
+        # print("inverse", inverse)
+        return loss, -inverse
+
     def update(self, entity_clusters, golds, descs, art_texts, arts, sent_texts, sents):
         all_clusters = list(entity_clusters.keys())
 
         arts_list = list()
         sents_list = list()
         descs_list = list()
+        targets = list()
 
         for cluster, entities in entity_clusters.items():
             art = art_texts[arts[cluster]]
             sent = sent_texts[sents[cluster]]
             for e in entities:
-                # TODO: more appropriate loss for the whole cluster (currently only pos entities)
                 if golds[e]:
                     arts_list.append(art)
                     sents_list.append(sent)
                     descs_list.append(descs[e])
+                    targets.append([1])
+                else:
+                    arts_list.append(art)
+                    sents_list.append(sent)
+                    descs_list.append(descs[e])
+                    targets.append([-1])
 
         desc_docs = self.nlp.pipe(descs_list)
         desc_encodings, bp_desc = self.desc_encoder.begin_update(desc_docs, drop=self.DROP)
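
A note on why the returned gradient is correct: d_yh is the partial derivative of the cosine similarity itself, per the linked derivation. Since cos always lies in [-1, 1], sign(cos - t) equals -t for t = +1 or -1, so the gradient of the loss |cos - t| with respect to yh is -t * d_yh, row by row, which is exactly what -inverse returns. Below is a minimal NumPy-only sketch of the same computation with a finite-difference spot check; the function name and the toy inputs are illustrative, not part of the commit.

import numpy as np

def signed_cossim_loss(yh, y, t):
    # Same computation as get_cossim_loss in the diff above, NumPy-only.
    yh = yh + 1e-8
    y = y + 1e-8
    norm_yh = np.linalg.norm(yh, axis=1, keepdims=True)
    norm_y = np.linalg.norm(y, axis=1, keepdims=True)
    mul_norms = norm_yh * norm_y
    cos = (yh * y).sum(axis=1, keepdims=True) / mul_norms
    d_yh = (y / mul_norms) - (cos * (yh / norm_yh ** 2))  # d(cos)/d(yh)
    loss = np.abs(cos - t).sum()
    gradient = -np.asarray([int(t[i][0]) * d_yh[i] for i in range(len(t))])
    return loss, gradient

rng = np.random.RandomState(0)
yh = rng.randn(4, 5)
y = rng.randn(4, 5)
t = np.asarray([[1], [1], [-1], [-1]])  # one +/-1 target per row, as built in update()

loss, gradient = signed_cossim_loss(yh, y, t)

# Finite-difference check of a single coordinate:
eps = 1e-6
yh_eps = yh.copy()
yh_eps[0, 0] += eps
loss_eps, _ = signed_cossim_loss(yh_eps, y, t)
print(gradient[0, 0], (loss_eps - loss) / eps)  # the two numbers should agree closely

With targets of [1] for gold entities and [-1] for the rest, stepping against this gradient pushes matching context/description pairs toward cosine +1 and mismatched pairs toward -1.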
@@ -339,7 +369,7 @@ class EL_Model:
         sent_encodings, bp_sent = self.sent_encoder.begin_update(sent_docs, drop=self.DROP)
 
         concat_encodings = [list(doc_encodings[i]) + list(sent_encodings[i]) for i in
-                            range(len(all_clusters))]
+                            range(len(targets))]
         cont_encodings, bp_cont = self.cont_encoder.begin_update(np.asarray(concat_encodings), drop=self.DROP)
 
         # print("sent_encodings", type(sent_encodings), sent_encodings)
@@ -347,7 +377,7 @@ class EL_Model:
         # print("doc_encodings", type(doc_encodings), doc_encodings)
         # print("getting los for", len(arts_list), "entities")
-        loss, gradient = self.get_loss(cont_encodings, desc_encodings)
+        loss, gradient = self.get_loss(cont_encodings, desc_encodings, targets)
         # print("gradient", gradient)
 
         if self.PRINT_BATCH_LOSS:


@ -111,7 +111,7 @@ if __name__ == "__main__":
print("STEP 6: training", datetime.datetime.now()) print("STEP 6: training", datetime.datetime.now())
my_nlp = spacy.load('en_core_web_md') my_nlp = spacy.load('en_core_web_md')
trainer = EL_Model(kb=my_kb, nlp=my_nlp) trainer = EL_Model(kb=my_kb, nlp=my_nlp)
trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=5000, devlimit=100) trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=1000, devlimit=100)
print() print()
# STEP 7: apply the EL algorithm on the dev dataset # STEP 7: apply the EL algorithm on the dev dataset
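
The only other change: trainlimit drops from 5000 to 1000 articles, presumably to keep iterations on the still-experimental loss quick; devlimit stays at 100.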