From 268a52ead7bbad21a22df11e9446971102193bcf Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Wed, 29 May 2019 16:07:53 +0200
Subject: [PATCH] experimenting with cosine sim for negative examples (not OK
 yet)

---
 .../pipeline/wiki_entity_linking/train_el.py  | 44 ++++++++++++++++---
 .../wiki_entity_linking/wiki_nel_pipeline.py  |  2 +-
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/examples/pipeline/wiki_entity_linking/train_el.py b/examples/pipeline/wiki_entity_linking/train_el.py
index ea42f9ab6..ba8a6a6c9 100644
--- a/examples/pipeline/wiki_entity_linking/train_el.py
+++ b/examples/pipeline/wiki_entity_linking/train_el.py
@@ -8,6 +8,7 @@ import numpy as np
 import random
 from random import shuffle
 from thinc.neural._classes.convolution import ExtractWindow
+from thinc.neural.util import get_array_module
 
 from examples.pipeline.wiki_entity_linking import run_el, training_set_creator, kb_creator
 
@@ -20,7 +21,7 @@ from thinc.t2t import ParametricAttention
 from thinc.misc import Residual
 from thinc.misc import LayerNorm as LN
 
-from spacy.cli.pretrain import get_cossim_loss
+# from spacy.cli.pretrain import get_cossim_loss
 
 from spacy.matcher import PhraseMatcher
 from spacy.tokens import Doc
@@ -307,27 +308,56 @@ class EL_Model:
         self.sgd_desc.learn_rate = self.LEARN_RATE
         self.sgd_desc.L2 = self.L2
 
-    @staticmethod
-    def get_loss(predictions, golds):
-        loss, gradients = get_cossim_loss(predictions, golds)
+    def get_loss(self, v1, v2, targets):
+        loss, gradients = self.get_cossim_loss(v1, v2, targets)
         return loss, gradients
 
+    def get_cossim_loss(self, yh, y, t):
+        # Add a small constant to avoid 0 vectors
+        # print()
+        # print("yh", yh)
+        # print("y", y)
+        # print("t", t)
+        yh = yh + 1e-8
+        y = y + 1e-8
+        # https://math.stackexchange.com/questions/1923613/partial-derivative-of-cosine-similarity
+        xp = get_array_module(yh)
+        norm_yh = xp.linalg.norm(yh, axis=1, keepdims=True)
+        norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
+        mul_norms = norm_yh * norm_y
+        cos = (yh * y).sum(axis=1, keepdims=True) / mul_norms
+        # print("cos", cos)
+        d_yh = (y / mul_norms) - (cos * (yh / norm_yh ** 2))
+        # print("abs", xp.abs(cos - t))
+        loss = xp.abs(cos - t).sum()
+        # print("loss", loss)
+        # print("d_yh", d_yh)
+        inverse = np.asarray([int(t[i][0]) * d_yh[i] for i in range(len(t))])
+        # print("inverse", inverse)
+        return loss, -inverse
+
     def update(self, entity_clusters, golds, descs, art_texts, arts, sent_texts, sents):
         all_clusters = list(entity_clusters.keys())
 
         arts_list = list()
         sents_list = list()
         descs_list = list()
+        targets = list()
 
         for cluster, entities in entity_clusters.items():
             art = art_texts[arts[cluster]]
             sent = sent_texts[sents[cluster]]
             for e in entities:
-                # TODO: more appropriate loss for the whole cluster (currently only pos entities)
                 if golds[e]:
                     arts_list.append(art)
                     sents_list.append(sent)
                     descs_list.append(descs[e])
+                    targets.append([1])
+                else:
+                    arts_list.append(art)
+                    sents_list.append(sent)
+                    descs_list.append(descs[e])
+                    targets.append([-1])
 
         desc_docs = self.nlp.pipe(descs_list)
         desc_encodings, bp_desc = self.desc_encoder.begin_update(desc_docs, drop=self.DROP)
@@ -339,7 +369,7 @@ class EL_Model:
         sent_encodings, bp_sent = self.sent_encoder.begin_update(sent_docs, drop=self.DROP)
 
         concat_encodings = [list(doc_encodings[i]) + list(sent_encodings[i]) for i in
-                            range(len(all_clusters))]
+                            range(len(targets))]
         cont_encodings, bp_cont = self.cont_encoder.begin_update(np.asarray(concat_encodings), drop=self.DROP)
 
         # print("sent_encodings", type(sent_encodings), sent_encodings)
@@ -347,7 +377,7 @@ class EL_Model:
         # print("doc_encodings", type(doc_encodings), doc_encodings)
 
         # print("getting los for", len(arts_list), "entities")
-        loss, gradient = self.get_loss(cont_encodings, desc_encodings)
+        loss, gradient = self.get_loss(cont_encodings, desc_encodings, targets)
         # print("gradient", gradient)
 
         if self.PRINT_BATCH_LOSS:
diff --git a/examples/pipeline/wiki_entity_linking/wiki_nel_pipeline.py b/examples/pipeline/wiki_entity_linking/wiki_nel_pipeline.py
index 25c1e4721..a24ff30c5 100644
--- a/examples/pipeline/wiki_entity_linking/wiki_nel_pipeline.py
+++ b/examples/pipeline/wiki_entity_linking/wiki_nel_pipeline.py
@@ -111,7 +111,7 @@ if __name__ == "__main__":
         print("STEP 6: training", datetime.datetime.now())
         my_nlp = spacy.load('en_core_web_md')
         trainer = EL_Model(kb=my_kb, nlp=my_nlp)
-        trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=5000, devlimit=100)
+        trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=1000, devlimit=100)
         print()
 
         # STEP 7: apply the EL algorithm on the dev dataset
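
Note on get_cossim_loss (editor commentary, not part of the commit): with
targets t in {+1, -1}, the loss is L = sum_i |cos(yh_i, y_i) - t_i| and its
gradient w.r.t. yh is sign(cos - t) * dcos/dyh. Because cos lies in [-1, 1],
sign(cos - t) reduces to -t away from the non-differentiable point cos = t,
which is what "return loss, -inverse" computes. The standalone NumPy sketch
below re-derives the same quantities outside spaCy/thinc and checks the
analytic gradient against a finite difference; every name in it is local to
the sketch, not spaCy or thinc API.

import numpy as np

def cossim_loss(yh, y, t):
    # L = sum_i |cos(yh_i, y_i) - t_i|, with targets t_i in {+1, -1}
    yh = yh + 1e-8  # small constant against zero vectors, as in the patch
    y = y + 1e-8
    norm_yh = np.linalg.norm(yh, axis=1, keepdims=True)
    norm_y = np.linalg.norm(y, axis=1, keepdims=True)
    mul_norms = norm_yh * norm_y
    cos = (yh * y).sum(axis=1, keepdims=True) / mul_norms
    # dcos/dyh, cf. the math.stackexchange derivation cited in the patch
    d_yh = (y / mul_norms) - cos * (yh / norm_yh ** 2)
    loss = np.abs(cos - t).sum()
    # dL/dyh = sign(cos - t) * dcos/dyh = -t * dcos/dyh for t in {+1, -1}
    return loss, -t * d_yh

rng = np.random.default_rng(0)
yh = rng.normal(size=(4, 5))
y = rng.normal(size=(4, 5))
t = np.array([[1.0], [1.0], [-1.0], [-1.0]])

loss, grad = cossim_loss(yh, y, t)

# finite-difference check on a single coordinate
eps = 1e-6
yh_eps = yh.copy()
yh_eps[0, 0] += eps
loss_eps, _ = cossim_loss(yh_eps, y, t)
print("analytic:", grad[0, 0])
print("numeric: ", (loss_eps - loss) / eps)

If the two printed values agree, the returned gradient is consistent with the
|cos - t| loss, which would suggest the "not OK yet" in the commit subject
refers to the training behaviour rather than to the derivative itself.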