mirror of https://github.com/explosion/spaCy.git
experimenting with cosine sim for negative examples (not OK yet)
parent a761929fa5
commit 268a52ead7
@@ -8,6 +8,7 @@ import numpy as np
 import random
 from random import shuffle
 from thinc.neural._classes.convolution import ExtractWindow
+from thinc.neural.util import get_array_module
 
 from examples.pipeline.wiki_entity_linking import run_el, training_set_creator, kb_creator
 
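The new get_array_module import is what keeps the loss code below backend-agnostic: given an array, it hands back the numpy module for CPU data and cupy for GPU data, so the same xp.linalg calls work on both. A minimal sketch of the pattern, using plain numpy and illustrative names:

    # get_array_module picks numpy or cupy depending on where the array lives.
    from thinc.neural.util import get_array_module
    import numpy as np

    arr = np.zeros((2, 3), dtype="f")
    xp = get_array_module(arr)  # numpy here; cupy for GPU-resident arrays
    print(xp.linalg.norm(arr, axis=1, keepdims=True).shape)  # (2, 1)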
@@ -20,7 +21,7 @@ from thinc.t2t import ParametricAttention
 from thinc.misc import Residual
 from thinc.misc import LayerNorm as LN
 
-from spacy.cli.pretrain import get_cossim_loss
+# from spacy.cli.pretrain import get_cossim_loss
 from spacy.matcher import PhraseMatcher
 from spacy.tokens import Doc
 
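The helper previously imported from spacy.cli.pretrain appears to fix the target similarity at 1 for every row, so it cannot encode negative examples; that is why the import is commented out and a target-aware copy is inlined in the next hunk. The effect of the ±1 targets in miniature (illustrative numbers only):

    # With targets of +1 (gold pair) and -1 (negative pair), minimising
    # |cos - t| pushes cosine similarity towards +1 or -1 respectively.
    import numpy as np
    cos = np.asarray([[0.2], [0.2]])   # current similarity of two pairs
    t = np.asarray([[1], [-1]])        # gold pair vs. negative pair
    print(np.abs(cos - t).ravel())     # [0.8 1.2] -> the negative pair is penalised more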
@@ -307,27 +308,56 @@ class EL_Model:
         self.sgd_desc.learn_rate = self.LEARN_RATE
         self.sgd_desc.L2 = self.L2
 
-    @staticmethod
-    def get_loss(predictions, golds):
-        loss, gradients = get_cossim_loss(predictions, golds)
+    def get_loss(self, v1, v2, targets):
+        loss, gradients = self.get_cossim_loss(v1, v2, targets)
         return loss, gradients
 
+    def get_cossim_loss(self, yh, y, t):
+        # Add a small constant to avoid 0 vectors
+        # print()
+        # print("yh", yh)
+        # print("y", y)
+        # print("t", t)
+        yh = yh + 1e-8
+        y = y + 1e-8
+        # https://math.stackexchange.com/questions/1923613/partial-derivative-of-cosine-similarity
+        xp = get_array_module(yh)
+        norm_yh = xp.linalg.norm(yh, axis=1, keepdims=True)
+        norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
+        mul_norms = norm_yh * norm_y
+        cos = (yh * y).sum(axis=1, keepdims=True) / mul_norms
+        # print("cos", cos)
+        d_yh = (y / mul_norms) - (cos * (yh / norm_yh ** 2))
+        # print("abs", xp.abs(cos - t))
+        loss = xp.abs(cos - t).sum()
+        # print("loss", loss)
+        # print("d_yh", d_yh)
+        inverse = np.asarray([int(t[i][0]) * d_yh[i] for i in range(len(t))])
+        # print("inverse", inverse)
+        return loss, -inverse
+
     def update(self, entity_clusters, golds, descs, art_texts, arts, sent_texts, sents):
         all_clusters = list(entity_clusters.keys())
 
         arts_list = list()
         sents_list = list()
         descs_list = list()
+        targets = list()
 
         for cluster, entities in entity_clusters.items():
             art = art_texts[arts[cluster]]
             sent = sent_texts[sents[cluster]]
             for e in entities:
-                # TODO: more appropriate loss for the whole cluster (currently only pos entities)
                 if golds[e]:
                     arts_list.append(art)
                     sents_list.append(sent)
                     descs_list.append(descs[e])
+                    targets.append([1])
+                else:
+                    arts_list.append(art)
+                    sents_list.append(sent)
+                    descs_list.append(descs[e])
+                    targets.append([-1])
 
         desc_docs = self.nlp.pipe(descs_list)
         desc_encodings, bp_desc = self.desc_encoder.begin_update(desc_docs, drop=self.DROP)
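The d_yh expression is the partial derivative of cosine similarity with respect to yh (per the math.stackexchange link in the hunk): d cos / d yh = y / (|yh|·|y|) − cos · yh / |yh|². Multiplying each row by int(t) and negating turns that into the gradient of |cos − t| for t = ±1, so gold pairs are pulled together and negatives pushed apart. A standalone finite-difference check of the inlined loss (plain numpy; the toy data and names are illustrative, the function body mirrors the hunk above):

    import numpy as np

    def cossim_loss(yh, y, t):
        yh = yh + 1e-8
        y = y + 1e-8
        norm_yh = np.linalg.norm(yh, axis=1, keepdims=True)
        norm_y = np.linalg.norm(y, axis=1, keepdims=True)
        mul_norms = norm_yh * norm_y
        cos = (yh * y).sum(axis=1, keepdims=True) / mul_norms
        d_yh = (y / mul_norms) - (cos * (yh / norm_yh ** 2))
        loss = np.abs(cos - t).sum()
        inverse = np.asarray([int(t[i][0]) * d_yh[i] for i in range(len(t))])
        return loss, -inverse

    rng = np.random.RandomState(0)
    yh, y = rng.randn(4, 3), rng.randn(4, 3)
    t = np.asarray([[1], [1], [-1], [-1]])
    loss, grad = cossim_loss(yh, y, t)

    # Perturb one coordinate and compare the numeric slope to the returned gradient.
    eps, i, j = 1e-6, 2, 1
    yh_eps = yh.copy()
    yh_eps[i, j] += eps
    loss_eps, _ = cossim_loss(yh_eps, y, t)
    print((loss_eps - loss) / eps, grad[i, j])  # the two numbers should agree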
@@ -339,7 +369,7 @@ class EL_Model:
         sent_encodings, bp_sent = self.sent_encoder.begin_update(sent_docs, drop=self.DROP)
 
         concat_encodings = [list(doc_encodings[i]) + list(sent_encodings[i]) for i in
-                            range(len(all_clusters))]
+                            range(len(targets))]
         cont_encodings, bp_cont = self.cont_encoder.begin_update(np.asarray(concat_encodings), drop=self.DROP)
 
         # print("sent_encodings", type(sent_encodings), sent_encodings)
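Because update() now appends one row per candidate entity, negatives included, the number of concatenated encodings is len(targets); the old range(len(all_clusters)) only held while each cluster contributed exactly its single gold row. A toy count with hypothetical entity IDs:

    # One cluster with 1 positive and 2 negative candidates yields 3 rows.
    entity_clusters = {"cluster_0": ["Q1", "Q2", "Q3"]}
    golds = {"Q1": True, "Q2": False, "Q3": False}
    targets = [[1] if golds[e] else [-1]
               for entities in entity_clusters.values() for e in entities]
    assert len(targets) == 3         # rows to encode
    assert len(entity_clusters) == 1  # the old per-cluster count no longer matches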
@@ -347,7 +377,7 @@ class EL_Model:
         # print("doc_encodings", type(doc_encodings), doc_encodings)
         # print("getting los for", len(arts_list), "entities")
 
-        loss, gradient = self.get_loss(cont_encodings, desc_encodings)
+        loss, gradient = self.get_loss(cont_encodings, desc_encodings, targets)
 
         # print("gradient", gradient)
         if self.PRINT_BATCH_LOSS:
@@ -111,7 +111,7 @@ if __name__ == "__main__":
     print("STEP 6: training", datetime.datetime.now())
     my_nlp = spacy.load('en_core_web_md')
     trainer = EL_Model(kb=my_kb, nlp=my_nlp)
-    trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=5000, devlimit=100)
+    trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=1000, devlimit=100)
     print()
 
     # STEP 7: apply the EL algorithm on the dev dataset