Mirror of https://github.com/explosion/spaCy.git, synced 2025-01-26 01:04:34 +03:00
Implement cosine loss for spacy pretrain. Make default

commit 1612990e88
parent 685fff40cf
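The change threads a new loss_func setting from the command line through pretrain() into make_update(), and makes the cosine objective the default. Given the plac annotation added below, the previous L2 behaviour would be selected with the -L option, roughly as follows (the positional arguments are placeholders and are not shown in this diff):

    python -m spacy pretrain <texts_loc> <vectors_model> <output_dir> -L L2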
@@ -9,7 +9,7 @@ from collections import Counter
 from pathlib import Path
 from thinc.v2v import Affine, Maxout
 from thinc.misc import LayerNorm as LN
-from thinc.neural.util import prefer_gpu
+from thinc.neural.util import prefer_gpu, get_array_module
 from wasabi import Printer
 import srsly
 
@@ -27,6 +27,7 @@ from .. import util
     width=("Width of CNN layers", "option", "cw", int),
     depth=("Depth of CNN layers", "option", "cd", int),
     embed_rows=("Embedding rows", "option", "er", int),
+    loss_func=("Loss to use for the objective. L2 or cosine", "option", "L", str),
     use_vectors=("Whether to use the static vectors as input features", "flag", "uv"),
     dropout=("Dropout", "option", "d", float),
     batch_size=("Number of words per training batch", "option", "bs", int),
@@ -42,6 +43,7 @@ def pretrain(
     width=96,
     depth=4,
     embed_rows=2000,
+    loss_func="cosine",
     use_vectors=False,
     dropout=0.2,
     nr_iter=1000,
@@ -123,7 +125,7 @@ def pretrain(
                 max_length=max_length,
                 min_length=min_length,
             )
-            loss = make_update(model, docs, optimizer, drop=dropout)
+            loss = make_update(model, docs, optimizer, objective=loss_func, drop=dropout)
            progress = tracker.update(epoch, loss, docs)
            if progress:
                msg.row(progress, **row_settings)
@@ -196,11 +198,26 @@ def get_vectors_loss(ops, docs, prediction, objective="L2"):
     ids = ops.flatten([doc.to_array(ID).ravel() for doc in docs])
     target = docs[0].vocab.vectors.data[ids]
     if objective == "L2":
-        d_scores = prediction - target
-        loss = (d_scores ** 2).sum()
-    else:
-        raise NotImplementedError(objective)
-    return loss, d_scores
+        d_target = prediction - target
+        loss = (d_target ** 2).sum()
+    elif objective == "cosine":
+        loss, d_target = get_cossim_loss(prediction, target)
+    return loss, d_target
 
 
+def get_cossim_loss(yh, y):
+    # Add a small constant to avoid 0 vectors
+    yh = yh + 1e-8
+    y = y + 1e-8
+    # https://math.stackexchange.com/questions/1923613/partial-derivative-of-cosine-similarity
+    xp = get_array_module(yh)
+    norm_yh = xp.linalg.norm(yh, axis=1, keepdims=True)
+    norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
+    mul_norms = norm_yh * norm_y
+    cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
+    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh**2))
+    loss = xp.abs(cosine - 1).sum()
+    return loss, -d_yh
+
+
 def create_pretraining_model(nlp, tok2vec):
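For reference, the new get_cossim_loss helper computes loss = sum over rows of |cos(yh_i, y_i) - 1| and returns the gradient with respect to the predictions. Per the linked derivation, d(cos)/d(yh) = y / (||yh|| ||y||) - cos * yh / ||yh||^2, and since each row's loss is effectively 1 - cos, the returned gradient is the negation of that. Below is a minimal standalone NumPy sketch of the same computation with two sanity checks; NumPy stands in for the backend returned by thinc's get_array_module, and the function name, shapes, and seed are illustrative only, not part of the commit:

import numpy as np

def cossim_loss(yh, y, eps=1e-8):
    # Mirrors get_cossim_loss from the patch, with NumPy standing in
    # for thinc's get_array_module backend.
    yh = yh + eps  # small constant to avoid zero vectors
    y = y + eps
    norm_yh = np.linalg.norm(yh, axis=1, keepdims=True)
    norm_y = np.linalg.norm(y, axis=1, keepdims=True)
    mul_norms = norm_yh * norm_y
    cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh**2))
    loss = np.abs(cosine - 1).sum()
    return loss, -d_yh

rng = np.random.RandomState(0)
y = rng.normal(size=(4, 300))

# Identical prediction and target: cosine similarity is 1 for every
# row, so both the loss and the gradient should be (near) zero.
loss, d_yh = cossim_loss(y.copy(), y)
print(loss, np.abs(d_yh).max())  # ~0.0  ~0.0

# Finite-difference probe of one gradient entry against the analytic
# derivative; the two printed values should agree to several decimals.
yh = rng.normal(size=(4, 300))
loss, d_yh = cossim_loss(yh, y)
eps = 1e-6
yh[0, 0] += eps
num_grad = (cossim_loss(yh, y)[0] - loss) / eps
print(num_grad, d_yh[0, 0])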