Use chars loss in ClozeMultitask

mirror of https://github.com/explosion/spaCy.git
parent: 77af446d04
commit: eba89f08bd
@@ -30,6 +30,7 @@ from .._ml import build_text_classifier, build_simple_cnn_text_classifier
 from .._ml import build_bow_text_classifier, build_nel_encoder
 from .._ml import link_vectors_to_models, zero_init, flatten
 from .._ml import masked_language_model, create_default_optimizer, get_cossim_loss
+from .._ml import MultiSoftmax, get_characters_loss
 from ..errors import Errors, TempErrors, user_warning, Warnings
 from .. import util

@@ -837,11 +838,15 @@ class MultitaskObjective(Tagger):
 class ClozeMultitask(Pipe):
     @classmethod
     def Model(cls, vocab, tok2vec, **cfg):
-        output_size = vocab.vectors.data.shape[1]
-        output_layer = chain(
-            LayerNorm(Maxout(output_size, tok2vec.nO, pieces=3)),
-            zero_init(Affine(output_size, output_size, drop_factor=0.0))
-        )
+        if cfg["objective"] == "characters":
+            out_sizes = [256] * cfg.get("nr_char", 10)
+            output_layer = MultiSoftmax(out_sizes)
+        else:
+            output_size = vocab.vectors.data.shape[1]
+            output_layer = chain(
+                LayerNorm(Maxout(output_size, tok2vec.nO, pieces=3)),
+                zero_init(Affine(output_size, output_size, drop_factor=0.0))
+            )
         model = chain(tok2vec, output_layer)
         model = masked_language_model(vocab, model)
         model.tok2vec = tok2vec
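
This commit adds a character-prediction objective to ClozeMultitask alongside the existing vector-similarity one. With cfg["objective"] == "characters", the output layer becomes a MultiSoftmax: out_sizes = [256] * nr_char gives one independent 256-way softmax per character slot, i.e. a distribution over byte values for each of nr_char character positions per token (which positions those slots cover is not shown in this diff). Below is a minimal numpy sketch of that output layout; multi_softmax is an illustrative stand-in, and the per-block softmax is an assumption about how MultiSoftmax behaves, not spaCy's implementation.

    import numpy as np

    def multi_softmax(scores, out_sizes):
        # Normalize each block of the score vector independently, one softmax
        # per output position, mirroring out_sizes = [256] * nr_char above.
        outputs = []
        start = 0
        for size in out_sizes:
            block = scores[:, start:start + size]
            block = block - block.max(axis=1, keepdims=True)  # numerical stability
            exp = np.exp(block)
            outputs.append(exp / exp.sum(axis=1, keepdims=True))
            start += size
        return np.concatenate(outputs, axis=1)

    nr_char = 10
    out_sizes = [256] * nr_char                  # one 256-way softmax per slot
    scores = np.random.randn(4, sum(out_sizes))  # raw scores for four tokens
    probs = multi_softmax(scores, out_sizes)
    assert probs.shape == (4, 256 * nr_char)
    assert np.allclose(probs[:, :256].sum(axis=1), 1.0)  # each block is a distribution
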
@@ -852,6 +857,8 @@ class ClozeMultitask(Pipe):
         self.vocab = vocab
         self.model = model
         self.cfg = cfg
+        self.cfg.setdefault("objective", "characters")
+        self.cfg.setdefault("nr_char", 10)

     def set_annotations(self, docs, dep_ids, tensors=None):
         pass
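
With these setdefault calls, "characters" with nr_char=10 becomes the default objective for any ClozeMultitask built without an explicit config; any other objective value keeps the old cosine-similarity path. A hedged usage sketch, assuming the usual spaCy v2 constructor ClozeMultitask(vocab, model=True, **cfg); the objective="vectors" name is illustrative, since the diff only ever tests for "characters":

    # Hypothetical construction; cfg keys flow into self.cfg as shown above.
    cloze_chars = ClozeMultitask(nlp.vocab, True)                      # objective="characters", nr_char=10
    cloze_vecs = ClozeMultitask(nlp.vocab, True, objective="vectors")  # any non-"characters" value -> cosine loss
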
@@ -860,7 +867,8 @@ class ClozeMultitask(Pipe):
                        tok2vec=None, sgd=None, **kwargs):
         link_vectors_to_models(self.vocab)
         if self.model is True:
-            self.model = self.Model(self.vocab, tok2vec)
+            kwargs.update(self.cfg)
+            self.model = self.Model(self.vocab, tok2vec, **kwargs)
         X = self.model.ops.allocate((5, self.model.tok2vec.nO))
         self.model.output_layer.begin_training(X)
         if sgd is None:
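
A detail worth noting in the begin_training change: kwargs.update(self.cfg) merges the stored config into the caller's keyword arguments with the stored values winning on collisions, so the objective fixed at construction time is what Model sees. A plain-dict demonstration of that merge order (the key names besides "objective" and "nr_char" are illustrative):

    kwargs = {"objective": "vectors", "extra": 1}     # caller-supplied
    cfg = {"objective": "characters", "nr_char": 10}  # self.cfg after setdefault
    kwargs.update(cfg)                                # self.cfg overrides on collision
    assert kwargs == {"objective": "characters", "extra": 1, "nr_char": 10}
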
@@ -874,13 +882,16 @@ class ClozeMultitask(Pipe):
         return tokvecs, vectors

     def get_loss(self, docs, vectors, prediction):
-        # The simplest way to implement this would be to vstack the
-        # token.vector values, but that's a bit inefficient, especially on GPU.
-        # Instead we fetch the index into the vectors table for each of our tokens,
-        # and look them up all at once. This prevents data copying.
-        ids = self.model.ops.flatten([doc.to_array(ID).ravel() for doc in docs])
-        target = vectors[ids]
-        loss, gradient = get_cossim_loss(prediction, target, ignore_zeros=True)
+        if self.cfg["objective"] == "characters":
+            loss, gradient = get_characters_loss(self.model.ops, docs, prediction)
+        else:
+            # The simplest way to implement this would be to vstack the
+            # token.vector values, but that's a bit inefficient, especially on GPU.
+            # Instead we fetch the index into the vectors table for each of our tokens,
+            # and look them up all at once. This prevents data copying.
+            ids = self.model.ops.flatten([doc.to_array(ID).ravel() for doc in docs])
+            target = vectors[ids]
+            loss, gradient = get_cossim_loss(prediction, target, ignore_zeros=True)
         return float(loss), gradient

     def update(self, docs, golds, drop=0., sgd=None, losses=None):
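
get_characters_loss itself is imported from .._ml but not shown in this diff. Conceptually it scores each of the MultiSoftmax's 256-way distributions against the actual characters of the corresponding token. The sketch below illustrates that idea with one-hot byte targets and a squared-error gradient; both the target encoding and the loss form are assumptions about the layout, not the function's real implementation.

    import numpy as np

    def characters_loss_sketch(prediction, chars, nr_char=10):
        # prediction: (n_tokens, 256 * nr_char), nr_char distributions per token.
        # chars: (n_tokens, nr_char) integer byte values observed for each token.
        n = prediction.shape[0]
        # One-hot encode the target bytes in the same block layout as MultiSoftmax.
        target = np.zeros((n, nr_char, 256), dtype="f")
        target[np.arange(n)[:, None], np.arange(nr_char)[None, :], chars] = 1.0
        target = target.reshape((n, nr_char * 256))
        gradient = prediction - target  # gradient of 0.5 * squared error
        loss = (gradient ** 2).sum()
        return float(loss), gradient

    pred = np.full((2, 2560), 1.0 / 256, dtype="f")  # two tokens, uniform predictions
    chars = np.zeros((2, 10), dtype=int)             # pretend every byte is 0
    loss, grad = characters_loss_sketch(pred, chars)
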