Use the characters loss (character-prediction objective) in ClozeMultitask

This commit is contained in:
Matthew Honnibal 2019-10-20 17:47:15 +02:00
parent 77af446d04
commit eba89f08bd

View File

@ -30,6 +30,7 @@ from .._ml import build_text_classifier, build_simple_cnn_text_classifier
from .._ml import build_bow_text_classifier, build_nel_encoder from .._ml import build_bow_text_classifier, build_nel_encoder
from .._ml import link_vectors_to_models, zero_init, flatten from .._ml import link_vectors_to_models, zero_init, flatten
from .._ml import masked_language_model, create_default_optimizer, get_cossim_loss from .._ml import masked_language_model, create_default_optimizer, get_cossim_loss
from .._ml import MultiSoftmax, get_characters_loss
from ..errors import Errors, TempErrors, user_warning, Warnings from ..errors import Errors, TempErrors, user_warning, Warnings
from .. import util from .. import util
@ -837,6 +838,10 @@ class MultitaskObjective(Tagger):
class ClozeMultitask(Pipe): class ClozeMultitask(Pipe):
@classmethod @classmethod
def Model(cls, vocab, tok2vec, **cfg): def Model(cls, vocab, tok2vec, **cfg):
if cfg["objective"] == "characters":
out_sizes = [256] * cfg.get("nr_char", 10)
output_layer = MultiSoftmax(out_sizes)
else:
output_size = vocab.vectors.data.shape[1] output_size = vocab.vectors.data.shape[1]
output_layer = chain( output_layer = chain(
LayerNorm(Maxout(output_size, tok2vec.nO, pieces=3)), LayerNorm(Maxout(output_size, tok2vec.nO, pieces=3)),
@ -852,6 +857,8 @@ class ClozeMultitask(Pipe):
self.vocab = vocab self.vocab = vocab
self.model = model self.model = model
self.cfg = cfg self.cfg = cfg
self.cfg.setdefault("objective", "characters")
self.cfg.setdefault("nr_char", 10)
def set_annotations(self, docs, dep_ids, tensors=None): def set_annotations(self, docs, dep_ids, tensors=None):
pass pass
@ -860,7 +867,8 @@ class ClozeMultitask(Pipe):
tok2vec=None, sgd=None, **kwargs): tok2vec=None, sgd=None, **kwargs):
link_vectors_to_models(self.vocab) link_vectors_to_models(self.vocab)
if self.model is True: if self.model is True:
self.model = self.Model(self.vocab, tok2vec) kwargs.update(self.cfg)
self.model = self.Model(self.vocab, tok2vec, **kwargs)
X = self.model.ops.allocate((5, self.model.tok2vec.nO)) X = self.model.ops.allocate((5, self.model.tok2vec.nO))
self.model.output_layer.begin_training(X) self.model.output_layer.begin_training(X)
if sgd is None: if sgd is None:
@ -874,6 +882,9 @@ class ClozeMultitask(Pipe):
return tokvecs, vectors return tokvecs, vectors
def get_loss(self, docs, vectors, prediction): def get_loss(self, docs, vectors, prediction):
if self.cfg["objective"] == "characters":
loss, gradient = get_characters_loss(self.model.ops, docs, prediction)
else:
# The simplest way to implement this would be to vstack the # The simplest way to implement this would be to vstack the
# token.vector values, but that's a bit inefficient, especially on GPU. # token.vector values, but that's a bit inefficient, especially on GPU.
# Instead we fetch the index into the vectors table for each of our tokens, # Instead we fetch the index into the vectors table for each of our tokens,