WIP: Add fine-tuning logic to tagger model, re #1182

This commit is contained in:
Matthew Honnibal 2017-08-06 01:13:23 +02:00
parent 7f876a7a82
commit 468c138ab3
2 changed files with 73 additions and 11 deletions


@@ -19,7 +19,7 @@ from thinc.api import FeatureExtracter, with_getitem
 from thinc.neural.pooling import Pooling, max_pool, mean_pool, sum_pool
 from thinc.neural._classes.attention import ParametricAttention
 from thinc.linear.linear import LinearModel
-from thinc.api import uniqued, wrap
+from thinc.api import uniqued, wrap, flatten_add_lengths
 from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP
 from .tokens.doc import Doc
@@ -53,6 +53,27 @@ def _logistic(X, drop=0.):
     return Y, logistic_bwd


+@layerize
+def add_tuples(X, drop=0.):
+    """Given inputs of sequence pairs, where each sequence is (vals, length),
+    sum the values, returning a single sequence.
+
+    If input is:
+        ((vals1, length), (vals2, length))
+    Output is:
+        (vals1+vals2, length)
+
+    vals are a single tensor for the whole batch.
+    """
+    (vals1, length1), (vals2, length2) = X
+    assert length1 == length2
+    def add_tuples_bwd(dY, sgd=None):
+        return (dY, dY)
+    return (vals1+vals2, length1), add_tuples_bwd
+
+
 def _zero_init(model):
     def _zero_init_impl(self, X, y):
         self.W.fill(0)
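
The intended behaviour of add_tuples is easier to see outside the diff. Below is a minimal, dependency-free sketch of the same idea (the toy arrays and the name add_tuples_sketch are invented for illustration; this is not the Thinc API):

    import numpy

    def add_tuples_sketch(X):
        # X is ((vals1, lengths1), (vals2, lengths2)); both sides describe the
        # same batch of documents, so the lengths must agree.
        (vals1, lengths1), (vals2, lengths2) = X
        assert list(lengths1) == list(lengths2)

        def backward(d_sum):
            # The gradient of an element-wise sum flows unchanged to both inputs.
            return d_sum, d_sum

        return (vals1 + vals2, lengths1), backward

    vals1 = numpy.ones((5, 4), dtype="f")        # 5 tokens, width 4
    vals2 = numpy.full((5, 4), 2.0, dtype="f")
    lengths = numpy.asarray([3, 2])              # two docs: 3 + 2 tokens
    (summed, out), backward = add_tuples_sketch(((vals1, lengths), (vals2, lengths)))
    assert (summed == 3.0).all() and list(out) == [3, 2]
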
@@ -61,6 +82,7 @@ def _zero_init(model):
         model.W.fill(0.)
     return model


 @layerize
 def _preprocess_doc(docs, drop=0.):
     keys = [doc.to_array([LOWER]) for doc in docs]
@@ -72,7 +94,6 @@ def _preprocess_doc(docs, drop=0.):
     return (keys, vals, lengths), None


 def _init_for_precomputed(W, ops):
     if (W**2).sum() != 0.:
         return
@@ -80,6 +101,7 @@ def _init_for_precomputed(W, ops):
     ops.xavier_uniform_init(reshaped)
     W[:] = reshaped.reshape(W.shape)


 @describe.on_data(_set_dimensions_if_needed)
 @describe.attributes(
     nI=Dimension("Input size"),
@@ -323,6 +345,21 @@ def get_token_vectors(tokens_attrs_vectors, drop=0.):
     return vectors, backward


+def fine_tune(model1, combine=None):
+    def fine_tune_fwd(docs, drop=0.):
+        X1, bp_X1 = model1.begin_update(docs)
+        lengths = [len(doc) for doc in docs]
+        X2 = model1.ops.flatten(X1)
+        def fine_tune_bwd(d_output, sgd=None):
+            bp_X1(d_output, sgd=sgd)
+            return d_output
+        return (X1+X2, lengths), fine_tune_bwd
+    model = wrap(fine_tune_fwd)
+    return model
+
+
 @layerize
 def flatten(seqs, drop=0.):
     if isinstance(seqs[0], numpy.ndarray):
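
fine_tune_fwd follows the same forward/callback contract used throughout this file: the forward function returns its output together with a callback (fine_tune_bwd) that backpropagates into the wrapped layer and passes the gradient through. A dependency-free sketch of that pattern, with an invented ScaleLayer standing in for a real model:

    import numpy

    class ScaleLayer(object):
        """Stand-in for a Thinc-style layer: y = x * w."""
        def __init__(self, w=2.0):
            self.w = w

        def begin_update(self, X, drop=0.0):
            def finish_update(dY, sgd=None):
                # A real layer would also compute its weight gradient here and
                # let the sgd callable apply it; this sketch only returns dX.
                return dY * self.w
            return X * self.w, finish_update

    layer = ScaleLayer(w=2.0)
    X = numpy.ones((4, 3), dtype="f")
    Y, backprop = layer.begin_update(X)     # forward pass plus callback
    dX = backprop(numpy.ones_like(Y))       # backward pass, like fine_tune_bwd
    assert (Y == 2.0).all() and (dX == 2.0).all()
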
@@ -370,6 +407,35 @@ def preprocess_doc(docs, drop=0.):
     return (keys, vals, lengths), None


+def build_tagger_model(nr_class, token_vector_width, **cfg):
+    with Model.define_operators({'>>': chain, '+': add}):
+        # Input: (doc, tensor) tuples
+        embed_docs = with_getitem(0,
+            FeatureExtracter([NORM])
+            >> HashEmbed(token_vector_width, 1000)
+            >> flatten_add_lengths
+        )
+
+        model = (
+            fine_tune(embed_docs)
+            >> with_getitem(0,
+                FeatureExtracter([NORM])
+                >> HashEmbed(token_vector_width, 1000)
+                >> flatten_add_lengths
+            )
+            >> with_getitem(1,
+                flatten_add_lengths)
+            >> add_tuples
+            >> with_flatten(
+                Maxout(token_vector_width, token_vector_width)
+                >> Softmax(nr_class, token_vector_width)
+            )
+        )
+    return model
+
+
 def build_text_classifier(nr_class, width=64, **cfg):
     nr_vector = cfg.get('nr_vector', 200)
     with Model.define_operators({'>>': chain, '+': add, '|': concatenate, '**': clone}):
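
build_tagger_model leans on two combinators: >> (chain, feeding one layer's output into the next) and with_getitem(i, layer), which applies layer to the i-th element of a tuple while leaving the other elements untouched. A rough, forward-only sketch of that routing in plain Python (the _sketch helpers are invented; the real Thinc versions also wire up the backward pass):

    def chain_sketch(*layers):
        def forward(X):
            for layer in layers:
                X = layer(X)
            return X
        return forward

    def with_getitem_sketch(idx, layer):
        def forward(items):
            items = list(items)
            items[idx] = layer(items[idx])
            return tuple(items)
        return forward

    # (docs, tensors) stand-ins: transform element 0 and element 1 independently.
    pipeline = chain_sketch(
        with_getitem_sketch(0, lambda docs: [d.upper() for d in docs]),
        with_getitem_sketch(1, lambda tensors: [t * 2 for t in tensors]),
    )
    print(pipeline((["a", "b"], [1, 2])))   # (['A', 'B'], [2, 4])
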


@@ -42,7 +42,7 @@ from .compat import json_dumps
 from .attrs import ID, LOWER, PREFIX, SUFFIX, SHAPE, TAG, DEP, POS
 from ._ml import rebatch, Tok2Vec, flatten, get_col, doc2feats
-from ._ml import build_text_classifier
+from ._ml import build_text_classifier, build_tagger_model
 from .parts_of_speech import X
@@ -346,9 +346,7 @@ class NeuralTagger(BaseThincComponent):
     @classmethod
     def Model(cls, n_tags, token_vector_width):
-        return with_flatten(
-            chain(Maxout(token_vector_width, token_vector_width),
-                  Softmax(n_tags, token_vector_width)))
+        return build_tagger_model(n_tags, token_vector_width)

     def use_params(self, params):
         with self.model.use_params(params):
@@ -455,9 +453,7 @@ class NeuralLabeller(NeuralTagger):
     @classmethod
     def Model(cls, n_tags, token_vector_width):
-        return with_flatten(
-            chain(Maxout(token_vector_width, token_vector_width),
-                  Softmax(n_tags, token_vector_width)))
+        return build_tagger_model(n_tags, token_vector_width)

     def get_loss(self, docs, golds, scores):
         scores = self.model.ops.flatten(scores)