Update parser for fine tuning

This commit is contained in:
Matthew Honnibal 2017-08-08 15:34:17 -05:00
parent 5d837c3776
commit 88bf1cf87c
2 changed files with 14 additions and 14 deletions

View File

@ -222,11 +222,11 @@ def Tok2Vec(width, embed_size, preprocess=None):
asarray(Model.ops, dtype='uint64') asarray(Model.ops, dtype='uint64')
>> uniqued(embed, column=5) >> uniqued(embed, column=5)
>> LN(Maxout(width, width*4, pieces=3)) >> LN(Maxout(width, width*4, pieces=3))
>> Residual(ExtractWindow(nW=1) >> SELU(width, width*3)) >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width, width*3)))
>> Residual(ExtractWindow(nW=1) >> SELU(width, width*3)) >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
>> Residual(ExtractWindow(nW=1) >> SELU(width, width*3)) >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
>> Residual(ExtractWindow(nW=1) >> SELU(width, width*3)), >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)),
pad=4) pad=4)
) )
if preprocess not in (False, None): if preprocess not in (False, None):
tok2vec = preprocess >> tok2vec tok2vec = preprocess >> tok2vec
@ -432,8 +432,8 @@ def build_tagger_model(nr_class, token_vector_width, **cfg):
with Model.define_operators({'>>': chain, '+': add}): with Model.define_operators({'>>': chain, '+': add}):
# Input: (doc, tensor) tuples # Input: (doc, tensor) tuples
private_tok2vec = Tok2Vec(token_vector_width, 7500, preprocess=doc2feats()) private_tok2vec = Tok2Vec(token_vector_width, 7500, preprocess=doc2feats())
model = ( model = (
fine_tune(private_tok2vec) fine_tune(private_tok2vec)
>> with_flatten( >> with_flatten(
Maxout(token_vector_width, token_vector_width) Maxout(token_vector_width, token_vector_width)
@ -457,7 +457,7 @@ def build_text_classifier(nr_class, width=64, **cfg):
>> _flatten_add_lengths >> _flatten_add_lengths
>> with_getitem(0, >> with_getitem(0,
uniqued( uniqued(
(embed_lower | embed_prefix | embed_suffix | embed_shape) (embed_lower | embed_prefix | embed_suffix | embed_shape)
>> Maxout(width, width+(width//2)*3)) >> Maxout(width, width+(width//2)*3))
>> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3)) >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
>> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3)) >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
@ -478,7 +478,7 @@ def build_text_classifier(nr_class, width=64, **cfg):
>> zero_init(Affine(nr_class, nr_class*2, drop_factor=0.0)) >> zero_init(Affine(nr_class, nr_class*2, drop_factor=0.0))
>> logistic >> logistic
) )
model.lsuv = False model.lsuv = False
return model return model

View File

@ -44,7 +44,7 @@ from thinc.neural.util import get_array_module
from .. import util from .. import util
from ..util import get_async, get_cuda_stream from ..util import get_async, get_cuda_stream
from .._ml import zero_init, PrecomputableAffine, PrecomputableMaxouts from .._ml import zero_init, PrecomputableAffine, PrecomputableMaxouts
from .._ml import Tok2Vec, doc2feats, rebatch from .._ml import Tok2Vec, doc2feats, rebatch, fine_tune
from ..compat import json_dumps from ..compat import json_dumps
from . import _parse_features from . import _parse_features
@ -237,7 +237,7 @@ cdef class Parser:
token_vector_width = util.env_opt('token_vector_width', token_vector_width) token_vector_width = util.env_opt('token_vector_width', token_vector_width)
hidden_width = util.env_opt('hidden_width', hidden_width) hidden_width = util.env_opt('hidden_width', hidden_width)
parser_maxout_pieces = util.env_opt('parser_maxout_pieces', 2) parser_maxout_pieces = util.env_opt('parser_maxout_pieces', 2)
tensors = Tok2Vec(token_vector_width, 7500, preprocess=doc2feats()) tensors = fine_tune(Tok2Vec(token_vector_width, 7500, preprocess=doc2feats()))
if parser_maxout_pieces == 1: if parser_maxout_pieces == 1:
lower = PrecomputableAffine(hidden_width if depth >= 1 else nr_class, lower = PrecomputableAffine(hidden_width if depth >= 1 else nr_class,
nF=cls.nr_feature, nF=cls.nr_feature,
@ -367,7 +367,7 @@ cdef class Parser:
tokvecses = [tokvecses] tokvecses = [tokvecses]
tokvecs = self.model[0].ops.flatten(tokvecses) tokvecs = self.model[0].ops.flatten(tokvecses)
tokvecs += self.model[0].ops.flatten(self.model[0](docs)) tokvecs += self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
nr_state = len(docs) nr_state = len(docs)
nr_class = self.moves.n_moves nr_class = self.moves.n_moves
@ -419,7 +419,7 @@ cdef class Parser:
cdef int nr_class = self.moves.n_moves cdef int nr_class = self.moves.n_moves
cdef StateClass stcls, output cdef StateClass stcls, output
tokvecs = self.model[0].ops.flatten(tokvecses) tokvecs = self.model[0].ops.flatten(tokvecses)
tokvecs += self.model[0].ops.flatten(self.model[0](docs)) tokvecs += self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
cuda_stream = get_cuda_stream() cuda_stream = get_cuda_stream()
state2vec, vec2scores = self.get_batch_model(len(docs), tokvecs, state2vec, vec2scores = self.get_batch_model(len(docs), tokvecs,
cuda_stream, 0.0) cuda_stream, 0.0)
@ -460,7 +460,7 @@ cdef class Parser:
if isinstance(docs, Doc) and isinstance(golds, GoldParse): if isinstance(docs, Doc) and isinstance(golds, GoldParse):
docs = [docs] docs = [docs]
golds = [golds] golds = [golds]
my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs, drop=0.) my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=0.)
my_tokvecs = self.model[0].ops.flatten(my_tokvecs) my_tokvecs = self.model[0].ops.flatten(my_tokvecs)
tokvecs += my_tokvecs tokvecs += my_tokvecs