From 88bf1cf87c874c2e9fa0d88aa28db07907b6ad90 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 8 Aug 2017 15:34:17 -0500 Subject: [PATCH] Update parser for fine tuning --- spacy/_ml.py | 18 +++++++++--------- spacy/syntax/nn_parser.pyx | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/spacy/_ml.py b/spacy/_ml.py index d28f48c42..01f166b9f 100644 --- a/spacy/_ml.py +++ b/spacy/_ml.py @@ -222,11 +222,11 @@ def Tok2Vec(width, embed_size, preprocess=None): asarray(Model.ops, dtype='uint64') >> uniqued(embed, column=5) >> LN(Maxout(width, width*4, pieces=3)) - >> Residual(ExtractWindow(nW=1) >> SELU(width, width*3)) - >> Residual(ExtractWindow(nW=1) >> SELU(width, width*3)) - >> Residual(ExtractWindow(nW=1) >> SELU(width, width*3)) - >> Residual(ExtractWindow(nW=1) >> SELU(width, width*3)), - pad=4) + >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width, width*3))) + >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)) + >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)) + >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)), + pad=4) ) if preprocess not in (False, None): tok2vec = preprocess >> tok2vec @@ -432,8 +432,8 @@ def build_tagger_model(nr_class, token_vector_width, **cfg): with Model.define_operators({'>>': chain, '+': add}): # Input: (doc, tensor) tuples private_tok2vec = Tok2Vec(token_vector_width, 7500, preprocess=doc2feats()) - - model = ( + + model = ( fine_tune(private_tok2vec) >> with_flatten( Maxout(token_vector_width, token_vector_width) @@ -457,7 +457,7 @@ def build_text_classifier(nr_class, width=64, **cfg): >> _flatten_add_lengths >> with_getitem(0, uniqued( - (embed_lower | embed_prefix | embed_suffix | embed_shape) + (embed_lower | embed_prefix | embed_suffix | embed_shape) >> Maxout(width, width+(width//2)*3)) >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3)) >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3)) @@ -478,7 +478,7 @@ def build_text_classifier(nr_class, width=64, **cfg): >> zero_init(Affine(nr_class, nr_class*2, drop_factor=0.0)) >> logistic ) - + model.lsuv = False return model diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 06c61656b..00835f697 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -44,7 +44,7 @@ from thinc.neural.util import get_array_module from .. import util from ..util import get_async, get_cuda_stream from .._ml import zero_init, PrecomputableAffine, PrecomputableMaxouts -from .._ml import Tok2Vec, doc2feats, rebatch +from .._ml import Tok2Vec, doc2feats, rebatch, fine_tune from ..compat import json_dumps from . import _parse_features @@ -237,7 +237,7 @@ cdef class Parser: token_vector_width = util.env_opt('token_vector_width', token_vector_width) hidden_width = util.env_opt('hidden_width', hidden_width) parser_maxout_pieces = util.env_opt('parser_maxout_pieces', 2) - tensors = Tok2Vec(token_vector_width, 7500, preprocess=doc2feats()) + tensors = fine_tune(Tok2Vec(token_vector_width, 7500, preprocess=doc2feats())) if parser_maxout_pieces == 1: lower = PrecomputableAffine(hidden_width if depth >= 1 else nr_class, nF=cls.nr_feature, @@ -367,7 +367,7 @@ cdef class Parser: tokvecses = [tokvecses] tokvecs = self.model[0].ops.flatten(tokvecses) - tokvecs += self.model[0].ops.flatten(self.model[0](docs)) + tokvecs += self.model[0].ops.flatten(self.model[0]((docs, tokvecses))) nr_state = len(docs) nr_class = self.moves.n_moves @@ -419,7 +419,7 @@ cdef class Parser: cdef int nr_class = self.moves.n_moves cdef StateClass stcls, output tokvecs = self.model[0].ops.flatten(tokvecses) - tokvecs += self.model[0].ops.flatten(self.model[0](docs)) + tokvecs += self.model[0].ops.flatten(self.model[0]((docs, tokvecses))) cuda_stream = get_cuda_stream() state2vec, vec2scores = self.get_batch_model(len(docs), tokvecs, cuda_stream, 0.0) @@ -460,7 +460,7 @@ cdef class Parser: if isinstance(docs, Doc) and isinstance(golds, GoldParse): docs = [docs] golds = [golds] - my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs, drop=0.) + my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=0.) my_tokvecs = self.model[0].ops.flatten(my_tokvecs) tokvecs += my_tokvecs