From 88bf1cf87c874c2e9fa0d88aa28db07907b6ad90 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Tue, 8 Aug 2017 15:34:17 -0500
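Subject: [PATCH] Update parser for fine-tuning

Wrap the parser's Tok2Vec in fine_tune() and feed it (docs, tokvecs)
tuples. In Tok2Vec, replace the SELU residual blocks with Maxout
(layer-normalized in the first block). Also strip trailing whitespace
in spacy/_ml.py.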

---
 spacy/_ml.py               | 18 +++++++++---------
 spacy/syntax/nn_parser.pyx | 10 +++++-----
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/spacy/_ml.py b/spacy/_ml.py
index d28f48c42..01f166b9f 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -222,11 +222,11 @@ def Tok2Vec(width, embed_size, preprocess=None):
                 asarray(Model.ops, dtype='uint64')
                 >> uniqued(embed, column=5)
                 >> LN(Maxout(width, width*4, pieces=3))
-                >> Residual(ExtractWindow(nW=1) >> SELU(width, width*3))
-                >> Residual(ExtractWindow(nW=1) >> SELU(width, width*3))
-                >> Residual(ExtractWindow(nW=1) >> SELU(width, width*3))
-                >> Residual(ExtractWindow(nW=1) >> SELU(width, width*3)),
-            pad=4)
+                >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width, width*3)))
+                >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
+                >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
+                >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)),
+                pad=4)
         )
         if preprocess not in (False, None):
             tok2vec = preprocess >> tok2vec
@@ -432,8 +432,8 @@ def build_tagger_model(nr_class, token_vector_width, **cfg):
     with Model.define_operators({'>>': chain, '+': add}):
         # Input: (doc, tensor) tuples
         private_tok2vec = Tok2Vec(token_vector_width, 7500, preprocess=doc2feats())
- 
-        model = ( 
+
+        model = (
             fine_tune(private_tok2vec)
             >> with_flatten(
                 Maxout(token_vector_width, token_vector_width)
@@ -457,7 +457,7 @@ def build_text_classifier(nr_class, width=64, **cfg):
             >> _flatten_add_lengths
             >> with_getitem(0,
                 uniqued(
-                  (embed_lower | embed_prefix | embed_suffix | embed_shape) 
+                  (embed_lower | embed_prefix | embed_suffix | embed_shape)
                   >> Maxout(width, width+(width//2)*3))
                 >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
                 >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
@@ -478,7 +478,7 @@ def build_text_classifier(nr_class, width=64, **cfg):
             >> zero_init(Affine(nr_class, nr_class*2, drop_factor=0.0))
             >> logistic
         )
- 
+
     model.lsuv = False
     return model
 
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 06c61656b..00835f697 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -44,7 +44,7 @@ from thinc.neural.util import get_array_module
 from .. import util
 from ..util import get_async, get_cuda_stream
 from .._ml import zero_init, PrecomputableAffine, PrecomputableMaxouts
-from .._ml import Tok2Vec, doc2feats, rebatch
+from .._ml import Tok2Vec, doc2feats, rebatch, fine_tune
 from ..compat import json_dumps
 
 from . import _parse_features
@@ -237,7 +237,7 @@ cdef class Parser:
         token_vector_width = util.env_opt('token_vector_width', token_vector_width)
         hidden_width = util.env_opt('hidden_width', hidden_width)
         parser_maxout_pieces = util.env_opt('parser_maxout_pieces', 2)
-        tensors = Tok2Vec(token_vector_width, 7500, preprocess=doc2feats())
+        tensors = fine_tune(Tok2Vec(token_vector_width, 7500, preprocess=doc2feats()))
         if parser_maxout_pieces == 1:
             lower = PrecomputableAffine(hidden_width if depth >= 1 else nr_class,
                         nF=cls.nr_feature,
@@ -367,7 +367,7 @@ cdef class Parser:
             tokvecses = [tokvecses]
 
         tokvecs = self.model[0].ops.flatten(tokvecses)
-        tokvecs += self.model[0].ops.flatten(self.model[0](docs))
+        tokvecs += self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
 
         nr_state = len(docs)
         nr_class = self.moves.n_moves
@@ -419,7 +419,7 @@ cdef class Parser:
         cdef int nr_class = self.moves.n_moves
         cdef StateClass stcls, output
         tokvecs = self.model[0].ops.flatten(tokvecses)
-        tokvecs += self.model[0].ops.flatten(self.model[0](docs))
+        tokvecs += self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
         cuda_stream = get_cuda_stream()
         state2vec, vec2scores = self.get_batch_model(len(docs), tokvecs,
                                                      cuda_stream, 0.0)
@@ -460,7 +460,7 @@ cdef class Parser:
         if isinstance(docs, Doc) and isinstance(golds, GoldParse):
             docs = [docs]
             golds = [golds]
-        my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs, drop=0.)
+        my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=0.)
         my_tokvecs = self.model[0].ops.flatten(my_tokvecs)
         tokvecs += my_tokvecs