diff --git a/spacy/_ml.py b/spacy/_ml.py index 39041cc22..33c6f378b 100644 --- a/spacy/_ml.py +++ b/spacy/_ml.py @@ -23,8 +23,10 @@ from thinc.neural._classes.attention import ParametricAttention from thinc.linear.linear import LinearModel from thinc.api import uniqued, wrap, flatten_add_lengths + from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP from .tokens.doc import Doc +from . import util import numpy import io @@ -208,6 +210,17 @@ class PrecomputableMaxouts(Model): return Yfp, backward +def drop_layer(layer, factor=1.0): + def drop_layer_fwd(X, drop=0.): + drop *= factor + mask = layer.ops.get_dropout_mask((1,), drop) + if mask is not None and mask[0] == 0.: + return X, lambda dX, sgd=None: dX + else: + return layer.begin_update(X, drop=drop) + return wrap(drop_layer_fwd, layer) + + def Tok2Vec(width, embed_size, preprocess=None): cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH] with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}): @@ -220,13 +233,13 @@ def Tok2Vec(width, embed_size, preprocess=None): tok2vec = ( with_flatten( asarray(Model.ops, dtype='uint64') - >> embed - >> Maxout(width, width*4, pieces=3) - >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3)) - >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3)) - >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3)) - >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3)), - pad=4) + >> uniqued(embed >> Maxout(width, width*4, pieces=3), column=5) + >> Residual( + (ExtractWindow(nW=1) >> ReLu(width, width*3)) + >> (ExtractWindow(nW=1) >> ReLu(width, width*3)) + >> (ExtractWindow(nW=1) >> ReLu(width, width*3)) + >> (ExtractWindow(nW=1) >> ReLu(width, width*3)) + ), pad=4) ) if preprocess not in (False, None): tok2vec = preprocess >> tok2vec @@ -430,9 +443,10 @@ def getitem(i): return layerize(getitem_fwd) def build_tagger_model(nr_class, token_vector_width, **cfg): + embed_size = util.env_opt('embed_size', 7500) with Model.define_operators({'>>': chain, '+': add}): # Input: (doc, tensor) tuples - private_tok2vec = Tok2Vec(token_vector_width, 7500, preprocess=doc2feats()) + private_tok2vec = Tok2Vec(token_vector_width, embed_size, preprocess=doc2feats()) model = ( fine_tune(private_tok2vec)