Mirror of https://github.com/explosion/spaCy.git
Pass embed size correctly in tagger, and cache embeddings for efficiency

parent 1a59db1c86
commit ebe0f7f641
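Note on the caching half of this change: Thinc's uniqued wrapper (imported from thinc.api in the first hunk) evaluates the wrapped embed >> Maxout(...) sub-network only on the unique rows of a batch, keyed by one ID column, and scatters the results back to every duplicate row. In the diff, column=5 selects the ORTH column of the feature matrix (cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]), so tokens with the same orth share one embedding computation. Below is a minimal numpy sketch of that caching idea, not Thinc's actual implementation; the lookup table and the embed_unique helper are made up for illustration.

import numpy as np

def embed_unique(ids, table):
    """Look up embeddings, doing the work once per distinct id.

    ids:   1D array of token ids (running text repeats ids constantly).
    table: (n_vocab, width) embedding table.
    """
    # The unique ids, plus a map from each position back to its unique id.
    uniq, inverse = np.unique(ids, return_inverse=True)
    # The "expensive" step (standing in for embed >> Maxout(...)) runs only
    # on the unique ids.
    uniq_vectors = table[uniq]
    # Scatter the cached results back to every duplicate position.
    return uniq_vectors[inverse]

if __name__ == "__main__":
    rng = np.random.RandomState(0)
    table = rng.normal(size=(100, 8)).astype("float32")
    ids = np.array([5, 7, 5, 5, 2, 7, 7, 2])
    vectors = embed_unique(ids, table)
    assert vectors.shape == (8, 8)
    # Identical ids share one computed row.
    assert np.array_equal(vectors[0], vectors[2])

Since running text repeats common words constantly, the expensive step runs on far fewer rows than the batch contains.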
spacy/_ml.py | 30

--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -23,8 +23,10 @@ from thinc.neural._classes.attention import ParametricAttention
 from thinc.linear.linear import LinearModel
 from thinc.api import uniqued, wrap, flatten_add_lengths
 
+
 from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP
 from .tokens.doc import Doc
+from . import util
 
 import numpy
 import io
@@ -208,6 +210,17 @@ class PrecomputableMaxouts(Model):
         return Yfp, backward
 
 
+def drop_layer(layer, factor=1.0):
+    def drop_layer_fwd(X, drop=0.):
+        drop *= factor
+        mask = layer.ops.get_dropout_mask((1,), drop)
+        if mask is not None and mask[0] == 0.:
+            return X, lambda dX, sgd=None: dX
+        else:
+            return layer.begin_update(X, drop=drop)
+    return wrap(drop_layer_fwd, layer)
+
+
 def Tok2Vec(width, embed_size, preprocess=None):
     cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
     with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}):
@@ -220,13 +233,13 @@ def Tok2Vec(width, embed_size, preprocess=None):
         tok2vec = (
             with_flatten(
                 asarray(Model.ops, dtype='uint64')
-                >> embed
-                >> Maxout(width, width*4, pieces=3)
-                >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
-                >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
-                >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
-                >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3)),
-                pad=4)
+                >> uniqued(embed >> Maxout(width, width*4, pieces=3), column=5)
+                >> Residual(
+                    (ExtractWindow(nW=1) >> ReLu(width, width*3))
+                    >> (ExtractWindow(nW=1) >> ReLu(width, width*3))
+                    >> (ExtractWindow(nW=1) >> ReLu(width, width*3))
+                    >> (ExtractWindow(nW=1) >> ReLu(width, width*3))
+                ), pad=4)
         )
         if preprocess not in (False, None):
             tok2vec = preprocess >> tok2vec
@@ -430,9 +443,10 @@ def getitem(i):
     return layerize(getitem_fwd)
 
 def build_tagger_model(nr_class, token_vector_width, **cfg):
+    embed_size = util.env_opt('embed_size', 7500)
     with Model.define_operators({'>>': chain, '+': add}):
         # Input: (doc, tensor) tuples
-        private_tok2vec = Tok2Vec(token_vector_width, 7500, preprocess=doc2feats())
+        private_tok2vec = Tok2Vec(token_vector_width, embed_size, preprocess=doc2feats())
 
         model = (
             fine_tune(private_tok2vec)
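Note on the new drop_layer helper (second hunk): it skips a whole layer with some probability during training. It draws a single dropout mask, and if that mask comes up zero it returns the input unchanged with an identity backward pass; otherwise it calls the wrapped layer as usual. Below is a framework-free sketch of the same control flow, assuming the usual begin_update convention of returning (output, backprop_callback); the relu toy layer is only for demonstration.

import numpy as np

def drop_layer(forward, factor=1.0, seed=0):
    """Skip the wrapped layer with probability drop * factor during training."""
    rng = np.random.RandomState(seed)

    def drop_layer_fwd(X, drop=0.0):
        drop *= factor
        if drop and rng.uniform() < drop:
            # Layer dropped: behave as the identity; gradients pass straight through.
            return X, lambda dX: dX
        return forward(X, drop=drop)

    return drop_layer_fwd

if __name__ == "__main__":
    def relu(X, drop=0.0):
        Y = np.maximum(X, 0.0)
        return Y, lambda dY: dY * (X > 0)

    layer = drop_layer(relu)
    X = np.array([-1.0, 2.0, -3.0, 4.0])
    for _ in range(3):
        Y, backprop = layer(X, drop=0.5)   # sometimes relu(X), sometimes X itself
        print(Y, backprop(np.ones_like(Y)))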
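Note on the regrouped convolutional stack (third hunk): previously each ExtractWindow(nW=1) >> ReLu(...) block carried its own Residual wrapper, while afterwards a single Residual wraps the chain of four blocks, so one skip connection spans the whole stack. Below is a small numpy sketch of the difference, using residual(f)(x) = x + f(x) and toy blocks standing in for the window/ReLU layers.

import numpy as np

def residual(f):
    # A residual connection: y = x + f(x) (shapes must match).
    return lambda x: x + f(x)

def chain(*fs):
    def chained(x):
        for f in fs:
            x = f(x)
        return x
    return chained

if __name__ == "__main__":
    # Four toy blocks standing in for ExtractWindow(nW=1) >> ReLu(width, width*3).
    blocks = [lambda x, i=i: np.tanh(x + 0.1 * i) for i in range(4)]
    x = np.linspace(-1.0, 1.0, 5)

    per_block_skip = chain(*[residual(b) for b in blocks])   # old wiring
    whole_stack_skip = residual(chain(*blocks))              # new wiring

    print(per_block_skip(x))
    print(whole_stack_skip(x))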
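Note on the embed size fix (last hunk): build_tagger_model previously hard-coded 7500 as the embedding table size of its private Tok2Vec; the change routes it through util.env_opt('embed_size', 7500), so the size is configurable rather than fixed. Below is a rough sketch of that read-an-option-with-a-default pattern; the exact environment-variable naming (SPACY_EMBED_SIZE here) and type handling in spacy.util.env_opt are assumptions for illustration.

import os

def env_opt(name, default=None):
    """Read a numeric option from the environment, falling back to a default.

    Assumed convention: option 'embed_size' maps to env var 'SPACY_EMBED_SIZE'.
    """
    key = "SPACY_" + name.upper()
    if key not in os.environ:
        return default
    # Keep the type of the default (int vs float).
    caster = float if isinstance(default, float) else int
    return caster(os.environ[key])

if __name__ == "__main__":
    print(env_opt("embed_size", 7500))          # -> 7500 (nothing set)
    os.environ["SPACY_EMBED_SIZE"] = "20000"
    print(env_opt("embed_size", 7500))          # -> 20000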