diff --git a/spacy/_ml.py b/spacy/_ml.py index 31d811084..4dbc7cb92 100644 --- a/spacy/_ml.py +++ b/spacy/_ml.py @@ -11,6 +11,7 @@ from thinc.misc import LayerNorm as LN from thinc.api import add, layerize, chain, clone, concatenate, with_flatten from thinc.api import FeatureExtracter, with_getitem, flatten_add_lengths from thinc.api import uniqued, wrap, noop +from thinc.api import with_square_sequences from thinc.linear.linear import LinearModel from thinc.neural.ops import NumpyOps, CupyOps from thinc.neural.util import get_array_module, copy_array @@ -26,6 +27,10 @@ from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE from .errors import Errors from . import util +try: + import torch.nn +except: + torch = None VECTORS_KEY = 'spacy_pretrained_vectors' @@ -245,11 +250,19 @@ def link_vectors_to_models(vocab): thinc.extra.load_nlp.VECTORS[(ops.device, vectors.name)] = data +def PyTorchBiLSTM(nO, nI, depth, dropout=0.2): + if depth == 0: + return noop() + model = torch.nn.LSTM(nI, nO//2, depth, bidirectional=True, dropout=dropout) + return with_square_sequences(PyTorchWrapperRNN(model)) + + def Tok2Vec(width, embed_size, **kwargs): pretrained_vectors = kwargs.get('pretrained_vectors', None) cnn_maxout_pieces = kwargs.get('cnn_maxout_pieces', 2) subword_features = kwargs.get('subword_features', True) conv_depth = kwargs.get('conv_depth', 4) + bilstm_depth = kwargs.get('bilstm_depth', 0) cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH] with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add, '*': reapply}): @@ -293,6 +306,7 @@ def Tok2Vec(width, embed_size, **kwargs): embed >> convolution ** conv_depth, pad=conv_depth ) + >> PyTorchBiLSTM(width, width, bilstm_depth) ) # Work around thinc API limitations :(. TODO: Revise in Thinc 7 tok2vec.nO = width