diff --git a/spacy/_ml.py b/spacy/_ml.py
index 8adacdfda..7dbb137b8 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -469,30 +469,83 @@ def build_tagger_model(nr_class, token_vector_width, **cfg):
     return model
 
 
+@layerize
+def SpacyVectors(docs, drop=0.):
+    xp = get_array_module(docs[0].vocab.vectors.data)
+    width = docs[0].vocab.vectors.data.shape[1]
+    batch = []
+    for doc in docs:
+        # Look up each token's row in the pretrained vectors table;
+        # tokens missing from the table fall back to row 0.
+        indices = numpy.zeros((len(doc),), dtype='i')
+        for i, word in enumerate(doc):
+            if word.orth in doc.vocab.vectors.key2row:
+                indices[i] = doc.vocab.vectors.key2row[word.orth]
+            else:
+                indices[i] = 0
+        vectors = doc.vocab.vectors.data[indices]
+        batch.append(vectors)
+    return batch, None
+
+
+def foreach(layer, drop_factor=1.0):
+    '''Map a layer across elements in a list.'''
+    def foreach_fwd(Xs, drop=0.):
+        drop *= drop_factor
+        ys = []
+        backprops = []
+        for X in Xs:
+            y, bp_y = layer.begin_update(X, drop=drop)
+            ys.append(y)
+            backprops.append(bp_y)
+        def foreach_bwd(d_ys, sgd=None):
+            d_Xs = []
+            for d_y, bp_y in zip(d_ys, backprops):
+                if d_y is not None and bp_y is not None:
+                    d_Xs.append(bp_y(d_y, sgd=sgd))
+                else:
+                    d_Xs.append(None)
+            return d_Xs
+        return ys, foreach_bwd
+    model = wrap(foreach_fwd, layer)
+    return model
+
+
 def build_text_classifier(nr_class, width=64, **cfg):
     nr_vector = cfg.get('nr_vector', 200)
-    with Model.define_operators({'>>': chain, '+': add, '|': concatenate, '**': clone}):
-        embed_lower = HashEmbed(width, nr_vector, column=1)
-        embed_prefix = HashEmbed(width//2, nr_vector, column=2)
-        embed_suffix = HashEmbed(width//2, nr_vector, column=3)
-        embed_shape = HashEmbed(width//2, nr_vector, column=4)
+    with Model.define_operators({'>>': chain, '+': add, '|': concatenate,
+                                 '**': clone}):
+        lower = HashEmbed(width, nr_vector, column=1)
+        prefix = HashEmbed(width//2, nr_vector, column=2)
+        suffix = HashEmbed(width//2, nr_vector, column=3)
+        shape = HashEmbed(width//2, nr_vector, column=4)
+
+        trained_vectors = (
+            FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
+            >> with_flatten(
+                (lower | prefix | suffix | shape)
+            )
+        )
+
+        convolution = (
+            ExtractWindow(nW=1)
+            >> LN(Maxout(width, width*3))
+        )
 
         cnn_model = (
-            FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE])
-            >> _flatten_add_lengths
-            >> with_getitem(0,
-                uniqued(
-                    (embed_lower | embed_prefix | embed_suffix | embed_shape)
-                    >> Maxout(width, width+(width//2)*3))
-                >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
-                >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
-                >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
-            )
-            >> ParametricAttention(width,)
+            # TODO: make concatenate support lists
+            concatenate_lists(trained_vectors, SpacyVectors)
+            >> with_flatten(
+                # NB: assumes width=64 embeddings and 300-dim pretrained vectors
+                LN(Maxout(width, 64+32+32+32+300))
+                >> convolution ** 4, pad=4)
+            >> flatten_add_lengths
+            >> ParametricAttention(width)
             >> Pooling(sum_pool)
             >> ReLu(width, width)
             >> zero_init(Affine(nr_class, width, drop_factor=0.0))
         )
+
         linear_model = (
             _preprocess_doc
             >> LinearModel(nr_class, drop_factor=0.)
@@ -507,3 +560,35 @@ def build_text_classifier(nr_class, width=64, **cfg):
     model.lsuv = False
     return model
 
+@layerize
+def flatten(seqs, drop=0.):
+    # Concatenate a list of sequences into one array, remembering the
+    # lengths so finish_update can unflatten the gradient.
+    ops = Model.ops
+    lengths = ops.asarray([len(seq) for seq in seqs], dtype='i')
+    def finish_update(d_X, sgd=None):
+        return ops.unflatten(d_X, lengths, pad=0)
+    X = ops.flatten(seqs, pad=0)
+    return X, finish_update
+
+
+def concatenate_lists(*layers, **kwargs):  # pragma: no cover
+    '''Compose two or more models `f`, `g`, etc, such that their outputs are
+    concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))`.
+    '''
+    if not layers:
+        return noop()
+    drop_factor = kwargs.get('drop_factor', 1.0)
+    ops = layers[0].ops
+    layers = [chain(layer, flatten) for layer in layers]
+    concat = concatenate(*layers)
+    def concatenate_lists_fwd(Xs, drop=0.):
+        drop *= drop_factor
+        lengths = ops.asarray([len(X) for X in Xs], dtype='i')
+        flat_y, bp_flat_y = concat.begin_update(Xs, drop=drop)
+        ys = ops.unflatten(flat_y, lengths)
+        def concatenate_lists_bwd(d_ys, sgd=None):
+            return bp_flat_y(ops.flatten(d_ys), sgd=sgd)
+        return ys, concatenate_lists_bwd
+    model = wrap(concatenate_lists_fwd, concat)
+    return model
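
Note (not part of the patch): for intuition, a minimal self-contained sketch of the flatten/concatenate/unflatten pattern that `concatenate_lists` implements, in plain numpy rather than thinc. `toy_concatenate_lists` and the lambda "layers" are hypothetical stand-ins, and only the forward pass is shown:

    import numpy

    def toy_concatenate_lists(*fns):
        # Flatten the batch into one contiguous array, run every fn over it,
        # hstack the outputs feature-wise, then split back into one array
        # per input sequence.
        def forward(Xs):
            lengths = [len(X) for X in Xs]
            flat = numpy.concatenate(Xs, axis=0)
            flat_y = numpy.hstack([fn(flat) for fn in fns])
            starts = numpy.cumsum(lengths)[:-1]
            return numpy.split(flat_y, starts, axis=0)
        return forward

    # Two toy "layers", each mapping (n, 4) -> (n, 4); their outputs
    # concatenate to (n, 8), and the batch's list structure is preserved.
    model = toy_concatenate_lists(lambda X: X, lambda X: 2 * X)
    batch = [numpy.ones((3, 4), dtype='f'), numpy.ones((5, 4), dtype='f')]
    ys = model(batch)
    assert [y.shape for y in ys] == [(3, 8), (5, 8)]

Flattening before concatenating means `concatenate` only ever sees contiguous arrays, which is why the patch chains each layer with `flatten` (and why the TODO asks for list support in `concatenate` itself); the real layer also wires up the backward pass, flattening the incoming gradients with `ops.flatten` before handing them to `bp_flat_y`.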