diff --git a/spacy/_ml.py b/spacy/_ml.py
index f1ded666e..5f8ce9470 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -19,7 +19,7 @@
 from thinc.api import FeatureExtracter, with_getitem
 from thinc.neural.pooling import Pooling, max_pool, mean_pool, sum_pool
 from thinc.neural._classes.attention import ParametricAttention
 from thinc.linear.linear import LinearModel
-from thinc.api import uniqued, wrap
+from thinc.api import uniqued, wrap, flatten_add_lengths
 from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP
 from .tokens.doc import Doc
@@ -53,6 +53,27 @@ def _logistic(X, drop=0.):
     return Y, logistic_bwd
 
 
+@layerize
+def add_tuples(X, drop=0.):
+    """Given a pair of sequences, where each sequence is a (vals, length)
+    tuple, sum the values and return a single sequence.
+
+    If the input is:
+        ((vals1, length), (vals2, length))
+    the output is:
+        (vals1+vals2, length)
+
+    vals is a single tensor for the whole batch.
+    """
+    (vals1, length1), (vals2, length2) = X
+    assert length1 == length2
+
+    def add_tuples_bwd(dY, sgd=None):
+        return (dY, dY)
+
+    return (vals1+vals2, length1), add_tuples_bwd
+
+
 def _zero_init(model):
     def _zero_init_impl(self, X, y):
         self.W.fill(0)
@@ -61,6 +82,7 @@ def _zero_init(model):
         model.W.fill(0.)
     return model
 
+
 @layerize
 def _preprocess_doc(docs, drop=0.):
     keys = [doc.to_array([LOWER]) for doc in docs]
@@ -72,7 +94,6 @@ def _preprocess_doc(docs, drop=0.):
     return (keys, vals, lengths), None
 
 
-
 def _init_for_precomputed(W, ops):
     if (W**2).sum() != 0.:
         return
@@ -80,6 +101,7 @@
     ops.xavier_uniform_init(reshaped)
     W[:] = reshaped.reshape(W.shape)
 
+
 @describe.on_data(_set_dimensions_if_needed)
 @describe.attributes(
     nI=Dimension("Input size"),
@@ -323,6 +345,21 @@ def get_token_vectors(tokens_attrs_vectors, drop=0.):
     return vectors, backward
 
 
+def fine_tune(model1, combine=None):
+    def fine_tune_fwd(docs, drop=0.):
+        X1, bp_X1 = model1.begin_update(docs)
+        lengths = [len(doc) for doc in docs]
+        X2 = model1.ops.flatten(X1)
+
+        def fine_tune_bwd(d_output, sgd=None):
+            bp_X1(d_output, sgd=sgd)
+            return d_output
+
+        return (X1+X2, lengths), fine_tune_bwd
+    model = wrap(fine_tune_fwd)
+    return model
+
+
 @layerize
 def flatten(seqs, drop=0.):
     if isinstance(seqs[0], numpy.ndarray):
@@ -370,6 +407,35 @@ def preprocess_doc(docs, drop=0.):
     return (keys, vals, lengths), None
 
 
+def build_tagger_model(nr_class, token_vector_width, **cfg):
+    with Model.define_operators({'>>': chain, '+': add}):
+        # Input: (doc, tensor) tuples
+        embed_docs = with_getitem(0,
+            FeatureExtracter([NORM])
+            >> HashEmbed(token_vector_width, 1000)
+            >> flatten_add_lengths
+        )
+
+        model = (
+            fine_tune(embed_docs)
+            >> with_getitem(0,
+                FeatureExtracter([NORM])
+                >> HashEmbed(token_vector_width, 1000)
+                >> flatten_add_lengths
+            )
+            >> with_getitem(1,
+                flatten_add_lengths)
+            >> add_tuples
+            >> with_flatten(
+                Maxout(token_vector_width, token_vector_width)
+                >> Softmax(nr_class, token_vector_width)
+            )
+        )
+    return model
+
+
 def build_text_classifier(nr_class, width=64, **cfg):
     nr_vector = cfg.get('nr_vector', 200)
     with Model.define_operators({'>>': chain, '+': add, '|': concatenate, '**': clone}):
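Note: the sketch below is a minimal numpy illustration (not spaCy/thinc code) of the (values, lengths) batch representation that flatten_add_lengths produces and that add_tuples sums in the hunk above. The names flatten_with_lengths, branch_a and branch_b are hypothetical, used only to show the data flow.

import numpy

def flatten_with_lengths(seqs):
    # Concatenate a list of (n_tokens, width) arrays into one array,
    # remembering each doc's length so the batch can be unflattened later.
    lengths = [len(seq) for seq in seqs]
    return numpy.concatenate(seqs), lengths

# Two docs of 3 and 2 tokens with width-4 vectors, coming from two branches.
branch_a = [numpy.ones((3, 4)), numpy.ones((2, 4))]
branch_b = [numpy.full((3, 4), 2.), numpy.full((2, 4), 2.)]

vals1, lengths1 = flatten_with_lengths(branch_a)
vals2, lengths2 = flatten_with_lengths(branch_b)
assert lengths1 == lengths2  # what the assert in add_tuples enforces

# The forward computation add_tuples performs on the pair of tuples:
summed = vals1 + vals2
print(summed.shape, lengths1)  # (5, 4) [3, 2]

The two branches must flatten to the same per-doc lengths; only the values are summed, and the backward pass hands the same gradient back to each branch.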
diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index 947f0a1f1..b96387351 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -42,7 +42,7 @@
 from .compat import json_dumps
 from .attrs import ID, LOWER, PREFIX, SUFFIX, SHAPE, TAG, DEP, POS
 from ._ml import rebatch, Tok2Vec, flatten, get_col, doc2feats
-from ._ml import build_text_classifier
+from ._ml import build_text_classifier, build_tagger_model
 from .parts_of_speech import X
@@ -346,10 +346,8 @@ class NeuralTagger(BaseThincComponent):
 
     @classmethod
     def Model(cls, n_tags, token_vector_width):
-        return with_flatten(
-            chain(Maxout(token_vector_width, token_vector_width),
-                  Softmax(n_tags, token_vector_width)))
-
+        return build_tagger_model(n_tags, token_vector_width)
+
     def use_params(self, params):
         with self.model.use_params(params):
             yield
@@ -455,10 +453,8 @@ class NeuralLabeller(NeuralTagger):
 
     @classmethod
     def Model(cls, n_tags, token_vector_width):
-        return with_flatten(
-            chain(Maxout(token_vector_width, token_vector_width),
-                  Softmax(n_tags, token_vector_width)))
-
+        return build_tagger_model(n_tags, token_vector_width)
+
     def get_loss(self, docs, golds, scores):
         scores = self.model.ops.flatten(scores)
         cdef int idx = 0
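Note: a plain-Python sketch of the closure pattern that fine_tune follows; switching NeuralTagger.Model and NeuralLabeller.Model to build_tagger_model is what routes the tagger's gradient through this wrapper. toy_model and fine_tune_like are hypothetical stand-ins, not spaCy/thinc API.

import numpy

def toy_model(X):
    # Hypothetical stand-in for the wrapped model: doubles its input and
    # returns (output, backprop callback), mirroring the begin_update
    # convention used in the patch.
    def backprop(d_output):
        return 2. * d_output  # gradient of y = 2 * x
    return 2. * X, backprop

def fine_tune_like(wrapped):
    # Run the wrapped model, add its output to the input, and in the
    # backward pass send the gradient both into the wrapped model and
    # straight through to whatever produced the input. Lengths are
    # omitted to keep the sketch minimal.
    def forward(X):
        Y, bp_Y = wrapped(X)
        def backward(d_output):
            bp_Y(d_output)    # update path into the wrapped embedding
            return d_output   # pass-through path to the upstream layer
        return X + Y, backward
    return forward

model = fine_tune_like(toy_model)
X = numpy.ones((2, 3))
Y, backward = model(X)                      # Y == X + 2*X
d_upstream = backward(numpy.ones_like(Y))
print(Y[0, 0], d_upstream[0, 0])            # 3.0 1.0

The pass-through return means the wrapped embedding and the layer that produced the input both receive the same gradient, which is the intent of fine-tuning the shared representation from the tagger.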