diff --git a/spacy/_ml.py b/spacy/_ml.py
index da624c8d6..9a541d46f 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -1,8 +1,7 @@
-from thinc.api import layerize, chain, clone, concatenate, add
+from thinc.api import layerize, chain, clone
 from thinc.neural import Model, Maxout, Softmax
-from thinc.neural._classes.static_vectors import StaticVectors
 from thinc.neural._classes.hash_embed import HashEmbed
-from thinc.neural._classes.convolution import ExtractWindow
+from .attrs import TAG, DEP
 
 
 def get_col(idx):
@@ -17,19 +16,19 @@ def build_model(state2vec, width, depth, nr_class):
     return model
 
 
-def build_parser_state2vec(tag_vectors, dep_vectors, **cfg):
-    embed_tags = _reshape(chain(get_col(0), tag_vectors))
-    embed_deps = _reshape(chain(get_col(1), dep_vectors))
+def build_parser_state2vec(width, nr_vector=1000, nF=1, nB=0, nS=1, nL=2, nR=2):
+    embed_tags = _reshape(chain(get_col(0), HashEmbed(width, nr_vector)))
+    embed_deps = _reshape(chain(get_col(1), HashEmbed(width, nr_vector)))
+    ops = embed_tags.ops
     attr_names = ops.asarray([TAG, DEP], dtype='i')
+    extract = build_feature_extractor(attr_names, nF, nB, nS, nL, nR)
     def forward(states, drop=0.):
-        n_tokens = state.nr_context_tokens(nF, nB, nS, nL, nR)
-        for i, state in enumerate(states):
-            state.set_context_tokens(tokens[i], nF, nB, nS, nL, nR)
-            state.set_attributes(features[i], tokens[i], attr_names)
-            state.set_token_vectors(token_vectors[i], tokens[i])
-
-        tagvecs, bp_tag_vecs = embed_deps.begin_update(attr_vals, drop=drop)
-        depvecs, bp_dep_vecs = embed_tags.begin_update(attr_vals, drop=drop)
+        tokens, attr_vals, tokvecs = extract(states)
+        tagvecs, bp_tagvecs = embed_deps.begin_update(attr_vals, drop=drop)
+        depvecs, bp_depvecs = embed_tags.begin_update(attr_vals, drop=drop)
+
+        tokvecs = tokvecs.reshape((tokvecs.shape[0], tokvecs.shape[1] *
+                                   tokvecs.shape[2]))
 
         vector = ops.concatenate((tagvecs, depvecs, tokvecs))
 
@@ -38,6 +37,7 @@ def build_parser_state2vec(tag_vectors, dep_vectors, **cfg):
             d_depvecs, d_tagvecs, d_tokvecs = ops.backprop_concatenate(d_vector, shapes)
             bp_tagvecs(d_tagvecs)
             bp_depvecs(d_depvecs)
+            d_tokvecs = d_tokvecs.reshape((len(states), tokens.shape[1], tokvecs.shape[2]))
             return (d_tokvecs, tokens)
         return vector, backward
     model = layerize(forward)
@@ -45,11 +45,31 @@ def build_parser_state2vec(tag_vectors, dep_vectors, **cfg):
     return model
 
 
+def build_feature_extractor(attr_names, nF, nB, nS, nL, nR):
+    def forward(states, drop=0.):
+        ops = model.ops
+        n_tokens = states[0].nr_context_tokens(nF, nB, nS, nL, nR)
+        vector_length = states[0].token_vector_length
+        tokens = ops.allocate((len(states), n_tokens), dtype='i')
+        features = ops.allocate((len(states), n_tokens, attr_names.shape[0]), dtype='i')
+        tokvecs = ops.allocate((len(states), n_tokens, vector_length), dtype='f')
+        for i, state in enumerate(states):
+            state.set_context_tokens(tokens[i], nF, nB, nS, nL, nR)
+            state.set_attributes(features[i], tokens[i], attr_names)
+            state.set_token_vectors(tokvecs[i], tokens[i])
+        def backward(d_features, sgd=None):
+            return d_features
+        return (tokens, features, tokvecs), backward
+    model = layerize(forward)
+    return model
+
+
 def _reshape(layer):
     def forward(X, drop=0.):
         Xh = X.reshape((X.shape[0] * X.shape[1], X.shape[2]))
         yh, bp_yh = layer.begin_update(Xh, drop=drop)
         n = X.shape[0]
+        old_shape = X.shape
         def backward(d_y, sgd=None):
             d_yh = d_y.reshape((n, d_y.size / n))
             d_Xh = bp_yh(d_yh, sgd)
@@ -59,7 +79,9 @@ def _reshape(layer):
     model._layers.append(layer)
     return model
 
-
+#from thinc.api import layerize, chain, clone, concatenate, add
+# from thinc.neural._classes.convolution import ExtractWindow
+# from thinc.neural._classes.static_vectors import StaticVectors
 
 #def build_tok2vec(lang, width, depth, embed_size, cols):
 #    with Model.define_operators({'>>': chain, '|': concatenate, '**': clone}):
@@ -73,7 +95,3 @@ def _reshape(layer):
 #            >> (ExtractWindow(nW=1) >> Maxout(width, width*3)) ** depth
 #        )
 #    return tok2vec
-
-
-if __name__ == '__main__':
-    test_build_model()