Restore state of _ml.py

commit 85794c1167
parent 426f84937f

spacy/_ml.py | 90 changes

--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -5,6 +5,7 @@ from thinc.neural._classes.hash_embed import HashEmbed
 from thinc.neural.ops import NumpyOps, CupyOps
 from thinc.neural.util import get_array_module
 import random
+import cytoolz
 
 from thinc.neural._classes.convolution import ExtractWindow
 from thinc.neural._classes.static_vectors import StaticVectors
@@ -26,6 +27,7 @@ from thinc.neural._classes.rnn import BiLSTM
 
 from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP
 from .tokens.doc import Doc
+from . import util
 
 import numpy
 import io
@@ -56,6 +58,27 @@ def _logistic(X, drop=0.):
     return Y, logistic_bwd
 
 
+@layerize
+def add_tuples(X, drop=0.):
+    """Given inputs of sequence pairs, where each sequence is (vals, length),
+    sum the values, returning a single sequence.
+
+    If the input is:
+    ((vals1, length), (vals2, length))
+    the output is:
+    (vals1+vals2, length)
+
+    vals is a single tensor for the whole batch.
+    """
+    (vals1, length1), (vals2, length2) = X
+    assert length1 == length2
+
+    def add_tuples_bwd(dY, sgd=None):
+        return (dY, dY)
+
+    return (vals1+vals2, length1), add_tuples_bwd
+
+
 def _zero_init(model):
     def _zero_init_impl(self, X, y):
         self.W.fill(0)
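An aside on the added add_tuples layer: since layerize turns the function into a thinc model, its behaviour is easy to check in isolation. A minimal sketch, assuming thinc v6's begin_update API and small dummy inputs (not part of the commit):

```python
import numpy
from spacy._ml import add_tuples  # the layer this hunk adds

vals1 = numpy.ones((6, 4), dtype='f')        # 6 tokens total, width 4
vals2 = numpy.full((6, 4), 2., dtype='f')
lengths = [2, 4]                             # two sequences, concatenated

X = ((vals1, lengths), (vals2, lengths))
(summed, out_lengths), backprop = add_tuples.begin_update(X)
assert (summed == 3.).all()                  # element-wise sum of the values
d1, d2 = backprop(numpy.ones_like(summed))   # same gradient routed to both inputs
```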
@@ -64,6 +87,7 @@ def _zero_init(model):
         model.W.fill(0.)
     return model
 
+
 @layerize
 def _preprocess_doc(docs, drop=0.):
     keys = [doc.to_array([LOWER]) for doc in docs]
@@ -75,7 +99,6 @@ def _preprocess_doc(docs, drop=0.):
     return (keys, vals, lengths), None
 
 
-
 def _init_for_precomputed(W, ops):
     if (W**2).sum() != 0.:
         return
@@ -83,6 +106,7 @@ def _init_for_precomputed(W, ops):
     ops.xavier_uniform_init(reshaped)
     W[:] = reshaped.reshape(W.shape)
 
+
@describe.on_data(_set_dimensions_if_needed)
@describe.attributes(
     nI=Dimension("Input size"),
@@ -187,10 +211,21 @@ class PrecomputableMaxouts(Model):
         return Yfp, backward
 
 
+def drop_layer(layer, factor=2.):
+    def drop_layer_fwd(X, drop=0.):
+        drop *= factor
+        mask = layer.ops.get_dropout_mask((1,), drop)
+        if mask is None or mask > 0:
+            return layer.begin_update(X, drop=drop)
+        else:
+            return X, lambda dX, sgd=None: dX
+    return wrap(drop_layer_fwd, layer)
+
+
 def Tok2Vec(width, embed_size, preprocess=None):
-    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE]
+    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
     with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}):
-        norm = get_col(cols.index(NORM))   >> HashEmbed(width, embed_size, name='embed_lower')
+        norm = get_col(cols.index(NORM))     >> HashEmbed(width, embed_size, name='embed_lower')
         prefix = get_col(cols.index(PREFIX)) >> HashEmbed(width, embed_size//2, name='embed_prefix')
         suffix = get_col(cols.index(SUFFIX)) >> HashEmbed(width, embed_size//2, name='embed_suffix')
         shape = get_col(cols.index(SHAPE))   >> HashEmbed(width, embed_size//2, name='embed_shape')
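The new drop_layer wrapper is a form of whole-layer dropout: it samples a one-element dropout mask, and when that mask comes up zero the wrapped layer is skipped for the entire batch, with the input passed through unchanged (otherwise the layer runs, receiving the inflated drop rate). A rough standalone sketch of the control flow, using plain numpy in place of thinc's ops (an illustration, not the commit's code):

```python
import numpy

def drop_layer_sketch(forward, factor=2.):
    """Wrap `forward` so the whole layer is skipped with probability drop * factor."""
    def fwd(X, drop=0.):
        p_skip = drop * factor
        # thinc's get_dropout_mask returns None when the rate is 0,
        # so the layer always runs at prediction time.
        if p_skip == 0. or numpy.random.uniform() >= p_skip:
            return forward(X)   # run the wrapped layer as usual
        return X                # identity: layer dropped for this batch
    return fwd

double = drop_layer_sketch(lambda X: X * 2)
out = double(numpy.ones(3), drop=0.25)   # skipped ~50% of the time (0.25 * 2)
```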
@@ -300,7 +335,8 @@ def zero_init(model):
 
 
 def doc2feats(cols=None):
-    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE]
+    if cols is None:
+        cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
     def forward(docs, drop=0.):
         feats = []
         for doc in docs:
@@ -337,25 +373,22 @@ def fine_tune(embedding, combine=None):
         vecs, bp_vecs = embedding.begin_update(docs, drop=drop)
         flat_tokvecs = embedding.ops.flatten(tokvecs)
         flat_vecs = embedding.ops.flatten(vecs)
-        alpha = model.mix
-        minus = 1-model.mix
         output = embedding.ops.unflatten(
-                   (alpha * flat_tokvecs + minus * flat_vecs), lengths)
+                   (model.mix[0] * flat_vecs + model.mix[1] * flat_tokvecs),
+                    lengths)
 
         def fine_tune_bwd(d_output, sgd=None):
+            bp_vecs(d_output, sgd=sgd)
             flat_grad = model.ops.flatten(d_output)
-            model.d_mix += flat_tokvecs.dot(flat_grad.T).sum()
-            model.d_mix += 1-flat_vecs.dot(flat_grad.T).sum()
-
-            bp_vecs([d_o * minus for d_o in d_output], sgd=sgd)
-            d_output = [d_o * alpha for d_o in d_output]
-            sgd(model._mem.weights, model._mem.gradient, key=model.id)
-            model.mix = model.ops.xp.minimum(model.mix, 1.0)
+            model.d_mix[1] += flat_tokvecs.dot(flat_grad.T).sum()
+            model.d_mix[0] += flat_vecs.dot(flat_grad.T).sum()
+            if sgd is not None:
+                sgd(model._mem.weights, model._mem.gradient, key=model.id)
             return d_output
         return output, fine_tune_bwd
     model = wrap(fine_tune_fwd, embedding)
-    model.mix = model._mem.add((model.id, 'mix'), (1,))
-    model.mix.fill(0.0)
+    model.mix = model._mem.add((model.id, 'mix'), (2,))
+    model.mix.fill(1.)
     model.d_mix = model._mem.add_gradient((model.id, 'd_mix'), (model.id, 'mix'))
     return model
 
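The substantive change to fine_tune: instead of a single coefficient alpha forming the convex combination alpha * tokvecs + (1 - alpha) * vecs, the model now learns two independent scalar weights, mix[0] for the freshly embedded vectors and mix[1] for the pretrained token vectors, both initialized to 1. A simplified numpy sketch of the forward mix and the textbook gradients for the two coefficients (illustration only, not the commit's exact accumulation):

```python
import numpy

def mix_forward(mix, flat_vecs, flat_tokvecs):
    # output = mix[0] * vecs + mix[1] * tokvecs, computed on the
    # flattened (total_tokens, width) arrays
    return mix[0] * flat_vecs + mix[1] * flat_tokvecs

def mix_gradients(flat_vecs, flat_tokvecs, flat_grad):
    # d_loss/d_mix[i] = sum of the output gradient times the input it scales
    return (flat_vecs * flat_grad).sum(), (flat_tokvecs * flat_grad).sum()

mix = numpy.array([1., 1.], dtype='f')   # the commit initializes both weights to 1.
vecs = numpy.random.randn(6, 4).astype('f')
tokvecs = numpy.random.randn(6, 4).astype('f')
output = mix_forward(mix, vecs, tokvecs)
d_mix0, d_mix1 = mix_gradients(vecs, tokvecs, numpy.ones_like(output))
```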
@@ -406,6 +439,27 @@ def preprocess_doc(docs, drop=0.):
     vals = ops.allocate(keys.shape[0]) + 1
     return (keys, vals, lengths), None
 
+def getitem(i):
+    def getitem_fwd(X, drop=0.):
+        return X[i], None
+    return layerize(getitem_fwd)
+
+def build_tagger_model(nr_class, token_vector_width, **cfg):
+    embed_size = util.env_opt('embed_size', 7500)
+    with Model.define_operators({'>>': chain, '+': add}):
+        # Input: (doc, tensor) tuples
+        private_tok2vec = Tok2Vec(token_vector_width, embed_size, preprocess=doc2feats())
+
+        model = (
+            fine_tune(private_tok2vec)
+            >> with_flatten(
+                Maxout(token_vector_width, token_vector_width)
+                >> Softmax(nr_class, token_vector_width)
+            )
+        )
+    model.nI = None
+    return model
+
 
 def build_text_classifier(nr_class, width=64, **cfg):
     nr_vector = cfg.get('nr_vector', 200)
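For orientation, the new build_tagger_model chains a private Tok2Vec (mixed with the pretrained tensor via fine_tune) into a per-token Maxout and Softmax; as the in-code comment notes, it consumes (doc, tensor) tuples. A hypothetical usage sketch (batch contents assumed, not from the commit):

```python
from spacy._ml import build_tagger_model

# 20 tag classes over 128-dimensional token vectors; embed_size
# defaults to 7500 via util.env_opt unless overridden.
model = build_tagger_model(20, 128)

# Training step on a batch: docs is a list of Doc objects, tokvecs the
# matching list of pretrained per-token tensors.
# tag_scores, backprop = model.begin_update((docs, tokvecs), drop=0.2)
```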
@@ -420,7 +474,7 @@ def build_text_classifier(nr_class, width=64, **cfg):
             >> _flatten_add_lengths
             >> with_getitem(0,
                 uniqued(
-                  (embed_lower | embed_prefix | embed_suffix | embed_shape) 
+                  (embed_lower | embed_prefix | embed_suffix | embed_shape)
                   >> Maxout(width, width+(width//2)*3))
                 >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
                 >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
@@ -441,7 +495,7 @@ def build_text_classifier(nr_class, width=64, **cfg):
             >> zero_init(Affine(nr_class, nr_class*2, drop_factor=0.0))
             >> logistic
         )
-  
+
     model.lsuv = False
     return model
 