Mirror of https://github.com/explosion/spaCy.git

Add reference version

parent 385b669c5d
commit c1ead81691

@@ -1,5 +1,6 @@
 from typing import List, Tuple, Any, Optional
 from thinc.api import Ops, Model, normal_init, chain, list2array, Linear
+from thinc.api import uniform_init
 from thinc.types import Floats1d, Floats2d, Floats3d, Ints2d, Floats4d
 import numpy
 from ..tokens.doc import Doc
@@ -27,7 +28,7 @@ def TransitionModel(
 
     return Model(
         name="parser_model",
-        forward=forward,
+        forward=_forward_reference,
         init=init,
         layers=[tok2vec_projected],
         refs={"tok2vec": tok2vec_projected},
@@ -184,6 +185,137 @@ def forward(model, docs_moves: Tuple[List[Doc], TransitionSystem], is_train: boo
         d_statevecs = model.ops.gemm(d_scores, upper_W)
         # Backprop through the maxout activation
         d_preacts = model.ops.backprop_maxout(d_statevecs, which, model.get_dim("nP"))
+        d_preacts2f = model.ops.reshape2f(d_preacts, d_preacts.shape[0], -1)
+        model.inc_grad("lower_b", d_preacts2f.sum(axis=0))
+        model.inc_grad("lower_W", model.ops.gemm(d_preacts2f, tokfeats, trans1=True))
+        d_tokfeats = model.ops.gemm(d_preacts2f, lower_W)
+        d_tokfeats3f = model.ops.reshape3f(d_tokfeats, nS, nF, nI)
+        d_lower_pad = model.ops.alloc2f(nF, nI)
+        for i in range(ids.shape[0]):
+            for j in range(ids.shape[1]):
+                if ids[i, j] == -1:
+                    d_lower_pad[j] += d_tokfeats3f[i, j]
+                else:
+                    d_tokvecs[ids[i, j]] += d_tokfeats3f[i, j]
+        model.inc_grad("lower_pad", d_lower_pad)
+        # We don't need to backprop the summation, because we pass back the IDs instead
+        # d_state_features = backprop_feats((d_preacts, all_ids))
+        # ids1d = model.ops.xp.vstack(all_ids).flatten()
+        # d_state_features = d_state_features.reshape((ids1d.size, -1))
+        # d_tokvecs = model.ops.alloc((tokvecs.shape[0] + 1, tokvecs.shape[1]))
+        # model.ops.scatter_add(d_tokvecs, ids1d, d_state_features)
+        return (backprop_tok2vec(d_tokvecs), None)
+
+    return (states, all_scores), backprop_parser
+
+
+
+def _forward_reference(model, docs_moves: Tuple[List[Doc], TransitionSystem], is_train: bool):
+    """Slow reference implementation, without the precomputation"""
+    nF = model.get_dim("nF")
+    tok2vec = model.get_ref("tok2vec")
+    lower_pad = model.get_param("lower_pad")
+    lower_W = model.get_param("lower_W")
+    lower_b = model.get_param("lower_b")
+    upper_W = model.get_param("upper_W")
+    upper_b = model.get_param("upper_b")
+    nH = model.get_dim("nH")
+    nP = model.get_dim("nP")
+    nO = model.get_dim("nO")
+    nI = model.get_dim("nI")
+
+    ops = model.ops
+    docs, moves = docs_moves
+    states = moves.init_batch(docs)
+    tokvecs, backprop_tok2vec = tok2vec(docs, is_train)
+    all_ids = []
+    all_which = []
+    all_statevecs = []
+    all_scores = []
+    all_tokfeats = []
+    next_states = [s for s in states if not s.is_final()]
+    unseen_mask = _get_unseen_mask(model)
+    assert unseen_mask.all()  # TODO unhack
+    ids = numpy.zeros((len(states), nF), dtype="i")
+    while next_states:
+        ids = ids[: len(next_states)]
+        for i, state in enumerate(next_states):
+            state.set_context_tokens(ids, i, nF)
+        # Sum the state features, add the bias and apply the activation (maxout)
+        # to create the state vectors.
+        tokfeats3f = model.ops.alloc3f(ids.shape[0], nF, nI)
+        for i in range(ids.shape[0]):
+            for j in range(nF):
+                if ids[i, j] == -1:
+                    tokfeats3f[i, j] = lower_pad
+                else:
+                    tokfeats3f[i, j] = tokvecs[ids[i, j]]
+        tokfeats = model.ops.reshape2f(tokfeats3f, tokfeats3f.shape[0], -1)
+        preacts2f = model.ops.gemm(tokfeats, lower_W, trans2=True)
+        preacts2f += lower_b
+        preacts = model.ops.reshape3f(preacts2f, preacts2f.shape[0], nH, nP)
+        statevecs, which = ops.maxout(preacts)
+        # Multiply the state-vector by the scores weights and add the bias,
+        # to get the logits.
+        scores = model.ops.gemm(statevecs, upper_W, trans2=True)
+        scores += upper_b
+        scores[:, unseen_mask == 0] = model.ops.xp.nanmin(scores)
+        # Transition the states, filtering out any that are finished.
+        next_states = moves.transition_states(next_states, scores)
+        all_scores.append(scores)
+        if is_train:
+            # Remember intermediate results for the backprop.
+            all_tokfeats.append(tokfeats)
+            all_ids.append(ids.copy())
+            all_statevecs.append(statevecs)
+            all_which.append(which)
+
+    nS = sum(len(s.history) for s in states)
+
+    def backprop_parser(d_states_d_scores):
+        d_tokvecs = model.ops.alloc2f(tokvecs.shape[0], tokvecs.shape[1])
+        ids = model.ops.xp.vstack(all_ids)
+        which = ops.xp.vstack(all_which)
+        statevecs = model.ops.xp.vstack(all_statevecs)
+        tokfeats = model.ops.xp.vstack(all_tokfeats)
+        _, d_scores = d_states_d_scores
+        if model.attrs.get("unseen_classes"):
+            # If we have a negative gradient (i.e. the probability should
+            # increase) on any classes we filtered out as unseen, mark
+            # them as seen.
+            for clas in set(model.attrs["unseen_classes"]):
+                if (d_scores[:, clas] < 0).any():
+                    model.attrs["unseen_classes"].remove(clas)
+        d_scores *= unseen_mask
+        assert statevecs.shape == (nS, nH), statevecs.shape
+        assert d_scores.shape == (nS, nO), d_scores.shape
+        # Calculate the gradients for the parameters of the upper layer.
+        # The weight gemm is (nS, nO).T @ (nS, nH)
+        model.inc_grad("upper_b", d_scores.sum(axis=0))
+        model.inc_grad("upper_W", model.ops.gemm(d_scores, statevecs, trans1=True))
+        # Now calculate d_statevecs, by backpropagating through the upper linear layer.
+        # This gemm is (nS, nO) @ (nO, nH)
+        d_statevecs = model.ops.gemm(d_scores, upper_W)
+        # Backprop through the maxout activation
+        d_preacts = model.ops.backprop_maxout(d_statevecs, which, nP)
+        d_preacts2f = model.ops.reshape2f(d_preacts, d_preacts.shape[0], nH*nP)
+        # Now increment the gradients for the lower layer.
+        # The weight gemm here is (nS, nH*nP).T @ (nS, nF*nI)
+        model.inc_grad("lower_b", d_preacts2f.sum(axis=0))
+        model.inc_grad("lower_W", model.ops.gemm(d_preacts2f, tokfeats, trans1=True))
+        # Calculate d_tokfeats
+        # The gemm here is (nS, nH*nP) @ (nH*nP, nF*nI)
+        d_tokfeats = model.ops.gemm(d_preacts2f, lower_W)
+        # Get the gradients of the tokvecs and the padding
+        d_tokfeats3f = model.ops.reshape3f(d_tokfeats, nS, nF, nI)
+        d_lower_pad = model.ops.alloc1f(nI)
+        for i in range(ids.shape[0]):
+            for j in range(ids.shape[1]):
+                if ids[i, j] == -1:
+                    d_lower_pad += d_tokfeats3f[i, j]
+                else:
+                    d_tokvecs[ids[i, j]] += d_tokfeats3f[i, j]
+        model.inc_grad("lower_pad", d_lower_pad)
         # We don't need to backprop the summation, because we pass back the IDs instead
         d_state_features = backprop_feats((d_preacts, all_ids))
         ids1d = model.ops.xp.vstack(all_ids).flatten()
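
Note: the reference forward pass gathers each state's context-token vectors with an explicit Python loop, substituting the learned lower_pad vector wherever a slot has no token (ids[i, j] == -1). A vectorized sketch of that gather in plain numpy, not spaCy/thinc API (the helper name and signature are illustrative):

import numpy

def gather_token_features(ids, tokvecs, lower_pad):
    # ids: (nS, nF) int token indices per state, -1 for "no token in slot".
    # tokvecs: (n_tokens, nI); lower_pad: (nI,) learned padding vector.
    # Appending the padding row lets index -1 select it via numpy's
    # negative-index wrap-around.
    padded = numpy.vstack([tokvecs, lower_pad[None, :]])
    tokfeats3f = padded[ids]                      # (nS, nF, nI)
    return tokfeats3f.reshape(ids.shape[0], -1)   # (nS, nF*nI) for the gemm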
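Note: the state vectors come from a maxout unit. preacts has shape (nS, nH, nP), the maximum is taken over the nP pieces, and `which` records the winning piece so backprop_maxout can route the gradient back. A plain-numpy sketch of the semantics only (thinc's actual kernels are fused and faster):

import numpy

def maxout(preacts):
    # preacts: (nS, nH, nP) -> keep the best of the nP pieces per hidden unit.
    which = preacts.argmax(axis=-1)   # (nS, nH), index of the winning piece
    best = numpy.take_along_axis(preacts, which[..., None], axis=-1)[..., 0]
    return best, which

def backprop_maxout(d_best, which, nP):
    # Only the piece that won the max receives gradient; the rest get zero.
    nS, nH = d_best.shape
    d_preacts = numpy.zeros((nS, nH, nP), dtype=d_best.dtype)
    numpy.put_along_axis(d_preacts, which[..., None], d_best[..., None], axis=-1)
    return d_preacts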
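Note: unseen classes are masked in both directions. Their scores are pinned to the batch minimum so greedy decoding never selects them, and a class leaves the unseen set as soon as the gradient asks for its probability to increase. A small sketch of the scoring-side mask under the same conventions (unseen_mask assumed to be a 0/1 vector of length nO):

import numpy

def mask_unseen_scores(scores, unseen_mask):
    # scores: (nS, nO); unseen_mask: (nO,) with 0 marking unseen classes.
    scores = scores.copy()
    # nanmin skips NaNs, so a NaN score cannot become the fill value.
    scores[:, unseen_mask == 0] = numpy.nanmin(scores)
    return scores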
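Note: both parameter updates in the backward pass are instances of the same linear-layer backprop, expressed through gemm's trans1/trans2 flags. The shape bookkeeping as a generic numpy sketch (names are illustrative):

def backprop_linear(d_out, inputs, W):
    """Gradients for out = inputs @ W.T + b (a generic sketch).

    d_out: (nS, nOut); inputs: (nS, nIn); W: (nOut, nIn).
    """
    d_b = d_out.sum(axis=0)    # (nOut,)
    d_W = d_out.T @ inputs     # (nS, nOut).T @ (nS, nIn) -> (nOut, nIn)
    d_inputs = d_out @ W       # (nS, nOut) @ (nOut, nIn) -> (nS, nIn)
    return d_b, d_W, d_inputs

In the diff this pattern appears twice: once with (statevecs, upper_W, d_scores) for the output layer and once with (tokfeats, lower_W, d_preacts2f) for the hidden layer.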
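Note: the closing loop mirrors the forward gather. Gradients for padded slots accumulate into d_lower_pad, everything else is scatter-added into the corresponding token rows, and the commented-out lines preserve the earlier scatter_add formulation. A vectorized numpy sketch of the same accumulation, following the reference version's shared (nI,) padding vector (the modified forward accumulates per-slot instead); numpy.add.at is an unbuffered scatter-add, so repeated token ids accumulate correctly:

import numpy

def scatter_feature_grads(ids, d_tokfeats3f, n_tokens):
    # ids: (nS, nF) with -1 marking padding slots.
    # d_tokfeats3f: (nS, nF, nI) gradients for the gathered features.
    nI = d_tokfeats3f.shape[2]
    d_tokvecs = numpy.zeros((n_tokens, nI), dtype=d_tokfeats3f.dtype)
    is_pad = ids == -1
    # Every slot that used the padding vector contributes to its gradient.
    d_lower_pad = d_tokfeats3f[is_pad].sum(axis=0)            # (nI,)
    # Scatter-add the remaining slots back to their token rows.
    numpy.add.at(d_tokvecs, ids[~is_pad], d_tokfeats3f[~is_pad])
    return d_tokvecs, d_lower_pad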