Mirror of https://github.com/explosion/spaCy.git (synced 2025-11-04 01:48:04 +03:00)
Small fixes to parser

commit e6d71e1778
parent 188c0f6949
@@ -33,7 +33,7 @@ from preshed.maps cimport MapStruct
 from preshed.maps cimport map_get
 
 from thinc.api import layerize, chain
-from thinc.neural import Affine, Model, Maxout
+from thinc.neural import BatchNorm, Model, Affine, ELU, ReLu, Maxout
 from thinc.neural.ops import NumpyOps
 
 from .._ml import zero_init, PrecomputableAffine, PrecomputableMaxouts
@@ -62,7 +62,8 @@ def set_debug(val):
     DEBUG = val
 
 
-def get_greedy_model_for_batch(batch_size, tokvecs, lower_model, cuda_stream=None):
+def get_greedy_model_for_batch(batch_size, tokvecs, lower_model, cuda_stream=None,
+                               drop=0.):
     '''Allow a model to be "primed" by pre-computing input features in bulk.
 
     This is used for the parser, where we want to take a batch of documents,
@@ -79,16 +80,17 @@ def get_greedy_model_for_batch(batch_size, tokvecs, lower_model, cuda_stream=None
     we can do all our hard maths up front, packed into large multiplications,
     and do the hard-to-program parsing on the CPU.
     '''
-    gpu_cached, bp_features = lower_model.begin_update(tokvecs, drop=0.)
+    gpu_cached, bp_features = lower_model.begin_update(tokvecs, drop=drop)
     cdef np.ndarray cached
     if not isinstance(gpu_cached, numpy.ndarray):
         cached = gpu_cached.get(stream=cuda_stream)
     else:
         cached = gpu_cached
     nF = gpu_cached.shape[1]
+    nO = gpu_cached.shape[2]
     nP = gpu_cached.shape[3]
     ops = lower_model.ops
-    features = numpy.zeros((batch_size, cached.shape[2], nP), dtype='f')
+    features = numpy.zeros((batch_size, nO, nP), dtype='f')
     synchronized = False
 
     def forward(token_ids, drop=0.):
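The docstring's "priming" idea is the heart of this function: multiply the whole batch's token vectors through the lower model once, then score each parser state by gathering and summing cached rows. A minimal numpy sketch of that trick (dimensions and names here are illustrative, not spaCy's internals):

    import numpy

    n_tokens, nF, nO, nP = 1000, 13, 128, 2
    # One large multiplication up front: every token's contribution to each
    # of the nF feature slots, nO output units and nP maxout pieces.
    cached = numpy.random.randn(n_tokens, nF, nO, nP).astype('f')

    def state_features(token_ids):
        # token_ids: the nF context tokens of one parser state.
        # Scoring a state is now a gather-and-sum, not a matrix multiply.
        return cached[token_ids, numpy.arange(nF)].sum(axis=0)  # shape (nO, nP)

    feats = state_features(numpy.zeros(nF, dtype='i'))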
@@ -108,7 +110,7 @@ def get_greedy_model_for_batch(batch_size, tokvecs, lower_model, cuda_stream=None
         cdef int[:, ::1] ids = token_ids
         _sum_features(<float*>&feats[0,0,0],
             <float*>cached.data, &ids[0,0],
-            token_ids.shape[0], nF, cached.shape[2]*nP)
+            token_ids.shape[0], nF, nO*nP)
 
         if nP >= 2:
             best, which = ops.maxout(features)
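When nP >= 2 the summed features still carry one value per maxout piece, and ops.maxout picks the winner per output unit. Roughly this (a sketch of the idea, not thinc's implementation):

    import numpy

    def maxout(features):
        # features: (batch, nO, nP). Keep the best of the nP linear pieces
        # per output unit, and remember which piece won so the backward
        # pass can route the gradient through it.
        which = features.argmax(axis=-1)
        best = features.max(axis=-1)
        return best, which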
@@ -155,13 +157,16 @@ def get_batch_loss(TransitionSystem moves, states, golds, float[:, ::1] scores):
     cdef int i
     is_valid = <int*>mem.alloc(moves.n_moves, sizeof(int))
     costs = <float*>mem.alloc(moves.n_moves, sizeof(float))
-    cdef np.ndarray d_scores = numpy.zeros((len(states), moves.n_moves), dtype='f')
+    cdef np.ndarray d_scores = numpy.zeros((len(states), moves.n_moves), dtype='f',
+                                           order='c')
     c_d_scores = <float*>d_scores.data
     for i, (state, gold) in enumerate(zip(states, golds)):
         memset(is_valid, 0, moves.n_moves * sizeof(int))
         memset(costs, 0, moves.n_moves * sizeof(float))
         moves.set_costs(is_valid, costs, state, gold)
         cpu_log_loss(c_d_scores, costs, is_valid, &scores[i, 0], d_scores.shape[1])
+        #cpu_regression_loss(c_d_scores,
+        #    costs, is_valid, &scores[i, 0], d_scores.shape[1])
         c_d_scores += d_scores.shape[1]
     return d_scores
 
@@ -231,7 +236,7 @@ def init_states(TransitionSystem moves, docs):
 def extract_token_ids(states, offsets=None, nF=1, nB=0, nS=2, nL=0, nR=0):
     cdef StateClass state
     cdef int n_tokens = states[0].nr_context_tokens(nF, nB, nS, nL, nR)
-    ids = numpy.zeros((len(states), n_tokens), dtype='i')
+    ids = numpy.zeros((len(states), n_tokens), dtype='i', order='c')
     if offsets is None:
         offsets = [0] * len(states)
     for i, (state, offset) in enumerate(zip(states, offsets)):
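numpy.zeros already returns C-ordered memory, so order='c' here (and in get_batch_loss above) is documentation as much as behaviour: these buffers are later walked with raw <int*>/<float*> pointers, which is only safe on row-major, contiguous arrays. For example:

    import numpy

    ids = numpy.zeros((4, 13), dtype='i', order='c')
    assert ids.flags['C_CONTIGUOUS']  # required before taking &ids[0,0]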
@@ -240,6 +245,24 @@ def extract_token_ids(states, offsets=None, nF=1, nB=0, nS=2, nL=0, nR=0):
     return ids
 
 
+_n_iter = 0
+@layerize
+def print_mean_variance(X, drop=0.):
+    global _n_iter
+    _n_iter += 1
+    fwd_iter = _n_iter
+    means = X.mean(axis=0)
+    variance = X.var(axis=0)
+    print(fwd_iter, "M", ', '.join(('%.2f' % m) for m in means))
+    print(fwd_iter, "V", ', '.join(('%.2f' % m) for m in variance))
+    def backward(dX, sgd=None):
+        means = dX.mean(axis=0)
+        variance = dX.var(axis=0)
+        print(fwd_iter, "dM", ', '.join(('%.2f' % m) for m in means))
+        print(fwd_iter, "dV", ', '.join(('%.2f' % m) for m in variance))
+    return X, backward
+
+
 cdef class Parser:
     """
     Base class of the DependencyParser and EntityRecognizer.
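print_mean_variance is a new debugging aid: wrapped with @layerize it becomes an identity layer that prints per-unit activation statistics on the forward pass and gradient statistics on the backward pass, tagged with an iteration counter so the two can be matched up. Hypothetical usage (the next hunk shows it commented out at exactly this spot in the real chain; sizes here are made up):

    # Identity layer, so the model's predictions are unchanged; only the
    # printed statistics are added.
    model = chain(Maxout(128, 128),
                  print_mean_variance,
                  zero_init(Affine(50, 128)))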
@@ -301,13 +324,14 @@ cdef class Parser:
         nr_context_tokens = StateClass.nr_context_tokens(nF, nB, nS, nL, nR)
         with Model.use_device('cpu'):
             upper = chain(
-                        Maxout(token_vector_width),
-                        zero_init(Affine(self.moves.n_moves, token_vector_width)))
+                        Maxout(hidden_width, hidden_width),
+                        #print_mean_variance,
+                        zero_init(Affine(self.moves.n_moves, hidden_width)))
         assert isinstance(upper.ops, NumpyOps)
-        lower = PrecomputableMaxouts(token_vector_width, nF=nr_context_tokens, nI=token_vector_width,
+        lower = PrecomputableMaxouts(hidden_width, nF=nr_context_tokens, nI=token_vector_width,
                                      pieces=cfg.get('maxout_pieces', 1))
-        upper.begin_training(upper.ops.allocate((500, token_vector_width)))
         lower.begin_training(lower.ops.allocate((500, token_vector_width)))
+        upper.begin_training(upper.ops.allocate((500, hidden_width)))
         return upper, lower
 
     def __call__(self, Doc tokens):
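The upper network is now sized by a separate hidden_width instead of reusing token_vector_width, so the state vector and the token vectors can differ in width; begin_training is also reordered so the lower model is initialised before the upper model is primed with hidden_width-sized data. An illustrative end-to-end shape check, with all dimensions invented and the upper Maxout approximated by a ReLU for brevity:

    import numpy

    nF, nI, nH, n_moves, nP = 13, 128, 64, 50, 2    # made-up sizes
    W_lower = numpy.random.randn(nH, nF, nI, nP).astype('f') * 0.01
    W_upper = numpy.random.randn(nH, nH).astype('f') * 0.01
    W_out = numpy.zeros((n_moves, nH), dtype='f')   # zero_init: scores start flat

    state = numpy.random.randn(nF, nI).astype('f')  # one state's context tokens
    hidden = numpy.einsum('fi,hfip->hp', state, W_lower).max(axis=-1)  # (nH,)
    hidden = numpy.maximum(0., W_upper @ hidden)    # stand-in for the Maxout
    scores = W_out @ hidden                         # (n_moves,), all zero here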
@@ -390,13 +414,15 @@ cdef class Parser:
     def update(self, docs_tokvecs, golds, drop=0., sgd=None):
         cdef:
             int nC
-            int[500] is_valid # Hack for now
             Doc doc
             StateClass state
             np.ndarray scores
 
         docs, tokvecs = docs_tokvecs
         cuda_stream = Stream()
+        lower_model = get_greedy_model_for_batch(len(docs),
+                        tokvecs, self.feature_maps, cuda_stream=cuda_stream,
+                        drop=drop)
         if isinstance(docs, Doc) and isinstance(golds, GoldParse):
             return self.update(([docs], tokvecs), [golds], drop=drop)
         for gold in golds:
@@ -407,33 +433,47 @@ cdef class Parser:
         todo = zip(states, offsets, golds)
         todo = filter(lambda sp: not sp[0].py_is_final(), todo)
 
-        lower_model = get_greedy_model_for_batch(len(todo),
-                        tokvecs, self.feature_maps, cuda_stream=cuda_stream)
+        cdef Pool mem = Pool()
+        is_valid = <int*>mem.alloc(len(states) * self.moves.n_moves, sizeof(int))
+        costs = <float*>mem.alloc(len(states) * self.moves.n_moves, sizeof(float))
+
         upper_model = self.model
         d_tokens = self.feature_maps.ops.allocate(tokvecs.shape)
         backprops = []
         n_tokens = tokvecs.shape[0]
         nF = self.feature_maps.nF
-        while todo:
+        loss = 0.
+        total = 1e-4
+        follow_gold = False
+        while len(todo) >= 4:
             states, offsets, golds = zip(*todo)
 
             token_ids = extract_token_ids(states, offsets=offsets)
-            lower, bp_lower = lower_model(token_ids)
-            scores, bp_scores = upper_model.begin_update(lower)
+            lower, bp_lower = lower_model(token_ids, drop=drop)
+            scores, bp_scores = upper_model.begin_update(lower, drop=drop)
 
             d_scores = get_batch_loss(self.moves, states, golds, scores)
+            loss += numpy.abs(d_scores).sum()
+            total += d_scores.shape[0]
             d_lower = bp_scores(d_scores, sgd=sgd)
 
-            gpu_tok_ids = cupy.ndarray(token_ids.shape, dtype='i')
-            gpu_d_lower = cupy.ndarray(d_lower.shape, dtype='f')
-            gpu_tok_ids.set(token_ids, stream=cuda_stream)
-            gpu_d_lower.set(d_lower, stream=cuda_stream)
-            backprops.append((gpu_tok_ids, gpu_d_lower, bp_lower))
+            if isinstance(tokvecs, cupy.ndarray):
+                gpu_tok_ids = cupy.ndarray(token_ids.shape, dtype='i', order='C')
+                gpu_d_lower = cupy.ndarray(d_lower.shape, dtype='f', order='C')
+                gpu_tok_ids.set(token_ids, stream=cuda_stream)
+                gpu_d_lower.set(d_lower, stream=cuda_stream)
+                backprops.append((gpu_tok_ids, gpu_d_lower, bp_lower))
+            else:
+                backprops.append((token_ids, d_lower, bp_lower))
 
             c_scores = <float*>scores.data
-            for state in states:
-                self.moves.set_valid(is_valid, state.c)
-                guess = arg_max_if_valid(c_scores, is_valid, scores.shape[1])
+            for state, gold in zip(states, golds):
+                if follow_gold:
+                    self.moves.set_costs(is_valid, costs, state, gold)
+                    guess = arg_max_if_gold(c_scores, costs, is_valid, scores.shape[1])
+                else:
+                    self.moves.set_valid(is_valid, state.c)
+                    guess = arg_max_if_valid(c_scores, is_valid, scores.shape[1])
                 action = self.moves.c[guess]
                 action.do(state.c, action.label)
                 c_scores += scores.shape[1]
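The fixed int[500] hack is gone: the validity and cost buffers are now Pool-allocated at len(states) * n_moves. The decoding loop also gains a follow_gold switch (off by default here) that lets training advance states along a gold-consistent path instead of the current best guess. A numpy sketch of what the two C argmax helpers compute (is_valid as a 0/1 mask; not the pointer-based implementation):

    import numpy

    def arg_max_if_valid(scores, is_valid):
        # Greedy decoding: best-scoring action the transition system allows.
        return numpy.where(is_valid, scores, -numpy.inf).argmax()

    def arg_max_if_gold(scores, costs, is_valid):
        # Teacher forcing: best-scoring valid action with zero cost, so the
        # parser follows a gold-standard derivation during training.
        return numpy.where(is_valid & (costs <= 0), scores, -numpy.inf).argmax()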
@@ -451,7 +491,7 @@ cdef class Parser:
             else:
                 self.model.ops.xp.add.at(d_tokens,
                     token_ids, d_state_features * active_feats)
-        return d_tokens
+        return d_tokens, loss / total
 
     def step_through(self, Doc doc, GoldParse gold=None):
         """
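update now returns the summed gradient magnitude divided by total alongside d_tokens, giving a per-state loss estimate for logging. Seeding total with 1e-4 rather than 0 guards the division when a batch yields no updatable states, e.g.:

    loss, total = 0., 1e-4      # tiny seed avoids ZeroDivisionError
    batches = []                # suppose nothing was parsable this update
    for d_scores in batches:
        loss += abs(d_scores).sum()
        total += d_scores.shape[0]
    avg = loss / total          # 0.0 rather than a crash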