mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	* Update to use thinc 3.0
This commit is contained in:
		
							parent
							
								
									802ad3d71a
								
							
						
					
					
						commit
						b9991fbd20
					
				| 
						 | 
				
			
			@ -10,6 +10,8 @@ import json
 | 
			
		|||
import cython
 | 
			
		||||
import numpy.random
 | 
			
		||||
 | 
			
		||||
from libc.string cimport memcpy
 | 
			
		||||
 | 
			
		||||
from thinc.features cimport Feature, count_feats
 | 
			
		||||
from thinc.api cimport Example
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -52,28 +54,12 @@ cdef class Model:
 | 
			
		|||
    cdef const weight_t* score(self, atom_t* context) except NULL:
 | 
			
		||||
        memcpy(self._eg.c.atoms, context, self._eg.c.nr_atom * sizeof(context[0]))
 | 
			
		||||
        self._model(self._eg)
 | 
			
		||||
        return self._eg.scores
 | 
			
		||||
        return self._eg.c.scores
 | 
			
		||||
 | 
			
		||||
    cdef int set_scores(self, weight_t* scores, atom_t* context) nogil:
 | 
			
		||||
        cdef int nr_feat = self._model.extractor.set_feats(self._eg.features, context)
 | 
			
		||||
        cdef int nr_feat = self._extractor.set_feats(self._eg.c.features, context)
 | 
			
		||||
 | 
			
		||||
        self._model.set_scores(
 | 
			
		||||
            scores,
 | 
			
		||||
            self._model.weights.c_map,
 | 
			
		||||
            self._eg.c.features,
 | 
			
		||||
            nr_feat
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    cdef int update(self, atom_t* context, class_t guess, class_t gold, int cost) except -1:
 | 
			
		||||
        cdef int n_feats
 | 
			
		||||
        if cost == 0:
 | 
			
		||||
            self._model.update({})
 | 
			
		||||
        else:
 | 
			
		||||
            feats = self._extractor.get_feats(context, &n_feats)
 | 
			
		||||
            counts = {gold: {}, guess: {}}
 | 
			
		||||
            count_feats(counts[gold], feats, n_feats, cost)
 | 
			
		||||
            count_feats(counts[guess], feats, n_feats, -cost)
 | 
			
		||||
            self._model.update(counts)
 | 
			
		||||
        self._model.set_scores(scores, self._eg.c.features, nr_feat)
 | 
			
		||||
 | 
			
		||||
    def end_training(self, model_loc=None):
 | 
			
		||||
        if model_loc is None:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -40,7 +40,7 @@ from ._parse_features cimport CONTEXT_SIZE
 | 
			
		|||
from ._parse_features cimport fill_context
 | 
			
		||||
from .stateclass cimport StateClass
 | 
			
		||||
 | 
			
		||||
from .._ml cimport arg_max_if_true
 | 
			
		||||
from thinc.learner cimport arg_max_if_true
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
DEBUG = False
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,6 +7,3 @@ cdef class Tagger:
 | 
			
		|||
    cdef readonly Vocab vocab
 | 
			
		||||
    cdef readonly Model model
 | 
			
		||||
    cdef public dict freqs
 | 
			
		||||
 | 
			
		||||
    cdef int predict(self, int i, const TokenC* tokens) except -1
 | 
			
		||||
    cdef int update(self, int i, const TokenC* tokens, int gold) except -1
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,6 +3,8 @@ from os import path
 | 
			
		|||
from collections import defaultdict
 | 
			
		||||
 | 
			
		||||
from thinc.typedefs cimport atom_t, weight_t
 | 
			
		||||
from thinc.learner cimport arg_max, arg_max_if_true, arg_max_if_zero
 | 
			
		||||
from thinc.api cimport Example
 | 
			
		||||
 | 
			
		||||
from .typedefs cimport attr_t
 | 
			
		||||
from .tokens.doc cimport Doc
 | 
			
		||||
| 
						 | 
				
			
			@ -11,7 +13,6 @@ from .parts_of_speech cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON
 | 
			
		|||
from .parts_of_speech cimport VERB, X, PUNCT, EOL, SPACE
 | 
			
		||||
 | 
			
		||||
from .attrs cimport *
 | 
			
		||||
from ._ml cimport arg_max
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
cpdef enum:
 | 
			
		||||
| 
						 | 
				
			
			@ -138,12 +139,15 @@ cdef class Tagger:
 | 
			
		|||
        """
 | 
			
		||||
        if tokens.length == 0:
 | 
			
		||||
            return 0
 | 
			
		||||
 | 
			
		||||
        cdef Example eg = self.model._eg
 | 
			
		||||
        cdef int i
 | 
			
		||||
        cdef const weight_t* scores
 | 
			
		||||
        for i in range(tokens.length):
 | 
			
		||||
            if tokens.c[i].pos == 0:
 | 
			
		||||
                guess = self.predict(i, tokens.c)
 | 
			
		||||
                self.vocab.morphology.assign_tag(&tokens.c[i], guess)
 | 
			
		||||
                eg.wipe()
 | 
			
		||||
                fill_atoms(eg.c.atoms, tokens.c, i)
 | 
			
		||||
                self.model(eg)
 | 
			
		||||
                self.vocab.morphology.assign_tag(&tokens.c[i], eg.c.guess)
 | 
			
		||||
 | 
			
		||||
        tokens.is_tagged = True
 | 
			
		||||
        tokens._py_tokens = [None] * tokens.length
 | 
			
		||||
| 
						 | 
				
			
			@ -169,38 +173,25 @@ cdef class Tagger:
 | 
			
		|||
            raise ValueError(
 | 
			
		||||
                [g for g in gold_tag_strs if g is not None and g not in self.tag_names])
 | 
			
		||||
        correct = 0
 | 
			
		||||
        cdef Example eg = self.model._eg
 | 
			
		||||
        for i in range(tokens.length):
 | 
			
		||||
            guess = self.update(i, tokens.c, golds[i])
 | 
			
		||||
            loss = golds[i] != -1 and guess != golds[i]
 | 
			
		||||
            eg.wipe()
 | 
			
		||||
            fill_atoms(eg.c.atoms, tokens.c, i)
 | 
			
		||||
            self.train(eg)
 | 
			
		||||
 | 
			
		||||
            self.vocab.morphology.assign_tag(&tokens.c[i], guess)
 | 
			
		||||
            self.vocab.morphology.assign_tag(&tokens.c[i], eg.c.guess)
 | 
			
		||||
            
 | 
			
		||||
            correct += loss == 0
 | 
			
		||||
            correct += eg.c.cost == 0
 | 
			
		||||
            self.freqs[TAG][tokens.c[i].tag] += 1
 | 
			
		||||
        return correct
 | 
			
		||||
 | 
			
		||||
    cdef int predict(self, int i, const TokenC* tokens) except -1:
 | 
			
		||||
        cdef atom_t[N_CONTEXT_FIELDS] context
 | 
			
		||||
        _fill_from_token(&context[P2_orth], &tokens[i-2])
 | 
			
		||||
        _fill_from_token(&context[P1_orth], &tokens[i-1])
 | 
			
		||||
        _fill_from_token(&context[W_orth], &tokens[i])
 | 
			
		||||
        _fill_from_token(&context[N1_orth], &tokens[i+1])
 | 
			
		||||
        _fill_from_token(&context[N2_orth], &tokens[i+2])
 | 
			
		||||
        scores = self.model.score(context)
 | 
			
		||||
        return arg_max(scores, self.model.n_classes)
 | 
			
		||||
 | 
			
		||||
    cdef int update(self, int i, const TokenC* tokens, int gold) except -1:
 | 
			
		||||
        cdef atom_t[N_CONTEXT_FIELDS] context
 | 
			
		||||
        _fill_from_token(&context[P2_orth], &tokens[i-2])
 | 
			
		||||
        _fill_from_token(&context[P1_orth], &tokens[i-1])
 | 
			
		||||
        _fill_from_token(&context[W_orth], &tokens[i])
 | 
			
		||||
        _fill_from_token(&context[N1_orth], &tokens[i+1])
 | 
			
		||||
        _fill_from_token(&context[N2_orth], &tokens[i+2])
 | 
			
		||||
        scores = self.model.score(context)
 | 
			
		||||
        guess = arg_max(scores, self.model.n_classes)
 | 
			
		||||
        loss = guess != gold if gold != -1 else 0
 | 
			
		||||
        self.model.update(context, guess, gold, loss)
 | 
			
		||||
        return guess
 | 
			
		||||
cdef inline void fill_atoms(atom_t* atoms, const TokenC* tokens, int i) nogil:
 | 
			
		||||
    _fill_from_token(&atoms[P2_orth], &tokens[i-2])
 | 
			
		||||
    _fill_from_token(&atoms[P1_orth], &tokens[i-1])
 | 
			
		||||
    _fill_from_token(&atoms[W_orth], &tokens[i])
 | 
			
		||||
    _fill_from_token(&atoms[N1_orth], &tokens[i+1])
 | 
			
		||||
    _fill_from_token(&atoms[N2_orth], &tokens[i+2])
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
cdef inline void _fill_from_token(atom_t* context, const TokenC* t) nogil:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user