mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			76 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			Cython
		
	
	
	
	
	
			
		
		
	
	
			76 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			Cython
		
	
	
	
	
	
from numpy cimport ndarray
 | 
						|
from ..vocab cimport Vocab
 | 
						|
from ..structs cimport TokenC
 | 
						|
from ..attrs cimport *
 | 
						|
from ..typedefs cimport attr_t, flags_t
 | 
						|
from ..parts_of_speech cimport univ_pos_t
 | 
						|
from .doc cimport Doc
 | 
						|
from ..lexeme cimport Lexeme
 | 
						|
 | 
						|
 | 
						|
cdef class Token:
    # C-level declarations for the Token extension type: its attributes, a
    # cached constructor (`cinit`), and static helpers that read and write
    # individual attributes on a raw TokenC struct.

    # Vocab handed to Token.__new__ by `cinit`; readable from Python.
    cdef readonly Vocab vocab
    # Pointer to a TokenC struct (presumably the struct backing this token --
    # the assignment happens outside this file; confirm in the implementation).
    cdef TokenC* c
    # Integer readable from Python (presumably the token's position in `doc`,
    # given that `cinit` forwards `offset` to Token.__new__ -- confirm).
    cdef readonly int i
    # Doc handed to Token.__new__ by `cinit`; readable from Python.
    cdef readonly Doc doc

    @staticmethod
    cdef inline Token cinit(Vocab vocab, const TokenC* token, int offset, Doc doc):
        # Return the Token object for position `offset` of `doc`, creating and
        # caching it in `doc._py_tokens` on first access.
        #
        # vocab:  passed through to Token.__new__.
        # token:  accepted for interface compatibility; this body never reads it.
        # offset: position in `doc`; must satisfy 0 <= offset < doc.length.
        # doc:    owning document; its `_py_tokens` sequence is used as a cache.
        #
        # Raises IndexError when `offset` is outside [0, doc.length).
        if offset < 0 or offset >= doc.length:
            msg = "Attempt to access token at %d, max length %d"
            raise IndexError(msg % (offset, doc.length))
        # Identity test: `!= None` would dispatch to rich comparison on the
        # cached object instead of simply checking whether the slot is filled.
        if doc._py_tokens[offset] is not None:
            return doc._py_tokens[offset]
        cdef Token self = Token.__new__(Token, vocab, doc, offset)
        # Cache the new object so later lookups at this offset return it.
        doc._py_tokens[offset] = self
        return self

    # Unfinished draft, kept commented out:
    #cdef inline TokenC struct_from_attrs(Vocab vocab, attrs):
    #    cdef TokenC token
    #    attrs = normalize_attrs(attrs)

    # Declaration only; the implementation lives outside this file.
    cpdef bint check_flag(self, attr_id_t flag_id) except -1

    @staticmethod
    cdef inline attr_t get_struct_attr(const TokenC* token, attr_id_t feat_name) nogil:
        # Read the attribute identified by `feat_name` from `token`.
        #
        # Ids smaller than the bit width of flags_t are answered by
        # Lexeme.c_check_flag on the token's lexeme. The ids listed below are
        # read from the TokenC fields directly. Every other id is delegated to
        # Lexeme.get_struct_attr on the token's lexeme.
        if feat_name < (sizeof(flags_t) * 8):
            return Lexeme.c_check_flag(token.lex, feat_name)
        elif feat_name == LEMMA:
            return token.lemma
        elif feat_name == POS:
            return token.pos
        elif feat_name == TAG:
            return token.tag
        elif feat_name == DEP:
            return token.dep
        elif feat_name == HEAD:
            return token.head
        elif feat_name == SPACY:
            return token.spacy
        elif feat_name == ENT_IOB:
            return token.ent_iob
        elif feat_name == ENT_TYPE:
            return token.ent_type
        else:
            return Lexeme.get_struct_attr(token.lex, feat_name)

    @staticmethod
    cdef inline attr_t set_struct_attr(TokenC* token, attr_id_t feat_name,
                                       attr_t value) nogil:
        # Write `value` into the TokenC field selected by `feat_name`.
        #
        # Only the ids listed below are handled. There is no `else` branch, so
        # any other id leaves `token` unchanged. The body contains no `return`
        # statement, so callers should not rely on the declared attr_t result.
        if feat_name == LEMMA:
            token.lemma = value
        elif feat_name == POS:
            # `pos` is stored as univ_pos_t, hence the explicit cast.
            token.pos = <univ_pos_t>value
        elif feat_name == TAG:
            token.tag = value
        elif feat_name == DEP:
            token.dep = value
        elif feat_name == HEAD:
            token.head = value
        elif feat_name == SPACY:
            token.spacy = value
        elif feat_name == ENT_IOB:
            token.ent_iob = value
        elif feat_name == ENT_TYPE:
            token.ent_type = value
 |