spaCy/spacy/tokens.pxd

76 lines
1.8 KiB
Cython
Raw Normal View History

2014-12-16 14:44:43 +03:00
from libc.stdint cimport uint32_t
2015-01-19 11:59:55 +03:00
from numpy cimport ndarray
cimport numpy
2014-12-02 15:48:05 +03:00
from cymem.cymem cimport Pool
2014-12-21 23:25:43 +03:00
from thinc.typedefs cimport atom_t
from .typedefs cimport flags_t, attr_id_t, attr_t
from .parts_of_speech cimport univ_pos_t
from .structs cimport Morphology, TokenC, LexemeC
2014-12-21 23:25:43 +03:00
from .vocab cimport Vocab
2014-12-19 23:03:26 +03:00
from .strings cimport StringStore
# Single-name aliases for the two pointer types that make up the fused type
# declared just below.
ctypedef const LexemeC* const_Lexeme_ptr
ctypedef TokenC* TokenC_ptr


# Fused type over "pointer to const LexemeC" and "pointer to TokenC".
# Used as the parameter type of Tokens.push_back further down in this file.
ctypedef fused LexemeOrToken:
    const_Lexeme_ptr
    TokenC_ptr
# Prototype only: the body is not in this file (presumably in the matching
# .pyx -- confirm).  Takes a read-only LexemeC pointer plus an attribute id
# and returns an attr_t.  Declared nogil, so it can be called without the GIL.
cdef attr_t get_lex_attr(const LexemeC* lex, attr_id_t feat_name) nogil
2014-12-24 09:42:00 +03:00
# Prototype only: the body is not in this file (presumably in the matching
# .pyx -- confirm).  Takes a read-only TokenC pointer plus an attribute id
# and returns an attr_t.  Declared nogil, so it can be called without the GIL.
# NOTE(review): the pointer parameter is named `lex` although it is a TokenC*.
cdef attr_t get_token_attr(const TokenC* lex, attr_id_t feat_name) nogil
# Return True when bit number `flag_id` is set in `lexeme.flags`.
#
#   lexeme  -- read-only pointer to the lexeme whose flag bits are tested.
#   flag_id -- zero-based index of the bit to test.
#
# Declared nogil, so it can be called without holding the GIL.
cdef inline bint check_flag(const LexemeC* lexeme, attr_id_t flag_id) nogil:
    # A bare `1` is a C int, so `1 << flag_id` would be evaluated at int
    # width and overflow for high bit indices before the AND is applied.
    # Building the mask from a flags_t-typed one keeps the shift at the
    # width of the flag field (assumes LexemeC.flags is a flags_t -- confirm
    # against structs.pxd).
    cdef flags_t one = 1
    # Explicit comparison so the bint result never depends on how a wide
    # integer is narrowed.
    return (lexeme.flags & (one << flag_id)) != 0
cdef class Tokens:
    # C-level declaration of the Tokens extension type: attribute layout and
    # the signatures of its cdef/cpdef methods.  Method bodies are not part of
    # this file.  Attribute order is kept as declared, since it determines the
    # object layout.

    # Project-declared helper objects (Pool from cymem, Vocab from .vocab).
    cdef Pool mem
    cdef Vocab vocab

    # Raw pointer to TokenC structs.
    # NOTE(review): presumably an array sized by max_length and filled up to
    # length -- confirm in the implementation.
    cdef TokenC* data

    # Private Python-object attributes.
    cdef list _py_tokens
    cdef unicode _string
    cdef list _tag_strings
    cdef list _dep_strings

    # `public`: readable and writable from Python code.
    cdef public bint is_tagged
    cdef public bint is_parsed

    # C ints, visible to Cython code only.
    cdef int length
    cdef int max_length

    # Accepts either member of the LexemeOrToken fused type.
    # `except -1`: a return value of -1 signals that an exception was raised.
    cdef int push_back(self, int i, LexemeOrToken lex_or_tok) except -1

    # cpdef: callable from both Python and Cython; returns a 2-D memoryview
    # of C longs.
    cpdef long[:,:] to_array(self, object features)
2014-12-02 15:48:05 +03:00
cdef class Token:
    # C-level declaration of the Token extension type.  Every attribute is
    # `readonly`: Python code can read it but cannot assign to it.  Attribute
    # order is kept as declared, since it determines the object layout.

    # The Tokens object this token refers to, and an int index.
    # NOTE(review): `i` is presumably the token's position within `_seq` --
    # confirm in the implementation.
    cdef readonly Tokens _seq
    cdef readonly int i

    # Attributes stored as attr_t values.
    cdef readonly attr_t idx
    cdef readonly attr_t cluster
    cdef readonly attr_t length
    cdef readonly attr_t orth
    cdef readonly attr_t lower
    cdef readonly attr_t norm
    cdef readonly attr_t shape
    cdef readonly attr_t prefix
    cdef readonly attr_t suffix

    # Floating-point attributes.
    cdef readonly float prob
    cdef readonly float sentiment

    cdef readonly attr_t flags
    cdef readonly attr_t lemma
    # Typed with univ_pos_t from .parts_of_speech rather than attr_t.
    cdef readonly univ_pos_t pos
    cdef readonly attr_t tag
    cdef readonly attr_t dep

    # NumPy array attribute and a unicode string attribute.
    cdef readonly ndarray repvec
    cdef readonly unicode string