spaCy/spacy/tokens/doc.pxd
Wolfgang Seeker 5e2e8e951a add baseclass DocIterator for iterators over documents
add classes for English and German noun chunks

the respective iterators are set for the document when created by the parser
as they depend on the annotation scheme of the parsing model
2016-03-16 15:53:35 +01:00

54 lines
1.2 KiB
Cython

from cymem.cymem cimport Pool
cimport numpy as np
from preshed.counter cimport PreshCounter
from ..vocab cimport Vocab
from ..structs cimport TokenC, LexemeC
from ..typedefs cimport attr_t
from ..attrs cimport attr_id_t
from spacy.syntax.iterators cimport DocIterator
cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil
ctypedef const LexemeC* const_Lexeme_ptr
ctypedef const TokenC* const_TokenC_ptr
ctypedef fused LexemeOrToken:
const_Lexeme_ptr
const_TokenC_ptr
cdef int token_by_start(const TokenC* tokens, int length, int start_char) except -2
cdef int token_by_end(const TokenC* tokens, int length, int end_char) except -2
cdef class Doc:
cdef readonly Pool mem
cdef readonly Vocab vocab
cdef public object _vector
cdef public object _vector_norm
cdef TokenC* c
cdef public bint is_tagged
cdef public bint is_parsed
cdef public list _py_tokens
cdef int length
cdef int max_length
cdef DocIterator noun_chunks_iterator
cdef int push_back(self, LexemeOrToken lex_or_tok, bint trailing_space) except -1
cpdef np.ndarray to_array(self, object features)
cdef void set_parse(self, const TokenC* parsed) nogil