2016-10-14 04:24:13 +03:00
|
|
|
from numpy cimport ndarray
|
2015-07-13 20:20:48 +03:00
|
|
|
from ..vocab cimport Vocab
|
|
|
|
from ..structs cimport TokenC
|
2015-07-16 12:23:25 +03:00
|
|
|
from ..attrs cimport attr_id_t
|
2015-07-14 01:10:11 +03:00
|
|
|
from .doc cimport Doc
|
2015-07-13 20:20:48 +03:00
|
|
|
|
|
|
|
|
|
|
|
cdef class Token:
    # C-level declaration of the Token extension type.
    #
    # Attributes marked `readonly` are readable, but not assignable, from
    # Python code; `c` has no visibility modifier and is C-level only.
    cdef readonly Vocab vocab
    # Pointer to a TokenC struct. Nothing in this declaration assigns it;
    # presumably it is set by the type's constructor -- TODO confirm.
    cdef TokenC* c
    # NOTE(review): presumably the token's index within `doc` (cinit passes
    # `offset` to the constructor) -- confirm against the implementation.
    cdef readonly int i
    cdef readonly Doc doc

    @staticmethod
    cdef inline Token cinit(Vocab vocab, const TokenC* token, int offset, Doc doc):
        # Return the Token object for position `offset` of `doc`, creating
        # and caching it in `doc._py_tokens` on first access.
        #
        # vocab:  passed through to the Token constructor.
        # token:  not used by this function body; kept so existing callers
        #         keep compiling.
        # offset: position in `doc`; must satisfy 0 <= offset < doc.length.
        # doc:    the owning Doc; its `_py_tokens` sequence is the cache.
        #
        # Raises IndexError when `offset` is out of range.
        if offset < 0 or offset >= doc.length:
            msg = "Attempt to access token at %d, max length %d"
            raise IndexError(msg % (offset, doc.length))
        # Identity test on purpose: `!= None` would dispatch to the cached
        # object's rich comparison instead of simply checking the slot.
        if doc._py_tokens[offset] is not None:
            return doc._py_tokens[offset]
        cdef Token self = Token.__new__(Token, vocab, doc, offset)
        # Cache the new object so later calls for this offset return it.
        doc._py_tokens[offset] = self
        return self

    # Declaration only; the implementation is not part of this file view.
    # `except -1` makes a return value of -1 signal a raised exception.
    cpdef bint check_flag(self, attr_id_t flag_id) except -1
|