spaCy/spacy/structs.pxd

90 lines
1.8 KiB
Cython
Raw Normal View History

from libc.stdint cimport int8_t, int32_t, uint8_t, uint16_t, uint32_t
from .typedefs cimport flags_t, attr_t, id_t, hash_t
from .parts_of_speech cimport univ_pos_t
# Plain C record holding the attributes stored for one lexeme.
#
# NOTE(review): the per-field notes below are inferred from the field names
# only; confirm them against the code that populates the struct.
cdef struct LexemeC:
    # Read-only pointer to float data (presumably the representation vector).
    const float* repvec

    # Bit flags; the meaning of individual bits is defined elsewhere.
    flags_t flags

    attr_t id
    attr_t length

    # Attribute values for different views of the string form.
    attr_t orth
    attr_t lower
    attr_t norm
    attr_t shape
    attr_t prefix
    attr_t suffix

    attr_t cluster

    # Floating-point statistics attached to the lexeme.
    float prob
    float sentiment
    float l2_norm  # presumably the L2 norm of repvec -- TODO confirm

# Morphological features, one unsigned byte per feature. The numeric codes
# used for each feature are defined outside this struct.
cdef struct Morphology:
    uint8_t number
    uint8_t tenspect  # Tense/aspect/voice
    uint8_t mood
    uint8_t gender
    uint8_t person
    uint8_t case
    uint8_t misc

# Bundles a tag id with its morphological features and a univ_pos_t value.
cdef struct PosTag:
    Morphology morph  # morphological features carried by this tag
    int id            # integer identifier of the tag
    univ_pos_t pos    # part-of-speech value (type from .parts_of_speech)

# Start and end will be offsets: i + ent.start will always take you to the
# "next" entity start. If inside an entity, ent.start will be negative ---
# the "next" entity start is then the start of the entity the token is inside.
# If i _is_ the start of an entity, then ent.start will be the offset to the
# beginning of the next one.
#
# The same/inverse is true for end. If ent.end has a negative value, we are either
# at the end of an entity, or outside one. If we're inside an entity, ent.end
# will have a positive value.
#
# This allows us to easily find the span of an entity we might be inside, while
# naturally sharing an API with iterating through all entities in the sentence.
# Per-token entity annotation: a tag, a flag field, and signed start/end
# offsets that are relative to the token's own index.
#
# int32_t must be cimported from libc.stdint for this declaration to compile.
cdef struct Entity:
    int32_t tag     # entity tag identifier
    uint16_t flags  # bit flags; bit meanings are defined elsewhere

    # NOTE(review): int8_t limits each offset to the range -128..127 tokens.
    # Confirm that entity boundaries can never be further away than that, or
    # widen these fields (which changes the struct layout).
    int8_t start
    int8_t end

# Plain C record holding the annotations stored for one token.
#
# NOTE(review): the per-field notes below are inferred from names and types
# only; confirm them against the code that reads and writes the struct.
cdef struct TokenC:
    # Read-only pointer to the lexeme record associated with this token.
    const LexemeC* lex

    # Embedded by value (not pointers): morphology and entity annotation.
    Morphology morph
    Entity ent

    univ_pos_t pos
    int tag
    int idx
    int lemma
    int sense

    # Dependency-parse fields (presumably head reference and relation label).
    int head
    int dep

    bint sent_end  # boolean flag; presumably marks the last token of a sentence

    # Left/right children information -- whether these are counts or bit
    # sets cannot be told from this declaration; TODO confirm.
    uint32_t l_kids
    uint32_t r_kids

# Byte-string record: an id, a hash key, a raw byte buffer and its length.
# Ownership of the buffer is not expressed by this declaration.
cdef struct Utf8Str:
    id_t i                # identifier of the string
    hash_t key            # hash value associated with the string
    unsigned char* chars  # byte buffer (UTF-8 encoded, going by the struct name)
    int length            # length of the buffer -- presumably in bytes; confirm

# Unicode-string record: a Py_UNICODE buffer, its size, and a hash key.
# Ownership of the buffer is not expressed by this declaration.
cdef struct UniStr:
    Py_UNICODE* chars  # pointer to the Py_UNICODE character buffer
    size_t n           # size of the buffer -- presumably in characters; confirm
    hash_t key         # hash value associated with the string