2014-12-09 13:16:17 +03:00
|
|
|
from .tokens cimport TokenC, Morphology
|
|
|
|
from .lexeme cimport Lexeme
|
|
|
|
from .utf8string cimport StringStore
|
|
|
|
|
|
|
|
from preshed.maps cimport PreshMapArray
|
|
|
|
from cymem.cymem cimport Pool
|
|
|
|
|
|
|
|
# Google universal tag set
|
|
|
|
# Coarse part-of-speech tag identifiers.
#
# Members carry no explicit values, so they are numbered consecutively from
# zero in declaration order. Do not reorder them: any stored integer tag
# value would change meaning.
cpdef enum univ_tag_t:
    NO_TAG  # First member, therefore 0.
    ADJ
    ADV
    ADP
    CONJ
    DET
    NOUN
    NUM
    PRON
    PRT
    VERB
    X
    PUNCT
    EOL  # NOTE(review): presumably an end-of-line marker -- confirm with callers.
    # Last member, so its value equals the number of tags declared above it.
    # New tags must be inserted before this line.
    N_UNIV_TAGS
|
|
|
|
|
|
|
|
|
|
|
|
# C-level record grouping the information held for one tag: its morphology
# data, an integer id, and its coarse univ_tag_t category.
cdef struct PosTag:
    Morphology morph  # Morphology struct cimported from .tokens.
    int id            # NOTE(review): presumably an index into a tag-name table -- confirm.
    univ_tag_t pos    # Coarse part-of-speech category for this tag.
|
|
|
|
|
|
|
|
|
|
|
|
# Extension type declaring the state and C-level methods used to assign
# lemma/morphology information to tokens. Only the declarations live here;
# the implementations are in the matching .pyx module.
cdef class Morphologizer:
    cdef Pool mem               # cymem Pool; presumably owns the C allocations below -- confirm.
    cdef StringStore strings    # StringStore cimported from .utf8string.
    cdef object lemmatizer      # Arbitrary Python object; its interface is not visible here.
    cdef PosTag* tags           # Raw pointer to PosTag records; length is not declared here.

    # The only attribute exposed (read-only) to Python code.
    cdef readonly list tag_names

    cdef PreshMapArray _cache   # NOTE(review): presumably memoises per-tag results -- confirm.

    # Both methods return a C int and are declared `except -1`: a return value
    # of -1 tells the caller that a Python exception is pending.
    cdef int lemmatize(self, const univ_tag_t pos, const Lexeme* lex) except -1
    cdef int set_morph(self, const int i, TokenC* tokens) except -1
|