2014-07-07 06:21:06 +04:00
|
|
|
from libcpp.vector cimport vector
|
2014-07-05 22:51:42 +04:00
|
|
|
|
2014-07-07 06:21:06 +04:00
|
|
|
from spacy.spacy cimport StringHash
|
2014-08-22 01:49:14 +04:00
|
|
|
from spacy.lexeme cimport Lexeme
|
|
|
|
from spacy.lexeme cimport LexID
|
|
|
|
from spacy.lexeme cimport ClusterID
|
2014-07-05 22:51:42 +04:00
|
|
|
|
2014-07-07 14:47:21 +04:00
|
|
|
from spacy.spacy cimport Language
|
|
|
|
from spacy.tokens cimport Tokens
|
2014-08-22 01:49:14 +04:00
|
|
|
cimport cython
|
|
|
|
|
|
|
|
|
2014-07-07 14:47:21 +04:00
|
|
|
# Declaration of the English extension type, derived from spacy.Language.
# Only C-level method signatures are declared here.
# NOTE(review): the base is spelled `spacy.Language`, while the visible cimports
# bring in `Language` directly from spacy.spacy -- confirm the dotted name resolves.
cdef class English(spacy.Language):
|
2014-08-18 21:14:00 +04:00
|
|
|
# C-level method taking a unicode word and returning a C int.
# NOTE(review): presumably the position at which `word` should be split --
# confirm against the implementation.
cdef int find_split(self, unicode word)
|
2014-08-20 15:39:39 +04:00
|
|
|
# C-level method taking a unicode word and a pointer to a Lexeme.
# `except -1` reserves a return value of -1 to signal that an exception was raised.
# NOTE(review): presumably fills orthography-related fields of `lex` from
# `word` -- confirm against the implementation.
cdef int set_orth(self, unicode word, Lexeme* lex) except -1
|
2014-08-22 02:02:37 +04:00
|
|
|
|
2014-07-07 14:47:21 +04:00
|
|
|
|
|
|
|
# Module-level variable typed as the English extension type.
# NOTE(review): presumably the shared instance used by the module-level
# functions declared below -- confirm against the implementation.
cdef English EN
|
2014-07-05 22:51:42 +04:00
|
|
|
|
2014-08-22 02:02:37 +04:00
|
|
|
|
2014-08-22 01:49:14 +04:00
|
|
|
# Module-level function callable from both Python and C (cpdef): takes a
# unicode word and returns a LexID. `except 0` reserves a return value of 0 to
# signal that an exception was raised, so 0 is never a valid LexID result.
cpdef LexID lookup(unicode word) except 0
|
2014-07-07 14:47:21 +04:00
|
|
|
# Module-level function callable from both Python and C (cpdef): takes a
# unicode string and returns a Tokens value (type cimported from spacy.tokens).
cpdef Tokens tokenize(unicode string)
|
2014-07-05 22:51:42 +04:00
|
|
|
# Module-level function callable from both Python and C (cpdef): takes a
# StringHash value and returns a unicode string.
# NOTE(review): presumably the inverse of the string-hashing step, recovering
# the original text for a hash -- confirm against the implementation.
cpdef unicode unhash(StringHash hash_value)
|