mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
* Fiddle with token features
This commit is contained in:
parent
1533041885
commit
5aa591106b
|
@ -4,10 +4,14 @@ from spacy.tokens cimport Tokens
|
|||
|
||||
|
||||
# Declarations for EnglishTokens, a Tokens subclass. Every method takes a
# token index i. The size_t methods and the unicode *_string methods come in
# pairs per view (canon, shape, non_sparse, asciied); the bint methods are
# per-token boolean checks.
cdef class EnglishTokens(Tokens):
|
||||
cpdef size_t canon(self, size_t i)
|
||||
cpdef size_t shape(self, size_t i)
|
||||
cpdef size_t non_sparse(self, size_t i)
|
||||
cpdef size_t asciied(self, size_t i)
|
||||
cpdef unicode canon_string(self, size_t i)
|
||||
cpdef unicode shape_string(self, size_t i)
|
||||
cpdef unicode non_sparse_string(self, size_t i)
|
||||
# NOTE(review): `asciied` is also declared above with a size_t return type.
# The hunk header (10 old lines -> 14 new lines) suggests this unicode
# declaration is the removed side of the diff, replaced by asciied_string
# below -- confirm against the repository.
cpdef unicode asciied(self, size_t i)
|
||||
cpdef unicode asciied_string(self, size_t i)
|
||||
cpdef bint is_alpha(self, size_t i)
|
||||
cpdef bint is_ascii(self, size_t i)
|
||||
cpdef bint is_digit(self, size_t i)
|
||||
|
|
16
spacy/en.pyx
16
spacy/en.pyx
|
@ -137,8 +137,20 @@ cdef class EnglishTokens(Tokens):
|
|||
# Return the unicode string that lexeme_string_view yields for the
# View_NonSparse view of the lexeme at token index i.
# No bounds check is performed on i here.
cpdef unicode non_sparse_string(self, size_t i):
|
||||
return lexeme_string_view(self.lexemes[i], View_NonSparse)
|
||||
|
||||
# NOTE(review): declared to return unicode, but the body returns the result of
# lexeme_check_flag called with a View_* constant, whereas the other
# unicode-returning methods in this class call lexeme_string_view.
# The hunk header (8 old lines -> 20 new lines) suggests this definition is the
# removed side of the diff, superseded by asciied_string -- confirm before
# relying on it.
cpdef unicode asciied(self, size_t i):
|
||||
return lexeme_check_flag(self.lexemes[i], View_Asciied)
|
||||
# Return the unicode string that lexeme_string_view yields for the
# View_Asciied view of the lexeme at token index i.
# No bounds check is performed on i here.
cpdef unicode asciied_string(self, size_t i):
|
||||
return lexeme_string_view(self.lexemes[i], View_Asciied)
|
||||
|
||||
cpdef size_t canon(self, size_t i):
    """Return id() of the View_CanonForm entry in the views of lexeme i.

    No bounds check is performed on i.

    NOTE(review): if `id` resolves to the Python builtin, the entry is first
    coerced to a Python object and the result is that temporary object's
    identity, which may not be stable across calls -- confirm what `id`
    refers to in this module.
    """
    cdef size_t view_slot = <size_t>View_CanonForm
    return id(self.lexemes[i].views[view_slot])
|
||||
|
||||
cpdef size_t shape(self, size_t i):
    """Return id() of the View_WordShape entry in the views of lexeme i.

    No bounds check is performed on i.

    NOTE(review): if `id` resolves to the Python builtin, the entry is first
    coerced to a Python object and the result is that temporary object's
    identity, which may not be stable across calls -- confirm what `id`
    refers to in this module.
    """
    cdef size_t view_slot = <size_t>View_WordShape
    return id(self.lexemes[i].views[view_slot])
|
||||
|
||||
cpdef size_t non_sparse(self, size_t i):
    """Return id() of the View_NonSparse entry in the views of lexeme i.

    No bounds check is performed on i.

    NOTE(review): if `id` resolves to the Python builtin, the entry is first
    coerced to a Python object and the result is that temporary object's
    identity, which may not be stable across calls -- confirm what `id`
    refers to in this module.
    """
    cdef size_t view_slot = <size_t>View_NonSparse
    return id(self.lexemes[i].views[view_slot])
|
||||
|
||||
cpdef size_t asciied(self, size_t i):
    """Return id() of the View_Asciied entry in the views of lexeme i.

    No bounds check is performed on i.

    NOTE(review): if `id` resolves to the Python builtin, the entry is first
    coerced to a Python object and the result is that temporary object's
    identity, which may not be stable across calls -- confirm what `id`
    refers to in this module.
    """
    cdef size_t view_slot = <size_t>View_Asciied
    return id(self.lexemes[i].views[view_slot])
|
||||
|
||||
# Return the result of lexeme_check_flag for Flag_IsAlpha on the lexeme at
# token index i, as a C boolean (bint).
# No bounds check is performed on i here.
cpdef bint is_alpha(self, size_t i):
|
||||
return lexeme_check_flag(self.lexemes[i], Flag_IsAlpha)
|
||||
|
|
|
@ -7,6 +7,7 @@ cdef class Tokens:
|
|||
# Array of pointers to LexemeC structs.
# presumably one entry per token, indexed by token position -- TODO confirm
cdef LexemeC** lexemes
|
||||
# C-level method taking one lexeme pointer; `except -1` means a return value
# of -1 signals that a Python exception was raised.
cdef int push_back(self, LexemeC* lexeme) except -1
|
||||
|
||||
# Per-token accessors; each takes a token index i.
# NOTE(review): the hunk header (6 old lines -> 7 new lines) indicates one of
# these declarations is new in this change; presumably `id` -- confirm.
cpdef size_t id(self, size_t i)
|
||||
cpdef unicode string(self, size_t i)
|
||||
cpdef double prob(self, size_t i)
|
||||
cpdef size_t cluster(self, size_t i)
|
||||
|
|
|
@ -55,6 +55,9 @@ cdef class Tokens:
|
|||
cdef bytes byte_string = self.lexemes[i].string
|
||||
return byte_string.decode('utf8')
|
||||
|
||||
cpdef size_t id(self, size_t i):
    """Return id() of the `string` field of the lexeme at token index i.

    No bounds check is performed on i.

    NOTE(review): if `id` resolves to the Python builtin, the C string is
    first coerced to a temporary Python object and the result is that
    temporary's identity, which may not be stable across calls -- confirm
    what `id` refers to in this module.
    """
    cdef LexemeC* lexeme = self.lexemes[i]
    return id(lexeme.string)
|
||||
|
||||
cpdef double prob(self, size_t i):
    """Return the `prob` field of the lexeme at token index i.

    No bounds check is performed on i.
    """
    cdef LexemeC* lexeme = self.lexemes[i]
    return lexeme.prob
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user