diff --git a/spacy/en.pxd b/spacy/en.pxd
index 0af541847..91d4db3af 100644
--- a/spacy/en.pxd
+++ b/spacy/en.pxd
@@ -4,10 +4,14 @@ from spacy.tokens cimport Tokens
 
 
 cdef class EnglishTokens(Tokens):
+    cpdef size_t canon(self, size_t i)
+    cpdef size_t shape(self, size_t i)
+    cpdef size_t non_sparse(self, size_t i)
+    cpdef size_t asciied(self, size_t i)
     cpdef unicode canon_string(self, size_t i)
     cpdef unicode shape_string(self, size_t i)
     cpdef unicode non_sparse_string(self, size_t i)
-    cpdef unicode asciied(self, size_t i)
+    cpdef unicode asciied_string(self, size_t i)
     cpdef bint is_alpha(self, size_t i)
     cpdef bint is_ascii(self, size_t i)
     cpdef bint is_digit(self, size_t i)
diff --git a/spacy/en.pyx b/spacy/en.pyx
index e996a7d73..5d4e6ef51 100644
--- a/spacy/en.pyx
+++ b/spacy/en.pyx
@@ -137,8 +137,23 @@ cdef class EnglishTokens(Tokens):
     cpdef unicode non_sparse_string(self, size_t i):
         return lexeme_string_view(self.lexemes[i], View_NonSparse)
 
-    cpdef unicode asciied(self, size_t i):
-        return lexeme_check_flag(self.lexemes[i], View_Asciied)
+    cpdef unicode asciied_string(self, size_t i):
+        return lexeme_string_view(self.lexemes[i], View_Asciied)
+
+    # The integer accessors below cast the stored C value straight to size_t.
+    # The builtin id() would box it into a temporary Python object and return
+    # that temporary's address, which says nothing about the lexeme.
+    # NOTE(review): assumes views[...] holds per-view C string pointers - confirm.
+    cpdef size_t canon(self, size_t i):
+        return <size_t>self.lexemes[i].views[View_CanonForm]
+
+    cpdef size_t shape(self, size_t i):
+        return <size_t>self.lexemes[i].views[View_WordShape]
+
+    cpdef size_t non_sparse(self, size_t i):
+        return <size_t>self.lexemes[i].views[View_NonSparse]
+
+    cpdef size_t asciied(self, size_t i):
+        return <size_t>self.lexemes[i].views[View_Asciied]
 
     cpdef bint is_alpha(self, size_t i):
         return lexeme_check_flag(self.lexemes[i], Flag_IsAlpha)
diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd
index 6ac78de1b..383c79f9e 100644
--- a/spacy/tokens.pxd
+++ b/spacy/tokens.pxd
@@ -7,6 +7,7 @@ cdef class Tokens:
     cdef LexemeC** lexemes
 
     cdef int push_back(self, LexemeC* lexeme) except -1
+    cpdef size_t id(self, size_t i)
     cpdef unicode string(self, size_t i)
     cpdef double prob(self, size_t i)
     cpdef size_t cluster(self, size_t i)
diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx
index 9ab5170d2..78f3dbea1 100644
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@@ -55,6 +55,11 @@ cdef class Tokens:
         cdef bytes byte_string = self.lexemes[i].string
         return byte_string.decode('utf8')
 
+    cpdef size_t id(self, size_t i):
+        # Cast the stored C value itself; the builtin id() would report the
+        # address of a temporary Python object created just for the call.
+        return <size_t>self.lexemes[i].string
+
     cpdef double prob(self, size_t i):
         return self.lexemes[i].prob
 