mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
* Switch to using a heap-allocated vector in tokens
This commit is contained in:
parent
f77b7098c0
commit
08cef75ffd
|
@ -219,7 +219,7 @@ cdef class Language:
|
||||||
lexemes = <LexemeC**>calloc(len(tokens) - first_token, sizeof(LexemeC*))
|
lexemes = <LexemeC**>calloc(len(tokens) - first_token, sizeof(LexemeC*))
|
||||||
cdef size_t j
|
cdef size_t j
|
||||||
for i, j in enumerate(range(first_token, tokens.v.size())):
|
for i, j in enumerate(range(first_token, tokens.v.size())):
|
||||||
lexemes[i] = tokens.v[j]
|
lexemes[i] = tokens.v[0][j]
|
||||||
self.cache.set(key, lexemes)
|
self.cache.set(key, lexemes)
|
||||||
|
|
||||||
cdef int _split_one(self, Py_UNICODE* characters, size_t length):
|
cdef int _split_one(self, Py_UNICODE* characters, size_t length):
|
||||||
|
|
|
@ -3,7 +3,7 @@ from libcpp.vector cimport vector
|
||||||
|
|
||||||
|
|
||||||
cdef class Tokens:
|
cdef class Tokens:
|
||||||
cdef vector[LexemeC*] v
|
cdef vector[LexemeC*] *v
|
||||||
|
|
||||||
cpdef size_t id(self, size_t i) except 0
|
cpdef size_t id(self, size_t i) except 0
|
||||||
cpdef unicode string(self, size_t i)
|
cpdef unicode string(self, size_t i)
|
||||||
|
|
|
@ -65,6 +65,7 @@ cdef class Tokens:
|
||||||
"""
|
"""
|
||||||
def __cinit__(self, string_length=0):
|
def __cinit__(self, string_length=0):
|
||||||
size = int(string_length / 3) if string_length >= 3 else 1
|
size = int(string_length / 3) if string_length >= 3 else 1
|
||||||
|
self.v = new vector[LexemeC*]()
|
||||||
self.v.reserve(size)
|
self.v.reserve(size)
|
||||||
|
|
||||||
def __getitem__(self, i):
|
def __getitem__(self, i):
|
||||||
|
@ -73,11 +74,14 @@ cdef class Tokens:
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return self.v.size()
|
return self.v.size()
|
||||||
|
|
||||||
|
def __dealloc__(self):
|
||||||
|
del self.v
|
||||||
|
|
||||||
def append(self, Lexeme lexeme):
|
def append(self, Lexeme lexeme):
|
||||||
self.v.push_back(lexeme._c)
|
self.v.push_back(lexeme._c)
|
||||||
|
|
||||||
cpdef unicode string(self, size_t i):
|
cpdef unicode string(self, size_t i):
|
||||||
cdef bytes utf8_string = self.v[i].string[:self.v[i].length]
|
cdef bytes utf8_string = self.v.at(i).string[:self.v.at(i).length]
|
||||||
cdef unicode string = utf8_string.decode('utf8')
|
cdef unicode string = utf8_string.decode('utf8')
|
||||||
return string
|
return string
|
||||||
|
|
||||||
|
@ -91,7 +95,7 @@ cdef class Tokens:
|
||||||
return self.v.at(i).cluster
|
return self.v.at(i).cluster
|
||||||
|
|
||||||
cpdef bint check_flag(self, size_t i, size_t flag_id) except *:
|
cpdef bint check_flag(self, size_t i, size_t flag_id) except *:
|
||||||
return lexeme_check_flag(self.v[i], flag_id)
|
return lexeme_check_flag(self.v.at(i), flag_id)
|
||||||
|
|
||||||
cpdef unicode string_view(self, size_t i, size_t view_id):
|
cpdef unicode string_view(self, size_t i, size_t view_id):
|
||||||
return lexeme_string_view(self.v.at(i), view_id)
|
return lexeme_string_view(self.v.at(i), view_id)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user