From 08cef75ffdb38c6f431deeb09a3eb67bf240eb57 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 15 Sep 2014 03:46:14 +0200 Subject: [PATCH] * Switch to using a heap-allocated vector in tokens --- spacy/lang.pyx | 2 +- spacy/tokens.pxd | 2 +- spacy/tokens.pyx | 8 ++++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/spacy/lang.pyx b/spacy/lang.pyx index a2662fc5f..894d9a3c4 100644 --- a/spacy/lang.pyx +++ b/spacy/lang.pyx @@ -219,7 +219,7 @@ cdef class Language: lexemes = calloc(len(tokens) - first_token, sizeof(LexemeC*)) cdef size_t j for i, j in enumerate(range(first_token, tokens.v.size())): - lexemes[i] = tokens.v[j] + lexemes[i] = tokens.v[0][j] self.cache.set(key, lexemes) cdef int _split_one(self, Py_UNICODE* characters, size_t length): diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd index c014425d0..bff4c7742 100644 --- a/spacy/tokens.pxd +++ b/spacy/tokens.pxd @@ -3,7 +3,7 @@ from libcpp.vector cimport vector cdef class Tokens: - cdef vector[LexemeC*] v + cdef vector[LexemeC*] *v cpdef size_t id(self, size_t i) except 0 cpdef unicode string(self, size_t i) diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx index cf88aeedb..52d1e7c32 100644 --- a/spacy/tokens.pyx +++ b/spacy/tokens.pyx @@ -65,6 +65,7 @@ cdef class Tokens: """ def __cinit__(self, string_length=0): size = int(string_length / 3) if string_length >= 3 else 1 + self.v = new vector[LexemeC*]() self.v.reserve(size) def __getitem__(self, i): @@ -73,11 +74,14 @@ cdef class Tokens: def __len__(self): return self.v.size() + def __dealloc__(self): + del self.v + def append(self, Lexeme lexeme): self.v.push_back(lexeme._c) cpdef unicode string(self, size_t i): - cdef bytes utf8_string = self.v[i].string[:self.v[i].length] + cdef bytes utf8_string = self.v.at(i).string[:self.v.at(i).length] cdef unicode string = utf8_string.decode('utf8') return string @@ -91,7 +95,7 @@ cdef class Tokens: return self.v.at(i).cluster cpdef bint check_flag(self, size_t i, size_t flag_id) except *: - return lexeme_check_flag(self.v[i], flag_id) + return lexeme_check_flag(self.v.at(i), flag_id) cpdef unicode string_view(self, size_t i, size_t view_id): return lexeme_string_view(self.v.at(i), view_id)