diff --git a/spacy/lang.pxd b/spacy/lang.pxd index 8319ecbd3..906b9231f 100644 --- a/spacy/lang.pxd +++ b/spacy/lang.pxd @@ -27,6 +27,8 @@ cdef class Lexicon: cdef Pool _mem cpdef readonly size_t size + cdef vector[LexemeC*] lexemes + cpdef Lexeme lookup(self, unicode string) cdef LexemeC* get(self, String* s) except NULL diff --git a/spacy/lang.pyx b/spacy/lang.pyx index 17a5fe10d..f6abf4aee 100644 --- a/spacy/lang.pyx +++ b/spacy/lang.pyx @@ -366,7 +366,7 @@ cdef class Lexicon: for i, flag_feature in enumerate(self._flag_features): if flag_feature(uni_string, prob, cluster, cases, tags): flags.add(i) - lexeme = lexeme_init(self._mem, uni_string, prob, cluster, views, flags) + lexeme = lexeme_init(self._mem, self.size, uni_string, prob, cluster, views, flags) string_from_unicode(&string, uni_string) self._dict.set(string.key, lexeme) self.size += 1 @@ -385,7 +385,7 @@ cdef class Lexicon: if flag_feature(uni_string, 0.0, {}, {}): flags.add(i) - lexeme = lexeme_init(self._mem, uni_string, 0, 0, views, flags) + lexeme = lexeme_init(self._mem, self.size, uni_string, 0, 0, views, flags) self._dict.set(string.key, lexeme) self.size += 1 return lexeme diff --git a/spacy/lexeme.pxd b/spacy/lexeme.pxd index 41156b673..c17994462 100644 --- a/spacy/lexeme.pxd +++ b/spacy/lexeme.pxd @@ -3,6 +3,7 @@ from cymem.cymem cimport Pool cdef struct LexemeC: + size_t i size_t length double prob size_t cluster @@ -13,7 +14,7 @@ cdef struct LexemeC: flag_t flags -cdef LexemeC* lexeme_init(Pool mem, unicode string, double prob, size_t cluster, +cdef LexemeC* lexeme_init(Pool mem, size_t i, unicode string, double prob, size_t cluster, list views, set flags) cdef bint lexeme_check_flag(LexemeC* lexeme, size_t flag_id) diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index beeb14f0e..604ec6181 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -2,9 +2,10 @@ from cpython.ref cimport Py_INCREF from cymem.cymem cimport Pool -cdef LexemeC* lexeme_init(Pool mem, unicode string, double prob, size_t cluster, - list views, set flags): +cdef LexemeC* lexeme_init(Pool mem, size_t i, unicode string, double prob, + size_t cluster, list views, set flags): cdef LexemeC* lexeme = mem.alloc(1, sizeof(LexemeC)) + lexeme.i = i lexeme.cluster = cluster lexeme.prob = prob lexeme.string = intern_and_encode(string, &lexeme.length)