* Add i attribute to lexeme, giving lexemes sequential IDs.

2025-07-14 18:22:27 +03:00 · 2014-10-09 13:50:05 +11:00 · 2014-10-09 13:50:05 +11:00 · d73d89a2de
commit d73d89a2de
parent 0c6402ab73
4 changed files with 9 additions and 5 deletions
--- a/spacy/lang.pxd
+++ b/spacy/lang.pxd
@@ -27,6 +27,8 @@ cdef class Lexicon:
    cdef Pool _mem
    cpdef readonly size_t size
    cdef vector[LexemeC*] lexemes
    cpdef Lexeme lookup(self, unicode string)
    cdef LexemeC* get(self, String* s) except NULL
--- a/spacy/lang.pyx
+++ b/spacy/lang.pyx
@@ -366,7 +366,7 @@ cdef class Lexicon:
            for i, flag_feature in enumerate(self._flag_features):
                if flag_feature(uni_string, prob, cluster, cases, tags):
                    flags.add(i)
-            lexeme = lexeme_init(self._mem, uni_string, prob, cluster, views, flags)
+            lexeme = lexeme_init(self._mem, self.size, uni_string, prob, cluster, views, flags)
            string_from_unicode(&string, uni_string)
            self._dict.set(string.key, lexeme)
            self.size += 1
@@ -385,7 +385,7 @@ cdef class Lexicon:
            if flag_feature(uni_string, 0.0, {}, {}):
                flags.add(i)
-        lexeme = lexeme_init(self._mem, uni_string, 0, 0, views, flags)
+        lexeme = lexeme_init(self._mem, self.size, uni_string, 0, 0, views, flags)
        self._dict.set(string.key, lexeme)
        self.size += 1
        return lexeme
--- a/spacy/lexeme.pxd
+++ b/spacy/lexeme.pxd
@@ -3,6 +3,7 @@ from cymem.cymem cimport Pool
 cdef struct LexemeC:
+    size_t i
    size_t length
    double prob
    size_t cluster
@@ -13,7 +14,7 @@ cdef struct LexemeC:
    flag_t flags
-cdef LexemeC* lexeme_init(Pool mem, unicode string, double prob, size_t cluster,
+cdef LexemeC* lexeme_init(Pool mem, size_t i, unicode string, double prob, size_t cluster,
                     list views, set flags)
 cdef bint lexeme_check_flag(LexemeC* lexeme, size_t flag_id)
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@@ -2,9 +2,10 @@ from cpython.ref cimport Py_INCREF
 from cymem.cymem cimport Pool
-cdef LexemeC* lexeme_init(Pool mem, unicode string, double prob, size_t cluster,
+cdef LexemeC* lexeme_init(Pool mem, size_t i, unicode string, double prob,
-                     list views, set flags):
+                          size_t cluster, list views, set flags):
    cdef LexemeC* lexeme = <LexemeC*>mem.alloc(1, sizeof(LexemeC))
+    lexeme.i = i
    lexeme.cluster = cluster
    lexeme.prob = prob
    lexeme.string = intern_and_encode(string, &lexeme.length)