mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 02:36:32 +03:00
* Add i attribute to lexeme, giving lexemes sequential IDs.
This commit is contained in:
parent
0c6402ab73
commit
d73d89a2de
|
@ -27,6 +27,8 @@ cdef class Lexicon:
|
|||
cdef Pool _mem
|
||||
cpdef readonly size_t size
|
||||
|
||||
cdef vector[LexemeC*] lexemes
|
||||
|
||||
cpdef Lexeme lookup(self, unicode string)
|
||||
cdef LexemeC* get(self, String* s) except NULL
|
||||
|
||||
|
|
|
@ -366,7 +366,7 @@ cdef class Lexicon:
|
|||
for i, flag_feature in enumerate(self._flag_features):
|
||||
if flag_feature(uni_string, prob, cluster, cases, tags):
|
||||
flags.add(i)
|
||||
lexeme = lexeme_init(self._mem, uni_string, prob, cluster, views, flags)
|
||||
lexeme = lexeme_init(self._mem, self.size, uni_string, prob, cluster, views, flags)
|
||||
string_from_unicode(&string, uni_string)
|
||||
self._dict.set(string.key, lexeme)
|
||||
self.size += 1
|
||||
|
@ -385,7 +385,7 @@ cdef class Lexicon:
|
|||
if flag_feature(uni_string, 0.0, {}, {}):
|
||||
flags.add(i)
|
||||
|
||||
lexeme = lexeme_init(self._mem, uni_string, 0, 0, views, flags)
|
||||
lexeme = lexeme_init(self._mem, self.size, uni_string, 0, 0, views, flags)
|
||||
self._dict.set(string.key, lexeme)
|
||||
self.size += 1
|
||||
return lexeme
|
||||
|
|
|
@ -3,6 +3,7 @@ from cymem.cymem cimport Pool
|
|||
|
||||
|
||||
cdef struct LexemeC:
|
||||
size_t i
|
||||
size_t length
|
||||
double prob
|
||||
size_t cluster
|
||||
|
@ -13,7 +14,7 @@ cdef struct LexemeC:
|
|||
flag_t flags
|
||||
|
||||
|
||||
cdef LexemeC* lexeme_init(Pool mem, unicode string, double prob, size_t cluster,
|
||||
cdef LexemeC* lexeme_init(Pool mem, size_t i, unicode string, double prob, size_t cluster,
|
||||
list views, set flags)
|
||||
|
||||
cdef bint lexeme_check_flag(LexemeC* lexeme, size_t flag_id)
|
||||
|
|
|
@ -2,9 +2,10 @@ from cpython.ref cimport Py_INCREF
|
|||
from cymem.cymem cimport Pool
|
||||
|
||||
|
||||
cdef LexemeC* lexeme_init(Pool mem, unicode string, double prob, size_t cluster,
|
||||
list views, set flags):
|
||||
cdef LexemeC* lexeme_init(Pool mem, size_t i, unicode string, double prob,
|
||||
size_t cluster, list views, set flags):
|
||||
cdef LexemeC* lexeme = <LexemeC*>mem.alloc(1, sizeof(LexemeC))
|
||||
lexeme.i = i
|
||||
lexeme.cluster = cluster
|
||||
lexeme.prob = prob
|
||||
lexeme.string = intern_and_encode(string, &lexeme.length)
|
||||
|
|
Loading…
Reference in New Issue
Block a user