mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
* Add i attribute to lexeme, giving lexemes sequential IDs.
This commit is contained in:
parent
0c6402ab73
commit
d73d89a2de
|
@ -27,6 +27,8 @@ cdef class Lexicon:
|
||||||
cdef Pool _mem
|
cdef Pool _mem
|
||||||
cpdef readonly size_t size
|
cpdef readonly size_t size
|
||||||
|
|
||||||
|
cdef vector[LexemeC*] lexemes
|
||||||
|
|
||||||
cpdef Lexeme lookup(self, unicode string)
|
cpdef Lexeme lookup(self, unicode string)
|
||||||
cdef LexemeC* get(self, String* s) except NULL
|
cdef LexemeC* get(self, String* s) except NULL
|
||||||
|
|
||||||
|
|
|
@ -366,7 +366,7 @@ cdef class Lexicon:
|
||||||
for i, flag_feature in enumerate(self._flag_features):
|
for i, flag_feature in enumerate(self._flag_features):
|
||||||
if flag_feature(uni_string, prob, cluster, cases, tags):
|
if flag_feature(uni_string, prob, cluster, cases, tags):
|
||||||
flags.add(i)
|
flags.add(i)
|
||||||
lexeme = lexeme_init(self._mem, uni_string, prob, cluster, views, flags)
|
lexeme = lexeme_init(self._mem, self.size, uni_string, prob, cluster, views, flags)
|
||||||
string_from_unicode(&string, uni_string)
|
string_from_unicode(&string, uni_string)
|
||||||
self._dict.set(string.key, lexeme)
|
self._dict.set(string.key, lexeme)
|
||||||
self.size += 1
|
self.size += 1
|
||||||
|
@ -385,7 +385,7 @@ cdef class Lexicon:
|
||||||
if flag_feature(uni_string, 0.0, {}, {}):
|
if flag_feature(uni_string, 0.0, {}, {}):
|
||||||
flags.add(i)
|
flags.add(i)
|
||||||
|
|
||||||
lexeme = lexeme_init(self._mem, uni_string, 0, 0, views, flags)
|
lexeme = lexeme_init(self._mem, self.size, uni_string, 0, 0, views, flags)
|
||||||
self._dict.set(string.key, lexeme)
|
self._dict.set(string.key, lexeme)
|
||||||
self.size += 1
|
self.size += 1
|
||||||
return lexeme
|
return lexeme
|
||||||
|
|
|
@ -3,6 +3,7 @@ from cymem.cymem cimport Pool
|
||||||
|
|
||||||
|
|
||||||
cdef struct LexemeC:
|
cdef struct LexemeC:
|
||||||
|
size_t i
|
||||||
size_t length
|
size_t length
|
||||||
double prob
|
double prob
|
||||||
size_t cluster
|
size_t cluster
|
||||||
|
@ -13,7 +14,7 @@ cdef struct LexemeC:
|
||||||
flag_t flags
|
flag_t flags
|
||||||
|
|
||||||
|
|
||||||
cdef LexemeC* lexeme_init(Pool mem, unicode string, double prob, size_t cluster,
|
cdef LexemeC* lexeme_init(Pool mem, size_t i, unicode string, double prob, size_t cluster,
|
||||||
list views, set flags)
|
list views, set flags)
|
||||||
|
|
||||||
cdef bint lexeme_check_flag(LexemeC* lexeme, size_t flag_id)
|
cdef bint lexeme_check_flag(LexemeC* lexeme, size_t flag_id)
|
||||||
|
|
|
@ -2,9 +2,10 @@ from cpython.ref cimport Py_INCREF
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
|
|
||||||
|
|
||||||
cdef LexemeC* lexeme_init(Pool mem, unicode string, double prob, size_t cluster,
|
cdef LexemeC* lexeme_init(Pool mem, size_t i, unicode string, double prob,
|
||||||
list views, set flags):
|
size_t cluster, list views, set flags):
|
||||||
cdef LexemeC* lexeme = <LexemeC*>mem.alloc(1, sizeof(LexemeC))
|
cdef LexemeC* lexeme = <LexemeC*>mem.alloc(1, sizeof(LexemeC))
|
||||||
|
lexeme.i = i
|
||||||
lexeme.cluster = cluster
|
lexeme.cluster = cluster
|
||||||
lexeme.prob = prob
|
lexeme.prob = prob
|
||||||
lexeme.string = intern_and_encode(string, &lexeme.length)
|
lexeme.string = intern_and_encode(string, &lexeme.length)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user