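* Add an `i` field to the LexemeC struct, giving each lexeme a sequential ID: `lexeme_init` now takes the ID as a new argument, and the Lexicon passes its current `size` when it creates a lexeme, then increments `size`.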

This commit is contained in:
Matthew Honnibal 2014-10-09 13:50:05 +11:00
parent 0c6402ab73
commit d73d89a2de
4 changed files with 9 additions and 5 deletions

View File

@ -27,6 +27,8 @@ cdef class Lexicon:
cdef Pool _mem
cpdef readonly size_t size
cdef vector[LexemeC*] lexemes
cpdef Lexeme lookup(self, unicode string)
cdef LexemeC* get(self, String* s) except NULL

View File

@ -366,7 +366,7 @@ cdef class Lexicon:
for i, flag_feature in enumerate(self._flag_features):
if flag_feature(uni_string, prob, cluster, cases, tags):
flags.add(i)
lexeme = lexeme_init(self._mem, uni_string, prob, cluster, views, flags)
lexeme = lexeme_init(self._mem, self.size, uni_string, prob, cluster, views, flags)
string_from_unicode(&string, uni_string)
self._dict.set(string.key, lexeme)
self.size += 1
@ -385,7 +385,7 @@ cdef class Lexicon:
if flag_feature(uni_string, 0.0, {}, {}):
flags.add(i)
lexeme = lexeme_init(self._mem, uni_string, 0, 0, views, flags)
lexeme = lexeme_init(self._mem, self.size, uni_string, 0, 0, views, flags)
self._dict.set(string.key, lexeme)
self.size += 1
return lexeme

View File

@ -3,6 +3,7 @@ from cymem.cymem cimport Pool
cdef struct LexemeC:
size_t i
size_t length
double prob
size_t cluster
@ -13,7 +14,7 @@ cdef struct LexemeC:
flag_t flags
cdef LexemeC* lexeme_init(Pool mem, unicode string, double prob, size_t cluster,
cdef LexemeC* lexeme_init(Pool mem, size_t i, unicode string, double prob, size_t cluster,
list views, set flags)
cdef bint lexeme_check_flag(LexemeC* lexeme, size_t flag_id)

View File

@ -2,9 +2,10 @@ from cpython.ref cimport Py_INCREF
from cymem.cymem cimport Pool
cdef LexemeC* lexeme_init(Pool mem, unicode string, double prob, size_t cluster,
list views, set flags):
cdef LexemeC* lexeme_init(Pool mem, size_t i, unicode string, double prob,
size_t cluster, list views, set flags):
cdef LexemeC* lexeme = <LexemeC*>mem.alloc(1, sizeof(LexemeC))
lexeme.i = i
lexeme.cluster = cluster
lexeme.prob = prob
lexeme.string = intern_and_encode(string, &lexeme.length)