* Add an `i` attribute to the LexemeC struct, giving each lexeme a sequential ID (the Lexicon's size at the time the lexeme is created).

This commit is contained in:
Matthew Honnibal 2014-10-09 13:50:05 +11:00
parent 0c6402ab73
commit d73d89a2de
4 changed files with 9 additions and 5 deletions

View File

@ -27,6 +27,8 @@ cdef class Lexicon:
cdef Pool _mem cdef Pool _mem
cpdef readonly size_t size cpdef readonly size_t size
cdef vector[LexemeC*] lexemes
cpdef Lexeme lookup(self, unicode string) cpdef Lexeme lookup(self, unicode string)
cdef LexemeC* get(self, String* s) except NULL cdef LexemeC* get(self, String* s) except NULL

View File

@ -366,7 +366,7 @@ cdef class Lexicon:
for i, flag_feature in enumerate(self._flag_features): for i, flag_feature in enumerate(self._flag_features):
if flag_feature(uni_string, prob, cluster, cases, tags): if flag_feature(uni_string, prob, cluster, cases, tags):
flags.add(i) flags.add(i)
lexeme = lexeme_init(self._mem, uni_string, prob, cluster, views, flags) lexeme = lexeme_init(self._mem, self.size, uni_string, prob, cluster, views, flags)
string_from_unicode(&string, uni_string) string_from_unicode(&string, uni_string)
self._dict.set(string.key, lexeme) self._dict.set(string.key, lexeme)
self.size += 1 self.size += 1
@ -385,7 +385,7 @@ cdef class Lexicon:
if flag_feature(uni_string, 0.0, {}, {}): if flag_feature(uni_string, 0.0, {}, {}):
flags.add(i) flags.add(i)
lexeme = lexeme_init(self._mem, uni_string, 0, 0, views, flags) lexeme = lexeme_init(self._mem, self.size, uni_string, 0, 0, views, flags)
self._dict.set(string.key, lexeme) self._dict.set(string.key, lexeme)
self.size += 1 self.size += 1
return lexeme return lexeme

View File

@ -3,6 +3,7 @@ from cymem.cymem cimport Pool
cdef struct LexemeC: cdef struct LexemeC:
size_t i
size_t length size_t length
double prob double prob
size_t cluster size_t cluster
@ -13,7 +14,7 @@ cdef struct LexemeC:
flag_t flags flag_t flags
cdef LexemeC* lexeme_init(Pool mem, unicode string, double prob, size_t cluster, cdef LexemeC* lexeme_init(Pool mem, size_t i, unicode string, double prob, size_t cluster,
list views, set flags) list views, set flags)
cdef bint lexeme_check_flag(LexemeC* lexeme, size_t flag_id) cdef bint lexeme_check_flag(LexemeC* lexeme, size_t flag_id)

View File

@ -2,9 +2,10 @@ from cpython.ref cimport Py_INCREF
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
cdef LexemeC* lexeme_init(Pool mem, unicode string, double prob, size_t cluster, cdef LexemeC* lexeme_init(Pool mem, size_t i, unicode string, double prob,
list views, set flags): size_t cluster, list views, set flags):
cdef LexemeC* lexeme = <LexemeC*>mem.alloc(1, sizeof(LexemeC)) cdef LexemeC* lexeme = <LexemeC*>mem.alloc(1, sizeof(LexemeC))
lexeme.i = i
lexeme.cluster = cluster lexeme.cluster = cluster
lexeme.prob = prob lexeme.prob = prob
lexeme.string = intern_and_encode(string, &lexeme.length) lexeme.string = intern_and_encode(string, &lexeme.length)