mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
* Restore lexemes field to lexicon
This commit is contained in:
parent
6c807aa45f
commit
a8ca078b24
|
@ -26,6 +26,7 @@ cdef class Lexicon:
|
||||||
cdef Pool mem
|
cdef Pool mem
|
||||||
cpdef readonly size_t size
|
cpdef readonly size_t size
|
||||||
cpdef readonly StringStore strings
|
cpdef readonly StringStore strings
|
||||||
|
cdef vector[Lexeme*] lexemes
|
||||||
|
|
||||||
cdef Lexeme* get(self, String* s) except NULL
|
cdef Lexeme* get(self, String* s) except NULL
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@ from murmurhash.mrmr cimport hash64
|
||||||
from preshed.maps cimport PreshMap
|
from preshed.maps cimport PreshMap
|
||||||
|
|
||||||
from .lexeme cimport Lexeme
|
from .lexeme cimport Lexeme
|
||||||
|
from .lexeme cimport EMPTY_LEXEME
|
||||||
from .lexeme cimport init as lexeme_init
|
from .lexeme cimport init as lexeme_init
|
||||||
|
|
||||||
from . import util
|
from . import util
|
||||||
|
@ -224,6 +225,7 @@ cdef class Lexicon:
|
||||||
self.mem = Pool()
|
self.mem = Pool()
|
||||||
self._dict = PreshMap(2 ** 20)
|
self._dict = PreshMap(2 ** 20)
|
||||||
self.strings = StringStore()
|
self.strings = StringStore()
|
||||||
|
self.lexemes.push_back(&EMPTY_LEXEME)
|
||||||
self.size = 1
|
self.size = 1
|
||||||
|
|
||||||
cdef Lexeme* get(self, String* string) except NULL:
|
cdef Lexeme* get(self, String* string) except NULL:
|
||||||
|
@ -232,14 +234,19 @@ cdef class Lexicon:
|
||||||
if lex != NULL:
|
if lex != NULL:
|
||||||
return lex
|
return lex
|
||||||
lex = <Lexeme*>self.mem.alloc(sizeof(Lexeme), 1)
|
lex = <Lexeme*>self.mem.alloc(sizeof(Lexeme), 1)
|
||||||
lex[0] = lexeme_init(string.chars[:string.n], string.key, self.strings, {})
|
lex[0] = lexeme_init(self.size, string.chars[:string.n], string.key, self.strings, {})
|
||||||
self._dict.set(string.key, lex)
|
self._dict.set(string.key, lex)
|
||||||
|
while self.lexemes.size() < (lex.id + 1):
|
||||||
|
self.lexemes.push_back(&EMPTY_LEXEME)
|
||||||
|
self.lexemes[lex.id] = lex
|
||||||
self.size += 1
|
self.size += 1
|
||||||
return lex
|
return lex
|
||||||
|
|
||||||
def __getitem__(self, unicode uni_string):
|
def __getitem__(self, id_or_string):
|
||||||
|
if type(id_or_string) == int:
|
||||||
|
return self.lexemes.at(id_or_string)[0]
|
||||||
cdef String string
|
cdef String string
|
||||||
string_from_unicode(&string, uni_string)
|
string_from_unicode(&string, id_or_string)
|
||||||
cdef Lexeme* lexeme = self.get(&string)
|
cdef Lexeme* lexeme = self.get(&string)
|
||||||
return lexeme[0]
|
return lexeme[0]
|
||||||
|
|
||||||
|
@ -247,7 +254,7 @@ cdef class Lexicon:
|
||||||
cdef String s
|
cdef String s
|
||||||
string_from_unicode(&s, uni_string)
|
string_from_unicode(&s, uni_string)
|
||||||
cdef Lexeme* lex = self.get(&s)
|
cdef Lexeme* lex = self.get(&s)
|
||||||
lex[0] = lexeme_init(s.chars[:s.n], s.key, self.strings, props)
|
lex[0] = lexeme_init(lex.id, s.chars[:s.n], s.key, self.strings, props)
|
||||||
|
|
||||||
def dump(self, loc):
|
def dump(self, loc):
|
||||||
if path.exists(loc):
|
if path.exists(loc):
|
||||||
|
@ -287,7 +294,11 @@ cdef class Lexicon:
|
||||||
if st != 1:
|
if st != 1:
|
||||||
break
|
break
|
||||||
self._dict.set(key, lexeme)
|
self._dict.set(key, lexeme)
|
||||||
|
while self.lexemes.size() < (lexeme.id + 1):
|
||||||
|
self.lexemes.push_back(&EMPTY_LEXEME)
|
||||||
|
self.lexemes[lexeme.id] = lexeme
|
||||||
i += 1
|
i += 1
|
||||||
|
self.size += 1
|
||||||
fclose(fp)
|
fclose(fp)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user