mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
* Reinstate the fixed vocabulary: words are only added to the lexicon in init_model; after that, we create LexemeC structs with the Pool given to us.
This commit is contained in:
parent
5a7d060d9c
commit
1f7170e0e1
|
@ -55,7 +55,8 @@ cdef class Vocab:
|
||||||
if load_vectors and path.exists(path.join(data_dir, 'vec.bin')):
|
if load_vectors and path.exists(path.join(data_dir, 'vec.bin')):
|
||||||
self.repvec_length = self.load_rep_vectors(path.join(data_dir, 'vec.bin'))
|
self.repvec_length = self.load_rep_vectors(path.join(data_dir, 'vec.bin'))
|
||||||
|
|
||||||
self.packer = Packer(self, util.read_encoding_freqs(data_dir))
|
#self.packer = Packer(self, util.read_encoding_freqs(data_dir))
|
||||||
|
self.packer = None
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
"""The current number of lexemes stored."""
|
"""The current number of lexemes stored."""
|
||||||
|
@ -69,18 +70,17 @@ cdef class Vocab:
|
||||||
lex = <LexemeC*>self._by_hash.get(c_str.key)
|
lex = <LexemeC*>self._by_hash.get(c_str.key)
|
||||||
if lex != NULL:
|
if lex != NULL:
|
||||||
return lex
|
return lex
|
||||||
#if c_str.n < 3:
|
cdef bint is_oov = mem is not self.mem
|
||||||
oov = mem is not self.mem
|
if c_str.n < 3:
|
||||||
mem = self.mem
|
mem = self.mem
|
||||||
cdef unicode py_str = c_str.chars[:c_str.n]
|
cdef unicode py_str = c_str.chars[:c_str.n]
|
||||||
lex = <LexemeC*>mem.alloc(sizeof(LexemeC), 1)
|
lex = <LexemeC*>mem.alloc(sizeof(LexemeC), 1)
|
||||||
props = self.lexeme_props_getter(py_str)
|
props = self.lexeme_props_getter(py_str)
|
||||||
set_lex_struct_props(lex, props, self.strings, EMPTY_VEC)
|
set_lex_struct_props(lex, props, self.strings, EMPTY_VEC)
|
||||||
#if mem is self.mem:
|
if is_oov:
|
||||||
#else:
|
|
||||||
if oov:
|
|
||||||
lex.id = 0
|
lex.id = 0
|
||||||
self._add_lex_to_vocab(c_str.key, lex)
|
else:
|
||||||
|
self._add_lex_to_vocab(c_str.key, lex)
|
||||||
return lex
|
return lex
|
||||||
|
|
||||||
cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1:
|
cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user