mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
* Remove broken __reduce__ method on vocab
This commit is contained in:
parent
20235bde00
commit
478aa21cb0
|
@ -109,25 +109,6 @@ cdef class Vocab:
|
||||||
"""The current number of lexemes stored."""
|
"""The current number of lexemes stored."""
|
||||||
return self.length
|
return self.length
|
||||||
|
|
||||||
def __reduce__(self):
    """Pickle hook: write the vocab out to files and return their paths.

    The lexemes, the string store and the vectors are each dumped into a
    freshly created temporary directory. The returned 4-tuple names
    `unpickle_vocab` as the reconstructor and carries, as its arguments,
    the three file paths followed by the morphology, the lexical attribute
    getters, the serializer frequencies and the data directory.
    """
    # TODO: This is hopelessly broken. The pickled state is nothing more
    # than paths into a temp directory, and that directory is never removed
    # afterwards. The method therefore only pretends to work; what it should
    # do instead is build a proper archive file.
    scratch_dir = tempfile.mkdtemp()
    lexemes_path = path.join(scratch_dir, 'lexemes.bin')
    strings_path = path.join(scratch_dir, 'strings.json')
    vectors_path = path.join(scratch_dir, 'vec.bin')

    self.dump(lexemes_path)
    with io.open(strings_path, 'w', encoding='utf8') as strings_file:
        self.strings.dump(strings_file)
    self.dump_vectors(vectors_path)

    # Argument order must match the signature of unpickle_vocab.
    unpickle_args = (
        strings_path,
        lexemes_path,
        vectors_path,
        self.morphology,
        self.get_lex_attr,
        self.serializer_freqs,
        self.data_dir,
    )
    return (unpickle_vocab, unpickle_args, None, None)
|
|
||||||
|
|
||||||
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL:
|
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL:
|
||||||
'''Get a pointer to a LexemeC from the lexicon, creating a new Lexeme
|
'''Get a pointer to a LexemeC from the lexicon, creating a new Lexeme
|
||||||
if necessary, using memory acquired from the given pool. If the pool
|
if necessary, using memory acquired from the given pool. If the pool
|
||||||
|
@ -391,27 +372,6 @@ cdef class Vocab:
|
||||||
return vec_len
|
return vec_len
|
||||||
|
|
||||||
|
|
||||||
def unpickle_vocab(strings_loc, lex_loc, vec_loc, morphology, get_lex_attr,
                   serializer_freqs, data_dir):
    """Reconstruct a Vocab from files written by `Vocab.__reduce__`.

    strings_loc: Path of the UTF-8 strings file to load.
    lex_loc: Path of the lexemes file to load.
    vec_loc: Path of the binary vectors file, or None to skip loading vectors.
    morphology: Assigned to the new vocab; its `strings` attribute also
        becomes the vocab's string store.
    get_lex_attr, serializer_freqs, data_dir: Assigned to the new vocab as-is.

    Returns the populated Vocab.
    """
    cdef Vocab restored = Vocab()

    restored.get_lex_attr = get_lex_attr
    restored.morphology = morphology
    # The vocab shares the morphology's string store instead of keeping the
    # one created by the constructor.
    restored.strings = morphology.strings
    restored.data_dir = data_dir
    restored.serializer_freqs = serializer_freqs

    with io.open(strings_loc, 'r', encoding='utf8') as strings_file:
        restored.strings.load(strings_file)
    restored.load_lexemes(lex_loc)

    if vec_loc is None:
        return restored
    restored.vectors_length = restored.load_vectors_from_bin_loc(vec_loc)
    return restored
|
|
||||||
|
|
||||||
|
|
||||||
# Declare unpickle_vocab to the copy_reg module as a valid constructor for
# use when unpickling.
copy_reg.constructor(unpickle_vocab)
|
|
||||||
|
|
||||||
|
|
||||||
def write_binary_vectors(in_loc, out_loc):
|
def write_binary_vectors(in_loc, out_loc):
|
||||||
cdef CFile out_file = CFile(out_loc, 'wb')
|
cdef CFile out_file = CFile(out_loc, 'wb')
|
||||||
cdef Address mem
|
cdef Address mem
|
||||||
|
|
Loading…
Reference in New Issue
Block a user