* Remove broken __reduce__ method on vocab

2025-12-19 08:04:42 +03:00 · 2016-03-08 15:47:11 +00:00 · 2016-03-08 15:47:11 +00:00 · 478aa21cb0
commit 478aa21cb0
parent 20235bde00
1 changed file with 0 additions and 40 deletions
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -109,25 +109,6 @@ cdef class Vocab:
        """The current number of lexemes stored."""
        return self.length
    def __reduce__(self):
        """Pickle hook: dump this vocab to disk and return a reduce tuple.

        The lexemes, the string store and the vectors are each written to a
        file inside a newly created temporary directory.  The returned tuple
        names ``unpickle_vocab`` as the reconstructor and passes it the three
        file paths together with ``morphology``, ``get_lex_attr``,
        ``serializer_freqs`` and ``data_dir``.
        """
        # TODO: This is hopelessly broken. The pickled state is nothing more
        # than paths into a temp directory, and nothing here ever removes
        # that directory. The method therefore only pretends to work; the
        # real fix is to build an archive file and ship that instead.
        scratch_dir = tempfile.mkdtemp()
        lexemes_path, strings_path, vectors_path = [
            path.join(scratch_dir, file_name)
            for file_name in ('lexemes.bin', 'strings.json', 'vec.bin')
        ]

        self.dump(lexemes_path)
        with io.open(strings_path, 'w', encoding='utf8') as strings_file:
            self.strings.dump(strings_file)
        self.dump_vectors(vectors_path)

        # Argument order must match the signature of unpickle_vocab:
        # strings first, then lexemes, then vectors.
        ctor_args = (
            strings_path,
            lexemes_path,
            vectors_path,
            self.morphology,
            self.get_lex_attr,
            self.serializer_freqs,
            self.data_dir,
        )
        return (unpickle_vocab, ctor_args, None, None)
    cdef const LexemeC* get(self, Pool mem, unicode string) except NULL:
        '''Get a pointer to a LexemeC from the lexicon, creating a new Lexeme
        if necessary, using memory acquired from the given pool.  If the pool
@@ -391,27 +372,6 @@ cdef class Vocab:
        return vec_len
 def unpickle_vocab(strings_loc, lex_loc, vec_loc, morphology, get_lex_attr,
                    serializer_freqs, data_dir):
    """Reconstruct a Vocab from on-disk dumps plus in-memory components.

    strings_loc: path of the UTF-8 file the string store is loaded from.
    lex_loc: path passed to ``load_lexemes``.
    vec_loc: path passed to ``load_vectors_from_bin_loc``; when it is None
        no vectors are loaded and ``vectors_length`` is left untouched.
    morphology, get_lex_attr, serializer_freqs, data_dir: assigned directly
        onto the new vocab.

    Returns the populated Vocab.
    """
    cdef Vocab restored = Vocab()
    restored.get_lex_attr = get_lex_attr
    restored.morphology = morphology
    # The vocab reuses the morphology's string store rather than its own,
    # so the strings loaded below land in that shared object.
    restored.strings = morphology.strings
    restored.data_dir = data_dir
    restored.serializer_freqs = serializer_freqs

    with io.open(strings_loc, 'r', encoding='utf8') as strings_file:
        restored.strings.load(strings_file)
    restored.load_lexemes(lex_loc)

    if vec_loc is not None:
        restored.vectors_length = restored.load_vectors_from_bin_loc(vec_loc)
    return restored
 # Declare unpickle_vocab to copy_reg as a valid constructor, so it can be
 # used as the callable in the reduce tuple returned by Vocab.__reduce__.
 copy_reg.constructor(unpickle_vocab)
 def write_binary_vectors(in_loc, out_loc):
    cdef CFile out_file = CFile(out_loc, 'wb')
    cdef Address mem