mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-02 20:23:12 +03:00
* Start trying to pickle Vocab
This commit is contained in:
parent
5ca57bd859
commit
85e7944572
|
@ -25,7 +25,6 @@ cdef struct _Cached:
|
||||||
|
|
||||||
|
|
||||||
cdef class Vocab:
|
cdef class Vocab:
|
||||||
cpdef public lexeme_props_getter
|
|
||||||
cdef Pool mem
|
cdef Pool mem
|
||||||
cpdef readonly StringStore strings
|
cpdef readonly StringStore strings
|
||||||
cpdef readonly Morphology morphology
|
cpdef readonly Morphology morphology
|
||||||
|
@ -33,7 +32,6 @@ cdef class Vocab:
|
||||||
cdef public object _serializer
|
cdef public object _serializer
|
||||||
cdef public object data_dir
|
cdef public object data_dir
|
||||||
cdef public object get_lex_attr
|
cdef public object get_lex_attr
|
||||||
cdef public object pos_tags
|
|
||||||
cdef public object serializer_freqs
|
cdef public object serializer_freqs
|
||||||
|
|
||||||
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
|
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
|
||||||
|
|
|
@ -10,6 +10,8 @@ from os import path
|
||||||
import io
|
import io
|
||||||
import math
|
import math
|
||||||
import json
|
import json
|
||||||
|
import tempfile
|
||||||
|
import copy_reg
|
||||||
|
|
||||||
from .lexeme cimport EMPTY_LEXEME
|
from .lexeme cimport EMPTY_LEXEME
|
||||||
from .lexeme cimport Lexeme
|
from .lexeme cimport Lexeme
|
||||||
|
@ -96,6 +98,18 @@ cdef class Vocab:
|
||||||
"""The current number of lexemes stored."""
|
"""The current number of lexemes stored."""
|
||||||
return self.length
|
return self.length
|
||||||
|
|
||||||
|
def __reduce__(self):
    """Pickle support: dump the vocab to a fresh temporary directory.

    Writes three files into a newly created temporary directory:
    ``lexemes.bin`` (via ``self.dump``), ``strings.txt`` (via
    ``self.strings.dump``) and ``tag_map.json`` (the morphology's tag map
    serialised as JSON).

    Returns:
        A 4-tuple in the ``__reduce__`` protocol format: the callable
        ``Vocab.from_dir``, its arguments ``(tmp_dir, self.get_lex_attr)``,
        and ``None`` for both the state and list-items slots.

    NOTE(review): the temporary directory is not removed here; presumably
    it must outlive this call so ``Vocab.from_dir`` can read it during
    unpickling -- confirm who is responsible for cleaning it up.
    """
    # `tempfile` has no `mkdtmp`; `mkdtemp` is the function that creates
    # (and returns the path of) a new temporary directory.
    tmp_dir = tempfile.mkdtemp()
    lex_loc = path.join(tmp_dir, 'lexemes.bin')
    str_loc = path.join(tmp_dir, 'strings.txt')
    map_loc = path.join(tmp_dir, 'tag_map.json')

    self.dump(lex_loc)
    self.strings.dump(str_loc)
    # Close the file explicitly so the JSON is flushed to disk before the
    # directory is read back by the reconstructing callable.
    with open(map_loc, 'w') as map_file:
        json.dump(self.morphology.tag_map, map_file)

    return (Vocab.from_dir, (tmp_dir, self.get_lex_attr), None, None)
|
||||||
|
|
||||||
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL:
|
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL:
|
||||||
'''Get a pointer to a LexemeC from the lexicon, creating a new Lexeme
|
'''Get a pointer to a LexemeC from the lexicon, creating a new Lexeme
|
||||||
if necessary, using memory acquired from the given pool. If the pool
|
if necessary, using memory acquired from the given pool. If the pool
|
||||||
|
@ -339,6 +353,9 @@ cdef class Vocab:
|
||||||
return vec_len
|
return vec_len
|
||||||
|
|
||||||
|
|
||||||
|
# Declare Vocab.from_dir as a valid constructor for the (Python 2) copy_reg
# machinery; Vocab.__reduce__ returns it as the callable that rebuilds a
# pickled Vocab.
copy_reg.constructor(Vocab.from_dir)
|
||||||
|
|
||||||
|
|
||||||
def write_binary_vectors(in_loc, out_loc):
|
def write_binary_vectors(in_loc, out_loc):
|
||||||
cdef CFile out_file = CFile(out_loc, 'wb')
|
cdef CFile out_file = CFile(out_loc, 'wb')
|
||||||
cdef Address mem
|
cdef Address mem
|
||||||
|
|
Loading…
Reference in New Issue
Block a user