mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
* Work on pickling Vocab instances. The current implementation is not correct, but it may serve to see whether this approach is workable. Pickling is necessary to address Issue #125.
This commit is contained in:
parent
85e7944572
commit
f8de403483
|
@ -99,7 +99,7 @@ cdef class Vocab:
|
|||
return self.length
|
||||
|
||||
def __reduce__(self):
|
||||
tmp_dir = tempfile.mkdtmp()
|
||||
tmp_dir = tempfile.mkdtemp()
|
||||
lex_loc = path.join(tmp_dir, 'lexemes.bin')
|
||||
str_loc = path.join(tmp_dir, 'strings.txt')
|
||||
map_loc = path.join(tmp_dir, 'tag_map.json')
|
||||
|
@ -108,7 +108,7 @@ cdef class Vocab:
|
|||
self.strings.dump(str_loc)
|
||||
json.dump(self.morphology.tag_map, open(map_loc, 'w'))
|
||||
|
||||
return (Vocab.from_dir, (tmp_dir, self.get_lex_attr), None, None)
|
||||
return (unpickle_vocab, (tmp_dir,), None, None)
|
||||
|
||||
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL:
|
||||
'''Get a pointer to a LexemeC from the lexicon, creating a new Lexeme
|
||||
|
@ -353,7 +353,13 @@ cdef class Vocab:
|
|||
return vec_len
|
||||
|
||||
|
||||
copy_reg.constructor(Vocab.from_dir)
|
||||
def unpickle_vocab(data_dir):
    '''Rebuild a Vocab from the files stored under data_dir.

    Vocab.__reduce__ returns this function together with the directory it
    wrote to, so this is the callable pickle invokes when loading a Vocab.
    '''
    # TODO: This needs fixing --- the trouble is, we can't pickle staticmethods,
    # so we need to fiddle with the design of Language a little bit.
    from .language import Language

    # The lexical attribute getters are not part of the pickled state; the
    # defaults from Language are used instead.
    default_getters = Language.default_lex_attrs()
    return Vocab.from_dir(data_dir, default_getters)
|
||||
|
||||
copy_reg.constructor(unpickle_vocab)
|
||||
|
||||
|
||||
def write_binary_vectors(in_loc, out_loc):
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
from __future__ import unicode_literals
|
||||
import pytest
|
||||
import StringIO
|
||||
import pickle
|
||||
|
||||
from spacy.attrs import LEMMA, ORTH, PROB, IS_ALPHA
|
||||
from spacy.parts_of_speech import NOUN, VERB
|
||||
|
||||
|
||||
|
||||
def test_neq(en_vocab):
|
||||
addr = en_vocab['Hello']
|
||||
assert en_vocab['bye'].orth != addr.orth
|
||||
|
@ -38,3 +41,11 @@ def test_symbols(en_vocab):
|
|||
assert en_vocab.strings['ORTH'] == ORTH
|
||||
assert en_vocab.strings['PROB'] == PROB
|
||||
|
||||
|
||||
def test_pickle_vocab(en_vocab):
    '''Smoke test: pickling a Vocab and loading it back must not raise.'''
    # Serialize to an in-memory string, then immediately deserialize it.
    payload = pickle.dumps(en_vocab)
    loaded = pickle.loads(payload)
|
||||
|
|
Loading…
Reference in New Issue
Block a user