mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	* Work on pickling Vocab instances. The current implementation is not correct, but it may serve to see whether this approach is workable. Pickling is necessary to address Issue #125
This commit is contained in:
		
							parent
							
								
									85e7944572
								
							
						
					
					
						commit
						f8de403483
					
				|  | @ -99,7 +99,7 @@ cdef class Vocab: | |||
|         return self.length | ||||
| 
 | ||||
|     def __reduce__(self): | ||||
|         tmp_dir = tempfile.mkdtmp() | ||||
|         tmp_dir = tempfile.mkdtemp() | ||||
|         lex_loc = path.join(tmp_dir, 'lexemes.bin') | ||||
|         str_loc = path.join(tmp_dir, 'strings.txt') | ||||
|         map_loc = path.join(tmp_dir, 'tag_map.json') | ||||
|  | @ -108,7 +108,7 @@ cdef class Vocab: | |||
|         self.strings.dump(str_loc) | ||||
|         json.dump(self.morphology.tag_map, open(map_loc, 'w')) | ||||
| 
 | ||||
|         return (Vocab.from_dir, (tmp_dir, self.get_lex_attr), None, None) | ||||
|         return (unpickle_vocab, (tmp_dir,), None, None) | ||||
| 
 | ||||
|     cdef const LexemeC* get(self, Pool mem, unicode string) except NULL: | ||||
|         '''Get a pointer to a LexemeC from the lexicon, creating a new Lexeme | ||||
|  | @ -353,7 +353,13 @@ cdef class Vocab: | |||
|         return vec_len | ||||
| 
 | ||||
| 
 | ||||
def unpickle_vocab(data_dir):
    """Rebuild a Vocab from a directory dump; used as the pickle constructor
    returned by Vocab.__reduce__."""
    # TODO: This needs fixing --- the trouble is, we can't pickle staticmethods,
    # so we need to fiddle with the design of Language a little bit.
    from .language import Language
    lex_attrs = Language.default_lex_attrs()
    # NOTE(review): data_dir is the tempfile.mkdtemp() directory created by
    # __reduce__ and is never removed afterwards — confirm whether from_dir
    # reads eagerly so the directory could be cleaned up here.
    return Vocab.from_dir(data_dir, lex_attrs)


copy_reg.constructor(unpickle_vocab)
| 
 | ||||
| 
 | ||||
| def write_binary_vectors(in_loc, out_loc): | ||||
|  |  | |||
|  | @ -1,10 +1,13 @@ | |||
| from __future__ import unicode_literals | ||||
| import pytest | ||||
| import StringIO | ||||
| import pickle | ||||
| 
 | ||||
| from spacy.attrs import LEMMA, ORTH, PROB, IS_ALPHA | ||||
| from spacy.parts_of_speech import NOUN, VERB | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
def test_neq(en_vocab):
    """Distinct surface forms must map to distinct orth ids."""
    hello = en_vocab['Hello']
    bye = en_vocab['bye']
    assert bye.orth != hello.orth
|  | @ -38,3 +41,11 @@ def test_symbols(en_vocab): | |||
|     assert en_vocab.strings['ORTH'] == ORTH | ||||
|     assert en_vocab.strings['PROB'] == PROB | ||||
|      | ||||
def test_pickle_vocab(en_vocab):
    """Round-trip a Vocab through pickle and check something came back.

    The original version assigned the unpickled object to ``loaded`` but
    never inspected it, so the test passed even if loading returned None.
    """
    file_ = StringIO.StringIO()
    pickle.dump(en_vocab, file_)

    # Rewind so pickle.load reads from the start of the in-memory buffer.
    file_.seek(0)

    loaded = pickle.load(file_)
    # Minimal sanity check: the unpickler must produce an object, not None.
    assert loaded is not None
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user