mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Add __reduce__ to Tokenizer so that English pickles.
- Add tests to test_pickle and test_tokenizer that save to tempfiles.
This commit is contained in:
		
							parent
							
								
									f02a428fc7
								
							
						
					
					
						commit
						dac8fe7bdb
					
				| 
						 | 
					@ -19,6 +19,7 @@ cdef class Tokenizer:
 | 
				
			||||||
    cdef object _prefix_re
 | 
					    cdef object _prefix_re
 | 
				
			||||||
    cdef object _suffix_re
 | 
					    cdef object _suffix_re
 | 
				
			||||||
    cdef object _infix_re
 | 
					    cdef object _infix_re
 | 
				
			||||||
 | 
					    cdef object _rules
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cpdef Doc tokens_from_list(self, list strings)
 | 
					    cpdef Doc tokens_from_list(self, list strings)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -29,6 +29,16 @@ cdef class Tokenizer:
 | 
				
			||||||
        self._infix_re = infix_re
 | 
					        self._infix_re = infix_re
 | 
				
			||||||
        self.vocab = vocab
 | 
					        self.vocab = vocab
 | 
				
			||||||
        self._load_special_tokenization(rules)
 | 
					        self._load_special_tokenization(rules)
 | 
				
			||||||
 | 
					        self._rules = rules
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __reduce__(self):
 | 
				
			||||||
 | 
					        args = (self.vocab, 
 | 
				
			||||||
 | 
					                self._rules, 
 | 
				
			||||||
 | 
					                self._prefix_re, 
 | 
				
			||||||
 | 
					                self._suffix_re, 
 | 
				
			||||||
 | 
					                self._infix_re)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return (self.__class__, args, None, None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def from_dir(cls, Vocab vocab, data_dir):
 | 
					    def from_dir(cls, Vocab vocab, data_dir):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,8 +1,9 @@
 | 
				
			||||||
import pytest
 | 
					 | 
				
			||||||
import io
 | 
					 | 
				
			||||||
import cloudpickle
 | 
					import cloudpickle
 | 
				
			||||||
 | 
					import io
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
import pickle
 | 
					import pickle
 | 
				
			||||||
 | 
					import pytest
 | 
				
			||||||
 | 
					import tempfile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@pytest.mark.models
 | 
					@pytest.mark.models
 | 
				
			||||||
def test_pickle_english(EN):
 | 
					def test_pickle_english(EN):
 | 
				
			||||||
| 
						 | 
					@ -12,4 +13,15 @@ def test_pickle_english(EN):
 | 
				
			||||||
    file_.seek(0)
 | 
					    file_.seek(0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    loaded = pickle.load(file_)
 | 
					    loaded = pickle.load(file_)
 | 
				
			||||||
 | 
					    assert loaded is not None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@pytest.mark.models
 | 
				
			||||||
 | 
					def test_cloudpickle_to_file(EN):
 | 
				
			||||||
 | 
					    f = tempfile.NamedTemporaryFile(delete=False)
 | 
				
			||||||
 | 
					    p = cloudpickle.CloudPickler(f)
 | 
				
			||||||
 | 
					    p.dump(EN)
 | 
				
			||||||
 | 
					    f.close()
 | 
				
			||||||
 | 
					    loaded_en = cloudpickle.load(open(f.name))
 | 
				
			||||||
 | 
					    os.unlink(f.name)
 | 
				
			||||||
 | 
					    doc = loaded_en(unicode('test parse'))
 | 
				
			||||||
 | 
					    assert len(doc) == 2
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2,6 +2,19 @@
 | 
				
			||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import pytest
 | 
					import pytest
 | 
				
			||||||
 | 
					import io
 | 
				
			||||||
 | 
					import pickle
 | 
				
			||||||
 | 
					import cloudpickle
 | 
				
			||||||
 | 
					import tempfile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@pytest.mark.models
 | 
				
			||||||
 | 
					def test_pickle(en_tokenizer):
 | 
				
			||||||
 | 
					    file_ = io.BytesIO()
 | 
				
			||||||
 | 
					    cloudpickle.dump(en_tokenizer, file_)
 | 
				
			||||||
 | 
					    file_.seek(0)
 | 
				
			||||||
 | 
					    loaded = pickle.load(file_)
 | 
				
			||||||
 | 
					    assert loaded is not None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_no_word(en_tokenizer):
 | 
					def test_no_word(en_tokenizer):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user