Mirror of https://github.com/explosion/spaCy.git (synced 2025-10-30 23:47:31 +03:00)

	Make sure serializers and deserializers are ordered
commit 7c919aeb09
parent 1ebd0d3f27
spacy/tokenizer.pyx:

@@ -2,6 +2,7 @@
 # coding: utf8
 from __future__ import unicode_literals
 
+from collections import OrderedDict
 from cython.operator cimport dereference as deref
 from cython.operator cimport preincrement as preinc
 from cymem.cymem cimport Pool
@@ -355,14 +356,14 @@ cdef class Tokenizer:
         **exclude: Named attributes to prevent from being serialized.
         RETURNS (bytes): The serialized form of the `Tokenizer` object.
         """
-        serializers = {
-            'vocab': lambda: self.vocab.to_bytes(),
-            'prefix_search': lambda: self.prefix_search.__self__.pattern,
-            'suffix_search': lambda: self.suffix_search.__self__.pattern,
-            'infix_finditer': lambda: self.infix_finditer.__self__.pattern,
-            'token_match': lambda: self.token_match.__self__.pattern,
-            'exceptions': lambda: self._rules
-        }
+        serializers = OrderedDict((
+            ('vocab', lambda: self.vocab.to_bytes()),
+            ('prefix_search', lambda: self.prefix_search.__self__.pattern),
+            ('suffix_search', lambda: self.suffix_search.__self__.pattern),
+            ('infix_finditer', lambda: self.infix_finditer.__self__.pattern),
+            ('token_match', lambda: self.token_match.__self__.pattern),
+            ('exceptions', lambda: OrderedDict(sorted(self._rules.items())))
+        ))
         return util.to_bytes(serializers, exclude)
 
     def from_bytes(self, bytes_data, **exclude):
@@ -372,15 +373,15 @@ cdef class Tokenizer:
         **exclude: Named attributes to prevent from being loaded.
         RETURNS (Tokenizer): The `Tokenizer` object.
         """
-        data = {}
-        deserializers = {
-            'vocab': lambda b: self.vocab.from_bytes(b),
-            'prefix_search': lambda b: data.setdefault('prefix', b),
-            'suffix_search': lambda b: data.setdefault('suffix_search', b),
-            'infix_finditer': lambda b: data.setdefault('infix_finditer', b),
-            'token_match': lambda b: data.setdefault('token_match', b),
-            'exceptions': lambda b: data.setdefault('rules', b)
-        }
+        data = OrderedDict()
+        deserializers = OrderedDict((
+            ('vocab', lambda b: self.vocab.from_bytes(b)),
+            ('prefix_search', lambda b: data.setdefault('prefix', b)),
+            ('suffix_search', lambda b: data.setdefault('suffix_search', b)),
+            ('infix_finditer', lambda b: data.setdefault('infix_finditer', b)),
+            ('token_match', lambda b: data.setdefault('token_match', b)),
+            ('exceptions', lambda b: data.setdefault('rules', b))
+        ))
         msg = util.from_bytes(bytes_data, deserializers, exclude)
         if 'prefix_search' in data:
             self.prefix_search = re.compile(data['prefix_search']).search
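Why this matters: util.to_bytes walks the serializers mapping in iteration order when packing the Tokenizer, and on Python < 3.7 a plain dict gives no order guarantee, so the same tokenizer could serialize to different byte strings from one run to the next; sorting the exception rules likewise makes that field's contents deterministic. Below is a minimal sketch of the idea — to_bytes_sketch and the sample getters are hypothetical stand-ins for illustration, not spaCy's actual util.to_bytes:

    from collections import OrderedDict

    def to_bytes_sketch(getters, exclude=()):
        # Simplified stand-in for util.to_bytes: call each getter in the
        # mapping's iteration order and collect the results. OrderedDict
        # pins that order, so the packed output is reproducible; a plain
        # dict on Python < 3.7 could emit the fields in any order.
        msg = OrderedDict()
        for key, getter in getters.items():
            if key not in exclude:
                msg[key] = getter()
        return msg  # the real helper then packs this mapping into bytes

    # Illustrative getters mirroring the shape of the Tokenizer's serializers.
    getters = OrderedDict((
        ('vocab', lambda: b'<vocab bytes>'),
        ('exceptions', lambda: OrderedDict(sorted({'b': 1, 'a': 2}.items()))),
    ))
    assert list(to_bytes_sketch(getters)) == ['vocab', 'exceptions']

The from_bytes side mirrors this: an OrderedDict of deserializers guarantees the serialized fields are consumed in a fixed, predictable order.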