Make sure serializers and deserializers are ordered

Author: ines
Date:   2017-06-03 17:05:09 +02:00
parent 1ebd0d3f27
commit 7c919aeb09

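On Python 2 and Python < 3.7, plain dicts don't guarantee iteration order, so serializing from a dict could emit the tokenizer's fields (and its exception rules) in a different order from run to run once hash randomization kicks in. Switching to `OrderedDict`, and sorting the exception rules before wrapping them, makes the byte output deterministic. A minimal sketch of the idea (the rule data below is illustrative, not taken from this commit):

```python
from collections import OrderedDict

# Tokenizer exceptions live in a plain dict mapping the exception
# string to a list of token attribute dicts. Sorting the items before
# wrapping them in an OrderedDict pins a stable, reproducible order,
# independent of how the dict hashes its keys on a given run.
rules = {'e.g.': [{'ORTH': 'e.g.'}], 'a.m.': [{'ORTH': 'a.m.'}]}
ordered = OrderedDict(sorted(rules.items()))
assert list(ordered) == ['a.m.', 'e.g.']
```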

@@ -2,6 +2,7 @@
 # coding: utf8
 from __future__ import unicode_literals
+from collections import OrderedDict
 from cython.operator cimport dereference as deref
 from cython.operator cimport preincrement as preinc
 from cymem.cymem cimport Pool
@@ -355,14 +356,14 @@ cdef class Tokenizer:
         **exclude: Named attributes to prevent from being serialized.
         RETURNS (bytes): The serialized form of the `Tokenizer` object.
         """
-        serializers = {
-            'vocab': lambda: self.vocab.to_bytes(),
-            'prefix_search': lambda: self.prefix_search.__self__.pattern,
-            'suffix_search': lambda: self.suffix_search.__self__.pattern,
-            'infix_finditer': lambda: self.infix_finditer.__self__.pattern,
-            'token_match': lambda: self.token_match.__self__.pattern,
-            'exceptions': lambda: self._rules
-        }
+        serializers = OrderedDict((
+            ('vocab', lambda: self.vocab.to_bytes()),
+            ('prefix_search', lambda: self.prefix_search.__self__.pattern),
+            ('suffix_search', lambda: self.suffix_search.__self__.pattern),
+            ('infix_finditer', lambda: self.infix_finditer.__self__.pattern),
+            ('token_match', lambda: self.token_match.__self__.pattern),
+            ('exceptions', lambda: OrderedDict(sorted(self._rules.items())))
+        ))
         return util.to_bytes(serializers, exclude)

     def from_bytes(self, bytes_data, **exclude):
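The order matters here because `util.to_bytes` calls the getters in the mapping's iteration order and encodes the result, so the layout of the serialized payload follows the `serializers` mapping directly. Roughly, the helper behaves like the sketch below (an approximation for illustration, assuming a msgpack-style encoder; the real implementation lives in `spacy/util.py`):

```python
import msgpack
from collections import OrderedDict

def to_bytes(getters, exclude):
    # Call each getter in mapping order; with an OrderedDict that
    # order is fixed at insertion time, so the encoded payload is
    # deterministic across runs.
    serialized = OrderedDict()
    for key, getter in getters.items():
        if key not in exclude:
            serialized[key] = getter()
    return msgpack.dumps(serialized)
```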
@@ -372,15 +373,15 @@ cdef class Tokenizer:
         **exclude: Named attributes to prevent from being loaded.
         RETURNS (Tokenizer): The `Tokenizer` object.
         """
-        data = {}
-        deserializers = {
-            'vocab': lambda b: self.vocab.from_bytes(b),
-            'prefix_search': lambda b: data.setdefault('prefix', b),
-            'suffix_search': lambda b: data.setdefault('suffix_search', b),
-            'infix_finditer': lambda b: data.setdefault('infix_finditer', b),
-            'token_match': lambda b: data.setdefault('token_match', b),
-            'exceptions': lambda b: data.setdefault('rules', b)
-        }
+        data = OrderedDict()
+        deserializers = OrderedDict((
+            ('vocab', lambda b: self.vocab.from_bytes(b)),
+            ('prefix_search', lambda b: data.setdefault('prefix', b)),
+            ('suffix_search', lambda b: data.setdefault('suffix_search', b)),
+            ('infix_finditer', lambda b: data.setdefault('infix_finditer', b)),
+            ('token_match', lambda b: data.setdefault('token_match', b)),
+            ('exceptions', lambda b: data.setdefault('rules', b))
+        ))
         msg = util.from_bytes(bytes_data, deserializers, exclude)
         if 'prefix_search' in data:
             self.prefix_search = re.compile(data['prefix_search']).search