Repository mirror: https://github.com/explosion/spaCy.git
Make sure serializers and deserializers are ordered

Commit: 7c919aeb09 (parent: 1ebd0d3f27)
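Before Python 3.7, dict iteration order was not guaranteed, so building the serialized payload from a plain dict could produce different byte output across runs or processes. Pinning the field order with OrderedDict makes to_bytes() reproducible. A minimal sketch of the failure mode (not spaCy source; assumes the msgpack package, which spaCy uses for its binary payloads):

    # Minimal sketch (not spaCy source): msgpack preserves mapping entry
    # order in the wire format, so the same items inserted in a different
    # order produce different bytes -- exactly what OrderedDict prevents.
    from collections import OrderedDict
    import msgpack

    d1 = OrderedDict([('vocab', b'v'), ('exceptions', b'e')])
    d2 = OrderedDict([('exceptions', b'e'), ('vocab', b'v')])
    assert msgpack.packb(d1) != msgpack.packb(d2)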
@@ -2,6 +2,7 @@
 # coding: utf8
 from __future__ import unicode_literals
 
+from collections import OrderedDict
 from cython.operator cimport dereference as deref
 from cython.operator cimport preincrement as preinc
 from cymem.cymem cimport Pool
@@ -355,14 +356,14 @@ cdef class Tokenizer:
         **exclude: Named attributes to prevent from being serialized.
         RETURNS (bytes): The serialized form of the `Tokenizer` object.
         """
-        serializers = {
-            'vocab': lambda: self.vocab.to_bytes(),
-            'prefix_search': lambda: self.prefix_search.__self__.pattern,
-            'suffix_search': lambda: self.suffix_search.__self__.pattern,
-            'infix_finditer': lambda: self.infix_finditer.__self__.pattern,
-            'token_match': lambda: self.token_match.__self__.pattern,
-            'exceptions': lambda: self._rules
-        }
+        serializers = OrderedDict((
+            ('vocab', lambda: self.vocab.to_bytes()),
+            ('prefix_search', lambda: self.prefix_search.__self__.pattern),
+            ('suffix_search', lambda: self.suffix_search.__self__.pattern),
+            ('infix_finditer', lambda: self.infix_finditer.__self__.pattern),
+            ('token_match', lambda: self.token_match.__self__.pattern),
+            ('exceptions', lambda: OrderedDict(sorted(self._rules.items())))
+        ))
         return util.to_bytes(serializers, exclude)
 
     def from_bytes(self, bytes_data, **exclude):
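For context, a rough sketch of how a util.to_bytes-style helper is assumed to consume these getters (a hypothetical simplification, not spaCy's actual util source): each callable runs in the OrderedDict's insertion order and the results are packed into one binary payload.

    # Sketch under stated assumptions: getters is an OrderedDict of
    # key -> zero-argument callable; exclude is a set of keys to skip.
    from collections import OrderedDict
    import msgpack

    def to_bytes_sketch(getters, exclude):
        # Invoke each getter in insertion order, skipping excluded keys,
        # so the packed payload has a stable, reproducible field order.
        msg = OrderedDict()
        for key, getter in getters.items():
            if key not in exclude:
                msg[key] = getter()
        return msgpack.packb(msg)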
@@ -372,15 +373,15 @@ cdef class Tokenizer:
         **exclude: Named attributes to prevent from being loaded.
         RETURNS (Tokenizer): The `Tokenizer` object.
         """
-        data = {}
-        deserializers = {
-            'vocab': lambda b: self.vocab.from_bytes(b),
-            'prefix_search': lambda b: data.setdefault('prefix', b),
-            'suffix_search': lambda b: data.setdefault('suffix_search', b),
-            'infix_finditer': lambda b: data.setdefault('infix_finditer', b),
-            'token_match': lambda b: data.setdefault('token_match', b),
-            'exceptions': lambda b: data.setdefault('rules', b)
-        }
+        data = OrderedDict()
+        deserializers = OrderedDict((
+            ('vocab', lambda b: self.vocab.from_bytes(b)),
+            ('prefix_search', lambda b: data.setdefault('prefix', b)),
+            ('suffix_search', lambda b: data.setdefault('suffix_search', b)),
+            ('infix_finditer', lambda b: data.setdefault('infix_finditer', b)),
+            ('token_match', lambda b: data.setdefault('token_match', b)),
+            ('exceptions', lambda b: data.setdefault('rules', b))
+        ))
         msg = util.from_bytes(bytes_data, deserializers, exclude)
         if 'prefix_search' in data:
             self.prefix_search = re.compile(data['prefix_search']).search
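And the mirror sketch for the from_bytes side (again a hypothetical simplification, with invented names): unpack the payload, then apply each setter in the deserializers' insertion order, so 'vocab' is restored before the pattern fields that may depend on it. Note that the 'prefix_search' setter stores its value under the key 'prefix', while the later check looks up 'prefix_search' -- this appears to be an inconsistency carried over from the previous version of the code.

    import msgpack

    def from_bytes_sketch(bytes_data, setters, exclude):
        # Unpack the payload, then run each setter in insertion order so
        # fields are restored in the same deterministic sequence every time.
        msg = msgpack.unpackb(bytes_data, raw=False)
        for key, setter in setters.items():
            if key not in exclude and key in msg:
                setter(msg[key])
        return msg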