spaCy/spacy/tests/lang/vi/test_serialize.py
Daniël de Kok e2b70df012
Configure isort to use the Black profile, recursively isort the spacy module (#12721)
* Use isort with Black profile

* isort all the things

* Fix import cycles as a result of import sorting

* Add DOCBIN_ALL_ATTRS type definition

* Add isort to requirements

* Remove isort from build dependencies check

* Typo
2023-06-14 17:48:41 +02:00

43 lines
1.3 KiB
Python

import pickle
from spacy.lang.vi import Vietnamese
from ...util import make_tempdir
def test_vi_tokenizer_serialize(vi_tokenizer):
tokenizer_bytes = vi_tokenizer.to_bytes()
nlp = Vietnamese()
nlp.tokenizer.from_bytes(tokenizer_bytes)
assert tokenizer_bytes == nlp.tokenizer.to_bytes()
assert nlp.tokenizer.use_pyvi is True
with make_tempdir() as d:
file_path = d / "tokenizer"
vi_tokenizer.to_disk(file_path)
nlp = Vietnamese()
nlp.tokenizer.from_disk(file_path)
assert tokenizer_bytes == nlp.tokenizer.to_bytes()
assert nlp.tokenizer.use_pyvi is True
# mode is (de)serialized correctly
nlp = Vietnamese.from_config({"nlp": {"tokenizer": {"use_pyvi": False}}})
nlp_bytes = nlp.to_bytes()
nlp_r = Vietnamese()
nlp_r.from_bytes(nlp_bytes)
assert nlp_bytes == nlp_r.to_bytes()
assert nlp_r.tokenizer.use_pyvi is False
with make_tempdir() as d:
nlp.to_disk(d)
nlp_r = Vietnamese()
nlp_r.from_disk(d)
assert nlp_bytes == nlp_r.to_bytes()
assert nlp_r.tokenizer.use_pyvi is False
def test_vi_tokenizer_pickle(vi_tokenizer):
b = pickle.dumps(vi_tokenizer)
vi_tokenizer_re = pickle.loads(b)
assert vi_tokenizer.to_bytes() == vi_tokenizer_re.to_bytes()