spaCy/spacy/tests/lang/ko/test_serialize.py

import pickle

from spacy.lang.ko import Korean
from ...util import make_tempdir


def test_ko_tokenizer_serialize(ko_tokenizer):
    tokenizer_bytes = ko_tokenizer.to_bytes()
    nlp = Korean()
    nlp.tokenizer.from_bytes(tokenizer_bytes)
    assert tokenizer_bytes == nlp.tokenizer.to_bytes()

    with make_tempdir() as d:
        file_path = d / "tokenizer"
        ko_tokenizer.to_disk(file_path)
        nlp = Korean()
        nlp.tokenizer.from_disk(file_path)
        assert tokenizer_bytes == nlp.tokenizer.to_bytes()


def test_ko_tokenizer_pickle(ko_tokenizer):
    b = pickle.dumps(ko_tokenizer)
    ko_tokenizer_re = pickle.loads(b)
    assert ko_tokenizer.to_bytes() == ko_tokenizer_re.to_bytes()


def test_ko_tokenizer_natto_serialize(ko_tokenizer_natto):
    tokenizer_bytes = ko_tokenizer_natto.to_bytes()
    nlp = Korean()
    nlp.tokenizer.from_bytes(tokenizer_bytes)
    assert tokenizer_bytes == nlp.tokenizer.to_bytes()

    with make_tempdir() as d:
        file_path = d / "tokenizer"
        ko_tokenizer_natto.to_disk(file_path)
        nlp = Korean()
        nlp.tokenizer.from_disk(file_path)
        assert tokenizer_bytes == nlp.tokenizer.to_bytes()


def test_ko_tokenizer_natto_pickle(ko_tokenizer_natto):
    b = pickle.dumps(ko_tokenizer_natto)
    ko_tokenizer_natto_re = pickle.loads(b)
    assert ko_tokenizer_natto.to_bytes() == ko_tokenizer_natto_re.to_bytes()