mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Add test for custom tokenizer serialization (resolves #2494)
This commit is contained in:
parent
c2581f9172
commit
38e07ade4c
|
@ -2,6 +2,7 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from ...util import get_lang_class
|
||||
from ...tokenizer import Tokenizer
|
||||
from ..util import make_tempdir, assert_packed_msg_equal
|
||||
|
||||
import pytest
|
||||
|
@ -13,6 +14,14 @@ def load_tokenizer(b):
|
|||
return tok
|
||||
|
||||
|
||||
def test_serialize_custom_tokenizer(en_vocab, en_tokenizer):
    """Test that a custom tokenizer with not all functions defined can be
    serialized and deserialized correctly (see #2494).

    Only ``suffix_search`` is passed to the ``Tokenizer`` constructor here;
    no other tokenizer functions are supplied. The test makes no assertions
    about the restored tokenizer: it passes as long as ``to_bytes`` and
    ``from_bytes`` complete without raising.
    """
    tokenizer = Tokenizer(en_vocab, suffix_search=en_tokenizer.suffix_search)
    tokenizer_bytes = tokenizer.to_bytes()
    # The result is deliberately not bound to a name: nothing is checked on
    # it, and an unused local would only trigger lint warnings.
    Tokenizer(en_vocab).from_bytes(tokenizer_bytes)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Currently unreliable across platforms")
|
||||
@pytest.mark.parametrize('text', ["I💜you", "they’re", "“hello”"])
|
||||
def test_serialize_tokenizer_roundtrip_bytes(en_tokenizer, text):
|
||||
|
|
Loading…
Reference in New Issue
Block a user