Tidy up tokenizer test

This commit is contained in:
ines 2018-07-06 12:40:28 +02:00
parent 43dcaa473e
commit c2581f9172

View File

@@ -13,19 +13,19 @@ def load_tokenizer(b):
return tok
# Bytes round-trip test: serialize the tokenizer with to_bytes(), hand the
# bytes to load_tokenizer() to get a second tokenizer, then check that the
# second tokenizer serializes and tokenizes the same way as the first.
# NOTE(review): this span reads like a rendered diff hunk whose +/- markers
# and indentation were stripped, so the paired markers (xfail / skip) and the
# paired byte comparisons below are presumably the old and new lines shown
# together -- confirm against the real test file before running this as-is.
@pytest.mark.xfail
@pytest.mark.skip(reason="Currently unreliable across platforms")
# The test body runs once for each of the three sample strings.
@pytest.mark.parametrize('text', ["I💜you", "theyre", "“hello”"])
def test_serialize_tokenizer_roundtrip_bytes(en_tokenizer, text):
tokenizer = en_tokenizer
# Build the second tokenizer from the first tokenizer's serialized bytes.
new_tokenizer = load_tokenizer(tokenizer.to_bytes())
# Compare the re-serialized bytes of the copy with those of the original.
assert_packed_msg_equal(new_tokenizer.to_bytes(), tokenizer.to_bytes())
# assert new_tokenizer.to_bytes() == tokenizer.to_bytes()
assert new_tokenizer.to_bytes() == tokenizer.to_bytes()
# Both tokenizers must produce the same sequence of token texts for `text`.
doc1 = tokenizer(text)
doc2 = new_tokenizer(text)
assert [token.text for token in doc1] == [token.text for token in doc2]
@pytest.mark.xfail
@pytest.mark.skip(reason="Currently unreliable across platforms")
def test_serialize_tokenizer_roundtrip_disk(en_tokenizer):
tokenizer = en_tokenizer
with make_tempdir() as d: