mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Tidy up tokenizer test
This commit is contained in:
parent
43dcaa473e
commit
c2581f9172
|
@ -13,19 +13,19 @@ def load_tokenizer(b):
|
||||||
return tok
|
return tok
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.xfail
|
@pytest.mark.skip(reason="Currently unreliable across platforms")
|
||||||
@pytest.mark.parametrize('text', ["I💜you", "they’re", "“hello”"])
|
@pytest.mark.parametrize('text', ["I💜you", "they’re", "“hello”"])
|
||||||
def test_serialize_tokenizer_roundtrip_bytes(en_tokenizer, text):
|
def test_serialize_tokenizer_roundtrip_bytes(en_tokenizer, text):
|
||||||
tokenizer = en_tokenizer
|
tokenizer = en_tokenizer
|
||||||
new_tokenizer = load_tokenizer(tokenizer.to_bytes())
|
new_tokenizer = load_tokenizer(tokenizer.to_bytes())
|
||||||
assert_packed_msg_equal(new_tokenizer.to_bytes(), tokenizer.to_bytes())
|
assert_packed_msg_equal(new_tokenizer.to_bytes(), tokenizer.to_bytes())
|
||||||
# assert new_tokenizer.to_bytes() == tokenizer.to_bytes()
|
assert new_tokenizer.to_bytes() == tokenizer.to_bytes()
|
||||||
doc1 = tokenizer(text)
|
doc1 = tokenizer(text)
|
||||||
doc2 = new_tokenizer(text)
|
doc2 = new_tokenizer(text)
|
||||||
assert [token.text for token in doc1] == [token.text for token in doc2]
|
assert [token.text for token in doc1] == [token.text for token in doc2]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.xfail
|
@pytest.mark.skip(reason="Currently unreliable across platforms")
|
||||||
def test_serialize_tokenizer_roundtrip_disk(en_tokenizer):
|
def test_serialize_tokenizer_roundtrip_disk(en_tokenizer):
|
||||||
tokenizer = en_tokenizer
|
tokenizer = en_tokenizer
|
||||||
with make_tempdir() as d:
|
with make_tempdir() as d:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user