diff --git a/spacy/tests/serialize/test_serialization.py b/spacy/tests/serialize/test_serialization.py index bb6f604c3..6861fedc8 100644 --- a/spacy/tests/serialize/test_serialization.py +++ b/spacy/tests/serialize/test_serialization.py @@ -4,85 +4,83 @@ import pytest from spacy.tokens import Doc def equal(doc1, doc2): - # tokens - assert [ t.orth for t in doc1 ] == [ t.orth for t in doc2 ] + # tokens + assert [ t.orth for t in doc1 ] == [ t.orth for t in doc2 ] - # tags - assert [ t.pos for t in doc1 ] == [ t.pos for t in doc2 ] - assert [ t.tag for t in doc1 ] == [ t.tag for t in doc2 ] + # tags + assert [ t.pos for t in doc1 ] == [ t.pos for t in doc2 ] + assert [ t.tag for t in doc1 ] == [ t.tag for t in doc2 ] - # parse - assert [ t.head.i for t in doc1 ] == [ t.head.i for t in doc2 ] - assert [ t.dep for t in doc1 ] == [ t.dep for t in doc2 ] - - if doc1.is_parsed and doc2.is_parsed: - assert [ s for s in doc1.sents ] == [ s for s in doc2.sents ] + # parse + assert [ t.head.i for t in doc1 ] == [ t.head.i for t in doc2 ] + assert [ t.dep for t in doc1 ] == [ t.dep for t in doc2 ] + if doc1.is_parsed and doc2.is_parsed: + assert [ s for s in doc1.sents ] == [ s for s in doc2.sents ] - # entities - assert [ t.ent_type for t in doc1 ] == [ t.ent_type for t in doc2 ] - assert [ t.ent_iob for t in doc1 ] == [ t.ent_iob for t in doc2 ] - assert [ ent for ent in doc1.ents ] == [ ent for ent in doc2.ents ] + # entities + assert [ t.ent_type for t in doc1 ] == [ t.ent_type for t in doc2 ] + assert [ t.ent_iob for t in doc1 ] == [ t.ent_iob for t in doc2 ] + assert [ ent for ent in doc1.ents ] == [ ent for ent in doc2.ents ] @pytest.mark.models def test_serialize_tokens(EN): - doc1 = EN(u'This is a test sentence.',tag=False,parse=False,entity=False) + doc1 = EN(u'This is a test sentence.',tag=False, parse=False, entity=False) - doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) - equal(doc1,doc2) + doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) + equal(doc1, doc2) @pytest.mark.models def test_serialize_tokens_tags(EN): - doc1 = EN(u'This is a test sentence.',tag=True,parse=False,entity=False) - - doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) - equal(doc1,doc2) + doc1 = EN(u'This is a test sentence.',tag=True, parse=False, entity=False) + doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) + equal(doc1, doc2) @pytest.mark.models def test_serialize_tokens_parse(EN): - doc1 = EN(u'This is a test sentence.',tag=False,parse=True,entity=False) + doc1 = EN(u'This is a test sentence.',tag=False, parse=True, entity=False) - doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) - equal(doc1,doc2) + doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) + equal(doc1, doc2) @pytest.mark.models def test_serialize_tokens_ner(EN): - doc1 = EN(u'This is a test sentence.',tag=False,parse=False,entity=True) + doc1 = EN(u'This is a test sentence.', tag=False, parse=False, entity=True) - doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) - equal(doc1,doc2) + doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) + equal(doc1, doc2) @pytest.mark.models def test_serialize_tokens_tags_parse(EN): - doc1 = EN(u'This is a test sentence.',tag=True,parse=True,entity=True) + doc1 = EN(u'This is a test sentence.', tag=True, parse=True, entity=False) - doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) - equal(doc1,doc2) + doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) + equal(doc1, doc2) @pytest.mark.models def test_serialize_tokens_tags_ner(EN): - doc1 = EN(u'This is a test sentence.',tag=True,parse=False,entity=True) + doc1 = EN(u'This is a test sentence.', tag=True, parse=False, entity=True) - doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) - equal(doc1,doc2) + doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) + equal(doc1, doc2) @pytest.mark.models def test_serialize_tokens_ner_parse(EN): - doc1 = EN(u'This is a test sentence.',tag=False,parse=True,entity=True) + doc1 = EN(u'This is a test sentence.', tag=False, parse=True, entity=True) - doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) - equal(doc1,doc2) + doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) + equal(doc1, doc2) @pytest.mark.models def test_serialize_tokens_tags_parse_ner(EN): - doc1 = EN(u'This is a test sentence.',tag=True,parse=True,entity=True) + doc1 = EN(u'This is a test sentence.', tag=True, parse=True, entity=True) - doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) - equal(doc1,doc2) + doc2 = Doc(EN.vocab).from_bytes(doc1.to_bytes()) + equal(doc1, doc2)