mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 07:27:28 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			52 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			52 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # coding: utf-8
 | |
| from __future__ import unicode_literals
 | |
| 
 | |
| from ..util import get_doc, assert_docs_equal
 | |
| from ...tokens import Doc
 | |
| from ...vocab import Vocab
 | |
| 
 | |
| import pytest
 | |
| 
 | |
| 
 | |
| TEXT = ["This", "is", "a", "test", "sentence", "."]
 | |
| TAGS = ['DT', 'VBZ', 'DT', 'NN', 'NN', '.']
 | |
| DEPS = ['nsubj', 'ROOT', 'det', 'compound', 'attr', 'punct']
 | |
| ENTS = [('hi', 'PERSON', 0, 1)]
 | |
| 
 | |
| 
 | |
| def test_serialize_empty_doc(en_vocab):
 | |
|     doc = get_doc(en_vocab)
 | |
|     data = doc.to_bytes()
 | |
|     doc2 = Doc(en_vocab)
 | |
|     doc2.from_bytes(data)
 | |
|     assert len(doc) == len(doc2)
 | |
|     for token1, token2 in zip(doc, doc2):
 | |
|         assert token1.text == token2.text
 | |
| 
 | |
| 
 | |
| @pytest.mark.xfail
 | |
| @pytest.mark.parametrize('text', ['rat'])
 | |
| def test_serialize_vocab(en_vocab, text):
 | |
|     text_hash = en_vocab.strings.add(text)
 | |
|     vocab_bytes = en_vocab.to_bytes()
 | |
|     new_vocab = Vocab().from_bytes(vocab_bytes)
 | |
|     assert new_vocab.strings(text_hash) == text
 | |
| 
 | |
| #
 | |
| #@pytest.mark.parametrize('text', [TEXT])
 | |
| #def test_serialize_tokens(en_vocab, text):
 | |
| #    doc1 = get_doc(en_vocab, [t for t in text])
 | |
| #    doc2 = get_doc(en_vocab).from_bytes(doc1.to_bytes())
 | |
| #    assert_docs_equal(doc1, doc2)
 | |
| #
 | |
| #
 | |
| #@pytest.mark.models
 | |
| #@pytest.mark.parametrize('text', [TEXT])
 | |
| #@pytest.mark.parametrize('tags', [TAGS, []])
 | |
| #@pytest.mark.parametrize('deps', [DEPS, []])
 | |
| #@pytest.mark.parametrize('ents', [ENTS, []])
 | |
| #def test_serialize_tokens_ner(EN, text, tags, deps, ents):
 | |
| #    doc1 = get_doc(EN.vocab, [t for t in text], tags=tags, deps=deps, ents=ents)
 | |
| #    doc2 = get_doc(EN.vocab).from_bytes(doc1.to_bytes())
 | |
| #    assert_docs_equal(doc1, doc2)
 |