mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			73 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			73 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # coding: utf-8
 | |
| 
 | |
| import pytest
 | |
| import numpy
 | |
| 
 | |
| 
 | |
@pytest.mark.models
class TestModelSanity:
    """
    Sanity checks for the loaded language models.

    The tests make sure that values are properly set on a processed document.
    They are not meant to evaluate the content of the output, only to make
    sure the output is formally okay.
    """

    @pytest.fixture(scope='class', params=['en', 'de'])
    def example(self, request, EN, DE):
        """
        Return one processed example document per parametrized language.

        ``EN`` and ``DE`` are fixtures defined outside this file (presumably
        the loaded English and German pipelines -- confirm in conftest).
        Both are required to expose an entity recognizer before any document
        is processed.
        """
        assert EN.entity is not None
        assert DE.entity is not None
        if request.param == 'en':
            doc = EN(u'There was a stranger standing at the big ' +
                     u'street talking to herself.')
        elif request.param == 'de':
            doc = DE(u'An der großen Straße stand eine merkwürdige ' +
                     u'Gestalt und führte Selbstgespräche.')
        else:
            # Guard against a param being added to the decorator without a
            # matching branch; otherwise `doc` would be unbound below.
            raise ValueError('unsupported language: %r' % (request.param,))
        return doc

    def test_tokenization(self, example):
        """Tokenization should split the document into several tokens."""
        assert len(example) > 1

    def test_tagging(self, example):
        """If tagging was done properly, pos and tag ids are never empty."""
        assert example.is_tagged
        assert all(t.pos != 0 for t in example)
        assert all(t.tag != 0 for t in example)

    def test_parsing(self, example):
        """
        If parsing was done properly:
        - dependency labels shouldn't be empty
        - the head of some tokens should not be the token itself (i.e. not
          every token is a root)
        """
        assert example.is_parsed
        assert all(t.dep != 0 for t in example)
        # Compare the *head index* with the token's own index. Comparing the
        # dependency label id (t.dep) with the index would not test the tree.
        assert any(t.head.i != i for i, t in enumerate(example))

    def test_ner(self, example):
        """If NER was done properly, ent_iob is set on every token."""
        assert all(t.ent_iob != 0 for t in example)

    def test_vectors(self, example):
        """
        If vectors are available, they should differ on different words.

        This isn't a perfect test since it could in principle fail in a sane
        model as well, but that's very unlikely and a good indicator that
        something is wrong.
        """
        vector0 = example[0].vector
        vector1 = example[1].vector
        vector2 = example[2].vector
        assert not numpy.array_equal(vector0, vector1)
        assert not numpy.array_equal(vector0, vector2)
        assert not numpy.array_equal(vector1, vector2)

    def test_probs(self, example):
        """
        If frequencies/probabilities are okay, they should differ for
        different words.

        This isn't a perfect test since it could in principle fail in a sane
        model as well, but that's very unlikely and a good indicator that
        something is wrong.
        """
        prob0 = example[0].prob
        prob1 = example[1].prob
        prob2 = example[2].prob
        assert prob0 != prob1
        assert prob0 != prob2
        assert prob1 != prob2
 |