Mirror of https://github.com/explosion/spaCy.git
	Update tests
parent 66766c1454
commit 908809d488
@@ -2,6 +2,8 @@
 from __future__ import unicode_literals
 
 from ..util import get_doc
+from ...tokens import Doc
+from ...vocab import Vocab
 
 import pytest
 import numpy
@@ -204,17 +206,11 @@ def test_doc_api_right_edge(en_tokenizer):
     assert doc[6].right_edge.text == ','
 
 
-@pytest.mark.xfail
-@pytest.mark.parametrize('text,vectors', [
-    ("apple orange pear", ["apple -1 -1 -1", "orange -1 -1 0", "pear -1 0 -1"])
-])
-def test_doc_api_has_vector(en_tokenizer, text_file, text, vectors):
-    text_file.write('\n'.join(vectors))
-    text_file.seek(0)
-    vector_length = en_tokenizer.vocab.load_vectors(text_file)
-    assert vector_length == 3
-
-    doc = en_tokenizer(text)
+def test_doc_api_has_vector():
+    vocab = Vocab()
+    vocab.clear_vectors(2)
+    vocab.vectors.add('kitten', numpy.asarray([0., 2.], dtype='f'))
+    doc = Doc(vocab, words=['kitten'])
     assert doc.has_vector
 
 def test_lowest_common_ancestor(en_tokenizer):
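The rewritten test_doc_api_has_vector builds its vectors in-process through the Vocab.vectors table instead of loading them from a temporary text file. A minimal standalone sketch of the same pattern, assuming the v2-era API used in the diff (Vocab.clear_vectors(width), Vocab.vectors.add(key, vector)) and assuming Doc.vector averages the token vectors:

import numpy
from spacy.vocab import Vocab
from spacy.tokens import Doc

vocab = Vocab()
vocab.clear_vectors(2)  # reset the vectors table to width 2, as in the diff
vocab.vectors.add('kitten', numpy.asarray([0., 2.], dtype='f'))

doc = Doc(vocab, words=['kitten'])
assert doc.has_vector
# With a single token, the averaged document vector should equal the token vector
# (assumption about Doc.vector; the test itself only checks has_vector).
assert numpy.allclose(doc.vector, doc[0].vector)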
@@ -3,6 +3,8 @@ from __future__ import unicode_literals
 
 from ...attrs import IS_ALPHA, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_TITLE, IS_STOP
 from ..util import get_doc
+from ...vocab import Vocab
+from ...tokens import Doc
 
 import pytest
 import numpy
@@ -68,26 +70,21 @@ def test_doc_token_api_is_properties(en_vocab):
     assert doc[5].like_email
 
 
-@pytest.mark.xfail
-@pytest.mark.parametrize('text,vectors', [
-    ("apples oranges ldskbjls", ["apples -1 -1 -1", "oranges -1 -1 0"])
-])
-def test_doc_token_api_vectors(en_tokenizer, text_file, text, vectors):
-    text_file.write('\n'.join(vectors))
-    text_file.seek(0)
-    vector_length = en_tokenizer.vocab.load_vectors(text_file)
-    assert vector_length == 3
+def test_doc_token_api_vectors():
+    vocab = Vocab()
+    vocab.clear_vectors(2)
+    vocab.vectors.add('apples', numpy.asarray([0., 2.], dtype='f'))
+    vocab.vectors.add('oranges', numpy.asarray([0., 1.], dtype='f'))
+    doc = Doc(vocab, words=['apples', 'oranges', 'oov'])
+    assert doc.has_vector
 
-    tokens = en_tokenizer(text)
-    assert tokens[0].has_vector
-    assert tokens[1].has_vector
-    assert not tokens[2].has_vector
-    assert tokens[0].similarity(tokens[1]) > tokens[0].similarity(tokens[2])
-    assert tokens[0].similarity(tokens[1]) == tokens[1].similarity(tokens[0])
-    assert sum(tokens[0].vector) != sum(tokens[1].vector)
-    assert numpy.isclose(
-        tokens[0].vector_norm,
-        numpy.sqrt(numpy.dot(tokens[0].vector, tokens[0].vector)))
+    assert doc[0].has_vector
+    assert doc[1].has_vector
+    assert not doc[2].has_vector
+    apples_norm = (0*0 + 2*2) ** 0.5
+    oranges_norm = (0*0 + 1*1) ** 0.5
+    cosine = ((0*0) + (2*1)) / (apples_norm * oranges_norm)
+    assert doc[0].similarity(doc[1]) == cosine
 
 
 def test_doc_token_api_ancestors(en_tokenizer):
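The new test_doc_token_api_vectors hard-codes the cosine it expects from doc[0].similarity(doc[1]). For the vectors used here, [0, 2] for 'apples' and [0, 1] for 'oranges', the dot product is 2 and the norms are 2 and 1, so the cosine works out to exactly 1.0 (the vectors are parallel). A quick numpy-only check of that arithmetic:

import numpy

apples = numpy.asarray([0., 2.], dtype='f')
oranges = numpy.asarray([0., 1.], dtype='f')

# cosine = dot(a, b) / (|a| * |b|)
cosine = numpy.dot(apples, oranges) / (numpy.linalg.norm(apples) * numpy.linalg.norm(oranges))
print(cosine)  # 1.0 -- matches the value the test computes by hand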
|  | @ -1,8 +1,11 @@ | |||
| import pytest | ||||
| import spacy | ||||
| 
 | ||||
| @pytest.mark.models('en') | ||||
| def test_issue1305(EN): | ||||
| #@pytest.mark.models('en') | ||||
| def test_issue1305(): | ||||
|     '''Test lemmatization of English VBZ''' | ||||
|     assert EN.vocab.morphology.lemmatizer('works', 'verb') == set(['work']) | ||||
|     doc = EN(u'This app works well') | ||||
|     nlp = spacy.load('en_core_web_sm') | ||||
|     assert nlp.vocab.morphology.lemmatizer('works', 'verb') == ['work'] | ||||
|     doc = nlp(u'This app works well') | ||||
|     print([(w.text, w.tag_) for w in doc]) | ||||
|     assert doc[2].lemma_ == 'work' | ||||
@@ -9,4 +9,4 @@ import pytest
 @pytest.mark.parametrize('word,lemmas', [("chromosomes", ["chromosome"]), ("endosomes", ["endosome"]), ("colocalizes", ["colocalize", "colocaliz"])])
 def test_issue781(EN, word, lemmas):
     lemmatizer = EN.Defaults.create_lemmatizer()
-    assert lemmatizer(word, 'noun', morphology={'number': 'plur'}) == set(lemmas)
+    assert lemmatizer(word, 'noun', morphology={'number': 'plur'}) == lemmas
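Both regression-test hunks change their expected values from set(...) to a plain list, which suggests the lemmatizer now returns an ordered list of candidate lemmas, so order and duplicates matter in the comparison. A small illustration using the 'colocalizes' case from the parametrize above (plain Python, no spaCy needed):

# Expected candidates for 'colocalizes', in the order the updated test expects them.
lemmas = ["colocalize", "colocaliz"]

assert lemmas == ["colocalize", "colocaliz"]             # new-style, order-sensitive check
assert set(lemmas) == set(["colocaliz", "colocalize"])   # old-style check ignored order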