Mirror of https://github.com/explosion/spaCy.git, synced 2025-10-31 07:57:35 +03:00
	Modernise vector tests, use add_vecs_to_vocab and don't depend on models
This commit is contained in:
parent 96f0caa28a
commit 138deb80a1
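The tests in this commit lean on a shared `add_vecs_to_vocab` helper from the test utilities (`..util`), which replaces per-fixture code that resized the vocab's vector table and assigned vectors by hand. The helper's implementation is not part of this diff; the following is a minimal sketch of what it plausibly looks like, inferred from its call sites and from the removed fixture code below (an assumption, not spaCy's actual code):

def add_vecs_to_vocab(vocab, vectors):
    """Hypothetical sketch: add (word, vector) pairs to a vocab."""
    length = len(vectors[0][1])
    vocab.resize_vectors(length)  # make room for vectors of this width
    for word, vec in vectors:
        vocab[word].vector = vec  # assign each lexeme's vector directly
    return vocab  # the fixtures below return the updated vocab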
					
@@ -1,7 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals

-from ..util import get_doc, get_cosine
+from ..util import get_doc, get_cosine, add_vecs_to_vocab

 import numpy
 import pytest
@@ -9,22 +9,16 @@ import pytest

 @pytest.fixture
 def vectors():
-    return ("apple", [1, 2, 3], "orange", [-1, -2, -3])
+    return [("apple", [1, 2, 3]), ("orange", [-1, -2, -3])]


 @pytest.fixture()
 def vocab(en_vocab, vectors):
-    word1, vec1, word2, vec2 = vectors
-    en_vocab.resize_vectors(3)
-    lex1 = en_vocab[word1]
-    lex2 = en_vocab[word2]
-    lex1.vector = vec1
-    lex2.vector = vec2
-    return en_vocab
+    return add_vecs_to_vocab(en_vocab, vectors)


 def test_vectors_similarity_LL(vocab, vectors):
-    word1, vec1, word2, vec2 = vectors
+    [(word1, vec1), (word2, vec2)] = vectors
     lex1 = vocab[word1]
     lex2 = vocab[word2]
     assert lex1.has_vector
@@ -37,7 +31,7 @@ def test_vectors_similarity_LL(vocab, vectors):


 def test_vectors_similarity_TT(vocab, vectors):
-    word1, vec1, word2, vec2 = vectors
+    [(word1, vec1), (word2, vec2)] = vectors
     doc = get_doc(vocab, words=[word1, word2])
     assert doc[0].has_vector
     assert doc[1].has_vector
@@ -49,18 +43,18 @@ def test_vectors_similarity_TT(vocab, vectors):


 def test_vectors_similarity_TD(vocab, vectors):
-    word1, vec1, word2, vec2 = vectors
+    [(word1, vec1), (word2, vec2)] = vectors
     doc = get_doc(vocab, words=[word1, word2])
     assert doc.similarity(doc[0]) == doc[0].similarity(doc)


 def test_vectors_similarity_DS(vocab, vectors):
-    word1, vec1, word2, vec2 = vectors
+    [(word1, vec1), (word2, vec2)] = vectors
     doc = get_doc(vocab, words=[word1, word2])
     assert doc.similarity(doc[:2]) == doc[:2].similarity(doc)


 def test_vectors_similarity_TS(vocab, vectors):
-    word1, vec1, word2, vec2 = vectors
+    [(word1, vec1), (word2, vec2)] = vectors
     doc = get_doc(vocab, words=[word1, word2])
     assert doc[:2].similarity(doc[0]) == doc[0].similarity(doc[:2])
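The first file also imports `get_cosine` and `get_doc` from the same test utilities. Neither is shown in this diff; as orientation, here are hypothetical minimal versions consistent with how they are called (assumptions, not the repository's actual helpers):

import numpy

from spacy.tokens import Doc


def get_cosine(vec1, vec2):
    # standard cosine similarity: dot product scaled by the two norms
    return numpy.dot(vec1, vec2) / (numpy.linalg.norm(vec1) * numpy.linalg.norm(vec2))


def get_doc(vocab, words):
    # build a Doc directly from a list of words, so no model is needed
    return Doc(vocab, words=words)

The second file in the commit, below, drops the `@pytest.mark.models` tests that required a loaded English model and re-expresses them as fixture-driven tests over a tiny hand-built vocab and a bare tokenizer.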
@@ -1,109 +1,126 @@
 # coding: utf-8
 from __future__ import unicode_literals

+from ...tokenizer import Tokenizer
+from ..util import get_doc, add_vecs_to_vocab
+
 import pytest

-@pytest.mark.models
-def test_token_vector(EN):
-    token = EN(u'Apples and oranges')[0]
-    token.vector
-    token.vector_norm

-@pytest.mark.models
-def test_lexeme_vector(EN):
-    lexeme = EN.vocab[u'apples']
-    lexeme.vector
-    lexeme.vector_norm
+@pytest.fixture
+def vectors():
+    return [("apple", [0.0, 1.0, 2.0]), ("orange", [3.0, -2.0, 4.0])]


-@pytest.mark.models
-def test_doc_vector(EN):
-    doc = EN(u'Apples and oranges')
-    doc.vector
-    doc.vector_norm

-@pytest.mark.models
-def test_span_vector(EN):
-    span = EN(u'Apples and oranges')[0:2]
-    span.vector
-    span.vector_norm

-@pytest.mark.models
-def test_token_token_similarity(EN):
-    apples, oranges = EN(u'apples oranges')
-    assert apples.similarity(oranges) == oranges.similarity(apples)
-    assert 0.0 < apples.similarity(oranges) < 1.0
-

-@pytest.mark.models
-def test_token_lexeme_similarity(EN):
-    apples = EN(u'apples')
-    oranges = EN.vocab[u'oranges']
-    assert apples.similarity(oranges) == oranges.similarity(apples)
-    assert 0.0 < apples.similarity(oranges) < 1.0
-

-@pytest.mark.models
-def test_token_span_similarity(EN):
-    doc = EN(u'apples orange juice')
-    apples = doc[0]
-    oranges = doc[1:3]
-    assert apples.similarity(oranges) == oranges.similarity(apples)
-    assert 0.0 < apples.similarity(oranges) < 1.0
-

-@pytest.mark.models
-def test_token_doc_similarity(EN):
-    doc = EN(u'apples orange juice')
-    apples = doc[0]
-    assert apples.similarity(doc) == doc.similarity(apples)
-    assert 0.0 < apples.similarity(doc) < 1.0
-

-@pytest.mark.models
-def test_lexeme_span_similarity(EN):
-    doc = EN(u'apples orange juice')
-    apples = EN.vocab[u'apples']
-    span = doc[1:3]
-    assert apples.similarity(span) == span.similarity(apples)
-    assert 0.0 < apples.similarity(span) < 1.0
+@pytest.fixture()
+def vocab(en_vocab, vectors):
+    return add_vecs_to_vocab(en_vocab, vectors)


-@pytest.mark.models
-def test_lexeme_lexeme_similarity(EN):
-    apples = EN.vocab[u'apples']
-    oranges = EN.vocab[u'oranges']
-    assert apples.similarity(oranges) == oranges.similarity(apples)
-    assert 0.0 < apples.similarity(oranges) < 1.0
-
+@pytest.fixture()
+def tokenizer_v(vocab):
+    return Tokenizer(vocab, {}, None, None, None)

-@pytest.mark.models
-def test_lexeme_doc_similarity(EN):
-    doc = EN(u'apples orange juice')
-    apples = EN.vocab[u'apples']
-    assert apples.similarity(doc) == doc.similarity(apples)
-    assert 0.0 < apples.similarity(doc) < 1.0
-

-@pytest.mark.models
-def test_span_span_similarity(EN):
-    doc = EN(u'apples orange juice')
-    apples = doc[0:2]
-    oj = doc[1:3]
-    assert apples.similarity(oj) == oj.similarity(apples)
-    assert 0.0 < apples.similarity(oj) < 1.0
-
+@pytest.mark.parametrize('text', ["apple and orange"])
+def test_vectors_token_vector(tokenizer_v, vectors, text):
+    doc = tokenizer_v(text)
+    assert vectors[0] == (doc[0].text, list(doc[0].vector))
+    assert vectors[1] == (doc[2].text, list(doc[2].vector))

-@pytest.mark.models
-def test_span_doc_similarity(EN):
-    doc = EN(u'apples orange juice')
-    apples = doc[0:2]
-    oj = doc[1:3]
-    assert apples.similarity(doc) == doc.similarity(apples)
-    assert 0.0 < apples.similarity(doc) < 1.0
-

-@pytest.mark.models
-def test_doc_doc_similarity(EN):
-    apples = EN(u'apples and apple pie')
-    oranges = EN(u'orange juice')
-    assert apples.similarity(oranges) == apples.similarity(oranges)
-    assert 0.0 < apples.similarity(oranges) < 1.0
-
+@pytest.mark.parametrize('text', ["apple", "orange"])
+def test_vectors_lexeme_vector(vocab, text):
+    lex = vocab[text]
+    assert list(lex.vector)
+    assert lex.vector_norm


+@pytest.mark.parametrize('text', [["apple", "and", "orange"]])
+def test_vectors_doc_vector(vocab, text):
+    doc = get_doc(vocab, text)
+    assert list(doc.vector)
+    assert doc.vector_norm


+@pytest.mark.parametrize('text', [["apple", "and", "orange"]])
+def test_vectors_span_vector(vocab, text):
+    span = get_doc(vocab, text)[0:2]
+    assert list(span.vector)
+    assert span.vector_norm


+@pytest.mark.parametrize('text', ["apple orange"])
+def test_vectors_token_token_similarity(tokenizer_v, text):
+    doc = tokenizer_v(text)
+    assert doc[0].similarity(doc[1]) == doc[1].similarity(doc[0])
+    assert 0.0 < doc[0].similarity(doc[1]) < 1.0


+@pytest.mark.parametrize('text1,text2', [("apple", "orange")])
+def test_vectors_token_lexeme_similarity(tokenizer_v, vocab, text1, text2):
+    token = tokenizer_v(text1)
+    lex = vocab[text2]
+    assert token.similarity(lex) == lex.similarity(token)
+    assert 0.0 < token.similarity(lex) < 1.0


+@pytest.mark.parametrize('text', [["apple", "orange", "juice"]])
+def test_vectors_token_span_similarity(vocab, text):
+    doc = get_doc(vocab, text)
+    assert doc[0].similarity(doc[1:3]) == doc[1:3].similarity(doc[0])
+    assert 0.0 < doc[0].similarity(doc[1:3]) < 1.0


+@pytest.mark.parametrize('text', [["apple", "orange", "juice"]])
+def test_vectors_token_doc_similarity(vocab, text):
+    doc = get_doc(vocab, text)
+    assert doc[0].similarity(doc) == doc.similarity(doc[0])
+    assert 0.0 < doc[0].similarity(doc) < 1.0


+@pytest.mark.parametrize('text', [["apple", "orange", "juice"]])
+def test_vectors_lexeme_span_similarity(vocab, text):
+    doc = get_doc(vocab, text)
+    lex = vocab[text[0]]
+    assert lex.similarity(doc[1:3]) == doc[1:3].similarity(lex)
+    assert 0.0 < lex.similarity(doc[1:3]) < 1.0


+@pytest.mark.parametrize('text1,text2', [("apple", "orange")])
+def test_vectors_lexeme_lexeme_similarity(vocab, text1, text2):
+    lex1 = vocab[text1]
+    lex2 = vocab[text2]
+    assert lex1.similarity(lex2) == lex2.similarity(lex1)
+    assert 0.0 < lex1.similarity(lex2) < 1.0


+@pytest.mark.parametrize('text', [["apple", "orange", "juice"]])
+def test_vectors_lexeme_doc_similarity(vocab, text):
+    doc = get_doc(vocab, text)
+    lex = vocab[text[0]]
+    assert lex.similarity(doc) == doc.similarity(lex)
+    assert 0.0 < lex.similarity(doc) < 1.0


+@pytest.mark.parametrize('text', [["apple", "orange", "juice"]])
+def test_vectors_span_span_similarity(vocab, text):
+    doc = get_doc(vocab, text)
+    assert doc[0:2].similarity(doc[1:3]) == doc[1:3].similarity(doc[0:2])
+    assert 0.0 < doc[0:2].similarity(doc[1:3]) < 1.0


+@pytest.mark.parametrize('text', [["apple", "orange", "juice"]])
+def test_vectors_span_doc_similarity(vocab, text):
+    doc = get_doc(vocab, text)
+    assert doc[0:2].similarity(doc) == doc.similarity(doc[0:2])
+    assert 0.0 < doc[0:2].similarity(doc) < 1.0


+@pytest.mark.parametrize('text1,text2', [
+    (["apple", "and", "apple", "pie"], ["orange", "juice"])])
+def test_vectors_doc_doc_similarity(vocab, text1, text2):
+    doc1 = get_doc(vocab, text1)
+    doc2 = get_doc(vocab, text2)
+    assert doc1.similarity(doc2) == doc2.similarity(doc1)
+    assert 0.0 < doc1.similarity(doc2) < 1.0
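Every new similarity test asserts the same two properties: similarity is symmetric, and for the fixture's two distinct vectors it falls strictly between 0 and 1. A quick self-contained check of that invariant with the second file's fixture vectors (plain numpy, independent of spaCy):

import numpy


def cosine(a, b):
    return numpy.dot(a, b) / (numpy.linalg.norm(a) * numpy.linalg.norm(b))


apple = numpy.array([0.0, 1.0, 2.0])    # "apple" vector from the fixture
orange = numpy.array([3.0, -2.0, 4.0])  # "orange" vector from the fixture

assert cosine(apple, orange) == cosine(orange, apple)  # symmetric
assert 0.0 < cosine(apple, orange) < 1.0  # roughly 0.498 for these vectors

Note the `tokenizer_v` fixture: `Tokenizer(vocab, {}, None, None, None)` builds a tokenizer with no special-case rules and no prefix/suffix/infix patterns, which is what lets the tests tokenize strings like "apple and orange" without depending on a model.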