mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Bugfix for similarity return types (#10051)
This commit is contained in:
parent
7d528e607c
commit
e9c6314539
|
@ -130,7 +130,9 @@ cdef class Lexeme:
|
||||||
return 0.0
|
return 0.0
|
||||||
vector = self.vector
|
vector = self.vector
|
||||||
xp = get_array_module(vector)
|
xp = get_array_module(vector)
|
||||||
return (xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm))
|
result = xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
|
||||||
|
# ensure we get a scalar back (numpy does this automatically but cupy doesn't)
|
||||||
|
return result.item()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def has_vector(self):
|
def has_vector(self):
|
||||||
|
|
|
@ -35,6 +35,7 @@ def test_vectors_similarity_LL(vocab, vectors):
|
||||||
assert lex1.vector_norm != 0
|
assert lex1.vector_norm != 0
|
||||||
assert lex2.vector_norm != 0
|
assert lex2.vector_norm != 0
|
||||||
assert lex1.vector[0] != lex2.vector[0] and lex1.vector[1] != lex2.vector[1]
|
assert lex1.vector[0] != lex2.vector[0] and lex1.vector[1] != lex2.vector[1]
|
||||||
|
assert isinstance(lex1.similarity(lex2), float)
|
||||||
assert numpy.isclose(lex1.similarity(lex2), get_cosine(vec1, vec2))
|
assert numpy.isclose(lex1.similarity(lex2), get_cosine(vec1, vec2))
|
||||||
assert numpy.isclose(lex2.similarity(lex2), lex1.similarity(lex1))
|
assert numpy.isclose(lex2.similarity(lex2), lex1.similarity(lex1))
|
||||||
|
|
||||||
|
@ -47,25 +48,46 @@ def test_vectors_similarity_TT(vocab, vectors):
|
||||||
assert doc[0].vector_norm != 0
|
assert doc[0].vector_norm != 0
|
||||||
assert doc[1].vector_norm != 0
|
assert doc[1].vector_norm != 0
|
||||||
assert doc[0].vector[0] != doc[1].vector[0] and doc[0].vector[1] != doc[1].vector[1]
|
assert doc[0].vector[0] != doc[1].vector[0] and doc[0].vector[1] != doc[1].vector[1]
|
||||||
|
assert isinstance(doc[0].similarity(doc[1]), float)
|
||||||
assert numpy.isclose(doc[0].similarity(doc[1]), get_cosine(vec1, vec2))
|
assert numpy.isclose(doc[0].similarity(doc[1]), get_cosine(vec1, vec2))
|
||||||
assert numpy.isclose(doc[1].similarity(doc[0]), doc[0].similarity(doc[1]))
|
assert numpy.isclose(doc[1].similarity(doc[0]), doc[0].similarity(doc[1]))
|
||||||
|
|
||||||
|
|
||||||
|
def test_vectors_similarity_SS(vocab, vectors):
|
||||||
|
[(word1, vec1), (word2, vec2)] = vectors
|
||||||
|
doc = Doc(vocab, words=[word1, word2])
|
||||||
|
assert isinstance(doc[0:1].similarity(doc[0:2]), float)
|
||||||
|
assert doc[0:1].similarity(doc[0:2]) == doc[0:2].similarity(doc[0:1])
|
||||||
|
|
||||||
|
|
||||||
|
def test_vectors_similarity_DD(vocab, vectors):
|
||||||
|
[(word1, vec1), (word2, vec2)] = vectors
|
||||||
|
doc1 = Doc(vocab, words=[word1, word2])
|
||||||
|
doc2 = Doc(vocab, words=[word2, word1])
|
||||||
|
assert isinstance(doc1.similarity(doc2), float)
|
||||||
|
assert doc1.similarity(doc2) == doc2.similarity(doc1)
|
||||||
|
|
||||||
|
|
||||||
def test_vectors_similarity_TD(vocab, vectors):
|
def test_vectors_similarity_TD(vocab, vectors):
|
||||||
[(word1, vec1), (word2, vec2)] = vectors
|
[(word1, vec1), (word2, vec2)] = vectors
|
||||||
doc = Doc(vocab, words=[word1, word2])
|
doc = Doc(vocab, words=[word1, word2])
|
||||||
with pytest.warns(UserWarning):
|
with pytest.warns(UserWarning):
|
||||||
|
assert isinstance(doc.similarity(doc[0]), float)
|
||||||
|
assert isinstance(doc[0].similarity(doc), float)
|
||||||
assert doc.similarity(doc[0]) == doc[0].similarity(doc)
|
assert doc.similarity(doc[0]) == doc[0].similarity(doc)
|
||||||
|
|
||||||
|
|
||||||
def test_vectors_similarity_DS(vocab, vectors):
|
|
||||||
[(word1, vec1), (word2, vec2)] = vectors
|
|
||||||
doc = Doc(vocab, words=[word1, word2])
|
|
||||||
assert doc.similarity(doc[:2]) == doc[:2].similarity(doc)
|
|
||||||
|
|
||||||
|
|
||||||
def test_vectors_similarity_TS(vocab, vectors):
|
def test_vectors_similarity_TS(vocab, vectors):
|
||||||
[(word1, vec1), (word2, vec2)] = vectors
|
[(word1, vec1), (word2, vec2)] = vectors
|
||||||
doc = Doc(vocab, words=[word1, word2])
|
doc = Doc(vocab, words=[word1, word2])
|
||||||
with pytest.warns(UserWarning):
|
with pytest.warns(UserWarning):
|
||||||
|
assert isinstance(doc[:2].similarity(doc[0]), float)
|
||||||
|
assert isinstance(doc[0].similarity(doc[-2]), float)
|
||||||
assert doc[:2].similarity(doc[0]) == doc[0].similarity(doc[:2])
|
assert doc[:2].similarity(doc[0]) == doc[0].similarity(doc[:2])
|
||||||
|
|
||||||
|
|
||||||
|
def test_vectors_similarity_DS(vocab, vectors):
|
||||||
|
[(word1, vec1), (word2, vec2)] = vectors
|
||||||
|
doc = Doc(vocab, words=[word1, word2])
|
||||||
|
assert isinstance(doc.similarity(doc[:2]), float)
|
||||||
|
assert doc.similarity(doc[:2]) == doc[:2].similarity(doc)
|
||||||
|
|
|
@ -364,7 +364,9 @@ cdef class Span:
|
||||||
return 0.0
|
return 0.0
|
||||||
vector = self.vector
|
vector = self.vector
|
||||||
xp = get_array_module(vector)
|
xp = get_array_module(vector)
|
||||||
return xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
|
result = xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
|
||||||
|
# ensure we get a scalar back (numpy does this automatically but cupy doesn't)
|
||||||
|
return result.item()
|
||||||
|
|
||||||
cpdef np.ndarray to_array(self, object py_attr_ids):
|
cpdef np.ndarray to_array(self, object py_attr_ids):
|
||||||
"""Given a list of M attribute IDs, export the tokens to a numpy
|
"""Given a list of M attribute IDs, export the tokens to a numpy
|
||||||
|
|
|
@ -209,7 +209,9 @@ cdef class Token:
|
||||||
return 0.0
|
return 0.0
|
||||||
vector = self.vector
|
vector = self.vector
|
||||||
xp = get_array_module(vector)
|
xp = get_array_module(vector)
|
||||||
return (xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm))
|
result = xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
|
||||||
|
# ensure we get a scalar back (numpy does this automatically but cupy doesn't)
|
||||||
|
return result.item()
|
||||||
|
|
||||||
def has_morph(self):
|
def has_morph(self):
|
||||||
"""Check whether the token has annotated morph information.
|
"""Check whether the token has annotated morph information.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user