mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Bugfix for similarity return types (#10051)
This commit is contained in:
parent
7d528e607c
commit
e9c6314539
|
@ -130,8 +130,10 @@ cdef class Lexeme:
|
|||
return 0.0
|
||||
vector = self.vector
|
||||
xp = get_array_module(vector)
|
||||
return (xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm))
|
||||
|
||||
result = xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
|
||||
# ensure we get a scalar back (numpy does this automatically but cupy doesn't)
|
||||
return result.item()
|
||||
|
||||
@property
|
||||
def has_vector(self):
|
||||
"""RETURNS (bool): Whether a word vector is associated with the object.
|
||||
|
|
|
@ -35,6 +35,7 @@ def test_vectors_similarity_LL(vocab, vectors):
|
|||
assert lex1.vector_norm != 0
|
||||
assert lex2.vector_norm != 0
|
||||
assert lex1.vector[0] != lex2.vector[0] and lex1.vector[1] != lex2.vector[1]
|
||||
assert isinstance(lex1.similarity(lex2), float)
|
||||
assert numpy.isclose(lex1.similarity(lex2), get_cosine(vec1, vec2))
|
||||
assert numpy.isclose(lex2.similarity(lex2), lex1.similarity(lex1))
|
||||
|
||||
|
@ -47,25 +48,46 @@ def test_vectors_similarity_TT(vocab, vectors):
|
|||
assert doc[0].vector_norm != 0
|
||||
assert doc[1].vector_norm != 0
|
||||
assert doc[0].vector[0] != doc[1].vector[0] and doc[0].vector[1] != doc[1].vector[1]
|
||||
assert isinstance(doc[0].similarity(doc[1]), float)
|
||||
assert numpy.isclose(doc[0].similarity(doc[1]), get_cosine(vec1, vec2))
|
||||
assert numpy.isclose(doc[1].similarity(doc[0]), doc[0].similarity(doc[1]))
|
||||
|
||||
|
||||
def test_vectors_similarity_SS(vocab, vectors):
|
||||
[(word1, vec1), (word2, vec2)] = vectors
|
||||
doc = Doc(vocab, words=[word1, word2])
|
||||
assert isinstance(doc[0:1].similarity(doc[0:2]), float)
|
||||
assert doc[0:1].similarity(doc[0:2]) == doc[0:2].similarity(doc[0:1])
|
||||
|
||||
|
||||
def test_vectors_similarity_DD(vocab, vectors):
|
||||
[(word1, vec1), (word2, vec2)] = vectors
|
||||
doc1 = Doc(vocab, words=[word1, word2])
|
||||
doc2 = Doc(vocab, words=[word2, word1])
|
||||
assert isinstance(doc1.similarity(doc2), float)
|
||||
assert doc1.similarity(doc2) == doc2.similarity(doc1)
|
||||
|
||||
|
||||
def test_vectors_similarity_TD(vocab, vectors):
|
||||
[(word1, vec1), (word2, vec2)] = vectors
|
||||
doc = Doc(vocab, words=[word1, word2])
|
||||
with pytest.warns(UserWarning):
|
||||
assert isinstance(doc.similarity(doc[0]), float)
|
||||
assert isinstance(doc[0].similarity(doc), float)
|
||||
assert doc.similarity(doc[0]) == doc[0].similarity(doc)
|
||||
|
||||
|
||||
def test_vectors_similarity_DS(vocab, vectors):
|
||||
[(word1, vec1), (word2, vec2)] = vectors
|
||||
doc = Doc(vocab, words=[word1, word2])
|
||||
assert doc.similarity(doc[:2]) == doc[:2].similarity(doc)
|
||||
|
||||
|
||||
def test_vectors_similarity_TS(vocab, vectors):
|
||||
[(word1, vec1), (word2, vec2)] = vectors
|
||||
doc = Doc(vocab, words=[word1, word2])
|
||||
with pytest.warns(UserWarning):
|
||||
assert isinstance(doc[:2].similarity(doc[0]), float)
|
||||
assert isinstance(doc[0].similarity(doc[-2]), float)
|
||||
assert doc[:2].similarity(doc[0]) == doc[0].similarity(doc[:2])
|
||||
|
||||
|
||||
def test_vectors_similarity_DS(vocab, vectors):
|
||||
[(word1, vec1), (word2, vec2)] = vectors
|
||||
doc = Doc(vocab, words=[word1, word2])
|
||||
assert isinstance(doc.similarity(doc[:2]), float)
|
||||
assert doc.similarity(doc[:2]) == doc[:2].similarity(doc)
|
||||
|
|
|
@ -364,8 +364,10 @@ cdef class Span:
|
|||
return 0.0
|
||||
vector = self.vector
|
||||
xp = get_array_module(vector)
|
||||
return xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
|
||||
|
||||
result = xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
|
||||
# ensure we get a scalar back (numpy does this automatically but cupy doesn't)
|
||||
return result.item()
|
||||
|
||||
cpdef np.ndarray to_array(self, object py_attr_ids):
|
||||
"""Given a list of M attribute IDs, export the tokens to a numpy
|
||||
`ndarray` of shape `(N, M)`, where `N` is the length of the document.
|
||||
|
|
|
@ -209,8 +209,10 @@ cdef class Token:
|
|||
return 0.0
|
||||
vector = self.vector
|
||||
xp = get_array_module(vector)
|
||||
return (xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm))
|
||||
|
||||
result = xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
|
||||
# ensure we get a scalar back (numpy does this automatically but cupy doesn't)
|
||||
return result.item()
|
||||
|
||||
def has_morph(self):
|
||||
"""Check whether the token has annotated morph information.
|
||||
Return False when the morph annotation is unset/missing.
|
||||
|
|
Loading…
Reference in New Issue
Block a user