diff --git a/spacy/tests/vocab_vectors/test_vectors.py b/spacy/tests/vocab_vectors/test_vectors.py index 214285b01..b688ab9dd 100644 --- a/spacy/tests/vocab_vectors/test_vectors.py +++ b/spacy/tests/vocab_vectors/test_vectors.py @@ -141,7 +141,6 @@ def test_vectors_most_similar(most_similar_vectors_data): assert all(row[0] == i for i, row in enumerate(best_rows)) -@pytest.mark.xfail def test_vectors_most_similar_identical(): """Test that most similar identical vectors are assigned a score of 1.0.""" data = numpy.asarray([[4, 2, 2, 2], [4, 2, 2, 2], [1, 1, 1, 1]], dtype="f") @@ -315,4 +314,4 @@ def test_vocab_prune_vectors(): assert list(remap.keys()) == ["kitten"] neighbour, similarity = list(remap.values())[0] assert neighbour == "cat", remap - assert_allclose(similarity, cosine(data[0], data[2]), atol=1e-6) + assert_allclose(similarity, cosine(data[0], data[2]), atol=1e-4, rtol=1e-3) diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index 0f015521a..44dddb30c 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -344,8 +344,12 @@ cdef class Vectors: sorted_index = xp.arange(scores.shape[0])[:,None][i:i+batch_size],xp.argsort(scores[i:i+batch_size], axis=1)[:,::-1] scores[i:i+batch_size] = scores[sorted_index] best_rows[i:i+batch_size] = best_rows[sorted_index] - + xp = get_array_module(self.data) + # Round scores to 4 decimals so values really close to 1 or -1 become exactly 1 or -1 + scores = xp.around(scores, decimals=4, out=scores) + # Account for numerical error: returned scores must lie in the range [-1, 1] + scores = xp.clip(scores, a_min=-1, a_max=1, out=scores) row2key = {row: key for key, row in self.key2row.items()} keys = xp.asarray( [[row2key[row] for row in best_rows[i] if row in row2key]