2020-03-06 16:45:28 +03:00
|
|
|
import numpy as np
|
|
|
|
from spacy.lang.en import English
|
|
|
|
|
|
|
|
|
|
|
|
def test_issue5082():
    """Check token vectors before and after adding the 'merge_entities' pipe.

    Four single-word vectors are registered in the vocab and an entity ruler
    is configured to tag "David Bowie" as a PERSON. The text is parsed twice:

    * with only the entity ruler, each of the four tokens must carry exactly
      the vector registered for its word;
    * with 'merge_entities' added, three tokens are expected, and the last
      one must carry ``merged_vector`` -- numerically the element-wise mean
      of the "David" and "Bowie" vectors.
    """
    nlp = English()

    # Insertion order matters: it mirrors the token order of the sentence.
    word_vectors = {
        "I": np.asarray([0.1, 0.5, 0.8], dtype=np.float32),
        "like": np.asarray([-0.2, -0.6, -0.9], dtype=np.float32),
        "David": np.asarray([0.3, -0.1, 0.7], dtype=np.float32),
        "Bowie": np.asarray([0.5, 0, 0.3], dtype=np.float32),
    }
    merged_vector = np.asarray([0.4, -0.05, 0.5], dtype=np.float32)

    for word, vector in word_vectors.items():
        nlp.vocab.set_vector(word, vector)

    sentence = "I like David Bowie"
    entity_ruler = nlp.add_pipe("entity_ruler")
    entity_ruler.add_patterns(
        [{"label": "PERSON", "pattern": [{"LOWER": "david"}, {"LOWER": "bowie"}]}]
    )

    # Pass 1: no merging yet, one vector per original word.
    expected_unmerged = list(word_vectors.values())
    unmerged = [token.vector for token in nlp(sentence)]
    assert len(unmerged) == 4
    for actual, expected in zip(unmerged, expected_unmerged):
        np.testing.assert_array_equal(actual, expected)

    nlp.add_pipe("merge_entities")

    # Pass 2: the two entity tokens collapse into a single token.
    expected_merged = expected_unmerged[:2] + [merged_vector]
    merged = [token.vector for token in nlp(sentence)]
    assert len(merged) == 3
    for actual, expected in zip(merged, expected_merged):
        np.testing.assert_array_equal(actual, expected)
|