spaCy/spacy/tests/regression/test_issue5152.py
Sofie Van Landeghem d6d95674c1
bugfix in span similarity (#5155)
* bugfix in span similarity

* also rewrite doc.pyx for clarity

* formatting
2020-03-29 13:56:07 +02:00

19 lines
642 B
Python

from spacy.lang.en import English
def test_issue5152():
    """Regression test for issue #5152: Span similarity with a Token or Span.

    The comparison used to misbehave when the number of tokens in the span
    equalled the number of characters in the token -- here, a three-token
    span compared against the three-letter token "Let".
    """
    nlp = English()
    doc = nlp("Talk about being boring!")
    doc_variant = nlp("Talk of being boring!")
    other_doc = nlp("Let")

    first_span = doc[0:3]  # Talk about being
    same_span = doc[0:3]  # Talk about being (separate Span object, same tokens)
    variant_span = doc_variant[0:3]  # Talk of being
    let_token = other_doc[0]  # Let

    # Span vs. an unrelated Token: must not be reported as similar.
    assert first_span.similarity(let_token) == 0.0
    # Span vs. a Span over the very same tokens: identical.
    assert first_span.similarity(same_span) == 1.0
    # Span vs. a Span differing in one token: strictly less than identical.
    assert same_span.similarity(variant_span) < 1.0