mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
* bugfix in span similarity
* also rewrite doc.pyx for clarity
* formatting

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
parent
9203d821ae
commit
f8ac5b9f56
18
spacy/tests/regression/test_issue5152.py
Normal file
18
spacy/tests/regression/test_issue5152.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
from spacy.lang.en import English
|
||||
|
||||
|
||||
def test_issue5152():
    """Regression test for issue 5152: similarity between a Span and a Token.

    The comparison used to go wrong when the number of tokens in the span
    equaled the number of characters in the token (!). Here the span holds
    three tokens and the token "Let" holds three characters.
    """
    nlp = English()
    doc = nlp("Talk about being boring!")
    doc_variant = nlp("Talk of being boring!")
    short_doc = nlp("Let")

    first_span = doc[0:3]  # Talk about being
    same_span = doc[0:3]  # Talk about being
    variant_span = doc_variant[0:3]  # Talk of being
    lone_token = short_doc[0]  # Let

    # A span against an unrelated token must not be reported as similar.
    assert first_span.similarity(lone_token) == 0.0
    # Two spans over the same tokens are identical.
    assert first_span.similarity(same_span) == 1.0
    # Spans that differ in one token must score below a perfect match.
    assert same_span.similarity(variant_span) < 1.0
|
|
@ -387,13 +387,14 @@ cdef class Doc:
|
|||
if isinstance(other, (Lexeme, Token)) and self.length == 1:
|
||||
if self.c[0].lex.orth == other.orth:
|
||||
return 1.0
|
||||
elif isinstance(other, (Span, Doc)):
|
||||
if len(self) == len(other):
|
||||
for i in range(self.length):
|
||||
if self[i].orth != other[i].orth:
|
||||
break
|
||||
else:
|
||||
return 1.0
|
||||
elif isinstance(other, (Span, Doc)) and len(self) == len(other):
|
||||
similar = True
|
||||
for i in range(self.length):
|
||||
if self[i].orth != other[i].orth:
|
||||
similar = False
|
||||
break
|
||||
if similar:
|
||||
return 1.0
|
||||
if self.vocab.vectors.n_keys == 0:
|
||||
models_warning(Warnings.W007.format(obj="Doc"))
|
||||
if self.vector_norm == 0 or other.vector_norm == 0:
|
||||
|
|
|
@ -324,11 +324,13 @@ cdef class Span:
|
|||
if len(self) == 1 and hasattr(other, "orth"):
|
||||
if self[0].orth == other.orth:
|
||||
return 1.0
|
||||
elif hasattr(other, "__len__") and len(self) == len(other):
|
||||
elif isinstance(other, (Doc, Span)) and len(self) == len(other):
|
||||
similar = True
|
||||
for i in range(len(self)):
|
||||
if self[i].orth != getattr(other[i], "orth", None):
|
||||
similar = False
|
||||
break
|
||||
else:
|
||||
if similar:
|
||||
return 1.0
|
||||
if self.vocab.vectors.n_keys == 0:
|
||||
models_warning(Warnings.W007.format(obj="Span"))
|
||||
|
|
Loading…
Reference in New Issue
Block a user