Mirror of https://github.com/explosion/spaCy.git, synced 2025-01-30 11:14:08 +03:00
ada4fc0f09
* Fix on_match callback and remove empty patterns (#6312). For the `DependencyMatcher`:
  * Fix the on_match callback so that it is called once per matched pattern
  * Fix the results so that patterns with empty match lists are not returned
* Add --prefer-binary for Python 3.5
* Add version pins for pyrsistent
* Use backwards-compatible super()
* Try to fix tests on Travis (2.7)
* Fix naming conflict and formatting
* Update pkuseg version in Chinese tokenizer warnings
* Some changes for Armenian (#5616):
  * Fixing numericals
  * We need an Armenian question sign to make the sentence a question
  * Update lex_attrs.py (#5608)
* Fix compat
* Update Armenian from v2.3.x

Co-authored-by: Ines Montani <ines@ines.io>
Co-authored-by: Karen Hambardzumyan <mahnerak@gmail.com>
Co-authored-by: Marat M. Yavrumyan <myavrum@ysu.am>
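The first bullet above changes how the `DependencyMatcher` invokes its `on_match` callback and filters out empty results. For reference, here is a minimal sketch of registering such a callback, written against the newer v3-style pattern format; the pattern, the "VERB_SUBJECT" key, and the `en_core_web_sm` model are illustrative assumptions, not part of this commit.

```python
import spacy
from spacy.matcher import DependencyMatcher

# Illustrative assumption: a pretrained pipeline with a parser is installed.
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# One dependency pattern: a verb and its nominal subject.
pattern = [
    {"RIGHT_ID": "verb", "RIGHT_ATTRS": {"POS": "VERB"}},
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"},
    },
]


def on_match(matcher, doc, i, matches):
    # After the fix above, this callback fires once per matched pattern.
    match_id, token_ids = matches[i]
    print(doc.vocab.strings[match_id], [doc[t].text for t in token_ids])


matcher.add("VERB_SUBJECT", [pattern], on_match=on_match)
doc = nlp("The cat chased the mouse.")
matches = matcher(doc)  # patterns with empty match lists are no longer returned
```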
21 lines
697 B
Python
# coding: utf8
from __future__ import unicode_literals

from spacy.lang.en import English


def test_issue5152():
    # Test that the comparison between a Span and a Token works as expected.
    # There was a bug when the number of tokens in the span equaled the
    # number of characters in the token (!)
    nlp = English()
    text = nlp("Talk about being boring!")
    text_var = nlp("Talk of being boring!")
    y = nlp("Let")

    span = text[0:3]  # Talk about being
    span_2 = text[0:3]  # Talk about being
    span_3 = text_var[0:3]  # Talk of being
    token = y[0]  # Let
    assert span.similarity(token) == 0.0
    assert span.similarity(span_2) == 1.0
    assert span_2.similarity(span_3) < 1.0
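This is the regression test added for issue #5152 (Span vs. Token similarity). Assuming it lives at spaCy's usual regression-test path, it can be run on its own through pytest's Python API, for example:

```python
# A minimal sketch: running just this test via pytest.
# The file path below is an assumption based on spaCy's regression-test layout.
import pytest

pytest.main(["-q", "spacy/tests/regression/test_issue5152.py::test_issue5152"])
```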