mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	* bugfix in span similarity * also rewrite doc.pyx for clarity * formatting Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
		
			
				
	
	
		
			19 lines
		
	
	
		
			642 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			19 lines
		
	
	
		
			642 B
		
	
	
	
		
			Python
		
	
	
	
	
	
| from spacy.lang.en import English
 | |
| 
 | |
| 
 | |
| def test_issue5152():
 | |
|     # Test that the comparison between a Span and a Token, goes well
 | |
|     # There was a bug when the number of tokens in the span equaled the number of characters in the token (!)
 | |
|     nlp = English()
 | |
|     text = nlp("Talk about being boring!")
 | |
|     text_var = nlp("Talk of being boring!")
 | |
|     y = nlp("Let")
 | |
| 
 | |
|     span = text[0:3]  # Talk about being
 | |
|     span_2 = text[0:3]  # Talk about being
 | |
|     span_3 = text_var[0:3]  # Talk of being
 | |
|     token = y[0]  # Let
 | |
|     assert span.similarity(token) == 0.0
 | |
|     assert span.similarity(span_2) == 1.0
 | |
|     assert span_2.similarity(span_3) < 1.0
 |