Mirror of https://github.com/explosion/spaCy.git
Add note about failed tokenization

commit bd4f5f89cb
parent c7e3dfc1dc
@@ -103,10 +103,12 @@ def test_cnts5(en_tokenizer):
     tokens = en_tokenizer(text)
     assert len(tokens) == 11
 
-def test_mr(en_tokenizer):
-    text = """Mr. Smith"""
-    tokens = en_tokenizer(text)
-    assert len(tokens) == 2
+# TODO: This is currently difficult --- infix interferes here.
+#def test_mr(en_tokenizer):
+#    text = """Today is Tuesday.Mr."""
+#    tokens = en_tokenizer(text)
+#    assert len(tokens) == 5
+#    assert [w.orth_ for w in tokens] == ['Today', 'is', 'Tuesday', '.', 'Mr.']
 
 
 def test_cnts6(en_tokenizer):
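The disabled test documents the tokenization the English tokenizer should eventually produce: the sentence-final period in "Today is Tuesday.Mr." should be split off without also breaking the "Mr." abbreviation apart, which is what the infix rules currently interfere with. Below is a minimal sketch of that expectation, assuming a current spaCy install; spacy.blank("en") is used here as a stand-in for the test suite's en_tokenizer fixture.

import spacy

# Tokenizer-only English pipeline; stands in for the en_tokenizer test fixture.
nlp = spacy.blank("en")
doc = nlp("Today is Tuesday.Mr.")

# Inspect how the current infix rules split the text.
print([t.orth_ for t in doc])

# Desired split per the disabled test:
#   ['Today', 'is', 'Tuesday', '.', 'Mr.']
# i.e. separate the period between "Tuesday" and "Mr." without also
# splitting the "Mr." abbreviation into "Mr" and ".".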