Mirror of https://github.com/explosion/spaCy.git
	Modernize and merge tokenizer tests for whitespace
commit a11f684822
parent 8b284fc6f1
@@ -143,11 +143,6 @@ def test_ie(en_tokenizer):
     assert tokens[3].orth_ == "i.e."
 
 
-def test_two_whitespace(en_tokenizer):
-    orig_str = u'there are 2 spaces after this  '
-    tokens = en_tokenizer(orig_str)
-    assert repr(tokens.text_with_ws) == repr(orig_str)
-
 
 
 #def test_cnts7():
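The removed test compares repr() output rather than the raw strings, presumably so that a failing assertion prints trailing whitespace visibly in pytest's output. A minimal sketch of the effect in plain Python (no spaCy needed; the strings here are invented for illustration):

    # Trailing whitespace is invisible in a printed string but explicit
    # in its repr, so a failing comparison is easier to read.
    expected = "there are 2 spaces after this  "
    actual = "there are 2 spaces after this"

    print(expected)        # the two trailing spaces are invisible here
    print(repr(expected))  # 'there are 2 spaces after this  '
    print(repr(actual))    # 'there are 2 spaces after this'

    assert repr(expected) != repr(actual)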
@@ -20,6 +20,12 @@ def test_tokenizer_splits_double_space(en_tokenizer, text):
     assert tokens[1].text == " "
 
 
+@pytest.mark.parametrize('text', ["two spaces after this  "])
+def test_tokenizer_handles_double_trailing_ws(en_tokenizer, text):
+    tokens = en_tokenizer(text)
+    assert repr(tokens.text_with_ws) == repr(text)
+
+
 @pytest.mark.parametrize('text', ["hello\npossums"])
 def test_tokenizer_splits_newline(en_tokenizer, text):
     tokens = en_tokenizer(text)
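The new parametrized test asserts the same round-trip property: spaCy's tokenization is non-destructive, so text_with_ws reconstructs the input exactly, trailing spaces included. A minimal sketch of that property outside the test suite, assuming a modern spaCy install (the en_tokenizer fixture in the suite wraps the English tokenizer in much the same way):

    from spacy.lang.en import English

    nlp = English()  # blank pipeline: only the tokenizer runs

    text = "two spaces after this  "
    doc = nlp(text)

    # Whitespace is attached to tokens (token.whitespace_), so the
    # original string can be reconstructed from the Doc exactly.
    assert doc.text_with_ws == text
    assert "".join(token.text_with_ws for token in doc) == text

Because the test is parametrized, further whitespace edge cases can be covered by extending the list passed to pytest.mark.parametrize instead of writing a new test function.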