Mirror of https://github.com/explosion/spaCy.git, synced 2025-10-31 07:57:35 +03:00
			
		
		
		
	* Add xfail test for Issue #225: tokenization with non-whitespace delimiters
This commit is contained in:
		
							parent
							
								
									7abe653223
								
							
						
					
					
						commit
						515493c675
					
				|  | @ -116,12 +116,13 @@ def test_cnts5(en_tokenizer): | |||
|     tokens = en_tokenizer(text) | ||||
|     assert len(tokens) == 11 | ||||
| 
 | ||||
| # TODO: This is currently difficult --- infix interferes here. | ||||
| #def test_mr(en_tokenizer): | ||||
| #    text = """Today is Tuesday.Mr.""" | ||||
| #    tokens = en_tokenizer(text) | ||||
| #    assert len(tokens) == 5 | ||||
| #    assert [w.orth_ for w in tokens] == ['Today', 'is', 'Tuesday', '.', 'Mr.'] | ||||
| 
 | ||||
| @pytest.mark.xfail | ||||
| def test_mr(en_tokenizer): | ||||
|     text = """Today is Tuesday.Mr.""" | ||||
|     tokens = en_tokenizer(text) | ||||
|     assert len(tokens) == 5 | ||||
|     assert [w.orth_ for w in tokens] == ['Today', 'is', 'Tuesday', '.', 'Mr.'] | ||||
| 
 | ||||
| 
 | ||||
| def test_cnts6(en_tokenizer): | ||||
|  | @ -148,6 +149,16 @@ def test_two_whitespace(en_tokenizer): | |||
|     tokens = en_tokenizer(orig_str) | ||||
|     assert repr(tokens.text_with_ws) == repr(orig_str) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.xfail | ||||
| def test_em_dash_infix(en_tokenizer): | ||||
|     # Re Issue #225 | ||||
|     tokens = en_tokenizer('''Will this road take me to Puddleton?\u2014No, ''' | ||||
|                           '''you'll have to walk there.\u2014Ariel.''') | ||||
|     assert tokens[6].text == 'Puddleton' | ||||
|     assert tokens[7].text == '?' | ||||
|     assert tokens[8].text == '\u2014' | ||||
| 
 | ||||
| #def test_cnts7(): | ||||
| #    text = 'But then the 6,000-year ice age came...' | ||||
| #    tokens = EN.tokenize(text) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user