Mirror of https://github.com/explosion/spaCy.git (synced 2025-10-31 16:07:41 +03:00)
			
		
		
		
	Remove "cause" (without apostrophe) from norm exceptions (#6636)
This commit is contained in:
		
							parent
							
								
									87562e470d
								
							
						
					
					
						commit
						6f7e7d88b9
					
				|  | @ -319,7 +319,6 @@ for exc_data in [ | |||
| # Other contractions with leading apostrophe | ||||
| 
 | ||||
| for exc_data in [ | ||||
| -    {ORTH: "cause", NORM: "because"}, | ||||
|     {ORTH: "em", LEMMA: PRON_LEMMA, NORM: "them"}, | ||||
|     {ORTH: "ll", LEMMA: "will", NORM: "will"}, | ||||
|     {ORTH: "nuff", LEMMA: "enough", NORM: "enough"}, | ||||
|  |  | |||
|  | @ -111,7 +111,15 @@ def test_en_tokenizer_handles_times(en_tokenizer, text): | |||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize( | ||||
| -    "text,norms", [("I'm", ["i", "am"]), ("shan't", ["shall", "not"])] | ||||
| +    "text,norms", | ||||
| +    [ | ||||
| +        ("I'm", ["i", "am"]), | ||||
| +        ("shan't", ["shall", "not"]), | ||||
| +        ( | ||||
| +            "Many factors cause cancer 'cause it is complex", | ||||
| +            ["many", "factors", "cause", "cancer", "because", "it", "is", "complex"], | ||||
| +        ), | ||||
| +    ], | ||||
| ) | ||||
| def test_en_tokenizer_norm_exceptions(en_tokenizer, text, norms): | ||||
|     tokens = en_tokenizer(text) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user