mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Fix/irreg adverbs extension (#3499)
* extended list of irreg adverbs * added test to exceptions * fixed typo
This commit is contained in:
		
							parent
							
								
									1db3e47509
								
							
						
					
					
						commit
						5a7bc6b39d
					
				|  | @ -5,9 +5,27 @@ from __future__ import unicode_literals | |||
# Irregular English adverbs: maps each comparative/superlative surface form
# to a tuple of its base form(s). Every value is a one-element tuple.
ADVERBS_IRREG = {
    "best": ("well",),
    "better": ("well",),
    "closer": ("close",),
    "closest": ("close",),
    "deeper": ("deeply",),
    "earlier": ("early",),
    "earliest": ("early",),
    "farther": ("far",),
    "further": ("far",),
    "faster": ("fast",),
    "fastest": ("fast",),
    "harder": ("hard",),
    "hardest": ("hard",),
    "longer": ("long",),
    "longest": ("long",),
    "nearer": ("near",),
    "nearest": ("near",),
    "nigher": ("nigh",),
    "nighest": ("nigh",),
    "quicker": ("quick",),
    "quickest": ("quick",),
    "slower": ("slow",),
    # The superlative must map to the base form, like every other pair here.
    "slowest": ("slow",),
    "sooner": ("soon",),
    "soonest": ("soon",),
}
|  |  | |||
|  | @ -124,3 +124,9 @@ def test_en_tokenizer_norm_exceptions(en_tokenizer, text, norms): | |||
| def test_en_lex_attrs_norm_exceptions(en_tokenizer, text, norm): | ||||
|     tokens = en_tokenizer(text) | ||||
|     assert tokens[0].norm_ == norm | ||||
| 
 | ||||
| 
 | ||||
@pytest.mark.parametrize("text", ["faster", "fastest", "better", "best"])
def test_en_lemmatizer_handles_irreg_adverbs(en_tokenizer, text):
    """Check that the first token's lemma for an irregular adverb form is "fast" or "well"."""
    accepted_lemmas = ["fast", "well"]
    first_token = en_tokenizer(text)[0]
    assert first_token.lemma_ in accepted_lemmas
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user