mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Fix/irreg adverbs extension (#3499)
* extended list of irreg adverbs * added test to exceptions * fixed typo
This commit is contained in:
parent
1db3e47509
commit
5a7bc6b39d
|
@ -5,9 +5,27 @@ from __future__ import unicode_literals
|
|||
# Irregular adverb forms: maps an inflected (comparative/superlative) surface
# form to a tuple of its candidate base forms. Every value is a 1-tuple whose
# element is the uninflected adverb, so a key must never map to itself.
ADVERBS_IRREG = {
    "best": ("well",),
    "better": ("well",),
    "closer": ("close",),
    "closest": ("close",),
    "deeper": ("deeply",),
    "earlier": ("early",),
    "earliest": ("early",),
    "farther": ("far",),
    "further": ("far",),
    "faster": ("fast",),
    "fastest": ("fast",),
    "harder": ("hard",),
    "hardest": ("hard",),
    "longer": ("long",),
    "longest": ("long",),
    "nearer": ("near",),
    "nearest": ("near",),
    "nigher": ("nigh",),
    "nighest": ("nigh",),
    "quicker": ("quick",),
    "quickest": ("quick",),
    "slower": ("slow",),
    # Fixed: previously mapped to itself ("slowest") instead of the base form.
    "slowest": ("slow",),
    "sooner": ("soon",),
    "soonest": ("soon",),
}
|
||||
|
|
|
@ -124,3 +124,9 @@ def test_en_tokenizer_norm_exceptions(en_tokenizer, text, norms):
|
|||
def test_en_lex_attrs_norm_exceptions(en_tokenizer, text, norm):
|
||||
tokens = en_tokenizer(text)
|
||||
assert tokens[0].norm_ == norm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "text,lemma",
    [("faster", "fast"), ("fastest", "fast"), ("better", "well"), ("best", "well")],
)
def test_en_lemmatizer_handles_irreg_adverbs(en_tokenizer, text, lemma):
    """Check that each irregular adverb form lemmatizes to its own base form.

    Each case pairs the inflected text with the exact lemma expected for it,
    so a cross-mapping (e.g. "faster" -> "well") fails instead of slipping
    through a shared membership check.
    """
    tokens = en_tokenizer(text)
    assert tokens[0].lemma_ == lemma
|
||||
|
|
Loading…
Reference in New Issue
Block a user