mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Fix/irreg adverbs extension (#3499)
* extended list of irreg adverbs * added test to exceptions * fixed typo
This commit is contained in:
parent
1db3e47509
commit
5a7bc6b39d
|
@ -5,9 +5,27 @@ from __future__ import unicode_literals
|
||||||
# Irregular English adverb forms: each key is a comparative or superlative
# surface form, each value is a tuple holding its base-form lemma(s).
# Values are tuples (not bare strings) so an entry can list several lemmas.
ADVERBS_IRREG = {
    "best": ("well",),
    "better": ("well",),
    "closer": ("close",),
    "closest": ("close",),
    "deeper": ("deeply",),
    "earlier": ("early",),
    "earliest": ("early",),
    "farther": ("far",),
    "further": ("far",),
    "faster": ("fast",),
    "fastest": ("fast",),
    "harder": ("hard",),
    "hardest": ("hard",),
    "longer": ("long",),
    "longest": ("long",),
    "nearer": ("near",),
    "nearest": ("near",),
    "nigher": ("nigh",),
    "nighest": ("nigh",),
    "quicker": ("quick",),
    "quickest": ("quick",),
    "slower": ("slow",),
    # Was ("slowest",): the superlative mapped to itself instead of its base.
    "slowest": ("slow",),
    "sooner": ("soon",),
    "soonest": ("soon",),
}
|
||||||
|
|
|
@ -124,3 +124,9 @@ def test_en_tokenizer_norm_exceptions(en_tokenizer, text, norms):
|
||||||
def test_en_lex_attrs_norm_exceptions(en_tokenizer, text, norm):
|
def test_en_lex_attrs_norm_exceptions(en_tokenizer, text, norm):
|
||||||
tokens = en_tokenizer(text)
|
tokens = en_tokenizer(text)
|
||||||
assert tokens[0].norm_ == norm
|
assert tokens[0].norm_ == norm
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("text", ["faster", "fastest", "better", "best"])
def test_en_lemmatizer_handles_irreg_adverbs(en_tokenizer, text):
    """Check that each irregular adverb form gets its own expected lemma.

    The expectation is looked up per input rather than tested against a
    shared list, so a form that resolves to the *other* valid lemma
    (e.g. "faster" -> "well") fails instead of passing silently.
    """
    expected_lemmas = {
        "faster": "fast",
        "fastest": "fast",
        "better": "well",
        "best": "well",
    }
    tokens = en_tokenizer(text)
    assert tokens[0].lemma_ == expected_lemmas[text]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user