2022-10-22 19:23:25 +03:00
|
|
|
import pymorphy2
|
2022-10-22 17:31:40 +03:00
|
|
|
from spellchecker import SpellChecker
|
2022-10-22 04:58:10 +03:00
|
|
|
|
2022-10-22 19:23:25 +03:00
|
|
|
# Module-level spell checker constructed with language="ru"; spell_check_ru()
# queries it, so the instance is built once at import time and then reused.
speller_ru = SpellChecker(language="ru")
|
|
|
|
# Module-level spell checker constructed with language="en"; spell_check_en()
# queries it, so the instance is built once at import time and then reused.
speller_eng = SpellChecker(language="en")
|
2022-10-22 04:58:10 +03:00
|
|
|
|
2022-10-22 18:26:28 +03:00
|
|
|
|
2022-10-22 17:33:32 +03:00
|
|
|
def spell_check_ru(word: str) -> str:
    """Return the correction ``speller_ru`` proposes for *word*.

    The input is handed back unchanged whenever the checker yields nothing
    usable (``None`` or an empty string).
    """
    suggestion = speller_ru.correction(word)
    # A falsy suggestion means there is no correction to apply.
    return suggestion if suggestion else word
|
|
|
|
|
2022-10-22 19:23:25 +03:00
|
|
|
|
2022-10-22 17:33:32 +03:00
|
|
|
def spell_check_en(word: str) -> str:
    """Return the correction ``speller_eng`` proposes for *word*.

    The input is handed back unchanged whenever the checker yields nothing
    usable (``None`` or an empty string).
    """
    suggestion = speller_eng.correction(word)
    # A falsy suggestion means there is no correction to apply.
    return suggestion if suggestion else word
|
2022-10-22 19:23:25 +03:00
|
|
|
|
|
|
|
|
2022-10-23 01:20:57 +03:00
|
|
|
# Module-level pymorphy2 analyzer; lemmatize() calls its parse() method, so
# the instance is created once at import time and then reused.
morph = pymorphy2.MorphAnalyzer()
|
|
|
|
|
|
|
|
|
2022-10-22 19:23:25 +03:00
|
|
|
def lemmatize(word):
    """Return the ``normal_form`` of the first parse ``morph`` gives for *word*.

    Only the first element of ``morph.parse(word)`` is consulted; any further
    parses are ignored.
    """
    first_parse = morph.parse(word)[0]
    return first_parse.normal_form
|