diff --git a/spacy/lang/ky/lex_attrs.py b/spacy/lang/ky/lex_attrs.py new file mode 100644 index 000000000..af926b138 --- /dev/null +++ b/spacy/lang/ky/lex_attrs.py @@ -0,0 +1,51 @@ +# coding: utf8 +from __future__ import unicode_literals + +from ...attrs import LIKE_NUM + +_num_words = [ + "нөл", + "ноль", + "бир", + "эки", + "үч", + "төрт", + "беш", + "алты", + "жети", + "сегиз", + "тогуз", + "он", + "жыйырма", + "отуз", + "кырк", + "элүү", + "алтымыш", + "жетмиш", + "сексен", + "токсон", + "жүз", + "миң", + "миллион", + "миллиард", + "триллион", + "триллиард", +] + + +def like_num(text): + if text.startswith(("+", "-", "±", "~")): + text = text[1:] + text = text.replace(",", "").replace(".", "") + if text.isdigit(): + return True + if text.count("/") == 1: + num, denom = text.split("/") + if num.isdigit() and denom.isdigit(): + return True + if text in _num_words: + return True + return False + + +LEX_ATTRS = {LIKE_NUM: like_num}