mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Add lex_attrs
This commit is contained in:
		
							parent
							
								
									4418ec2eee
								
							
						
					
					
						commit
						d53724ba1d
					
				
							
								
								
									
										51
									
								
								spacy/lang/ky/lex_attrs.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								spacy/lang/ky/lex_attrs.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,51 @@
 | 
			
		|||
# coding: utf8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from ...attrs import LIKE_NUM
 | 
			
		||||
 | 
			
		||||
_num_words = [
 | 
			
		||||
    "нөл",
 | 
			
		||||
    "ноль",
 | 
			
		||||
    "бир",
 | 
			
		||||
    "эки",
 | 
			
		||||
    "үч",
 | 
			
		||||
    "төрт",
 | 
			
		||||
    "беш",
 | 
			
		||||
    "алты",
 | 
			
		||||
    "жети",
 | 
			
		||||
    "сегиз",
 | 
			
		||||
    "тогуз",
 | 
			
		||||
    "он",
 | 
			
		||||
    "жыйырма",
 | 
			
		||||
    "отуз",
 | 
			
		||||
    "кырк",
 | 
			
		||||
    "элүү",
 | 
			
		||||
    "алтымыш",
 | 
			
		||||
    "жетмиш",
 | 
			
		||||
    "сексен",
 | 
			
		||||
    "токсон",
 | 
			
		||||
    "жүз",
 | 
			
		||||
    "миң",
 | 
			
		||||
    "миллион",
 | 
			
		||||
    "миллиард",
 | 
			
		||||
    "триллион",
 | 
			
		||||
    "триллиард",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def like_num(text):
    """Return True if ``text`` looks like a number.

    One leading sign character ("+", "-", "±" or "~") is dropped, then every
    "," and "." is removed. The remaining text is number-like when it is:

    * made up only of digits, e.g. "10" or "-1,000.50";
    * a simple fraction of two digit runs, e.g. "1/2"; or
    * one of the number words in ``_num_words``, compared
      case-insensitively so capitalised forms are recognised too.

    text: The token text to check.
    RETURNS: True if the text resembles a number, otherwise False.
    """
    if text.startswith(("+", "-", "±", "~")):
        text = text[1:]
    # Thousands/decimal separators do not matter for this check.
    text = text.replace(",", "").replace(".", "")
    if text.isdigit():
        return True
    if text.count("/") == 1:
        num, denom = text.split("/")
        if num.isdigit() and denom.isdigit():
            return True
    # _num_words only holds lower-case entries, so normalise case before the
    # lookup; otherwise a capitalised number word would be missed.
    return text.lower() in _num_words
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Maps the LIKE_NUM attribute (imported from ...attrs) to the like_num
# predicate defined in this module.
LEX_ATTRS = {LIKE_NUM: like_num}
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user