mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			69 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			69 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# coding: utf8
 | 
						||
from __future__ import unicode_literals
 | 
						||
 | 
						||
# Source: উচ্চতর বাংলা ব্যাকরণ ও রচনা - অধ্যাপক নিরঞ্জন অধিকারী ও অধ্যাপক ড. সফিউদ্দিন আহমদ
# (Higher Bengali Grammar and Composition - Prof. Niranjan Adhikari and Prof. Dr. Safiuddin Ahmed)
 | 
						||
 | 
						||
# Suffix-rewrite rules for Bengali, keyed by coarse part-of-speech name.
# Each rule is a two-item list ``[old, new]``: the text ``old`` is replaced
# by ``new`` (an empty ``new`` simply removes ``old``).  How and in which
# order the rules are applied is decided by the code that consumes this
# table, which is not part of this module.
#
# NOTE: the strings must stay real UTF-8 Bengali (U+0980-U+09FF).  Several
# entries start with a dependent vowel sign and therefore render attached to
# the opening quote; the trailing comment shows the stand-alone spelling.
LEMMA_RULES = {
    "noun": [
        ["টা", ""],
        ["টি", ""],
        ["খান", ""],
        ["খানা", ""],
        ["খানি", ""],
        ["গাছা", ""],
        ["গাছি", ""],
        ["ছড়া", ""],

        ["কে", ""],
        ["ে", ""],
        ["তে", ""],

        ["র", ""],
        ["রা", ""],
        ["রে", ""],
        ["ের", ""],  # এর
        ["েরা", ""],  # এরা
        ["দের", ""],
        ["দেরকে", ""],
        ["গুলা", ""],
        ["গুলো", ""],
        ["গুলি", ""],

        ["কুল", ""],
        ["গণ", ""],
        ["দল", ""],
        ["পাল", ""],
        ["পুঞ্জ", ""],
        ["মণ্ডলী", ""],
        ["মালা", ""],
        ["রাজি", ""],
        ["বৃন্দ", ""],
        ["বর্গ", ""],
        ["শ্রেণী", ""],
        ["শ্রেনি", ""],
        ["রাশি", ""],
        ["সকল", ""],
        ["মহল", ""],
        ["াবলি", ""],  # আবলি

        # Bengali digit representations (U+09E6-U+09EF) -> ASCII digits
        ["০", "0"],
        ["১", "1"],
        ["২", "2"],
        ["৩", "3"],
        ["৪", "4"],
        ["৫", "5"],
        ["৬", "6"],
        ["৭", "7"],
        ["৮", "8"],
        ["৯", "9"],
    ],

    "punct": [
        # Typographic quotes are normalised to their plain ASCII forms.
        ["\u201c", "\""],  # left double quotation mark
        ["\u201d", "\""],  # right double quotation mark
        ["\u2018", "'"],  # left single quotation mark
        ["\u2019", "'"],  # right single quotation mark
    ],
}
 |