mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
69 lines
1.7 KiB
Python
69 lines
1.7 KiB
Python
# encoding: utf8
|
||
from __future__ import unicode_literals
|
||
|
||
# Source: āĻāĻā§āĻāĻ¤āĻ° āĻŦāĻžāĻāĻ˛āĻž āĻŦā§āĻ¯āĻžāĻāĻ°āĻŖ āĻ āĻ°āĻāĻ¨āĻž - āĻ
āĻ§ā§āĻ¯āĻžāĻĒāĻ āĻ¨āĻŋāĻ°āĻā§āĻāĻ¨ āĻ
āĻ§āĻŋāĻāĻžāĻ°ā§ āĻ āĻ
āĻ§ā§āĻ¯āĻžāĻĒāĻ āĻĄ. āĻ¸āĻĢāĻŋāĻāĻĻā§āĻĻāĻŋāĻ¨ āĻāĻšāĻŽāĻĻ
|
||
|
||
LEMMA_RULES = {
|
||
"noun": [
|
||
["āĻāĻž", ""],
|
||
["āĻāĻŋ", ""],
|
||
["āĻāĻžāĻ¨", ""],
|
||
["āĻāĻžāĻ¨āĻž", ""],
|
||
["āĻāĻžāĻ¨āĻŋ", ""],
|
||
["āĻāĻžāĻāĻž", ""],
|
||
["āĻāĻžāĻāĻŋ", ""],
|
||
["āĻāĻĄāĻŧāĻž", ""],
|
||
|
||
["āĻā§", ""],
|
||
["ā§", ""],
|
||
["āĻ¤ā§", ""],
|
||
|
||
["āĻ°", ""],
|
||
["āĻ°āĻž", ""],
|
||
["āĻ°ā§", ""],
|
||
["ā§āĻ°", ""], # āĻāĻ°
|
||
["ā§āĻ°āĻž", ""], # āĻāĻ°āĻž
|
||
["āĻĻā§āĻ°", ""],
|
||
["āĻĻā§āĻ°āĻā§", ""],
|
||
["āĻā§āĻ˛āĻž", ""],
|
||
["āĻā§āĻ˛ā§", ""],
|
||
["āĻā§āĻ˛āĻŋ", ""],
|
||
|
||
["āĻā§āĻ˛", ""],
|
||
["āĻāĻŖ", ""],
|
||
["āĻĻāĻ˛", ""],
|
||
["āĻĒāĻžāĻ˛", ""],
|
||
["āĻĒā§āĻā§āĻ", ""],
|
||
["āĻŽāĻŖā§āĻĄāĻ˛ā§", ""],
|
||
["āĻŽāĻžāĻ˛āĻž", ""],
|
||
["āĻ°āĻžāĻāĻŋ", ""],
|
||
["āĻŦā§āĻ¨ā§āĻĻ", ""],
|
||
["āĻŦāĻ°ā§āĻ", ""],
|
||
["āĻļā§āĻ°ā§āĻŖā§", ""],
|
||
["āĻļā§āĻ°ā§āĻ¨āĻŋ", ""],
|
||
["āĻ°āĻžāĻļāĻŋ", ""],
|
||
["āĻ¸āĻāĻ˛", ""],
|
||
["āĻŽāĻšāĻ˛", ""],
|
||
["āĻžāĻŦāĻ˛āĻŋ", ""], # āĻāĻŦāĻ˛āĻŋ
|
||
|
||
# Bengali digit representations
|
||
["ā§Ļ", "0"],
|
||
["ā§§", "1"],
|
||
["ā§¨", "2"],
|
||
["ā§Š", "3"],
|
||
["ā§Ē", "4"],
|
||
["ā§Ģ", "5"],
|
||
["ā§Ŧ", "6"],
|
||
["ā§", "7"],
|
||
["ā§Ž", "8"],
|
||
["ā§¯", "9"],
|
||
],
|
||
|
||
"punct": [
|
||
["â", "\""],
|
||
["â", "\""],
|
||
["\u2018", "'"],
|
||
["\u2019", "'"]
|
||
]
|
||
}
|