# Mirror of https://github.com/explosion/spaCy.git (synced 2025-04-25 03:13:41 +03:00); 69 lines, 1.7 KiB, Python.
# coding: utf8
from __future__ import unicode_literals

# Source: উচ্চতর বাংলা ব্যাকরণ ও রচনা - অধ্যাপক নিরঞ্জন অধিকারী ও অধ্যাপক ড. সফিউদ্দিন আহমদ
# ("Higher Bengali Grammar and Composition" by Prof. Niranjan Adhikari and
# Prof. Dr. Safiuddin Ahmed)

# Suffix-rewrite rules for Bengali, keyed by coarse word class.
#
# Each rule is a two-element list ``[suffix, replacement]``. The table itself
# only stores the pairs; presumably the rule-based lemmatizer replaces a
# matching word-final ``suffix`` with ``replacement`` -- confirm against the
# caller.
#
# NOTE: the Bengali literals below must be stored as real UTF-8 text. A
# mis-decoded copy of this table (Latin/Baltic mojibake) can never match
# Bengali input.
LEMMA_RULES = {
    "noun": [
        # Article / classifier suffixes.
        ["টা", ""],
        ["টি", ""],
        ["খান", ""],
        ["খানা", ""],
        ["খানি", ""],
        ["গাছা", ""],
        ["গাছি", ""],
        ["ছড়া", ""],

        # Case endings.
        ["কে", ""],
        ["ে", ""],
        ["তে", ""],

        # Genitive and plural endings.
        ["র", ""],
        ["রা", ""],
        ["রে", ""],
        ["ের", ""],  # dependent-vowel spelling of "এর"
        ["েরা", ""],  # dependent-vowel spelling of "এরা"
        ["দের", ""],
        ["দেরকে", ""],
        ["গুলা", ""],
        ["গুলো", ""],
        ["গুলি", ""],

        # Collective / group-forming endings.
        ["কুল", ""],
        ["গণ", ""],
        ["দল", ""],
        ["পাল", ""],
        ["পুঞ্জ", ""],
        ["মণ্ডলী", ""],
        ["মালা", ""],
        ["রাজি", ""],
        ["বৃন্দ", ""],
        ["বর্গ", ""],
        ["শ্রেণী", ""],
        ["শ্রেনি", ""],
        ["রাশি", ""],
        ["সকল", ""],
        ["মহল", ""],
        ["াবলি", ""],  # dependent-vowel spelling of "আবলি"

        # Bengali digit representations (U+09E6..U+09EF -> ASCII digits).
        ["০", "0"],
        ["১", "1"],
        ["২", "2"],
        ["৩", "3"],
        ["৪", "4"],
        ["৫", "5"],
        ["৬", "6"],
        ["৭", "7"],
        ["৮", "8"],
        ["৯", "9"],
    ],

    # Typographic quotation marks are normalised to their ASCII equivalents.
    "punct": [
        ["“", "\""],
        ["”", "\""],
        ["\u2018", "'"],
        ["\u2019", "'"]
    ]
}