Add Kurdish Kurmanji language

This commit is contained in:
Muzaffer Cıkay 2024-07-09 14:27:28 +03:00
parent 8cda27aefa
commit 90b77cfe5e
6 changed files with 81 additions and 1 deletions

View File

@ -0,0 +1,12 @@
from ...language import BaseDefaults, Language
from .stop_words import STOP_WORDS
class KurmanjiDefaults(BaseDefaults):
stop_words = STOP_WORDS
class Kurmanji(Language):
lang = "kmr"
Defaults = KurmanjiDefaults
__all__ = ["Kurmanji"]

View File

@ -0,0 +1,17 @@
"""
Example sentences to test spaCy and its language models.
>>> from spacy.lang.kmr.examples import sentences
>>> docs = nlp.pipe(sentences)
"""
sentences = [
"Berê mirovan her tim li geşedana pêşerojê ye", # People's gaze is always on the development of the future
"Kawa Nemir di 14 salan de Ulysses wergerand Kurmancî.", # Kawa Nemir translated Ulysses into Kurmanji in 14 years.
"Mem Ararat hunermendekî Kurd yê bi nav û deng e.", # Mem Ararat is a famous Kurdish artist
"Firat Cewerî 40 sal e pirtûkên Kurdî dinivîsîne.", # Firat Ceweri has been writing Kurdish books for 40 years
"Rojnamegerê ciwan nûçeyeke balkêş li ser rewşa aborî nivîsand", # The young journalist wrote an interesting news article about the economic situation
"Sektora çandiniyê beşeke giring a belavkirina gaza serayê li seranserê cîhanê pêk tîne", # The agricultural sector constitutes an important part of greenhouse gas emissions worldwide
"Xwendekarên jêhatî di pêşbaziya matematîkê de serkeftî bûn", # Talented students succeeded in the mathematics competition
"Ji ber ji tunebûnê bavê min xwişkeke min nedan xwendin ew ji min re bû derd û kulek.", # Because of poverty, my father didn't send my sister to school, which became a pain and sorrow for me
]

View File

@ -0,0 +1,44 @@
STOP_WORDS = set(
"""
û
li
bi
di
da
de
ji
ku
ew
ez
tu
em
hûn
ew
ev
min
te
me
we
wan
va
çi
çawa
çima
kengî
li ku
çend
çiqas
her
hin
gelek
hemû
kes
tişt
""".split()
)

View File

@ -10,7 +10,7 @@ LANGUAGES = ["af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de", "el",
"hr", "hu", "hy", "id", "is", "it", "kn", "ky", "lb", "lt", "lv",
"mk", "ml", "mr", "nb", "ne", "nl", "pl", "pt", "ro", "ru", "sa",
"si", "sk", "sl", "sq", "sr", "sv", "ta", "te", "ti", "tl", "tn",
"tr", "tt", "uk", "ur", "xx", "yo"]
"tr", "tt", "uk", "ur", "xx", "yo", "kmr"]
# fmt: on

View File

@ -57,6 +57,7 @@ LANGUAGES = [
pytest.param("tr", marks=pytest.mark.slow()),
pytest.param("tt", marks=pytest.mark.slow()),
pytest.param("ur", marks=pytest.mark.slow()),
pytest.param("kmr", marks=pytest.mark.slow()),
]

View File

@ -480,6 +480,12 @@
],
"example": "这是一个用于示例的句子。",
"has_examples": true
},
{
"code": "kmr",
"name": "Kurdish Kurmanji",
"example": "Ev hevokek e",
"has_examples": true
}
],
"licenses": [