mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-21 01:21:58 +03:00
Add Kurdish Kurmanji language
This commit is contained in:
parent
8cda27aefa
commit
90b77cfe5e
12
spacy/lang/kmr/__init__.py
Normal file
12
spacy/lang/kmr/__init__.py
Normal file
|
@ -0,0 +1,12 @@
|
|||
from ...language import BaseDefaults, Language
|
||||
from .stop_words import STOP_WORDS
|
||||
|
||||
class KurmanjiDefaults(BaseDefaults):
    """Language defaults for Kurdish Kurmanji.

    Only the stop-word list is overridden here; everything else is
    inherited unchanged from ``BaseDefaults``.
    """

    stop_words = STOP_WORDS
|
||||
|
||||
|
||||
class Kurmanji(Language):
    """Kurdish Kurmanji language class (language code ``kmr``)."""

    # Language code used to identify this language.
    lang = "kmr"
    # Defaults bundle carrying the Kurmanji stop words.
    Defaults = KurmanjiDefaults


__all__ = ["Kurmanji"]
|
17
spacy/lang/kmr/examples.py
Normal file
17
spacy/lang/kmr/examples.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
"""
|
||||
Example sentences to test spaCy and its language models.
|
||||
|
||||
>>> from spacy.lang.kmr.examples import sentences
|
||||
>>> docs = nlp.pipe(sentences)
|
||||
"""
|
||||
|
||||
# Example Kurmanji sentences. Each entry is preceded by a comment giving its
# English translation; the strings themselves are the data and must not be
# altered.
sentences = [
    # People's gaze is always on the development of the future
    "Berê mirovan her tim li geşedana pêşerojê ye",
    # Kawa Nemir translated Ulysses into Kurmanji in 14 years.
    "Kawa Nemir di 14 salan de Ulysses wergerand Kurmancî.",
    # Mem Ararat is a famous Kurdish artist
    "Mem Ararat hunermendekî Kurd yê bi nav û deng e.",
    # Firat Ceweri has been writing Kurdish books for 40 years
    "Firat Cewerî 40 sal e pirtûkên Kurdî dinivîsîne.",
    # The young journalist wrote an interesting news article about the
    # economic situation
    "Rojnamegerê ciwan nûçeyeke balkêş li ser rewşa aborî nivîsand",
    # The agricultural sector constitutes an important part of greenhouse
    # gas emissions worldwide
    "Sektora çandiniyê beşeke giring a belavkirina gaza serayê li seranserê cîhanê pêk tîne",
    # Talented students succeeded in the mathematics competition
    "Xwendekarên jêhatî di pêşbaziya matematîkê de serkeftî bûn",
    # Because of poverty, my father didn't send my sister to school, which
    # became a pain and sorrow for me
    "Ji ber ji tunebûnê bavê min xwişkeke min nedan xwendin ew ji min re bû derd û kulek.",
]
|
44
spacy/lang/kmr/stop_words.py
Normal file
44
spacy/lang/kmr/stop_words.py
Normal file
|
@ -0,0 +1,44 @@
|
|||
# Kurmanji stop words, one entry per line.
#
# The literal below is split on whitespace, so every entry must be a single
# token: a multi-word phrase would be broken into its component words. For
# that reason the phrase "li ku" is not listed on a line of its own; both of
# its parts, "li" and "ku", are already present as individual entries, so the
# resulting set is the same. Each word is listed exactly once.
STOP_WORDS = set(
    """
û
li
bi
di
da
de
ji
ku
ew
ez
tu
em
hûn
ev
min
te
wî
wê
me
we
wan
vê
vî
va
çi
kî
kê
çawa
çima
kengî
çend
çiqas
her
hin
gelek
hemû
kes
tişt
""".split()
)
|
|
@ -10,7 +10,7 @@ LANGUAGES = ["af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de", "el",
|
|||
"hr", "hu", "hy", "id", "is", "it", "kn", "ky", "lb", "lt", "lv",
|
||||
"mk", "ml", "mr", "nb", "ne", "nl", "pl", "pt", "ro", "ru", "sa",
|
||||
"si", "sk", "sl", "sq", "sr", "sv", "ta", "te", "ti", "tl", "tn",
|
||||
"tr", "tt", "uk", "ur", "xx", "yo"]
|
||||
"tr", "tt", "uk", "ur", "xx", "yo", "kmr"]
|
||||
# fmt: on
|
||||
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ LANGUAGES = [
|
|||
pytest.param("tr", marks=pytest.mark.slow()),
|
||||
pytest.param("tt", marks=pytest.mark.slow()),
|
||||
pytest.param("ur", marks=pytest.mark.slow()),
|
||||
pytest.param("kmr", marks=pytest.mark.slow()),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -480,6 +480,12 @@
|
|||
],
|
||||
"example": "这是一个用于示例的句子。",
|
||||
"has_examples": true
|
||||
},
|
||||
{
|
||||
"code": "kmr",
|
||||
"name": "Kurdish Kurmanji",
|
||||
"example": "Ev hevokek e",
|
||||
"has_examples": true
|
||||
}
|
||||
],
|
||||
"licenses": [
|
||||
|
|
Loading…
Reference in New Issue
Block a user