spaCy/website/meta/languages.json
2024-09-09 11:18:03 +02:00

552 lines
16 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"languages": [
{
"code": "af",
"name": "Afrikaans"
},
{
"code": "am",
"name": "Amharic",
"has_examples": true
},
{
"code": "ar",
"name": "Arabic",
"example": "هذه جملة",
"has_examples": true
},
{
"code": "az",
"name": "Azerbaijani",
"has_examples": true
},
{
"code": "bg",
"name": "Bulgarian",
"example": "Това е изречение",
"has_examples": true
},
{
"code": "bn",
"name": "Bengali",
"has_examples": true
},
{
"code": "bo",
"name": "Tibetan",
"example": "འདི་ཚིག་གྲུབ་རེད།",
"has_examples": true
},
{
"code": "ca",
"name": "Catalan",
"example": "Això és una frase.",
"has_examples": true,
"models": ["ca_core_news_sm", "ca_core_news_md", "ca_core_news_lg", "ca_core_news_trf"]
},
{
"code": "cs",
"name": "Czech",
"has_examples": true
},
{
"code": "da",
"name": "Danish",
"example": "Dette er en sætning.",
"has_examples": true,
"models": ["da_core_news_sm", "da_core_news_md", "da_core_news_lg", "da_core_news_trf"]
},
{
"code": "de",
"name": "German",
"models": ["de_core_news_sm", "de_core_news_md", "de_core_news_lg", "de_dep_news_trf"],
"example": "Dies ist ein Satz.",
"has_examples": true
},
{
"code": "dsb",
"name": "Lower Sorbian",
"has_examples": true
},
{
"code": "el",
"name": "Greek",
"models": ["el_core_news_sm", "el_core_news_md", "el_core_news_lg"],
"example": "Αυτή είναι μια πρόταση.",
"has_examples": true
},
{
"code": "en",
"name": "English",
"models": ["en_core_web_sm", "en_core_web_md", "en_core_web_lg", "en_core_web_trf"],
"example": "This is a sentence.",
"has_examples": true
},
{
"code": "es",
"name": "Spanish",
"models": ["es_core_news_sm", "es_core_news_md", "es_core_news_lg", "es_dep_news_trf"],
"example": "Esto es una frase.",
"has_examples": true
},
{
"code": "et",
"name": "Estonian"
},
{
"code": "eu",
"name": "Basque",
"has_examples": true
},
{
"code": "fa",
"name": "Persian",
"has_examples": true
},
{
"code": "fi",
"name": "Finnish",
"has_examples": true,
"models": ["fi_core_news_sm", "fi_core_news_md", "fi_core_news_lg"]
},
{
"code": "fo",
"name": "Faroese"
},
{
"code": "fr",
"name": "French",
"models": ["fr_core_news_sm", "fr_core_news_md", "fr_core_news_lg", "fr_dep_news_trf"],
"example": "C'est une phrase.",
"has_examples": true
},
{
"code": "ga",
"name": "Irish"
},
{
"code": "grc",
"name": "Ancient Greek",
"has_examples": true
},
{
"code": "gu",
"name": "Gujarati",
"has_examples": true
},
{
"code": "he",
"name": "Hebrew",
"example": "זהו משפט.",
"has_examples": true
},
{
"code": "hi",
"name": "Hindi",
"example": "यह एक वाक्य है।",
"has_examples": true
},
{
"code": "hr",
"name": "Croatian",
"has_examples": true,
"models": ["hr_core_news_sm", "hr_core_news_md", "hr_core_news_lg"]
},
{
"code": "hsb",
"name": "Upper Sorbian",
"has_examples": true
},
{
"code": "hu",
"name": "Hungarian",
"example": "Ez egy mondat.",
"has_examples": true
},
{
"code": "hy",
"name": "Armenian",
"has_examples": true
},
{
"code": "id",
"name": "Indonesian",
"example": "Ini adalah sebuah kalimat.",
"has_examples": true
},
{
"code": "is",
"name": "Icelandic"
},
{
"code": "it",
"name": "Italian",
"models": ["it_core_news_sm", "it_core_news_md", "it_core_news_lg"],
"example": "Questa è una frase.",
"has_examples": true
},
{
"code": "ja",
"name": "Japanese",
"models": ["ja_core_news_sm", "ja_core_news_md", "ja_core_news_lg", "ja_core_news_trf"],
"dependencies": [
{
"name": "SudachiPy",
"url": "https://github.com/WorksApplications/SudachiPy"
}
],
"example": "これは文章です。",
"has_examples": true
},
{
"code": "kn",
"name": "Kannada",
"has_examples": true
},
{
"code": "ko",
"name": "Korean",
"dependencies": [
{
"name": "mecab-ko",
"url": "https://bitbucket.org/eunjeon/mecab-ko/src/master/README.md"
},
{
"name": "mecab-ko-dic",
"url": "https://bitbucket.org/eunjeon/mecab-ko-dic"
},
{
"name": "natto-py",
"url": "https://github.com/buruzaemon/natto-py"
}
],
"example": "이것은 문장입니다.",
"has_examples": true,
"models": ["ko_core_news_sm", "ko_core_news_md", "ko_core_news_lg"]
},
{
"code": "ky",
"name": "Kyrgyz",
"example": "Адамга эң кыйыны — күн сайын адам болуу",
"has_examples": true
},
{
"code": "la",
"name": "Latin",
"example": "In principio creavit Deus caelum et terram.",
"has_examples": true
},
{
"code": "lb",
"name": "Luxembourgish",
"has_examples": true
},
{
"code": "lg",
"name": "Luganda",
"has_examples": true
},
{
"code": "lij",
"name": "Ligurian",
"example": "Sta chì a l'é unna fraxe.",
"has_examples": true
},
{
"code": "lt",
"name": "Lithuanian",
"has_examples": true,
"models": ["lt_core_news_sm", "lt_core_news_md", "lt_core_news_lg"]
},
{
"code": "lv",
"name": "Latvian"
},
{
"code": "mk",
"name": "Macedonian",
"models": ["mk_core_news_sm", "mk_core_news_md", "mk_core_news_lg"]
},
{
"code": "ml",
"name": "Malayalam",
"has_examples": true
},
{
"code": "mr",
"name": "Marathi"
},
{
"code": "ms",
"name": "Malay",
"has_examples": true
},
{
"code": "nb",
"name": "Norwegian Bokmål",
"example": "Dette er en setning.",
"has_examples": true,
"models": ["nb_core_news_sm", "nb_core_news_md", "nb_core_news_lg"]
},
{
"code": "ne",
"name": "Nepali",
"has_examples": true
},
{
"code": "nl",
"name": "Dutch",
"models": ["nl_core_news_sm", "nl_core_news_md", "nl_core_news_lg"],
"example": "Dit is een zin.",
"has_examples": true
},
{
"code": "nn",
"name": "Norwegian Nynorsk",
"example": "Det er ein meir enn i same periode i fjor.",
"has_examples": true
},
{
"code": "pl",
"name": "Polish",
"example": "To jest zdanie.",
"has_examples": true,
"models": ["pl_core_news_sm", "pl_core_news_md", "pl_core_news_lg"]
},
{
"code": "pt",
"name": "Portuguese",
"models": ["pt_core_news_sm", "pt_core_news_md", "pt_core_news_lg"],
"example": "Esta é uma frase.",
"has_examples": true
},
{
"code": "ro",
"name": "Romanian",
"example": "Aceasta este o propoziție.",
"has_examples": true,
"models": ["ro_core_news_sm", "ro_core_news_md", "ro_core_news_lg"]
},
{
"code": "ru",
"name": "Russian",
"has_examples": true,
"dependencies": [
{
"name": "pymorphy3",
"url": "https://github.com/no-plagiarism/pymorphy3"
}
],
"models": ["ru_core_news_sm", "ru_core_news_md", "ru_core_news_lg"]
},
{
"code": "sa",
"name": "Sanskrit",
"has_examples": true
},
{
"code": "si",
"name": "Sinhala",
"example": "මෙය වාක්‍යයකි.",
"has_examples": true
},
{
"code": "sk",
"name": "Slovak",
"has_examples": true
},
{
"code": "sl",
"name": "Slovenian",
"example": "France Prešeren je umrl 8. februarja 1849 v Kranju",
"has_examples": true,
"models": ["sl_core_news_sm", "sl_core_news_md", "sl_core_news_lg", "sl_core_news_trf"]
},
{
"code": "sq",
"name": "Albanian",
"example": "Kjo është një fjali.",
"has_examples": true
},
{
"code": "sr",
"name": "Serbian",
"has_examples": true
},
{
"code": "sv",
"name": "Swedish",
"has_examples": true,
"models": ["sv_core_news_sm", "sv_core_news_md", "sv_core_news_lg"]
},
{
"code": "ta",
"name": "Tamil",
"has_examples": true
},
{
"code": "te",
"name": "Telugu",
"example": "ఇది ఒక వాక్యం.",
"has_examples": true
},
{
"code": "th",
"name": "Thai",
"dependencies": [
{
"name": "pythainlp",
"url": "https://github.com/wannaphongcom/pythainlp"
}
],
"example": "นี่คือประโยค",
"has_examples": true
},
{
"code": "ti",
"name": "Tigrinya",
"has_examples": true
},
{
"code": "tl",
"name": "Tagalog"
},
{
"code": "tn",
"name": "Setswana",
"has_examples": true
},
{
"code": "tr",
"name": "Turkish",
"example": "Bu bir cümledir.",
"has_examples": true
},
{
"code": "tt",
"name": "Tatar",
"has_examples": true
},
{
"code": "uk",
"name": "Ukrainian",
"has_examples": true,
"models": ["uk_core_news_sm", "uk_core_news_md", "uk_core_news_lg", "uk_core_news_trf"],
"dependencies": [
{
"name": "pymorphy3",
"url": "https://github.com/no-plagiarism/pymorphy3"
},
{
"name": "pymorphy3-dicts-uk",
"url": "https://github.com/no-plagiarism/pymorphy3-dicts"
}
]
},
{
"code": "ur",
"name": "Urdu",
"example": "یہ ایک جملہ ہے",
"has_examples": true
},
{
"code": "vi",
"name": "Vietnamese",
"dependencies": [
{
"name": "Pyvi",
"url": "https://github.com/trungtv/pyvi"
}
]
},
{
"code": "xx",
"name": "Multi-language",
"models": ["xx_ent_wiki_sm", "xx_sent_ud_sm"],
"example": "This is a sentence about Facebook."
},
{
"code": "yo",
"name": "Yoruba",
"has_examples": true
},
{
"code": "zh",
"name": "Chinese",
"models": ["zh_core_web_sm", "zh_core_web_md", "zh_core_web_lg", "zh_core_web_trf"],
"dependencies": [
{
"name": "Jieba",
"url": "https://github.com/fxsjy/jieba"
},
{
"name": "spacy-pkuseg",
"url": "https://github.com/explosion/spacy-pkuseg"
}
],
"example": "这是一个用于示例的句子。",
"has_examples": true
},
{
"code": "kmr",
"name": "Kurdish Kurmanji",
"example": "Ev hevokek e",
"has_examples": true
}
],
"licenses": [
{
"id": "CC BY 4.0",
"url": "https://creativecommons.org/licenses/by/4.0/"
},
{
"id": "CC BY-SA",
"url": "https://creativecommons.org/licenses/by-sa/3.0/"
},
{
"id": "CC BY-SA 3.0",
"url": "https://creativecommons.org/licenses/by-sa/3.0/"
},
{
"id": "CC BY-SA 4.0",
"url": "https://creativecommons.org/licenses/by-sa/4.0/"
},
{
"id": "CC BY-NC",
"url": "https://creativecommons.org/licenses/by-nc/3.0/"
},
{
"id": "CC BY-NC 3.0",
"url": "https://creativecommons.org/licenses/by-nc/3.0/"
},
{
"id": "CC BY-NC 4.0",
"url": "https://creativecommons.org/licenses/by-nc/4.0/"
},
{
"id": "CC-BY-NC-SA 3.0",
"url": "https://creativecommons.org/licenses/by-nc-sa/3.0/"
},
{
"id": "GPL",
"url": "https://www.gnu.org/licenses/gpl.html"
},
{
"id": "GPU GPL 3.0",
"url": "https://www.gnu.org/licenses/gpl-3.0.en.html"
},
{
"id": "LGPL",
"url": "https://www.gnu.org/licenses/lgpl.html"
},
{
"id": "MIT",
"url": "https://opensource.org/licenses/MIT"
},
{
"id": "LGPL-LR",
"url": "https://github.com/UniversalDependencies/UD_French-Sequoia/blob/master/LICENSE.txt"
}
]
}