spaCy/website/meta/languages.json
Adriane Boyd d5110ffbf2
Documentation updates for v2.3.0 (#5593)
* Update website models for v2.3.0

* Add docs for Chinese word segmentation

* Tighten up Chinese docs section

* Merge branch 'master' into docs/v2.3.0 [ci skip]

* Merge branch 'master' into docs/v2.3.0 [ci skip]

* Auto-format and update version

* Update matcher.md

* Update languages and sorting

* Typo in landing page

* Infobox about token_match behavior

* Add meta and basic docs for Japanese

* POS -> TAG in models table

* Add info about lookups for normalization

* Updates to API docs for v2.3

* Update adding norm exceptions for adding languages

* Add --omit-extra-lookups to CLI API docs

* Add initial draft of "What's New in v2.3"

* Add new in v2.3 tags to Chinese and Japanese sections

* Add tokenizer to migration section

* Add new in v2.3 flags to init-model

* Typo

* More what's new in v2.3

Co-authored-by: Ines Montani <ines@ines.io>
2020-06-16 15:37:35 +02:00

266 lines
10 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"languages": [
{
"code": "zh",
"name": "Chinese",
"models": ["zh_core_web_sm", "zh_core_web_md", "zh_core_web_lg"],
"dependencies": [
{
"name": "Jieba",
"url": "https://github.com/fxsjy/jieba"
},
{
"name": "PKUSeg",
"url": "https://github.com/lancopku/PKUSeg-python"
}
],
"has_examples": true
},
{
"code": "da",
"name": "Danish",
"example": "Dette er en sætning.",
"has_examples": true,
"models": ["da_core_news_sm", "da_core_news_md", "da_core_news_lg"]
},
{
"code": "nl",
"name": "Dutch",
"models": ["nl_core_news_sm", "nl_core_news_md", "nl_core_news_lg"],
"example": "Dit is een zin.",
"has_examples": true
},
{
"code": "en",
"name": "English",
"models": ["en_core_web_sm", "en_core_web_md", "en_core_web_lg"],
"starters": [
"en_vectors_web_lg",
"en_trf_bertbaseuncased_lg",
"en_trf_robertabase_lg",
"en_trf_distilbertbaseuncased_lg",
"en_trf_xlnetbasecased_lg"
],
"example": "This is a sentence.",
"has_examples": true
},
{
"code": "fr",
"name": "French",
"models": ["fr_core_news_sm", "fr_core_news_md", "fr_core_news_lg"],
"example": "C'est une phrase.",
"has_examples": true
},
{
"code": "de",
"name": "German",
"models": ["de_core_news_sm", "de_core_news_md", "de_core_news_lg"],
"starters": ["de_trf_bertbasecased_lg"],
"example": "Dies ist ein Satz.",
"has_examples": true
},
{
"code": "el",
"name": "Greek",
"models": ["el_core_news_sm", "el_core_news_md", "el_core_news_lg"],
"example": "Αυτή είναι μια πρόταση.",
"has_examples": true
},
{
"code": "it",
"name": "Italian",
"models": ["it_core_news_sm", "it_core_news_md", "it_core_news_lg"],
"example": "Questa è una frase.",
"has_examples": true
},
{
"code": "ja",
"name": "Japanese",
"models": ["ja_core_news_sm", "ja_core_news_md", "ja_core_news_lg"],
"dependencies": [
{
"name": "SudachiPy",
"url": "https://github.com/WorksApplications/SudachiPy"
}
],
"example": "これは文章です。",
"has_examples": true
},
{
"code": "lt",
"name": "Lithuanian",
"has_examples": true,
"models": ["lt_core_news_sm", "lt_core_news_md", "lt_core_news_lg"]
},
{
"code": "nb",
"name": "Norwegian Bokmål",
"example": "Dette er en setning.",
"has_examples": true,
"models": ["nb_core_news_sm", "nb_core_news_md", "nb_core_news_lg"]
},
{
"code": "pl",
"name": "Polish",
"example": "To jest zdanie.",
"has_examples": true,
"models": ["pl_core_news_sm", "pl_core_news_md", "pl_core_news_lg"]
},
{
"code": "pt",
"name": "Portuguese",
"models": ["pt_core_news_sm", "pt_core_news_md", "pt_core_news_lg"],
"example": "Esta é uma frase.",
"has_examples": true
},
{
"code": "ro",
"name": "Romanian",
"example": "Aceasta este o propoziție.",
"has_examples": true,
"models": ["ro_core_news_sm", "ro_core_news_md", "ro_core_news_lg"]
},
{
"code": "es",
"name": "Spanish",
"models": ["es_core_news_sm", "es_core_news_md", "es_core_news_lg"],
"example": "Esto es una frase.",
"has_examples": true
},
{ "code": "sv", "name": "Swedish", "has_examples": true },
{ "code": "fi", "name": "Finnish", "has_examples": true },
{ "code": "hu", "name": "Hungarian", "example": "Ez egy mondat.", "has_examples": true },
{
"code": "ru",
"name": "Russian",
"has_examples": true,
"dependencies": [{ "name": "pymorphy2", "url": "https://github.com/kmike/pymorphy2" }]
},
{
"code": "uk",
"name": "Ukrainian",
"has_examples": true,
"dependencies": [{ "name": "pymorphy2", "url": "https://github.com/kmike/pymorphy2" }]
},
{ "code": "hr", "name": "Croatian", "has_examples": true },
{ "code": "eu", "name": "Basque", "has_examples": true },
{ "code": "yo", "name": "Yoruba", "has_examples": true },
{ "code": "tr", "name": "Turkish", "example": "Bu bir cümledir.", "has_examples": true },
{ "code": "ca", "name": "Catalan", "example": "Això és una frase.", "has_examples": true },
{ "code": "he", "name": "Hebrew", "example": "זהו משפט.", "has_examples": true },
{ "code": "ar", "name": "Arabic", "example": "هذه جملة", "has_examples": true },
{ "code": "fa", "name": "Persian", "has_examples": true },
{ "code": "ur", "name": "Urdu", "example": "یہ ایک جملہ ہے", "has_examples": true },
{ "code": "tt", "name": "Tatar", "has_examples": true },
{ "code": "te", "name": "Telugu", "example": "ఇది ఒక వాక్యం.", "has_examples": true },
{ "code": "si", "name": "Sinhala", "example": "මෙය වාක්‍යයකි.", "has_examples": true },
{ "code": "ga", "name": "Irish" },
{ "code": "bn", "name": "Bengali", "has_examples": true },
{ "code": "hi", "name": "Hindi", "example": "यह एक वाक्य है।", "has_examples": true },
{ "code": "mr", "name": "Marathi" },
{ "code": "kn", "name": "Kannada" },
{ "code": "ta", "name": "Tamil", "has_examples": true },
{
"code": "id",
"name": "Indonesian",
"example": "Ini adalah sebuah kalimat.",
"has_examples": true
},
{ "code": "tl", "name": "Tagalog" },
{ "code": "af", "name": "Afrikaans" },
{ "code": "bg", "name": "Bulgarian", "example": "Това е изречение", "has_examples": true },
{ "code": "cs", "name": "Czech" },
{ "code": "is", "name": "Icelandic" },
{ "code": "lv", "name": "Latvian" },
{ "code": "sr", "name": "Serbian" },
{ "code": "sk", "name": "Slovak" },
{ "code": "sl", "name": "Slovenian" },
{ "code": "lb", "name": "Luxembourgish" },
{
"code": "sq",
"name": "Albanian",
"example": "Kjo është një fjali.",
"has_examples": true
},
{ "code": "et", "name": "Estonian" },
{
"code": "th",
"name": "Thai",
"dependencies": [
{ "name": "pythainlp", "url": "https://github.com/wannaphongcom/pythainlp" }
],
"example": "นี่คือประโยค",
"has_examples": true
},
{
"code": "ko",
"name": "Korean",
"dependencies": [
{
"name": "mecab-ko",
"url": "https://bitbucket.org/eunjeon/mecab-ko/src/master/README.md"
},
{ "name": "mecab-ko-dic", "url": "https://bitbucket.org/eunjeon/mecab-ko-dic" },
{ "name": "natto-py", "url": "https://github.com/buruzaemon/natto-py" }
],
"example": "이것은 문장입니다.",
"has_examples": true
},
{
"code": "vi",
"name": "Vietnamese",
"dependencies": [{ "name": "Pyvi", "url": "https://github.com/trungtv/pyvi" }]
},
{
"code": "lij",
"name": "Ligurian",
"example": "Sta chì a l'é unna fraxe.",
"has_examples": true
},
{
"code": "hy",
"name": "Armenian",
"has_examples": true
},
{
"code": "gu",
"name": "Gujarati",
"has_examples": true
},
{
"code": "ml",
"name": "Malayalam",
"has_examples": true
},
{
"code": "xx",
"name": "Multi-language",
"models": ["xx_ent_wiki_sm"],
"example": "This is a sentence about Facebook."
}
],
"licenses": [
{ "id": "CC BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" },
{ "id": "CC BY-SA", "url": "https://creativecommons.org/licenses/by-sa/3.0/" },
{ "id": "CC BY-SA 3.0", "url": "https://creativecommons.org/licenses/by-sa/3.0/" },
{ "id": "CC BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" },
{ "id": "CC BY-NC", "url": "https://creativecommons.org/licenses/by-nc/3.0/" },
{ "id": "CC BY-NC 3.0", "url": "https://creativecommons.org/licenses/by-nc/3.0/" },
{ "id": "CC BY-NC 4.0", "url": "https://creativecommons.org/licenses/by-nc/4.0/" },
{ "id": "CC-BY-NC-SA 3.0", "url": "https://creativecommons.org/licenses/by-nc-sa/3.0/" },
{ "id": "GPL", "url": "https://www.gnu.org/licenses/gpl.html" },
{ "id": "LGPL", "url": "https://www.gnu.org/licenses/lgpl.html" },
{ "id": "MIT", "url": "https://opensource.org/licenses/MIT" }
]
}