mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 21:57:15 +03:00
d5110ffbf2
* Update website models for v2.3.0 * Add docs for Chinese word segmentation * Tighten up Chinese docs section * Merge branch 'master' into docs/v2.3.0 [ci skip] * Merge branch 'master' into docs/v2.3.0 [ci skip] * Auto-format and update version * Update matcher.md * Update languages and sorting * Typo in landing page * Infobox about token_match behavior * Add meta and basic docs for Japanese * POS -> TAG in models table * Add info about lookups for normalization * Updates to API docs for v2.3 * Update adding norm exceptions for adding languages * Add --omit-extra-lookups to CLI API docs * Add initial draft of "What's New in v2.3" * Add new in v2.3 tags to Chinese and Japanese sections * Add tokenizer to migration section * Add new in v2.3 flags to init-model * Typo * More what's new in v2.3 Co-authored-by: Ines Montani <ines@ines.io>
266 lines
10 KiB
JSON
266 lines
10 KiB
JSON
{
|
||
"languages": [
|
||
{
|
||
"code": "zh",
|
||
"name": "Chinese",
|
||
"models": ["zh_core_web_sm", "zh_core_web_md", "zh_core_web_lg"],
|
||
"dependencies": [
|
||
{
|
||
"name": "Jieba",
|
||
"url": "https://github.com/fxsjy/jieba"
|
||
},
|
||
{
|
||
"name": "PKUSeg",
|
||
"url": "https://github.com/lancopku/PKUSeg-python"
|
||
}
|
||
],
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "da",
|
||
"name": "Danish",
|
||
"example": "Dette er en sætning.",
|
||
"has_examples": true,
|
||
"models": ["da_core_news_sm", "da_core_news_md", "da_core_news_lg"]
|
||
},
|
||
{
|
||
"code": "nl",
|
||
"name": "Dutch",
|
||
"models": ["nl_core_news_sm", "nl_core_news_md", "nl_core_news_lg"],
|
||
"example": "Dit is een zin.",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "en",
|
||
"name": "English",
|
||
"models": ["en_core_web_sm", "en_core_web_md", "en_core_web_lg"],
|
||
"starters": [
|
||
"en_vectors_web_lg",
|
||
"en_trf_bertbaseuncased_lg",
|
||
"en_trf_robertabase_lg",
|
||
"en_trf_distilbertbaseuncased_lg",
|
||
"en_trf_xlnetbasecased_lg"
|
||
],
|
||
"example": "This is a sentence.",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "fr",
|
||
"name": "French",
|
||
"models": ["fr_core_news_sm", "fr_core_news_md", "fr_core_news_lg"],
|
||
"example": "C'est une phrase.",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "de",
|
||
"name": "German",
|
||
"models": ["de_core_news_sm", "de_core_news_md", "de_core_news_lg"],
|
||
"starters": ["de_trf_bertbasecased_lg"],
|
||
"example": "Dies ist ein Satz.",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "el",
|
||
"name": "Greek",
|
||
"models": ["el_core_news_sm", "el_core_news_md", "el_core_news_lg"],
|
||
"example": "Αυτή είναι μια πρόταση.",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "it",
|
||
"name": "Italian",
|
||
"models": ["it_core_news_sm", "it_core_news_md", "it_core_news_lg"],
|
||
"example": "Questa è una frase.",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "ja",
|
||
"name": "Japanese",
|
||
"models": ["ja_core_news_sm", "ja_core_news_md", "ja_core_news_lg"],
|
||
"dependencies": [
|
||
{
|
||
"name": "SudachiPy",
|
||
"url": "https://github.com/WorksApplications/SudachiPy"
|
||
}
|
||
],
|
||
"example": "これは文章です。",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "lt",
|
||
"name": "Lithuanian",
|
||
"has_examples": true,
|
||
"models": ["lt_core_news_sm", "lt_core_news_md", "lt_core_news_lg"]
|
||
},
|
||
{
|
||
"code": "nb",
|
||
"name": "Norwegian Bokmål",
|
||
"example": "Dette er en setning.",
|
||
"has_examples": true,
|
||
"models": ["nb_core_news_sm", "nb_core_news_md", "nb_core_news_lg"]
|
||
},
|
||
{
|
||
"code": "pl",
|
||
"name": "Polish",
|
||
"example": "To jest zdanie.",
|
||
"has_examples": true,
|
||
"models": ["pl_core_news_sm", "pl_core_news_md", "pl_core_news_lg"]
|
||
},
|
||
{
|
||
"code": "pt",
|
||
"name": "Portuguese",
|
||
"models": ["pt_core_news_sm", "pt_core_news_md", "pt_core_news_lg"],
|
||
"example": "Esta é uma frase.",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "ro",
|
||
"name": "Romanian",
|
||
"example": "Aceasta este o propoziție.",
|
||
"has_examples": true,
|
||
"models": ["ro_core_news_sm", "ro_core_news_md", "ro_core_news_lg"]
|
||
},
|
||
{
|
||
"code": "es",
|
||
"name": "Spanish",
|
||
"models": ["es_core_news_sm", "es_core_news_md", "es_core_news_lg"],
|
||
"example": "Esto es una frase.",
|
||
"has_examples": true
|
||
},
|
||
{ "code": "sv", "name": "Swedish", "has_examples": true },
|
||
{ "code": "fi", "name": "Finnish", "has_examples": true },
|
||
{ "code": "hu", "name": "Hungarian", "example": "Ez egy mondat.", "has_examples": true },
|
||
{
|
||
"code": "ru",
|
||
"name": "Russian",
|
||
"has_examples": true,
|
||
"dependencies": [{ "name": "pymorphy2", "url": "https://github.com/kmike/pymorphy2" }]
|
||
},
|
||
{
|
||
"code": "uk",
|
||
"name": "Ukrainian",
|
||
"has_examples": true,
|
||
"dependencies": [{ "name": "pymorphy2", "url": "https://github.com/kmike/pymorphy2" }]
|
||
},
|
||
{ "code": "hr", "name": "Croatian", "has_examples": true },
|
||
{ "code": "eu", "name": "Basque", "has_examples": true },
|
||
{ "code": "yo", "name": "Yoruba", "has_examples": true },
|
||
{ "code": "tr", "name": "Turkish", "example": "Bu bir cümledir.", "has_examples": true },
|
||
{ "code": "ca", "name": "Catalan", "example": "Això és una frase.", "has_examples": true },
|
||
{ "code": "he", "name": "Hebrew", "example": "זהו משפט.", "has_examples": true },
|
||
{ "code": "ar", "name": "Arabic", "example": "هذه جملة", "has_examples": true },
|
||
{ "code": "fa", "name": "Persian", "has_examples": true },
|
||
{ "code": "ur", "name": "Urdu", "example": "یہ ایک جملہ ہے", "has_examples": true },
|
||
{ "code": "tt", "name": "Tatar", "has_examples": true },
|
||
{ "code": "te", "name": "Telugu", "example": "ఇది ఒక వాక్యం.", "has_examples": true },
|
||
{ "code": "si", "name": "Sinhala", "example": "මෙය වාක්යයකි.", "has_examples": true },
|
||
{ "code": "ga", "name": "Irish" },
|
||
{ "code": "bn", "name": "Bengali", "has_examples": true },
|
||
{ "code": "hi", "name": "Hindi", "example": "यह एक वाक्य है।", "has_examples": true },
|
||
{ "code": "mr", "name": "Marathi" },
|
||
{ "code": "kn", "name": "Kannada" },
|
||
{ "code": "ta", "name": "Tamil", "has_examples": true },
|
||
{
|
||
"code": "id",
|
||
"name": "Indonesian",
|
||
"example": "Ini adalah sebuah kalimat.",
|
||
"has_examples": true
|
||
},
|
||
{ "code": "tl", "name": "Tagalog" },
|
||
{ "code": "af", "name": "Afrikaans" },
|
||
{ "code": "bg", "name": "Bulgarian", "example": "Това е изречение", "has_examples": true },
|
||
{ "code": "cs", "name": "Czech" },
|
||
{ "code": "is", "name": "Icelandic" },
|
||
{ "code": "lv", "name": "Latvian" },
|
||
{ "code": "sr", "name": "Serbian" },
|
||
{ "code": "sk", "name": "Slovak" },
|
||
{ "code": "sl", "name": "Slovenian" },
|
||
{ "code": "lb", "name": "Luxembourgish" },
|
||
{
|
||
"code": "sq",
|
||
"name": "Albanian",
|
||
"example": "Kjo është një fjali.",
|
||
"has_examples": true
|
||
},
|
||
{ "code": "et", "name": "Estonian" },
|
||
{
|
||
"code": "th",
|
||
"name": "Thai",
|
||
"dependencies": [
|
||
{ "name": "pythainlp", "url": "https://github.com/wannaphongcom/pythainlp" }
|
||
],
|
||
"example": "นี่คือประโยค",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "ja",
|
||
"name": "Japanese",
|
||
"dependencies": [
|
||
{ "name": "Unidic", "url": "http://unidic.ninjal.ac.jp/back_number#unidic_cwj" },
|
||
{ "name": "Mecab", "url": "https://github.com/taku910/mecab" },
|
||
{ "name": "fugashi", "url": "https://github.com/polm/fugashi" }
|
||
],
|
||
"example": "これは文章です。",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "ko",
|
||
"name": "Korean",
|
||
"dependencies": [
|
||
{
|
||
"name": "mecab-ko",
|
||
"url": "https://bitbucket.org/eunjeon/mecab-ko/src/master/README.md"
|
||
},
|
||
{ "name": "mecab-ko-dic", "url": "https://bitbucket.org/eunjeon/mecab-ko-dic" },
|
||
{ "name": "natto-py", "url": "https://github.com/buruzaemon/natto-py" }
|
||
],
|
||
"example": "이것은 문장입니다.",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "vi",
|
||
"name": "Vietnamese",
|
||
"dependencies": [{ "name": "Pyvi", "url": "https://github.com/trungtv/pyvi" }]
|
||
},
|
||
{
|
||
"code": "lij",
|
||
"name": "Ligurian",
|
||
"example": "Sta chì a l'é unna fraxe.",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "hy",
|
||
"name": "Armenian",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "gu",
|
||
"name": "Gujarati",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "ml",
|
||
"name": "Malayalam",
|
||
"has_examples": true
|
||
},
|
||
{
|
||
"code": "xx",
|
||
"name": "Multi-language",
|
||
"models": ["xx_ent_wiki_sm"],
|
||
"example": "This is a sentence about Facebook."
|
||
}
|
||
],
|
||
"licenses": [
|
||
{ "id": "CC BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" },
|
||
{ "id": "CC BY-SA", "url": "https://creativecommons.org/licenses/by-sa/3.0/" },
|
||
{ "id": "CC BY-SA 3.0", "url": "https://creativecommons.org/licenses/by-sa/3.0/" },
|
||
{ "id": "CC BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" },
|
||
{ "id": "CC BY-NC", "url": "https://creativecommons.org/licenses/by-nc/3.0/" },
|
||
{ "id": "CC BY-NC 3.0", "url": "https://creativecommons.org/licenses/by-nc/3.0/" },
|
||
{ "id": "CC BY-NC 4.0", "url": "https://creativecommons.org/licenses/by-nc/4.0/" },
|
||
{ "id": "CC-BY-NC-SA 3.0", "url": "https://creativecommons.org/licenses/by-nc-sa/3.0/" },
|
||
{ "id": "GPL", "url": "https://www.gnu.org/licenses/gpl.html" },
|
||
{ "id": "LGPL", "url": "https://www.gnu.org/licenses/lgpl.html" },
|
||
{ "id": "MIT", "url": "https://opensource.org/licenses/MIT" }
|
||
]
|
||
}
|