spaCy/website/models/_data.json

136 lines
4.4 KiB
JSON
Raw Normal View History

2017-10-03 15:28:03 +03:00
{
"sidebar": {
"Models": {
"Overview": "./",
"Comparison": "comparison"
2017-10-03 15:28:03 +03:00
},
"Language models": {
"English": "en",
"German": "de",
"Spanish": "es",
"Portuguese": "pt",
2017-10-03 15:28:03 +03:00
"French": "fr",
"Italian": "it",
2017-11-02 18:13:38 +03:00
"Dutch": "nl",
2017-10-03 15:28:03 +03:00
"Multi-language": "xx"
}
},
"index": {
"title": "Models Overview",
"teaser": "Downloadable statistical models for spaCy to predict and assign linguistic features.",
"quickstart": true,
"menu": {
"Quickstart": "quickstart",
"Installation": "install",
"Naming Conventions": "conventions"
}
},
"comparison": {
"title": "Model Comparison",
"teaser": "Compare spaCy's statistical models and their accuracy.",
"tag": "experimental",
"compare_models": true,
"default_models": {
"model1": "en_core_web_sm",
"model2": "en_core_web_lg"
}
},
2017-10-03 15:28:03 +03:00
"MODELS": {
"en": ["en_core_web_sm", "en_core_web_md", "en_core_web_lg", "en_vectors_web_lg"],
"de": ["de_core_news_sm"],
"es": ["es_core_news_sm", "es_core_news_md"],
2017-11-06 20:19:00 +03:00
"pt": ["pt_core_news_sm"],
2017-11-08 03:06:30 +03:00
"fr": ["fr_core_news_sm", "fr_core_news_md"],
2017-11-02 18:13:38 +03:00
"it": ["it_core_news_sm"],
2017-11-06 04:44:59 +03:00
"nl": ["nl_core_news_sm"],
2017-10-03 15:28:03 +03:00
"xx": ["xx_ent_wiki_sm"]
},
"MODEL_META": {
"core": "Vocabulary, syntax, entities, vectors",
"core_sm": "Vocabulary, syntax, entities",
2017-10-03 15:28:03 +03:00
"dep": "Vocabulary, syntax",
"ent": "Named entities",
"vectors": "Word vectors",
"web": "written text (blogs, news, comments)",
"news": "written text (news, media)",
"wiki": "Wikipedia",
"uas": "Unlabelled dependencies",
"las": "Labelled dependencies",
2017-11-06 16:12:11 +03:00
"tags_acc": "Part-of-speech tags (fine-grained tags, Token.tag)",
2017-10-03 15:28:03 +03:00
"ents_f": "Entities (F-score)",
"ents_p": "Entities (precision)",
"ents_r": "Entities (recall)",
"cpu": "words per second on CPU",
"gpu": "words per second on GPU",
2017-10-03 15:28:03 +03:00
"pipeline": "Processing pipeline components in order",
"sources": "Sources of training data",
2017-11-02 22:04:13 +03:00
"vecs": "Word vectors included in the model. Models that only support context vectors compute similarity via the tensors shared with the pipeline.",
2017-11-06 16:12:11 +03:00
"benchmark_parser": "Syntax accuracy",
"benchmark_ner": "NER accuracy",
"benchmark_speed": "Speed"
2017-10-03 15:28:03 +03:00
},
"MODEL_LICENSES": {
2018-03-24 19:12:48 +03:00
"CC BY 4.0": "https://creativecommons.org/licenses/by/4.0/",
"CC BY-SA": "https://creativecommons.org/licenses/by-sa/3.0/",
"CC BY-SA 3.0": "https://creativecommons.org/licenses/by-sa/3.0/",
"CC BY-SA 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
"CC BY-NC": "https://creativecommons.org/licenses/by-nc/3.0/",
"CC BY-NC 3.0": "https://creativecommons.org/licenses/by-nc/3.0/",
"CC-BY-NC-SA 3.0": "https://creativecommons.org/licenses/by-nc-sa/3.0/",
"GPL": "https://www.gnu.org/licenses/gpl.html",
"LGPL": "https://www.gnu.org/licenses/lgpl.html"
2017-10-03 15:28:03 +03:00
},
"MODEL_BENCHMARKS": {
"parser": { "uas": "UAS", "las": "LAS", "tags_acc": "POS" },
"ner": { "ents_f": "NER F", "ents_p": "NER P", "ents_r": "NER R" }
2017-10-03 15:28:03 +03:00
},
"LANGUAGES": {
"en": "English",
"de": "German",
"fr": "French",
"es": "Spanish",
"it": "Italian",
"pt": "Portuguese",
"nl": "Dutch",
"sv": "Swedish",
"fi": "Finnish",
"nb": "Norwegian Bokmål",
"da": "Danish",
"hu": "Hungarian",
"pl": "Polish",
2017-12-06 15:40:32 +03:00
"ru": "Russian",
"ro": "Romanian",
"hr": "Croatian",
2017-11-02 18:32:24 +03:00
"tr": "Turkish",
2017-10-03 15:28:03 +03:00
"he": "Hebrew",
"fa": "Persian",
"ga": "Irish",
2017-10-03 15:28:03 +03:00
"bn": "Bengali",
2017-10-14 16:16:41 +03:00
"hi": "Hindi",
2017-10-03 15:28:03 +03:00
"id": "Indonesian",
"th": "Thai",
"zh": "Chinese",
"ja": "Japanese",
"xx": "Multi-language"
},
"EXAMPLE_SENTENCES": {
"en": "This is a sentence.",
"de": "Dies ist ein Satz.",
"fr": "C'est une phrase.",
"es": "Esto es una frase.",
"pt": "Esta é uma frase.",
"it": "Questa è una frase.",
2017-11-06 15:04:29 +03:00
"nl": "Dit is een zin.",
2017-10-03 15:28:03 +03:00
"xx": "This is a sentence about Facebook."
}
}