2017-10-03 15:28:03 +03:00
|
|
|
{
|
|
|
|
"sidebar": {
|
|
|
|
"Models": {
|
2017-10-30 16:09:43 +03:00
|
|
|
"Overview": "./",
|
|
|
|
"Comparison": "comparison"
|
2017-10-03 15:28:03 +03:00
|
|
|
},
|
|
|
|
|
|
|
|
"Language models": {
|
|
|
|
"English": "en",
|
|
|
|
"German": "de",
|
|
|
|
"Spanish": "es",
|
2017-10-29 02:29:39 +03:00
|
|
|
"Portuguese": "pt",
|
2017-10-03 15:28:03 +03:00
|
|
|
"French": "fr",
|
2017-10-29 02:29:39 +03:00
|
|
|
"Italian": "it",
|
2017-10-03 15:28:03 +03:00
|
|
|
"Multi-Language": "xx"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
|
|
|
|
"index": {
|
|
|
|
"title": "Models Overview",
|
|
|
|
"teaser": "Downloadable statistical models for spaCy to predict and assign linguistic features.",
|
|
|
|
"quickstart": true,
|
|
|
|
"menu": {
|
|
|
|
"Quickstart": "quickstart",
|
|
|
|
"Installation": "install",
|
|
|
|
"Naming Conventions": "conventions"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
|
2017-10-30 16:09:43 +03:00
|
|
|
"comparison": {
|
|
|
|
"title": "Model Comparison",
|
|
|
|
"teaser": "Compare spaCy's statistical models and their accuracy.",
|
|
|
|
"tag": "experimental",
|
|
|
|
"compare_models": true,
|
|
|
|
"default_models": {
|
|
|
|
"model1": "en_core_web_sm",
|
|
|
|
"model2": "en_core_web_lg"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
|
2017-10-03 15:28:03 +03:00
|
|
|
"MODELS": {
|
|
|
|
"en": ["en_core_web_sm", "en_core_web_lg", "en_vectors_web_lg"],
|
|
|
|
"de": ["de_dep_news_sm"],
|
|
|
|
"es": ["es_core_web_sm"],
|
2017-10-29 02:29:39 +03:00
|
|
|
"pt": [],
|
2017-10-03 15:28:03 +03:00
|
|
|
"fr": [],
|
2017-10-29 02:29:39 +03:00
|
|
|
"it": [],
|
2017-10-03 15:28:03 +03:00
|
|
|
"xx": ["xx_ent_wiki_sm"]
|
|
|
|
},
|
|
|
|
|
|
|
|
"MODEL_META": {
|
|
|
|
"core": "Vocabulary, syntax, entities, vectors",
|
|
|
|
"dep": "Vocabulary, syntax",
|
|
|
|
"ent": "Named entities",
|
|
|
|
"vectors": "Word vectors",
|
|
|
|
"web": "written text (blogs, news, comments)",
|
|
|
|
"news": "written text (news, media)",
|
|
|
|
"wiki": "Wikipedia",
|
|
|
|
"uas": "Unlabelled dependencies",
|
|
|
|
"las": "Labelled dependencies",
|
|
|
|
"tags_acc": "Part-of-speech tags",
|
|
|
|
"ents_f": "Entities (F-score)",
|
2017-10-04 01:37:15 +03:00
|
|
|
"ents_p": "Entities (precision)",
|
|
|
|
"ents_r": "Entities (recall)",
|
2017-10-29 05:58:19 +03:00
|
|
|
"cpu": "words per second on CPU",
|
|
|
|
"gpu": "words per second on GPU",
|
2017-10-03 15:28:03 +03:00
|
|
|
"pipeline": "Processing pipeline components in order",
|
2017-10-29 05:58:19 +03:00
|
|
|
"sources": "Sources of training data",
|
|
|
|
"benchmark_parser": "Parser accuracy",
|
|
|
|
"benchmark_ner": "NER accuracy",
|
|
|
|
"benchmark_speed": "Speed"
|
2017-10-03 15:28:03 +03:00
|
|
|
},
|
|
|
|
|
|
|
|
"MODEL_LICENSES": {
|
2017-10-29 05:58:19 +03:00
|
|
|
"CC BY-SA": "https://creativecommons.org/licenses/by-sa/3.0/",
|
2017-10-03 15:28:03 +03:00
|
|
|
"CC BY-SA 3.0": "https://creativecommons.org/licenses/by-sa/3.0/",
|
2017-10-29 05:58:19 +03:00
|
|
|
"CC BY-NC": "https://creativecommons.org/licenses/by-nc/3.0/",
|
2017-10-29 02:18:32 +03:00
|
|
|
"CC BY-NC 3.0": "https://creativecommons.org/licenses/by-nc/3.0/",
|
2017-10-29 05:58:19 +03:00
|
|
|
"GPL": "http://www.gnu.de/documents/gpl.en.html"
|
2017-10-03 15:28:03 +03:00
|
|
|
},
|
|
|
|
|
2017-10-29 05:58:19 +03:00
|
|
|
"MODEL_BENCHMARKS": {
|
|
|
|
"parser": { "uas": "UAS", "las": "LAS", "tags_acc": "POS" },
|
|
|
|
"ner": { "ents_f": "NER F", "ents_p": "NER P", "ents_r": "NER R" },
|
|
|
|
"speed": { "nwords": "Words", "cpu": "w/s CPU", "gpu": "w/s GPU" }
|
2017-10-03 15:28:03 +03:00
|
|
|
},
|
|
|
|
|
|
|
|
"LANGUAGES": {
|
|
|
|
"en": "English",
|
|
|
|
"de": "German",
|
|
|
|
"fr": "French",
|
|
|
|
"es": "Spanish",
|
|
|
|
"it": "Italian",
|
|
|
|
"pt": "Portuguese",
|
|
|
|
"nl": "Dutch",
|
|
|
|
"sv": "Swedish",
|
|
|
|
"fi": "Finnish",
|
|
|
|
"nb": "Norwegian Bokmål",
|
|
|
|
"da": "Danish",
|
|
|
|
"hu": "Hungarian",
|
|
|
|
"pl": "Polish",
|
|
|
|
"he": "Hebrew",
|
2017-11-01 02:56:21 +03:00
|
|
|
"ga": "Irish",
|
2017-10-03 15:28:03 +03:00
|
|
|
"bn": "Bengali",
|
2017-10-14 16:16:41 +03:00
|
|
|
"hi": "Hindi",
|
2017-10-03 15:28:03 +03:00
|
|
|
"id": "Indonesian",
|
|
|
|
"th": "Thai",
|
|
|
|
"zh": "Chinese",
|
|
|
|
"ja": "Japanese",
|
|
|
|
"xx": "Multi-language"
|
|
|
|
},
|
|
|
|
|
|
|
|
"EXAMPLE_SENTENCES": {
|
|
|
|
"en": "This is a sentence.",
|
|
|
|
"de": "Dies ist ein Satz.",
|
|
|
|
"fr": "C'est une phrase.",
|
|
|
|
"es": "Esto es una frase.",
|
2017-11-01 03:57:33 +03:00
|
|
|
"pt": "Esta é uma frase.",
|
|
|
|
"it": "Questa è una frase.",
|
2017-10-03 15:28:03 +03:00
|
|
|
"xx": "This is a sentence about Facebook."
|
|
|
|
}
|
|
|
|
}
|