2017-10-03 15:28:03 +03:00
|
|
|
{
|
|
|
|
"sidebar": {
|
|
|
|
"Models": {
|
2018-04-29 03:06:46 +03:00
|
|
|
"Overview": "./"
|
2017-10-03 15:28:03 +03:00
|
|
|
},
|
|
|
|
|
|
|
|
"Language models": {
|
|
|
|
"English": "en",
|
|
|
|
"German": "de",
|
|
|
|
"Spanish": "es",
|
2017-10-29 02:29:39 +03:00
|
|
|
"Portuguese": "pt",
|
2017-10-03 15:28:03 +03:00
|
|
|
"French": "fr",
|
2017-10-29 02:29:39 +03:00
|
|
|
"Italian": "it",
|
2017-11-02 18:13:38 +03:00
|
|
|
"Dutch": "nl",
|
2017-10-03 15:28:03 +03:00
|
|
|
"Multi-language": "xx"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
|
|
|
|
"index": {
|
|
|
|
"title": "Models Overview",
|
|
|
|
"teaser": "Downloadable statistical models for spaCy to predict and assign linguistic features.",
|
|
|
|
"quickstart": true,
|
|
|
|
"menu": {
|
|
|
|
"Quickstart": "quickstart",
|
|
|
|
"Installation": "install",
|
|
|
|
"Naming Conventions": "conventions"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
|
2017-10-30 16:09:43 +03:00
|
|
|
"comparison": {
|
|
|
|
"title": "Model Comparison",
|
|
|
|
"teaser": "Compare spaCy's statistical models and their accuracy.",
|
|
|
|
"tag": "experimental",
|
|
|
|
"compare_models": true,
|
|
|
|
"default_models": {
|
|
|
|
"model1": "en_core_web_sm",
|
|
|
|
"model2": "en_core_web_lg"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
|
2017-10-03 15:28:03 +03:00
|
|
|
"MODELS": {
|
2017-11-08 13:43:00 +03:00
|
|
|
"en": ["en_core_web_sm", "en_core_web_md", "en_core_web_lg", "en_vectors_web_lg"],
|
2017-11-03 13:29:34 +03:00
|
|
|
"de": ["de_core_news_sm"],
|
|
|
|
"es": ["es_core_news_sm", "es_core_news_md"],
|
2017-11-06 20:19:00 +03:00
|
|
|
"pt": ["pt_core_news_sm"],
|
2017-11-08 03:06:30 +03:00
|
|
|
"fr": ["fr_core_news_sm", "fr_core_news_md"],
|
2017-11-02 18:13:38 +03:00
|
|
|
"it": ["it_core_news_sm"],
|
2017-11-06 04:44:59 +03:00
|
|
|
"nl": ["nl_core_news_sm"],
|
2017-10-03 15:28:03 +03:00
|
|
|
"xx": ["xx_ent_wiki_sm"]
|
|
|
|
},
|
|
|
|
|
|
|
|
"MODEL_META": {
|
|
|
|
"core": "Vocabulary, syntax, entities, vectors",
|
2017-11-08 18:24:27 +03:00
|
|
|
"core_sm": "Vocabulary, syntax, entities",
|
2017-10-03 15:28:03 +03:00
|
|
|
"dep": "Vocabulary, syntax",
|
|
|
|
"ent": "Named entities",
|
|
|
|
"vectors": "Word vectors",
|
|
|
|
"web": "written text (blogs, news, comments)",
|
|
|
|
"news": "written text (news, media)",
|
|
|
|
"wiki": "Wikipedia",
|
|
|
|
"uas": "Unlabelled dependencies",
|
|
|
|
"las": "Labelled dependencies",
|
2017-11-06 16:12:11 +03:00
|
|
|
"tags_acc": "Part-of-speech tags (fine-grained tags, Token.tag)",
|
2017-10-03 15:28:03 +03:00
|
|
|
"ents_f": "Entities (F-score)",
|
2017-10-04 01:37:15 +03:00
|
|
|
"ents_p": "Entities (precision)",
|
|
|
|
"ents_r": "Entities (recall)",
|
2017-10-29 05:58:19 +03:00
|
|
|
"cpu": "words per second on CPU",
|
|
|
|
"gpu": "words per second on GPU",
|
2017-10-03 15:28:03 +03:00
|
|
|
"pipeline": "Processing pipeline components in order",
|
2017-10-29 05:58:19 +03:00
|
|
|
"sources": "Sources of training data",
|
2017-11-02 22:04:13 +03:00
|
|
|
"vecs": "Word vectors included in the model. Models that only support context vectors compute similarity via the tensors shared with the pipeline.",
|
2017-11-06 16:12:11 +03:00
|
|
|
"benchmark_parser": "Syntax accuracy",
|
2017-10-29 05:58:19 +03:00
|
|
|
"benchmark_ner": "NER accuracy",
|
2018-04-29 03:06:46 +03:00
|
|
|
"benchmark_speed": "Speed",
|
|
|
|
"compat": "Latest compatible model version for your spaCy installation"
|
2017-10-03 15:28:03 +03:00
|
|
|
},
|
|
|
|
|
|
|
|
"MODEL_LICENSES": {
|
2018-03-24 19:12:48 +03:00
|
|
|
"CC BY 4.0": "https://creativecommons.org/licenses/by/4.0/",
|
|
|
|
"CC BY-SA": "https://creativecommons.org/licenses/by-sa/3.0/",
|
|
|
|
"CC BY-SA 3.0": "https://creativecommons.org/licenses/by-sa/3.0/",
|
|
|
|
"CC BY-SA 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
|
|
|
|
"CC BY-NC": "https://creativecommons.org/licenses/by-nc/3.0/",
|
|
|
|
"CC BY-NC 3.0": "https://creativecommons.org/licenses/by-nc/3.0/",
|
|
|
|
"CC-BY-NC-SA 3.0": "https://creativecommons.org/licenses/by-nc-sa/3.0/",
|
|
|
|
"GPL": "https://www.gnu.org/licenses/gpl.html",
|
|
|
|
"LGPL": "https://www.gnu.org/licenses/lgpl.html"
|
2017-10-03 15:28:03 +03:00
|
|
|
},
|
|
|
|
|
2017-10-29 05:58:19 +03:00
|
|
|
"MODEL_BENCHMARKS": {
|
|
|
|
"parser": { "uas": "UAS", "las": "LAS", "tags_acc": "POS" },
|
2017-11-08 13:43:00 +03:00
|
|
|
"ner": { "ents_f": "NER F", "ents_p": "NER P", "ents_r": "NER R" }
|
2017-10-03 15:28:03 +03:00
|
|
|
},
|
|
|
|
|
2018-04-29 03:06:46 +03:00
|
|
|
"EXAMPLE_SENT_LANGS": [
|
|
|
|
"da", "de", "en", "es", "fa", "fr", "he", "hi", "hu", "id", "it", "ja",
|
|
|
|
"nb", "nl", "pl", "pt", "ru", "sv", "tr", "zh"
|
|
|
|
],
|
|
|
|
|
2017-10-03 15:28:03 +03:00
|
|
|
"LANGUAGES": {
|
|
|
|
"en": "English",
|
|
|
|
"de": "German",
|
|
|
|
"fr": "French",
|
|
|
|
"es": "Spanish",
|
|
|
|
"it": "Italian",
|
|
|
|
"pt": "Portuguese",
|
|
|
|
"nl": "Dutch",
|
|
|
|
"sv": "Swedish",
|
|
|
|
"fi": "Finnish",
|
|
|
|
"nb": "Norwegian Bokmål",
|
|
|
|
"da": "Danish",
|
|
|
|
"hu": "Hungarian",
|
|
|
|
"pl": "Polish",
|
2017-12-06 15:40:32 +03:00
|
|
|
"ru": "Russian",
|
2017-11-02 01:04:28 +03:00
|
|
|
"ro": "Romanian",
|
|
|
|
"hr": "Croatian",
|
2017-11-02 18:32:24 +03:00
|
|
|
"tr": "Turkish",
|
2017-10-03 15:28:03 +03:00
|
|
|
"he": "Hebrew",
|
2018-02-01 06:47:34 +03:00
|
|
|
"fa": "Persian",
|
2017-11-01 02:56:21 +03:00
|
|
|
"ga": "Irish",
|
2017-10-03 15:28:03 +03:00
|
|
|
"bn": "Bengali",
|
2017-10-14 16:16:41 +03:00
|
|
|
"hi": "Hindi",
|
2017-10-03 15:28:03 +03:00
|
|
|
"id": "Indonesian",
|
|
|
|
"th": "Thai",
|
|
|
|
"zh": "Chinese",
|
|
|
|
"ja": "Japanese",
|
2018-04-03 17:01:36 +03:00
|
|
|
"vi": "Vietnamese",
|
2017-10-03 15:28:03 +03:00
|
|
|
"xx": "Multi-language"
|
|
|
|
},
|
|
|
|
|
|
|
|
"EXAMPLE_SENTENCES": {
|
|
|
|
"en": "This is a sentence.",
|
|
|
|
"de": "Dies ist ein Satz.",
|
|
|
|
"fr": "C'est une phrase.",
|
|
|
|
"es": "Esto es una frase.",
|
2017-11-01 03:57:33 +03:00
|
|
|
"pt": "Esta é uma frase.",
|
|
|
|
"it": "Questa è una frase.",
|
2017-11-06 15:04:29 +03:00
|
|
|
"nl": "Dit is een zin.",
|
2017-10-03 15:28:03 +03:00
|
|
|
"xx": "This is a sentence about Facebook."
|
|
|
|
}
|
|
|
|
}
|