spaCy/website/models/_data.json

{
    "sidebar": {
        "Models": {
            "Overview": "./",
            "Comparison": "comparison"
        },

        "Language models": {
            "English": "en",
            "German": "de",
            "Spanish": "es",
            "Portuguese": "pt",
            "French": "fr",
            "Italian": "it",
            "Dutch": "nl",
            "Multi-Language": "xx"
        }
    },

    "index": {
        "title": "Models Overview",
        "teaser": "Downloadable statistical models for spaCy to predict and assign linguistic features.",
        "quickstart": true,
        "menu": {
            "Quickstart": "quickstart",
            "Installation": "install",
            "Naming Conventions": "conventions"
        }
    },

    "comparison": {
        "title": "Model Comparison",
        "teaser": "Compare spaCy's statistical models and their accuracy.",
        "tag": "experimental",
        "compare_models": true,
        "default_models": {
            "model1": "en_core_web_sm",
            "model2": "en_core_web_lg"
        }
    },

    "MODELS": {
        "en": ["en_core_web_sm", "en_core_web_lg", "en_vectors_web_lg"],
        "de": ["de_core_news_sm"],
        "es": ["es_core_news_sm", "es_core_news_md"],
        "pt": ["pt_core_news_sm"],
        "fr": ["fr_core_news_sm"],
        "it": ["it_core_news_sm"],
        "xx": ["xx_ent_wiki_sm"]
    },

    "MODEL_META": {
        "core": "Vocabulary, syntax, entities, vectors",
        "dep": "Vocabulary, syntax",
        "ent": "Named entities",
        "vectors": "Word vectors",
        "web": "written text (blogs, news, comments)",
        "news": "written text (news, media)",
        "wiki": "Wikipedia",
        "uas": "Unlabelled dependencies",
        "las": "Labelled dependencies",
        "tags_acc": "Part-of-speech tags",
        "ents_f": "Entities (F-score)",
        "ents_p": "Entities (precision)",
        "ents_r": "Entities (recall)",
        "cpu": "words per second on CPU",
        "gpu": "words per second on GPU",
        "pipeline": "Processing pipeline components in order",
        "sources": "Sources of training data",
        "vecs": "Word vectors included in the model. Models that only support context vectors compute similarity via the tensors shared with the pipeline.",
        "benchmark_parser": "Parser accuracy",
        "benchmark_ner": "NER accuracy",
        "benchmark_speed": "Speed"
    },

    "MODEL_LICENSES": {
        "CC BY-SA":     "https://creativecommons.org/licenses/by-sa/3.0/",
        "CC BY-SA 3.0": "https://creativecommons.org/licenses/by-sa/3.0/",
        "CC BY-SA 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
        "CC BY-NC":     "https://creativecommons.org/licenses/by-nc/3.0/",
        "CC BY-NC 3.0": "https://creativecommons.org/licenses/by-nc/3.0/",
        "GPL":          "https://www.gnu.org/licenses/gpl.html",
        "LGPL":         "https://www.gnu.org/licenses/lgpl.html"
    },

    "MODEL_BENCHMARKS": {
        "parser": { "uas": "UAS", "las": "LAS", "tags_acc": "POS" },
        "ner":    { "ents_f": "NER F", "ents_p": "NER P", "ents_r": "NER R" },
        "speed":  { "nwords": "Words", "cpu": "w/s CPU", "gpu": "w/s GPU" }
    },

    "LANGUAGES": {
        "en": "English",
        "de": "German",
        "fr": "French",
        "es": "Spanish",
        "it": "Italian",
        "pt": "Portuguese",
        "nl": "Dutch",
        "sv": "Swedish",
        "fi": "Finnish",
        "nb": "Norwegian Bokmål",
        "da": "Danish",
        "hu": "Hungarian",
        "pl": "Polish",
        "ro": "Romanian",
        "hr": "Croatian",
        "tr": "Turkish",
        "he": "Hebrew",
        "ga": "Irish",
        "bn": "Bengali",
        "hi": "Hindi",
        "id": "Indonesian",
        "th": "Thai",
        "zh": "Chinese",
        "ja": "Japanese",
        "xx": "Multi-language"
    },

    "EXAMPLE_SENTENCES": {
        "en": "This is a sentence.",
        "de": "Dies ist ein Satz.",
        "fr": "C'est une phrase.",
        "es": "Esto es una frase.",
        "pt": "Esta é uma frase.",
        "it": "Questa è una frase.",
        "xx": "This is a sentence about Facebook."
    }
}
Add models documentation 2017-10-03 15:28:03 +03:00			`{`
			`"sidebar": {`
			`"Models": {`
Add tool for model comparison (experimental) User can select two models and their meta is fetched from GitHub. Features, accuracy figures and speed benchmarks are displayed in a table, with an additional chart comparing the accuracy scores if available. Main use case: demonstrating and visualising trade-offs between larger and smaller models of the same type. 2017-10-30 16:09:43 +03:00			`"Overview": "./",`
			`"Comparison": "comparison"`
Add models documentation 2017-10-03 15:28:03 +03:00			`},`

			`"Language models": {`
			`"English": "en",`
			`"German": "de",`
			`"Spanish": "es",`
Add placeholders for Italian and Portuguese models 2017-10-29 02:29:39 +03:00			`"Portuguese": "pt",`
Add models documentation 2017-10-03 15:28:03 +03:00			`"French": "fr",`
Add placeholders for Italian and Portuguese models 2017-10-29 02:29:39 +03:00			`"Italian": "it",`
Update model families 2017-11-02 18:13:38 +03:00			`"Dutch": "nl",`
Add models documentation 2017-10-03 15:28:03 +03:00			`"Multi-Language": "xx"`
			`}`
			`},`

			`"index": {`
			`"title": "Models Overview",`
			`"teaser": "Downloadable statistical models for spaCy to predict and assign linguistic features.",`
			`"quickstart": true,`
			`"menu": {`
			`"Quickstart": "quickstart",`
			`"Installation": "install",`
			`"Naming Conventions": "conventions"`
			`}`
			`},`

Add tool for model comparison (experimental) User can select two models and their meta is fetched from GitHub. Features, accuracy figures and speed benchmarks are displayed in a table, with an additional chart comparing the accuracy scores if available. Main use case: demonstrating and visualising trade-offs between larger and smaller models of the same type. 2017-10-30 16:09:43 +03:00			`"comparison": {`
			`"title": "Model Comparison",`
			`"teaser": "Compare spaCy's statistical models and their accuracy.",`
			`"tag": "experimental",`
			`"compare_models": true,`
			`"default_models": {`
			`"model1": "en_core_web_sm",`
			`"model2": "en_core_web_lg"`
			`}`
			`},`

Add models documentation 2017-10-03 15:28:03 +03:00			`"MODELS": {`
Update models list to reflect spaCy v2.0.0a18 2017-11-03 13:29:34 +03:00			`"en": ["en_core_web_sm", "en_core_web_lg", "en_vectors_web_lg"],`
			`"de": ["de_core_news_sm"],`
			`"es": ["es_core_news_sm", "es_core_news_md"],`
Add Portuguese and French 2017-11-05 01:07:21 +03:00			`"pt": ["pt_core_news_sm"],`
			`"fr": ["fr_core_news_sm"],`
Update model families 2017-11-02 18:13:38 +03:00			`"it": ["it_core_news_sm"],`
Add models documentation 2017-10-03 15:28:03 +03:00			`"xx": ["xx_ent_wiki_sm"]`
			`},`

			`"MODEL_META": {`
			`"core": "Vocabulary, syntax, entities, vectors",`
			`"dep": "Vocabulary, syntax",`
			`"ent": "Named entities",`
			`"vectors": "Word vectors",`
			`"web": "written text (blogs, news, comments)",`
			`"news": "written text (news, media)",`
			`"wiki": "Wikipedia",`
			`"uas": "Unlabelled dependencies",`
			`"las": "Labelled dependencies",`
			`"tags_acc": "Part-of-speech tags",`
			`"ents_f": "Entities (F-score)",`
Add NER P and NER R scores to model overview 2017-10-04 01:37:15 +03:00			`"ents_p": "Entities (precision)",`
			`"ents_r": "Entities (recall)",`
Remove charts from model directory and add speed benchmarks With speed benchmarks, charts ended up taking up too much space – and they were mostly data porn and not particularly useful anyways. Instead, we might add a "Compare" page that fetches all models and lets the user compare two or more models in terms of accuracy, speed etc. 2017-10-29 05:58:19 +03:00			`"cpu": "words per second on CPU",`
			`"gpu": "words per second on GPU",`
Add models documentation 2017-10-03 15:28:03 +03:00			`"pipeline": "Processing pipeline components in order",`
Remove charts from model directory and add speed benchmarks With speed benchmarks, charts ended up taking up too much space – and they were mostly data porn and not particularly useful anyways. Instead, we might add a "Compare" page that fetches all models and lets the user compare two or more models in terms of accuracy, speed etc. 2017-10-29 05:58:19 +03:00			`"sources": "Sources of training data",`
Fix vector details in model overview 2017-11-02 22:04:13 +03:00			`"vecs": "Word vectors included in the model. Models that only support context vectors compute similarity via the tensors shared with the pipeline.",`
Remove charts from model directory and add speed benchmarks With speed benchmarks, charts ended up taking up too much space – and they were mostly data porn and not particularly useful anyways. Instead, we might add a "Compare" page that fetches all models and lets the user compare two or more models in terms of accuracy, speed etc. 2017-10-29 05:58:19 +03:00			`"benchmark_parser": "Parser accuracy",`
			`"benchmark_ner": "NER accuracy",`
			`"benchmark_speed": "Speed"`
Add models documentation 2017-10-03 15:28:03 +03:00			`},`

			`"MODEL_LICENSES": {`
Remove charts from model directory and add speed benchmarks With speed benchmarks, charts ended up taking up too much space – and they were mostly data porn and not particularly useful anyways. Instead, we might add a "Compare" page that fetches all models and lets the user compare two or more models in terms of accuracy, speed etc. 2017-10-29 05:58:19 +03:00			`"CC BY-SA": "https://creativecommons.org/licenses/by-sa/3.0/",`
Add models documentation 2017-10-03 15:28:03 +03:00			`"CC BY-SA 3.0": "https://creativecommons.org/licenses/by-sa/3.0/",`
Update licenses 2017-11-02 01:04:40 +03:00			`"CC BY-SA 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",`
Remove charts from model directory and add speed benchmarks With speed benchmarks, charts ended up taking up too much space – and they were mostly data porn and not particularly useful anyways. Instead, we might add a "Compare" page that fetches all models and lets the user compare two or more models in terms of accuracy, speed etc. 2017-10-29 05:58:19 +03:00			`"CC BY-NC": "https://creativecommons.org/licenses/by-nc/3.0/",`
Add GPL license link 2017-10-29 02:18:32 +03:00			`"CC BY-NC 3.0": "https://creativecommons.org/licenses/by-nc/3.0/",`
Update licenses 2017-11-01 23:49:57 +03:00			`"GPL": "https://www.gnu.org/licenses/gpl.html",`
			`"LGPL": "https://www.gnu.org/licenses/lgpl.html"`
Add models documentation 2017-10-03 15:28:03 +03:00			`},`

Remove charts from model directory and add speed benchmarks With speed benchmarks, charts ended up taking up too much space – and they were mostly data porn and not particularly useful anyways. Instead, we might add a "Compare" page that fetches all models and lets the user compare two or more models in terms of accuracy, speed etc. 2017-10-29 05:58:19 +03:00			`"MODEL_BENCHMARKS": {`
			`"parser": { "uas": "UAS", "las": "LAS", "tags_acc": "POS" },`
			`"ner": { "ents_f": "NER F", "ents_p": "NER P", "ents_r": "NER R" },`
			`"speed": { "nwords": "Words", "cpu": "w/s CPU", "gpu": "w/s GPU" }`
Add models documentation 2017-10-03 15:28:03 +03:00			`},`

			`"LANGUAGES": {`
			`"en": "English",`
			`"de": "German",`
			`"fr": "French",`
			`"es": "Spanish",`
			`"it": "Italian",`
			`"pt": "Portuguese",`
			`"nl": "Dutch",`
			`"sv": "Swedish",`
			`"fi": "Finnish",`
			`"nb": "Norwegian Bokmål",`
			`"da": "Danish",`
			`"hu": "Hungarian",`
			`"pl": "Polish",`
Add Romanian and Croatian skeletons (experimental) Add language data templates to make it easier for others to contribute to the language support 2017-11-02 01:04:28 +03:00			`"ro": "Romanian",`
			`"hr": "Croatian",`
Add skeleton language data for Turkish 2017-11-02 18:32:24 +03:00			`"tr": "Turkish",`
Add models documentation 2017-10-03 15:28:03 +03:00			`"he": "Hebrew",`
Add Irish to list of languages (see #1152) 2017-11-01 02:56:21 +03:00			`"ga": "Irish",`
Add models documentation 2017-10-03 15:28:03 +03:00			`"bn": "Bengali",`
Add Hindi to supported languages 2017-10-14 16:16:41 +03:00			`"hi": "Hindi",`
Add models documentation 2017-10-03 15:28:03 +03:00			`"id": "Indonesian",`
			`"th": "Thai",`
			`"zh": "Chinese",`
			`"ja": "Japanese",`
			`"xx": "Multi-language"`
			`},`

			`"EXAMPLE_SENTENCES": {`
			`"en": "This is a sentence.",`
			`"de": "Dies ist ein Satz.",`
			`"fr": "C'est une phrase.",`
			`"es": "Esto es una frase.",`
Update example sentences for models quickstart 2017-11-01 03:57:33 +03:00			`"pt": "Esta é uma frase.",`
			`"it": "Questa è una frase.",`
Add models documentation 2017-10-03 15:28:03 +03:00			`"xx": "This is a sentence about Facebook."`
			`}`
			`}`