mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Add tool for model comparison (experimental)
Users can select two models, and their meta is fetched from GitHub. Features, accuracy figures and speed benchmarks are displayed in a table, with an additional chart comparing the accuracy scores if available. Main use case: demonstrating and visualising trade-offs between larger and smaller models of the same type.
This commit is contained in:
parent
fb2710211b
commit
1eb1ed0c7c
|
@ -46,6 +46,7 @@ if IS_PAGE
|
|||
- NavHighlighter = "new NavHighlighter('data-section', 'data-nav');"
|
||||
- GitHubEmbed = "new GitHubEmbed('" + SOCIAL.github + "', 'data-gh-embed');"
|
||||
- ModelLoader = "new ModelLoader('" + MODELS_REPO + "'," + JSON.stringify(CURRENT_MODELS) + "," + JSON.stringify(MODEL_LICENSES) + "," + JSON.stringify(MODEL_BENCHMARKS) + ");"
|
||||
- ModelComparer = "new ModelComparer('" + MODELS_REPO + "'," + JSON.stringify(MODEL_LICENSES) + "," + JSON.stringify(MODEL_BENCHMARKS) + "," + JSON.stringify(LANGUAGES) + "," + JSON.stringify(MODEL_META) + "," + JSON.stringify(default_models || false) + ");"
|
||||
|
||||
//- Browsers with JS module support.
|
||||
Will be ignored otherwise.
|
||||
|
@ -64,6 +65,9 @@ script(type="module")
|
|||
if HAS_MODELS
|
||||
| import { ModelLoader } from '/assets/js/models.js';
|
||||
!=ModelLoader
|
||||
if compare_models
|
||||
| import { ModelComparer } from '/assets/js/models.js';
|
||||
!=ModelComparer
|
||||
|
||||
//- Browsers with no JS module support.
|
||||
Won't be fetched or interpreted otherwise.
|
||||
|
@ -78,3 +82,5 @@ script(nomodule)
|
|||
!=GitHubEmbed
|
||||
if HAS_MODELS
|
||||
!=ModelLoader
|
||||
if compare_models
|
||||
!=ModelComparer
|
||||
|
|
|
@ -158,3 +158,152 @@ export class ModelLoader {
|
|||
}
|
||||
}
|
||||
|
||||
export class ModelComparer {
    /**
     * Compare two model meta files and render chart and comparison table.
     * @param {string} repo - Path to GitHub repository containing releases.
     * @param {Object} licenses - License IDs mapped to URLs.
     * @param {Object} benchmarkKeys - Objects of available keys by type, e.g.
     * 'parser', 'ner', 'speed', mapped to labels.
     * @param {Object} languages - Available languages, ID mapped to name.
     * @param {Object} labels - Display labels, looked up by the type and genre
     * extracted from the model name.
     * @param {Object} defaultModels - Models to compare on load, 'model1' and
     * 'model2' mapped to model names.
     */
    constructor(repo, licenses = {}, benchmarkKeys = {}, languages = {}, labels = {}, defaultModels) {
        this.url = `https://raw.githubusercontent.com/${repo}/master`;
        this.repo = `https://github.com/${repo}`;
        this.tpl = new Templater('compare');
        this.benchKeys = benchmarkKeys;
        this.licenses = licenses;
        this.languages = languages;
        this.labels = labels;
        this.models = {};
        this.colors = CHART_COLORS;
        this.defaultModels = defaultModels;
        // The compatibility table is needed to resolve model versions, so the
        // UI is only wired up once it has loaded.
        this.fetchCompat()
            .then(compat => this.init(compat))
            .catch(() => this.showError());
    }

    /**
     * Store the compatibility table, attach the <select> listeners, create
     * the accuracy chart and load the default models, if any were given.
     * @param {Object} compat - The compatibility table from fetchCompat().
     */
    init(compat) {
        this.compat = compat;
        const selectA = this.tpl.get('model1');
        const selectB = this.tpl.get('model2');
        const onChange = ev => this.onSelect(ev);
        selectA.addEventListener('change', onChange);
        selectB.addEventListener('change', onChange);
        this.chart = new Chart('chart_compare_accuracy', { type: 'bar',
            options: { responsive: true, scales: {
                yAxes: [{ label: 'Accuracy', ticks: { min: 70 }}],
                xAxes: [{ barPercentage: 0.75 }]
            }}
        });
        if (this.defaultModels) {
            selectA.value = this.defaultModels.model1;
            selectB.value = this.defaultModels.model2;
            this.getModels(this.defaultModels);
        }
    }

    /**
     * Fetch compatibility.json from the models repository.
     * @returns {Promise<Object>} Resolves with the 'spacy' entry of the file.
     * Rejects if the request fails (e.g. network error) or if the handled
     * response is not flagged as ok.
     */
    fetchCompat() {
        // Return the fetch chain directly so that a rejected fetch also
        // rejects the returned promise instead of leaving it pending.
        // NOTE(review): assumes handleResponse() yields the parsed JSON with
        // an added 'ok' flag – confirm against its definition.
        return fetch(`${this.url}/compatibility.json`)
            .then(res => handleResponse(res))
            .then(json => {
                if (!json.ok) throw new Error('Unable to load compatibility.json');
                return json.spacy;
            });
    }

    /**
     * Get the meta data of a model, from this.models if it has been loaded
     * before, otherwise from the meta directory of the models repository.
     * @param {string} name - The model name, e.g. 'en_core_web_sm'.
     * @returns {Promise<Object>} Resolves with the model meta data. Rejects
     * if the request fails or the handled response is not flagged as ok.
     */
    fetchModel(name) {
        // resolve immediately if model already loaded, e.g. in this.models
        if (this.models[name]) return Promise.resolve(this.models[name]);
        const version = getLatestVersion(name, this.compat);
        const modelName = `${name}-${version}`;
        return fetch(`${this.url}/meta/${modelName}.json`)
            .then(res => handleResponse(res))
            .then(json => {
                if (!json.ok) throw new Error(`Unable to load meta for ${modelName}`);
                return this.saveModel(name, json);
            });
    }

    /**
     * "Save" meta to this.models so it only has to be fetched from GitHub once.
     * @param {string} name - The model name.
     * @param {Object} data - The model meta data.
     * @returns {Object} The data that was passed in.
     */
    saveModel(name, data) {
        this.models[name] = data;
        return data;
    }

    /**
     * Hide the result container (and clear its loading state) and show the
     * error message instead.
     */
    showError() {
        const result = this.tpl.get('result');
        result.removeAttribute('data-loading');
        result.style.display = 'none';
        this.tpl.get('error').style.display = 'block';
    }

    /**
     * Handle a change of one of the two model <select> elements. The models
     * are only compared once the other <select> has a model selected, too.
     * @param {Event} ev - The change event.
     */
    onSelect(ev) {
        const modelId = ev.target.value;
        const otherId = (ev.target.id === 'model1') ? 'model2' : 'model1';
        const otherSelect = this.tpl.get(otherId);
        const otherModel = otherSelect.options[otherSelect.selectedIndex].value;
        if (otherModel !== '') {
            this.getModels({
                [ev.target.id]: modelId,
                [otherId]: otherModel
            });
        }
    }

    /**
     * Load the meta data of both models and render the comparison. Shows the
     * error message if either model can't be loaded or rendering fails.
     * @param {Object} models - The models to compare.
     * @param {string} models.model1 - Name of the first model.
     * @param {string} models.model2 - Name of the second model.
     * @returns {Promise} Settles once the comparison was rendered or the
     * error message is shown.
     */
    getModels({ model1, model2 }) {
        const result = this.tpl.get('result');
        // Undo a previous showError() so a successful retry becomes visible
        // again (and the chart is not updated inside a hidden container).
        this.tpl.get('error').style.display = 'none';
        result.style.display = '';
        result.setAttribute('data-loading', '');
        // The two requests are independent, so load them in parallel.
        return Promise.all([this.fetchModel(model1), this.fetchModel(model2)])
            .then(([data1, data2]) => this.render({ model1: data1, model2: data2 }))
            .catch(() => this.showError());
    }

    /**
     * Render two models, and populate the chart and table. Currently quite hacky :(
     * @param {Object} models - The models to render.
     * @param {Object} models.model1 - The first model (via first <select>).
     * @param {Object} models.model2 - The second model (via second <select>).
     */
    render({ model1, model2 }) {
        const accKeys = Object.assign({}, this.benchKeys.parser, this.benchKeys.ner);
        const allKeys = [...Object.keys(model1.accuracy || {}), ...Object.keys(model2.accuracy || {})];
        // only chart the known accuracy keys that at least one model reports
        const metaKeys = Object.keys(accKeys).filter(k => allKeys.includes(k));
        const labels = metaKeys.map(key => accKeys[key]);
        const datasets = [model1, model2]
            .map(({ lang, name, version, accuracy = {} }, i) => ({
                label: `${lang}_${name}-${version}`,
                backgroundColor: this.colors[`model${i + 1}`],
                // a score missing for one model is charted as 0
                data: metaKeys.map(key => (accuracy[key] || 0).toFixed(2))
            }));
        this.chart.data = { labels, datasets };
        this.chart.update();
        [model1, model2].forEach((model, i) => this.renderTable(metaKeys, i + 1, model));
        this.tpl.get('result').removeAttribute('data-loading');
    }

    /**
     * Fill one column of the comparison table with the meta data of a model.
     * @param {Array} metaKeys - Accuracy keys to show, as computed in render().
     * @param {number} i - Column number (1 or 2), used as suffix of the
     * template keys.
     * @param {Object} model - The model meta data.
     */
    renderTable(metaKeys, i, { lang, name, version, size, description,
        notes, author, url, license, sources, vectors, pipeline, accuracy = {},
        speed = {}}) {
        const type = name.split('_')[0];  // extract type from model name
        const genre = name.split('_')[1];  // extract genre from model name
        this.tpl.fill(`table-head${i}`, `${lang}_${name}`);
        this.tpl.get(`link${i}`).setAttribute('href', `/models/${lang}#${lang}_${name}`);
        this.tpl.fill(`download${i}`, `spacy download ${lang}_${name}\n`);
        this.tpl.fill(`lang${i}`, this.languages[lang] || lang);
        this.tpl.fill(`type${i}`, this.labels[type] || type);
        this.tpl.fill(`genre${i}`, this.labels[genre] || genre);
        this.tpl.fill(`version${i}`, formats.version(version), true);
        this.tpl.fill(`size${i}`, size);
        this.tpl.fill(`desc${i}`, description || 'n/a');
        this.tpl.fill(`pipeline${i}`, formats.pipeline(pipeline), true);
        this.tpl.fill(`vectors${i}`, formats.vectors(vectors));
        this.tpl.fill(`sources${i}`, formats.sources(sources));
        this.tpl.fill(`author${i}`, formats.author(author, url), true);
        this.tpl.fill(`license${i}`, formats.license(license, this.licenses[license]), true);
        // check if model accuracy or speed includes one of the pre-set keys
        for (const key of [...metaKeys, ...Object.keys(this.benchKeys.speed)]) {
            if (accuracy[key]) this.tpl.fill(`${key}${i}`, accuracy[key].toFixed(2));
            else if (speed[key]) this.tpl.fill(`${key}${i}`, convertNumber(Math.round(speed[key])));
            else this.tpl.fill(`${key}${i}`, 'n/a');
        }
    }
}
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
{
|
||||
"sidebar": {
|
||||
"Models": {
|
||||
"Overview": "./"
|
||||
"Overview": "./",
|
||||
"Comparison": "comparison"
|
||||
},
|
||||
|
||||
"Language models": {
|
||||
|
@ -26,6 +27,17 @@
|
|||
}
|
||||
},
|
||||
|
||||
"comparison": {
|
||||
"title": "Model Comparison",
|
||||
"teaser": "Compare spaCy's statistical models and their accuracy.",
|
||||
"tag": "experimental",
|
||||
"compare_models": true,
|
||||
"default_models": {
|
||||
"model1": "en_core_web_sm",
|
||||
"model2": "en_core_web_lg"
|
||||
}
|
||||
},
|
||||
|
||||
"MODELS": {
|
||||
"en": ["en_core_web_sm", "en_core_web_lg", "en_vectors_web_lg"],
|
||||
"de": ["de_dep_news_sm"],
|
||||
|
|
81
website/models/comparison.jade
Normal file
81
website/models/comparison.jade
Normal file
|
@ -0,0 +1,81 @@
|
|||
//- 💫 DOCS > MODELS > COMPARISON
|
||||
|
||||
include ../_includes/_mixins
|
||||
|
||||
p
|
||||
| This experimental tool helps you compare spaCy's statistical models
|
||||
| by features, accuracy and speed. This can be especially useful to get an
|
||||
| idea of the trade-offs between larger and smaller models of the same
|
||||
| type. For example, #[code lg] models tend to be more accurate than
|
||||
| the corresponding #[code sm] versions – but they're often significantly
|
||||
| larger in file size and memory usage.
|
||||
|
||||
//- Template ID set as data-tpl on all elements below that are filled in via
//- JavaScript. Matches the ID ModelComparer passes to its Templater.
- TPL = "compare"
|
||||
|
||||
+grid.o-box
|
||||
for i in [1, 2]
|
||||
+grid-col("half", "no-gutter")
|
||||
label.u-heading.u-text-label.u-text-center.u-color-theme(for="model#{i}") Model #{i}
|
||||
.o-field.o-grid.o-grid--vcenter.u-padding-small
|
||||
select.o-field__select.u-text-small(id="model#{i}" data-tpl=TPL data-tpl-key="model#{i}")
|
||||
option(selected="" disabled="" value="") Select model...
|
||||
for models, _ in MODELS
|
||||
for model in models
|
||||
option(value=model)=model
|
||||
|
||||
div(data-tpl=TPL data-tpl-key="error" style="display: none")
|
||||
+infobox
|
||||
| Unable to load model details and accuracy figures from GitHub to
|
||||
| compare the models. For details of the individual models, see the
|
||||
| overview of the
|
||||
| #[+a(gh("spacy-models") + "/releases") latest model releases].
|
||||
|
||||
div(data-tpl=TPL data-tpl-key="result")
|
||||
+chart("compare_accuracy", 350)
|
||||
|
||||
+aside-code("Download", "text")(style="display: none")
|
||||
for i in [1, 2]
|
||||
span(data-tpl=TPL data-tpl-key="download#{i}")
|
||||
|
||||
+table.o-block-small(data-tpl=TPL data-tpl-key="table")
|
||||
+row("head")
|
||||
+head-cell
|
||||
for i in [1, 2]
|
||||
+head-cell(style="width: 40%")
|
||||
a(data-tpl=TPL data-tpl-key="link#{i}")
|
||||
code(data-tpl=TPL data-tpl-key="table-head#{i}" style="text-transform: initial; font-weight: normal")
|
||||
|
||||
for label, id in {lang: "Language", type: "Type", genre: "Genre"}
|
||||
+row
|
||||
+cell #[+label=label]
|
||||
for i in [1, 2]
|
||||
+cell(data-tpl=TPL data-tpl-key="#{id}#{i}") n/a
|
||||
|
||||
for label in ["Version", "Size", "Pipeline", "Vectors", "Sources", "Author", "License"]
|
||||
- var field = label.toLowerCase()
|
||||
+row
|
||||
+cell.u-nowrap
|
||||
+label=label
|
||||
if MODEL_META[field]
|
||||
| #[+help(MODEL_META[field]).u-color-subtle]
|
||||
for i in [1, 2]
|
||||
+cell
|
||||
span(data-tpl=TPL data-tpl-key=field + i) #[em n/a]
|
||||
|
||||
+row
|
||||
+cell #[+label Description]
|
||||
for i in [1, 2]
|
||||
+cell.u-text-tiny(data-tpl=TPL data-tpl-key="desc#{i}") n/a
|
||||
|
||||
for benchmark, _ in MODEL_BENCHMARKS
|
||||
- var counter = 0
|
||||
for label, field in benchmark
|
||||
+row((counter == 0) ? "divider" : null)
|
||||
+cell.u-nowrap
|
||||
+label=label
|
||||
if MODEL_META[field]
|
||||
| #[+help(MODEL_META[field]).u-color-subtle]
|
||||
for i in [1, 2]
|
||||
+cell
|
||||
span(data-tpl=TPL data-tpl-key=field + i) n/a
|
||||
- counter++
|
Loading…
Reference in New Issue
Block a user