Merge pull request #1473 from explosion/refactor-javascript

Refactor website JS and add model comparison tool
This commit is contained in:
Ines Montani 2017-10-31 14:02:05 +01:00 committed by GitHub
commit 3c8db3e4da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 782 additions and 376 deletions

View File

@ -84,8 +84,8 @@
],
"ALPHA": true,
"V_CSS": "2.0a1",
"V_JS": "2.0a0",
"V_CSS": "2.0a2",
"V_JS": "2.0a1",
"DEFAULT_SYNTAX": "python",
"ANALYTICS": "UA-58931649-1",
"MAILCHIMP": {

View File

@ -281,7 +281,12 @@ mixin github(repo, file, height, alt_file, language)
figure.o-block
pre.c-code-block.o-block-small(class="lang-#{(language || DEFAULT_SYNTAX)}" style="height: #{height}px; min-height: #{height}px")
code.c-code-block__content(data-gh-embed="#{repo}/#{branch}/#{file}")
code.c-code-block__content(data-gh-embed="#{repo}/#{branch}/#{file}").
Can't fetch code example from GitHub :(
Please use the link below to view the example. If you've come across
a broken link, we always appreciate a pull request to the repository,
or a report on the issue tracker. Thanks!
footer.o-grid.u-text
.o-block-small.u-flex-full.u-padding-small #[+icon("github")] #[code.u-break.u-break--all=repo + '/' + (alt_file || file)]

View File

@ -20,7 +20,7 @@ for id in CURRENT_MODELS
p(data-tpl=id data-tpl-key="description")
div(data-tpl=id data-tpl-key="error" style="display: none")
div(data-tpl=id data-tpl-key="error")
+infobox
| Unable to load model details from GitHub. To find out more
| about this model, see the overview of the
@ -54,7 +54,7 @@ for id in CURRENT_MODELS
+cell
.o-field.u-float-left
select.o-field__select.u-text-small(data-tpl=id data-tpl-key="compat")
.o-empty(data-tpl=id data-tpl-key="compat-versions")  
div(data-tpl=id data-tpl-key="compat-versions")  
section(data-tpl=id data-tpl-key="benchmarks" style="display: none")
+grid.o-block-small

View File

@ -1,43 +1,86 @@
//- 💫 INCLUDES > SCRIPTS
if quickstart
script(src="/assets/js/quickstart.min.js")
script(src="/assets/js/vendor/quickstart.min.js")
if IS_PAGE
script(src="/assets/js/in-view.min.js")
script(src="/assets/js/vendor/in-view.min.js")
if environment == "deploy"
script(async src="https://www.google-analytics.com/analytics.js")
script(src="/assets/js/prism.min.js")
script(src="/assets/js/main.js?v#{V_JS}")
script(src="/assets/js/vendor/prism.min.js")
if SECTION == "models"
script(src="/assets/js/vendor/chart.min.js")
script(src="/assets/js/models.js?v#{V_JS}" type="module")
script
| new ProgressBar('.js-progress');
if changelog
| new Changelog('!{SOCIAL.github}', 'spacy');
if quickstart
| new Quickstart("#qs");
if IS_PAGE
| new SectionHighlighter('data-section', 'data-nav');
| new GitHubEmbed('!{SOCIAL.github}', 'data-gh-embed');
| ((window.gitter = {}).chat = {}).options = {
| useStyles: false,
| activationElement: '.js-gitter-button',
| targetElement: '.js-gitter',
| room: '!{SOCIAL.gitter}'
| };
if HAS_MODELS
| new ModelLoader('!{MODELS_REPO}', !{JSON.stringify(CURRENT_MODELS)}, !{JSON.stringify(MODEL_LICENSES)}, !{JSON.stringify(MODEL_BENCHMARKS)});
if environment == "deploy"
| window.ga=window.ga||function(){
| (ga.q=ga.q||[]).push(arguments)}; ga.l=+new Date;
| ga('create', '#{ANALYTICS}', 'auto'); ga('send', 'pageview');
if IS_PAGE
script
| ((window.gitter = {}).chat = {}).options = {
| useStyles: false,
| activationElement: '.js-gitter-button',
| targetElement: '.js-gitter',
| room: '!{SOCIAL.gitter}'
| };
script(src="https://sidecar.gitter.im/dist/sidecar.v1.js" async defer)
//- JS modules slightly hacky, but necessary to dynamically instantiate the
classes with data from the Harp JSON files, while still being able to
support older browsers that can't handle JS modules. More details:
https://medium.com/dev-channel/es6-modules-in-chrome-canary-m60-ba588dfb8ab7
- ProgressBar = "new ProgressBar('.js-progress');"
- Changelog = "new Changelog('" + SOCIAL.github + "', 'spacy');"
- NavHighlighter = "new NavHighlighter('data-section', 'data-nav');"
- GitHubEmbed = "new GitHubEmbed('" + SOCIAL.github + "', 'data-gh-embed');"
- ModelLoader = "new ModelLoader('" + MODELS_REPO + "'," + JSON.stringify(CURRENT_MODELS) + "," + JSON.stringify(MODEL_LICENSES) + "," + JSON.stringify(MODEL_BENCHMARKS) + ");"
- ModelComparer = "new ModelComparer('" + MODELS_REPO + "'," + JSON.stringify(MODEL_LICENSES) + "," + JSON.stringify(MODEL_BENCHMARKS) + "," + JSON.stringify(LANGUAGES) + "," + JSON.stringify(MODEL_META) + "," + JSON.stringify(default_models || false) + ");"
//- Browsers with JS module support.
Will be ignored otherwise.
script(type="module")
| import ProgressBar from '/assets/js/progress.js';
!=ProgressBar
if changelog
| import Changelog from '/assets/js/changelog.js';
!=Changelog
if IS_PAGE
| import NavHighlighter from '/assets/js/nav-highlighter.js';
!=NavHighlighter
| import GitHubEmbed from '/assets/js/github-embed.js';
!=GitHubEmbed
if HAS_MODELS
| import { ModelLoader } from '/assets/js/models.js';
!=ModelLoader
if compare_models
| import { ModelComparer } from '/assets/js/models.js';
!=ModelComparer
//- Browsers with no JS module support.
    Won't be fetched or interpreted otherwise. Emits the same instantiation
    strings as the module script above, relying on the classes bundled in
    rollup.js.
script(nomodule src="/assets/js/rollup.js")
script(nomodule)
    !=ProgressBar
    if changelog
        !=Changelog
    if IS_PAGE
        !=NavHighlighter
        !=GitHubEmbed
    if HAS_MODELS
        !=ModelLoader
    if compare_models
        !=ModelComparer

View File

@ -19,5 +19,5 @@ menu.c-sidebar.js-sidebar.u-text
- var counter = 0
for id, title in menu
- counter++
li.c-sidebar__crumb__item(data-nav=id class=(counter == 1) ? "is-active" : null)
li.c-sidebar__crumb__item(data-nav=id)
+a("#section-" + id)=title

View File

@ -163,11 +163,4 @@
height: 1.4em
border: none
text-align-last: center
.o-empty:empty:before
@include size(1em)
border-radius: 50%
content: ""
display: inline-block
background: $color-red
vertical-align: middle
width: 100%

View File

@ -0,0 +1,72 @@
'use strict';
import { Templater, handleResponse } from './util.js';
export default class Changelog {
    /**
     * Fetch and render changelog from GitHub. Clones a template node (table row)
     * to avoid doubling templating markup in JavaScript.
     * @param {string} user - GitHub username.
     * @param {string} repo - Repository to fetch releases from.
     */
    constructor(user, repo) {
        this.url = `https://api.github.com/repos/${user}/${repo}/releases`;
        this.template = new Templater('changelog');
        this.fetchChangelog()
            .then(json => this.render(json))
            .catch(this.showError.bind(this))
            // Only now has the page height changed (table or error box shown),
            // so this is the point where scroll positions for the progress bar
            // etc. need to be recalculated.
            .then(() => window.dispatchEvent(new Event('resize')));
    }

    /**
     * Request the release list.
     * @returns {Promise<Object>} Resolves with the parsed response (as returned
     * by handleResponse) if the request succeeded. Rejects if the response is
     * not ok OR if the request itself fails (e.g. network error), so callers
     * can always rely on their catch handler being invoked.
     */
    fetchChangelog() {
        return fetch(this.url)
            .then(res => handleResponse(res))
            .then(json => json.ok
                ? json
                : Promise.reject(new Error(`Can't fetch changelog from ${this.url}`)));
    }

    /**
     * Reveal the error message element of the changelog template.
     */
    showError() {
        this.template.get('error').style.display = 'block';
    }

    /**
     * Get template section from template row. Hacky, but does make sense.
     * @param {node} item - Parent element.
     * @param {string} id - ID of child element, set via data-changelog.
     * @returns {node} The first matching child element.
     */
    getField(item, id) {
        return item.querySelector(`[data-changelog="${id}"]`);
    }

    /**
     * Show the changelog table and add one row per named release.
     * @param {Object} json - Response data; every value with a "name" property
     * is treated as a release, everything else (e.g. the "ok" flag) is skipped.
     */
    render(json) {
        this.template.get('table').style.display = 'block';
        this.row = this.template.get('item');
        this.releases = this.template.get('releases');
        this.prereleases = this.template.get('prereleases');
        Object.values(json)
            .filter(release => release.name)
            .forEach(release => this.renderRelease(release));
        // the template row itself was only needed for cloning
        this.row.remove();
    }

    /**
     * Clone the template row and populate with content from API response.
     * https://developer.github.com/v3/repos/releases/#list-releases-for-a-repository
     * @param {string} name - Release title.
     * @param {string} tag (tag_name) - Release tag.
     * @param {string} url (html_url) - URL to the release page on GitHub.
     * @param {string} date (published_at) - Timestamp of release publication.
     * @param {boolean} prerelease - Whether the release is a prerelease.
     */
    renderRelease({ name, tag_name: tag, html_url: url, published_at: date, prerelease }) {
        const container = prerelease ? this.prereleases : this.releases;
        const tagLink = `<a href="${url}" target="_blank"><code>${tag}</code></a>`;
        // titles of the form "prefix: title" are shortened to "title"
        const parts = name.split(': ');
        const title = (parts.length === 2) ? parts[1] : name;
        const row = this.row.cloneNode(true);
        this.getField(row, 'date').textContent = date.split('T')[0];
        this.getField(row, 'tag').innerHTML = tagLink;
        this.getField(row, 'title').textContent = title;
        container.appendChild(row);
    }
}

View File

@ -0,0 +1,42 @@
'use strict';
import { $$ } from './util.js';
export default class GitHubEmbed {
    /**
     * Embed code from GitHub repositories, similar to Gist embeds. Fetches the
     * raw text and places it inside element.
     * Usage: <pre><code data-gh-embed="spacy/master/examples/x.py"></code><pre>
     * @param {string} user - GitHub user or organization.
     * @param {string} attr - Data attribute used to select containers. Attribute
     * value should be path to file relative to user.
     */
    constructor(user, attr) {
        this.url = `https://raw.githubusercontent.com/${user}`;
        this.attr = attr;
        [...$$(`[${this.attr}]`)].forEach(el => this.embed(el));
    }

    /**
     * Fetch code from GitHub and insert it as element content. File path is
     * read off the container's data attribute. The parent element carries a
     * data-loading attribute for as long as the request is in flight. If the
     * request fails or the response is not ok, the element's existing content
     * is left untouched.
     * @param {node} el - The element.
     * @returns {Promise} Settles (never rejects) once loading has finished.
     */
    embed(el) {
        const container = el.parentElement;
        // runs on success AND failure, so the loading state can't get stuck
        const stopLoading = () => container.removeAttribute('data-loading');
        container.setAttribute('data-loading', '');
        return fetch(`${this.url}/${el.getAttribute(this.attr)}`)
            .then(res => res.text().then(text => ({ text, ok: res.ok })))
            .then(({ text, ok }) => ok ? this.render(el, text) : false)
            .then(stopLoading, stopLoading);
    }

    /**
     * Add text to container and apply syntax highlighting via Prism, if available.
     * @param {node} el - The element.
     * @param {string} text - The raw code, fetched from GitHub.
     */
    render(el, text) {
        el.textContent = text;
        if (window.Prism) Prism.highlightElement(el);
    }
}

View File

@ -1,323 +0,0 @@
//- 💫 MAIN JAVASCRIPT
//- Note: Will be compiled using Babel before deployment.
'use strict'
// Shorthand for document.querySelector: first element matching a CSS selector.
const $ = document.querySelector.bind(document);
// Shorthand for document.querySelectorAll: all elements matching a CSS selector.
const $$ = document.querySelectorAll.bind(document);
class ProgressBar {
    /**
     * Animated reading progress bar.
     * @param {String} selector CSS selector of progress bar element.
     */
    constructor(selector) {
        this.el = $(selector);
        this.scrollY = 0;
        this.sizes = this.updateSizes();
        this.el.setAttribute('max', 100);
        this.init();
    }

    /**
     * Register the scroll and resize listeners that keep the bar up to date.
     */
    init() {
        window.addEventListener('scroll', () => {
            // scrollTop/clientTop live on the root element, not on document
            // itself; they are the fallback if pageYOffset is unavailable.
            const root = document.documentElement;
            this.scrollY = (window.pageYOffset || root.scrollTop || 0) - (root.clientTop || 0);
            requestAnimationFrame(this.update.bind(this));
        }, false);
        window.addEventListener('resize', () => {
            this.sizes = this.updateSizes();
            requestAnimationFrame(this.update.bind(this));
        })
    }

    /**
     * Write the current reading progress (0-100) to the element's value.
     */
    update() {
        const { height, vh } = this.sizes;
        const offset = 100 - ((height - this.scrollY - vh) / height * 100);
        // "|| 0" guards against NaN, e.g. if the document height is 0
        this.el.setAttribute('value', (this.scrollY == 0) ? 0 : offset || 0);
    }

    /**
     * Measure the document.
     * @returns {Object} height: full document height, vh: viewport height.
     */
    updateSizes() {
        const body = document.body;
        const html = document.documentElement;
        return {
            height: Math.max(body.scrollHeight, body.offsetHeight, html.clientHeight, html.scrollHeight, html.offsetHeight),
            vh: Math.max(html.clientHeight, window.innerHeight || 0)
        }
    }
}
class SectionHighlighter {
    /**
     * Mark the sidebar item belonging to the section that entered the
     * viewport, using the in-view library.
     * @param {String} sectionAttr - Data attribute of sections.
     * @param {String} navAttr - Data attribute of navigation items.
     * @param {String} activeClass Class name of active element.
     */
    constructor(sectionAttr, navAttr, activeClass = 'is-active') {
        const navItems = $$(`[${navAttr}]`);
        this.sections = [...navItems];
        this.navAttr = navAttr;
        this.sectionAttr = sectionAttr;
        this.activeClass = activeClass;
        const onEnter = this.highlightSection.bind(this);
        inView(`[${sectionAttr}]`).on('enter', onEnter);
    }

    /**
     * Move the active class to the navigation item whose nav attribute equals
     * the section's section attribute. Does nothing if there is no such item.
     * @param {node} section - The section that entered the viewport.
     */
    highlightSection(section) {
        const sectionId = section.getAttribute(this.sectionAttr);
        const target = $(`[${this.navAttr}="${sectionId}"]`);
        if (!target) return;
        for (const navItem of this.sections) {
            navItem.classList.remove(this.activeClass);
        }
        target.classList.add(this.activeClass);
    }
}
class Templater {
    /**
     * Tiny templating helper driven by data attributes. Elements are looked
     * up by their data-tpl and data-tpl-key attributes and can have either
     * their textContent or their innerHTML set.
     *
     * @param {String} templateId - Template section, e.g. value of data-tpl.
     */
    constructor(templateId) {
        this.templateId = templateId;
    }

    /**
     * Look up the element for a key within this template section.
     * @param {String} key - Value of the element's data-tpl-key attribute.
     */
    get(key) {
        const selector = `[data-tpl="${this.templateId}"][data-tpl-key="${key}"]`;
        return $(selector);
    }

    /**
     * Set an element's content. Falsy values are written as empty string.
     * @param {String} key - Value of the element's data-tpl-key attribute.
     * @param {String} value - The content to set.
     * @param {Boolean} html - Set innerHTML instead of textContent.
     * @returns The element, to allow chaining.
     */
    fill(key, value, html = false) {
        const target = this.get(key);
        const property = html ? 'innerHTML' : 'textContent';
        target[property] = value || '';
        return target;
    }
}
class ModelLoader {
    /**
     * Load model meta from GitHub and update model details on site. Uses the
     * Templater mini template engine to update DOM.
     *
     * @param {String} repo - Path to GitHub repository containing releases.
     * @param {Array} models - List of model IDs, e.g. "en_core_web_sm".
     * @param {Object} licenses - License IDs mapped to URLs.
     * @param {Object} benchmarkKeys - Available benchmark keys by table ID
     * ('parser', 'ner', 'speed'), mapped to display labels.
     */
    constructor(repo, models = [], licenses = {}, benchmarkKeys = {}) {
        this.url = `https://raw.githubusercontent.com/${repo}/master`;
        this.repo = `https://github.com/${repo}`;
        this.modelIds = models;
        this.licenses = licenses;
        this.benchKeys = benchmarkKeys;
        this.init();
    }

    /**
     * Put all model tables into loading state and fetch the compatibility
     * table. If it can't be loaded, every model shows its error message.
     */
    init() {
        const showAllErrors = () => this.modelIds.forEach(modelId => this.showError(modelId));
        this.modelIds.forEach(modelId =>
            new Templater(modelId).get('table').setAttribute('data-loading', ''));
        fetch(`${this.url}/compatibility.json`)
            .then(res => this.handleResponse(res))
            .then(json => json.ok ? this.getModels(json['spacy']) : showAllErrors())
            // fetch only rejects on network failure – don't leave tables loading
            .catch(showAllErrors)
    }

    /**
     * Turn a fetch response into its parsed JSON plus an "ok" flag.
     * @param {Response} res - The fetch response.
     * @returns {Promise<Object>|Object} Parsed data with ok flag, or only the
     * ok flag if the response was not ok.
     */
    handleResponse(res) {
        if (res.ok) return res.json().then(json => Object.assign({}, json, { ok: res.ok }))
        else return ({ ok: res.ok })
    }

    /**
     * Format a number with thousands separators, e.g. 10000 -> "10,000".
     */
    convertNumber(num, separator = ',') {
        return num.toString().replace(/\B(?=(\d{3})+(?!\d))/g, separator);
    }

    /**
     * Fetch and render the meta data of each model's latest version.
     * @param {Object} compat - Compatibility table, keyed by spaCy version.
     */
    getModels(compat) {
        this.compat = compat;
        for (const modelId of this.modelIds) {
            const version = this.getLatestVersion(modelId, compat);
            if (!version) {
                // only this model is affected – keep going with the others
                this.showError(modelId);
                continue;
            }
            fetch(`${this.url}/meta/${modelId}-${version}.json`)
                .then(res => this.handleResponse(res))
                .then(json => json.ok ? this.render(json) : this.showError(modelId))
                .catch(() => this.showError(modelId))
        }
        // make sure scroll positions for progress bar etc. are recalculated
        window.dispatchEvent(new Event('resize'));
    }

    /**
     * Show the error message for a model and hide the rows that can't be
     * filled without meta data.
     */
    showError(modelId) {
        const template = new Templater(modelId);
        template.get('table').removeAttribute('data-loading');
        template.get('error').style.display = 'block';
        for (const key of ['sources', 'pipeline', 'vectors', 'author', 'license']) {
            template.get(key).parentElement.parentElement.style.display = 'none';
        }
    }

    /**
     * Update model details in tables. Currently quite hacky :(
     */
    render({ lang, name, version, sources, pipeline, vectors, url, author, license, accuracy, speed, size, description, notes }) {
        const modelId = `${lang}_${name}`;
        const model = `${modelId}-${version}`;
        const template = new Templater(modelId);
        const getSources = s => (s instanceof Array) ? s.join(', ') : s;
        const getPipeline = p => p.map(comp => `<code>${comp}</code>`).join(', ');
        const getVectors = v => `${this.convertNumber(v.entries)} (${v.width} dimensions)`;
        const getLink = (t, l) => `<a href="${l}" target="_blank">${t}</a>`;
        const keys = { version, size, description, notes }
        Object.keys(keys).forEach(key => template.fill(key, keys[key]));
        if (sources) template.fill('sources', getSources(sources));
        if (pipeline && pipeline.length) template.fill('pipeline', getPipeline(pipeline), true);
        else template.get('pipeline').parentElement.parentElement.style.display = 'none';
        if (vectors) template.fill('vectors', getVectors(vectors));
        else template.get('vectors').parentElement.parentElement.style.display = 'none';
        if (author) template.fill('author', url ? getLink(author, url) : author, true);
        if (license) template.fill('license', this.licenses[license] ? getLink(license, this.licenses[license]) : license, true);
        template.get('download').setAttribute('href', `${this.repo}/releases/tag/${model}`);
        this.renderBenchmarks(template, accuracy, speed);
        this.renderCompat(template, modelId);
        template.get('table').removeAttribute('data-loading');
    }

    /**
     * Show the benchmarks section and fill the parser, NER and speed tables.
     */
    renderBenchmarks(template, accuracy = {}, speed = {}) {
        if (!accuracy && !speed) return;
        template.get('benchmarks').style.display = 'block';
        this.renderTable(template, 'parser', accuracy, val => val.toFixed(2));
        this.renderTable(template, 'ner', accuracy, val => val.toFixed(2));
        this.renderTable(template, 'speed', speed, Math.round);
    }

    /**
     * Fill and show one benchmark table. Skipped entirely if none of the
     * table's keys has a value in the given benchmarks.
     */
    renderTable(template, id, benchmarks, convertVal = val => val) {
        if (!this.benchKeys[id] || !Object.keys(this.benchKeys[id]).some(key => benchmarks[key])) return;
        const keys = Object.keys(this.benchKeys[id]).filter(key => benchmarks[key]);
        template.get(id).style.display = 'block';
        for (const key of keys) {
            template
                .fill(key, this.convertNumber(convertVal(benchmarks[key])))
                .parentElement.style.display = 'table-row';
        }
    }

    /**
     * Populate the spaCy version dropdown and show the compatible model
     * version whenever a spaCy version is selected.
     */
    renderCompat(template, modelId) {
        template.get('compat-wrapper').style.display = 'table-row';
        const options = Object.keys(this.compat).map(v => `<option value="${v}">v${v}</option>`).join('');
        template
            .fill('compat', '<option selected disabled>spaCy version</option>' + options, true)
            .addEventListener('change', ev => {
                const result = this.compat[ev.target.value][modelId];
                if (result) template.fill('compat-versions', `<code>${modelId}-${result[0]}</code>`, true);
                else template.fill('compat-versions', '');
            });
    }

    /**
     * Find the first listed version of a model in the first spaCy version
     * entry of the compatibility table that contains the model.
     * @returns {String|undefined} The version, or undefined if not found.
     */
    getLatestVersion(model, compat = {}) {
        for (const spacy_v of Object.keys(compat)) {
            const models = compat[spacy_v];
            if (models[model]) return models[model][0];
        }
    }
}
class Changelog {
    /**
     * Load the releases of a GitHub repository and render them as changelog.
     * Rows are produced by cloning a template node (table row), so the row
     * markup doesn't have to be duplicated in JavaScript.
     *
     * @param {String} user - GitHub username.
     * @param {String} repo - Repository to fetch releases from.
     */
    constructor(user, repo) {
        this.url = `https://api.github.com/repos/${user}/${repo}/releases`;
        this.template = new Templater('changelog');
        fetch(this.url)
            .then(res => this.handleResponse(res))
            .then(json => {
                if (!json.ok) return false;
                return this.render(json);
            })
    }

    /**
     * Find a field of a template row via its data-changelog attribute.
     */
    $(item, id) {
        const selector = `[data-changelog="${id}"]`;
        return item.querySelector(selector);
    }

    /**
     * Parse an ok response as JSON and attach the "ok" flag to a copy of the
     * data. For other responses, only the flag is returned.
     */
    handleResponse(res) {
        if (!res.ok) return { ok: res.ok };
        return res.json().then(data => Object.assign({}, data, { ok: res.ok }));
    }

    /**
     * Hide the error, show the table and add a row for every named release.
     */
    render(json) {
        this.template.get('error').style.display = 'none';
        this.template.get('table').style.display = 'block';
        this.row = this.template.get('item');
        this.releases = this.template.get('releases');
        this.prereleases = this.template.get('prereleases');
        for (const entry of Object.values(json)) {
            if (entry.name) this.renderRelease(entry);
        }
        this.row.remove();
        // make sure scroll positions for progress bar etc. are recalculated
        window.dispatchEvent(new Event('resize'));
    }

    /**
     * Clone the template row and populate with content from API response.
     * https://developer.github.com/v3/repos/releases/#list-releases-for-a-repository
     *
     * @param {String} name - Release title.
     * @param {String} tag (tag_name) - Release tag.
     * @param {String} url (html_url) - URL to the release page on GitHub.
     * @param {String} date (published_at) - Timestamp of release publication.
     * @param {Boolean} pre (prerelease) - Whether the release is a prerelease.
     */
    renderRelease({ name, tag_name: tag, html_url: url, published_at: date, prerelease: pre }) {
        const entry = this.row.cloneNode(true);
        const [day] = date.split('T');
        const nameParts = name.split(': ');
        this.$(entry, 'date').textContent = day;
        this.$(entry, 'tag').innerHTML = `<a href="${url}" target="_blank"><code>${tag}</code></a>`;
        this.$(entry, 'title').textContent = (nameParts.length == 2) ? nameParts[1] : name;
        if (pre) this.prereleases.appendChild(entry);
        else this.releases.appendChild(entry);
    }
}
class GitHubEmbed {
    /**
     * Embed code from GitHub repositories, similar to Gist embeds. Fetches the
     * raw text and places it inside element.
     * Usage: <pre><code data-gh-embed="spacy/master/examples/x.py"></code><pre>
     *
     * @param {String} user - GitHub user or organization.
     * @param {String} attr - Data attribute used to select containers. Attribute
     * value should be path to file relative to user.
     */
    constructor(user, attr) {
        this.url = `https://raw.githubusercontent.com/${user}`;
        this.attr = attr;
        this.error = `\nCan't fetch code example from GitHub :(\n\nPlease use the link below to view the example. If you've come across\na broken link, we always appreciate a pull request to the repository,\nor a report on the issue tracker. Thanks!`;
        [...$$(`[${this.attr}]`)].forEach(el => this.embed(el));
    }

    /**
     * Fetch the file referenced by the element's data attribute and insert it
     * as the element's text, highlighted via Prism if available. If the
     * response is not ok or the request fails, the error message is shown
     * instead. The parent element carries a data-loading attribute until the
     * request has settled.
     * @param {node} el - The element.
     * @returns {Promise} Settles once the element has been updated.
     */
    embed(el) {
        const container = el.parentElement;
        container.setAttribute('data-loading', '');
        return fetch(`${this.url}/${el.getAttribute(this.attr)}`)
            .then(res => res.text().then(text => ({ text, ok: res.ok })))
            // treat a failed request like a non-ok response
            .catch(() => ({ text: '', ok: false }))
            .then(({ text, ok }) => {
                el.textContent = ok ? text : this.error;
                // end loading state first, so it can't get stuck if
                // highlighting fails
                container.removeAttribute('data-loading');
                if (ok && window.Prism) Prism.highlightElement(el);
            });
    }
}

316
website/assets/js/models.js Normal file
View File

@ -0,0 +1,316 @@
'use strict';
import { Templater, handleResponse, convertNumber } from './util.js';
/**
 * Chart.js defaults
 * CHART_COLORS: bar colour per compared model, keyed 'model1' / 'model2'.
 * CHART_FONTS: font stacks for the chart legend and the axis ticks.
 */
const CHART_COLORS = { model1: '#09a3d5', model2: '#066B8C' };
const CHART_FONTS = {
legend: '-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"',
ticks: 'Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace'
};
/**
 * Formatters turning model meta values into display strings (partly HTML).
 * @property {function} author - Model author, linked if a URL is given.
 * @property {function} license - Model license, linked if a URL is given.
 * @property {function} sources - Training data sources; arrays are joined
 * with commas, anything else is passed through.
 * @property {function} pipeline - Pipeline components as <code> elements,
 * '-' if there are none.
 * @property {function} vectors - Vector entries and dimensions, 'n/a' if the
 * model has no vectors.
 * @property {function} version - Model version number as <code> element.
 */
export const formats = {
    author: (author, url) => {
        if (!url) return author;
        return `<a href="${url}" target="_blank">${author}</a>`;
    },
    license: (license, url) => {
        if (!url) return license;
        return `<a href="${url}" target="_blank">${license}</a>`;
    },
    sources: sources => {
        if (sources instanceof Array) return sources.join(', ');
        return sources;
    },
    pipeline: pipes => {
        if (!pipes || !pipes.length) return '-';
        const components = pipes.map(p => `<code>${p}</code>`);
        return components.join(', ');
    },
    vectors: vec => {
        if (!vec) return 'n/a';
        const entries = convertNumber(vec.entries);
        return `${entries} (${vec.width} dimensions)`;
    },
    version: version => `<code>v${version}</code>`
};
/**
 * Look up a model's version in a compatibility table. The first spaCy version
 * entry (in key order) that lists the model wins, and the first version listed
 * there is returned.
 * @param {string} model - The model name.
 * @param {Object} compat - Compatibility table, keyed by spaCy version.
 * @returns {string|undefined} The version, or undefined if no entry lists
 * the model.
 */
export const getLatestVersion = (model, compat = {}) => {
    const entry = Object.values(compat).find(models => models[model]);
    return entry ? entry[model][0] : undefined;
};
export class ModelLoader {
    /**
     * Load model meta from GitHub and update model details on site. Uses the
     * Templater mini template engine to update DOM.
     * @param {string} repo - Path to GitHub repository containing releases.
     * @param {Array} models - List of model IDs, e.g. "en_core_web_sm".
     * @param {Object} licenses - License IDs mapped to URLs.
     * @param {Object} benchmarkKeys - Objects of available keys by type, e.g.
     * 'parser', 'ner', 'speed', mapped to labels.
     */
    constructor(repo, models = [], licenses = {}, benchmarkKeys = {}) {
        this.url = `https://raw.githubusercontent.com/${repo}/master`;
        this.repo = `https://github.com/${repo}`;
        this.modelIds = models;
        this.licenses = licenses;
        this.benchKeys = benchmarkKeys;
        this.init();
    }

    /**
     * Put all model tables into loading state and fetch the compatibility
     * table. If that fails, every model shows its error message.
     */
    init() {
        this.modelIds.forEach(modelId =>
            new Templater(modelId).get('table').setAttribute('data-loading', ''));
        this.fetch(`${this.url}/compatibility.json`)
            .then(json => this.getModels(json.spacy))
            .catch(_ => this.modelIds.forEach(modelId => this.showError(modelId)));
        // make sure scroll positions for progress bar etc. are recalculated
        window.dispatchEvent(new Event('resize'));
    }

    /**
     * Fetch JSON data from a URL.
     * @param {string} url - The URL to fetch.
     * @returns {Promise<Object>} Resolves with the data (as returned by
     * handleResponse) if the response was ok. Rejects if it wasn't, or if the
     * request itself fails – so a model table can't stay in loading state
     * forever when the network is down.
     */
    fetch(url) {
        // "fetch" here is the global function, not this method
        return fetch(url)
            .then(res => handleResponse(res))
            .then(json => json.ok ? json : Promise.reject(new Error(`Can't fetch ${url}`)));
    }

    /**
     * Fetch and render the meta data of each model's latest version. Models
     * without a version in the compatibility table show their error message.
     * @param {Object} compat - Compatibility table, keyed by spaCy version.
     */
    getModels(compat) {
        this.compat = compat;
        for (const modelId of this.modelIds) {
            const version = getLatestVersion(modelId, compat);
            if (version) this.fetch(`${this.url}/meta/${modelId}-${version}.json`)
                .then(json => this.render(json))
                .catch(_ => this.showError(modelId))
            else this.showError(modelId);
        }
    }

    /**
     * Show the error message for a model and hide the rows that can't be
     * filled without meta data.
     * @param {string} modelId - The model ID, used as template ID.
     */
    showError(modelId) {
        const tpl = new Templater(modelId);
        tpl.get('table').removeAttribute('data-loading');
        tpl.get('error').style.display = 'block';
        for (const key of ['sources', 'pipeline', 'vectors', 'author', 'license']) {
            tpl.get(key).parentElement.parentElement.style.display = 'none';
        }
    }

    /**
     * Update model details in tables. Currently quite hacky :(
     * @param {Object} data - The model meta data.
     */
    render(data) {
        const modelId = `${data.lang}_${data.name}`;
        const model = `${modelId}-${data.version}`;
        const tpl = new Templater(modelId);
        tpl.get('error').style.display = 'none';
        this.renderDetails(tpl, data)
        this.renderBenchmarks(tpl, data.accuracy, data.speed);
        this.renderCompat(tpl, modelId);
        tpl.get('download').setAttribute('href', `${this.repo}/releases/tag/${model}`);
        tpl.get('table').removeAttribute('data-loading');
    }

    /**
     * Fill the details table. The vectors and pipeline rows are hidden if the
     * model doesn't provide them.
     */
    renderDetails(tpl, { version, size, description, notes, author, url,
        license, sources, vectors, pipeline }) {
        const basics = { version, size, description, notes }
        for (const [key, value] of Object.entries(basics)) {
            if (value) tpl.fill(key, value);
        }
        if (author) tpl.fill('author', formats.author(author, url), true);
        if (license) tpl.fill('license', formats.license(license, this.licenses[license]), true);
        if (sources) tpl.fill('sources', formats.sources(sources));
        if (vectors) tpl.fill('vectors', formats.vectors(vectors));
        else tpl.get('vectors').parentElement.parentElement.style.display = 'none';
        if (pipeline && pipeline.length) tpl.fill('pipeline', formats.pipeline(pipeline), true);
        else tpl.get('pipeline').parentElement.parentElement.style.display = 'none';
    }

    /**
     * Fill the benchmark tables and show the benchmarks section. Does nothing
     * if the model has neither accuracy nor speed data, so no empty section
     * is revealed.
     * @param {Templater} tpl - The model's template.
     * @param {Object} [accuracy] - Accuracy scores by key.
     * @param {Object} [speed] - Speed measurements by key.
     */
    renderBenchmarks(tpl, accuracy, speed) {
        if (!accuracy && !speed) return;
        // one of the two may still be missing (undefined or null)
        this.renderTable(tpl, 'parser', accuracy || {}, val => val.toFixed(2));
        this.renderTable(tpl, 'ner', accuracy || {}, val => val.toFixed(2));
        this.renderTable(tpl, 'speed', speed || {}, Math.round);
        tpl.get('benchmarks').style.display = 'block';
    }

    /**
     * Fill and show one benchmark table. Skipped entirely if none of the
     * table's keys has a value in the given benchmarks.
     */
    renderTable(tpl, id, benchmarks, converter = val => val) {
        if (!this.benchKeys[id] || !Object.keys(this.benchKeys[id]).some(key => benchmarks[key])) return;
        for (const key of Object.keys(this.benchKeys[id])) {
            if (benchmarks[key]) tpl
                .fill(key, convertNumber(converter(benchmarks[key])))
                .parentElement.style.display = 'table-row';
        }
        tpl.get(id).style.display = 'block';
    }

    /**
     * Populate the spaCy version dropdown and show the compatible model
     * version whenever a spaCy version is selected.
     */
    renderCompat(tpl, modelId) {
        tpl.get('compat-wrapper').style.display = 'table-row';
        const header = '<option selected disabled>spaCy version</option>';
        const options = Object.keys(this.compat)
            .map(v => `<option value="${v}">v${v}</option>`)
            .join('');
        tpl
            .fill('compat', header + options, true)
            .addEventListener('change', ({ target: { value }}) =>
                tpl.fill('compat-versions', this.getCompat(value, modelId), true))
    }

    /**
     * Get the markup describing which model version is compatible with a
     * spaCy version.
     * @param {string} version - The spaCy version.
     * @param {string} model - The model ID.
     */
    getCompat(version, model) {
        const res = this.compat[version][model];
        return res ? `<code>${model}-${res[0]}</code>` : '<em>not compatible</em>';
    }
}
export class ModelComparer {
    /**
     * Compare two model meta files and render chart and comparison table.
     * @param {string} repo - Path to GitHub repository containing releases.
     * @param {Object} licenses - License IDs mapped to URLs.
     * @param {Object} benchmarkKeys - Objects of available keys by type, e.g.
     * 'parser', 'ner', 'speed', mapped to labels.
     * @param {Object} languages - Available languages, ID mapped to name.
     * @param {Object} labels - Display labels for the type and genre parts of
     * a model name.
     * @param {Object} defaultModels - Models to compare on load, 'model1' and
     * 'model2' mapped to model names.
     */
    constructor(repo, licenses = {}, benchmarkKeys = {}, languages = {}, labels = {}, defaultModels) {
        this.url = `https://raw.githubusercontent.com/${repo}/master`;
        this.repo = `https://github.com/${repo}`;
        this.tpl = new Templater('compare');
        this.benchKeys = benchmarkKeys;
        this.licenses = licenses;
        this.languages = languages;
        this.labels = labels;
        this.models = {};
        this.colors = CHART_COLORS;
        this.fonts = CHART_FONTS;
        this.defaultModels = defaultModels;
        this.tpl.get('result').style.display = 'block';
        this.fetchCompat()
            .then(compat => this.init(compat))
            .catch(this.showError.bind(this))
    }

    /**
     * Set up the model dropdowns and the chart, and render the default
     * models, if available.
     * @param {Object} compat - Compatibility table, keyed by spaCy version.
     */
    init(compat) {
        this.compat = compat;
        const selectA = this.tpl.get('model1');
        const selectB = this.tpl.get('model2');
        selectA.addEventListener('change', this.onSelect.bind(this));
        selectB.addEventListener('change', this.onSelect.bind(this));
        this.chart = new Chart('chart_compare_accuracy', { type: 'bar', options: {
            responsive: true,
            legend: { position: 'bottom', labels: { fontFamily: this.fonts.legend, fontSize: 13 }},
            scales: {
                yAxes: [{ label: 'Accuracy', ticks: { min: 70, fontFamily: this.fonts.ticks }}],
                xAxes: [{ barPercentage: 0.75, ticks: { fontFamily: this.fonts.ticks }}]
            }
        }});
        if (this.defaultModels) {
            selectA.value = this.defaultModels.model1;
            selectB.value = this.defaultModels.model2;
            this.getModels(this.defaultModels);
        }
    }

    /**
     * Fetch the compatibility table.
     * @returns {Promise<Object>} Resolves with the table's 'spacy' entry.
     * Rejects if the response is not ok or the request itself fails.
     */
    fetchCompat() {
        const url = `${this.url}/compatibility.json`;
        return fetch(url)
            .then(res => handleResponse(res))
            .then(json => json.ok ? json.spacy : Promise.reject(new Error(`Can't fetch ${url}`)));
    }

    /**
     * Get the meta data of a model's latest version.
     * @param {string} name - The model name.
     * @returns {Promise<Object>} Resolves with the model meta data. Rejects
     * if the response is not ok or the request itself fails.
     */
    fetchModel(name) {
        // resolve immediately if model already loaded, e.g. in this.models
        if (this.models[name]) return Promise.resolve(this.models[name]);
        const version = getLatestVersion(name, this.compat);
        const url = `${this.url}/meta/${name}-${version}.json`;
        return fetch(url)
            .then(res => handleResponse(res))
            .then(json => json.ok ? this.saveModel(name, json) : Promise.reject(new Error(`Can't fetch ${url}`)));
    }

    /**
     * "Save" meta to this.models so it only has to be fetched from GitHub once.
     * @param {string} name - The model name.
     * @param {Object} data - The model meta data.
     * @returns {Object} The model meta data.
     */
    saveModel(name, data) {
        this.models[name] = data;
        return data;
    }

    /**
     * Hide the result and show the error message instead.
     */
    showError() {
        const result = this.tpl.get('result');
        // a failed comparison must not leave the result in loading state
        result.removeAttribute('data-loading');
        result.style.display = 'none';
        this.tpl.get('error').style.display = 'block';
    }

    /**
     * Compare the selected models as soon as both dropdowns have a value.
     * @param {Event} ev - Change event of one of the two dropdowns.
     */
    onSelect(ev) {
        const modelId = ev.target.value;
        const otherId = (ev.target.id == 'model1') ? 'model2' : 'model1';
        const otherVal = this.tpl.get(otherId);
        const otherModel = otherVal.options[otherVal.selectedIndex].value;
        if (otherModel != '') this.getModels({
            [ev.target.id]: modelId,
            [otherId]: otherModel
        })
    }

    /**
     * Fetch both models and render the comparison, or show the error message
     * if one of them can't be loaded.
     * @param {Object} models - 'model1' and 'model2' mapped to model names.
     */
    getModels({ model1, model2 }) {
        this.tpl.get('result').setAttribute('data-loading', '');
        Promise.all([this.fetchModel(model1), this.fetchModel(model2)])
            .then(([data1, data2]) => this.render({ model1: data1, model2: data2 }))
            .catch(this.showError.bind(this))
    }

    /**
     * Render two models, and populate the chart and table. Currently quite hacky :(
     * @param {Object} models - The models to render.
     * @param {Object} models.model1 - The first model (via first <select>).
     * @param {Object} models.model2 - The second model (via second <select>).
     */
    render({ model1, model2 }) {
        const accKeys = Object.assign({}, this.benchKeys.parser, this.benchKeys.ner);
        const allKeys = [...Object.keys(model1.accuracy || {}), ...Object.keys(model2.accuracy || {})];
        const metaKeys = Object.keys(accKeys).filter(k => allKeys.includes(k));
        const labels = metaKeys.map(key => accKeys[key]);
        const datasets = [model1, model2]
            .map(({ lang, name, version, accuracy = {} }, i) => ({
                label: `${lang}_${name}-${version}`,
                backgroundColor: this.colors[`model${i + 1}`],
                data: metaKeys.map(key => (accuracy[key] || 0).toFixed(2))
            }));
        this.chart.data = { labels, datasets };
        this.chart.update();
        [model1, model2].forEach((model, i) => this.renderTable(metaKeys, i + 1, model));
        // undo a previous showError(), otherwise the result would stay hidden
        // for good after the first comparison that failed
        this.tpl.get('error').style.display = 'none';
        const result = this.tpl.get('result');
        result.style.display = 'block';
        result.removeAttribute('data-loading');
    }

    /**
     * Fill one column of the comparison table.
     * @param {Array} metaKeys - Accuracy keys to display.
     * @param {number} i - Column number (1 or 2), used as suffix of the
     * template keys.
     * @param {Object} model - The model meta data.
     */
    renderTable(metaKeys, i, { lang, name, version, size, description,
        notes, author, url, license, sources, vectors, pipeline, accuracy = {},
        speed = {}}) {
        const type = name.split('_')[0];  // extract type from model name
        const genre = name.split('_')[1];  // extract genre from model name
        this.tpl.fill(`table-head${i}`, `${lang}_${name}`);
        this.tpl.get(`link${i}`).setAttribute('href', `/models/${lang}#${lang}_${name}`);
        this.tpl.fill(`download${i}`, `spacy download ${lang}_${name}\n`);
        this.tpl.fill(`lang${i}`, this.languages[lang] || lang);
        this.tpl.fill(`type${i}`, this.labels[type] || type);
        this.tpl.fill(`genre${i}`, this.labels[genre] || genre);
        this.tpl.fill(`version${i}`, formats.version(version), true);
        this.tpl.fill(`size${i}`, size);
        this.tpl.fill(`desc${i}`, description || 'n/a');
        this.tpl.fill(`pipeline${i}`, formats.pipeline(pipeline), true);
        this.tpl.fill(`vectors${i}`, formats.vectors(vectors));
        this.tpl.fill(`sources${i}`, formats.sources(sources));
        this.tpl.fill(`author${i}`, formats.author(author, url), true);
        this.tpl.fill(`license${i}`, formats.license(license, this.licenses[license]), true);
        // check if model accuracy or speed includes one of the pre-set keys
        for (const key of [...metaKeys, ...Object.keys(this.benchKeys.speed)]) {
            if (accuracy[key]) this.tpl.fill(`${key}${i}`, accuracy[key].toFixed(2))
            else if (speed[key]) this.tpl.fill(`${key}${i}`, convertNumber(Math.round(speed[key])))
            else this.tpl.fill(`${key}${i}`, 'n/a')
        }
    }
}

View File

@ -0,0 +1,35 @@
'use strict';
import { $, $$ } from './util.js';
export default class NavHighlighter {
    /**
     * Highlight the sidebar item of the section currently in the viewport,
     * using the in-view library if it is available on window.
     * @param {string} sectionAttr - Data attribute of sections.
     * @param {string} navAttr - Data attribute of navigation items.
     * @param {string} activeClass Class name of active element.
     */
    constructor(sectionAttr, navAttr, activeClass = 'is-active') {
        // despite the name, these are the navigation items carrying navAttr
        this.sections = Array.from($$(`[${navAttr}]`));
        // highlight first item regardless
        const [firstItem] = this.sections;
        if (firstItem) firstItem.classList.add(activeClass);
        this.navAttr = navAttr;
        this.sectionAttr = sectionAttr;
        this.activeClass = activeClass;
        if (window.inView) {
            inView(`[${sectionAttr}]`)
                .on('enter', section => this.highlightSection(section));
        }
    }

    /**
     * Check if section in view exists in sidebar and mark as active. Nothing
     * changes if no navigation item matches the section.
     * @param {node} section - The section in view.
     */
    highlightSection(section) {
        const id = section.getAttribute(this.sectionAttr);
        const target = $(`[${this.navAttr}="${id}"]`);
        if (!target) return;
        for (const item of this.sections) {
            item.classList.remove(this.activeClass);
        }
        target.classList.add(this.activeClass);
    }
}

View File

@ -0,0 +1,52 @@
'use strict';
import { $ } from './util.js';
export default class ProgressBar {
    /**
     * Animated reading progress bar. Sets the element's "max" attribute to
     * 100 and keeps its "value" attribute updated on scroll and resize.
     * @param {string} selector CSS selector of progress bar element.
     */
    constructor(selector) {
        this.scrollY = 0;
        this.sizes = this.updateSizes();
        this.el = $(selector);
        this.el.setAttribute('max', 100);
        window.addEventListener('scroll', this.onScroll.bind(this));
        window.addEventListener('resize', this.onResize.bind(this));
    }

    /**
     * Store the current vertical scroll position and schedule a redraw.
     * @param {Event} ev - The scroll event (unused).
     */
    onScroll(ev) {
        // scrollTop and clientTop live on the root element, not on document
        // itself; reading them from document always yields undefined and
        // used to turn the scroll position into NaN.
        const root = document.documentElement;
        this.scrollY = (window.pageYOffset || root.scrollTop || 0) - (root.clientTop || 0);
        requestAnimationFrame(this.update.bind(this));
    }

    /**
     * Re-measure page and viewport height and schedule a redraw.
     * @param {Event} ev - The resize event (unused).
     */
    onResize(ev) {
        this.sizes = this.updateSizes();
        requestAnimationFrame(this.update.bind(this));
    }

    /**
     * Write the progress to the element's "value" attribute. The value is the
     * share of the page height covered by scroll position plus viewport
     * height, in percent; it is forced to 0 while the page is scrolled to
     * the very top or if the calculation does not yield a usable number.
     */
    update() {
        const { height, vh } = this.sizes;
        const offset = 100 - ((height - this.scrollY - vh) / height * 100);
        this.el.setAttribute('value', (this.scrollY === 0) ? 0 : offset || 0);
    }

    /**
     * Update scroll and viewport height. Called on load and window resize.
     * @returns {Object} The page height ("height") and viewport height ("vh").
     */
    updateSizes() {
        return {
            height: Math.max(
                document.body.scrollHeight,
                document.body.offsetHeight,
                document.documentElement.clientHeight,
                document.documentElement.scrollHeight,
                document.documentElement.offsetHeight
            ),
            vh: Math.max(
                document.documentElement.clientHeight,
                window.innerHeight || 0
            )
        };
    }
}

View File

@ -0,0 +1,23 @@
/**
* This file is bundled by Rollup, compiled with Babel and included as
* <script nomodule> for older browsers that don't yet support JavaScript
* modules. Browsers that do will ignore this bundle and won't even fetch it
* from the server. Details:
* https://github.com/rollup/rollup
* https://medium.com/dev-channel/es6-modules-in-chrome-canary-m60-ba588dfb8ab7
*/
// Import all modules that are instantiated directly in _includes/_scripts.jade
import ProgressBar from './progress.js';
import NavHighlighter from './nav-highlighter.js';
import Changelog from './changelog.js';
import GitHubEmbed from './github-embed.js';
import { ModelLoader, ModelComparer } from './models.js';
// Expose every imported class as a property of window. Assigning them to
// window is what makes rollup include them in the bundle.
Object.assign(window, {
    ProgressBar,
    NavHighlighter,
    Changelog,
    GitHubEmbed,
    ModelLoader,
    ModelComparer
});

56
website/assets/js/util.js Normal file
View File

@ -0,0 +1,56 @@
'use strict';
/**
 * Shorthand for document.querySelector, bound to document when this module
 * is evaluated.
 */
export const $ = document.querySelector.bind(document);
/**
 * Shorthand for document.querySelectorAll, bound to document when this
 * module is evaluated.
 */
export const $$ = document.querySelectorAll.bind(document);
export class Templater {
    /**
     * Mini templating engine based on data attributes. Selects elements based
     * on a data-tpl and data-tpl-key attribute and can set textContent
     * and innerHTML.
     * @param {string} templateId - Template section, e.g. value of data-tpl.
     */
    constructor(templateId) {
        this.templateId = templateId;
    }

    /**
     * Get an element from the template and return it.
     * @param {string} key - Name of the key within the current template.
     * @returns {?Element} The first matching element, or null if none matches.
     */
    get(key) {
        return $(`[data-tpl="${this.templateId}"][data-tpl-key="${key}"]`);
    }

    /**
     * Fill the content of a template element with a value.
     * @param {string} key - Name of the key within the current template.
     * @param {string|number} value - Content to insert into template element.
     * Falsy values are inserted as an empty string, except for the number 0.
     * @param {boolean} html - Insert content as HTML. Defaults to false.
     * @returns {?Element} The filled element, or null if the template has no
     * element for the key.
     */
    fill(key, value, html = false) {
        const el = this.get(key);
        // a key without an element in the markup is skipped instead of
        // throwing, so one missing element can't abort a whole render pass
        if (!el) return null;
        // keep a legitimate 0 visible instead of blanking it out
        const content = (value || value === 0) ? value : '';
        if (html) el.innerHTML = content;
        else el.textContent = content;
        return el;
    }
}
/**
 * Handle API response and attach its status to the result. A successful
 * response resolves to a copy of the parsed JSON with an added "ok" property;
 * an unsuccessful one yields a plain object holding only the "ok" status
 * (returned directly, not wrapped in a promise).
 * @param {Response} res The response.
 */
export const handleResponse = res => {
    if (!res.ok) return { ok: res.ok };
    return res.json().then(json => Object.assign({}, json, { ok: res.ok }));
};
/**
 * Convert a number to a string and add thousand separator. Only the part in
 * front of the first decimal point is grouped; decimal places are returned
 * as they are.
 * @param {number|string} num - The number to convert.
 * @param {string} separator Thousand separator.
 * @returns {string} The number with separators, e.g. "1,234,567.89".
 */
export const convertNumber = (num, separator = ',') => {
    const [integer, ...decimals] = num.toString().split('.');
    // insert the separator in front of every group of three trailing digits
    const grouped = integer.replace(/\B(?=(\d{3})+(?!\d))/g, separator);
    return [grouped, ...decimals].join('.');
};

View File

@ -1,7 +1,8 @@
{
"sidebar": {
"Models": {
"Overview": "./"
"Overview": "./",
"Comparison": "comparison"
},
"Language models": {
@ -26,6 +27,17 @@
}
},
"comparison": {
"title": "Model Comparison",
"teaser": "Compare spaCy's statistical models and their accuracy.",
"tag": "experimental",
"compare_models": true,
"default_models": {
"model1": "en_core_web_sm",
"model2": "en_core_web_lg"
}
},
"MODELS": {
"en": ["en_core_web_sm", "en_core_web_lg", "en_vectors_web_lg"],
"de": ["de_dep_news_sm"],

View File

@ -0,0 +1,81 @@
//- 💫 DOCS > MODELS > COMPARISON
include ../_includes/_mixins
p
| This experimental tool helps you compare spaCy's statistical models
| by features, accuracy and speed. This can be especially useful to get an
| idea of the trade-offs between larger and smaller models of the same
| type. For example, #[code lg] models tend to be more accurate than
| the corresponding #[code sm] versions but they're often significantly
| larger in file size and memory usage.
- TPL = "compare"
+grid.o-box
for i in [1, 2]
+grid-col("half", "no-gutter")
label.u-heading.u-text-label.u-text-center.u-color-theme(for="model#{i}") Model #{i}
.o-field.o-grid.o-grid--vcenter.u-padding-small
select.o-field__select.u-text-small(id="model#{i}" data-tpl=TPL data-tpl-key="model#{i}")
option(selected="" disabled="" value="") Select model...
for models, _ in MODELS
for model in models
option(value=model)=model
div(data-tpl=TPL data-tpl-key="error")
+infobox
| Unable to load model details and accuracy figures from GitHub to
| compare the models. For details of the individual models, see the
| overview of the
| #[+a(gh("spacy-models") + "/releases") latest model releases].
div(data-tpl=TPL data-tpl-key="result" style="display: none")
+chart("compare_accuracy", 350)
+aside-code("Download", "text")
for i in [1, 2]
span(data-tpl=TPL data-tpl-key="download#{i}")
+table.o-block-small(data-tpl=TPL data-tpl-key="table")
+row("head")
+head-cell
for i in [1, 2]
+head-cell(style="width: 40%")
a(data-tpl=TPL data-tpl-key="link#{i}")
code(data-tpl=TPL data-tpl-key="table-head#{i}" style="text-transform: initial; font-weight: normal")
for label, id in {lang: "Language", type: "Type", genre: "Genre"}
+row
+cell #[+label=label]
for i in [1, 2]
+cell(data-tpl=TPL data-tpl-key="#{id}#{i}") n/a
for label in ["Version", "Size", "Pipeline", "Vectors", "Sources", "Author", "License"]
- var field = label.toLowerCase()
+row
+cell.u-nowrap
+label=label
if MODEL_META[field]
| #[+help(MODEL_META[field]).u-color-subtle]
for i in [1, 2]
+cell
span(data-tpl=TPL data-tpl-key=field + i) #[em n/a]
+row
+cell #[+label Description]
for i in [1, 2]
+cell.u-text-tiny(data-tpl=TPL data-tpl-key="desc#{i}") n/a
for benchmark, _ in MODEL_BENCHMARKS
- var counter = 0
for label, field in benchmark
+row((counter == 0) ? "divider" : null)
+cell.u-nowrap
+label=label
if MODEL_META[field]
| #[+help(MODEL_META[field]).u-color-subtle]
for i in [1, 2]
+cell
span(data-tpl=TPL data-tpl-key=field + i) n/a
- counter++

View File

@ -8,13 +8,15 @@
"devDependencies": {
"babel-cli": "^6.14.0",
"harp": "^0.24.0",
"rollup": "^0.50.0",
"uglify-js": "^2.7.3"
},
"dependencies": {},
"scripts": {
"compile": "NODE_ENV=deploy harp compile",
"compile_js": "babel www/assets/js/main.js --out-file www/assets/js/main.js --presets=es2015",
"uglify": "uglifyjs www/assets/js/main.js --output www/assets/js/main.js",
"build": "npm run compile && npm run compile_js && npm run uglify"
"rollup_js": "rollup www/assets/js/rollup.js --output.format iife --output.file www/assets/js/rollup.js",
"compile_rollup": "babel www/assets/js/rollup.js --out-file www/assets/js/rollup.js --presets=es2015",
"uglify": "uglifyjs www/assets/js/rollup.js --output www/assets/js/rollup.js",
"build": "npm run compile && echo 'Compiled website' && npm run rollup_js && echo 'Bundled rollup.js' && npm run compile_rollup && echo 'Compiled rollup.js' && npm run uglify && echo 'Uglified rollup.js'"
}
}

View File

@ -130,10 +130,11 @@ include _includes/_mixins
| capabilities and can be used to mark features that require a
| respective model to be installed.
p.o-block.o-inline-list
+tag I'm a tag
+tag-new(2)
+tag-model("Named entities")
.o-block
p.o-inline-list
+tag I'm a tag
+tag-new(2)
+tag-model("Named entities")
+h(3, "icons", "website/_includes/_svg.jade") Icons
@ -359,18 +360,14 @@ include _includes/_mixins
script(src="/assets/js/chart.min.js")
script new Chart('chart_accuracy', { datasets: [] })
+grid
+grid-col("half")
+chart("accuracy", 400)
+chart("accuracy", 400)
+chart("speed", 300)
+grid-col("half")
+chart("speed", 300)
script(src="/assets/js/chart.min.js")
script(src="/assets/js/vendor/chart.min.js")
script.
Chart.defaults.global.defaultFontFamily = "-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'";
new Chart('chart_accuracy', { type: 'bar', options: { legend: false, responsive: true, scales: { yAxes: [{ label: 'Accuracy', ticks: { suggestedMin: 70 } }], xAxes: [{ barPercentage: 0.425 }]}}, data: { labels: ['UAS', 'LAS', 'POS', 'NER F', 'NER P', 'NER R'], datasets: [{ label: 'en_core_web_sm', data: [91.49, 89.66, 97.23, 86.46, 86.78, 86.15], backgroundColor: '#09a3d5' }]}});
new Chart('chart_speed', { type: 'horizontalBar', options: { legend: false, responsive: true, scales: { xAxes: [{ label: 'Speed', ticks: { suggestedMin: 0 }}], yAxes: [{ barPercentage: 0.425 }]}}, data: { labels: ['w/s CPU', 'w/s GPU'], datasets: [{ label: 'en_core_web_sm', data: [9575, 25531], backgroundColor: '#09a3d5'}]}});
new Chart('chart_accuracy', { type: 'bar', options: { legend: { position: 'bottom'}, responsive: true, scales: { yAxes: [{ label: 'Accuracy', ticks: { suggestedMin: 70 } }], xAxes: [{ barPercentage: 0.75 }]}}, data: { labels: ['UAS', 'LAS', 'POS', 'NER F', 'NER P', 'NER R'], datasets: [{ label: 'en_core_web_sm', data: [91.65, 89.77, 97.05, 84.80, 84.53, 85.06], backgroundColor: '#09a3d5' }, { label: 'en_core_web_lg', data: [91.49, 89.66, 97.23, 86.46, 86.78, 86.15], backgroundColor: '#066B8C'}]}});
new Chart('chart_speed', { type: 'horizontalBar', options: { legend: { position: 'bottom'}, responsive: true, scales: { xAxes: [{ label: 'Speed', ticks: { suggestedMin: 0 }}], yAxes: [{ barPercentage: 0.75 }]}}, data: { labels: ['w/s CPU', 'w/s GPU'], datasets: [{ label: 'en_core_web_sm', data: [9575, 25531], backgroundColor: '#09a3d5'}, { label: 'en_core_web_lg', data: [8421, 22092], backgroundColor: '#066B8C'}]}});
+section("embeds")
+h(2, "embeds") Embeds