mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 18:07:26 +03:00 
			
		
		
		
	Fix vector details in model overview
This commit is contained in:
		
							parent
							
								
									9baab241b4
								
							
						
					
					
						commit
						43512c68b2
					
				| 
						 | 
					@ -40,6 +40,8 @@ for id in CURRENT_MODELS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            each label in ["Pipeline", "Vectors", "Sources", "Author", "License"]
 | 
					            each label in ["Pipeline", "Vectors", "Sources", "Author", "License"]
 | 
				
			||||||
                - var field = label.toLowerCase()
 | 
					                - var field = label.toLowerCase()
 | 
				
			||||||
 | 
					                if field == "vectors"
 | 
				
			||||||
 | 
					                    - field = "vecs"
 | 
				
			||||||
                +row
 | 
					                +row
 | 
				
			||||||
                    +cell.u-nowrap
 | 
					                    +cell.u-nowrap
 | 
				
			||||||
                        +label=label
 | 
					                        +label=label
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -20,21 +20,33 @@ const CHART_FONTS = {
 | 
				
			||||||
 * @property {function} vectors - Format vector data (entries and dimensions).
 | 
					 * @property {function} vectors - Format vector data (entries and dimensions).
 | 
				
			||||||
 * @property {function} version - Format model version number.
 | 
					 * @property {function} version - Format model version number.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
export const formats = {
 | 
					const formats = {
 | 
				
			||||||
    author: (author, url) => url ? `<a href="${url}" target="_blank">${author}</a>` : author,
 | 
					    author: (author, url) => url ? `<a href="${url}" target="_blank">${author}</a>` : author,
 | 
				
			||||||
    license: (license, url) => url ? `<a href="${url}" target="_blank">${license}</a>` : license,
 | 
					    license: (license, url) => url ? `<a href="${url}" target="_blank">${license}</a>` : license,
 | 
				
			||||||
    sources: sources => (sources instanceof Array) ? sources.join(', ') : sources,
 | 
					    sources: sources => (sources instanceof Array) ? sources.join(', ') : sources,
 | 
				
			||||||
    pipeline: pipes => (pipes && pipes.length) ? pipes.map(p => `<code>${p}</code>`).join(', ') : '-',
 | 
					    pipeline: pipes => (pipes && pipes.length) ? pipes.map(p => `<code>${p}</code>`).join(', ') : '-',
 | 
				
			||||||
    vectors: vec => vec ? `${abbrNumber(vec.keys)} keys, ${abbrNumber(vec.vectors)} unique vectors (${vec.width} dimensions)` : 'n/a',
 | 
					    vectors: vec => formatVectors(vec),
 | 
				
			||||||
    version: version => `<code>v${version}</code>`
 | 
					    version: version => `<code>v${version}</code>`
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
 | 
				
			||||||
 | 
					 * Format word vectors data depending on contents.
 | 
				
			||||||
 | 
					 * @property {Object} data - The vectors object from the model's meta.json.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					const formatVectors = data => {
 | 
				
			||||||
 | 
					    if (!data) return 'n/a';
 | 
				
			||||||
 | 
					    if (Object.values(data).every(n => n == 0)) return 'context vectors only';
 | 
				
			||||||
 | 
					    const { keys, vectors: vecs, width } = data;
 | 
				
			||||||
 | 
					    return `${abbrNumber(keys)} keys, ${abbrNumber(vecs)} unique vectors (${width} dimensions)`;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * Find the latest version of a model in a compatibility table.
 | 
					 * Find the latest version of a model in a compatibility table.
 | 
				
			||||||
 * @param {string} model - The model name.
 | 
					 * @param {string} model - The model name.
 | 
				
			||||||
 * @param {Object} compat - Compatibility table, keyed by spaCy version.
 | 
					 * @param {Object} compat - Compatibility table, keyed by spaCy version.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
export const getLatestVersion = (model, compat = {}) => {
 | 
					const getLatestVersion = (model, compat = {}) => {
 | 
				
			||||||
    for (let [spacy_v, models] of Object.entries(compat)) {
 | 
					    for (let [spacy_v, models] of Object.entries(compat)) {
 | 
				
			||||||
        if (models[model]) return models[model][0];
 | 
					        if (models[model]) return models[model][0];
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
| 
						 | 
					@ -90,7 +102,7 @@ export class ModelLoader {
 | 
				
			||||||
        const tpl = new Templater(modelId);
 | 
					        const tpl = new Templater(modelId);
 | 
				
			||||||
        tpl.get('table').removeAttribute('data-loading');
 | 
					        tpl.get('table').removeAttribute('data-loading');
 | 
				
			||||||
        tpl.get('error').style.display = 'block';
 | 
					        tpl.get('error').style.display = 'block';
 | 
				
			||||||
        for (let key of ['sources', 'pipeline', 'vectors', 'author', 'license']) {
 | 
					        for (let key of ['sources', 'pipeline', 'vecs', 'author', 'license']) {
 | 
				
			||||||
            tpl.get(key).parentElement.parentElement.style.display = 'none';
 | 
					            tpl.get(key).parentElement.parentElement.style.display = 'none';
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
| 
						 | 
					@ -120,8 +132,8 @@ export class ModelLoader {
 | 
				
			||||||
        if (author) tpl.fill('author', formats.author(author, url), true);
 | 
					        if (author) tpl.fill('author', formats.author(author, url), true);
 | 
				
			||||||
        if (license) tpl.fill('license', formats.license(license, this.licenses[license]), true);
 | 
					        if (license) tpl.fill('license', formats.license(license, this.licenses[license]), true);
 | 
				
			||||||
        if (sources) tpl.fill('sources', formats.sources(sources));
 | 
					        if (sources) tpl.fill('sources', formats.sources(sources));
 | 
				
			||||||
        if (vectors) tpl.fill('vectors', formats.vectors(vectors));
 | 
					        if (vectors) tpl.fill('vecs', formats.vectors(vectors));
 | 
				
			||||||
        else tpl.get('vectors').parentElement.parentElement.style.display = 'none';
 | 
					        else tpl.get('vecs').parentElement.parentElement.style.display = 'none';
 | 
				
			||||||
        if (pipeline && pipeline.length) tpl.fill('pipeline', formats.pipeline(pipeline), true);
 | 
					        if (pipeline && pipeline.length) tpl.fill('pipeline', formats.pipeline(pipeline), true);
 | 
				
			||||||
        else tpl.get('pipeline').parentElement.parentElement.style.display = 'none';
 | 
					        else tpl.get('pipeline').parentElement.parentElement.style.display = 'none';
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
| 
						 | 
					@ -223,8 +235,9 @@ export class ModelComparer {
 | 
				
			||||||
        const version = getLatestVersion(name, this.compat);
 | 
					        const version = getLatestVersion(name, this.compat);
 | 
				
			||||||
        const modelName = `${name}-${version}`;
 | 
					        const modelName = `${name}-${version}`;
 | 
				
			||||||
        return new Promise((resolve, reject) => {
 | 
					        return new Promise((resolve, reject) => {
 | 
				
			||||||
 | 
					            if (!version) reject();
 | 
				
			||||||
            // resolve immediately if model already loaded, e.g. in this.models
 | 
					            // resolve immediately if model already loaded, e.g. in this.models
 | 
				
			||||||
            if (this.models[name]) resolve(this.models[name]);
 | 
					            else if (this.models[name]) resolve(this.models[name]);
 | 
				
			||||||
            else fetch(`${this.url}/meta/${modelName}.json`)
 | 
					            else fetch(`${this.url}/meta/${modelName}.json`)
 | 
				
			||||||
                .then(res => handleResponse(res))
 | 
					                .then(res => handleResponse(res))
 | 
				
			||||||
                .then(json => json.ok ? resolve(this.saveModel(name, json)) : reject())
 | 
					                .then(json => json.ok ? resolve(this.saveModel(name, json)) : reject())
 | 
				
			||||||
| 
						 | 
					@ -306,12 +319,13 @@ export class ModelComparer {
 | 
				
			||||||
        this.tpl.fill(`size${i}`, size);
 | 
					        this.tpl.fill(`size${i}`, size);
 | 
				
			||||||
        this.tpl.fill(`desc${i}`, description || 'n/a');
 | 
					        this.tpl.fill(`desc${i}`, description || 'n/a');
 | 
				
			||||||
        this.tpl.fill(`pipeline${i}`, formats.pipeline(pipeline), true);
 | 
					        this.tpl.fill(`pipeline${i}`, formats.pipeline(pipeline), true);
 | 
				
			||||||
        this.tpl.fill(`vectors${i}`, formats.vectors(vectors));
 | 
					        this.tpl.fill(`vecs${i}`, formats.vectors(vectors));
 | 
				
			||||||
        this.tpl.fill(`sources${i}`, formats.sources(sources));
 | 
					        this.tpl.fill(`sources${i}`, formats.sources(sources));
 | 
				
			||||||
        this.tpl.fill(`author${i}`, formats.author(author, url), true);
 | 
					        this.tpl.fill(`author${i}`, formats.author(author, url), true);
 | 
				
			||||||
        this.tpl.fill(`license${i}`, formats.license(license, this.licenses[license]), true);
 | 
					        this.tpl.fill(`license${i}`, formats.license(license, this.licenses[license]), true);
 | 
				
			||||||
        // check if model accuracy or speed includes one of the pre-set keys
 | 
					        // check if model accuracy or speed includes one of the pre-set keys
 | 
				
			||||||
        for (let key of [...metaKeys, ...Object.keys(this.benchKeys.speed)]) {
 | 
					        const allKeys = [].concat(...Object.entries(this.benchKeys).map(([_, v]) => Object.keys(v)));
 | 
				
			||||||
 | 
					        for (let key of allKeys) {
 | 
				
			||||||
            if (accuracy[key]) this.tpl.fill(`${key}${i}`, accuracy[key].toFixed(2))
 | 
					            if (accuracy[key]) this.tpl.fill(`${key}${i}`, accuracy[key].toFixed(2))
 | 
				
			||||||
            else if (speed[key]) this.tpl.fill(`${key}${i}`, convertNumber(Math.round(speed[key])))
 | 
					            else if (speed[key]) this.tpl.fill(`${key}${i}`, convertNumber(Math.round(speed[key])))
 | 
				
			||||||
            else this.tpl.fill(`${key}${i}`, 'n/a')
 | 
					            else this.tpl.fill(`${key}${i}`, 'n/a')
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -68,6 +68,7 @@
 | 
				
			||||||
        "gpu": "words per second on GPU",
 | 
					        "gpu": "words per second on GPU",
 | 
				
			||||||
        "pipeline": "Processing pipeline components in order",
 | 
					        "pipeline": "Processing pipeline components in order",
 | 
				
			||||||
        "sources": "Sources of training data",
 | 
					        "sources": "Sources of training data",
 | 
				
			||||||
 | 
					        "vecs": "Word vectors included in the model. Models that only support context vectors compute similarity via the tensors shared with the pipeline.",
 | 
				
			||||||
        "benchmark_parser": "Parser accuracy",
 | 
					        "benchmark_parser": "Parser accuracy",
 | 
				
			||||||
        "benchmark_ner": "NER accuracy",
 | 
					        "benchmark_ner": "NER accuracy",
 | 
				
			||||||
        "benchmark_speed": "Speed"
 | 
					        "benchmark_speed": "Speed"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -53,6 +53,8 @@ div(data-tpl=TPL data-tpl-key="result" style="display: none")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for label in ["Version", "Size", "Pipeline", "Vectors", "Sources", "Author", "License"]
 | 
					        for label in ["Version", "Size", "Pipeline", "Vectors", "Sources", "Author", "License"]
 | 
				
			||||||
            - var field = label.toLowerCase()
 | 
					            - var field = label.toLowerCase()
 | 
				
			||||||
 | 
					            if field == "vectors"
 | 
				
			||||||
 | 
					                - field = "vecs"
 | 
				
			||||||
            +row
 | 
					            +row
 | 
				
			||||||
                +cell.u-nowrap
 | 
					                +cell.u-nowrap
 | 
				
			||||||
                    +label=label
 | 
					                    +label=label
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,9 +4,9 @@ p
 | 
				
			||||||
    |  Similarity is determined by comparing #[strong word vectors] or "word
 | 
					    |  Similarity is determined by comparing #[strong word vectors] or "word
 | 
				
			||||||
    |  embeddings", multi-dimensional meaning representations of a word. Word
 | 
					    |  embeddings", multi-dimensional meaning representations of a word. Word
 | 
				
			||||||
    |  vectors can be generated using an algorithm like
 | 
					    |  vectors can be generated using an algorithm like
 | 
				
			||||||
    |  #[+a("https://en.wikipedia.org/wiki/Word2vec") word2vec]. Most of spaCy's
 | 
					    |  #[+a("https://en.wikipedia.org/wiki/Word2vec") word2vec]. spaCy's medium
 | 
				
			||||||
    |  #[+a("/models") default models] come with
 | 
					    |  #[code md] and large #[code lg] #[+a("/models") models] come with
 | 
				
			||||||
    |  #[strong 300-dimensional vectors] that look like this:
 | 
					    |  #[strong multi-dimensional vectors] that look like this:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
+code("banana.vector", false, false, 250).
 | 
					+code("banana.vector", false, false, 250).
 | 
				
			||||||
    array([2.02280000e-01,  -7.66180009e-02,   3.70319992e-01,
 | 
					    array([2.02280000e-01,  -7.66180009e-02,   3.70319992e-01,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,12 +4,9 @@
 | 
				
			||||||
    |  Dense, real valued vectors representing distributional similarity
 | 
					    |  Dense, real valued vectors representing distributional similarity
 | 
				
			||||||
    |  information are now a cornerstone of practical NLP. The most common way
 | 
					    |  information are now a cornerstone of practical NLP. The most common way
 | 
				
			||||||
    |  to train these vectors is the #[+a("https://en.wikipedia.org/wiki/Word2vec") word2vec]
 | 
					    |  to train these vectors is the #[+a("https://en.wikipedia.org/wiki/Word2vec") word2vec]
 | 
				
			||||||
    |  family of algorithms. The default
 | 
					    |  family of algorithms. If you need to train a word2vec model, we recommend
 | 
				
			||||||
    |  #[+a("/models/en") English model] installs
 | 
					    |  the implementation in the Python library
 | 
				
			||||||
    |  300-dimensional vectors trained on the
 | 
					    |  #[+a("https://radimrehurek.com/gensim/") Gensim].
 | 
				
			||||||
    |  #[+a("http://commoncrawl.org") Common Crawl] corpus.
 | 
					 | 
				
			||||||
    |  If you need to train a word2vec model, we recommend the implementation in
 | 
					 | 
				
			||||||
    |  the Python library #[+a("https://radimrehurek.com/gensim/") Gensim].
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
include ../_spacy-101/_similarity
 | 
					include ../_spacy-101/_similarity
 | 
				
			||||||
include ../_spacy-101/_word-vectors
 | 
					include ../_spacy-101/_word-vectors
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user