mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-22 19:04:25 +03:00
Merge pull request #1462 from explosion/feature/vector-meta-data
💫 Add vector meta data to model meta.json on train/package and show in docs
This commit is contained in:
commit
b6b4f1aaf7
|
@ -43,7 +43,7 @@ def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, force
|
||||||
prints(meta_path, title="Reading meta.json from file")
|
prints(meta_path, title="Reading meta.json from file")
|
||||||
meta = util.read_json(meta_path)
|
meta = util.read_json(meta_path)
|
||||||
else:
|
else:
|
||||||
meta = generate_meta()
|
meta = generate_meta(input_dir)
|
||||||
meta = validate_meta(meta, ['lang', 'name', 'version'])
|
meta = validate_meta(meta, ['lang', 'name', 'version'])
|
||||||
|
|
||||||
model_name = meta['lang'] + '_' + meta['name']
|
model_name = meta['lang'] + '_' + meta['name']
|
||||||
|
@ -77,7 +77,8 @@ def create_file(file_path, contents):
|
||||||
file_path.open('w', encoding='utf-8').write(contents)
|
file_path.open('w', encoding='utf-8').write(contents)
|
||||||
|
|
||||||
|
|
||||||
def generate_meta():
|
def generate_meta(model_path):
|
||||||
|
meta = {}
|
||||||
settings = [('lang', 'Model language', 'en'),
|
settings = [('lang', 'Model language', 'en'),
|
||||||
('name', 'Model name', 'model'),
|
('name', 'Model name', 'model'),
|
||||||
('version', 'Model version', '0.0.0'),
|
('version', 'Model version', '0.0.0'),
|
||||||
|
@ -87,31 +88,21 @@ def generate_meta():
|
||||||
('email', 'Author email', False),
|
('email', 'Author email', False),
|
||||||
('url', 'Author website', False),
|
('url', 'Author website', False),
|
||||||
('license', 'License', 'CC BY-NC 3.0')]
|
('license', 'License', 'CC BY-NC 3.0')]
|
||||||
prints("Enter the package settings for your model.", title="Generating meta.json")
|
nlp = util.load_model_from_path(Path(model_path))
|
||||||
meta = {}
|
meta['pipeline'] = nlp.pipe_names
|
||||||
|
meta['vectors'] = {'width': nlp.vocab.vectors_length,
|
||||||
|
'entries': len(nlp.vocab.vectors)}
|
||||||
|
prints("Enter the package settings for your model. The following "
|
||||||
|
"information will be read from your model data: pipeline, vectors.",
|
||||||
|
title="Generating meta.json")
|
||||||
for setting, desc, default in settings:
|
for setting, desc, default in settings:
|
||||||
response = util.get_raw_input(desc, default)
|
response = util.get_raw_input(desc, default)
|
||||||
meta[setting] = default if response == '' and default else response
|
meta[setting] = default if response == '' and default else response
|
||||||
meta['pipeline'] = generate_pipeline()
|
|
||||||
if about.__title__ != 'spacy':
|
if about.__title__ != 'spacy':
|
||||||
meta['parent_package'] = about.__title__
|
meta['parent_package'] = about.__title__
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
|
|
||||||
def generate_pipeline():
|
|
||||||
prints("If set to 'True', the default pipeline is used. If set to 'False', "
|
|
||||||
"the pipeline will be disabled. Components should be specified as a "
|
|
||||||
"comma-separated list of component names, e.g. tagger, "
|
|
||||||
"parser, ner. For more information, see the docs on processing pipelines.",
|
|
||||||
title="Enter your model's pipeline components")
|
|
||||||
pipeline = util.get_raw_input("Pipeline components", True)
|
|
||||||
subs = {'True': True, 'False': False}
|
|
||||||
if pipeline in subs:
|
|
||||||
return subs[pipeline]
|
|
||||||
else:
|
|
||||||
return [p.strip() for p in pipeline.split(',')]
|
|
||||||
|
|
||||||
|
|
||||||
def validate_meta(meta, keys):
|
def validate_meta(meta, keys):
|
||||||
for key in keys:
|
for key in keys:
|
||||||
if key not in meta or meta[key] == '':
|
if key not in meta or meta[key] == '':
|
||||||
|
|
|
@ -144,7 +144,10 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
|
||||||
file_.write(json_dumps(scorer.scores))
|
file_.write(json_dumps(scorer.scores))
|
||||||
meta_loc = output_path / ('model%d' % i) / 'meta.json'
|
meta_loc = output_path / ('model%d' % i) / 'meta.json'
|
||||||
meta['accuracy'] = scorer.scores
|
meta['accuracy'] = scorer.scores
|
||||||
meta['speed'] = {'nwords': nwords, 'cpu':cpu_wps, 'gpu': gpu_wps}
|
meta['speed'] = {'nwords': nwords, 'cpu': cpu_wps,
|
||||||
|
'gpu': gpu_wps}
|
||||||
|
meta['vectors'] = {'width': nlp.vocab.vectors_length,
|
||||||
|
'entries': len(nlp.vocab.vectors)}
|
||||||
meta['lang'] = nlp.lang
|
meta['lang'] = nlp.lang
|
||||||
meta['pipeline'] = pipeline
|
meta['pipeline'] = pipeline
|
||||||
meta['spacy_version'] = '>=%s' % about.__version__
|
meta['spacy_version'] = '>=%s' % about.__version__
|
||||||
|
|
|
@ -38,7 +38,7 @@ for id in CURRENT_MODELS
|
||||||
+cell #[+label Size]
|
+cell #[+label Size]
|
||||||
+cell #[+tag=comps.size] #[span(data-tpl=id data-tpl-key="size") #[em n/a]]
|
+cell #[+tag=comps.size] #[span(data-tpl=id data-tpl-key="size") #[em n/a]]
|
||||||
|
|
||||||
each label in ["Pipeline", "Sources", "Author", "License"]
|
each label in ["Pipeline", "Vectors", "Sources", "Author", "License"]
|
||||||
- var field = label.toLowerCase()
|
- var field = label.toLowerCase()
|
||||||
+row
|
+row
|
||||||
+cell.u-nowrap
|
+cell.u-nowrap
|
||||||
|
|
|
@ -140,6 +140,10 @@ class ModelLoader {
|
||||||
else return ({ ok: res.ok })
|
else return ({ ok: res.ok })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
convertNumber(num, separator = ',') {
|
||||||
|
return num.toString().replace(/\B(?=(\d{3})+(?!\d))/g, separator);
|
||||||
|
}
|
||||||
|
|
||||||
getModels(compat) {
|
getModels(compat) {
|
||||||
this.compat = compat;
|
this.compat = compat;
|
||||||
for (let modelId of this.modelIds) {
|
for (let modelId of this.modelIds) {
|
||||||
|
@ -159,7 +163,7 @@ class ModelLoader {
|
||||||
const template = new Templater(modelId);
|
const template = new Templater(modelId);
|
||||||
template.get('table').removeAttribute('data-loading');
|
template.get('table').removeAttribute('data-loading');
|
||||||
template.get('error').style.display = 'block';
|
template.get('error').style.display = 'block';
|
||||||
for (let key of ['sources', 'pipeline', 'author', 'license']) {
|
for (let key of ['sources', 'pipeline', 'vectors', 'author', 'license']) {
|
||||||
template.get(key).parentElement.parentElement.style.display = 'none';
|
template.get(key).parentElement.parentElement.style.display = 'none';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -167,13 +171,14 @@ class ModelLoader {
|
||||||
/**
|
/**
|
||||||
* Update model details in tables. Currently quite hacky :(
|
* Update model details in tables. Currently quite hacky :(
|
||||||
*/
|
*/
|
||||||
render({ lang, name, version, sources, pipeline, url, author, license, accuracy, size, description, notes }) {
|
render({ lang, name, version, sources, pipeline, vectors, url, author, license, accuracy, size, description, notes }) {
|
||||||
const modelId = `${lang}_${name}`;
|
const modelId = `${lang}_${name}`;
|
||||||
const model = `${modelId}-${version}`;
|
const model = `${modelId}-${version}`;
|
||||||
const template = new Templater(modelId);
|
const template = new Templater(modelId);
|
||||||
|
|
||||||
const getSources = s => (s instanceof Array) ? s.join(', ') : s;
|
const getSources = s => (s instanceof Array) ? s.join(', ') : s;
|
||||||
const getPipeline = p => p.map(comp => `<code>${comp}</code>`).join(', ');
|
const getPipeline = p => p.map(comp => `<code>${comp}</code>`).join(', ');
|
||||||
|
const getVectors = v => `${this.convertNumber(v.entries)} (${v.width} dimensions)`;
|
||||||
const getLink = (t, l) => `<a href="${l}" target="_blank">${t}</a>`;
|
const getLink = (t, l) => `<a href="${l}" target="_blank">${t}</a>`;
|
||||||
|
|
||||||
const keys = { version, size, description, notes }
|
const keys = { version, size, description, notes }
|
||||||
|
@ -182,6 +187,8 @@ class ModelLoader {
|
||||||
if (sources) template.fill('sources', getSources(sources));
|
if (sources) template.fill('sources', getSources(sources));
|
||||||
if (pipeline && pipeline.length) template.fill('pipeline', getPipeline(pipeline), true);
|
if (pipeline && pipeline.length) template.fill('pipeline', getPipeline(pipeline), true);
|
||||||
else template.get('pipeline').parentElement.parentElement.style.display = 'none';
|
else template.get('pipeline').parentElement.parentElement.style.display = 'none';
|
||||||
|
if (vectors) template.fill('vectors', getVectors(vectors));
|
||||||
|
else template.get('vectors').parentElement.parentElement.style.display = 'none';
|
||||||
|
|
||||||
if (author) template.fill('author', url ? getLink(author, url) : author, true);
|
if (author) template.fill('author', url ? getLink(author, url) : author, true);
|
||||||
if (license) template.fill('license', this.licenses[license] ? getLink(license, this.licenses[license]) : license, true);
|
if (license) template.fill('license', this.licenses[license] ? getLink(license, this.licenses[license]) : license, true);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user