From abf8aa05d39688a69afd6c389ab517263982572e Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 18:39:38 +0100 Subject: [PATCH] Populate --create-meta defaults from file if available If meta.json is found in directory and user chooses to overwrite it, show existing data as defaults. --- spacy/cli/package.py | 40 +++++++++++++++++--------------- website/api/_top-level/_cli.jade | 18 +++++++------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/spacy/cli/package.py b/spacy/cli/package.py index d1984fe65..3157ba99d 100644 --- a/spacy/cli/package.py +++ b/spacy/cli/package.py @@ -16,10 +16,11 @@ from .. import about input_dir=("directory with model data", "positional", None, str), output_dir=("output parent directory", "positional", None, str), meta_path=("path to meta.json", "option", "m", str), - create_meta=("create meta.json, even if one exists in directory", "flag", - "c", bool), - force=("force overwriting of existing folder in output directory", "flag", - "f", bool)) + create_meta=("create meta.json, even if one exists in directory – if " + "existing meta is found, entries are shown as defaults in " + "the command line prompt", "flag", "c", bool), + force=("force overwriting of existing model directory in output directory", + "flag", "f", bool)) def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, force=False): """ @@ -41,13 +42,13 @@ def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, template_manifest = get_template('MANIFEST.in') template_init = get_template('xx_model_name/__init__.py') meta_path = meta_path or input_path / 'meta.json' - if not create_meta and meta_path.is_file(): - prints(meta_path, title="Reading meta.json from file") + if meta_path.is_file(): meta = util.read_json(meta_path) - else: - meta = generate_meta(input_dir) + if not create_meta: # only print this if user doesn't want to overwrite + prints(meta_path, title="Loaded meta.json from file") + else: + meta = generate_meta(input_dir, meta) meta = validate_meta(meta, ['lang', 'name', 'version']) - model_name = meta['lang'] + '_' + meta['name'] model_name_v = model_name + '-' + meta['version'] main_path = output_path / model_name_v @@ -82,18 +83,19 @@ def create_file(file_path, contents): file_path.open('w', encoding='utf-8').write(contents) -def generate_meta(model_path): - meta = {} - settings = [('lang', 'Model language', 'en'), - ('name', 'Model name', 'model'), - ('version', 'Model version', '0.0.0'), +def generate_meta(model_path, existing_meta): + meta = existing_meta or {} + settings = [('lang', 'Model language', meta.get('lang', 'en')), + ('name', 'Model name', meta.get('name', 'model')), + ('version', 'Model version', meta.get('version', '0.0.0')), ('spacy_version', 'Required spaCy version', '>=%s,<3.0.0' % about.__version__), - ('description', 'Model description', False), - ('author', 'Author', False), - ('email', 'Author email', False), - ('url', 'Author website', False), - ('license', 'License', 'CC BY-NC 3.0')] + ('description', 'Model description', + meta.get('description', False)), + ('author', 'Author', meta.get('author', False)), + ('email', 'Author email', meta.get('email', False)), + ('url', 'Author website', meta.get('url', False)), + ('license', 'License', meta.get('license', 'CC BY-SA 3.0'))] nlp = util.load_model_from_path(Path(model_path)) meta['pipeline'] = nlp.pipe_names meta['vectors'] = {'width': nlp.vocab.vectors_length, diff --git a/website/api/_top-level/_cli.jade b/website/api/_top-level/_cli.jade index f19eb43d0..aa13abc12 100644 --- a/website/api/_top-level/_cli.jade +++ b/website/api/_top-level/_cli.jade @@ -453,10 +453,11 @@ p p | Generate a #[+a("/usage/training#models-generating") model Python package] | from an existing model data directory. All data files are copied over. - | If the path to a meta.json is supplied, or a meta.json is found in the - | input directory, this file is used. Otherwise, the data can be entered - | directly from the command line. The required file templates are downloaded - | from #[+src(gh("spacy-dev-resources", "templates/model")) GitHub] to make + | If the path to a #[code meta.json] is supplied, or a #[code meta.json] is + | found in the input directory, this file is used. Otherwise, the data can + | be entered directly from the command line. The required file templates + | are downloaded from + | #[+src(gh("spacy-dev-resources", "templates/model")) GitHub] to make | sure you're always using the latest versions. This means you need to be | connected to the internet to use this command. @@ -477,15 +478,16 @@ p +row +cell #[code --meta-path], #[code -m] +cell option - +cell #[+tag-new(2)] Path to meta.json file (optional). + +cell #[+tag-new(2)] Path to #[code meta.json] file (optional). +row +cell #[code --create-meta], #[code -c] +cell flag +cell - | #[+tag-new(2)] Create a meta.json file on the command line, even - | if one already exists in the directory. - + | #[+tag-new(2)] Create a #[code meta.json] file on the command + | line, even if one already exists in the directory. If an + | existing file is found, its entries will be shown as the defaults + | in the command line prompt. +row +cell #[code --force], #[code -f] +cell flag