Populate --create-meta defaults from file if available

If a meta.json is found in the input directory and the user chooses to overwrite it with --create-meta, show its existing entries as the defaults in the command line prompt.
This commit is contained in:
ines 2017-10-30 18:39:38 +01:00
parent ce98fa7934
commit abf8aa05d3
2 changed files with 31 additions and 27 deletions

View File

@ -16,10 +16,11 @@ from .. import about
input_dir=("directory with model data", "positional", None, str), input_dir=("directory with model data", "positional", None, str),
output_dir=("output parent directory", "positional", None, str), output_dir=("output parent directory", "positional", None, str),
meta_path=("path to meta.json", "option", "m", str), meta_path=("path to meta.json", "option", "m", str),
create_meta=("create meta.json, even if one exists in directory", "flag", create_meta=("create meta.json, even if one exists in directory if "
"c", bool), "existing meta is found, entries are shown as defaults in "
force=("force overwriting of existing folder in output directory", "flag", "the command line prompt", "flag", "c", bool),
"f", bool)) force=("force overwriting of existing model directory in output directory",
"flag", "f", bool))
def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False,
force=False): force=False):
""" """
@ -41,13 +42,13 @@ def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False,
template_manifest = get_template('MANIFEST.in') template_manifest = get_template('MANIFEST.in')
template_init = get_template('xx_model_name/__init__.py') template_init = get_template('xx_model_name/__init__.py')
meta_path = meta_path or input_path / 'meta.json' meta_path = meta_path or input_path / 'meta.json'
if not create_meta and meta_path.is_file(): if meta_path.is_file():
prints(meta_path, title="Reading meta.json from file")
meta = util.read_json(meta_path) meta = util.read_json(meta_path)
else: if not create_meta: # only print this if user doesn't want to overwrite
meta = generate_meta(input_dir) prints(meta_path, title="Loaded meta.json from file")
else:
meta = generate_meta(input_dir, meta)
meta = validate_meta(meta, ['lang', 'name', 'version']) meta = validate_meta(meta, ['lang', 'name', 'version'])
model_name = meta['lang'] + '_' + meta['name'] model_name = meta['lang'] + '_' + meta['name']
model_name_v = model_name + '-' + meta['version'] model_name_v = model_name + '-' + meta['version']
main_path = output_path / model_name_v main_path = output_path / model_name_v
@ -82,18 +83,19 @@ def create_file(file_path, contents):
file_path.open('w', encoding='utf-8').write(contents) file_path.open('w', encoding='utf-8').write(contents)
def generate_meta(model_path): def generate_meta(model_path, existing_meta):
meta = {} meta = existing_meta or {}
settings = [('lang', 'Model language', 'en'), settings = [('lang', 'Model language', meta.get('lang', 'en')),
('name', 'Model name', 'model'), ('name', 'Model name', meta.get('name', 'model')),
('version', 'Model version', '0.0.0'), ('version', 'Model version', meta.get('version', '0.0.0')),
('spacy_version', 'Required spaCy version', ('spacy_version', 'Required spaCy version',
'>=%s,<3.0.0' % about.__version__), '>=%s,<3.0.0' % about.__version__),
('description', 'Model description', False), ('description', 'Model description',
('author', 'Author', False), meta.get('description', False)),
('email', 'Author email', False), ('author', 'Author', meta.get('author', False)),
('url', 'Author website', False), ('email', 'Author email', meta.get('email', False)),
('license', 'License', 'CC BY-NC 3.0')] ('url', 'Author website', meta.get('url', False)),
('license', 'License', meta.get('license', 'CC BY-SA 3.0'))]
nlp = util.load_model_from_path(Path(model_path)) nlp = util.load_model_from_path(Path(model_path))
meta['pipeline'] = nlp.pipe_names meta['pipeline'] = nlp.pipe_names
meta['vectors'] = {'width': nlp.vocab.vectors_length, meta['vectors'] = {'width': nlp.vocab.vectors_length,

View File

@ -453,10 +453,11 @@ p
p p
| Generate a #[+a("/usage/training#models-generating") model Python package] | Generate a #[+a("/usage/training#models-generating") model Python package]
| from an existing model data directory. All data files are copied over. | from an existing model data directory. All data files are copied over.
| If the path to a meta.json is supplied, or a meta.json is found in the | If the path to a #[code meta.json] is supplied, or a #[code meta.json] is
| input directory, this file is used. Otherwise, the data can be entered | found in the input directory, this file is used. Otherwise, the data can
| directly from the command line. The required file templates are downloaded | be entered directly from the command line. The required file templates
| from #[+src(gh("spacy-dev-resources", "templates/model")) GitHub] to make | are downloaded from
| #[+src(gh("spacy-dev-resources", "templates/model")) GitHub] to make
| sure you're always using the latest versions. This means you need to be | sure you're always using the latest versions. This means you need to be
| connected to the internet to use this command. | connected to the internet to use this command.
@ -477,15 +478,16 @@ p
+row +row
+cell #[code --meta-path], #[code -m] +cell #[code --meta-path], #[code -m]
+cell option +cell option
+cell #[+tag-new(2)] Path to meta.json file (optional). +cell #[+tag-new(2)] Path to #[code meta.json] file (optional).
+row +row
+cell #[code --create-meta], #[code -c] +cell #[code --create-meta], #[code -c]
+cell flag +cell flag
+cell +cell
| #[+tag-new(2)] Create a meta.json file on the command line, even | #[+tag-new(2)] Create a #[code meta.json] file on the command
| if one already exists in the directory. | line, even if one already exists in the directory. If an
| existing file is found, its entries will be shown as the defaults
| in the command line prompt.
+row +row
+cell #[code --force], #[code -f] +cell #[code --force], #[code -f]
+cell flag +cell flag