spaCy/spacy/cli/package.py
Ines Montani 3141e04822
💫 New system for error messages and warnings (#2163)
* Add spacy.errors module

* Update deprecation and user warnings

* Replace errors and asserts with new error message system

* Remove redundant asserts

* Fix whitespace

* Add messages for print/util.prints statements

* Fix typo

* Fix typos

* Move CLI messages to spacy.cli._messages

* Add decorator to display error code with message

An implementation like this is nice because it only modifies the string when it's retrieved from the containing class – so we don't have to worry about manipulating tracebacks etc.

* Remove unused link in spacy.about

* Update errors for invalid pipeline components

* Improve error for unknown factories

* Add displaCy warnings

* Update formatting consistency

* Move error message to spacy.errors

* Update errors and check if doc returned by component is None
2018-04-03 15:50:31 +02:00

198 lines
6.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding: utf8
from __future__ import unicode_literals
import plac
import shutil
from pathlib import Path
from ._messages import Messages
from ..compat import path2str, json_dumps
from ..util import prints
from .. import util
from .. import about
@plac.annotations(
input_dir=("directory with model data", "positional", None, str),
output_dir=("output parent directory", "positional", None, str),
meta_path=("path to meta.json", "option", "m", str),
create_meta=("create meta.json, even if one exists in directory if "
"existing meta is found, entries are shown as defaults in "
"the command line prompt", "flag", "c", bool),
force=("force overwriting of existing model directory in output directory",
"flag", "f", bool))
def package(input_dir, output_dir, meta_path=None, create_meta=False,
force=False):
"""
Generate Python package for model data, including meta and required
installation files. A new directory will be created in the specified
output directory, and model data will be copied over.
"""
input_path = util.ensure_path(input_dir)
output_path = util.ensure_path(output_dir)
meta_path = util.ensure_path(meta_path)
if not input_path or not input_path.exists():
prints(input_path, title=Messages.M008, exits=1)
if not output_path or not output_path.exists():
prints(output_path, title=Messages.M040, exits=1)
if meta_path and not meta_path.exists():
prints(meta_path, title=Messages.M020, exits=1)
meta_path = meta_path or input_path / 'meta.json'
if meta_path.is_file():
meta = util.read_json(meta_path)
if not create_meta: # only print this if user doesn't want to overwrite
prints(meta_path, title=Messages.M041)
else:
meta = generate_meta(input_dir, meta)
meta = validate_meta(meta, ['lang', 'name', 'version'])
model_name = meta['lang'] + '_' + meta['name']
model_name_v = model_name + '-' + meta['version']
main_path = output_path / model_name_v
package_path = main_path / model_name
create_dirs(package_path, force)
shutil.copytree(path2str(input_path),
path2str(package_path / model_name_v))
create_file(main_path / 'meta.json', json_dumps(meta))
create_file(main_path / 'setup.py', TEMPLATE_SETUP)
create_file(main_path / 'MANIFEST.in', TEMPLATE_MANIFEST)
create_file(package_path / '__init__.py', TEMPLATE_INIT)
prints(main_path, Messages.M043,
title=Messages.M042.format(name=model_name_v))
def create_dirs(package_path, force):
if package_path.exists():
if force:
shutil.rmtree(path2str(package_path))
else:
prints(package_path, Messages.M045, title=Messages.M044, exits=1)
Path.mkdir(package_path, parents=True)
def create_file(file_path, contents):
file_path.touch()
file_path.open('w', encoding='utf-8').write(contents)
def generate_meta(model_path, existing_meta):
meta = existing_meta or {}
settings = [('lang', 'Model language', meta.get('lang', 'en')),
('name', 'Model name', meta.get('name', 'model')),
('version', 'Model version', meta.get('version', '0.0.0')),
('spacy_version', 'Required spaCy version',
'>=%s,<3.0.0' % about.__version__),
('description', 'Model description',
meta.get('description', False)),
('author', 'Author', meta.get('author', False)),
('email', 'Author email', meta.get('email', False)),
('url', 'Author website', meta.get('url', False)),
('license', 'License', meta.get('license', 'CC BY-SA 3.0'))]
nlp = util.load_model_from_path(Path(model_path))
meta['pipeline'] = nlp.pipe_names
meta['vectors'] = {'width': nlp.vocab.vectors_length,
'vectors': len(nlp.vocab.vectors),
'keys': nlp.vocab.vectors.n_keys}
prints(Messages.M047, title=Messages.Mo46)
for setting, desc, default in settings:
response = util.get_raw_input(desc, default)
meta[setting] = default if response == '' and default else response
if about.__title__ != 'spacy':
meta['parent_package'] = about.__title__
return meta
def validate_meta(meta, keys):
for key in keys:
if key not in meta or meta[key] == '':
prints(Messages.M049, title=Messages.M048.format(key=key), exits=1)
return meta
TEMPLATE_SETUP = """
#!/usr/bin/env python
# coding: utf8
from __future__ import unicode_literals
import io
import json
from os import path, walk
from shutil import copy
from setuptools import setup
def load_meta(fp):
with io.open(fp, encoding='utf8') as f:
return json.load(f)
def list_files(data_dir):
output = []
for root, _, filenames in walk(data_dir):
for filename in filenames:
if not filename.startswith('.'):
output.append(path.join(root, filename))
output = [path.relpath(p, path.dirname(data_dir)) for p in output]
output.append('meta.json')
return output
def list_requirements(meta):
parent_package = meta.get('parent_package', 'spacy')
requirements = [parent_package + ">=" + meta['spacy_version']]
if 'setup_requires' in meta:
requirements += meta['setup_requires']
return requirements
def setup_package():
root = path.abspath(path.dirname(__file__))
meta_path = path.join(root, 'meta.json')
meta = load_meta(meta_path)
model_name = str(meta['lang'] + '_' + meta['name'])
model_dir = path.join(model_name, model_name + '-' + meta['version'])
copy(meta_path, path.join(model_name))
copy(meta_path, model_dir)
setup(
name=model_name,
description=meta['description'],
author=meta['author'],
author_email=meta['email'],
url=meta['url'],
version=meta['version'],
license=meta['license'],
packages=[model_name],
package_data={model_name: list_files(model_dir)},
install_requires=list_requirements(meta),
zip_safe=False,
)
if __name__ == '__main__':
setup_package()
""".strip()
TEMPLATE_MANIFEST = """
include meta.json
""".strip()
TEMPLATE_INIT = """
# coding: utf8
from __future__ import unicode_literals
from pathlib import Path
from spacy.util import load_model_from_init_py, get_model_meta
__version__ = get_model_meta(Path(__file__).parent)['version']
def load(**overrides):
return load_model_from_init_py(__file__, **overrides)
""".strip()