diff --git a/spacy/__init__.py b/spacy/__init__.py
index 62ab41c90..80bd1c539 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -5,7 +5,7 @@ import json
 from pathlib import Path
 from .util import set_lang_class, get_lang_class, parse_package_meta
 from .deprecated import resolve_model_name
-from .cli.info import info
+from .cli import info
 
 from . import en
 from . import de
diff --git a/spacy/__main__.py b/spacy/__main__.py
index 9addbccde..cde146cba 100644
--- a/spacy/__main__.py
+++ b/spacy/__main__.py
@@ -1,5 +1,4 @@
 # coding: utf8
-#
 from __future__ import print_function
 # NB! This breaks in plac on Python 2!!
 #from __future__ import unicode_literals,
@@ -8,12 +7,13 @@ import plac
 from spacy.cli import download as cli_download
 from spacy.cli import link as cli_link
 from spacy.cli import info as cli_info
+from spacy.cli import package as cli_package
 
 
 class CLI(object):
     """Command-line interface for spaCy"""
 
-    commands = ('download', 'link', 'info')
+    commands = ('download', 'link', 'info', 'package')
 
     @plac.annotations(
         model=("model to download (shortcut or model name)", "positional", None, str),
@@ -32,8 +32,8 @@ class CLI(object):
 
     @plac.annotations(
         origin=("package name or local path to model", "positional", None, str),
-        link_name=("Name of shortuct link to create", "positional", None, str),
-        force=("Force overwriting of existing link", "flag", "f", bool)
+        link_name=("name of shortcut link to create", "positional", None, str),
+        force=("force overwriting of existing link", "flag", "f", bool)
     )
     def link(self, origin, link_name, force=False):
         """
@@ -59,6 +59,21 @@ class CLI(object):
         cli_info(model, markdown)
 
 
+    @plac.annotations(
+        input_dir=("directory with model data", "positional", None, str),
+        output_dir=("output directory", "positional", None, str),
+        force=("force overwriting of existing folder in output directory", "flag", "f", bool)
+    )
+    def package(self, input_dir, output_dir, force=False):
+        """
+        Generate Python package for model data, including meta and required
+        installation files. A new directory will be created in the specified
+        output directory, and model data will be copied over.
+        """
+
+        cli_package(input_dir, output_dir, force)
+
+
     def __missing__(self, name):
         print("\n Command %r does not exist\n" % name)
 
diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
index 2c45b471a..2383e04b9 100644
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -1,3 +1,4 @@
 from .download import download
 from .info import info
 from .link import link
+from .package import package
diff --git a/spacy/cli/package.py b/spacy/cli/package.py
new file mode 100644
index 000000000..5cab2b4bc
--- /dev/null
+++ b/spacy/cli/package.py
@@ -0,0 +1,109 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+import json
+import shutil
+import requests
+from pathlib import Path
+
+from .. import about
+from .. import util
+
+
+def package(input_dir, output_dir, force):
+    """Generate a model package in output_dir from the data in input_dir.
+
+    Fetches the file templates, prompts for the meta settings, then writes
+    meta.json, setup.py and MANIFEST.in to <output_dir>/<lang>_<name>-<version>/
+    and __init__.py plus a copy of the model data to <lang>_<name>/ inside it.
+    """
+    input_path = Path(input_dir)
+    output_path = Path(output_dir)
+    check_dirs(input_path, output_path)
+
+    template_setup = get_template('setup.py')
+    template_manifest = get_template('MANIFEST.in')
+    template_init = get_template('en_model_name/__init__.py')
+    meta = generate_meta()
+
+    model_name = meta['lang'] + '_' + meta['name']
+    model_name_v = model_name + '-' + meta['version']
+    main_path = output_path / model_name_v
+    package_path = main_path / model_name
+
+    create_dirs(package_path, force)
+    shutil.copytree(input_path.as_posix(), (package_path / model_name_v).as_posix())
+    create_file(main_path / 'meta.json', json.dumps(meta, indent=2))
+    create_file(main_path / 'setup.py', template_setup)
+    create_file(main_path / 'MANIFEST.in', template_manifest)
+    create_file(package_path / '__init__.py', template_init)
+
+    util.print_msg(
+        main_path.as_posix(),
+        "To build the package, run `python setup.py sdist` in that directory.",
+        title="Successfully created package {p}".format(p=model_name_v))
+
+
+def check_dirs(input_path, output_path):
+    """Call util.sys_exit with a message if either directory is missing."""
+    if not input_path.exists():
+        util.sys_exit(input_path.as_posix(), title="Model directory not found")
+    if not output_path.exists():
+        util.sys_exit(output_path.as_posix(), title="Output directory not found")
+
+
+def create_dirs(package_path, force):
+    """Create package_path, removing an existing directory only if force."""
+    if package_path.exists():
+        if force:
+            shutil.rmtree(package_path.as_posix())
+        else:
+            util.sys_exit(package_path.as_posix(),
+                "Please delete the directory and try again.",
+                title="Package directory already exists")
+    package_path.mkdir(parents=True)
+
+
+def create_file(file_path, contents):
+    """Write contents to file_path as UTF-8, creating or overwriting it."""
+    # The context manager closes the handle even if the write fails.
+    with file_path.open('w', encoding='utf-8') as f:
+        f.write(contents)
+
+
+def generate_meta():
+    """Prompt for each meta.json setting and return the answers as a dict.
+
+    An empty answer falls back to the setting's default, if it has one.
+    """
+    settings = [('lang', 'Model language', 'en'),
+                ('name', 'Model name', 'model'),
+                ('version', 'Model version', '0.0.0'),
+                ('spacy_version', 'Required spaCy version', '>=1.7.0,<2.0.0'),
+                ('description', 'Model description', False),
+                ('author', 'Author', False),
+                ('email', 'Author email', False),
+                ('url', 'Author website', False),
+                ('license', 'License', 'CC BY-NC 3.0')]
+
+    util.print_msg("Enter the package settings for your model.", title="Generating meta.json")
+
+    meta = {}
+    for setting, desc, default in settings:
+        response = util.get_raw_input(desc, default)
+        meta[setting] = default if response == '' and default else response
+    return meta
+
+
+def get_template(filepath):
+    """Fetch a template file from the spacy-dev-resources repo on GitHub.
+
+    Returns the response text; calls util.sys_exit on a non-200 status.
+    """
+    url = 'https://raw.githubusercontent.com/explosion/spacy-dev-resources/master/templates/model/'
+    r = requests.get(url + filepath)
+    if r.status_code != 200:
+        util.sys_exit(
+            "Couldn't fetch template files from GitHub.",
+            title="Server error ({c})".format(c=r.status_code))
+    return r.text
diff --git a/spacy/util.py b/spacy/util.py
index b255b92db..893ba87c1 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -10,12 +10,19 @@ import sys
 import textwrap
 
 
+
 try:
     basestring
 except NameError:
     basestring = str
 
 
+try:
+    raw_input
+except NameError:  # Python 3
+    raw_input = input
+
+
 LANGUAGES = {}
 _data_path = pathlib.Path(__file__).parent / 'data'
 
@@ -158,6 +165,17 @@ def parse_package_meta(package_path, package, require=True):
         return None
 
 
+def get_raw_input(description, default=False):
+    """Get user input via raw_input / input and return input value. Takes a
+    description for the prompt, and an optional default value that's displayed
+    with the prompt."""
+
+    additional = ' (default: {d})'.format(d=default) if default else ''
+    prompt = ' {d}{a}: '.format(d=description, a=additional)
+    user_input = raw_input(prompt)
+    return user_input
+
+
 def print_table(data, **kwargs):
     """Print data in table format. Can either take a list of tuples or a
     dictionary, which will be converted to a list of tuples."""
diff --git a/website/assets/css/_variables.sass b/website/assets/css/_variables.sass
index bfef915be..1c38d114a 100644
--- a/website/assets/css/_variables.sass
+++ b/website/assets/css/_variables.sass
@@ -44,7 +44,7 @@ $color-red: #d9515d
 $color-green: #3ec930
 $color-yellow: #f4c025
 
-$syntax-highlighting: ( comment: #949e9b, tag: #3ec930, number: #B084EB, selector: #FFB86C, operator: #FF2C6D, function: #09a3d5, keyword: #45A9F9, regex: #f4c025 )
+$syntax-highlighting: ( comment: #949e9b, tag: #b084eb, number: #b084eb, selector: #ffb86c, operator: #ff2c6d, function: #35b3dc, keyword: #45a9f9, regex: #f4c025 )
 
 $pattern: $color-theme url("/assets/img/pattern_#{$theme}.jpg") center top repeat
 $pattern-overlay: transparent url("/assets/img/pattern_landing.jpg") center -138px no-repeat
diff --git a/website/docs/usage/cli.jade b/website/docs/usage/cli.jade
index 990117542..66be83923 100644
--- a/website/docs/usage/cli.jade
+++ b/website/docs/usage/cli.jade
@@ -103,3 +103,38 @@ p
         +cell #[code --help], #[code -h]
         +cell flag
         +cell Show help message and available arguments.
+
++h(2, "package") Package
+    +tag experimental
+
+p
+    | Generate a #[+a("/docs/usage/models#own-models") model Python package]
+    | from an existing model data directory. All data files are copied over,
+    | and the meta data can be entered directly from the command line. While
+    | this feature is still experimental, the required file templates are
+    | downloaded from #[+src(gh("spacy-dev-resources", "templates/model")) GitHub].
+    | This means you need to be connected to the internet to use this command.
+
++code(false, "bash").
+    python -m spacy package [input_dir] [output_dir] [--force]
+
++table(["Argument", "Type", "Description"])
+    +row
+        +cell #[code input_dir]
+        +cell positional
+        +cell Path to directory containing model data.
+
+    +row
+        +cell #[code output_dir]
+        +cell positional
+        +cell Directory to create package folder in.
+
+    +row
+        +cell #[code --force], #[code -f]
+        +cell flag
+        +cell Force overwriting of existing folder in output directory.
+
+    +row
+        +cell #[code --help], #[code -h]
+        +cell flag
+        +cell Show help message and available arguments.
diff --git a/website/docs/usage/models.jade b/website/docs/usage/models.jade
index ae1417a29..39c271df4 100644
--- a/website/docs/usage/models.jade
+++ b/website/docs/usage/models.jade
@@ -238,7 +238,11 @@ p
     | #[+a("/docs/usage/adding-languages") additional languages], you can
     | create a shortuct link for it by pointing #[code spacy.link] to the
     | model's data directory. To allow your model to be downloaded and
-    | installed via pip, you'll also need to generate a package for it.
+    | installed via pip, you'll also need to generate a package for it. You can
+    | do this manually, or via the new
+    | #[+a("/docs/usage/cli#package") #[code spacy package] command] that will
+    | create all required files, and walk you through generating the meta data.
+
 
 +infobox("Important note")
     | The model packages are #[strong not suitable] for the public