Merge branch 'develop'

# Conflicts:
#	spacy/util.py
This commit is contained in:
ines 2017-03-22 17:14:15 +01:00
commit 3f20efe165
8 changed files with 171 additions and 7 deletions

View File

@ -5,7 +5,7 @@ import json
from pathlib import Path from pathlib import Path
from .util import set_lang_class, get_lang_class, parse_package_meta from .util import set_lang_class, get_lang_class, parse_package_meta
from .deprecated import resolve_model_name from .deprecated import resolve_model_name
from .cli.info import info from .cli import info
from . import en from . import en
from . import de from . import de

View File

@ -1,5 +1,4 @@
# coding: utf8 # coding: utf8
#
from __future__ import print_function from __future__ import print_function
# NB! This breaks in plac on Python 2!! # NB! This breaks in plac on Python 2!!
#from __future__ import unicode_literals, #from __future__ import unicode_literals,
@ -8,12 +7,13 @@ import plac
from spacy.cli import download as cli_download from spacy.cli import download as cli_download
from spacy.cli import link as cli_link from spacy.cli import link as cli_link
from spacy.cli import info as cli_info from spacy.cli import info as cli_info
from spacy.cli import package as cli_package
class CLI(object): class CLI(object):
"""Command-line interface for spaCy""" """Command-line interface for spaCy"""
commands = ('download', 'link', 'info') commands = ('download', 'link', 'info', 'package')
@plac.annotations( @plac.annotations(
model=("model to download (shortcut or model name)", "positional", None, str), model=("model to download (shortcut or model name)", "positional", None, str),
@ -32,8 +32,8 @@ class CLI(object):
@plac.annotations( @plac.annotations(
origin=("package name or local path to model", "positional", None, str), origin=("package name or local path to model", "positional", None, str),
link_name=("Name of shortuct link to create", "positional", None, str), link_name=("name of shortuct link to create", "positional", None, str),
force=("Force overwriting of existing link", "flag", "f", bool) force=("force overwriting of existing link", "flag", "f", bool)
) )
def link(self, origin, link_name, force=False): def link(self, origin, link_name, force=False):
""" """
@ -59,6 +59,21 @@ class CLI(object):
cli_info(model, markdown) cli_info(model, markdown)
@plac.annotations(
    input_dir=("directory with model data", "positional", None, str),
    output_dir=("output directory", "positional", None, str),
    force=("force overwriting of existing folder in output directory", "flag", "f", bool)
)
def package(self, input_dir, output_dir, force=False):
    """
    Generate Python package for model data, including meta and required
    installation files. A new directory will be created in the specified
    output directory, and model data will be copied over.
    """
    # Thin wrapper: all work is delegated to spacy.cli.package, which is
    # imported in this module under the alias `cli_package`.
    cli_package(input_dir, output_dir, force)
def __missing__(self, name): def __missing__(self, name):
print("\n Command %r does not exist\n" % name) print("\n Command %r does not exist\n" % name)

View File

@ -1,3 +1,4 @@
from .download import download from .download import download
from .info import info from .info import info
from .link import link from .link import link
from .package import package

91
spacy/cli/package.py Normal file
View File

@ -0,0 +1,91 @@
# coding: utf8
from __future__ import unicode_literals
import json
import shutil
import requests
from pathlib import Path
from .. import about
from .. import util
def package(input_dir, output_dir, force):
    """Build an installable Python package skeleton around a model directory.

    input_dir (str): path to the directory holding the model data.
    output_dir (str): existing directory in which the package folder is created.
    force (bool): passed on to create_dirs; if set, an already existing
        package directory is removed instead of aborting.

    The resulting layout is
    <output_dir>/<lang>_<name>-<version>/<lang>_<name>/<lang>_<name>-<version>,
    with meta.json, setup.py and MANIFEST.in next to the inner package folder.
    """
    source = Path(input_dir)
    destination = Path(output_dir)
    check_dirs(source, destination)

    # Templates are fetched first, so a failed download aborts the command
    # before the user has been asked to type in any meta data.
    setup_template = get_template('setup.py')
    manifest_template = get_template('MANIFEST.in')
    init_template = get_template('en_model_name/__init__.py')

    meta = generate_meta()
    model_name = '_'.join((meta['lang'], meta['name']))
    model_name_v = '-'.join((model_name, meta['version']))
    main_path = destination / model_name_v
    package_path = main_path / model_name

    create_dirs(package_path, force)
    data_target = package_path / model_name_v
    shutil.copytree(source.as_posix(), data_target.as_posix())

    create_file(main_path / 'meta.json', json.dumps(meta, indent=2))
    create_file(main_path / 'setup.py', setup_template)
    create_file(main_path / 'MANIFEST.in', manifest_template)
    create_file(package_path / '__init__.py', init_template)

    success_title = "Successfully created package {p}".format(p=model_name_v)
    util.print_msg(
        main_path.as_posix(),
        "To build the package, run `python setup.py sdist` in that directory.",
        title=success_title)
def check_dirs(input_path, output_path):
    """Verify that the model data directory and the output directory exist.

    input_path (Path): directory containing the model data.
    output_path (Path): directory the package folder will be created in.

    If either path does not exist, util.sys_exit is called with the
    offending path and a matching title.
    """
    if not input_path.exists():
        # `as_posix` was misspelled here before, so a missing model directory
        # raised AttributeError instead of reporting the intended message.
        util.sys_exit(input_path.as_posix(), title="Model directory not found")
    if not output_path.exists():
        util.sys_exit(output_path.as_posix(), title="Output directory not found")
def create_dirs(package_path, force):
    """Create a fresh, empty package directory, including missing parents.

    package_path (Path): directory to create.
    force (bool): if the directory already exists, delete it (with all of
        its contents) when True; otherwise report the conflict through
        util.sys_exit.
    """
    already_present = package_path.exists()
    if already_present and force:
        shutil.rmtree(package_path.as_posix())
    elif already_present:
        util.sys_exit(
            package_path.as_posix(),
            "Please delete the directory and try again.",
            title="Package directory already exists")
    package_path.mkdir(parents=True)
def create_file(file_path, contents):
    """Write `contents` to `file_path` as UTF-8, creating or overwriting it.

    file_path (Path): location of the file to write.
    contents: text to write; byte strings are decoded as UTF-8 first, so
        the result of json.dumps on Python 2 is accepted as well.
    """
    if isinstance(contents, bytes):
        contents = contents.decode('utf-8')
    # The encoding is an argument of open(), not of write(); passing it to
    # write() raised TypeError. The context manager also guarantees that the
    # handle is flushed and closed. Opening in 'w' mode creates the file, so
    # no separate touch() is needed.
    with file_path.open('w', encoding='utf-8') as file_:
        file_.write(contents)
def generate_meta():
    """Interactively collect the values for the model's meta.json.

    Each setting is prompted for via util.get_raw_input. An empty answer
    falls back to the setting's default if it has one; settings without a
    default keep the empty answer.
    RETURNS (dict): the collected meta data, keyed by setting name.
    """
    # (key in meta.json, prompt text, default value or False for "none")
    settings = [
        ('lang', 'Model language', 'en'),
        ('name', 'Model name', 'model'),
        ('version', 'Model version', '0.0.0'),
        ('spacy_version', 'Required spaCy version', '>=1.7.0,<2.0.0'),
        ('description', 'Model description', False),
        ('author', 'Author', False),
        ('email', 'Author email', False),
        ('url', 'Author website', False),
        ('license', 'License', 'CC BY-NC 3.0'),
    ]
    util.print_msg("Enter the package settings for your model.",
                   title="Generating meta.json")
    meta = {}
    for key, prompt_text, fallback in settings:
        answer = util.get_raw_input(prompt_text, fallback)
        if answer == '' and fallback:
            meta[key] = fallback
        else:
            meta[key] = answer
    return meta
def get_template(filepath):
    """Download one of the model package template files from GitHub.

    filepath (str): path of the template, relative to the templates/model
        directory of the spacy-dev-resources repository.
    RETURNS: the text of the response body.

    Any status code other than 200 is reported through util.sys_exit.
    """
    base_url = 'https://raw.githubusercontent.com/explosion/spacy-dev-resources/master/templates/model/'
    response = requests.get(base_url + filepath)
    status = response.status_code
    if status != 200:
        util.sys_exit(
            "Couldn't fetch template files from GitHub.",
            title="Server error ({c})".format(c=status))
    return response.text

View File

@ -10,12 +10,19 @@ import sys
import textwrap import textwrap
try: try:
basestring basestring
except NameError: except NameError:
basestring = str basestring = str
# Compatibility shim: make `raw_input` available under the same name when it
# is not defined (the NameError case), by aliasing it to `input`.
try:
    raw_input
except NameError: # Python 3
    raw_input = input
LANGUAGES = {} LANGUAGES = {}
_data_path = pathlib.Path(__file__).parent / 'data' _data_path = pathlib.Path(__file__).parent / 'data'
@ -158,6 +165,17 @@ def parse_package_meta(package_path, package, require=True):
return None return None
def get_raw_input(description, default=False):
    """Prompt the user on the command line and return what they typed.

    description (unicode): text shown in the prompt.
    default: optional default value; if truthy, it is displayed in the
        prompt as a hint. It is not substituted for an empty answer here,
        that is left to the caller.
    RETURNS: the value returned by raw_input / input.
    """
    if default:
        additional = ' (default: {d})'.format(d=default)
    else:
        additional = ''
    return raw_input(' {d}{a}: '.format(d=description, a=additional))
def print_table(data, **kwargs): def print_table(data, **kwargs):
"""Print data in table format. Can either take a list of tuples or a """Print data in table format. Can either take a list of tuples or a
dictionary, which will be converted to a list of tuples.""" dictionary, which will be converted to a list of tuples."""

View File

@ -44,7 +44,7 @@ $color-red: #d9515d
$color-green: #3ec930 $color-green: #3ec930
$color-yellow: #f4c025 $color-yellow: #f4c025
$syntax-highlighting: ( comment: #949e9b, tag: #3ec930, number: #B084EB, selector: #FFB86C, operator: #FF2C6D, function: #09a3d5, keyword: #45A9F9, regex: #f4c025 ) $syntax-highlighting: ( comment: #949e9b, tag: #b084eb, number: #b084eb, selector: #ffb86c, operator: #ff2c6d, function: #35b3dc, keyword: #45a9f9, regex: #f4c025 )
$pattern: $color-theme url("/assets/img/pattern_#{$theme}.jpg") center top repeat $pattern: $color-theme url("/assets/img/pattern_#{$theme}.jpg") center top repeat
$pattern-overlay: transparent url("/assets/img/pattern_landing.jpg") center -138px no-repeat $pattern-overlay: transparent url("/assets/img/pattern_landing.jpg") center -138px no-repeat

View File

@ -103,3 +103,38 @@ p
+cell #[code --help], #[code -h] +cell #[code --help], #[code -h]
+cell flag +cell flag
+cell Show help message and available arguments. +cell Show help message and available arguments.
+h(2, "package") Package
+tag experimental
p
| Generate a #[+a("/docs/usage/models#own-models") model Python package]
| from an existing model data directory. All data files are copied over,
| and the meta data can be entered directly from the command line. While
| this feature is still experimental, the required file templates are
| downloaded from #[+src(gh("spacy-dev-resources", "templates/model")) GitHub].
| This means you need to be connected to the internet to use this command.
+code(false, "bash").
python -m spacy package [input_dir] [output_dir] [--force]
+table(["Argument", "Type", "Description"])
+row
+cell #[code input_dir]
+cell positional
+cell Path to directory containing model data.
+row
+cell #[code output_dir]
+cell positional
+cell Directory to create package folder in.
+row
+cell #[code --force], #[code -f]
+cell flag
+cell Force overwriting of existing folder in output directory.
+row
+cell #[code --help], #[code -h]
+cell flag
+cell Show help message and available arguments.

View File

@ -238,7 +238,11 @@ p
| #[+a("/docs/usage/adding-languages") additional languages], you can | #[+a("/docs/usage/adding-languages") additional languages], you can
| create a shortcut link for it by pointing #[code spacy.link] to the | create a shortcut link for it by pointing #[code spacy.link] to the
| model's data directory. To allow your model to be downloaded and | model's data directory. To allow your model to be downloaded and
| installed via pip, you'll also need to generate a package for it. | installed via pip, you'll also need to generate a package for it. You can
| do this manually, or via the new
| #[+a("/docs/usage/cli#package") #[code spacy package] command] that will
| create all required files, and walk you through generating the meta data.
+infobox("Important note") +infobox("Important note")
| The model packages are #[strong not suitable] for the public | The model packages are #[strong not suitable] for the public