mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Tidy up CLI and fix print functions
This commit is contained in:
		
							parent
							
								
									311704674d
								
							
						
					
					
						commit
						59c3b9d4dd
					
				| 
						 | 
					@ -2,6 +2,7 @@
 | 
				
			||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from . import util
 | 
					from . import util
 | 
				
			||||||
 | 
					from .util import prints
 | 
				
			||||||
from .deprecated import resolve_model_name
 | 
					from .deprecated import resolve_model_name
 | 
				
			||||||
from .cli.info import info
 | 
					from .cli.info import info
 | 
				
			||||||
from .glossary import explain
 | 
					from .glossary import explain
 | 
				
			||||||
| 
						 | 
					@ -26,9 +27,8 @@ def load(name, **overrides):
 | 
				
			||||||
        if not model_path.exists():
 | 
					        if not model_path.exists():
 | 
				
			||||||
            lang_name = util.get_lang_class(name).lang
 | 
					            lang_name = util.get_lang_class(name).lang
 | 
				
			||||||
            model_path = None
 | 
					            model_path = None
 | 
				
			||||||
            util.print_msg(
 | 
					            prints("Only loading the '%s' tokenizer." % lang_name,
 | 
				
			||||||
                "Only loading the '{}' tokenizer.".format(lang_name),
 | 
					                   title="Warning: no model found for '%s'" % name)
 | 
				
			||||||
                title="Warning: no model found for '{}'".format(name))
 | 
					 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        model_path = util.ensure_path(overrides['path'])
 | 
					        model_path = util.ensure_path(overrides['path'])
 | 
				
			||||||
        data_path = model_path.parent
 | 
					        data_path = model_path.parent
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,7 +4,7 @@ from __future__ import unicode_literals
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .converters import conllu2json
 | 
					from .converters import conllu2json
 | 
				
			||||||
from .. import util
 | 
					from ..util import prints
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Converters are matched by file extension. To add a converter, add a new entry
 | 
					# Converters are matched by file extension. To add a converter, add a new entry
 | 
				
			||||||
| 
						 | 
					@ -19,17 +19,12 @@ CONVERTERS = {
 | 
				
			||||||
def convert(input_file, output_dir, *args):
 | 
					def convert(input_file, output_dir, *args):
 | 
				
			||||||
    input_path = Path(input_file)
 | 
					    input_path = Path(input_file)
 | 
				
			||||||
    output_path = Path(output_dir)
 | 
					    output_path = Path(output_dir)
 | 
				
			||||||
    check_dirs(input_path, output_path)
 | 
					    if not input_path.exists():
 | 
				
			||||||
    file_ext = input_path.suffix
 | 
					        prints(input_path, title="Input file not found", exits=True)
 | 
				
			||||||
    if file_ext in CONVERTERS:
 | 
					 | 
				
			||||||
        CONVERTERS[file_ext](input_path, output_path, *args)
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        util.sys_exit("Can't find converter for {}".format(input_path.parts[-1]),
 | 
					 | 
				
			||||||
                      title="Unknown format")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def check_dirs(input_file, output_path):
 | 
					 | 
				
			||||||
    if not input_file.exists():
 | 
					 | 
				
			||||||
        util.sys_exit(input_file.as_posix(), title="Input file not found")
 | 
					 | 
				
			||||||
    if not output_path.exists():
 | 
					    if not output_path.exists():
 | 
				
			||||||
        util.sys_exit(output_path.as_posix(), title="Output directory not found")
 | 
					        prints(output_path, title="Output directory not found", exits=True)
 | 
				
			||||||
 | 
					    file_ext = input_path.suffix
 | 
				
			||||||
 | 
					    if not file_ext in CONVERTERS:
 | 
				
			||||||
 | 
					        prints("Can't find converter for %s" % input_path.parts[-1],
 | 
				
			||||||
 | 
					               title="Unknown format", exits=True)
 | 
				
			||||||
 | 
					    CONVERTERS[file_ext](input_path, output_path, *args)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,9 +1,8 @@
 | 
				
			||||||
# coding: utf8
 | 
					# coding: utf8
 | 
				
			||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import json
 | 
					from ...compat import json_dumps, path2str
 | 
				
			||||||
from ...compat import json_dumps
 | 
					from ...util import prints
 | 
				
			||||||
from ... import util
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def conllu2json(input_path, output_path, n_sents=10, use_morphology=False):
 | 
					def conllu2json(input_path, output_path, n_sents=10, use_morphology=False):
 | 
				
			||||||
| 
						 | 
					@ -32,8 +31,8 @@ def conllu2json(input_path, output_path, n_sents=10, use_morphology=False):
 | 
				
			||||||
    output_file = output_path / output_filename
 | 
					    output_file = output_path / output_filename
 | 
				
			||||||
    with output_file.open('w', encoding='utf-8') as f:
 | 
					    with output_file.open('w', encoding='utf-8') as f:
 | 
				
			||||||
        f.write(json_dumps(docs))
 | 
					        f.write(json_dumps(docs))
 | 
				
			||||||
    util.print_msg("Created {} documents".format(len(docs)),
 | 
					    prints("Created %d documents" % len(docs),
 | 
				
			||||||
                   title="Generated output file {}".format(output_file))
 | 
					           title="Generated output file %s" % path2str(output_file))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def read_conllx(input_path, use_morphology=False, n=0):
 | 
					def read_conllx(input_path, use_morphology=False, n=0):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6,78 +6,52 @@ import os
 | 
				
			||||||
import subprocess
 | 
					import subprocess
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .link import link_package
 | 
					from .link import link
 | 
				
			||||||
 | 
					from ..util import prints
 | 
				
			||||||
from .. import about
 | 
					from .. import about
 | 
				
			||||||
from .. import util
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def download(model=None, direct=False):
 | 
					def download(model, direct=False):
 | 
				
			||||||
    check_error_depr(model)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if direct:
 | 
					    if direct:
 | 
				
			||||||
        download_model('{m}/{m}.tar.gz'.format(m=model))
 | 
					        download_model('{m}/{m}.tar.gz'.format(m=model))
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        model_name = check_shortcut(model)
 | 
					        shortcuts = get_json(about.__shortcuts__, "available shortcuts")
 | 
				
			||||||
 | 
					        model_name = shortcuts.get(model, model)
 | 
				
			||||||
        compatibility = get_compatibility()
 | 
					        compatibility = get_compatibility()
 | 
				
			||||||
        version = get_version(model_name, compatibility)
 | 
					        version = get_version(model_name, compatibility)
 | 
				
			||||||
        download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version))
 | 
					        download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version))
 | 
				
			||||||
        link_package(model_name, model, force=True)
 | 
					        link(model_name, model, force=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_json(url, desc):
 | 
					def get_json(url, desc):
 | 
				
			||||||
    r = requests.get(url)
 | 
					    r = requests.get(url)
 | 
				
			||||||
    if r.status_code != 200:
 | 
					    if r.status_code != 200:
 | 
				
			||||||
        util.sys_exit(
 | 
					        prints("Couldn't fetch %s. Please find a model for your spaCy installation "
 | 
				
			||||||
            "Couldn't fetch {d}. Please find the right model for your spaCy "
 | 
					               "(v%s), and download it manually." % (desc, about.__version__),
 | 
				
			||||||
            "installation (v{v}), and download it manually:".format(d=desc, v=about.__version__),
 | 
					               about.__docs__, title="Server error (%d)" % r.status_code, exits=True)
 | 
				
			||||||
            "python -m spacy.download [full model name + version] --direct",
 | 
					 | 
				
			||||||
            title="Server error ({c})".format(c=r.status_code))
 | 
					 | 
				
			||||||
    return r.json()
 | 
					    return r.json()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def check_shortcut(model):
 | 
					 | 
				
			||||||
    shortcuts = get_json(about.__shortcuts__, "available shortcuts")
 | 
					 | 
				
			||||||
    return shortcuts.get(model, model)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_compatibility():
 | 
					def get_compatibility():
 | 
				
			||||||
    version = about.__version__
 | 
					    version = about.__version__
 | 
				
			||||||
    comp_table = get_json(about.__compatibility__, "compatibility table")
 | 
					    comp_table = get_json(about.__compatibility__, "compatibility table")
 | 
				
			||||||
    comp = comp_table['spacy']
 | 
					    comp = comp_table['spacy']
 | 
				
			||||||
    if version not in comp:
 | 
					    if version not in comp:
 | 
				
			||||||
        util.sys_exit(
 | 
					        prints("No compatible models found for v%s of spaCy." % version,
 | 
				
			||||||
            "No compatible models found for v{v} of spaCy.".format(v=version),
 | 
					               title="Compatibility error", exits=True)
 | 
				
			||||||
            title="Compatibility error")
 | 
					 | 
				
			||||||
    return comp[version]
 | 
					    return comp[version]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_version(model, comp):
 | 
					def get_version(model, comp):
 | 
				
			||||||
    if model not in comp:
 | 
					    if model not in comp:
 | 
				
			||||||
        util.sys_exit(
 | 
					        version = about.__version__
 | 
				
			||||||
            "No compatible model found for "
 | 
					        prints("No compatible model found for '%s' (spaCy v%s)." % (model, version),
 | 
				
			||||||
            "'{m}' (spaCy v{v}).".format(m=model, v=about.__version__),
 | 
					               title="Compatibility error", exits=True)
 | 
				
			||||||
            title="Compatibility error")
 | 
					 | 
				
			||||||
    return comp[model][0]
 | 
					    return comp[model][0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def download_model(filename):
 | 
					def download_model(filename):
 | 
				
			||||||
    util.print_msg("Downloading {f}".format(f=filename))
 | 
					 | 
				
			||||||
    download_url = about.__download_url__ + '/' + filename
 | 
					    download_url = about.__download_url__ + '/' + filename
 | 
				
			||||||
    subprocess.call([sys.executable, '-m',
 | 
					    subprocess.call([sys.executable, '-m',
 | 
				
			||||||
        'pip', 'install', '--no-cache-dir', download_url],
 | 
					        'pip', 'install', '--no-cache-dir', download_url],
 | 
				
			||||||
        env=os.environ.copy())
 | 
					        env=os.environ.copy())
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def check_error_depr(model):
 | 
					 | 
				
			||||||
    if not model:
 | 
					 | 
				
			||||||
        util.sys_exit(
 | 
					 | 
				
			||||||
            "python -m spacy.download [name or shortcut]",
 | 
					 | 
				
			||||||
            title="Missing model name or shortcut")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if model == 'all':
 | 
					 | 
				
			||||||
        util.sys_exit(
 | 
					 | 
				
			||||||
            "As of v1.7.0, the download all command is deprecated. Please "
 | 
					 | 
				
			||||||
            "download the models individually via spacy.download [model name] "
 | 
					 | 
				
			||||||
            "or pip install. For more info on this, see the documentation: "
 | 
					 | 
				
			||||||
            "{d}".format(d=about.__docs__),
 | 
					 | 
				
			||||||
            title="Deprecated command")
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,49 +4,46 @@ from __future__ import unicode_literals
 | 
				
			||||||
import platform
 | 
					import platform
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ..compat import unicode_
 | 
					from ..compat import path2str
 | 
				
			||||||
from .. import about
 | 
					from .. import about
 | 
				
			||||||
from .. import util
 | 
					from .. import util
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def info(model=None, markdown=False):
 | 
					def info(model=None, markdown=False):
 | 
				
			||||||
    if model:
 | 
					    if model:
 | 
				
			||||||
        data = util.parse_package_meta(util.get_data_path(), model, require=True)
 | 
					        data_path = util.get_data_path()
 | 
				
			||||||
        model_path = Path(__file__).parent / util.get_data_path() / model
 | 
					        data = util.parse_package_meta(data_path, model, require=True)
 | 
				
			||||||
 | 
					        model_path = Path(__file__).parent / data_path / model
 | 
				
			||||||
        if model_path.resolve() != model_path:
 | 
					        if model_path.resolve() != model_path:
 | 
				
			||||||
            data['link'] = unicode_(model_path)
 | 
					            data['link'] = path2str(model_path)
 | 
				
			||||||
            data['source'] = unicode_(model_path.resolve())
 | 
					            data['source'] = path2str(model_path.resolve())
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            data['source'] = unicode_(model_path)
 | 
					            data['source'] = path2str(model_path)
 | 
				
			||||||
        print_info(data, "model " + model, markdown)
 | 
					        print_info(data, 'model %s' % model, markdown)
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        data = get_spacy_data()
 | 
					        data = {'spaCy version': about.__version__,
 | 
				
			||||||
        print_info(data, "spaCy", markdown)
 | 
					                'Location': path2str(Path(__file__).parent.parent),
 | 
				
			||||||
 | 
					                'Platform': platform.platform(),
 | 
				
			||||||
 | 
					                'Python version': platform.python_version(),
 | 
				
			||||||
 | 
					                'Models': list_models()}
 | 
				
			||||||
 | 
					        print_info(data, 'spaCy', markdown)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def print_info(data, title, markdown):
 | 
					def print_info(data, title, markdown):
 | 
				
			||||||
    title = "Info about {title}".format(title=title)
 | 
					    title = 'Info about %s' % title
 | 
				
			||||||
    if markdown:
 | 
					    if markdown:
 | 
				
			||||||
        util.print_markdown(data, title=title)
 | 
					        util.print_markdown(data, title=title)
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        util.print_table(data, title=title)
 | 
					        util.print_table(data, title=title)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_spacy_data():
 | 
					 | 
				
			||||||
    return {
 | 
					 | 
				
			||||||
        'spaCy version': about.__version__,
 | 
					 | 
				
			||||||
        'Location': unicode_(Path(__file__).parent.parent),
 | 
					 | 
				
			||||||
        'Platform': platform.platform(),
 | 
					 | 
				
			||||||
        'Python version': platform.python_version(),
 | 
					 | 
				
			||||||
        'Installed models': ', '.join(list_models())
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def list_models():
 | 
					def list_models():
 | 
				
			||||||
    # exclude common cache directories – this means models called "cache" etc.
 | 
					    def exclude_dir(dir_name):
 | 
				
			||||||
    # won't show up in list, but it seems worth it
 | 
					        # exclude common cache directories and hidden directories
 | 
				
			||||||
        exclude = ['cache', 'pycache', '__pycache__']
 | 
					        exclude = ['cache', 'pycache', '__pycache__']
 | 
				
			||||||
 | 
					        return dir_name in exclude or dir_name.startswith('.')
 | 
				
			||||||
    data_path = util.get_data_path()
 | 
					    data_path = util.get_data_path()
 | 
				
			||||||
    if data_path:
 | 
					    if data_path:
 | 
				
			||||||
        models = [f.parts[-1] for f in data_path.iterdir() if f.is_dir()]
 | 
					        models = [f.parts[-1] for f in data_path.iterdir() if f.is_dir()]
 | 
				
			||||||
        return [m for m in models if m not in exclude]
 | 
					        return ', '.join([m for m in models if not exclude_dir(m)])
 | 
				
			||||||
 | 
					    return '-'
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,78 +1,37 @@
 | 
				
			||||||
# coding: utf8
 | 
					# coding: utf8
 | 
				
			||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import pip
 | 
					 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
import importlib
 | 
					from ..compat import symlink_to, path2str
 | 
				
			||||||
from ..compat import unicode_, symlink_to
 | 
					from ..util import prints
 | 
				
			||||||
from .. import util
 | 
					from .. import util
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def link(origin, link_name, force=False):
 | 
					def link(origin, link_name, force=False):
 | 
				
			||||||
    if is_package(origin):
 | 
					    if util.is_package(origin):
 | 
				
			||||||
        link_package(origin, link_name, force)
 | 
					        model_path = util.get_model_package_path(origin)
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        symlink(origin, link_name, force)
 | 
					        model_path = Path(origin)
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def link_package(package_name, link_name, force=False):
 | 
					 | 
				
			||||||
    # Here we're importing the module just to find it. This is worryingly
 | 
					 | 
				
			||||||
    # indirect, but it's otherwise very difficult to find the package.
 | 
					 | 
				
			||||||
    # Python's installation and import rules are very complicated.
 | 
					 | 
				
			||||||
    pkg = importlib.import_module(package_name)
 | 
					 | 
				
			||||||
    package_path = Path(pkg.__file__).parent.parent
 | 
					 | 
				
			||||||
    meta = get_meta(package_path, package_name)
 | 
					 | 
				
			||||||
    model_name = package_name + '-' + meta['version']
 | 
					 | 
				
			||||||
    model_path = package_path / package_name / model_name
 | 
					 | 
				
			||||||
    symlink(model_path, link_name, force)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def symlink(model_path, link_name, force):
 | 
					 | 
				
			||||||
    model_path = Path(model_path)
 | 
					 | 
				
			||||||
    if not model_path.exists():
 | 
					    if not model_path.exists():
 | 
				
			||||||
        util.sys_exit(
 | 
					        prints("The data should be located in %s" % path2str(model_path),
 | 
				
			||||||
            "The data should be located in {p}".format(p=model_path),
 | 
					               title="Can't locate model data", exits=True)
 | 
				
			||||||
            title="Can't locate model data")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    link_path = util.get_data_path() / link_name
 | 
					    link_path = util.get_data_path() / link_name
 | 
				
			||||||
 | 
					 | 
				
			||||||
    if link_path.exists() and not force:
 | 
					    if link_path.exists() and not force:
 | 
				
			||||||
        util.sys_exit(
 | 
					        prints("To overwrite an existing link, use the --force flag.",
 | 
				
			||||||
            "To overwrite an existing link, use the --force flag.",
 | 
					               title="Link %s already exists" % link_name, exits=True)
 | 
				
			||||||
            title="Link {l} already exists".format(l=link_name))
 | 
					 | 
				
			||||||
    elif link_path.exists():
 | 
					    elif link_path.exists():
 | 
				
			||||||
        link_path.unlink()
 | 
					        link_path.unlink()
 | 
				
			||||||
 | 
					 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        symlink_to(link_path, model_path)
 | 
					        symlink_to(link_path, model_path)
 | 
				
			||||||
    except:
 | 
					    except:
 | 
				
			||||||
        # This is quite dirty, but just making sure other errors are caught so
 | 
					        # This is quite dirty, but just making sure other errors are caught.
 | 
				
			||||||
        # users at least see a proper message.
 | 
					        prints("Creating a symlink in spacy/data failed. Make sure you have "
 | 
				
			||||||
        util.print_msg(
 | 
					               "the required permissions and try re-running the command as "
 | 
				
			||||||
            "Creating a symlink in spacy/data failed. Make sure you have the "
 | 
					               "admin, or use a virtualenv. You can still import the model as a "
 | 
				
			||||||
            "required permissions and try re-running the command as admin, or "
 | 
					               "module and call its load() method, or create the symlink manually.",
 | 
				
			||||||
            "use a virtualenv to install spaCy in a user directory, instead of "
 | 
					               "%s --> %s" % (path2str(model_path), path2str(link_path)),
 | 
				
			||||||
            "doing a system installation.",
 | 
					               title="Error: Couldn't link model to '%s'" % link_name)
 | 
				
			||||||
            "You can still import the model as a Python package and call its "
 | 
					 | 
				
			||||||
            "load() method, or create the symlink manually:",
 | 
					 | 
				
			||||||
            "{a} --> {b}".format(a=unicode_(model_path), b=unicode_(link_path)),
 | 
					 | 
				
			||||||
            title="Error: Couldn't link model to '{l}'".format(l=link_name))
 | 
					 | 
				
			||||||
        raise
 | 
					        raise
 | 
				
			||||||
 | 
					    prints("%s --> %s" % (path2str(model_path), path2str(link_path)),
 | 
				
			||||||
    util.print_msg(
 | 
					           "You can now load the model via spacy.load('%s')." % link_name,
 | 
				
			||||||
        "{a} --> {b}".format(a=model_path.as_posix(), b=link_path.as_posix()),
 | 
					 | 
				
			||||||
        "You can now load the model via spacy.load('{l}').".format(l=link_name),
 | 
					 | 
				
			||||||
           title="Linking successful")
 | 
					           title="Linking successful")
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_meta(package_path, package):
 | 
					 | 
				
			||||||
    meta = util.parse_package_meta(package_path, package)
 | 
					 | 
				
			||||||
    return meta
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def is_package(origin):
 | 
					 | 
				
			||||||
    packages = pip.get_installed_distributions()
 | 
					 | 
				
			||||||
    for package in packages:
 | 
					 | 
				
			||||||
        if package.project_name.replace('-', '_') == origin:
 | 
					 | 
				
			||||||
            return True
 | 
					 | 
				
			||||||
    return False
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,21 +4,25 @@ from __future__ import unicode_literals
 | 
				
			||||||
import gzip
 | 
					import gzip
 | 
				
			||||||
import math
 | 
					import math
 | 
				
			||||||
from ast import literal_eval
 | 
					from ast import literal_eval
 | 
				
			||||||
from pathlib import Path
 | 
					 | 
				
			||||||
from preshed.counter import PreshCounter
 | 
					from preshed.counter import PreshCounter
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ..vocab import write_binary_vectors
 | 
					from ..vocab import write_binary_vectors
 | 
				
			||||||
from ..compat import fix_text
 | 
					from ..compat import fix_text, path2str
 | 
				
			||||||
 | 
					from ..util import prints
 | 
				
			||||||
from .. import util
 | 
					from .. import util
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def model(lang, model_dir, freqs_data, clusters_data, vectors_data):
 | 
					def model(lang, model_dir, freqs_data, clusters_data, vectors_data):
 | 
				
			||||||
    model_path = Path(model_dir)
 | 
					    model_path = util.ensure_path(model_dir)
 | 
				
			||||||
    freqs_path = Path(freqs_data)
 | 
					    freqs_path = util.ensure_path(freqs_data)
 | 
				
			||||||
    clusters_path = Path(clusters_data) if clusters_data else None
 | 
					    clusters_path = util.ensure_path(clusters_data)
 | 
				
			||||||
    vectors_path = Path(vectors_data) if vectors_data else None
 | 
					    vectors_path = util.ensure_path(vectors_data)
 | 
				
			||||||
 | 
					    if not freqs_path.is_file():
 | 
				
			||||||
    check_dirs(freqs_path, clusters_path, vectors_path)
 | 
					        prints(freqs_path, title="No frequencies file found", exits=True)
 | 
				
			||||||
 | 
					    if clusters_path and not clusters_path.is_file():
 | 
				
			||||||
 | 
					        prints(clusters_path, title="No Brown clusters file found", exits=True)
 | 
				
			||||||
 | 
					    if vectors_path and not vectors_path.is_file():
 | 
				
			||||||
 | 
					        prints(vectors_path, title="No word vectors file found", exits=True)
 | 
				
			||||||
    vocab = util.get_lang_class(lang).Defaults.create_vocab()
 | 
					    vocab = util.get_lang_class(lang).Defaults.create_vocab()
 | 
				
			||||||
    probs, oov_prob = read_probs(freqs_path)
 | 
					    probs, oov_prob = read_probs(freqs_path)
 | 
				
			||||||
    clusters = read_clusters(clusters_path) if clusters_path else {}
 | 
					    clusters = read_clusters(clusters_path) if clusters_path else {}
 | 
				
			||||||
| 
						 | 
					@ -36,14 +40,14 @@ def create_model(model_path, vectors_path, vocab, oov_prob):
 | 
				
			||||||
        model_path.mkdir()
 | 
					        model_path.mkdir()
 | 
				
			||||||
    if not vocab_path.exists():
 | 
					    if not vocab_path.exists():
 | 
				
			||||||
        vocab_path.mkdir()
 | 
					        vocab_path.mkdir()
 | 
				
			||||||
    vocab.dump(lexemes_path.as_posix())
 | 
					    vocab.dump(path2str(lexemes_path))
 | 
				
			||||||
    with strings_path.open('w') as f:
 | 
					    with strings_path.open('w') as f:
 | 
				
			||||||
        vocab.strings.dump(f)
 | 
					        vocab.strings.dump(f)
 | 
				
			||||||
    with oov_path.open('w') as f:
 | 
					    with oov_path.open('w') as f:
 | 
				
			||||||
        f.write('%f' % oov_prob)
 | 
					        f.write('%f' % oov_prob)
 | 
				
			||||||
    if vectors_path:
 | 
					    if vectors_path:
 | 
				
			||||||
        vectors_dest = vocab_path / 'vec.bin'
 | 
					        vectors_dest = vocab_path / 'vec.bin'
 | 
				
			||||||
        write_binary_vectors(vectors_path.as_posix(), vectors_dest.as_posix())
 | 
					        write_binary_vectors(path2str(vectors_path), path2str(vectors_dest))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def read_probs(freqs_path, max_length=100, min_doc_freq=5, min_freq=200):
 | 
					def read_probs(freqs_path, max_length=100, min_doc_freq=5, min_freq=200):
 | 
				
			||||||
| 
						 | 
					@ -115,17 +119,8 @@ def populate_vocab(vocab, clusters, probs, oov_prob):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def check_unzip(file_path):
 | 
					def check_unzip(file_path):
 | 
				
			||||||
    file_path_str = file_path.as_posix()
 | 
					    file_path_str = path2str(file_path)
 | 
				
			||||||
    if file_path_str.endswith('gz'):
 | 
					    if file_path_str.endswith('gz'):
 | 
				
			||||||
        return gzip.open(file_path_str)
 | 
					        return gzip.open(file_path_str)
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        return file_path.open()
 | 
					        return file_path.open()
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def check_dirs(freqs_data, clusters_data, vectors_data):
 | 
					 | 
				
			||||||
    if not freqs_data.is_file():
 | 
					 | 
				
			||||||
        util.sys_exit(freqs_data.as_posix(), title="No frequencies file found")
 | 
					 | 
				
			||||||
    if clusters_data and not clusters_data.is_file():
 | 
					 | 
				
			||||||
        util.sys_exit(clusters_data.as_posix(), title="No Brown clusters file found")
 | 
					 | 
				
			||||||
    if vectors_data and not vectors_data.is_file():
 | 
					 | 
				
			||||||
        util.sys_exit(vectors_data.as_posix(), title="No word vectors file found")
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,64 +5,57 @@ import shutil
 | 
				
			||||||
import requests
 | 
					import requests
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ..compat import unicode_, json_dumps
 | 
					from ..compat import path2str, json_dumps
 | 
				
			||||||
 | 
					from ..util import prints
 | 
				
			||||||
from .. import util
 | 
					from .. import util
 | 
				
			||||||
 | 
					from .. import about
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def package(input_dir, output_dir, meta_path, force):
 | 
					def package(input_dir, output_dir, meta_path, force):
 | 
				
			||||||
    input_path = Path(input_dir)
 | 
					    input_path = util.ensure_path(input_dir)
 | 
				
			||||||
    output_path = Path(output_dir)
 | 
					    output_path = util.ensure_path(output_dir)
 | 
				
			||||||
    meta_path = util.ensure_path(meta_path)
 | 
					    meta_path = util.ensure_path(meta_path)
 | 
				
			||||||
    check_dirs(input_path, output_path, meta_path)
 | 
					    if not input_path or not input_path.exists():
 | 
				
			||||||
 | 
					        prints(input_path, title="Model directory not found", exits=True)
 | 
				
			||||||
 | 
					    if not output_path or not output_path.exists():
 | 
				
			||||||
 | 
					        prints(output_path, title="Output directory not found", exits=True)
 | 
				
			||||||
 | 
					    if meta_path and not meta_path.exists():
 | 
				
			||||||
 | 
					        prints(meta_path, title="meta.json not found", exits=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    template_setup = get_template('setup.py')
 | 
					    template_setup = get_template('setup.py')
 | 
				
			||||||
    template_manifest = get_template('MANIFEST.in')
 | 
					    template_manifest = get_template('MANIFEST.in')
 | 
				
			||||||
    template_init = get_template('en_model_name/__init__.py')
 | 
					    template_init = get_template('en_model_name/__init__.py')
 | 
				
			||||||
 | 
					 | 
				
			||||||
    meta_path = meta_path or input_path / 'meta.json'
 | 
					    meta_path = meta_path or input_path / 'meta.json'
 | 
				
			||||||
    if meta_path.is_file():
 | 
					    if meta_path.is_file():
 | 
				
			||||||
        util.print_msg(unicode_(meta_path), title="Reading meta.json from file")
 | 
					        prints(meta_path, title="Reading meta.json from file")
 | 
				
			||||||
        meta = util.read_json(meta_path)
 | 
					        meta = util.read_json(meta_path)
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        meta = generate_meta()
 | 
					        meta = generate_meta()
 | 
				
			||||||
 | 
					 | 
				
			||||||
    validate_meta(meta, ['lang', 'name', 'version'])
 | 
					    validate_meta(meta, ['lang', 'name', 'version'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    model_name = meta['lang'] + '_' + meta['name']
 | 
					    model_name = meta['lang'] + '_' + meta['name']
 | 
				
			||||||
    model_name_v = model_name + '-' + meta['version']
 | 
					    model_name_v = model_name + '-' + meta['version']
 | 
				
			||||||
    main_path = output_path / model_name_v
 | 
					    main_path = output_path / model_name_v
 | 
				
			||||||
    package_path = main_path / model_name
 | 
					    package_path = main_path / model_name
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    create_dirs(package_path, force)
 | 
					    create_dirs(package_path, force)
 | 
				
			||||||
    shutil.copytree(unicode_(input_path), unicode_(package_path / model_name_v))
 | 
					    shutil.copytree(path2str(input_path), path2str(package_path / model_name_v))
 | 
				
			||||||
    create_file(main_path / 'meta.json', json_dumps(meta))
 | 
					    create_file(main_path / 'meta.json', json_dumps(meta))
 | 
				
			||||||
    create_file(main_path / 'setup.py', template_setup)
 | 
					    create_file(main_path / 'setup.py', template_setup)
 | 
				
			||||||
    create_file(main_path / 'MANIFEST.in', template_manifest)
 | 
					    create_file(main_path / 'MANIFEST.in', template_manifest)
 | 
				
			||||||
    create_file(package_path / '__init__.py', template_init)
 | 
					    create_file(package_path / '__init__.py', template_init)
 | 
				
			||||||
 | 
					    prints(main_path, "To build the package, run `python setup.py sdist` in this "
 | 
				
			||||||
    util.print_msg(
 | 
					           "directory.", title="Successfully created package '%s'" % model_name_v)
 | 
				
			||||||
        unicode_(main_path),
 | 
					 | 
				
			||||||
        "To build the package, run `python setup.py sdist` in that directory.",
 | 
					 | 
				
			||||||
        title="Successfully created package {p}".format(p=model_name_v))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def check_dirs(input_path, output_path, meta_path):
 | 
					 | 
				
			||||||
    if not input_path.exists():
 | 
					 | 
				
			||||||
        util.sys_exit(unicode_(input_path.as_poisx), title="Model directory not found")
 | 
					 | 
				
			||||||
    if not output_path.exists():
 | 
					 | 
				
			||||||
        util.sys_exit(unicode_(output_path), title="Output directory not found")
 | 
					 | 
				
			||||||
    if meta_path and not meta_path.exists():
 | 
					 | 
				
			||||||
        util.sys_exit(unicode_(meta_path), title="meta.json not found")
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def create_dirs(package_path, force):
 | 
					def create_dirs(package_path, force):
 | 
				
			||||||
    if package_path.exists():
 | 
					    if package_path.exists():
 | 
				
			||||||
        if force:
 | 
					        if force:
 | 
				
			||||||
            shutil.rmtree(unicode_(package_path))
 | 
					            shutil.rmtree(path2str(package_path))
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            util.sys_exit(unicode_(package_path),
 | 
					            prints(package_path, "Please delete the directory and try again, or "
 | 
				
			||||||
                "Please delete the directory and try again, or use the --force "
 | 
					                   "use the --force flag to overwrite existing directories.",
 | 
				
			||||||
                "flag to overwrite existing directories.",
 | 
					                   title="Package directory already exists", exits=True)
 | 
				
			||||||
                title="Package directory already exists")
 | 
					 | 
				
			||||||
    Path.mkdir(package_path, parents=True)
 | 
					    Path.mkdir(package_path, parents=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -75,15 +68,14 @@ def generate_meta():
 | 
				
			||||||
    settings = [('lang', 'Model language', 'en'),
 | 
					    settings = [('lang', 'Model language', 'en'),
 | 
				
			||||||
                ('name', 'Model name', 'model'),
 | 
					                ('name', 'Model name', 'model'),
 | 
				
			||||||
                ('version', 'Model version', '0.0.0'),
 | 
					                ('version', 'Model version', '0.0.0'),
 | 
				
			||||||
                ('spacy_version', 'Required spaCy version', '>=1.7.0,<2.0.0'),
 | 
					                ('spacy_version', 'Required spaCy version', '>=2.0.0,<3.0.0'),
 | 
				
			||||||
                ('description', 'Model description', False),
 | 
					                ('description', 'Model description', False),
 | 
				
			||||||
                ('author', 'Author', False),
 | 
					                ('author', 'Author', False),
 | 
				
			||||||
                ('email', 'Author email', False),
 | 
					                ('email', 'Author email', False),
 | 
				
			||||||
                ('url', 'Author website', False),
 | 
					                ('url', 'Author website', False),
 | 
				
			||||||
                ('license', 'License', 'CC BY-NC 3.0')]
 | 
					                ('license', 'License', 'CC BY-NC 3.0')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    util.print_msg("Enter the package settings for your model.", title="Generating meta.json")
 | 
					    prints("Enter the package settings for your model.", title="Generating meta.json")
 | 
				
			||||||
 | 
					 | 
				
			||||||
    meta = {}
 | 
					    meta = {}
 | 
				
			||||||
    for setting, desc, default in settings:
 | 
					    for setting, desc, default in settings:
 | 
				
			||||||
        response = util.get_raw_input(desc, default)
 | 
					        response = util.get_raw_input(desc, default)
 | 
				
			||||||
| 
						 | 
					@ -94,16 +86,13 @@ def generate_meta():
 | 
				
			||||||
def validate_meta(meta, keys):
 | 
					def validate_meta(meta, keys):
 | 
				
			||||||
    for key in keys:
 | 
					    for key in keys:
 | 
				
			||||||
        if key not in meta or meta[key] == '':
 | 
					        if key not in meta or meta[key] == '':
 | 
				
			||||||
            util.sys_exit(
 | 
					            prints("This setting is required to build your package.",
 | 
				
			||||||
                "This setting is required to build your package.",
 | 
					                   title='No "%s" setting found in meta.json' % key, exits=True)
 | 
				
			||||||
                title='No "{k}" setting found in meta.json'.format(k=key))
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_template(filepath):
 | 
					def get_template(filepath):
 | 
				
			||||||
    url = 'https://raw.githubusercontent.com/explosion/spacy-dev-resources/master/templates/model/'
 | 
					    r = requests.get(about.__model_files__ + filepath)
 | 
				
			||||||
    r = requests.get(url + filepath)
 | 
					 | 
				
			||||||
    if r.status_code != 200:
 | 
					    if r.status_code != 200:
 | 
				
			||||||
        util.sys_exit(
 | 
					        prints("Couldn't fetch template files from GitHub.",
 | 
				
			||||||
            "Couldn't fetch template files from GitHub.",
 | 
					               title="Server error (%d)" % r.status_code, exits=True)
 | 
				
			||||||
            title="Server error ({c})".format(c=r.status_code))
 | 
					 | 
				
			||||||
    return r.text
 | 
					    return r.text
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,19 +4,24 @@ from __future__ import unicode_literals, division, print_function
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
from collections import defaultdict
 | 
					from collections import defaultdict
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ..util import ensure_path
 | 
					 | 
				
			||||||
from ..scorer import Scorer
 | 
					from ..scorer import Scorer
 | 
				
			||||||
from ..gold import GoldParse, merge_sents
 | 
					from ..gold import GoldParse, merge_sents
 | 
				
			||||||
from ..gold import read_json_file as read_gold_json
 | 
					from ..gold import read_json_file as read_gold_json
 | 
				
			||||||
 | 
					from ..util import prints
 | 
				
			||||||
from .. import util
 | 
					from .. import util
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ner,
 | 
					def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ner,
 | 
				
			||||||
          parser_L1):
 | 
					          parser_L1):
 | 
				
			||||||
    output_path = ensure_path(output_dir)
 | 
					    output_path = util.ensure_path(output_dir)
 | 
				
			||||||
    train_path = ensure_path(train_data)
 | 
					    train_path = util.ensure_path(train_data)
 | 
				
			||||||
    dev_path = ensure_path(dev_data)
 | 
					    dev_path = util.ensure_path(dev_data)
 | 
				
			||||||
    check_dirs(output_path, train_path, dev_path)
 | 
					    if not output_path.exists():
 | 
				
			||||||
 | 
					        prints(output_path, title="Output directory not found", exits=True)
 | 
				
			||||||
 | 
					    if not train_path.exists():
 | 
				
			||||||
 | 
					        prints(train_path, title="Training data not found", exits=True)
 | 
				
			||||||
 | 
					    if dev_path and not dev_path.exists():
 | 
				
			||||||
 | 
					        prints(dev_path, title="Development data not found", exits=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    lang = util.get_lang_class(language)
 | 
					    lang = util.get_lang_class(language)
 | 
				
			||||||
    parser_cfg = {
 | 
					    parser_cfg = {
 | 
				
			||||||
| 
						 | 
					@ -44,14 +49,13 @@ def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ne
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def train_config(config):
 | 
					def train_config(config):
 | 
				
			||||||
    config_path = ensure_path(config)
 | 
					    config_path = util.ensure_path(config)
 | 
				
			||||||
    if not config_path.is_file():
 | 
					    if not config_path.is_file():
 | 
				
			||||||
        util.sys_exit(config_path.as_posix(), title="Config file not found")
 | 
					        prints(config_path, title="Config file not found", exits=True)
 | 
				
			||||||
    config = json.load(config_path)
 | 
					    config = json.load(config_path)
 | 
				
			||||||
    for setting in []:
 | 
					    for setting in []:
 | 
				
			||||||
        if setting not in config.keys():
 | 
					        if setting not in config.keys():
 | 
				
			||||||
            util.sys_exit("{s} not found in config file.".format(s=setting),
 | 
					            prints("%s not found in config file." % setting, title="Missing setting")
 | 
				
			||||||
                          title="Missing setting")
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def train_model(Language, train_data, dev_data, output_path, tagger_cfg, parser_cfg,
 | 
					def train_model(Language, train_data, dev_data, output_path, tagger_cfg, parser_cfg,
 | 
				
			||||||
| 
						 | 
					@ -88,16 +92,8 @@ def evaluate(Language, gold_tuples, output_path):
 | 
				
			||||||
    return scorer
 | 
					    return scorer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def check_dirs(output_path, train_path, dev_path):
 | 
					 | 
				
			||||||
    if not output_path.exists():
 | 
					 | 
				
			||||||
        util.sys_exit(output_path.as_posix(), title="Output directory not found")
 | 
					 | 
				
			||||||
    if not train_path.exists():
 | 
					 | 
				
			||||||
        util.sys_exit(train_path.as_posix(), title="Training data not found")
 | 
					 | 
				
			||||||
    if dev_path and not dev_path.exists():
 | 
					 | 
				
			||||||
        util.sys_exit(dev_path.as_posix(), title="Development data not found")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def print_progress(itn, nr_weight, nr_active_feat, **scores):
 | 
					def print_progress(itn, nr_weight, nr_active_feat, **scores):
 | 
				
			||||||
 | 
					    # TODO: Fix!
 | 
				
			||||||
    tpl = '{:d}\t{:d}\t{:d}\t{uas:.3f}\t{ents_f:.3f}\t{tags_acc:.3f}\t{token_acc:.3f}'
 | 
					    tpl = '{:d}\t{:d}\t{:d}\t{uas:.3f}\t{ents_f:.3f}\t{tags_acc:.3f}\t{token_acc:.3f}'
 | 
				
			||||||
    print(tpl.format(itn, nr_weight, nr_active_feat, **scores))
 | 
					    print(tpl.format(itn, nr_weight, nr_active_feat, **scores))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,6 +5,8 @@ from pathlib import Path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from . import about
 | 
					from . import about
 | 
				
			||||||
from . import util
 | 
					from . import util
 | 
				
			||||||
 | 
					from .util import prints
 | 
				
			||||||
 | 
					from .compat import path2str
 | 
				
			||||||
from .cli import download
 | 
					from .cli import download
 | 
				
			||||||
from .cli import link
 | 
					from .cli import link
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -114,9 +116,9 @@ def resolve_model_name(name):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    if name == 'en' or name == 'de':
 | 
					    if name == 'en' or name == 'de':
 | 
				
			||||||
        versions = ['1.0.0', '1.1.0']
 | 
					        versions = ['1.0.0', '1.1.0']
 | 
				
			||||||
        data_path = Path(util.get_data_path())
 | 
					        data_path = util.get_data_path()
 | 
				
			||||||
        model_path = data_path / name
 | 
					        model_path = data_path / name
 | 
				
			||||||
        v_model_paths = [data_path / Path(name + '-' + v) for v in versions]
 | 
					        v_model_paths = [data_path / '%s-%s' % (name, v) for v in versions]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if not model_path.exists(): # no shortcut found
 | 
					        if not model_path.exists(): # no shortcut found
 | 
				
			||||||
            for v_path in v_model_paths:
 | 
					            for v_path in v_model_paths:
 | 
				
			||||||
| 
						 | 
					@ -126,10 +128,10 @@ def resolve_model_name(name):
 | 
				
			||||||
                        return name
 | 
					                        return name
 | 
				
			||||||
                    else:
 | 
					                    else:
 | 
				
			||||||
                        raise ValueError(
 | 
					                        raise ValueError(
 | 
				
			||||||
                            "Found English model at {p}. This model is not "
 | 
					                            "Found English model at %s. This model is not "
 | 
				
			||||||
                            "compatible with the current version. See "
 | 
					                            "compatible with the current version. See "
 | 
				
			||||||
                            "https://spacy.io/docs/usage/models to download the "
 | 
					                            "https://spacy.io/docs/usage/models to download the "
 | 
				
			||||||
                            "new model.".format(p=v_path))
 | 
					                            "new model." % path2str(v_path))
 | 
				
			||||||
    return name
 | 
					    return name
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -142,11 +144,10 @@ class ModelDownload():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def load(self, lang):
 | 
					    def load(self, lang):
 | 
				
			||||||
        util.print_msg(
 | 
					        prints("The spacy.%s.download command is now deprecated. Please use "
 | 
				
			||||||
            "The spacy.{l}.download command is now deprecated. Please use "
 | 
					               "python -m spacy download [model name or shortcut] instead. For "
 | 
				
			||||||
            "python -m spacy download [model name or shortcut] instead. For more "
 | 
					               "more info, see the docs: %s." % (lang, about.__docs__),
 | 
				
			||||||
            "info and available models, see the documentation: {d}. "
 | 
					               "Downloading default '%s' model now..." % lang,
 | 
				
			||||||
            "Downloading default '{l}' model now...".format(d=about.__docs__, l=lang),
 | 
					 | 
				
			||||||
               title="Warning: deprecated command")
 | 
					               title="Warning: deprecated command")
 | 
				
			||||||
        download(lang)
 | 
					        download(lang)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -9,7 +9,7 @@ from pathlib import Path
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
import textwrap
 | 
					import textwrap
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .compat import basestring_, unicode_, input_
 | 
					from .compat import path2str, basestring_, input_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
LANGUAGES = {}
 | 
					LANGUAGES = {}
 | 
				
			||||||
| 
						 | 
					@ -151,95 +151,66 @@ def parse_package_meta(package_path, package, require=True):
 | 
				
			||||||
def get_raw_input(description, default=False):
 | 
					def get_raw_input(description, default=False):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    Get user input via raw_input / input and return input value. Takes a
 | 
					    Get user input via raw_input / input and return input value. Takes a
 | 
				
			||||||
    description for the prompt, and an optional default value that's displayed
 | 
					    description, and an optional default value to display with the prompt.
 | 
				
			||||||
    with the prompt.
 | 
					 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    additional = ' (default: {d})'.format(d=default) if default else ''
 | 
					    additional = ' (default: %s)' % default if default else ''
 | 
				
			||||||
    prompt = '    {d}{a}: '.format(d=description, a=additional)
 | 
					    prompt = '    %s%s: ' % (description, additional)
 | 
				
			||||||
    user_input = input_(prompt)
 | 
					    user_input = input_(prompt)
 | 
				
			||||||
    return user_input
 | 
					    return user_input
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def print_table(data, **kwargs):
 | 
					def print_table(data, title=None):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    Print data in table format. Can either take a list of tuples or a
 | 
					    Print data in table format. Can either take a list of tuples or a
 | 
				
			||||||
    dictionary, which will be converted to a list of tuples.
 | 
					    dictionary, which will be converted to a list of tuples.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    if type(data) == dict:
 | 
					    if type(data) == dict:
 | 
				
			||||||
        data = list(data.items())
 | 
					        data = list(data.items())
 | 
				
			||||||
 | 
					    tpl_row = '    {:<15}' * len(data[0])
 | 
				
			||||||
    tpl_msg = '\n{msg}\n'
 | 
					 | 
				
			||||||
    tpl_title = '\n    \033[93m{msg}\033[0m'
 | 
					 | 
				
			||||||
    tpl_row ="    {:<15}" * len(data[0])
 | 
					 | 
				
			||||||
    table = '\n'.join([tpl_row.format(l, v) for l, v in data])
 | 
					    table = '\n'.join([tpl_row.format(l, v) for l, v in data])
 | 
				
			||||||
 | 
					    if title:
 | 
				
			||||||
    if 'title' in kwargs and kwargs['title']:
 | 
					        print('\n    \033[93m{}\033[0m'.format(title))
 | 
				
			||||||
        print(tpl_title.format(msg=kwargs['title']))
 | 
					    print('\n{}\n'.format(table))
 | 
				
			||||||
 | 
					 | 
				
			||||||
    print(tpl_msg.format(msg=table))
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def print_markdown(data, **kwargs):
 | 
					def print_markdown(data, title=None):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    Print listed data in GitHub-flavoured Markdown format so it can be
 | 
					    Print listed data in GitHub-flavoured Markdown format so it can be
 | 
				
			||||||
    copy-pasted into issues. Can either take a list of tuples or a dictionary,
 | 
					    copy-pasted into issues. Can either take a list of tuples or a dictionary.
 | 
				
			||||||
    which will be converted to a list of tuples.
 | 
					 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    def excl_value(value):
 | 
					    def excl_value(value):
 | 
				
			||||||
        # don't print value if it contains absolute path of directory (i.e.
 | 
					        return Path(value).exists() # contains path (personal info)
 | 
				
			||||||
        # personal info). Other conditions can be included here if necessary.
 | 
					 | 
				
			||||||
        if unicode_(Path(__file__).parent) in value:
 | 
					 | 
				
			||||||
            return True
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if type(data) == dict:
 | 
					    if type(data) == dict:
 | 
				
			||||||
        data = list(data.items())
 | 
					        data = list(data.items())
 | 
				
			||||||
 | 
					    markdown = ["* **{}:** {}".format(l, v) for l, v in data if not excl_value(v)]
 | 
				
			||||||
    tpl_msg = "\n{msg}\n"
 | 
					    if title:
 | 
				
			||||||
    tpl_title = "\n## {msg}"
 | 
					        print("\n## {}".format(title))
 | 
				
			||||||
    tpl_row = "* **{l}:** {v}"
 | 
					    print('\n{}\n'.format('\n'.join(markdown)))
 | 
				
			||||||
    markdown = '\n'.join([tpl_row.format(l=l, v=v) for l, v in data if not excl_value(v)])
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if 'title' in kwargs and kwargs['title']:
 | 
					 | 
				
			||||||
        print(tpl_title.format(msg=kwargs['title']))
 | 
					 | 
				
			||||||
    print(tpl_msg.format(msg=markdown))
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def print_msg(*text, **kwargs):
 | 
					def prints(*texts, title=None, exits=False):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    Print formatted message. Each positional argument is rendered as newline-
 | 
					    Print formatted message. Each positional argument is rendered as newline-
 | 
				
			||||||
    separated paragraph. If kwarg 'title' exist, title is printed above the text
 | 
					    separated paragraph. An optional highlighted title is printed above the text
 | 
				
			||||||
    and highlighted (using ANSI escape sequences manually to avoid unnecessary
 | 
					    (using ANSI escape sequences manually to avoid unnecessary dependency).
 | 
				
			||||||
    dependency).
 | 
					 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    message = '\n\n'.join([_wrap_text(t) for t in text])
 | 
					    title = '\033[93m{}\033[0m\n'.format(_wrap(title)) if title else ''
 | 
				
			||||||
    tpl_msg = '\n{msg}\n'
 | 
					    message = '\n\n'.join([_wrap(text) for text in texts])
 | 
				
			||||||
    tpl_title = '\n\033[93m{msg}\033[0m'
 | 
					    print('\n{}{}\n'.format(title, message))
 | 
				
			||||||
 | 
					    if exits:
 | 
				
			||||||
    if 'title' in kwargs and kwargs['title']:
 | 
					        sys.exit(0)
 | 
				
			||||||
        title = _wrap_text(kwargs['title'])
 | 
					 | 
				
			||||||
        print(tpl_title.format(msg=title))
 | 
					 | 
				
			||||||
    print(tpl_msg.format(msg=message))
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _wrap_text(text):
 | 
					def _wrap(text, wrap_max=80, indent=4):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    Wrap text at given width using textwrap module. Indent should consist of
 | 
					    Wrap text at given width using textwrap module. Indent should consist of
 | 
				
			||||||
    spaces. Its length is deducted from wrap width to ensure exact wrapping.
 | 
					    spaces. Its length is deducted from wrap width to ensure exact wrapping.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    wrap_max = 80
 | 
					    indent = indent * ' '
 | 
				
			||||||
    indent = '    '
 | 
					 | 
				
			||||||
    wrap_width = wrap_max - len(indent)
 | 
					    wrap_width = wrap_max - len(indent)
 | 
				
			||||||
 | 
					    if isinstance(text, Path):
 | 
				
			||||||
 | 
					        text = path2str(text)
 | 
				
			||||||
    return textwrap.fill(text, width=wrap_width, initial_indent=indent,
 | 
					    return textwrap.fill(text, width=wrap_width, initial_indent=indent,
 | 
				
			||||||
                         subsequent_indent=indent, break_long_words=False,
 | 
					                         subsequent_indent=indent, break_long_words=False,
 | 
				
			||||||
                         break_on_hyphens=False)
 | 
					                         break_on_hyphens=False)
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def sys_exit(*messages, **kwargs):
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    Performs SystemExit. For modules used from the command line, like
 | 
					 | 
				
			||||||
    download and link. To print message, use the same arguments as for
 | 
					 | 
				
			||||||
    print_msg().
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    if messages:
 | 
					 | 
				
			||||||
        print_msg(*messages, **kwargs)
 | 
					 | 
				
			||||||
    sys.exit(0)
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user