Mirror of https://github.com/explosion/spaCy.git

Tidy up CLI

commit d941fc3667
parent 298c3d973c
spacy/cli/convert.py

@@ -7,10 +7,9 @@ from pathlib import Path
 from .converters import conllu2json, iob2json, conll_ner2json
 from ..util import prints

-# Converters are matched by file extension. To add a converter, add a new entry
-# to this dict with the file extension mapped to the converter function imported
-# from /converters.
+# Converters are matched by file extension. To add a converter, add a new
+# entry to this dict with the file extension mapped to the converter function
+# imported from /converters.

 CONVERTERS = {
     'conllu': conllu2json,
     'conll': conllu2json,
@@ -24,8 +23,7 @@ CONVERTERS = {
     output_dir=("output directory for converted file", "positional", None, str),
     n_sents=("Number of sentences per doc", "option", "n", int),
     converter=("Name of converter (auto, iob, conllu or ner)", "option", "c", str),
-    morphology=("Enable appending morphology to tags", "flag", "m", bool)
-)
+    morphology=("Enable appending morphology to tags", "flag", "m", bool))
 def convert(cmd, input_file, output_dir, n_sents=1, morphology=False,
             converter='auto'):
     """
@@ -40,7 +38,7 @@ def convert(cmd, input_file, output_dir, n_sents=1, morphology=False,
         prints(output_path, title="Output directory not found", exits=1)
     if converter == 'auto':
         converter = input_path.suffix[1:]
-    if not converter in CONVERTERS:
+    if converter not in CONVERTERS:
         prints("Can't find converter for %s" % converter,
                title="Unknown format", exits=1)
     func = CONVERTERS[converter]
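A note on the decorator pattern above, which recurs in every command in
this diff: plac maps each keyword in @plac.annotations to a tuple of
(help text, kind, abbreviation, type), where kind is "positional",
"option" or "flag". A minimal, self-contained sketch of the convention;
the greet command and its argument names are hypothetical, not part of
spaCy:

    import plac

    @plac.annotations(
        name=("name to greet", "positional", None, str),      # required arg
        shout=("uppercase the greeting", "flag", "s", bool))  # -s / --shout
    def greet(name, shout=False):
        msg = "Hello, %s!" % name
        print(msg.upper() if shout else msg)

    if __name__ == '__main__':
        plac.call(greet)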
spacy/cli/converters/conll_ner2json.py

@@ -8,7 +8,8 @@ from ...gold import iob_to_biluo

 def conll_ner2json(input_path, output_path, n_sents=10, use_morphology=False):
     """
-    Convert files in the CoNLL-2003 NER format into JSON format for use with train cli.
+    Convert files in the CoNLL-2003 NER format into JSON format for use with
+    train cli.
     """
     docs = read_conll_ner(input_path)
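For readers unfamiliar with the input this converter handles: CoNLL-2003
NER files hold one token per line with whitespace-separated columns, the
last column carrying the IOB entity tag, and blank lines separating
sentences. Roughly:

    U.N.     NNP  I-NP  I-ORG
    official NN   I-NP  O
    Ekeus    NNP  I-NP  I-PER
    heads    VBZ  I-VP  O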
spacy/cli/download.py

@@ -13,10 +13,9 @@ from .. import about


 @plac.annotations(
-    model=("model to download (shortcut or model name)", "positional", None, str),
+    model=("model to download, shortcut or name)", "positional", None, str),
     direct=("force direct download. Needs model name with version and won't "
-            "perform compatibility check", "flag", "d", bool)
-)
+            "perform compatibility check", "flag", "d", bool))
 def download(cmd, model, direct=False):
     """
     Download compatible model from default download path using pip. Model
@@ -30,21 +29,25 @@ def download(cmd, model, direct=False):
     model_name = shortcuts.get(model, model)
     compatibility = get_compatibility()
     version = get_version(model_name, compatibility)
-    dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version))
+    dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name,
+                                                        v=version))
     if dl == 0:
         try:
             # Get package path here because link uses
-            # pip.get_installed_distributions() to check if model is a package,
-            # which fails if model was just installed via subprocess
+            # pip.get_installed_distributions() to check if model is a
+            # package, which fails if model was just installed via
+            # subprocess
             package_path = get_package_path(model_name)
-            link(None, model_name, model, force=True, model_path=package_path)
+            link(None, model_name, model, force=True,
+                 model_path=package_path)
         except:
-            # Dirty, but since spacy.download and the auto-linking is mostly
-            # a convenience wrapper, it's best to show a success message and
-            # loading instructions, even if linking fails.
-            prints("Creating a shortcut link for 'en' didn't work (maybe you "
-                   "don't have admin permissions?), but you can still load "
-                   "the model via its full package name:",
-                   "nlp = spacy.load('%s')" % model_name,
-                   title="Download successful")
+            # Dirty, but since spacy.download and the auto-linking is
+            # mostly a convenience wrapper, it's best to show a success
+            # message and loading instructions, even if linking fails.
+            prints(
+                "Creating a shortcut link for 'en' didn't work (maybe "
+                "you don't have admin permissions?), but you can still "
+                "load the model via its full package name:",
+                "nlp = spacy.load('%s')" % model_name,
+                title="Download successful")
@@ -52,9 +55,10 @@ def download(cmd, model, direct=False):
 def get_json(url, desc):
     r = requests.get(url)
     if r.status_code != 200:
-        prints("Couldn't fetch %s. Please find a model for your spaCy installation "
-               "(v%s), and download it manually." % (desc, about.__version__),
-               about.__docs_models__, title="Server error (%d)" % r.status_code, exits=1)
+        msg = ("Couldn't fetch %s. Please find a model for your spaCy "
+               "installation (v%s), and download it manually.")
+        prints(msg % (desc, about.__version__), about.__docs_models__,
+               title="Server error (%d)" % r.status_code, exits=1)
     return r.json()
@@ -71,13 +75,13 @@ def get_compatibility():
 def get_version(model, comp):
     if model not in comp:
         version = about.__version__
-        prints("No compatible model found for '%s' (spaCy v%s)." % (model, version),
-               title="Compatibility error", exits=1)
+        msg = "No compatible model found for '%s' (spaCy v%s)."
+        prints(msg % (model, version), title="Compatibility error", exits=1)
     return comp[model][0]


 def download_model(filename):
     download_url = about.__download_url__ + '/' + filename
-    return subprocess.call([sys.executable, '-m',
-                            'pip', 'install', '--no-cache-dir', download_url],
-                           env=os.environ.copy())
+    return subprocess.call(
+        [sys.executable, '-m', 'pip', 'install', '--no-cache-dir',
+         download_url], env=os.environ.copy())
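The download_model helper above shells out to pip through subprocess,
which is why the just-installed package isn't visible to
pip.get_installed_distributions() in the same process. The same install
pattern in isolation; the URL is a hypothetical stand-in for
about.__download_url__ plus a filename:

    import os
    import subprocess
    import sys

    def pip_install(url_or_package):
        # Run pip under the current interpreter, bypassing the cache.
        return subprocess.call(
            [sys.executable, '-m', 'pip', 'install', '--no-cache-dir',
             url_or_package],
            env=os.environ.copy())

    # pip_install('https://example.com/models/en_model-1.0.0.tar.gz')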
spacy/cli/evaluate.py

@@ -2,27 +2,15 @@
 from __future__ import unicode_literals, division, print_function

 import plac
-import json
-from collections import defaultdict
-import cytoolz
-from pathlib import Path
-import dill
-import tqdm
-from thinc.neural._classes.model import Model
-from thinc.neural.optimizers import linear_decay
 from timeit import default_timer as timer
 import random
 import numpy.random

-from ..tokens.doc import Doc
-from ..scorer import Scorer
-from ..gold import GoldParse, merge_sents
-from ..gold import GoldCorpus, minibatch
+from ..gold import GoldCorpus
 from ..util import prints
 from .. import util
-from .. import about
 from .. import displacy
-from ..compat import json_dumps

 random.seed(0)
 numpy.random.seed(0)
@@ -30,17 +18,18 @@ numpy.random.seed(0)

 @plac.annotations(
     model=("Model name or path", "positional", None, str),
-    data_path=("Location of JSON-formatted evaluation data", "positional", None, str),
+    data_path=("Location of JSON-formatted evaluation data", "positional",
+               None, str),
     gold_preproc=("Use gold preprocessing", "flag", "G", bool),
     gpu_id=("Use GPU", "option", "g", int),
-    displacy_path=("Directory to output rendered parses as HTML", "option", "dp", str),
-    displacy_limit=("Limit of parses to render as HTML", "option", "dl", int)
-)
+    displacy_path=("Directory to output rendered parses as HTML", "option",
+                   "dp", str),
+    displacy_limit=("Limit of parses to render as HTML", "option", "dl", int))
 def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False,
              displacy_path=None, displacy_limit=25):
     """
-    Evaluate a model. To render a sample of parses in a HTML file, set an output
-    directory as the displacy_path argument.
+    Evaluate a model. To render a sample of parses in a HTML file, set an
+    output directory as the displacy_path argument.
     """
     if gpu_id >= 0:
         util.use_gpu(gpu_id)
@@ -50,7 +39,8 @@ def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False,
     if not data_path.exists():
         prints(data_path, title="Evaluation data not found", exits=1)
     if displacy_path and not displacy_path.exists():
-        prints(displacy_path, title="Visualization output directory not found", exits=1)
+        prints(displacy_path, title="Visualization output directory not found",
+               exits=1)
     corpus = GoldCorpus(data_path, data_path)
     nlp = util.load_model(model)
     dev_docs = list(corpus.dev_docs(nlp, gold_preproc=gold_preproc))
@@ -64,12 +54,14 @@ def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False,
         docs, golds = zip(*dev_docs)
         render_deps = 'parser' in nlp.meta.get('pipeline', [])
         render_ents = 'ner' in nlp.meta.get('pipeline', [])
-        render_parses(docs, displacy_path, model_name=model, limit=displacy_limit,
-                      deps=render_deps, ents=render_ents)
-        prints(displacy_path, title="Generated %s parses as HTML" % displacy_limit)
+        render_parses(docs, displacy_path, model_name=model,
+                      limit=displacy_limit, deps=render_deps, ents=render_ents)
+        msg = "Generated %s parses as HTML" % displacy_limit
+        prints(displacy_path, title=msg)


-def render_parses(docs, output_path, model_name='', limit=250, deps=True, ents=True):
+def render_parses(docs, output_path, model_name='', limit=250, deps=True,
+                  ents=True):
     docs[0].user_data['title'] = model_name
     if ents:
         with (output_path / 'entities.html').open('w') as file_:
@@ -77,7 +69,8 @@ def render_parses(docs, output_path, model_name='', limit=250, deps=True, ents=T
             file_.write(html)
     if deps:
         with (output_path / 'parses.html').open('w') as file_:
-            html = displacy.render(docs[:limit], style='dep', page=True, options={'compact': True})
+            html = displacy.render(docs[:limit], style='dep', page=True,
+                                   options={'compact': True})
             file_.write(html)
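render_parses writes displaCy output straight to standalone HTML pages.
A minimal sketch of that rendering call, assuming an 'en' model is
installed and linked:

    import spacy
    from spacy import displacy

    nlp = spacy.load('en')
    docs = [nlp(u"Apple is looking at buying a U.K. startup.")]
    docs[0].user_data['title'] = 'my model'  # shown as the page title
    html = displacy.render(docs, style='dep', page=True,
                           options={'compact': True})
    with open('parses.html', 'w') as file_:
        file_.write(html)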
spacy/cli/info.py

@@ -12,8 +12,7 @@ from .. import util

 @plac.annotations(
     model=("optional: shortcut link of model", "positional", None, str),
-    markdown=("generate Markdown for GitHub issues", "flag", "md", str)
-)
+    markdown=("generate Markdown for GitHub issues", "flag", "md", str))
 def info(cmd, model=None, markdown=False):
     """Print info about spaCy installation. If a model shortcut link is
     speficied as an argument, print model information. Flag --markdown
spacy/cli/link.py

@@ -12,8 +12,7 @@ from .. import util
 @plac.annotations(
     origin=("package name or local path to model", "positional", None, str),
     link_name=("name of shortuct link to create", "positional", None, str),
-    force=("force overwriting of existing link", "flag", "f", bool)
-)
+    force=("force overwriting of existing link", "flag", "f", bool))
 def link(cmd, origin, link_name, force=False, model_path=None):
     """
     Create a symlink for models within the spacy/data directory. Accepts
@@ -46,8 +45,9 @@ def link(cmd, origin, link_name, force=False, model_path=None):
         # This is quite dirty, but just making sure other errors are caught.
         prints("Creating a symlink in spacy/data failed. Make sure you have "
                "the required permissions and try re-running the command as "
-               "admin, or use a virtualenv. You can still import the model as a "
-               "module and call its load() method, or create the symlink manually.",
+               "admin, or use a virtualenv. You can still import the model as "
+               "a module and call its load() method, or create the symlink "
+               "manually.",
                "%s --> %s" % (path2str(model_path), path2str(link_path)),
                title="Error: Couldn't link model to '%s'" % link_name)
         raise
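The fallback named in that error message works like this; a sketch
assuming the en_core_web_sm package is installed:

    import en_core_web_sm  # the model is a regular Python package

    nlp = en_core_web_sm.load()
    doc = nlp(u"This works without a shortcut link.")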
spacy/cli/package.py

@@ -16,10 +16,12 @@ from .. import about
     input_dir=("directory with model data", "positional", None, str),
     output_dir=("output parent directory", "positional", None, str),
     meta_path=("path to meta.json", "option", "m", str),
-    create_meta=("create meta.json, even if one exists in directory", "flag", "c", bool),
-    force=("force overwriting of existing folder in output directory", "flag", "f", bool)
-)
-def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, force=False):
+    create_meta=("create meta.json, even if one exists in directory", "flag",
+                 "c", bool),
+    force=("force overwriting of existing folder in output directory", "flag",
+           "f", bool))
+def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False,
+            force=False):
     """
     Generate Python package for model data, including meta and required
     installation files. A new directory will be created in the specified
@@ -52,13 +54,15 @@ def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, force
     package_path = main_path / model_name

     create_dirs(package_path, force)
-    shutil.copytree(path2str(input_path), path2str(package_path / model_name_v))
+    shutil.copytree(path2str(input_path),
+                    path2str(package_path / model_name_v))
     create_file(main_path / 'meta.json', json_dumps(meta))
     create_file(main_path / 'setup.py', template_setup)
     create_file(main_path / 'MANIFEST.in', template_manifest)
     create_file(package_path / '__init__.py', template_init)
-    prints(main_path, "To build the package, run `python setup.py sdist` in this "
-           "directory.", title="Successfully created package '%s'" % model_name_v)
+    prints(main_path, "To build the package, run `python setup.py sdist` in "
+           "this directory.",
+           title="Successfully created package '%s'" % model_name_v)


 def create_dirs(package_path, force):
@@ -66,9 +70,10 @@ def create_dirs(package_path, force):
     if force:
         shutil.rmtree(path2str(package_path))
     else:
-        prints(package_path, "Please delete the directory and try again, or "
-               "use the --force flag to overwrite existing directories.",
-               title="Package directory already exists", exits=1)
+        prints(package_path, "Please delete the directory and try again, "
+               "or use the --force flag to overwrite existing "
+               "directories.", title="Package directory already exists",
+               exits=1)
     Path.mkdir(package_path, parents=True)


@@ -82,7 +87,8 @@ def generate_meta(model_path):
     settings = [('lang', 'Model language', 'en'),
                 ('name', 'Model name', 'model'),
                 ('version', 'Model version', '0.0.0'),
-                ('spacy_version', 'Required spaCy version', '>=%s,<3.0.0' % about.__version__),
+                ('spacy_version', 'Required spaCy version',
+                 '>=%s,<3.0.0' % about.__version__),
                 ('description', 'Model description', False),
                 ('author', 'Author', False),
                 ('email', 'Author email', False),
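The success message tells the user to run `python setup.py sdist` inside
the generated directory. The same step driven from Python, with a
hypothetical output path:

    import subprocess
    import sys

    pkg_dir = '/tmp/models/en_example_model-1.0.0'  # hypothetical path
    subprocess.call([sys.executable, 'setup.py', 'sdist'], cwd=pkg_dir)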
spacy/cli/profile.py

@@ -27,15 +27,15 @@ def read_inputs(loc):

 @plac.annotations(
     lang=("model/language", "positional", None, str),
-    inputs=("Location of input file", "positional", None, read_inputs)
-)
+    inputs=("Location of input file", "positional", None, read_inputs))
 def profile(cmd, lang, inputs=None):
     """
     Profile a spaCy pipeline, to find out which functions take the most time.
     """
     nlp = spacy.load(lang)
     texts = list(cytoolz.take(10000, inputs))
-    cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(), "Profile.prof")
+    cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(),
+                    "Profile.prof")
     s = pstats.Stats("Profile.prof")
     s.strip_dirs().sort_stats("time").print_stats()
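The cProfile.runctx call being rewrapped here is plain standard library.
The whole profile-then-report pattern in self-contained form, with
slow_function standing in for the parsing work:

    import cProfile
    import pstats

    def slow_function(n):
        return sum(i * i for i in range(n))

    cProfile.runctx("slow_function(10**6)", globals(), locals(),
                    "Profile.prof")
    s = pstats.Stats("Profile.prof")
    s.strip_dirs().sort_stats("time").print_stats()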
spacy/cli/train.py

@@ -2,21 +2,14 @@
 from __future__ import unicode_literals, division, print_function

 import plac
-import json
-from collections import defaultdict
-import cytoolz
 from pathlib import Path
 import dill
 import tqdm
 from thinc.neural._classes.model import Model
-from thinc.neural.optimizers import linear_decay
 from timeit import default_timer as timer
 import random
 import numpy.random

-from ..tokens.doc import Doc
-from ..scorer import Scorer
-from ..gold import GoldParse, merge_sents
 from ..gold import GoldCorpus, minibatch
 from ..util import prints
 from .. import util
@@ -31,8 +24,10 @@ numpy.random.seed(0)
 @plac.annotations(
     lang=("model language", "positional", None, str),
     output_dir=("output directory to store model in", "positional", None, str),
-    train_data=("location of JSON-formatted training data", "positional", None, str),
-    dev_data=("location of JSON-formatted development data (optional)", "positional", None, str),
+    train_data=("location of JSON-formatted training data", "positional",
+                None, str),
+    dev_data=("location of JSON-formatted development data (optional)",
+              "positional", None, str),
     n_iter=("number of iterations", "option", "n", int),
     n_sents=("number of sentences", "option", "ns", int),
     use_gpu=("Use GPU", "option", "g", int),
@@ -42,11 +37,12 @@ numpy.random.seed(0)
     no_entities=("Don't train NER", "flag", "N", bool),
     gold_preproc=("Use gold preprocessing", "flag", "G", bool),
     version=("Model version", "option", "V", str),
-    meta_path=("Optional path to meta.json. All relevant properties will be overwritten.", "option", "m", Path)
-)
+    meta_path=("Optional path to meta.json. All relevant properties will be "
+               "overwritten.", "option", "m", Path))
 def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
-          use_gpu=-1, vectors=None, no_tagger=False, no_parser=False, no_entities=False,
-          gold_preproc=False, version="0.0.0", meta_path=None):
+          use_gpu=-1, vectors=None, no_tagger=False, no_parser=False,
+          no_entities=False, gold_preproc=False, version="0.0.0",
+          meta_path=None):
     """
     Train a model. Expects data in spaCy's JSON format.
     """
@@ -72,9 +68,12 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
     meta.setdefault('name', 'unnamed')

     pipeline = ['tagger', 'parser', 'ner']
-    if no_tagger and 'tagger' in pipeline: pipeline.remove('tagger')
-    if no_parser and 'parser' in pipeline: pipeline.remove('parser')
-    if no_entities and 'ner' in pipeline: pipeline.remove('ner')
+    if no_tagger and 'tagger' in pipeline:
+        pipeline.remove('tagger')
+    if no_parser and 'parser' in pipeline:
+        pipeline.remove('parser')
+    if no_entities and 'ner' in pipeline:
+        pipeline.remove('ner')

     # Take dropout and batch size as generators of values -- dropout
     # starts high and decays sharply, to force the optimizer to explore.
@@ -139,7 +138,7 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
             scorer = nlp_loaded.evaluate(dev_docs)
             end_time = timer()
             cpu_wps = nwords/(end_time-start_time)
-            acc_loc =(output_path / ('model%d' % i) / 'accuracy.json')
+            acc_loc = (output_path / ('model%d' % i) / 'accuracy.json')
             with acc_loc.open('w') as file_:
                 file_.write(json_dumps(scorer.scores))
             meta_loc = output_path / ('model%d' % i) / 'meta.json'
@@ -157,7 +156,8 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
             with meta_loc.open('w') as file_:
                 file_.write(json_dumps(meta))
             util.set_env_log(True)
-            print_progress(i, losses, scorer.scores, cpu_wps=cpu_wps, gpu_wps=gpu_wps)
+            print_progress(i, losses, scorer.scores, cpu_wps=cpu_wps,
+                           gpu_wps=gpu_wps)
         finally:
             print("Saving model...")
             try:
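train() takes dropout and batch size "as generators of values", per the
comment kept in the hunk above. A hedged sketch of that idea; the
schedule and the numbers are illustrative, not the ones spaCy uses:

    from itertools import islice

    def decaying(start, stop, decay):
        # Infinite stream decaying linearly from start down to a floor,
        # so early updates explore more and later ones settle down.
        value = start
        while True:
            yield max(stop, value)
            value -= decay

    dropout_rates = decaying(0.35, 0.10, 1e-5)
    print(list(islice(dropout_rates, 5)))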
spacy/cli/validate.py

@@ -1,5 +1,5 @@
 # coding: utf8
-from __future__ import unicode_literals
+from __future__ import unicode_literals, print_function

 import requests
 import pkg_resources
@@ -29,8 +29,10 @@ def validate(cmd):
     model_links = get_model_links(current_compat)
     model_pkgs = get_model_pkgs(current_compat, all_models)
     incompat_links = {l for l, d in model_links.items() if not d['compat']}
-    incompat_models = {d['name'] for _, d in model_pkgs.items() if not d['compat']}
-    incompat_models.update([d['name'] for _, d in model_links.items() if not d['compat']])
+    incompat_models = {d['name'] for _, d in model_pkgs.items()
+                       if not d['compat']}
+    incompat_models.update([d['name'] for _, d in model_links.items()
+                            if not d['compat']])
     na_models = [m for m in incompat_models if m not in current_compat]
     update_models = [m for m in incompat_models if m in current_compat]

@@ -90,7 +92,6 @@ def get_model_pkgs(compat, all_models):


 def get_model_row(compat, name, data, type='package'):
-    tpl_row = ' {:<10}' + (' {:<20}' * 4)
     tpl_red = '\x1b[38;5;1m{}\x1b[0m'
     tpl_green = '\x1b[38;5;2m{}\x1b[0m'
     if data['compat']:
@@ -110,7 +111,8 @@ def get_row(*args):
 def is_model_path(model_path):
     exclude = ['cache', 'pycache', '__pycache__']
     name = model_path.parts[-1]
-    return model_path.is_dir() and name not in exclude and not name.startswith('.')
+    return (model_path.is_dir() and name not in exclude
+            and not name.startswith('.'))


 def is_compat(compat, name, version):
@@ -118,6 +120,7 @@ def is_compat(compat, name, version):


 def reformat_version(version):
+    """Hack to reformat old versions ending on '-alpha' to match pip format."""
     if version.endswith('-alpha'):
         return version.replace('-alpha', 'a0')
     return version.replace('-alpha', 'a')
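The docstring added to reformat_version describes a pip-compatibility
hack. Copying the function out of the diff and exercising it shows the
effect:

    def reformat_version(version):
        """Hack to reformat old versions ending on '-alpha' to match pip format."""
        if version.endswith('-alpha'):
            return version.replace('-alpha', 'a0')
        return version.replace('-alpha', 'a')

    print(reformat_version('2.0.0-alpha'))  # -> 2.0.0a0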