Tidy up CLI

2025-10-18 17:54:17 +03:00 · 2017-10-27 14:38:39 +02:00 · 2017-10-27 14:38:39 +02:00 · d941fc3667
commit d941fc3667
parent 298c3d973c
10 changed files with 103 additions and 99 deletions
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@ -7,10 +7,9 @@ from pathlib import Path
 from .converters import conllu2json, iob2json, conll_ner2json
 from ..util import prints

-# Converters are matched by file extension. To add a converter, add a new entry
-# to this dict with the file extension mapped to the converter function imported
-# from /converters.
-
+# Converters are matched by file extension. To add a converter, add a new
+# entry to this dict with the file extension mapped to the converter function
+# imported from /converters.
 CONVERTERS = {
    'conllu': conllu2json,
    'conll': conllu2json,
@ -24,8 +23,7 @@ CONVERTERS = {
    output_dir=("output directory for converted file", "positional", None, str),
    n_sents=("Number of sentences per doc", "option", "n", int),
    converter=("Name of converter (auto, iob, conllu or ner)", "option", "c", str),
-    morphology=("Enable appending morphology to tags", "flag", "m", bool)
-)
+    morphology=("Enable appending morphology to tags", "flag", "m", bool))
 def convert(cmd, input_file, output_dir, n_sents=1, morphology=False,
            converter='auto'):
    """
@ -40,7 +38,7 @@ def convert(cmd, input_file, output_dir, n_sents=1, morphology=False,
        prints(output_path, title="Output directory not found", exits=1)
    if converter == 'auto':
        converter = input_path.suffix[1:]
-    if not converter in CONVERTERS:
+    if converter not in CONVERTERS:
            prints("Can't find converter for %s" % converter,
                title="Unknown format", exits=1)
    func = CONVERTERS[converter]
--- a/spacy/cli/converters/conll_ner2json.py
+++ b/spacy/cli/converters/conll_ner2json.py
@ -8,7 +8,8 @@ from ...gold import iob_to_biluo

 def conll_ner2json(input_path, output_path, n_sents=10, use_morphology=False):
    """
-    Convert files in the CoNLL-2003 NER format into JSON format for use with train cli.
+    Convert files in the CoNLL-2003 NER format into JSON format for use with
+    train cli.
    """
    docs = read_conll_ner(input_path)

--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@ -13,10 +13,9 @@ from .. import about


@plac.annotations(
-    model=("model to download (shortcut or model name)", "positional", None, str),
+    model=("model to download, shortcut or name", "positional", None, str),
    direct=("force direct download. Needs model name with version and won't "
-            "perform compatibility check", "flag", "d", bool)
-)
+            "perform compatibility check", "flag", "d", bool))
 def download(cmd, model, direct=False):
    """
    Download compatible model from default download path using pip. Model
@ -30,21 +29,25 @@ def download(cmd, model, direct=False):
        model_name = shortcuts.get(model, model)
        compatibility = get_compatibility()
        version = get_version(model_name, compatibility)
-        dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version))
+        dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name,
+                                                            v=version))
        if dl == 0:
            try:
                # Get package path here because link uses
-                # pip.get_installed_distributions() to check if model is a package,
-                # which fails if model was just installed via subprocess
+                # pip.get_installed_distributions() to check if model is a
+                # package, which fails if model was just installed via
+                # subprocess
                package_path = get_package_path(model_name)
-                link(None, model_name, model, force=True, model_path=package_path)
+                link(None, model_name, model, force=True,
+                     model_path=package_path)
            except:
-                # Dirty, but since spacy.download and the auto-linking is mostly
-                # a convenience wrapper, it's best to show a success message and
-                # loading instructions, even if linking fails.
-                prints("Creating a shortcut link for 'en' didn't work (maybe you "
-                    "don't have admin permissions?), but you can still load "
-                    "the model via its full package name:",
+                # Dirty, but since spacy.download and the auto-linking is
+                # mostly a convenience wrapper, it's best to show a success
+                # message and loading instructions, even if linking fails.
+                prints(
+                    "Creating a shortcut link for 'en' didn't work (maybe "
+                    "you don't have admin permissions?), but you can still "
+                    "load the model via its full package name:",
                    "nlp = spacy.load('%s')" % model_name,
                    title="Download successful")

@ -52,9 +55,10 @@ def download(cmd, model, direct=False):
 def get_json(url, desc):
    r = requests.get(url)
    if r.status_code != 200:
-        prints("Couldn't fetch %s. Please find a model for your spaCy installation "
-               "(v%s), and download it manually." % (desc, about.__version__),
-               about.__docs_models__, title="Server error (%d)" % r.status_code, exits=1)
+        msg = ("Couldn't fetch %s. Please find a model for your spaCy "
+               "installation (v%s), and download it manually.")
+        prints(msg % (desc, about.__version__), about.__docs_models__,
+               title="Server error (%d)" % r.status_code, exits=1)
    return r.json()


@ -71,13 +75,13 @@ def get_compatibility():
 def get_version(model, comp):
    if model not in comp:
        version = about.__version__
-        prints("No compatible model found for '%s' (spaCy v%s)." % (model, version),
-               title="Compatibility error", exits=1)
+        msg = "No compatible model found for '%s' (spaCy v%s)."
+        prints(msg % (model, version), title="Compatibility error", exits=1)
    return comp[model][0]


 def download_model(filename):
    download_url = about.__download_url__ + '/' + filename
-    return subprocess.call([sys.executable, '-m',
-        'pip', 'install', '--no-cache-dir', download_url],
-        env=os.environ.copy())
+    return subprocess.call(
+        [sys.executable, '-m', 'pip', 'install', '--no-cache-dir',
+         download_url], env=os.environ.copy())
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@ -2,27 +2,15 @@
 from __future__ import unicode_literals, division, print_function

 import plac
-import json
-from collections import defaultdict
-import cytoolz
-from pathlib import Path
-import dill
-import tqdm
-from thinc.neural._classes.model import Model
-from thinc.neural.optimizers import linear_decay
 from timeit import default_timer as timer
 import random
 import numpy.random

-from ..tokens.doc import Doc
-from ..scorer import Scorer
-from ..gold import GoldParse, merge_sents
-from ..gold import GoldCorpus, minibatch
+from ..gold import GoldCorpus
 from ..util import prints
 from .. import util
-from .. import about
 from .. import displacy
-from ..compat import json_dumps
+

 random.seed(0)
 numpy.random.seed(0)
@ -30,17 +18,18 @@ numpy.random.seed(0)

@plac.annotations(
    model=("Model name or path", "positional", None, str),
-    data_path=("Location of JSON-formatted evaluation data", "positional", None, str),
+    data_path=("Location of JSON-formatted evaluation data", "positional",
+               None, str),
    gold_preproc=("Use gold preprocessing", "flag", "G", bool),
    gpu_id=("Use GPU", "option", "g", int),
-    displacy_path=("Directory to output rendered parses as HTML", "option", "dp", str),
-    displacy_limit=("Limit of parses to render as HTML", "option", "dl", int)
-)
+    displacy_path=("Directory to output rendered parses as HTML", "option",
+                   "dp", str),
+    displacy_limit=("Limit of parses to render as HTML", "option", "dl", int))
 def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False,
             displacy_path=None, displacy_limit=25):
    """
-    Evaluate a model. To render a sample of parses in a HTML file, set an output
-    directory as the displacy_path argument.
+    Evaluate a model. To render a sample of parses in a HTML file, set an
+    output directory as the displacy_path argument.
    """
    if gpu_id >= 0:
        util.use_gpu(gpu_id)
@ -50,7 +39,8 @@ def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False,
    if not data_path.exists():
        prints(data_path, title="Evaluation data not found", exits=1)
    if displacy_path and not displacy_path.exists():
-        prints(displacy_path, title="Visualization output directory not found", exits=1)
+        prints(displacy_path, title="Visualization output directory not found",
+               exits=1)
    corpus = GoldCorpus(data_path, data_path)
    nlp = util.load_model(model)
    dev_docs = list(corpus.dev_docs(nlp, gold_preproc=gold_preproc))
@ -64,12 +54,14 @@ def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False,
        docs, golds = zip(*dev_docs)
        render_deps = 'parser' in nlp.meta.get('pipeline', [])
        render_ents = 'ner' in nlp.meta.get('pipeline', [])
-        render_parses(docs, displacy_path, model_name=model, limit=displacy_limit,
-                      deps=render_deps, ents=render_ents)
-        prints(displacy_path, title="Generated %s parses as HTML" % displacy_limit)
+        render_parses(docs, displacy_path, model_name=model,
+                      limit=displacy_limit, deps=render_deps, ents=render_ents)
+        msg = "Generated %s parses as HTML" % displacy_limit
+        prints(displacy_path, title=msg)


-def render_parses(docs, output_path, model_name='', limit=250, deps=True, ents=True):
+def render_parses(docs, output_path, model_name='', limit=250, deps=True,
+                  ents=True):
    docs[0].user_data['title'] = model_name
    if ents:
        with (output_path / 'entities.html').open('w') as file_:
@ -77,7 +69,8 @@ def render_parses(docs, output_path, model_name='', limit=250, deps=True, ents=T
            file_.write(html)
    if deps:
        with (output_path / 'parses.html').open('w') as file_:
-            html = displacy.render(docs[:limit], style='dep', page=True, options={'compact': True})
+            html = displacy.render(docs[:limit], style='dep', page=True,
+                                   options={'compact': True})
            file_.write(html)


--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@ -12,8 +12,7 @@ from .. import util

@plac.annotations(
    model=("optional: shortcut link of model", "positional", None, str),
-    markdown=("generate Markdown for GitHub issues", "flag", "md", str)
-)
+    markdown=("generate Markdown for GitHub issues", "flag", "md", str))
 def info(cmd, model=None, markdown=False):
    """Print info about spaCy installation. If a model shortcut link is
    speficied as an argument, print model information. Flag --markdown
--- a/spacy/cli/link.py
+++ b/spacy/cli/link.py
@ -12,8 +12,7 @@ from .. import util
@plac.annotations(
    origin=("package name or local path to model", "positional", None, str),
    link_name=("name of shortuct link to create", "positional", None, str),
-    force=("force overwriting of existing link", "flag", "f", bool)
-)
+    force=("force overwriting of existing link", "flag", "f", bool))
 def link(cmd, origin, link_name, force=False, model_path=None):
    """
    Create a symlink for models within the spacy/data directory. Accepts
@ -46,8 +45,9 @@ def link(cmd, origin, link_name, force=False, model_path=None):
        # This is quite dirty, but just making sure other errors are caught.
        prints("Creating a symlink in spacy/data failed. Make sure you have "
               "the required permissions and try re-running the command as "
-               "admin, or use a virtualenv. You can still import the model as a "
-               "module and call its load() method, or create the symlink manually.",
+               "admin, or use a virtualenv. You can still import the model as "
+               "a module and call its load() method, or create the symlink "
+               "manually.",
               "%s --> %s" % (path2str(model_path), path2str(link_path)),
               title="Error: Couldn't link model to '%s'" % link_name)
        raise
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@ -16,10 +16,12 @@ from .. import about
    input_dir=("directory with model data", "positional", None, str),
    output_dir=("output parent directory", "positional", None, str),
    meta_path=("path to meta.json", "option", "m", str),
-    create_meta=("create meta.json, even if one exists in directory", "flag", "c", bool),
-    force=("force overwriting of existing folder in output directory", "flag", "f", bool)
-)
-def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, force=False):
+    create_meta=("create meta.json, even if one exists in directory", "flag",
+                 "c", bool),
+    force=("force overwriting of existing folder in output directory", "flag",
+           "f", bool))
+def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False,
+            force=False):
    """
    Generate Python package for model data, including meta and required
    installation files. A new directory will be created in the specified
@ -52,13 +54,15 @@ def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, force
    package_path = main_path / model_name

    create_dirs(package_path, force)
-    shutil.copytree(path2str(input_path), path2str(package_path / model_name_v))
+    shutil.copytree(path2str(input_path),
+                    path2str(package_path / model_name_v))
    create_file(main_path / 'meta.json', json_dumps(meta))
    create_file(main_path / 'setup.py', template_setup)
    create_file(main_path / 'MANIFEST.in', template_manifest)
    create_file(package_path / '__init__.py', template_init)
-    prints(main_path, "To build the package, run `python setup.py sdist` in this "
-           "directory.", title="Successfully created package '%s'" % model_name_v)
+    prints(main_path, "To build the package, run `python setup.py sdist` in "
+           "this directory.",
+           title="Successfully created package '%s'" % model_name_v)


 def create_dirs(package_path, force):
@ -66,9 +70,10 @@ def create_dirs(package_path, force):
        if force:
            shutil.rmtree(path2str(package_path))
        else:
-            prints(package_path, "Please delete the directory and try again, or "
-                   "use the --force flag to overwrite existing directories.",
-                   title="Package directory already exists", exits=1)
+            prints(package_path, "Please delete the directory and try again, "
+                   "or use the --force flag to overwrite existing "
+                   "directories.", title="Package directory already exists",
+                   exits=1)
    Path.mkdir(package_path, parents=True)


@ -82,7 +87,8 @@ def generate_meta(model_path):
    settings = [('lang', 'Model language', 'en'),
                ('name', 'Model name', 'model'),
                ('version', 'Model version', '0.0.0'),
-                ('spacy_version', 'Required spaCy version', '>=%s,<3.0.0' % about.__version__),
+                ('spacy_version', 'Required spaCy version',
+                 '>=%s,<3.0.0' % about.__version__),
                ('description', 'Model description', False),
                ('author', 'Author', False),
                ('email', 'Author email', False),
--- a/spacy/cli/profile.py
+++ b/spacy/cli/profile.py
@ -27,15 +27,15 @@ def read_inputs(loc):

@plac.annotations(
    lang=("model/language", "positional", None, str),
-    inputs=("Location of input file", "positional", None, read_inputs)
-)
+    inputs=("Location of input file", "positional", None, read_inputs))
 def profile(cmd, lang, inputs=None):
    """
    Profile a spaCy pipeline, to find out which functions take the most time.
    """
    nlp = spacy.load(lang)
    texts = list(cytoolz.take(10000, inputs))
-    cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(), "Profile.prof")
+    cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(),
+                    "Profile.prof")
    s = pstats.Stats("Profile.prof")
    s.strip_dirs().sort_stats("time").print_stats()

--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@ -2,21 +2,14 @@
 from __future__ import unicode_literals, division, print_function

 import plac
-import json
-from collections import defaultdict
-import cytoolz
 from pathlib import Path
 import dill
 import tqdm
 from thinc.neural._classes.model import Model
-from thinc.neural.optimizers import linear_decay
 from timeit import default_timer as timer
 import random
 import numpy.random

-from ..tokens.doc import Doc
-from ..scorer import Scorer
-from ..gold import GoldParse, merge_sents
 from ..gold import GoldCorpus, minibatch
 from ..util import prints
 from .. import util
@ -31,8 +24,10 @@ numpy.random.seed(0)
@plac.annotations(
    lang=("model language", "positional", None, str),
    output_dir=("output directory to store model in", "positional", None, str),
-    train_data=("location of JSON-formatted training data", "positional", None, str),
-    dev_data=("location of JSON-formatted development data (optional)", "positional", None, str),
+    train_data=("location of JSON-formatted training data", "positional",
+                None, str),
+    dev_data=("location of JSON-formatted development data (optional)",
+              "positional", None, str),
    n_iter=("number of iterations", "option", "n", int),
    n_sents=("number of sentences", "option", "ns", int),
    use_gpu=("Use GPU", "option", "g", int),
@ -42,11 +37,12 @@ numpy.random.seed(0)
    no_entities=("Don't train NER", "flag", "N", bool),
    gold_preproc=("Use gold preprocessing", "flag", "G", bool),
    version=("Model version", "option", "V", str),
-    meta_path=("Optional path to meta.json. All relevant properties will be overwritten.", "option", "m", Path)
-)
+    meta_path=("Optional path to meta.json. All relevant properties will be "
+               "overwritten.", "option", "m", Path))
 def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
-          use_gpu=-1, vectors=None, no_tagger=False, no_parser=False, no_entities=False,
-          gold_preproc=False, version="0.0.0", meta_path=None):
+          use_gpu=-1, vectors=None, no_tagger=False, no_parser=False,
+          no_entities=False, gold_preproc=False, version="0.0.0",
+          meta_path=None):
    """
    Train a model. Expects data in spaCy's JSON format.
    """
@ -72,9 +68,12 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
    meta.setdefault('name', 'unnamed')

    pipeline = ['tagger', 'parser', 'ner']
-    if no_tagger and 'tagger' in pipeline: pipeline.remove('tagger')
-    if no_parser and 'parser' in pipeline: pipeline.remove('parser')
-    if no_entities and 'ner' in pipeline: pipeline.remove('ner')
+    if no_tagger and 'tagger' in pipeline:
+        pipeline.remove('tagger')
+    if no_parser and 'parser' in pipeline:
+        pipeline.remove('parser')
+    if no_entities and 'ner' in pipeline:
+        pipeline.remove('ner')

    # Take dropout and batch size as generators of values -- dropout
    # starts high and decays sharply, to force the optimizer to explore.
@ -157,7 +156,8 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
                with meta_loc.open('w') as file_:
                    file_.write(json_dumps(meta))
                util.set_env_log(True)
-            print_progress(i, losses, scorer.scores, cpu_wps=cpu_wps, gpu_wps=gpu_wps)
+            print_progress(i, losses, scorer.scores, cpu_wps=cpu_wps,
+                           gpu_wps=gpu_wps)
    finally:
        print("Saving model...")
        try:
--- a/spacy/cli/validate.py
+++ b/spacy/cli/validate.py
@ -1,5 +1,5 @@
 # coding: utf8
-from __future__ import unicode_literals
+from __future__ import unicode_literals, print_function

 import requests
 import pkg_resources
@ -29,8 +29,10 @@ def validate(cmd):
    model_links = get_model_links(current_compat)
    model_pkgs = get_model_pkgs(current_compat, all_models)
    incompat_links = {l for l, d in model_links.items() if not d['compat']}
-    incompat_models = {d['name'] for _, d in model_pkgs.items() if not d['compat']}
-    incompat_models.update([d['name'] for _, d in model_links.items() if not d['compat']])
+    incompat_models = {d['name'] for _, d in model_pkgs.items()
+                       if not d['compat']}
+    incompat_models.update([d['name'] for _, d in model_links.items()
+                            if not d['compat']])
    na_models = [m for m in incompat_models if m not in current_compat]
    update_models = [m for m in incompat_models if m in current_compat]

@ -90,7 +92,6 @@ def get_model_pkgs(compat, all_models):


 def get_model_row(compat, name, data, type='package'):
-    tpl_row = '    {:<10}' + ('  {:<20}' * 4)
    tpl_red = '\x1b[38;5;1m{}\x1b[0m'
    tpl_green = '\x1b[38;5;2m{}\x1b[0m'
    if data['compat']:
@ -110,7 +111,8 @@ def get_row(*args):
 def is_model_path(model_path):
    exclude = ['cache', 'pycache', '__pycache__']
    name = model_path.parts[-1]
-    return model_path.is_dir() and name not in exclude and not name.startswith('.')
+    return (model_path.is_dir() and name not in exclude
+            and not name.startswith('.'))


 def is_compat(compat, name, version):
@ -118,6 +120,7 @@ def is_compat(compat, name, version):


 def reformat_version(version):
+    """Hack to reformat old versions ending on '-alpha' to match pip format."""
    if version.endswith('-alpha'):
        return version.replace('-alpha', 'a0')
    return version.replace('-alpha', 'a')