From d941fc36672bb08cfaf59c2301b98f27ff846667 Mon Sep 17 00:00:00 2001
From: ines
Date: Fri, 27 Oct 2017 14:38:39 +0200
Subject: [PATCH] Tidy up CLI

---
 spacy/cli/convert.py                   | 12 +++----
 spacy/cli/converters/conll_ner2json.py |  3 +-
 spacy/cli/download.py                  | 46 ++++++++++++++------------
 spacy/cli/evaluate.py                  | 45 +++++++++++--------------
 spacy/cli/info.py                      |  3 +-
 spacy/cli/link.py                      |  8 ++---
 spacy/cli/package.py                   | 28 ++++++++++------
 spacy/cli/profile.py                   |  8 ++---
 spacy/cli/train.py                     | 36 ++++++++++----------
 spacy/cli/validate.py                  | 13 +++++---
 10 files changed, 103 insertions(+), 99 deletions(-)

diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py
index d9a812a15..ad17844a1 100644
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -7,10 +7,9 @@ from pathlib import Path
 from .converters import conllu2json, iob2json, conll_ner2json
 from ..util import prints

-# Converters are matched by file extension. To add a converter, add a new entry
-# to this dict with the file extension mapped to the converter function imported
-# from /converters.
-
+# Converters are matched by file extension. To add a converter, add a new
+# entry to this dict with the file extension mapped to the converter function
+# imported from /converters.
 CONVERTERS = {
     'conllu': conllu2json,
     'conll': conllu2json,
@@ -24,8 +23,7 @@ CONVERTERS = {
     output_dir=("output directory for converted file", "positional", None, str),
     n_sents=("Number of sentences per doc", "option", "n", int),
     converter=("Name of converter (auto, iob, conllu or ner)", "option", "c", str),
-    morphology=("Enable appending morphology to tags", "flag", "m", bool)
-)
+    morphology=("Enable appending morphology to tags", "flag", "m", bool))
 def convert(cmd, input_file, output_dir, n_sents=1, morphology=False,
             converter='auto'):
     """
@@ -40,7 +38,7 @@ def convert(cmd, input_file, output_dir, n_sents=1, morphology=False,
         prints(output_path, title="Output directory not found", exits=1)
     if converter == 'auto':
         converter = input_path.suffix[1:]
-    if not converter in CONVERTERS:
+    if converter not in CONVERTERS:
         prints("Can't find converter for %s" % converter,
                title="Unknown format", exits=1)
     func = CONVERTERS[converter]
diff --git a/spacy/cli/converters/conll_ner2json.py b/spacy/cli/converters/conll_ner2json.py
index e3bd82e7e..fb2979652 100644
--- a/spacy/cli/converters/conll_ner2json.py
+++ b/spacy/cli/converters/conll_ner2json.py
@@ -8,7 +8,8 @@ from ...gold import iob_to_biluo

 def conll_ner2json(input_path, output_path, n_sents=10, use_morphology=False):
     """
-    Convert files in the CoNLL-2003 NER format into JSON format for use with train cli.
+    Convert files in the CoNLL-2003 NER format into JSON format for use with
+    train cli.
     """
     docs = read_conll_ner(input_path)
diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index 28ae07865..0d3f11153 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -13,10 +13,9 @@ from .. import about


 @plac.annotations(
-    model=("model to download (shortcut or model name)", "positional", None, str),
+    model=("model to download, shortcut or name", "positional", None, str),
     direct=("force direct download. Needs model name with version and won't "
-            "perform compatibility check", "flag", "d", bool)
-)
+            "perform compatibility check", "flag", "d", bool))
 def download(cmd, model, direct=False):
     """
     Download compatible model from default download path using pip. Model
@@ -30,21 +29,25 @@ def download(cmd, model, direct=False):
     model_name = shortcuts.get(model, model)
     compatibility = get_compatibility()
     version = get_version(model_name, compatibility)
-    dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version))
+    dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name,
+                                                        v=version))
     if dl == 0:
         try:
             # Get package path here because link uses
-            # pip.get_installed_distributions() to check if model is a package,
-            # which fails if model was just installed via subprocess
+            # pip.get_installed_distributions() to check if model is a
+            # package, which fails if model was just installed via
+            # subprocess
             package_path = get_package_path(model_name)
-            link(None, model_name, model, force=True, model_path=package_path)
+            link(None, model_name, model, force=True,
+                 model_path=package_path)
         except:
-            # Dirty, but since spacy.download and the auto-linking is mostly
-            # a convenience wrapper, it's best to show a success message and
-            # loading instructions, even if linking fails.
-            prints("Creating a shortcut link for 'en' didn't work (maybe you "
-                   "don't have admin permissions?), but you can still load "
-                   "the model via its full package name:",
-                   "nlp = spacy.load('%s')" % model_name,
-                   title="Download successful")
+            # Dirty, but since spacy.download and the auto-linking is
+            # mostly a convenience wrapper, it's best to show a success
+            # message and loading instructions, even if linking fails.
+            prints(
+                "Creating a shortcut link for 'en' didn't work (maybe "
+                "you don't have admin permissions?), but you can still "
+                "load the model via its full package name:",
+                "nlp = spacy.load('%s')" % model_name,
+                title="Download successful")

@@ -52,9 +55,10 @@ def download(cmd, model, direct=False):

 def get_json(url, desc):
     r = requests.get(url)
     if r.status_code != 200:
-        prints("Couldn't fetch %s. Please find a model for your spaCy installation "
-               "(v%s), and download it manually." % (desc, about.__version__),
-               about.__docs_models__, title="Server error (%d)" % r.status_code, exits=1)
+        msg = ("Couldn't fetch %s. Please find a model for your spaCy "
+               "installation (v%s), and download it manually.")
+        prints(msg % (desc, about.__version__), about.__docs_models__,
+               title="Server error (%d)" % r.status_code, exits=1)
     return r.json()

@@ -71,13 +75,13 @@ def get_compatibility():

 def get_version(model, comp):
     if model not in comp:
         version = about.__version__
-        prints("No compatible model found for '%s' (spaCy v%s)." % (model, version),
-               title="Compatibility error", exits=1)
+        msg = "No compatible model found for '%s' (spaCy v%s)."
+        prints(msg % (model, version), title="Compatibility error", exits=1)
     return comp[model][0]


 def download_model(filename):
     download_url = about.__download_url__ + '/' + filename
-    return subprocess.call([sys.executable, '-m',
-            'pip', 'install', '--no-cache-dir', download_url],
-            env=os.environ.copy())
+    return subprocess.call(
+        [sys.executable, '-m', 'pip', 'install', '--no-cache-dir',
+         download_url], env=os.environ.copy())
diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index 29e30b7d2..d4d54d8aa 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -2,27 +2,15 @@ from __future__ import unicode_literals, division, print_function

 import plac
-import json
-from collections import defaultdict
-import cytoolz
-from pathlib import Path
-import dill
-import tqdm
-from thinc.neural._classes.model import Model
-from thinc.neural.optimizers import linear_decay
 from timeit import default_timer as timer
 import random
 import numpy.random

-from ..tokens.doc import Doc
-from ..scorer import Scorer
-from ..gold import GoldParse, merge_sents
-from ..gold import GoldCorpus, minibatch
+from ..gold import GoldCorpus
 from ..util import prints
 from .. import util
-from .. import about
 from .. import displacy
-from ..compat import json_dumps
+

 random.seed(0)
 numpy.random.seed(0)
@@ -30,17 +18,18 @@ numpy.random.seed(0)

 @plac.annotations(
     model=("Model name or path", "positional", None, str),
-    data_path=("Location of JSON-formatted evaluation data", "positional", None, str),
+    data_path=("Location of JSON-formatted evaluation data", "positional",
+               None, str),
     gold_preproc=("Use gold preprocessing", "flag", "G", bool),
     gpu_id=("Use GPU", "option", "g", int),
-    displacy_path=("Directory to output rendered parses as HTML", "option", "dp", str),
-    displacy_limit=("Limit of parses to render as HTML", "option", "dl", int)
-)
+    displacy_path=("Directory to output rendered parses as HTML", "option",
+                   "dp", str),
+    displacy_limit=("Limit of parses to render as HTML", "option", "dl", int))
 def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False,
              displacy_path=None, displacy_limit=25):
     """
-    Evaluate a model. To render a sample of parses in a HTML file, set an output
-    directory as the displacy_path argument.
+    Evaluate a model. To render a sample of parses in a HTML file, set an
+    output directory as the displacy_path argument.
""" if gpu_id >= 0: util.use_gpu(gpu_id) @@ -50,7 +39,8 @@ def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False, if not data_path.exists(): prints(data_path, title="Evaluation data not found", exits=1) if displacy_path and not displacy_path.exists(): - prints(displacy_path, title="Visualization output directory not found", exits=1) + prints(displacy_path, title="Visualization output directory not found", + exits=1) corpus = GoldCorpus(data_path, data_path) nlp = util.load_model(model) dev_docs = list(corpus.dev_docs(nlp, gold_preproc=gold_preproc)) @@ -64,12 +54,14 @@ def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False, docs, golds = zip(*dev_docs) render_deps = 'parser' in nlp.meta.get('pipeline', []) render_ents = 'ner' in nlp.meta.get('pipeline', []) - render_parses(docs, displacy_path, model_name=model, limit=displacy_limit, - deps=render_deps, ents=render_ents) - prints(displacy_path, title="Generated %s parses as HTML" % displacy_limit) + render_parses(docs, displacy_path, model_name=model, + limit=displacy_limit, deps=render_deps, ents=render_ents) + msg = "Generated %s parses as HTML" % displacy_limit + prints(displacy_path, title=msg) -def render_parses(docs, output_path, model_name='', limit=250, deps=True, ents=True): +def render_parses(docs, output_path, model_name='', limit=250, deps=True, + ents=True): docs[0].user_data['title'] = model_name if ents: with (output_path / 'entities.html').open('w') as file_: @@ -77,7 +69,8 @@ def render_parses(docs, output_path, model_name='', limit=250, deps=True, ents=T file_.write(html) if deps: with (output_path / 'parses.html').open('w') as file_: - html = displacy.render(docs[:limit], style='dep', page=True, options={'compact': True}) + html = displacy.render(docs[:limit], style='dep', page=True, + options={'compact': True}) file_.write(html) diff --git a/spacy/cli/info.py b/spacy/cli/info.py index 5d45b271c..3636494fb 100644 --- a/spacy/cli/info.py +++ b/spacy/cli/info.py @@ -12,8 +12,7 @@ from .. import util @plac.annotations( model=("optional: shortcut link of model", "positional", None, str), - markdown=("generate Markdown for GitHub issues", "flag", "md", str) -) + markdown=("generate Markdown for GitHub issues", "flag", "md", str)) def info(cmd, model=None, markdown=False): """Print info about spaCy installation. If a model shortcut link is speficied as an argument, print model information. Flag --markdown diff --git a/spacy/cli/link.py b/spacy/cli/link.py index 5b333dae5..cfbc97e3e 100644 --- a/spacy/cli/link.py +++ b/spacy/cli/link.py @@ -12,8 +12,7 @@ from .. import util @plac.annotations( origin=("package name or local path to model", "positional", None, str), link_name=("name of shortuct link to create", "positional", None, str), - force=("force overwriting of existing link", "flag", "f", bool) -) + force=("force overwriting of existing link", "flag", "f", bool)) def link(cmd, origin, link_name, force=False, model_path=None): """ Create a symlink for models within the spacy/data directory. Accepts @@ -46,8 +45,9 @@ def link(cmd, origin, link_name, force=False, model_path=None): # This is quite dirty, but just making sure other errors are caught. prints("Creating a symlink in spacy/data failed. Make sure you have " "the required permissions and try re-running the command as " - "admin, or use a virtualenv. You can still import the model as a " - "module and call its load() method, or create the symlink manually.", + "admin, or use a virtualenv. 
+               "admin, or use a virtualenv. You can still import the model as "
+               "a module and call its load() method, or create the symlink "
+               "manually.",
                "%s --> %s" % (path2str(model_path), path2str(link_path)),
                title="Error: Couldn't link model to '%s'" % link_name)
         raise
diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 6b0811459..d1984fe65 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -16,10 +16,12 @@ from .. import about
     input_dir=("directory with model data", "positional", None, str),
     output_dir=("output parent directory", "positional", None, str),
     meta_path=("path to meta.json", "option", "m", str),
-    create_meta=("create meta.json, even if one exists in directory", "flag", "c", bool),
-    force=("force overwriting of existing folder in output directory", "flag", "f", bool)
-)
-def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, force=False):
+    create_meta=("create meta.json, even if one exists in directory", "flag",
+                 "c", bool),
+    force=("force overwriting of existing folder in output directory", "flag",
+           "f", bool))
+def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False,
+            force=False):
     """
     Generate Python package for model data, including meta and required
     installation files. A new directory will be created in the specified
@@ -52,13 +54,15 @@ def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False,
     package_path = main_path / model_name
     create_dirs(package_path, force)
-    shutil.copytree(path2str(input_path), path2str(package_path / model_name_v))
+    shutil.copytree(path2str(input_path),
+                    path2str(package_path / model_name_v))
     create_file(main_path / 'meta.json', json_dumps(meta))
     create_file(main_path / 'setup.py', template_setup)
     create_file(main_path / 'MANIFEST.in', template_manifest)
     create_file(package_path / '__init__.py', template_init)
-    prints(main_path, "To build the package, run `python setup.py sdist` in this "
-           "directory.", title="Successfully created package '%s'" % model_name_v)
+    prints(main_path, "To build the package, run `python setup.py sdist` in "
+           "this directory.",
+           title="Successfully created package '%s'" % model_name_v)
@@ -66,9 +70,10 @@ def create_dirs(package_path, force):
     if package_path.exists():
         if force:
             shutil.rmtree(path2str(package_path))
         else:
-            prints(package_path, "Please delete the directory and try again, or "
-                   "use the --force flag to overwrite existing directories.",
-                   title="Package directory already exists", exits=1)
+            prints(package_path, "Please delete the directory and try again, "
+                   "or use the --force flag to overwrite existing "
+                   "directories.", title="Package directory already exists",
+                   exits=1)
     Path.mkdir(package_path, parents=True)
@@ -82,7 +87,8 @@ def generate_meta(model_path):
     settings = [('lang', 'Model language', 'en'),
                 ('name', 'Model name', 'model'),
                 ('version', 'Model version', '0.0.0'),
-                ('spacy_version', 'Required spaCy version', '>=%s,<3.0.0' % about.__version__),
+                ('spacy_version', 'Required spaCy version',
+                 '>=%s,<3.0.0' % about.__version__),
                 ('description', 'Model description', False),
                 ('author', 'Author', False),
                 ('email', 'Author email', False),
diff --git a/spacy/cli/profile.py b/spacy/cli/profile.py
index db6fc5b41..a394989d0 100644
--- a/spacy/cli/profile.py
+++ b/spacy/cli/profile.py
@@ -27,15 +27,15 @@ def read_inputs(loc):

 @plac.annotations(
     lang=("model/language", "positional", None, str),
-    inputs=("Location of input file", "positional", None, read_inputs)
-)
+    inputs=("Location of input file", "positional", None, read_inputs))
 def profile(cmd, lang, inputs=None):
     """
     Profile a spaCy pipeline, to find out which functions take the most time.
     """
-    nlp = spacy.load(lang)
+    nlp = spacy.load(lang)
     texts = list(cytoolz.take(10000, inputs))
-    cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(), "Profile.prof")
+    cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(),
+                    "Profile.prof")
     s = pstats.Stats("Profile.prof")
     s.strip_dirs().sort_stats("time").print_stats()
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index da398751c..fb96e6c05 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -2,21 +2,14 @@ from __future__ import unicode_literals, division, print_function

 import plac
-import json
-from collections import defaultdict
-import cytoolz
 from pathlib import Path
 import dill
 import tqdm
 from thinc.neural._classes.model import Model
-from thinc.neural.optimizers import linear_decay
 from timeit import default_timer as timer
 import random
 import numpy.random

-from ..tokens.doc import Doc
-from ..scorer import Scorer
-from ..gold import GoldParse, merge_sents
 from ..gold import GoldCorpus, minibatch
 from ..util import prints
 from .. import util
@@ -31,8 +24,10 @@ numpy.random.seed(0)
 @plac.annotations(
     lang=("model language", "positional", None, str),
     output_dir=("output directory to store model in", "positional", None, str),
-    train_data=("location of JSON-formatted training data", "positional", None, str),
-    dev_data=("location of JSON-formatted development data (optional)", "positional", None, str),
+    train_data=("location of JSON-formatted training data", "positional",
+                None, str),
+    dev_data=("location of JSON-formatted development data (optional)",
+              "positional", None, str),
     n_iter=("number of iterations", "option", "n", int),
     n_sents=("number of sentences", "option", "ns", int),
     use_gpu=("Use GPU", "option", "g", int),
@@ -42,11 +37,12 @@ numpy.random.seed(0)
     no_entities=("Don't train NER", "flag", "N", bool),
     gold_preproc=("Use gold preprocessing", "flag", "G", bool),
     version=("Model version", "option", "V", str),
-    meta_path=("Optional path to meta.json. All relevant properties will be overwritten.", "option", "m", Path)
-)
+    meta_path=("Optional path to meta.json. All relevant properties will be "
+               "overwritten.", "option", "m", Path))
 def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
-          use_gpu=-1, vectors=None, no_tagger=False, no_parser=False, no_entities=False,
-          gold_preproc=False, version="0.0.0", meta_path=None):
+          use_gpu=-1, vectors=None, no_tagger=False, no_parser=False,
+          no_entities=False, gold_preproc=False, version="0.0.0",
+          meta_path=None):
     """
     Train a model. Expects data in spaCy's JSON format.
     """
@@ -72,9 +68,12 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
     meta.setdefault('name', 'unnamed')

     pipeline = ['tagger', 'parser', 'ner']
-    if no_tagger and 'tagger' in pipeline: pipeline.remove('tagger')
-    if no_parser and 'parser' in pipeline: pipeline.remove('parser')
-    if no_entities and 'ner' in pipeline: pipeline.remove('ner')
+    if no_tagger and 'tagger' in pipeline:
+        pipeline.remove('tagger')
+    if no_parser and 'parser' in pipeline:
+        pipeline.remove('parser')
+    if no_entities and 'ner' in pipeline:
+        pipeline.remove('ner')

     # Take dropout and batch size as generators of values -- dropout
     # starts high and decays sharply, to force the optimizer to explore.
@@ -139,7 +138,7 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
                 scorer = nlp_loaded.evaluate(dev_docs)
                 end_time = timer()
                 cpu_wps = nwords/(end_time-start_time)
-                acc_loc =(output_path / ('model%d' % i) / 'accuracy.json')
+                acc_loc = (output_path / ('model%d' % i) / 'accuracy.json')
                 with acc_loc.open('w') as file_:
                     file_.write(json_dumps(scorer.scores))
                 meta_loc = output_path / ('model%d' % i) / 'meta.json'
@@ -157,7 +156,8 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
                 with meta_loc.open('w') as file_:
                     file_.write(json_dumps(meta))
                 util.set_env_log(True)
-            print_progress(i, losses, scorer.scores, cpu_wps=cpu_wps, gpu_wps=gpu_wps)
+            print_progress(i, losses, scorer.scores, cpu_wps=cpu_wps,
+                           gpu_wps=gpu_wps)
     finally:
         print("Saving model...")
         try:
diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py
index c1f992ed6..1c645a554 100644
--- a/spacy/cli/validate.py
+++ b/spacy/cli/validate.py
@@ -1,5 +1,5 @@
 # coding: utf8
-from __future__ import unicode_literals
+from __future__ import unicode_literals, print_function

 import requests
 import pkg_resources
@@ -29,8 +29,10 @@ def validate(cmd):
     model_links = get_model_links(current_compat)
     model_pkgs = get_model_pkgs(current_compat, all_models)
     incompat_links = {l for l, d in model_links.items() if not d['compat']}
-    incompat_models = {d['name'] for _, d in model_pkgs.items() if not d['compat']}
-    incompat_models.update([d['name'] for _, d in model_links.items() if not d['compat']])
+    incompat_models = {d['name'] for _, d in model_pkgs.items()
+                       if not d['compat']}
+    incompat_models.update([d['name'] for _, d in model_links.items()
+                            if not d['compat']])
     na_models = [m for m in incompat_models if m not in current_compat]
     update_models = [m for m in incompat_models if m in current_compat]
@@ -90,7 +92,6 @@ def get_model_pkgs(compat, all_models):


 def get_model_row(compat, name, data, type='package'):
-    tpl_row = ' {:<10}' + (' {:<20}' * 4)
     tpl_red = '\x1b[38;5;1m{}\x1b[0m'
     tpl_green = '\x1b[38;5;2m{}\x1b[0m'
     if data['compat']:
@@ -110,7 +111,8 @@ def get_row(*args):
 def is_model_path(model_path):
     exclude = ['cache', 'pycache', '__pycache__']
     name = model_path.parts[-1]
-    return model_path.is_dir() and name not in exclude and not name.startswith('.')
+    return (model_path.is_dir() and name not in exclude
+            and not name.startswith('.'))


 def is_compat(compat, name, version):
@@ -118,6 +120,7 @@ def is_compat(compat, name, version):


 def reformat_version(version):
+    """Hack to reformat old versions ending on '-alpha' to match pip format."""
     if version.endswith('-alpha'):
         return version.replace('-alpha', 'a0')
     return version.replace('-alpha', 'a')
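For readers unfamiliar with the conventions this patch normalizes, the two recurring patterns are plac annotation tuples of (help, kind, abbreviation, type) and the extension-keyed CONVERTERS dispatch dict from convert.py. The following minimal sketch shows them in isolation; the demo_convert command and its converter stub are hypothetical stand-ins, not spaCy's actual implementations.

# Minimal standalone sketch of the CLI pattern used in the patch above.
# The command and converter bodies here are hypothetical; only the
# conventions (annotation tuples, dispatch dict) mirror the real code.
from __future__ import unicode_literals, print_function

import plac


def conllu2json(input_path, output_path, n_sents=1, use_morphology=False):
    # Stub converter: a real one would read input_path and write JSON
    # suitable for the train CLI into output_path.
    print("converting %s -> %s (n_sents=%d, morphology=%s)"
          % (input_path, output_path, n_sents, use_morphology))


# Converters are matched by file extension: to add one, map the extension
# to a converter function, as in spacy/cli/convert.py.
CONVERTERS = {
    'conllu': conllu2json,
    'conll': conllu2json,
}


@plac.annotations(
    # Each plac annotation is a (help, kind, abbreviation, type) tuple;
    # positional arguments take no abbreviation, hence None.
    input_file=("input file", "positional", None, str),
    output_dir=("output directory for converted file", "positional", None, str),
    n_sents=("Number of sentences per doc", "option", "n", int),
    morphology=("Enable appending morphology to tags", "flag", "m", bool))
def demo_convert(input_file, output_dir, n_sents=1, morphology=False):
    """Convert a file, picking the converter by its file extension."""
    ext = input_file.rsplit('.', 1)[-1]
    if ext not in CONVERTERS:  # the membership test the patch rewrites
        raise SystemExit("Can't find converter for %s" % ext)
    func = CONVERTERS[ext]
    func(input_file, output_dir, n_sents=n_sents, use_morphology=morphology)


if __name__ == '__main__':
    # e.g. python demo_convert.py data.conllu /tmp -n 10 -m
    plac.call(demo_convert)

spaCy's own commands additionally take a leading cmd argument, which the spacy entry point supplies when dispatching subcommands; the sketch drops it so it stays runnable on its own.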