Small CLI improvements (#3030)

* Add todo * Auto-format * Update wasabi pin * Format training results with wasabi * Remove loading animation from model saving Currently behaves weirdly * Inline messages * Remove unnecessary path2str Already taken care of by printer * Inline messages in CLI * Remove unused function * Move loading indicator into loading function * Check for invalid whitespace entities
2025-08-01 10:59:55 +03:00 · 2018-12-08 11:49:43 +01:00 · 2018-12-08 11:49:43 +01:00 · ffdd5e964f
commit ffdd5e964f
parent 8aa7882762
14 changed files with 199 additions and 271 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -4,7 +4,7 @@ preshed>=2.0.1,<2.1.0
 thinc==7.0.0.dev6
 blis>=0.2.2,<0.3.0
 murmurhash>=0.28.0,<1.1.0
-wasabi>=0.0.10,<1.1.0
+wasabi>=0.0.12,<1.1.0
 srsly>=0.0.5,<1.1.0
 # Third party dependencies
 numpy>=1.15.0
--- a/setup.py
+++ b/setup.py
@ -13,12 +13,12 @@ from setuptools import Extension, setup, find_packages


 def is_new_osx():
-    '''Check whether we're on OSX >= 10.10'''
+    """Check whether we're on OSX >= 10.10"""
    name = distutils.util.get_platform()
-    if sys.platform != 'darwin':
+    if sys.platform != "darwin":
        return False
-    elif name.startswith('macosx-10'):
-        minor_version = int(name.split('-')[1].split('.')[1])
+    elif name.startswith("macosx-10"):
+        minor_version = int(name.split("-")[1].split(".")[1])
        if minor_version >= 7:
            return True
        else:
@ -27,7 +27,6 @@ def is_new_osx():
        return False


-
 PACKAGE_DATA = {"": ["*.pyx", "*.pxd", "*.txt", "*.tokens"]}


@ -84,7 +83,6 @@ if is_new_osx():
    LINK_OPTIONS["other"].append("-nodefaultlibs")


-
 USE_OPENMP_DEFAULT = "0" if sys.platform != "darwin" else None
 if os.environ.get("USE_OPENMP", USE_OPENMP_DEFAULT) == "1":
    if sys.platform == "darwin":
@ -232,7 +230,7 @@ def setup_package():
                "regex==2018.01.10",
                "requests>=2.13.0,<3.0.0",
                "jsonschema>=2.6.0,<3.0.0",
-                "wasabi>=0.0.8,<1.1.0",
+                "wasabi>=0.0.12,<1.1.0",
                "srsly>=0.0.5,<1.1.0",
                'pathlib==1.0.1; python_version < "3.4"',
            ],
--- a/spacy/cli/_messages.py
+++ b/spacy/cli/_messages.py
@ -1,105 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
-# fmt: off
-
-class Messages(object):
-    M001 = ("Download successful but linking failed")
-    M002 = ("Creating a shortcut link for 'en' didn't work (maybe you "
-            "don't have admin permissions?), but you can still load the "
-            "model via its full package name: nlp = spacy.load('{name}')")
-    M003 = ("Server error ({code})")
-    M004 = ("Couldn't fetch {desc}. Please find a model for your spaCy "
-            "installation (v{version}), and download it manually. For more "
-            "details, see the documentation: https://spacy.io/usage/models")
-    M005 = ("Compatibility error")
-    M006 = ("No compatible models found for v{version} of spaCy.")
-    M007 = ("No compatible model found for '{name}' (spaCy v{version}).")
-    M008 = ("Can't locate model data")
-    M009 = ("The data should be located in {path}")
-    M010 = ("Can't find the spaCy data path to create model symlink")
-    M011 = ("Make sure a directory `/data` exists within your spaCy "
-            "installation and try again. The data directory should be "
-            "located here:")
-    M012 = ("Link '{name}' already exists")
-    M013 = ("To overwrite an existing link, use the --force flag.")
-    M014 = ("Can't overwrite symlink '{name}'")
-    M015 = ("This can happen if your data directory contains a directory or "
-            "file of the same name.")
-    M016 = ("Error: Couldn't link model to '{name}'")
-    M017 = ("Creating a symlink in spacy/data failed. Make sure you have the "
-            "required permissions and try re-running the command as admin, or "
-            "use a virtualenv. You can still import the model as a module and "
-            "call its load() method, or create the symlink manually.")
-    M018 = ("Linking successful")
-    M019 = ("You can now load the model via spacy.load('{name}')")
-    M020 = ("Can't find model meta.json")
-    M021 = ("Couldn't fetch compatibility table.")
-    M022 = ("Can't find spaCy v{version} in compatibility table")
-    M023 = ("Installed models (spaCy v{version})")
-    M024 = ("No models found in your current environment.")
-    M025 = ("Use the following commands to update the model packages:")
-    M026 = ("The following models are not available for spaCy "
-            "v{version}: {models}")
-    M027 = ("You may also want to overwrite the incompatible links using the "
-            "`python -m spacy link` command with `--force`, or remove them "
-            "from the data directory. Data path: {path}")
-    M028 = ("Input file not found")
-    M029 = ("Output directory not found")
-    M030 = ("Unknown format")
-    M031 = ("Can't find converter for {converter}")
-    M032 = ("Generated output file {name}")
-    M033 = ("Created {n_docs} documents")
-    M034 = ("Evaluation data not found")
-    M035 = ("Visualization output directory not found")
-    M036 = ("Generated {n} parses as HTML")
-    M037 = ("Can't find words frequencies file")
-    M038 = ("Sucessfully compiled vocab")
-    M039 = ("{entries} entries, {vectors} vectors")
-    M040 = ("Output directory not found")
-    M041 = ("Loaded meta.json from file")
-    M042 = ("Successfully created package '{name}'")
-    M043 = ("To build the package, run `python setup.py sdist` in this "
-            "directory.")
-    M044 = ("Package directory already exists")
-    M045 = ("Please delete the directory and try again, or use the `--force` "
-            "flag to overwrite existing directories.")
-    M046 = ("Generating meta.json")
-    M047 = ("Enter the package settings for your model. The following "
-            "information will be read from your model data: pipeline, vectors.")
-    M048 = ("No '{key}' setting found in meta.json")
-    M049 = ("This setting is required to build your package.")
-    M050 = ("Training data not found")
-    M051 = ("Development data not found")
-    M052 = ("Not a valid meta.json format")
-    M053 = ("Expected dict but got: {meta_type}")
-    M054 = ("No --lang specified, but tokenization required.")
-    M055 = ("Training pipeline: {pipeline}")
-    M056 = ("Starting with base model '{model}'")
-    M057 = ("Starting with blank model '{model}'")
-    M058 = ("Loading vector from model '{model}'")
-    M059 = ("Can't use multitask objective without '{pipe}' in the pipeline")
-    M060 = ("Counting training words (limit={limit})")
-    M061 = ("\nSaving model...")
-    M062 = ("Output directory is not empty.")
-    M063 = ("Incompatible arguments")
-    M064 = ("The -f and -c arguments are deprecated, and not compatible with "
-            "the -j argument, which should specify the same information. "
-            "Either merge the frequencies and clusters data into the "
-            "JSONL-formatted file (recommended), or use only the -f and -c "
-            "files, without the other lexical attributes.")
-    M065 = ("This can lead to unintended side effects when saving the model. "
-            "Please use an empty directory or a different path instead. If "
-            "the specified output path doesn't exist, the directory will be "
-            "created for you.")
-    M066 = ("Saved model to output directory")
-    M067 = ("Can't find lexical data")
-    M068 = ("Sucessfully compiled vocab and vectors, and saved model")
-    M069 = ("Unknown file type: '{name}'")
-    M070 = ("Supported file types: '{options}'")
-    M071 = ("Loaded pretrained tok2vec for: {components}")
-    M072 = ("Model language ('{model_lang}') doesn't match language specified "
-            "as `lang` argument ('{lang}') ")
-
-# fmt: on
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@ -6,10 +6,8 @@ from pathlib import Path
 from wasabi import Printer
 import srsly

-from ..compat import path2str
 from .converters import conllu2json, conllubio2json, iob2json, conll_ner2json
 from .converters import ner_jsonl2json
-from ._messages import Messages


 # Converters are matched by file extension. To add a converter, add a new
@ -56,18 +54,18 @@ def convert(
    input_path = Path(input_file)
    if file_type not in FILE_TYPES:
        msg.fail(
-            Messages.M069.format(name=file_type),
-            Messages.M070.format(options=", ".join(FILE_TYPES)),
+            "Unknown file type: '{}'".format(file_type),
+            "Supported file types: '{}'".format(", ".join(FILE_TYPES)),
            exits=1,
        )
    if not input_path.exists():
-        msg.fail(Messages.M028, input_path, exits=1)
+        msg.fail("Input file not found", input_path, exits=1)
    if output_dir != "-" and not Path(output_dir).exists():
-        msg.fail(Messages.M029, output_dir, exits=1)
+        msg.fail("Output directory not found", output_dir, exits=1)
    if converter == "auto":
        converter = input_path.suffix[1:]
    if converter not in CONVERTERS:
-        msg.fail(Messages.M030, Messages.M031.format(converter=converter), exits=1)
+        msg.fail("Can't find converter for {}".format(converter), exits=1)
    # Use converter function to convert data
    func = CONVERTERS[converter]
    input_data = input_path.open("r", encoding="utf-8").read()
@ -80,10 +78,7 @@ def convert(
            srsly.write_json(output_file, data)
        elif file_type == "jsonl":
            srsly.write_jsonl(output_file, data)
-        msg.good(
-            Messages.M032.format(name=path2str(output_file)),
-            Messages.M033.format(n_docs=len(data)),
-        )
+        msg.good("Generated output file ({} documents)".format(len(data)), output_file)
    else:
        # Print to stdout
        if file_type == "json":
--- a/spacy/cli/converters/jsonl2json.py
+++ b/spacy/cli/converters/jsonl2json.py
@ -4,12 +4,11 @@ from __future__ import unicode_literals
 import srsly

 from ...util import get_lang_class
-from .._messages import Messages


 def ner_jsonl2json(input_data, lang=None, n_sents=10, use_morphology=False):
    if lang is None:
-        raise ValueError(Messages.M054)
+        raise ValueError("No --lang specified, but tokenization required")
    json_docs = []
    input_tuples = [srsly.json_loads(line) for line in input_data]
    nlp = get_lang_class(lang)()
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@ -12,7 +12,6 @@ from ..gold import GoldCorpus, read_json_object
 from ..util import load_model, get_lang_class

 # from .schemas import get_schema, validate_json
-from ._messages import Messages


 # Minimum number of expected occurences of label in data to train new label
@ -58,9 +57,9 @@ def debug_data(

    # Make sure all files and paths exists if they are needed
    if not train_path.exists():
-        msg.fail(Messages.M050, train_path, exits=1)
+        msg.fail("Training data not found", train_path, exits=1)
    if not dev_path.exists():
-        msg.fail(Messages.M051, dev_path, exits=1)
+        msg.fail("Development data not found", dev_path, exits=1)

    # Initialize the model and pipeline
    pipeline = [p.strip() for p in pipeline.split(",")]
@ -72,10 +71,8 @@ def debug_data(

    msg.divider("Data format validation")
    # Load the data in one – might take a while but okay in this case
-    with msg.loading("Loading {}...".format(train_path.parts[-1])):
-        train_data = _load_file(train_path, msg)
-    with msg.loading("Loading {}...".format(dev_path.parts[-1])):
-        dev_data = _load_file(dev_path, msg)
+    train_data = _load_file(train_path, msg)
+    dev_data = _load_file(dev_path, msg)

    # Validate data format using the JSON schema
    # TODO: update once the new format is ready
@ -172,6 +169,7 @@ def debug_data(
        existing_labels = [l for l in labels if l in model_labels]
        has_low_data_warning = False
        has_no_neg_warning = False
+        has_ws_ents_error = False

        msg.divider("Named Entity Recognition")
        msg.info(
@ -201,6 +199,10 @@ def debug_data(
                "Existing: {}".format(_format_labels(existing_labels)), show=verbose
            )

+        if gold_data["ws_ents"]:
+            msg.fail("{} invalid whitespace entity spans".format(gold_data["ws_ents"]))
+            has_ws_ents_error = True
+
        for label in new_labels:
            if label_counts[label] <= NEW_LABEL_THRESHOLD:
                msg.warn(
@ -222,6 +224,8 @@ def debug_data(
            msg.good("Good amount of examples for all labels")
        if not has_no_neg_warning:
            msg.good("Examples without occurences available for all labels")
+        if not has_ws_ents_error:
+            msg.good("No entities consisting of or starting/ending with whitespace")

        if has_low_data_warning:
            msg.text(
@ -236,6 +240,11 @@ def debug_data(
                "type.",
                show=verbose,
            )
+        if has_ws_ents_error:
+            msg.text(
+                "As of spaCy v2.1.0, entity spans consisting of or starting/ending "
+                "with whitespace characters are considered invalid."
+            )

    if "textcat" in pipeline:
        msg.divider("Text Classification")
@ -321,11 +330,13 @@ def debug_data(
 def _load_file(file_path, msg):
    file_name = file_path.parts[-1]
    if file_path.suffix == ".json":
-        data = srsly.read_json(file_path)
+        with msg.loading("Loading {}...".format(file_name)):
+            data = srsly.read_json(file_path)
        msg.good("Loaded {}".format(file_name))
        return data
    elif file_path.suffix == ".jsonl":
-        data = srsly.read_jsonl(file_path)
+        with msg.loading("Loading {}...".format(file_name)):
+            data = srsly.read_jsonl(file_path)
        msg.good("Loaded {}".format(file_name))
        return data
    msg.fail(
@ -342,6 +353,7 @@ def _compile_gold(train_docs, pipeline):
        "tags": Counter(),
        "deps": Counter(),
        "words": Counter(),
+        "ws_ents": 0,
        "n_words": 0,
        "texts": set(),
    }
@ -350,7 +362,10 @@ def _compile_gold(train_docs, pipeline):
        data["n_words"] += len(gold.words)
        data["texts"].add(doc.text)
        if "ner" in pipeline:
-            for label in gold.ner:
+            for i, label in enumerate(gold.ner):
+                if label.startswith(("B-", "U-", "L-")) and doc[i].is_space:
+                    # "Illegal" whitespace entity
+                    data["ws_ents"] += 1
                if label.startswith(("B-", "U-")):
                    combined_label = label.split("-")[1]
                    data["ner"][combined_label] += 1
@ -371,18 +386,6 @@ def _format_labels(labels, counts=False):
    return ", ".join(["'{}'".format(l) for l in labels])


-def _get_ner_counts(data):
-    counter = Counter()
-    for doc, gold in data:
-        for label in gold.ner:
-            if label.startswith(("B-", "U-")):
-                combined_label = label.split("-")[1]
-                counter[combined_label] += 1
-            elif label == "-":
-                counter["-"] += 1
-    return counter
-
-
 def _get_examples_without_label(data, label):
    count = 0
    for doc, gold in data:
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@ -8,7 +8,6 @@ import subprocess
 import sys
 from wasabi import Printer

-from ._messages import Messages
 from .link import link
 from ..util import get_package_path
 from .. import about
@ -50,15 +49,24 @@ def download(model, direct=False, *pip_args):
            # Dirty, but since spacy.download and the auto-linking is
            # mostly a convenience wrapper, it's best to show a success
            # message and loading instructions, even if linking fails.
-            msg.warn(Messages.M002.format(name=model_name), Messages.M001)
+            msg.warn(
+                "Download successful but linking failed",
+                "Creating a shortcut link for 'en' didn't work (maybe you "
+                "don't have admin permissions?), but you can still load the "
+                "model via its full package name: "
+                "nlp = spacy.load('{}')".format(model_name),
+            )


 def get_json(url, desc):
    r = requests.get(url)
    if r.status_code != 200:
        msg.fail(
-            Messages.M003.format(code=r.status_code),
-            Messages.M004.format(desc=desc, version=about.__version__),
+            "Server error ({})".format(r.status_code),
+            "Couldn't fetch {}. Please find a model for your spaCy "
+            "installation (v{}), and download it manually. For more "
+            "details, see the documentation: "
+            "https://spacy.io/usage/models".format(desc, about.__version__),
            exits=1,
        )
    return r.json()
@ -70,7 +78,7 @@ def get_compatibility():
    comp_table = get_json(about.__compatibility__, "compatibility table")
    comp = comp_table["spacy"]
    if version not in comp:
-        msg.fail(Messages.M005, Messages.M006.format(version=version), exits=1)
+        msg.fail("No compatible models found for v{} of spaCy".format(version), exits=1)
    return comp[version]


@ -78,8 +86,8 @@ def get_version(model, comp):
    model = model.rsplit(".dev", 1)[0]
    if model not in comp:
        msg.fail(
-            Messages.M005,
-            Messages.M007.format(name=model, version=about.__version__),
+            "No compatible model found for '{}' "
+            "(spaCy v{}).".format(model, about.__version__),
            exits=1,
        )
    return comp[model][0]
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@ -5,7 +5,6 @@ import plac
 from timeit import default_timer as timer
 from wasabi import Printer

-from ._messages import Messages
 from ..gold import GoldCorpus
 from .. import util
 from .. import displacy
@ -39,9 +38,9 @@ def evaluate(
    data_path = util.ensure_path(data_path)
    displacy_path = util.ensure_path(displacy_path)
    if not data_path.exists():
-        msg.fail(Messages.M034, data_path, exits=1)
+        msg.fail("Evaluation data not found", data_path, exits=1)
    if displacy_path and not displacy_path.exists():
-        msg.fail(Messages.M035, displacy_path, exits=1)
+        msg.fail("Visualization output directory not found", displacy_path, exits=1)
    corpus = GoldCorpus(data_path, data_path)
    nlp = util.load_model(model)
    dev_docs = list(corpus.dev_docs(nlp, gold_preproc=gold_preproc))
@ -75,7 +74,7 @@ def evaluate(
            deps=render_deps,
            ents=render_ents,
        )
-        msg.good(Messages.M036.format(n=displacy_limit), displacy_path)
+        msg.good("Generated {} parses as HTML".format(displacy_limit), displacy_path)


 def render_parses(docs, output_path, model_name="", limit=250, deps=True, ents=True):
@ -90,39 +89,3 @@ def render_parses(docs, output_path, model_name="", limit=250, deps=True, ents=T
                docs[:limit], style="dep", page=True, options={"compact": True}
            )
            file_.write(html)
-
-
-def print_progress(itn, losses, dev_scores, wps=0.0):
-    scores = {}
-    for col in [
-        "dep_loss",
-        "tag_loss",
-        "uas",
-        "tags_acc",
-        "token_acc",
-        "ents_p",
-        "ents_r",
-        "ents_f",
-        "wps",
-    ]:
-        scores[col] = 0.0
-    scores["dep_loss"] = losses.get("parser", 0.0)
-    scores["ner_loss"] = losses.get("ner", 0.0)
-    scores["tag_loss"] = losses.get("tagger", 0.0)
-    scores.update(dev_scores)
-    scores["wps"] = wps
-    tpl = "\t".join(
-        (
-            "{:d}",
-            "{dep_loss:.3f}",
-            "{ner_loss:.3f}",
-            "{uas:.3f}",
-            "{ents_p:.3f}",
-            "{ents_r:.3f}",
-            "{ents_f:.3f}",
-            "{tags_acc:.3f}",
-            "{token_acc:.3f}",
-            "{wps:.1f}",
-        )
-    )
-    print(tpl.format(itn, **scores))
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@ -7,7 +7,6 @@ from pathlib import Path
 from wasabi import Printer
 import srsly

-from ._messages import Messages
 from ..compat import path2str, basestring_, unicode_
 from .. import util
 from .. import about
@ -32,7 +31,7 @@ def info(model=None, markdown=False, silent=False):
            model_path = util.get_data_path() / model
        meta_path = model_path / "meta.json"
        if not meta_path.is_file():
-            msg.fail(Messages.M020, meta_path, exits=1)
+            msg.fail("Can't find model meta.json", meta_path, exits=1)
        meta = srsly.read_json(meta_path)
        if model_path.resolve() != model_path:
            meta["link"] = path2str(model_path)
--- a/spacy/cli/init_model.py
+++ b/spacy/cli/init_model.py
@ -14,7 +14,6 @@ import zipfile
 import srsly
 from wasabi import Printer

-from ._messages import Messages
 from ..vectors import Vectors
 from ..errors import Errors, Warnings, user_warning
 from ..util import ensure_path, get_lang_class
@ -58,14 +57,21 @@ def init_model(
                settings.append("-f")
            if clusters_loc:
                settings.append("-c")
-            msg.warn(Messages.M063, Messages.M064)
+            msg.warn(
+                "Incompatible arguments",
+                "The -f and -c arguments are deprecated, and not compatible "
+                "with the -j argument, which should specify the same "
+                "information. Either merge the frequencies and clusters data "
+                "into the JSONL-formatted file (recommended), or use only the "
+                "-f and -c files, without the other lexical attributes.",
+            )
        jsonl_loc = ensure_path(jsonl_loc)
        lex_attrs = srsly.read_jsonl(jsonl_loc)
    else:
        clusters_loc = ensure_path(clusters_loc)
        freqs_loc = ensure_path(freqs_loc)
        if freqs_loc is not None and not freqs_loc.exists():
-            msg.fail(Messages.M037, freqs_loc, exits=1)
+            msg.fail("Can't find words frequencies file", freqs_loc, exits=1)
        lex_attrs = read_attrs_from_deprecated(freqs_loc, clusters_loc)

    with msg.loading("Creating model..."):
@ -75,7 +81,10 @@ def init_model(
        add_vectors(nlp, vectors_loc, prune_vectors)
    vec_added = len(nlp.vocab.vectors)
    lex_added = len(nlp.vocab)
-    msg.good(Messages.M038, Messages.M039.format(entries=lex_added, vectors=vec_added))
+    msg.good(
+        "Sucessfully compiled vocab",
+        "{} entries, {} vectors".format(lex_added, vec_added),
+    )
    if not output_dir.exists():
        output_dir.mkdir()
    nlp.to_disk(output_dir)
--- a/spacy/cli/link.py
+++ b/spacy/cli/link.py
@ -5,7 +5,6 @@ import plac
 from pathlib import Path
 from wasabi import Printer

-from ._messages import Messages
 from ..compat import symlink_to, path2str
 from .. import util

@ -28,29 +27,52 @@ def link(origin, link_name, force=False, model_path=None):
        model_path = Path(origin) if model_path is None else Path(model_path)
    if not model_path.exists():
        msg.fail(
-            Messages.M008, Messages.M009.format(path=path2str(model_path)), exits=1
+            "Can't locate model data",
+            "The data should be located in {}".format(path2str(model_path)),
+            exits=1,
        )
    data_path = util.get_data_path()
    if not data_path or not data_path.exists():
        spacy_loc = Path(__file__).parent.parent
-        msg.fail(Messages.M010, Messages.M011.format(path=spacy_loc), exits=1)
+        msg.fail(
+            "Can't find the spaCy data path to create model symlink",
+            "Make sure a directory `/data` exists within your spaCy "
+            "installation and try again. The data directory should be located "
+            "here:".format(path=spacy_loc),
+            exits=1,
+        )
    link_path = util.get_data_path() / link_name
    if link_path.is_symlink() and not force:
-        msg.fail(Messages.M012.format(name=link_name), Messages.M013, exits=1)
+        msg.fail(
+            "Link '{}' already exists".format(link_name),
+            "To overwrite an existing link, use the --force flag",
+            exits=1,
+        )
    elif link_path.is_symlink():  # does a symlink exist?
        # NB: It's important to check for is_symlink here and not for exists,
        # because invalid/outdated symlinks would return False otherwise.
        link_path.unlink()
    elif link_path.exists():  # does it exist otherwise?
        # NB: Check this last because valid symlinks also "exist".
-        msg.fail(Messages.M014.format(name=link_name), Messages.M015, exits=1)
+        msg.fail(
+            "Can't overwrite symlink '{}'".format(link_name),
+            "This can happen if your data directory contains a directory or "
+            "file of the same name.",
+            exits=1,
+        )
    details = "%s --> %s" % (path2str(model_path), path2str(link_path))
    try:
        symlink_to(link_path, model_path)
    except:  # noqa: E722
        # This is quite dirty, but just making sure other errors are caught.
-        msg.fail(Messages.M016.format(name=link_name), Messages.M017)
+        msg.fail(
+            "Couldn't link model to '{}'".format(link_name),
+            "Creating a symlink in spacy/data failed. Make sure you have the "
+            "required permissions and try re-running the command as admin, or "
+            "use a virtualenv. You can still import the model as a module and "
+            "call its load() method, or create the symlink manually.",
+        )
        msg.text(details)
        raise
-    msg.good(Messages.M018, details)
-    msg.text(Messages.M019.format(name=link_name))
+    msg.good("Linking successful", details)
+    msg.text("You can now load the model via spacy.load('{}')".format(link_name))
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@ -7,7 +7,6 @@ from pathlib import Path
 from wasabi import Printer, get_raw_input
 import srsly

-from ._messages import Messages
 from ..compat import path2str
 from .. import util
 from .. import about
@ -33,22 +32,26 @@ def package(input_dir, output_dir, meta_path=None, create_meta=False, force=Fals
    output_path = util.ensure_path(output_dir)
    meta_path = util.ensure_path(meta_path)
    if not input_path or not input_path.exists():
-        msg.fail(Messages.M008, input_path, exits=1)
+        msg.fail("Can't locate model data", input_path, exits=1)
    if not output_path or not output_path.exists():
-        msg.fail(Messages.M040, output_path, exits=1)
+        msg.fail("Output directory not found", output_path, exits=1)
    if meta_path and not meta_path.exists():
-        msg.fail(Messages.M020, meta_path, exits=1)
+        msg.fail("Can't find model meta.json", meta_path, exits=1)

    meta_path = meta_path or input_path / "meta.json"
    if meta_path.is_file():
        meta = srsly.read_json(meta_path)
        if not create_meta:  # only print if user doesn't want to overwrite
-            msg.good(Messages.M041, meta_path)
+            msg.good("Loaded meta.json from file", meta_path)
        else:
            meta = generate_meta(input_dir, meta, msg)
    for key in ("lang", "name", "version"):
        if key not in meta or meta[key] == "":
-            msg.fail(Messages.M048.format(key=key), Messages.M049, exits=1)
+            msg.fail(
+                "No '{}' setting found in meta.json".format(key),
+                "This setting is required to build your package.",
+                exits=1,
+            )
    model_name = meta["lang"] + "_" + meta["name"]
    model_name_v = model_name + "-" + meta["version"]
    main_path = output_path / model_name_v
@ -59,8 +62,10 @@ def package(input_dir, output_dir, meta_path=None, create_meta=False, force=Fals
            shutil.rmtree(path2str(package_path))
        else:
            msg.fail(
-                Messages.M044,
-                Messages.M045.format(path=path2str(package_path)),
+                "Package directory already exists",
+                "Please delete the directory and try again, or use the "
+                "`--force` flag to overwrite existing "
+                "directories.".format(path=path2str(package_path)),
                exits=1,
            )
    Path.mkdir(package_path, parents=True)
@ -69,8 +74,8 @@ def package(input_dir, output_dir, meta_path=None, create_meta=False, force=Fals
    create_file(main_path / "setup.py", TEMPLATE_SETUP)
    create_file(main_path / "MANIFEST.in", TEMPLATE_MANIFEST)
    create_file(package_path / "__init__.py", TEMPLATE_INIT)
-    msg.good(Messages.M042.format(name=model_name_v), main_path)
-    msg.text(Messages.M043)
+    msg.good("Successfully created package '{}'".format(model_name_v), main_path)
+    msg.text("To build the package, run `python setup.py sdist` in this directory.")


 def create_file(file_path, contents):
@ -98,8 +103,11 @@ def generate_meta(model_path, existing_meta, msg):
        "vectors": len(nlp.vocab.vectors),
        "keys": nlp.vocab.vectors.n_keys,
    }
-    msg.divider(Messages.M046)
-    msg.text(Messages.M047)
+    msg.divider("Generating meta.json")
+    msg.text(
+        "Enter the package settings for your model. The following information "
+        "will be read from your model data: pipeline, vectors."
+    )
    for setting, desc, default in settings:
        response = get_raw_input(desc, default)
        meta[setting] = default if response == "" and default else response
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@ -10,7 +10,6 @@ import shutil
 import srsly
 from wasabi import Printer

-from ._messages import Messages
 from .._ml import create_default_optimizer
 from ..attrs import PROB, IS_OOV, CLUSTER, LANG
 from ..gold import GoldCorpus
@ -91,16 +90,20 @@ def train(
    dev_path = util.ensure_path(dev_path)
    meta_path = util.ensure_path(meta_path)
    if not train_path or not train_path.exists():
-        msg.fail(Messages.M050, train_path, exits=1)
+        msg.fail("Training data not found", train_path, exits=1)
    if not dev_path or not dev_path.exists():
-        msg.fail(Messages.M051, dev_path, exits=1)
+        msg.fail("Development data not found", dev_path, exits=1)
    if meta_path is not None and not meta_path.exists():
-        msg.fail(Messages.M020, meta_path, exits=1)
+        msg.fail("Can't find model meta.json", meta_path, exits=1)
    meta = srsly.read_json(meta_path) if meta_path else {}
-    if not isinstance(meta, dict):
-        msg.fail(Messages.M052, Messages.M053.format(meta_type=type(meta)), exits=1)
    if output_path.exists() and [p for p in output_path.iterdir() if p.is_dir()]:
-        msg.fail(Messages.M062, Messages.M065)
+        msg.warn(
+            "Output directory is not empty",
+            "This can lead to unintended side effects when saving the model. "
+            "Please use an empty directory or a different path instead. If "
+            "the specified output path doesn't exist, the directory will be "
+            "created for you.",
+        )
    if not output_path.exists():
        output_path.mkdir()

@ -123,19 +126,23 @@ def train(
    # the model and make sure the pipeline matches the pipeline setting. If
    # training starts from a blank model, intitalize the language class.
    pipeline = [p.strip() for p in pipeline.split(",")]
-    msg.text(Messages.M055.format(pipeline=pipeline))
+    msg.text("Training pipeline: {}".format(pipeline))
    if base_model:
-        msg.text(Messages.M056.format(model=base_model))
+        msg.text("Starting with base model '{}'".format(base_model))
        nlp = util.load_model(base_model)
        if nlp.lang != lang:
-            msg.fail(Messages.M072.format(model_lang=nlp.lang, lang=lang), exits=1)
+            msg.fail(
+                "Model language ('{}') doesn't match language specified as "
+                "`lang` argument ('{}') ".format(nlp.lang, lang),
+                exits=1,
+            )
        other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipeline]
        nlp.disable_pipes(*other_pipes)
        for pipe in pipeline:
            if pipe not in nlp.pipe_names:
                nlp.add_pipe(nlp.create_pipe(pipe))
    else:
-        msg.text(Messages.M057.format(model=lang))
+        msg.text("Starting with blank model '{}'".format(lang))
        lang_cls = util.get_lang_class(lang)
        nlp = lang_cls()
        for pipe in pipeline:
@ -145,7 +152,7 @@ def train(
        nlp.add_pipe(nlp.create_pipe("merge_subtokens"))

    if vectors:
-        msg.text(Messages.M058.format(model=vectors))
+        msg.text("Loading vector from model '{}'".format(vectors))
        _load_vectors(nlp, vectors)

    # Multitask objectives
@ -153,13 +160,16 @@ def train(
    for pipe_name, multitasks in multitask_options:
        if multitasks:
            if pipe_name not in pipeline:
-                msg.fail(Messages.M059.format(pipe=pipe_name))
+                msg.fail(
+                    "Can't use multitask objective without '{}' in the "
+                    "pipeline".format(pipe_name)
+                )
            pipe = nlp.get_pipe(pipe_name)
            for objective in multitasks.split(","):
                pipe.add_multitask_objective(objective)

    # Prepare training corpus
-    msg.text(Messages.M060.format(limit=n_examples))
+    msg.text("Counting training words (limit={})".format(n_examples))
    corpus = GoldCorpus(train_path, dev_path, limit=n_examples)
    n_train_words = corpus.count_train()

@ -175,11 +185,19 @@ def train(
    # Load in pre-trained weights
    if init_tok2vec is not None:
        components = _load_pretrained_tok2vec(nlp, init_tok2vec)
-        msg.text(Messages.M071.format(components=components))
+        msg.text("Loaded pretrained tok2vec for: {}".format(components))

-    print(
-        "\nItn.  Dep Loss  NER Loss  UAS     NER P.  NER R.  NER F.  Tag %   Token %  CPU WPS  GPU WPS"
-    )
+    # fmt: off
+    row_head = ("Itn", "Dep Loss", "NER Loss", "UAS", "NER P", "NER R", "NER F", "Tag %", "Token %", "CPU WPS", "GPU WPS")
+    row_settings = {
+        "widths": (3, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7),
+        "aligns": ["r" for i in row_head],
+        "spacing": 2
+    }
+    # fmt: on
+    print("")
+    msg.row(row_head, **row_settings)
+    msg.row(["-" * width for width in row_settings["widths"]], **row_settings)
    try:
        for i in range(n_iter):
            train_docs = corpus.train_docs(
@ -246,15 +264,18 @@ def train(

                util.set_env_log(verbose)

-            print_progress(i, losses, scorer.scores, cpu_wps=cpu_wps, gpu_wps=gpu_wps)
+            progress = _get_progress(
+                i, losses, scorer.scores, cpu_wps=cpu_wps, gpu_wps=gpu_wps
+            )
+            msg.row(progress, **row_settings)
    finally:
-        with msg.loading(Messages.M061):
-            with nlp.use_params(optimizer.averages):
-                final_model_path = output_path / "model-final"
-                nlp.to_disk(final_model_path)
-        msg.good(Messages.M066, util.path2str(final_model_path))
-
-    _collate_best_model(meta, output_path, nlp.pipe_names)
+        with nlp.use_params(optimizer.averages):
+            final_model_path = output_path / "model-final"
+            nlp.to_disk(final_model_path)
+        msg.good("Saved model to output directory", final_model_path)
+        with msg.loading("Creating best model..."):
+            best_model_path = _collate_best_model(meta, output_path, nlp.pipe_names)
+        msg.good("Created best model", best_model_path)


 def _load_vectors(nlp, vectors):
@ -297,6 +318,7 @@ def _collate_best_model(meta, output_path, components):
        for metric in _get_metrics(component):
            meta["accuracy"][metric] = accs[metric]
    srsly.write_json(best_dest / "meta.json", meta)
+    return best_dest


 def _find_best(experiment_dir, component):
@ -322,7 +344,7 @@ def _get_metrics(component):
    return ("token_acc",)


-def print_progress(itn, losses, dev_scores, cpu_wps=0.0, gpu_wps=0.0):
+def _get_progress(itn, losses, dev_scores, cpu_wps=0.0, gpu_wps=0.0):
    scores = {}
    for col in [
        "dep_loss",
@ -343,19 +365,16 @@ def print_progress(itn, losses, dev_scores, cpu_wps=0.0, gpu_wps=0.0):
    scores.update(dev_scores)
    scores["cpu_wps"] = cpu_wps
    scores["gpu_wps"] = gpu_wps or 0.0
-    tpl = "".join(
-        (
-            "{:<6d}",
-            "{dep_loss:<10.3f}",
-            "{ner_loss:<10.3f}",
-            "{uas:<8.3f}",
-            "{ents_p:<8.3f}",
-            "{ents_r:<8.3f}",
-            "{ents_f:<8.3f}",
-            "{tags_acc:<8.3f}",
-            "{token_acc:<9.3f}",
-            "{cpu_wps:<9.1f}",
-            "{gpu_wps:.1f}",
-        )
-    )
-    print(tpl.format(itn, **scores))
+    return [
+        itn,
+        "{:.3f}".format(scores["dep_loss"]),
+        "{:.3f}".format(scores["ner_loss"]),
+        "{:.3f}".format(scores["uas"]),
+        "{:.3f}".format(scores["ents_p"]),
+        "{:.3f}".format(scores["ents_r"]),
+        "{:.3f}".format(scores["ents_f"]),
+        "{:.3f}".format(scores["tags_acc"]),
+        "{:.3f}".format(scores["token_acc"]),
+        "{:.0f}".format(scores["cpu_wps"]),
+        "{:.0f}".format(scores["gpu_wps"]),
+    ]
--- a/spacy/cli/validate.py
+++ b/spacy/cli/validate.py
@ -8,7 +8,6 @@ import requests
 import srsly
 from wasabi import Printer

-from ._messages import Messages
 from ..compat import path2str
 from ..util import get_data_path
 from .. import about
@ -23,13 +22,17 @@ def validate():
    with msg.loading("Loading compatibility table..."):
        r = requests.get(about.__compatibility__)
        if r.status_code != 200:
-            msg.fail(Messages.M003.format(code=r.status_code), Messages.M021, exits=1)
+            msg.fail(
+                "Server error ({})".format(r.status_code),
+                "Couldn't fetch compatibility table.",
+                exits=1,
+            )
    msg.good("Loaded compatibility table")
    compat = r.json()["spacy"]
    current_compat = compat.get(about.__version__)
    if not current_compat:
        msg.fail(
-            Messages.M022.format(version=about.__version__),
+            "Can't find spaCy v{} in compatibility table".format(about.__version__),
            about.__compatibility__,
            exits=1,
        )
@ -49,7 +52,7 @@ def validate():
    update_models = [m for m in incompat_models if m in current_compat]
    spacy_dir = Path(__file__).parent.parent

-    msg.divider(Messages.M023.format(version=about.__version__))
+    msg.divider("Installed models (spaCy v{})".format(about.__version__))
    msg.info("spaCy installation: {}".format(path2str(spacy_dir)))

    if model_links or model_pkgs:
@ -61,17 +64,24 @@ def validate():
            rows.append(get_model_row(current_compat, name, data, msg, "link"))
        msg.table(rows, header=header)
    else:
-        msg.text(Messages.M024, exits=0)
+        msg.text("No models found in your current environment.", exits=0)
    if update_models:
        msg.divider("Install updates")
+        msg.text("Use the following commands to update the model packages:")
        cmd = "python -m spacy download {}"
        print("\n".join([cmd.format(pkg) for pkg in update_models]) + "\n")
    if na_models:
        msg.text(
-            Messages.M025.format(version=about.__version__, models=", ".join(na_models))
+            "The following models are not available for spaCy "
+            "v{}: {}".format(about.__version__, ", ".join(na_models))
        )
    if incompat_links:
-        msg.text(Messages.M027.format(path=path2str(get_data_path())))
+        msg.text(
+            "You may also want to overwrite the incompatible links using the "
+            "`python -m spacy link` command with `--force`, or remove them "
+            "from the data directory. "
+            "Data path: {path}".format(path=path2str(get_data_path()))
+        )
    if incompat_models or incompat_links:
        sys.exit(1)