Tidy up CLI and fix print functions
This commit is contained in:
parent 311704674d
commit 59c3b9d4dd
@@ -2,6 +2,7 @@
from __future__ import unicode_literals

from . import util
from .util import prints
from .deprecated import resolve_model_name
from .cli.info import info
from .glossary import explain

@@ -26,9 +27,8 @@ def load(name, **overrides):
if not model_path.exists():
lang_name = util.get_lang_class(name).lang
model_path = None
util.print_msg(
"Only loading the '{}' tokenizer.".format(lang_name),
title="Warning: no model found for '{}'".format(name))
prints("Only loading the '%s' tokenizer." % lang_name,
title="Warning: no model found for '%s'" % name)
else:
model_path = util.ensure_path(overrides['path'])
data_path = model_path.parent
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
from pathlib import Path

from .converters import conllu2json
from .. import util
from ..util import prints

# Converters are matched by file extension. To add a converter, add a new entry

@@ -19,17 +19,12 @@ CONVERTERS = {
def convert(input_file, output_dir, *args):
input_path = Path(input_file)
output_path = Path(output_dir)
check_dirs(input_path, output_path)
file_ext = input_path.suffix
if file_ext in CONVERTERS:
CONVERTERS[file_ext](input_path, output_path, *args)
else:
util.sys_exit("Can't find converter for {}".format(input_path.parts[-1]),
title="Unknown format")

def check_dirs(input_file, output_path):
if not input_file.exists():
util.sys_exit(input_file.as_posix(), title="Input file not found")
if not input_path.exists():
prints(input_path, title="Input file not found", exits=True)
if not output_path.exists():
util.sys_exit(output_path.as_posix(), title="Output directory not found")
prints(output_path, title="Output directory not found", exits=True)
file_ext = input_path.suffix
if not file_ext in CONVERTERS:
prints("Can't find converter for %s" % input_path.parts[-1],
title="Unknown format", exits=True)
CONVERTERS[file_ext](input_path, output_path, *args)
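The reworked convert() keeps the same dispatch idea: pick a converter by the input file's extension and bail out with a printed title when no converter matches. A minimal standalone sketch of that extension-based dispatch, using an illustrative registry rather than spaCy's real CONVERTERS table:

    from pathlib import Path

    def fake_conllu2json(input_path, output_path):
        # stand-in for a real converter function
        print('converting', input_path, '->', output_path)

    CONVERTERS = {'.conllu': fake_conllu2json, '.conll': fake_conllu2json}

    def convert(input_file, output_dir):
        input_path = Path(input_file)
        output_path = Path(output_dir)
        file_ext = input_path.suffix
        if file_ext not in CONVERTERS:
            raise SystemExit("Unknown format: %s" % input_path.parts[-1])
        CONVERTERS[file_ext](input_path, output_path)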
@@ -1,9 +1,8 @@
# coding: utf8
from __future__ import unicode_literals

import json
from ...compat import json_dumps
from ... import util
from ...compat import json_dumps, path2str
from ...util import prints

def conllu2json(input_path, output_path, n_sents=10, use_morphology=False):

@@ -32,8 +31,8 @@ def conllu2json(input_path, output_path, n_sents=10, use_morphology=False):
output_file = output_path / output_filename
with output_file.open('w', encoding='utf-8') as f:
f.write(json_dumps(docs))
util.print_msg("Created {} documents".format(len(docs)),
title="Generated output file {}".format(output_file))
prints("Created %d documents" % len(docs),
title="Generated output file %s" % path2str(output_file))

def read_conllx(input_path, use_morphology=False, n=0):
@@ -6,78 +6,52 @@ import os
import subprocess
import sys

from .link import link_package
from .link import link
from ..util import prints
from .. import about
from .. import util

def download(model=None, direct=False):
check_error_depr(model)

def download(model, direct=False):
if direct:
download_model('{m}/{m}.tar.gz'.format(m=model))
else:
model_name = check_shortcut(model)
shortcuts = get_json(about.__shortcuts__, "available shortcuts")
model_name = shortcuts.get(model, model)
compatibility = get_compatibility()
version = get_version(model_name, compatibility)
download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version))
link_package(model_name, model, force=True)
link(model_name, model, force=True)

def get_json(url, desc):
r = requests.get(url)
if r.status_code != 200:
util.sys_exit(
"Couldn't fetch {d}. Please find the right model for your spaCy "
"installation (v{v}), and download it manually:".format(d=desc, v=about.__version__),
"python -m spacy.download [full model name + version] --direct",
title="Server error ({c})".format(c=r.status_code))
prints("Couldn't fetch %s. Please find a model for your spaCy installation "
"(v%s), and download it manually." % (desc, about.__version__),
about.__docs__, title="Server error (%d)" % r.status_code, exits=True)
return r.json()

def check_shortcut(model):
shortcuts = get_json(about.__shortcuts__, "available shortcuts")
return shortcuts.get(model, model)

def get_compatibility():
version = about.__version__
comp_table = get_json(about.__compatibility__, "compatibility table")
comp = comp_table['spacy']
if version not in comp:
util.sys_exit(
"No compatible models found for v{v} of spaCy.".format(v=version),
title="Compatibility error")
prints("No compatible models found for v%s of spaCy." % version,
title="Compatibility error", exits=True)
return comp[version]

def get_version(model, comp):
if model not in comp:
util.sys_exit(
"No compatible model found for "
"'{m}' (spaCy v{v}).".format(m=model, v=about.__version__),
title="Compatibility error")
version = about.__version__
prints("No compatible model found for '%s' (spaCy v%s)." % (model, version),
title="Compatibility error", exits=True)
return comp[model][0]

def download_model(filename):
util.print_msg("Downloading {f}".format(f=filename))
download_url = about.__download_url__ + '/' + filename
subprocess.call([sys.executable, '-m',
'pip', 'install', '--no-cache-dir', download_url],
env=os.environ.copy())

def check_error_depr(model):
if not model:
util.sys_exit(
"python -m spacy.download [name or shortcut]",
title="Missing model name or shortcut")
if model == 'all':
util.sys_exit(
"As of v1.7.0, the download all command is deprecated. Please "
"download the models individually via spacy.download [model name] "
"or pip install. For more info on this, see the documentation: "
"{d}".format(d=about.__docs__),
title="Deprecated command")
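The new download flow resolves a shortcut to a full model name, looks the name up in a compatibility table keyed by the running spaCy version, takes the first (newest) compatible version, and pip-installs the versioned tarball. A standalone sketch of that resolution step with made-up table data; the real tables are fetched as JSON from the URLs in the about module:

    SHORTCUTS = {'en': 'en_core_web_sm'}  # illustrative shortcut table
    COMPATIBILITY = {'spacy': {'1.7.0': {'en_core_web_sm': ['1.2.0', '1.1.0']}}}
    SPACY_VERSION = '1.7.0'

    def resolve_download(model):
        model_name = SHORTCUTS.get(model, model)           # shortcut -> package name
        comp = COMPATIBILITY['spacy'].get(SPACY_VERSION)   # models for this spaCy version
        if comp is None or model_name not in comp:
            raise SystemExit("Compatibility error for '%s'" % model_name)
        version = comp[model_name][0]                      # first entry is the newest
        return '{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version)

    print(resolve_download('en'))  # en_core_web_sm-1.2.0/en_core_web_sm-1.2.0.tar.gz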
@@ -4,49 +4,46 @@ from __future__ import unicode_literals
import platform
from pathlib import Path

from ..compat import unicode_
from ..compat import path2str
from .. import about
from .. import util

def info(model=None, markdown=False):
if model:
data = util.parse_package_meta(util.get_data_path(), model, require=True)
model_path = Path(__file__).parent / util.get_data_path() / model
data_path = util.get_data_path()
data = util.parse_package_meta(data_path, model, require=True)
model_path = Path(__file__).parent / data_path / model
if model_path.resolve() != model_path:
data['link'] = unicode_(model_path)
data['source'] = unicode_(model_path.resolve())
data['link'] = path2str(model_path)
data['source'] = path2str(model_path.resolve())
else:
data['source'] = unicode_(model_path)
print_info(data, "model " + model, markdown)
data['source'] = path2str(model_path)
print_info(data, 'model %s' % model, markdown)
else:
data = get_spacy_data()
print_info(data, "spaCy", markdown)
data = {'spaCy version': about.__version__,
'Location': path2str(Path(__file__).parent.parent),
'Platform': platform.platform(),
'Python version': platform.python_version(),
'Models': list_models()}
print_info(data, 'spaCy', markdown)

def print_info(data, title, markdown):
title = "Info about {title}".format(title=title)
title = 'Info about %s' % title
if markdown:
util.print_markdown(data, title=title)
else:
util.print_table(data, title=title)

def get_spacy_data():
return {
'spaCy version': about.__version__,
'Location': unicode_(Path(__file__).parent.parent),
'Platform': platform.platform(),
'Python version': platform.python_version(),
'Installed models': ', '.join(list_models())
}

def list_models():
# exclude common cache directories – this means models called "cache" etc.
# won't show up in list, but it seems worth it
def exclude_dir(dir_name):
# exclude common cache directories and hidden directories
exclude = ['cache', 'pycache', '__pycache__']
return dir_name in exclude or dir_name.startswith('.')
data_path = util.get_data_path()
if data_path:
models = [f.parts[-1] for f in data_path.iterdir() if f.is_dir()]
return [m for m in models if m not in exclude]
return ', '.join([m for m in models if not exclude_dir(m)])
return '-'
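list_models() now returns a comma-joined string of model directories, skipping cache and hidden directories via the nested exclude_dir() helper. A minimal standalone sketch of the same filter over an arbitrary directory:

    from pathlib import Path

    def exclude_dir(dir_name):
        # skip common cache directories and hidden directories
        exclude = ['cache', 'pycache', '__pycache__']
        return dir_name in exclude or dir_name.startswith('.')

    def list_models(data_path):
        data_path = Path(data_path)
        if data_path.exists():
            models = [f.parts[-1] for f in data_path.iterdir() if f.is_dir()]
            return ', '.join(m for m in models if not exclude_dir(m))
        return '-'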
@@ -1,78 +1,37 @@
# coding: utf8
from __future__ import unicode_literals

import pip
from pathlib import Path
import importlib
from ..compat import unicode_, symlink_to
from ..compat import symlink_to, path2str
from ..util import prints
from .. import util

def link(origin, link_name, force=False):
if is_package(origin):
link_package(origin, link_name, force)
if util.is_package(origin):
model_path = util.get_model_package_path(origin)
else:
symlink(origin, link_name, force)

def link_package(package_name, link_name, force=False):
# Here we're importing the module just to find it. This is worryingly
# indirect, but it's otherwise very difficult to find the package.
# Python's installation and import rules are very complicated.
pkg = importlib.import_module(package_name)
package_path = Path(pkg.__file__).parent.parent
meta = get_meta(package_path, package_name)
model_name = package_name + '-' + meta['version']
model_path = package_path / package_name / model_name
symlink(model_path, link_name, force)

def symlink(model_path, link_name, force):
model_path = Path(model_path)
model_path = Path(origin)
if not model_path.exists():
util.sys_exit(
"The data should be located in {p}".format(p=model_path),
title="Can't locate model data")
prints("The data should be located in %s" % path2str(model_path),
title="Can't locate model data", exits=True)
link_path = util.get_data_path() / link_name

if link_path.exists() and not force:
util.sys_exit(
"To overwrite an existing link, use the --force flag.",
title="Link {l} already exists".format(l=link_name))
prints("To overwrite an existing link, use the --force flag.",
title="Link %s already exists" % link_name, exits=True)
elif link_path.exists():
link_path.unlink()

try:
symlink_to(link_path, model_path)
except:
# This is quite dirty, but just making sure other errors are caught so
# users at least see a proper message.
util.print_msg(
"Creating a symlink in spacy/data failed. Make sure you have the "
"required permissions and try re-running the command as admin, or "
"use a virtualenv to install spaCy in a user directory, instead of "
"doing a system installation.",
"You can still import the model as a Python package and call its "
"load() method, or create the symlink manually:",
"{a} --> {b}".format(a=unicode_(model_path), b=unicode_(link_path)),
title="Error: Couldn't link model to '{l}'".format(l=link_name))
# This is quite dirty, but just making sure other errors are caught.
prints("Creating a symlink in spacy/data failed. Make sure you have "
"the required permissions and try re-running the command as "
"admin, or use a virtualenv. You can still import the model as a "
"module and call its load() method, or create the symlink manually.",
"%s --> %s" % (path2str(model_path), path2str(link_path)),
title="Error: Couldn't link model to '%s'" % link_name)
raise

util.print_msg(
"{a} --> {b}".format(a=model_path.as_posix(), b=link_path.as_posix()),
"You can now load the model via spacy.load('{l}').".format(l=link_name),
prints("%s --> %s" % (path2str(model_path), path2str(link_path)),
"You can now load the model via spacy.load('%s')." % link_name,
title="Linking successful")

def get_meta(package_path, package):
meta = util.parse_package_meta(package_path, package)
return meta

def is_package(origin):
packages = pip.get_installed_distributions()
for package in packages:
if package.project_name.replace('-', '_') == origin:
return True
return False
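The slimmed-down link command reduces to one symlink step: refuse to overwrite an existing link unless --force is set, remove the old link otherwise, then create the new one. A minimal standalone sketch of that logic with plain pathlib; spaCy itself goes through compat.symlink_to for cross-platform behaviour:

    from pathlib import Path

    def make_link(model_path, link_path, force=False):
        model_path = Path(model_path)
        link_path = Path(link_path)
        if not model_path.exists():
            raise SystemExit("Can't locate model data: %s" % model_path)
        if link_path.exists():
            if not force:
                raise SystemExit("Link %s already exists (pass force=True to overwrite)" % link_path)
            link_path.unlink()
        # may need elevated permissions on Windows
        link_path.symlink_to(model_path.resolve(), target_is_directory=True)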
@@ -4,21 +4,25 @@ from __future__ import unicode_literals
import gzip
import math
from ast import literal_eval
from pathlib import Path
from preshed.counter import PreshCounter

from ..vocab import write_binary_vectors
from ..compat import fix_text
from ..compat import fix_text, path2str
from ..util import prints
from .. import util

def model(lang, model_dir, freqs_data, clusters_data, vectors_data):
model_path = Path(model_dir)
freqs_path = Path(freqs_data)
clusters_path = Path(clusters_data) if clusters_data else None
vectors_path = Path(vectors_data) if vectors_data else None

check_dirs(freqs_path, clusters_path, vectors_path)
model_path = util.ensure_path(model_dir)
freqs_path = util.ensure_path(freqs_data)
clusters_path = util.ensure_path(clusters_data)
vectors_path = util.ensure_path(vectors_data)
if not freqs_path.is_file():
prints(freqs_path, title="No frequencies file found", exits=True)
if clusters_path and not clusters_path.is_file():
prints(clusters_path, title="No Brown clusters file found", exits=True)
if vectors_path and not vectors_path.is_file():
prints(vectors_path, title="No word vectors file found", exits=True)
vocab = util.get_lang_class(lang).Defaults.create_vocab()
probs, oov_prob = read_probs(freqs_path)
clusters = read_clusters(clusters_path) if clusters_path else {}

@@ -36,14 +40,14 @@ def create_model(model_path, vectors_path, vocab, oov_prob):
model_path.mkdir()
if not vocab_path.exists():
vocab_path.mkdir()
vocab.dump(lexemes_path.as_posix())
vocab.dump(path2str(lexemes_path))
with strings_path.open('w') as f:
vocab.strings.dump(f)
with oov_path.open('w') as f:
f.write('%f' % oov_prob)
if vectors_path:
vectors_dest = vocab_path / 'vec.bin'
write_binary_vectors(vectors_path.as_posix(), vectors_dest.as_posix())
write_binary_vectors(path2str(vectors_path), path2str(vectors_dest))

def read_probs(freqs_path, max_length=100, min_doc_freq=5, min_freq=200):

@@ -115,17 +119,8 @@ def populate_vocab(vocab, clusters, probs, oov_prob):

def check_unzip(file_path):
file_path_str = file_path.as_posix()
file_path_str = path2str(file_path)
if file_path_str.endswith('gz'):
return gzip.open(file_path_str)
else:
return file_path.open()

def check_dirs(freqs_data, clusters_data, vectors_data):
if not freqs_data.is_file():
util.sys_exit(freqs_data.as_posix(), title="No frequencies file found")
if clusters_data and not clusters_data.is_file():
util.sys_exit(clusters_data.as_posix(), title="No Brown clusters file found")
if vectors_data and not vectors_data.is_file():
util.sys_exit(vectors_data.as_posix(), title="No word vectors file found")
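check_unzip() opens the frequencies file through gzip when the name ends in 'gz' and as a plain file otherwise. A standalone sketch of the same idea; opening in text mode with an explicit encoding is an addition here so both branches yield decoded lines:

    import gzip
    from pathlib import Path

    def open_maybe_gzipped(file_path):
        file_path = Path(file_path)
        if str(file_path).endswith('gz'):
            # text mode so callers can iterate over decoded lines
            return gzip.open(str(file_path), 'rt', encoding='utf-8')
        return file_path.open(encoding='utf-8')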
@@ -5,64 +5,57 @@ import shutil
import requests
from pathlib import Path

from ..compat import unicode_, json_dumps
from ..compat import path2str, json_dumps
from ..util import prints
from .. import util
from .. import about

def package(input_dir, output_dir, meta_path, force):
input_path = Path(input_dir)
output_path = Path(output_dir)
input_path = util.ensure_path(input_dir)
output_path = util.ensure_path(output_dir)
meta_path = util.ensure_path(meta_path)
check_dirs(input_path, output_path, meta_path)
if not input_path or not input_path.exists():
prints(input_path, title="Model directory not found", exits=True)
if not output_path or not output_path.exists():
prints(output_path, title="Output directory not found", exits=True)
if meta_path and not meta_path.exists():
prints(meta_path, title="meta.json not found", exits=True)

template_setup = get_template('setup.py')
template_manifest = get_template('MANIFEST.in')
template_init = get_template('en_model_name/__init__.py')

meta_path = meta_path or input_path / 'meta.json'
if meta_path.is_file():
util.print_msg(unicode_(meta_path), title="Reading meta.json from file")
prints(meta_path, title="Reading meta.json from file")
meta = util.read_json(meta_path)
else:
meta = generate_meta()

validate_meta(meta, ['lang', 'name', 'version'])

model_name = meta['lang'] + '_' + meta['name']
model_name_v = model_name + '-' + meta['version']
main_path = output_path / model_name_v
package_path = main_path / model_name

create_dirs(package_path, force)
shutil.copytree(unicode_(input_path), unicode_(package_path / model_name_v))
shutil.copytree(path2str(input_path), path2str(package_path / model_name_v))
create_file(main_path / 'meta.json', json_dumps(meta))
create_file(main_path / 'setup.py', template_setup)
create_file(main_path / 'MANIFEST.in', template_manifest)
create_file(package_path / '__init__.py', template_init)

util.print_msg(
unicode_(main_path),
"To build the package, run `python setup.py sdist` in that directory.",
title="Successfully created package {p}".format(p=model_name_v))

def check_dirs(input_path, output_path, meta_path):
if not input_path.exists():
util.sys_exit(unicode_(input_path.as_poisx), title="Model directory not found")
if not output_path.exists():
util.sys_exit(unicode_(output_path), title="Output directory not found")
if meta_path and not meta_path.exists():
util.sys_exit(unicode_(meta_path), title="meta.json not found")
prints(main_path, "To build the package, run `python setup.py sdist` in this "
"directory.", title="Successfully created package '%s'" % model_name_v)

def create_dirs(package_path, force):
if package_path.exists():
if force:
shutil.rmtree(unicode_(package_path))
shutil.rmtree(path2str(package_path))
else:
util.sys_exit(unicode_(package_path),
"Please delete the directory and try again, or use the --force "
"flag to overwrite existing directories.",
title="Package directory already exists")
prints(package_path, "Please delete the directory and try again, or "
"use the --force flag to overwrite existing directories.",
title="Package directory already exists", exits=True)
Path.mkdir(package_path, parents=True)

@@ -75,15 +68,14 @@ def generate_meta():
settings = [('lang', 'Model language', 'en'),
('name', 'Model name', 'model'),
('version', 'Model version', '0.0.0'),
('spacy_version', 'Required spaCy version', '>=1.7.0,<2.0.0'),
('spacy_version', 'Required spaCy version', '>=2.0.0,<3.0.0'),
('description', 'Model description', False),
('author', 'Author', False),
('email', 'Author email', False),
('url', 'Author website', False),
('license', 'License', 'CC BY-NC 3.0')]

util.print_msg("Enter the package settings for your model.", title="Generating meta.json")
prints("Enter the package settings for your model.", title="Generating meta.json")
meta = {}
for setting, desc, default in settings:
response = util.get_raw_input(desc, default)

@@ -94,16 +86,13 @@ def generate_meta():
def validate_meta(meta, keys):
for key in keys:
if key not in meta or meta[key] == '':
util.sys_exit(
"This setting is required to build your package.",
title='No "{k}" setting found in meta.json'.format(k=key))
prints("This setting is required to build your package.",
title='No "%s" setting found in meta.json' % key, exits=True)

def get_template(filepath):
url = 'https://raw.githubusercontent.com/explosion/spacy-dev-resources/master/templates/model/'
r = requests.get(url + filepath)
r = requests.get(about.__model_files__ + filepath)
if r.status_code != 200:
util.sys_exit(
"Couldn't fetch template files from GitHub.",
title="Server error ({c})".format(c=r.status_code))
prints("Couldn't fetch template files from GitHub.",
title="Server error (%d)" % r.status_code, exits=True)
return r.text
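generate_meta() prompts for each (key, description, default) entry in the settings list and collects the answers into the meta dict. The hunk cuts off right after the get_raw_input() call, so the fallback-to-default line in this sketch is an assumption; the prompt format mirrors util.get_raw_input():

    def generate_meta():
        settings = [('lang', 'Model language', 'en'),
                    ('name', 'Model name', 'model'),
                    ('version', 'Model version', '0.0.0')]  # truncated for illustration
        meta = {}
        for setting, desc, default in settings:
            response = input(' %s (default: %s): ' % (desc, default))
            # assumed: fall back to the default when the user just hits enter
            meta[setting] = default if response == '' and default else response
        return meta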
@@ -4,19 +4,24 @@ from __future__ import unicode_literals, division, print_function
import json
from collections import defaultdict

from ..util import ensure_path
from ..scorer import Scorer
from ..gold import GoldParse, merge_sents
from ..gold import read_json_file as read_gold_json
from ..util import prints
from .. import util

def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ner,
parser_L1):
output_path = ensure_path(output_dir)
train_path = ensure_path(train_data)
dev_path = ensure_path(dev_data)
check_dirs(output_path, train_path, dev_path)
output_path = util.ensure_path(output_dir)
train_path = util.ensure_path(train_data)
dev_path = util.ensure_path(dev_data)
if not output_path.exists():
prints(output_path, title="Output directory not found", exits=True)
if not train_path.exists():
prints(train_path, title="Training data not found", exits=True)
if dev_path and not dev_path.exists():
prints(dev_path, title="Development data not found", exits=True)

lang = util.get_lang_class(language)
parser_cfg = {

@@ -44,14 +49,13 @@ def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ne

def train_config(config):
config_path = ensure_path(config)
config_path = util.ensure_path(config)
if not config_path.is_file():
util.sys_exit(config_path.as_posix(), title="Config file not found")
prints(config_path, title="Config file not found", exits=True)
config = json.load(config_path)
for setting in []:
if setting not in config.keys():
util.sys_exit("{s} not found in config file.".format(s=setting),
title="Missing setting")
prints("%s not found in config file." % setting, title="Missing setting")

def train_model(Language, train_data, dev_data, output_path, tagger_cfg, parser_cfg,

@@ -88,16 +92,8 @@ def evaluate(Language, gold_tuples, output_path):
return scorer

def check_dirs(output_path, train_path, dev_path):
if not output_path.exists():
util.sys_exit(output_path.as_posix(), title="Output directory not found")
if not train_path.exists():
util.sys_exit(train_path.as_posix(), title="Training data not found")
if dev_path and not dev_path.exists():
util.sys_exit(dev_path.as_posix(), title="Development data not found")

def print_progress(itn, nr_weight, nr_active_feat, **scores):
# TODO: Fix!
tpl = '{:d}\t{:d}\t{:d}\t{uas:.3f}\t{ents_f:.3f}\t{tags_acc:.3f}\t{token_acc:.3f}'
print(tpl.format(itn, nr_weight, nr_active_feat, **scores))
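print_progress() renders one tab-separated row per training iteration, mixing positional counts with named score fields. A small runnable example of filling that template with made-up numbers:

    tpl = '{:d}\t{:d}\t{:d}\t{uas:.3f}\t{ents_f:.3f}\t{tags_acc:.3f}\t{token_acc:.3f}'
    scores = {'uas': 0.871, 'ents_f': 0.793, 'tags_acc': 0.962, 'token_acc': 0.998}
    print(tpl.format(5, 120000, 45000, **scores))
    # 5       120000  45000   0.871   0.793   0.962   0.998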
@@ -5,6 +5,8 @@ from pathlib import Path
from . import about
from . import util
from .util import prints
from .compat import path2str
from .cli import download
from .cli import link

@@ -114,9 +116,9 @@ def resolve_model_name(name):
"""
if name == 'en' or name == 'de':
versions = ['1.0.0', '1.1.0']
data_path = Path(util.get_data_path())
data_path = util.get_data_path()
model_path = data_path / name
v_model_paths = [data_path / Path(name + '-' + v) for v in versions]
v_model_paths = [data_path / '%s-%s' % (name, v) for v in versions]

if not model_path.exists(): # no shortcut found
for v_path in v_model_paths:

@@ -126,10 +128,10 @@ def resolve_model_name(name):
return name
else:
raise ValueError(
"Found English model at {p}. This model is not "
"Found English model at %s. This model is not "
"compatible with the current version. See "
"https://spacy.io/docs/usage/models to download the "
"new model.".format(p=v_path))
"new model." % path2str(v_path))
return name

@@ -142,11 +144,10 @@ class ModelDownload():
@classmethod
def load(self, lang):
util.print_msg(
"The spacy.{l}.download command is now deprecated. Please use "
"python -m spacy download [model name or shortcut] instead. For more "
"info and available models, see the documentation: {d}. "
"Downloading default '{l}' model now...".format(d=about.__docs__, l=lang),
prints("The spacy.%s.download command is now deprecated. Please use "
"python -m spacy download [model name or shortcut] instead. For "
"more info, see the docs: %s." % (lang, about.__docs__),
"Downloading default '%s' model now..." % lang,
title="Warning: deprecated command")
download(lang)
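resolve_model_name() checks the data directory for the shortcut name first and otherwise scans old versioned directories such as en-1.0.0, raising if only an incompatible one is present. A minimal standalone sketch of building and checking those candidate paths (the directory layout is illustrative):

    from pathlib import Path

    def find_old_model_dirs(data_path, name, versions=('1.0.0', '1.1.0')):
        data_path = Path(data_path)
        if (data_path / name).exists():
            return []  # shortcut directory exists, nothing old to flag
        candidates = [data_path / ('%s-%s' % (name, v)) for v in versions]
        return [p for p in candidates if p.exists()]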
@@ -9,7 +9,7 @@ from pathlib import Path
import sys
import textwrap

from .compat import basestring_, unicode_, input_
from .compat import path2str, basestring_, input_

LANGUAGES = {}

@@ -151,95 +151,66 @@ def parse_package_meta(package_path, package, require=True):
def get_raw_input(description, default=False):
"""
Get user input via raw_input / input and return input value. Takes a
description for the prompt, and an optional default value that's displayed
with the prompt.
description, and an optional default value to display with the prompt.
"""
additional = ' (default: {d})'.format(d=default) if default else ''
prompt = ' {d}{a}: '.format(d=description, a=additional)
additional = ' (default: %s)' % default if default else ''
prompt = ' %s%s: ' % (description, additional)
user_input = input_(prompt)
return user_input

def print_table(data, **kwargs):
def print_table(data, title=None):
"""
Print data in table format. Can either take a list of tuples or a
dictionary, which will be converted to a list of tuples.
"""
if type(data) == dict:
data = list(data.items())

tpl_msg = '\n{msg}\n'
tpl_title = '\n \033[93m{msg}\033[0m'
tpl_row =" {:<15}" * len(data[0])
tpl_row = ' {:<15}' * len(data[0])
table = '\n'.join([tpl_row.format(l, v) for l, v in data])

if 'title' in kwargs and kwargs['title']:
print(tpl_title.format(msg=kwargs['title']))

print(tpl_msg.format(msg=table))
if title:
print('\n \033[93m{}\033[0m'.format(title))
print('\n{}\n'.format(table))

def print_markdown(data, **kwargs):
def print_markdown(data, title=None):
"""
Print listed data in GitHub-flavoured Markdown format so it can be
copy-pasted into issues. Can either take a list of tuples or a dictionary,
which will be converted to a list of tuples.
copy-pasted into issues. Can either take a list of tuples or a dictionary.
"""
def excl_value(value):
# don't print value if it contains absolute path of directory (i.e.
# personal info). Other conditions can be included here if necessary.
if unicode_(Path(__file__).parent) in value:
return True
return Path(value).exists() # contains path (personal info)

if type(data) == dict:
data = list(data.items())

tpl_msg = "\n{msg}\n"
tpl_title = "\n## {msg}"
tpl_row = "* **{l}:** {v}"
markdown = '\n'.join([tpl_row.format(l=l, v=v) for l, v in data if not excl_value(v)])

if 'title' in kwargs and kwargs['title']:
print(tpl_title.format(msg=kwargs['title']))
print(tpl_msg.format(msg=markdown))
markdown = ["* **{}:** {}".format(l, v) for l, v in data if not excl_value(v)]
if title:
print("\n## {}".format(title))
print('\n{}\n'.format('\n'.join(markdown)))

def print_msg(*text, **kwargs):
def prints(*texts, title=None, exits=False):
"""
Print formatted message. Each positional argument is rendered as newline-
separated paragraph. If kwarg 'title' exist, title is printed above the text
and highlighted (using ANSI escape sequences manually to avoid unnecessary
dependency).
separated paragraph. An optional highlighted title is printed above the text
(using ANSI escape sequences manually to avoid unnecessary dependency).
"""
message = '\n\n'.join([_wrap_text(t) for t in text])
tpl_msg = '\n{msg}\n'
tpl_title = '\n\033[93m{msg}\033[0m'

if 'title' in kwargs and kwargs['title']:
title = _wrap_text(kwargs['title'])
print(tpl_title.format(msg=title))
print(tpl_msg.format(msg=message))
title = '\033[93m{}\033[0m\n'.format(_wrap(title)) if title else ''
message = '\n\n'.join([_wrap(text) for text in texts])
print('\n{}{}\n'.format(title, message))
if exits:
sys.exit(0)

def _wrap_text(text):
def _wrap(text, wrap_max=80, indent=4):
"""
Wrap text at given width using textwrap module. Indent should consist of
spaces. Its length is deducted from wrap width to ensure exact wrapping.
"""
wrap_max = 80
indent = ' '
indent = indent * ' '
wrap_width = wrap_max - len(indent)
if isinstance(text, Path):
text = path2str(text)
return textwrap.fill(text, width=wrap_width, initial_indent=indent,
subsequent_indent=indent, break_long_words=False,
break_on_hyphens=False)

def sys_exit(*messages, **kwargs):
"""
Performs SystemExit. For modules used from the command line, like
download and link. To print message, use the same arguments as for
print_msg().
"""
if messages:
print_msg(*messages, **kwargs)
sys.exit(0)
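The heart of the commit is the new prints() helper in util, which replaces the print_msg()/sys_exit() pair: every positional argument becomes a wrapped, indented paragraph, an optional title is highlighted with an ANSI escape code, and exits=True terminates the process after printing. A standalone sketch approximating the helpers shown in the hunk above, with Path-to-string conversion simplified to str():

    import sys
    import textwrap
    from pathlib import Path

    def _wrap(text, wrap_max=80, indent=4):
        # wrap one paragraph, indenting every line by `indent` spaces
        indent = indent * ' '
        if isinstance(text, Path):
            text = str(text)
        return textwrap.fill(text, width=wrap_max - len(indent),
                             initial_indent=indent, subsequent_indent=indent,
                             break_long_words=False, break_on_hyphens=False)

    def prints(*texts, title=None, exits=False):
        # each positional argument is rendered as its own paragraph
        title = '\033[93m{}\033[0m\n'.format(_wrap(title)) if title else ''
        message = '\n\n'.join(_wrap(text) for text in texts)
        print('\n{}{}\n'.format(title, message))
        if exits:
            sys.exit(0)

    prints("Only loading the 'en' tokenizer.",
           title="Warning: no model found for 'en'")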