mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Reduce complexity in CLI
Remove now redundant model command and move plac annotations to cli files
This commit is contained in:
parent
aae97f00e9
commit
fc3ec733ea
|
@ -3,127 +3,21 @@ from __future__ import print_function
|
|||
# NB! This breaks in plac on Python 2!!
|
||||
#from __future__ import unicode_literals
|
||||
|
||||
import plac
|
||||
from spacy.cli import download as cli_download
|
||||
from spacy.cli import link as cli_link
|
||||
from spacy.cli import info as cli_info
|
||||
from spacy.cli import package as cli_package
|
||||
from spacy.cli import train as cli_train
|
||||
from spacy.cli import model as cli_model
|
||||
from spacy.cli import convert as cli_convert
|
||||
|
||||
|
||||
@plac.annotations(
    model=("model to download (shortcut or model name)", "positional", None, str),
    direct=("force direct download. Needs model name with version and won't "
            "perform compatibility check", "flag", "d", bool)
)
def download(model, direct=False):
    """Download a compatible model from the default download path using pip.

    model: a shortcut, a model name or, when --direct is set, the full model
        name including its version.
    direct: forwarded unchanged; per the flag's help text it forces a direct
        download and skips the compatibility check.
    """
    # Thin command-line shim: all real work happens in spacy.cli.download,
    # imported in this file as cli_download.
    cli_download(model, direct)
|
||||
|
||||
|
||||
@plac.annotations(
    origin=("package name or local path to model", "positional", None, str),
    link_name=("name of shortcut link to create", "positional", None, str),
    force=("force overwriting of existing link", "flag", "f", bool)
)
def link(origin, link_name, force=False):
    """Create a symlink for models within the spacy/data directory.

    Accepts either the name of a pip package, or the local path to the model
    data directory. Linking models allows loading them via
    spacy.load(link_name).

    origin: package name or local path to the model.
    link_name: name of the shortcut link to create.
    force: overwrite an existing link of the same name.
    """
    # Thin command-line shim around spacy.cli.link (imported as cli_link).
    cli_link(origin, link_name, force)
|
||||
|
||||
|
||||
@plac.annotations(
    model=("optional: shortcut link of model", "positional", None, str),
    # Flags are boolean switches; annotated as bool like every other flag
    # in this file.
    markdown=("generate Markdown for GitHub issues", "flag", "md", bool)
)
def info(model=None, markdown=False):
    """Print info about spaCy installation.

    If a model shortcut link is specified as an argument, print model
    information. Flag --markdown prints details in Markdown for easy
    copy-pasting to GitHub issues.

    model: optional shortcut link of a model.
    markdown: emit Markdown instead of the default output.
    """
    # Thin command-line shim around spacy.cli.info (imported as cli_info).
    cli_info(model, markdown)
|
||||
|
||||
|
||||
@plac.annotations(
    input_dir=("directory with model data", "positional", None, str),
    output_dir=("output parent directory", "positional", None, str),
    meta=("path to meta.json", "option", "m", str),
    force=("force overwriting of existing folder in output directory", "flag", "f", bool)
)
def package(input_dir, output_dir, meta=None, force=False):
    """Generate a Python package for model data.

    The package includes meta and required installation files. A new
    directory will be created in the specified output directory, and model
    data will be copied over.

    input_dir: directory with model data.
    output_dir: output parent directory.
    meta: optional path to a meta.json.
    force: overwrite an existing folder in the output directory.
    """
    # Thin command-line shim around spacy.cli.package (imported as
    # cli_package); arguments are forwarded positionally in the same order.
    cli_package(input_dir, output_dir, meta, force)
|
||||
|
||||
|
||||
@plac.annotations(
    input_file=("input file", "positional", None, str),
    output_dir=("output directory for converted file", "positional", None, str),
    n_sents=("Number of sentences per doc", "option", "n", float),
    morphology=("Enable appending morphology to tags", "flag", "m", bool)
)
def convert(input_file, output_dir, n_sents=10, morphology=False):
    """Convert files into JSON format.

    The output is intended for use with the train command and other
    experiment management functions.

    input_file: file to convert.
    output_dir: output directory for the converted file.
    n_sents: number of sentences per doc (default 10).
    morphology: enable appending morphology to tags.
    """
    # Thin command-line shim around spacy.cli.convert (imported as
    # cli_convert); arguments are forwarded positionally in the same order.
    cli_convert(input_file, output_dir, n_sents, morphology)
|
||||
|
||||
|
||||
@plac.annotations(
    lang=("model language", "positional", None, str),
    output_dir=("output directory to store model in", "positional", None, str),
    train_data=("location of JSON-formatted training data", "positional", None, str),
    dev_data=("location of JSON-formatted development data (optional)", "positional", None, str),
    n_iter=("number of iterations", "option", "n", int),
    nsents=("number of sentences", "option", None, int),
    use_gpu=("Use GPU", "flag", "g", bool),
    no_tagger=("Don't train tagger", "flag", "T", bool),
    no_parser=("Don't train parser", "flag", "P", bool),
    no_entities=("Don't train NER", "flag", "N", bool)
)
def train(lang, output_dir, train_data, dev_data=None, n_iter=15,
          nsents=0, use_gpu=False,
          no_tagger=False, no_parser=False, no_entities=False):
    """Train a model. Expects data in spaCy's JSON format."""
    # A falsy sentence count (including the default 0) is forwarded as None.
    sentence_limit = nsents if nsents else None
    # Thin command-line shim around spacy.cli.train (imported as cli_train).
    cli_train(lang, output_dir, train_data, dev_data, n_iter, sentence_limit,
              use_gpu, no_tagger, no_parser, no_entities)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    import plac
    import sys
    from spacy.cli import download, link, info, package, train, convert
    from spacy.util import prints

    # Sub-command name -> plac-annotated function exported by spacy.cli.
    commands = {
        'download': download,
        'link': link,
        'info': info,
        'train': train,
        'convert': convert,
        'package': package,
    }
    # Sorted so the listing is stable regardless of dict ordering.
    available = ', '.join(sorted(commands))

    # NOTE(review): prints(..., exits=1) is assumed to terminate the process
    # with status 1 (it replaces the earlier print + sys.exit(1) pair);
    # confirm against spacy.util.prints.
    if len(sys.argv) == 1:
        prints(available, title="Available commands", exits=1)

    # Strip the sub-command from argv so plac only parses that command's own
    # arguments, and rewrite argv[0] so it names the sub-command.
    command = sys.argv.pop(1)
    sys.argv[0] = 'spacy %s' % command
    if command in commands:
        plac.call(commands[command])
    else:
        prints("Available: %s" % available,
               title="Unknown command: %s" % command, exits=1)
|
||||
|
|
|
@ -3,5 +3,4 @@ from .info import info
|
|||
from .link import link
|
||||
from .package import package
|
||||
from .train import train
|
||||
from .model import model
|
||||
from .convert import convert
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import plac
|
||||
from pathlib import Path
|
||||
|
||||
from .converters import conllu2json, iob2json
|
||||
|
@ -18,15 +19,24 @@ CONVERTERS = {
|
|||
}
|
||||
|
||||
|
||||
@plac.annotations(
    input_file=("input file", "positional", None, str),
    output_dir=("output directory for converted file", "positional", None, str),
    n_sents=("Number of sentences per doc", "option", "n", float),
    morphology=("Enable appending morphology to tags", "flag", "m", bool)
)
def convert(input_file, output_dir, n_sents=10, morphology=False):
    """Convert files into JSON format for use with train command and other
    experiment management functions.

    input_file: path of the file to convert; its suffix selects the
        converter from CONVERTERS.
    output_dir: existing directory that receives the converted file.
    n_sents: number of sentences per doc (default 10).
    morphology: enable appending morphology to tags.

    NOTE(review): prints(..., exits=1) is assumed to stop execution; confirm
    against spacy.util.prints.
    """
    input_path = Path(input_file)
    output_path = Path(output_dir)
    if not input_path.exists():
        prints(input_path, title="Input file not found", exits=1)
    if not output_path.exists():
        prints(output_path, title="Output directory not found", exits=1)
    file_ext = input_path.suffix
    if file_ext not in CONVERTERS:
        prints("Can't find converter for %s" % input_path.parts[-1],
               title="Unknown format", exits=1)
    # The options are now named parameters, so forward them explicitly and
    # positionally, in the order the former *args carried them.
    CONVERTERS[file_ext](input_path, output_path, n_sents, morphology)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import plac
|
||||
import requests
|
||||
import os
|
||||
import subprocess
|
||||
|
@ -11,7 +12,16 @@ from ..util import prints
|
|||
from .. import about
|
||||
|
||||
|
||||
@plac.annotations(
|
||||
model=("model to download (shortcut or model name)", "positional", None, str),
|
||||
direct=("force direct download. Needs model name with version and won't "
|
||||
"perform compatibility check", "flag", "d", bool)
|
||||
)
|
||||
def download(model, direct=False):
|
||||
"""Download compatible model from default download path using pip. Model
|
||||
can be shortcut, model name or, if --direct flag is set, full model name
|
||||
with version.
|
||||
"""
|
||||
if direct:
|
||||
download_model('{m}/{m}.tar.gz'.format(m=model))
|
||||
else:
|
||||
|
@ -38,7 +48,7 @@ def get_json(url, desc):
|
|||
if r.status_code != 200:
|
||||
prints("Couldn't fetch %s. Please find a model for your spaCy installation "
|
||||
"(v%s), and download it manually." % (desc, about.__version__),
|
||||
about.__docs_models__, title="Server error (%d)" % r.status_code, exits=True)
|
||||
about.__docs_models__, title="Server error (%d)" % r.status_code, exits=1)
|
||||
return r.json()
|
||||
|
||||
|
||||
|
@ -48,7 +58,7 @@ def get_compatibility():
|
|||
comp = comp_table['spacy']
|
||||
if version not in comp:
|
||||
prints("No compatible models found for v%s of spaCy." % version,
|
||||
title="Compatibility error", exits=True)
|
||||
title="Compatibility error", exits=1)
|
||||
return comp[version]
|
||||
|
||||
|
||||
|
@ -56,7 +66,7 @@ def get_version(model, comp):
|
|||
if model not in comp:
|
||||
version = about.__version__
|
||||
prints("No compatible model found for '%s' (spaCy v%s)." % (model, version),
|
||||
title="Compatibility error", exits=True)
|
||||
title="Compatibility error", exits=1)
|
||||
return comp[model][0]
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import plac
|
||||
import platform
|
||||
from pathlib import Path
|
||||
|
||||
|
@ -9,7 +10,15 @@ from .. import about
|
|||
from .. import util
|
||||
|
||||
|
||||
@plac.annotations(
|
||||
model=("optional: shortcut link of model", "positional", None, str),
|
||||
markdown=("generate Markdown for GitHub issues", "flag", "md", str)
|
||||
)
|
||||
def info(model=None, markdown=False):
|
||||
"""Print info about spaCy installation. If a model shortcut link is
|
||||
speficied as an argument, print model information. Flag --markdown
|
||||
prints details in Markdown for easy copy-pasting to GitHub issues.
|
||||
"""
|
||||
if model:
|
||||
model_path = util.resolve_model_path(model)
|
||||
meta = util.parse_package_meta(model_path)
|
||||
|
|
|
@ -1,24 +1,35 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import plac
|
||||
from pathlib import Path
|
||||
|
||||
from ..compat import symlink_to, path2str
|
||||
from ..util import prints
|
||||
from .. import util
|
||||
|
||||
|
||||
@plac.annotations(
|
||||
origin=("package name or local path to model", "positional", None, str),
|
||||
link_name=("name of shortuct link to create", "positional", None, str),
|
||||
force=("force overwriting of existing link", "flag", "f", bool)
|
||||
)
|
||||
def link(origin, link_name, force=False):
|
||||
"""Create a symlink for models within the spacy/data directory. Accepts
|
||||
either the name of a pip package, or the local path to the model data
|
||||
directory. Linking models allows loading them via spacy.load(link_name).
|
||||
"""
|
||||
if util.is_package(origin):
|
||||
model_path = util.get_model_package_path(origin)
|
||||
else:
|
||||
model_path = Path(origin)
|
||||
if not model_path.exists():
|
||||
prints("The data should be located in %s" % path2str(model_path),
|
||||
title="Can't locate model data", exits=True)
|
||||
title="Can't locate model data", exits=1)
|
||||
link_path = util.get_data_path() / link_name
|
||||
if link_path.exists() and not force:
|
||||
prints("To overwrite an existing link, use the --force flag.",
|
||||
title="Link %s already exists" % link_name, exits=True)
|
||||
title="Link %s already exists" % link_name, exits=1)
|
||||
elif link_path.exists():
|
||||
link_path.unlink()
|
||||
try:
|
||||
|
|
|
@ -1,122 +0,0 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import gzip
|
||||
import math
|
||||
from ast import literal_eval
|
||||
from preshed.counter import PreshCounter
|
||||
|
||||
from ..vocab import write_binary_vectors
|
||||
from ..compat import fix_text, path2str
|
||||
from ..util import prints
|
||||
from .. import util
|
||||
|
||||
|
||||
def model(lang, model_dir, freqs_data, clusters_data, vectors_data):
    """Initialise a model data directory from frequency, cluster and vector
    files.

    lang: language ID used to look up the language class.
    model_dir: directory the model data is written to.
    freqs_data: path to the frequencies file (required).
    clusters_data: path to a Brown clusters file, or a falsy value to skip.
    vectors_data: path to a word vectors file, or a falsy value to skip.
    """
    model_path = util.ensure_path(model_dir)
    freqs_path = util.ensure_path(freqs_data)
    clusters_path = util.ensure_path(clusters_data)
    vectors_path = util.ensure_path(vectors_data)
    # Guard against a missing value as well as a missing file, so the user
    # gets the formatted message instead of an AttributeError.
    if not freqs_path or not freqs_path.is_file():
        prints(freqs_path, title="No frequencies file found", exits=True)
    if clusters_path and not clusters_path.is_file():
        prints(clusters_path, title="No Brown clusters file found", exits=True)
    if vectors_path and not vectors_path.is_file():
        prints(vectors_path, title="No word vectors file found", exits=True)
    vocab = util.get_lang_class(lang).Defaults.create_vocab()
    probs, oov_prob = read_probs(freqs_path)
    clusters = read_clusters(clusters_path) if clusters_path else {}
    populate_vocab(vocab, clusters, probs, oov_prob)
    create_model(model_path, vectors_path, vocab, oov_prob)
|
||||
|
||||
|
||||
def create_model(model_path, vectors_path, vocab, oov_prob):
    """Write the vocab data for a model to disk.

    Creates model_path and its 'vocab' sub-directory when they do not exist,
    then writes lexemes.bin, strings.json and oov_prob into 'vocab'. When
    vectors_path is truthy, the vectors are additionally written to
    'vocab/vec.bin' via write_binary_vectors.
    """
    vocab_dir = model_path / 'vocab'
    # Parent first, so the child directory can be created inside it.
    for directory in (model_path, vocab_dir):
        if not directory.exists():
            directory.mkdir()
    vocab.dump(path2str(vocab_dir / 'lexemes.bin'))
    with (vocab_dir / 'strings.json').open('w') as strings_file:
        vocab.strings.dump(strings_file)
    with (vocab_dir / 'oov_prob').open('w') as oov_file:
        oov_file.write('%f' % oov_prob)
    if vectors_path:
        write_binary_vectors(path2str(vectors_path),
                             path2str(vocab_dir / 'vec.bin'))
|
||||
|
||||
|
||||
def read_probs(freqs_path, max_length=100, min_doc_freq=5, min_freq=200):
    """Read a tab-separated frequencies file and return log probabilities.

    Each line is expected to hold `freq<TAB>doc_freq<TAB>key`, where key is a
    Python literal that literal_eval turns into the word.

    freqs_path: path to the frequencies file (optionally gzipped, see
        check_unzip).
    max_length: keys whose raw text is this long or longer are skipped.
    min_doc_freq: minimum document frequency for a word to be kept.
    min_freq: minimum frequency for a word to be kept.

    RETURNS: (probs, oov_prob), where probs maps word -> log probability and
        oov_prob is the log probability derived from a smoothed count of 0.
    """
    counts = PreshCounter()
    total = 0
    # First pass: feed every frequency into the counter (keyed by 1-based
    # line number) and accumulate the grand total. The handle is closed as
    # soon as the pass is done.
    with check_unzip(freqs_path) as freqs_file:
        for i, line in enumerate(freqs_file):
            freq, doc_freq, key = line.rstrip().split('\t', 2)
            freq = int(freq)
            counts.inc(i+1, freq)
            total += freq
    counts.smooth()
    log_total = math.log(total)
    probs = {}
    # Second pass: re-open the file and keep only entries that pass the
    # thresholds, using the smoothed counts.
    with check_unzip(freqs_path) as freqs_file:
        for line in freqs_file:
            freq, doc_freq, key = line.rstrip().split('\t', 2)
            doc_freq = int(doc_freq)
            freq = int(freq)
            if doc_freq >= min_doc_freq and freq >= min_freq and len(key) < max_length:
                word = literal_eval(key)
                smooth_count = counts.smoother(freq)
                probs[word] = math.log(smooth_count) - log_total
    oov_prob = math.log(counts.smoother(0)) - log_total
    return probs, oov_prob
|
||||
|
||||
|
||||
def read_clusters(clusters_path):
    """Read a clusters file into a dict mapping word -> cluster string.

    Each usable line consists of three whitespace-separated fields:
    cluster, word and frequency. Lines that do not unpack into exactly three
    fields (or for which fix_text raises ValueError) are skipped.

    RETURNS: dict of word -> cluster, extended with lower-, title- and
        upper-cased variants of every word that were not already present.
    """
    clusters = {}
    with clusters_path.open() as file_:
        for row in file_:
            try:
                cluster, word, freq = row.split()
                word = fix_text(word)
            except ValueError:
                continue
            # If the clusterer has only seen the word a few times, its
            # cluster is unreliable, so fall back to '0'.
            clusters[word] = cluster if int(freq) >= 3 else '0'
    # Expand clusters with re-casing. Iterate over a snapshot because the
    # dict grows while variants are added.
    for word, cluster in list(clusters.items()):
        for variant in (word.lower(), word.title(), word.upper()):
            if variant not in clusters:
                clusters[variant] = cluster
    return clusters
|
||||
|
||||
|
||||
def populate_vocab(vocab, clusters, probs, oov_prob):
    """Set prob, is_oov and cluster on the vocab entry of every word in probs.

    vocab: object indexable by word; each lookup yields the lexeme to update.
    clusters: dict of word -> cluster bit string (e.g. '0101').
    probs: dict of word -> log probability.
    oov_prob: accepted for interface compatibility; not used here.
    """
    # Ascending stable sort, then reversed: words are visited from highest to
    # lowest probability. reversed() is kept (instead of reverse=True) so the
    # relative order of equal probabilities stays exactly as before.
    by_prob = sorted(probs.items(), key=lambda item: item[1])
    for word, prob in reversed(by_prob):
        lexeme = vocab[word]
        lexeme.prob = prob
        lexeme.is_oov = False
        # Decode as a little-endian string, so that we can do & 15 to get
        # the first 4 bits. See _parse_features.pyx
        if word in clusters:
            lexeme.cluster = int(clusters[word][::-1], 2)
        else:
            lexeme.cluster = 0
|
||||
|
||||
|
||||
def check_unzip(file_path):
    """Open file_path for reading text, transparently decompressing gzip.

    file_path: path object of the file to open.
    RETURNS: an open file object yielding text lines; the caller is
        responsible for closing it (it can be used as a context manager).
    """
    import io

    file_path_str = path2str(file_path)
    if file_path_str.endswith('.gz'):
        # gzip.open() defaults to binary mode, which would hand bytes to
        # callers that split lines on text separators. Wrap the stream so
        # both branches yield text. BufferedReader is layered in between
        # because TextIOWrapper needs a buffered stream interface.
        # NOTE(review): UTF-8 is assumed for gzipped input - confirm.
        raw = gzip.open(file_path_str, 'rb')
        return io.TextIOWrapper(io.BufferedReader(raw), encoding='utf8')
    return file_path.open()
|
|
@ -1,6 +1,7 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import plac
|
||||
import shutil
|
||||
import requests
|
||||
from pathlib import Path
|
||||
|
@ -11,16 +12,26 @@ from .. import util
|
|||
from .. import about
|
||||
|
||||
|
||||
def package(input_dir, output_dir, meta_path, force):
|
||||
@plac.annotations(
|
||||
input_dir=("directory with model data", "positional", None, str),
|
||||
output_dir=("output parent directory", "positional", None, str),
|
||||
meta=("path to meta.json", "option", "m", str),
|
||||
force=("force overwriting of existing folder in output directory", "flag", "f", bool)
|
||||
)
|
||||
def package(input_dir, output_dir, meta, force):
|
||||
"""Generate Python package for model data, including meta and required
|
||||
installation files. A new directory will be created in the specified
|
||||
output directory, and model data will be copied over.
|
||||
"""
|
||||
input_path = util.ensure_path(input_dir)
|
||||
output_path = util.ensure_path(output_dir)
|
||||
meta_path = util.ensure_path(meta_path)
|
||||
meta_path = util.ensure_path(meta)
|
||||
if not input_path or not input_path.exists():
|
||||
prints(input_path, title="Model directory not found", exits=True)
|
||||
prints(input_path, title="Model directory not found", exits=1)
|
||||
if not output_path or not output_path.exists():
|
||||
prints(output_path, title="Output directory not found", exits=True)
|
||||
prints(output_path, title="Output directory not found", exits=1)
|
||||
if meta_path and not meta_path.exists():
|
||||
prints(meta_path, title="meta.json not found", exits=True)
|
||||
prints(meta_path, title="meta.json not found", exits=1)
|
||||
|
||||
template_setup = get_template('setup.py')
|
||||
template_manifest = get_template('MANIFEST.in')
|
||||
|
@ -55,7 +66,7 @@ def create_dirs(package_path, force):
|
|||
else:
|
||||
prints(package_path, "Please delete the directory and try again, or "
|
||||
"use the --force flag to overwrite existing directories.",
|
||||
title="Package directory already exists", exits=True)
|
||||
title="Package directory already exists", exits=1)
|
||||
Path.mkdir(package_path, parents=True)
|
||||
|
||||
|
||||
|
@ -87,12 +98,12 @@ def validate_meta(meta, keys):
|
|||
for key in keys:
|
||||
if key not in meta or meta[key] == '':
|
||||
prints("This setting is required to build your package.",
|
||||
title='No "%s" setting found in meta.json' % key, exits=True)
|
||||
title='No "%s" setting found in meta.json' % key, exits=1)
|
||||
|
||||
|
||||
def get_template(filepath):
    """Fetch a template file and return its text.

    filepath: file name appended to the base URL about.__model_files__.
    RETURNS: the response body as text.

    Any status other than 200 is reported through prints(..., exits=1).
    NOTE(review): exits=1 is assumed to terminate the process with status 1;
    confirm against spacy.util.prints.
    """
    r = requests.get(about.__model_files__ + filepath)
    if r.status_code != 200:
        prints("Couldn't fetch template files from GitHub.",
               title="Server error (%d)" % r.status_code, exits=1)
    return r.text
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals, division, print_function
|
||||
|
||||
import plac
|
||||
import json
|
||||
from collections import defaultdict
|
||||
import cytoolz
|
||||
|
@ -18,19 +19,33 @@ from .. import util
|
|||
from .. import displacy
|
||||
|
||||
|
||||
def train(lang_id, output_dir, train_data, dev_data, n_iter, n_sents,
|
||||
@plac.annotations(
|
||||
lang=("model language", "positional", None, str),
|
||||
output_dir=("output directory to store model in", "positional", None, str),
|
||||
train_data=("location of JSON-formatted training data", "positional", None, str),
|
||||
dev_data=("location of JSON-formatted development data (optional)", "positional", None, str),
|
||||
n_iter=("number of iterations", "option", "n", int),
|
||||
n_sents=("number of sentences", "option", "ns", int),
|
||||
use_gpu=("Use GPU", "flag", "G", bool),
|
||||
no_tagger=("Don't train tagger", "flag", "T", bool),
|
||||
no_parser=("Don't train parser", "flag", "P", bool),
|
||||
no_entities=("Don't train NER", "flag", "N", bool)
|
||||
)
|
||||
def train(lang, output_dir, train_data, dev_data, n_iter, n_sents,
|
||||
use_gpu, no_tagger, no_parser, no_entities):
|
||||
"""Train a model. Expects data in spaCy's JSON format."""
|
||||
n_sents = n_sents or None
|
||||
output_path = util.ensure_path(output_dir)
|
||||
train_path = util.ensure_path(train_data)
|
||||
dev_path = util.ensure_path(dev_data)
|
||||
if not output_path.exists():
|
||||
prints(output_path, title="Output directory not found", exits=True)
|
||||
prints(output_path, title="Output directory not found", exits=1)
|
||||
if not train_path.exists():
|
||||
prints(train_path, title="Training data not found", exits=True)
|
||||
prints(train_path, title="Training data not found", exits=1)
|
||||
if dev_path and not dev_path.exists():
|
||||
prints(dev_path, title="Development data not found", exits=True)
|
||||
prints(dev_path, title="Development data not found", exits=1)
|
||||
|
||||
lang_class = util.get_lang_class(lang_id)
|
||||
lang_class = util.get_lang_class(lang)
|
||||
|
||||
pipeline = ['token_vectors', 'tags', 'dependencies', 'entities']
|
||||
if no_tagger and 'tags' in pipeline: pipeline.remove('tags')
|
||||
|
|
|
@ -5,16 +5,23 @@ include ../../_includes/_mixins
|
|||
p
|
||||
| As of v1.7.0, spaCy comes with new command line helpers to download and
|
||||
| link models and show useful debugging information. For a list of available
|
||||
| commands, type #[code python -m spacy --help].
|
||||
| commands, type #[code python -m spacy]. To make the command even more
|
||||
| convenient, we recommend
|
||||
| #[+a("https://askubuntu.com/questions/17536/how-do-i-create-a-permanent-bash-alias/17537#17537") creating an alias]
|
||||
| mapping #[code python -m spacy] to #[code spacy].
|
||||
|
||||
+aside("Why python -m?")
|
||||
| The problem with a global entry point is that it's resolved by looking up
|
||||
| entries in your #[code PATH] environment variable. This can give you
|
||||
| unexpected results, like executing the wrong spaCy installation
|
||||
| (especially when using #[code virtualenv]). #[code python -m] prevents
|
||||
| fallbacks to system modules and makes sure the correct spaCy version is
|
||||
| used. If you hate typing it every time, we recommend creating an
|
||||
| #[code alias] instead.
|
||||
| unexpected results, like executing the wrong spaCy installation.
|
||||
| #[code python -m] prevents fallbacks to system modules.
|
||||
|
||||
+infobox("⚠️ Deprecation note")
|
||||
| As of spaCy 2.0, the #[code model] command to initialise a model data
|
||||
| directory is deprecated. The command was only necessary because previous
|
||||
| versions of spaCy expected a model directory to already be set up. This
|
||||
| has since been changed, so you can use the #[+api("cli#train") #[code train]]
|
||||
| command straight away.
|
||||
|
||||
+h(2, "download") Download
|
||||
|
||||
|
@ -45,7 +52,7 @@ p
|
|||
+cell flag
|
||||
+cell Show help message and available arguments.
|
||||
|
||||
+infobox("Important note")
|
||||
+aside("Downloading best practices")
|
||||
| The #[code download] command is mostly intended as a convenient,
|
||||
| interactive wrapper – it performs compatibility checks and prints
|
||||
| detailed messages in case things go wrong. It's #[strong not recommended]
|
||||
|
@ -116,7 +123,6 @@ p
|
|||
+cell Show help message and available arguments.
|
||||
|
||||
+h(2, "convert") Convert
|
||||
+tag experimental
|
||||
|
||||
p
|
||||
| Convert files into spaCy's #[+a("/docs/api/annotation#json-input") JSON format]
|
||||
|
@ -153,49 +159,7 @@ p
|
|||
+cell flag
|
||||
+cell Show help message and available arguments.
|
||||
|
||||
+h(2, "model") Model
|
||||
+tag experimental
|
||||
|
||||
p
|
||||
| Initialise a new model and its data directory. For more info on this, see
|
||||
| the documentation on #[+a("/docs/usage/adding-languages") adding languages].
|
||||
|
||||
+code(false, "bash").
|
||||
python -m spacy model [lang] [model_dir] [freqs_data] [clusters_data] [vectors_data]
|
||||
|
||||
+table(["Argument", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code lang]
|
||||
+cell positional
|
||||
+cell Model language.
|
||||
|
||||
+row
|
||||
+cell #[code model_dir]
|
||||
+cell positional
|
||||
+cell Output directory to store the model in.
|
||||
|
||||
+row
|
||||
+cell #[code freqs_data]
|
||||
+cell positional
|
||||
+cell Tab-separated frequencies file.
|
||||
|
||||
+row
|
||||
+cell #[code clusters_data]
|
||||
+cell positional
|
||||
+cell Brown clusters file (optional).
|
||||
|
||||
+row
|
||||
+cell #[code vectors_data]
|
||||
+cell positional
|
||||
+cell Word vectors file (optional).
|
||||
|
||||
+row
|
||||
+cell #[code --help], #[code -h]
|
||||
+cell flag
|
||||
+cell Show help message and available arguments.
|
||||
|
||||
+h(2, "train") Train
|
||||
+tag experimental
|
||||
|
||||
p
|
||||
| Train a model. Expects data in spaCy's
|
||||
|
@ -231,7 +195,7 @@ p
|
|||
+cell Number of iterations (default: #[code 15]).
|
||||
|
||||
+row
|
||||
+cell #[code --nsents]
|
||||
+cell #[code --n_sents], #[code -ns]
|
||||
+cell option
|
||||
+cell Number of sentences (default: #[code 0]).
|
||||
|
||||
|
@ -241,7 +205,7 @@ p
|
|||
+cell L1 regularization penalty for parser (default: #[code 0.0]).
|
||||
|
||||
+row
|
||||
+cell #[code --use-gpu], #[code -g]
|
||||
+cell #[code --use-gpu], #[code -G]
|
||||
+cell flag
|
||||
+cell Use GPU.
|
||||
|
||||
|
@ -266,17 +230,16 @@ p
|
|||
+cell Show help message and available arguments.
|
||||
|
||||
+h(2, "package") Package
|
||||
+tag experimental
|
||||
|
||||
p
|
||||
| Generate a #[+a("/docs/usage/saving-loading#generating") model Python package]
|
||||
| from an existing model data directory. All data files are copied over.
|
||||
| If the path to a meta.json is supplied, or a meta.json is found in the
|
||||
| input directory, this file is used. Otherwise, the data can be entered
|
||||
| directly from the command line. While this feature is still experimental,
|
||||
| the required file templates are downloaded from
|
||||
| #[+src(gh("spacy-dev-resources", "templates/model")) GitHub]. This means
|
||||
| you need to be connected to the internet to use this command.
|
||||
| directly from the command line. The required file templates are downloaded
|
||||
| from #[+src(gh("spacy-dev-resources", "templates/model")) GitHub] to make
|
||||
| sure you're always using the latest versions. This means you need to be
|
||||
| connected to the internet to use this command.
|
||||
|
||||
+code(false, "bash").
|
||||
python -m spacy package [input_dir] [output_dir] [--meta] [--force]
|
||||
|
|
Loading…
Reference in New Issue
Block a user