mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 13:47:13 +03:00
Add convert command
This commit is contained in:
parent
9952d3b08a
commit
789ce8a45e
|
@ -10,12 +10,13 @@ from spacy.cli import info as cli_info
|
||||||
from spacy.cli import package as cli_package
|
from spacy.cli import package as cli_package
|
||||||
from spacy.cli import train as cli_train
|
from spacy.cli import train as cli_train
|
||||||
from spacy.cli import model as cli_model
|
from spacy.cli import model as cli_model
|
||||||
|
from spacy.cli import convert as cli_convert
|
||||||
|
|
||||||
|
|
||||||
class CLI(object):
|
class CLI(object):
|
||||||
"""Command-line interface for spaCy"""
|
"""Command-line interface for spaCy"""
|
||||||
|
|
||||||
commands = ('download', 'link', 'info', 'package', 'train', 'model')
|
commands = ('download', 'link', 'info', 'package', 'train', 'model', 'convert')
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
model=("model to download (shortcut or model name)", "positional", None, str),
|
model=("model to download (shortcut or model name)", "positional", None, str),
|
||||||
|
@ -110,6 +111,20 @@ class CLI(object):
|
||||||
|
|
||||||
cli_model(lang, model_dir, freqs_data, clusters_data, vectors_data)
|
cli_model(lang, model_dir, freqs_data, clusters_data, vectors_data)
|
||||||
|
|
||||||
|
# CLI entry point for the "convert" command: forwards the parsed command-line
# values unchanged to cli_convert (imported from spacy.cli).
@plac.annotations(
    input_file=("input file", "positional", None, str),
    output_dir=("output directory for converted file", "positional", None, str),
    # A sentence count is a whole number (the default is the int 10), so parse
    # "-n" as int rather than float.
    n_sents=("Number of sentences per doc", "option", "n", int),
    morphology=("Enable appending morphology to tags", "flag", "m", bool)
)
def convert(self, input_file, output_dir, n_sents=10, morphology=False):
    """
    Convert files into JSON format for use with train command and other
    experiment management functions.
    """
    # n_sents and morphology are passed positionally, in this order.
    cli_convert(input_file, output_dir, n_sents, morphology)
|
||||||
|
|
||||||
|
|
||||||
def __missing__(self, name):
|
def __missing__(self, name):
|
||||||
print("\n Command %r does not exist."
|
print("\n Command %r does not exist."
|
||||||
|
|
|
@ -4,3 +4,4 @@ from .link import link
|
||||||
from .package import package
|
from .package import package
|
||||||
from .train import train, train_config
|
from .train import train, train_config
|
||||||
from .model import model
|
from .model import model
|
||||||
|
from .convert import convert
|
||||||
|
|
37
spacy/cli/convert.py
Normal file
37
spacy/cli/convert.py
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals, division, print_function
|
||||||
|
|
||||||
|
import io
|
||||||
|
from pathlib import Path, PurePosixPath
|
||||||
|
|
||||||
|
from .converters import conllu2json
|
||||||
|
from .. import util
|
||||||
|
|
||||||
|
|
||||||
|
# Converters are matched by file extension. To add a converter, add a new entry
|
||||||
|
# to this dict with the file extension mapped to the converter function imported
|
||||||
|
# from the .converters package.
|
||||||
|
|
||||||
|
CONVERTERS = {
|
||||||
|
'.conllu': conllu2json
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def convert(input_file, output_dir, *args):
    """Convert a file with the converter registered for its extension.

    input_file: Path (string) of the file to convert.
    output_dir: Path (string) of the output directory handed to the converter.
    *args: Extra positional arguments forwarded unchanged to the converter.

    Both paths are validated with check_dirs() first. If no converter is
    registered in CONVERTERS for the file's extension, the problem is
    reported through util.sys_exit() with the title "Unknown format".
    """
    input_path = Path(input_file)
    output_path = Path(output_dir)
    check_dirs(input_path, output_path)

    # Compare extensions case-insensitively so that e.g. "corpus.CONLLU" is
    # handled like "corpus.conllu" (the keys in CONVERTERS are lower-case).
    file_ext = input_path.suffix.lower()
    converter = CONVERTERS.get(file_ext)

    if converter is None:
        # Path(".").parts is empty, so indexing parts[-1] would raise
        # IndexError here; .name is simply '' in that case, and we fall back
        # to the full path so the message still names the offending input.
        file_name = input_path.name or input_path.as_posix()
        util.sys_exit("Can't find converter for {}".format(file_name),
                      title="Unknown format")
    else:
        converter(input_path, output_path, *args)
|
||||||
|
|
||||||
|
|
||||||
|
def check_dirs(input_file, output_path):
    """Validate the input file and output directory of a conversion.

    input_file: pathlib.Path of the file to convert.
    output_path: pathlib.Path of the directory to write to.

    Each failed check is reported through util.sys_exit() with the offending
    path (POSIX form) as the message. Nothing is returned when both are valid.
    """
    # exists() alone would also accept a directory as the input "file".
    if not input_file.exists() or input_file.is_dir():
        util.sys_exit(input_file.as_posix(), title="Input file not found")
    # is_dir() is False for missing paths as well as for regular files, so a
    # file passed as the output directory is rejected here too.
    if not output_path.is_dir():
        util.sys_exit(output_path.as_posix(), title="Output directory not found")
|
Loading…
Reference in New Issue
Block a user