Merge pull request #5674 from svlandeg/fix/small-edits

This commit is contained in:
Ines Montani 2020-06-30 12:56:14 +02:00 committed by GitHub
commit 5f325b602b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 26 additions and 14 deletions

View File

@ -4,7 +4,6 @@ import srsly
from pathlib import Path
from wasabi import msg
import subprocess
import shlex
import os
import re
import shutil
@ -16,7 +15,7 @@ from ._app import app, Arg, Opt, COMMAND, NAME
from .. import about
from ..schemas import ProjectConfigSchema, validate
from ..util import ensure_path, run_command, make_tempdir, working_dir
from ..util import get_hash, get_checksum
from ..util import get_hash, get_checksum, split_command
CONFIG_FILE = "project.yml"
@ -82,6 +81,8 @@ def project_clone_cli(
initializing DVC (Data Version Control). This allows DVC to integrate with
Git.
"""
if dest == Path.cwd():
dest = dest / name
project_clone(name, dest, repo=repo, git=git, no_init=no_init)
@ -109,7 +110,7 @@ def project_assets_cli(
"""Use DVC (Data Version Control) to fetch project assets. Assets are
defined in the "assets" section of the project config. If possible, DVC
will try to track the files so you can pull changes from upstream. It will
also try and store the checksum so the assets are versioned. If th file
also try and store the checksum so the assets are versioned. If the file
can't be tracked or checked, it will be downloaded without DVC. If a checksum
is provided in the project config, the file is only downloaded if no local
file with the same checksum exists.
@ -236,13 +237,16 @@ def project_clone(
# We're using Git and sparse checkout to only clone the files we need
with make_tempdir() as tmp_dir:
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 --config core.sparseCheckout=true"
run_command(shlex.split(cmd))
try:
run_command(split_command(cmd))
except:
raise RuntimeError(f"Could not clone the repo '{repo}' into the temp dir '{tmp_dir}'.")
with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f:
f.write(name)
run_command(["git", "-C", tmp_dir, "fetch"])
run_command(["git", "-C", tmp_dir, "checkout"])
run_command(["git", "-C", str(tmp_dir), "fetch"])
run_command(["git", "-C", str(tmp_dir), "checkout"])
shutil.move(str(tmp_dir / Path(name).name), str(project_dir))
msg.good(f"Cloned project '{name}' from {repo}")
msg.good(f"Cloned project '{name}' from {repo} into {project_dir}")
for sub_dir in DIRS:
dir_path = project_dir / sub_dir
if not dir_path.exists():
@ -500,7 +504,7 @@ def update_dvc_config(
path = path.resolve()
dvc_config_path = path / DVC_CONFIG
if dvc_config_path.exists():
# Cneck if the file was generated using the current config, if not, redo
# Check if the file was generated using the current config, if not, redo
with dvc_config_path.open("r", encoding="utf8") as f:
ref_hash = f.readline().strip().replace("# ", "")
if ref_hash == config_hash and not force:
@ -588,12 +592,12 @@ def run_commands(
variables (Dict[str, str]): Dictionary of variable names, mapped to their
values. Will be used to substitute format string variables in the
commands.
silent (boll): Don't print the commands.
silent (bool): Don't print the commands.
"""
for command in commands:
# Substitute variables, e.g. "./{NAME}.json"
command = command.format(**variables)
command = shlex.split(command)
command = split_command(command)
# TODO: is this needed / a good idea?
if len(command) and command[0] in ("python", "python3"):
command[0] = sys.executable

View File

@ -132,6 +132,7 @@ class Warnings(object):
"are currently: da, de, el, en, id, lb, pt, ru, sr, ta, th.")
# TODO: fix numbering after merging develop into master
W091 = ("Could not clean/remove the temp directory at {dir}: {msg}.")
W092 = ("Ignoring annotations for sentence starts, as dependency heads are set.")
W093 = ("Could not find any data to train the {name} on. Is your "
"input data correctly formatted ?")

View File

@ -22,7 +22,7 @@ from contextlib import contextmanager
import tempfile
import shutil
import hashlib
import shlex
try:
import cupy.random
@ -35,7 +35,7 @@ except ImportError:
import importlib_metadata
from .symbols import ORTH
from .compat import cupy, CudaStream
from .compat import cupy, CudaStream, is_windows
from .errors import Errors, Warnings
from . import about
@ -467,7 +467,10 @@ def make_tempdir():
"""
d = Path(tempfile.mkdtemp())
yield d
shutil.rmtree(str(d))
try:
shutil.rmtree(str(d))
except PermissionError as e:
warnings.warn(Warnings.W091.format(dir=d, msg=e))
def get_hash(data) -> str:
@ -922,7 +925,11 @@ def from_disk(path, readers, exclude):
# Split to support file names like meta.json
if key.split(".")[0] not in exclude:
reader(path / key)
return path
return
def split_command(command):
return shlex.split(command, posix=not is_windows)
def import_file(name, loc):