Merge pull request #5674 from svlandeg/fix/small-edits

This commit is contained in:
Ines Montani 2020-06-30 12:56:14 +02:00 committed by GitHub
commit 5f325b602b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 26 additions and 14 deletions

View File

@ -4,7 +4,6 @@ import srsly
from pathlib import Path from pathlib import Path
from wasabi import msg from wasabi import msg
import subprocess import subprocess
import shlex
import os import os
import re import re
import shutil import shutil
@ -16,7 +15,7 @@ from ._app import app, Arg, Opt, COMMAND, NAME
from .. import about from .. import about
from ..schemas import ProjectConfigSchema, validate from ..schemas import ProjectConfigSchema, validate
from ..util import ensure_path, run_command, make_tempdir, working_dir from ..util import ensure_path, run_command, make_tempdir, working_dir
from ..util import get_hash, get_checksum from ..util import get_hash, get_checksum, split_command
CONFIG_FILE = "project.yml" CONFIG_FILE = "project.yml"
@ -82,6 +81,8 @@ def project_clone_cli(
initializing DVC (Data Version Control). This allows DVC to integrate with initializing DVC (Data Version Control). This allows DVC to integrate with
Git. Git.
""" """
if dest == Path.cwd():
dest = dest / name
project_clone(name, dest, repo=repo, git=git, no_init=no_init) project_clone(name, dest, repo=repo, git=git, no_init=no_init)
@ -109,7 +110,7 @@ def project_assets_cli(
"""Use DVC (Data Version Control) to fetch project assets. Assets are """Use DVC (Data Version Control) to fetch project assets. Assets are
defined in the "assets" section of the project config. If possible, DVC defined in the "assets" section of the project config. If possible, DVC
will try to track the files so you can pull changes from upstream. It will will try to track the files so you can pull changes from upstream. It will
also try and store the checksum so the assets are versioned. If th file also try and store the checksum so the assets are versioned. If the file
can't be tracked or checked, it will be downloaded without DVC. If a checksum can't be tracked or checked, it will be downloaded without DVC. If a checksum
is provided in the project config, the file is only downloaded if no local is provided in the project config, the file is only downloaded if no local
file with the same checksum exists. file with the same checksum exists.
@ -236,13 +237,16 @@ def project_clone(
# We're using Git and sparse checkout to only clone the files we need # We're using Git and sparse checkout to only clone the files we need
with make_tempdir() as tmp_dir: with make_tempdir() as tmp_dir:
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 --config core.sparseCheckout=true" cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 --config core.sparseCheckout=true"
run_command(shlex.split(cmd)) try:
run_command(split_command(cmd))
except:
raise RuntimeError(f"Could not clone the repo '{repo}' into the temp dir '{tmp_dir}'.")
with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f: with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f:
f.write(name) f.write(name)
run_command(["git", "-C", tmp_dir, "fetch"]) run_command(["git", "-C", str(tmp_dir), "fetch"])
run_command(["git", "-C", tmp_dir, "checkout"]) run_command(["git", "-C", str(tmp_dir), "checkout"])
shutil.move(str(tmp_dir / Path(name).name), str(project_dir)) shutil.move(str(tmp_dir / Path(name).name), str(project_dir))
msg.good(f"Cloned project '{name}' from {repo}") msg.good(f"Cloned project '{name}' from {repo} into {project_dir}")
for sub_dir in DIRS: for sub_dir in DIRS:
dir_path = project_dir / sub_dir dir_path = project_dir / sub_dir
if not dir_path.exists(): if not dir_path.exists():
@ -500,7 +504,7 @@ def update_dvc_config(
path = path.resolve() path = path.resolve()
dvc_config_path = path / DVC_CONFIG dvc_config_path = path / DVC_CONFIG
if dvc_config_path.exists(): if dvc_config_path.exists():
# Cneck if the file was generated using the current config, if not, redo # Check if the file was generated using the current config, if not, redo
with dvc_config_path.open("r", encoding="utf8") as f: with dvc_config_path.open("r", encoding="utf8") as f:
ref_hash = f.readline().strip().replace("# ", "") ref_hash = f.readline().strip().replace("# ", "")
if ref_hash == config_hash and not force: if ref_hash == config_hash and not force:
@ -588,12 +592,12 @@ def run_commands(
variables (Dict[str, str]): Dictionary of variable names, mapped to their variables (Dict[str, str]): Dictionary of variable names, mapped to their
values. Will be used to substitute format string variables in the values. Will be used to substitute format string variables in the
commands. commands.
silent (boll): Don't print the commands. silent (bool): Don't print the commands.
""" """
for command in commands: for command in commands:
# Substitute variables, e.g. "./{NAME}.json" # Substitute variables, e.g. "./{NAME}.json"
command = command.format(**variables) command = command.format(**variables)
command = shlex.split(command) command = split_command(command)
# TODO: is this needed / a good idea? # TODO: is this needed / a good idea?
if len(command) and command[0] in ("python", "python3"): if len(command) and command[0] in ("python", "python3"):
command[0] = sys.executable command[0] = sys.executable

View File

@ -132,6 +132,7 @@ class Warnings(object):
"are currently: da, de, el, en, id, lb, pt, ru, sr, ta, th.") "are currently: da, de, el, en, id, lb, pt, ru, sr, ta, th.")
# TODO: fix numbering after merging develop into master # TODO: fix numbering after merging develop into master
W091 = ("Could not clean/remove the temp directory at {dir}: {msg}.")
W092 = ("Ignoring annotations for sentence starts, as dependency heads are set.") W092 = ("Ignoring annotations for sentence starts, as dependency heads are set.")
W093 = ("Could not find any data to train the {name} on. Is your " W093 = ("Could not find any data to train the {name} on. Is your "
"input data correctly formatted ?") "input data correctly formatted ?")

View File

@ -22,7 +22,7 @@ from contextlib import contextmanager
import tempfile import tempfile
import shutil import shutil
import hashlib import hashlib
import shlex
try: try:
import cupy.random import cupy.random
@ -35,7 +35,7 @@ except ImportError:
import importlib_metadata import importlib_metadata
from .symbols import ORTH from .symbols import ORTH
from .compat import cupy, CudaStream from .compat import cupy, CudaStream, is_windows
from .errors import Errors, Warnings from .errors import Errors, Warnings
from . import about from . import about
@ -467,7 +467,10 @@ def make_tempdir():
""" """
d = Path(tempfile.mkdtemp()) d = Path(tempfile.mkdtemp())
yield d yield d
shutil.rmtree(str(d)) try:
shutil.rmtree(str(d))
except PermissionError as e:
warnings.warn(Warnings.W091.format(dir=d, msg=e))
def get_hash(data) -> str: def get_hash(data) -> str:
@ -922,7 +925,11 @@ def from_disk(path, readers, exclude):
# Split to support file names like meta.json # Split to support file names like meta.json
if key.split(".")[0] not in exclude: if key.split(".")[0] not in exclude:
reader(path / key) reader(path / key)
return path return
def split_command(command):
    """Split a command string into a list of arguments.

    Thin wrapper around shlex.split that chooses the parsing mode from the
    platform flag: POSIX rules are used unless is_windows is set, in which
    case shlex runs in its non-POSIX mode.

    command (str): The command string to tokenize.
    RETURNS (list): The individual tokens of the command.
    """
    use_posix = not is_windows
    tokens = shlex.split(command, posix=use_posix)
    return tokens
def import_file(name, loc): def import_file(name, loc):