mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
Merge pull request #5674 from svlandeg/fix/small-edits
This commit is contained in:
commit
5f325b602b
|
@ -4,7 +4,6 @@ import srsly
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from wasabi import msg
|
from wasabi import msg
|
||||||
import subprocess
|
import subprocess
|
||||||
import shlex
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
@ -16,7 +15,7 @@ from ._app import app, Arg, Opt, COMMAND, NAME
|
||||||
from .. import about
|
from .. import about
|
||||||
from ..schemas import ProjectConfigSchema, validate
|
from ..schemas import ProjectConfigSchema, validate
|
||||||
from ..util import ensure_path, run_command, make_tempdir, working_dir
|
from ..util import ensure_path, run_command, make_tempdir, working_dir
|
||||||
from ..util import get_hash, get_checksum
|
from ..util import get_hash, get_checksum, split_command
|
||||||
|
|
||||||
|
|
||||||
CONFIG_FILE = "project.yml"
|
CONFIG_FILE = "project.yml"
|
||||||
|
@ -82,6 +81,8 @@ def project_clone_cli(
|
||||||
initializing DVC (Data Version Control). This allows DVC to integrate with
|
initializing DVC (Data Version Control). This allows DVC to integrate with
|
||||||
Git.
|
Git.
|
||||||
"""
|
"""
|
||||||
|
if dest == Path.cwd():
|
||||||
|
dest = dest / name
|
||||||
project_clone(name, dest, repo=repo, git=git, no_init=no_init)
|
project_clone(name, dest, repo=repo, git=git, no_init=no_init)
|
||||||
|
|
||||||
|
|
||||||
|
@ -109,7 +110,7 @@ def project_assets_cli(
|
||||||
"""Use DVC (Data Version Control) to fetch project assets. Assets are
|
"""Use DVC (Data Version Control) to fetch project assets. Assets are
|
||||||
defined in the "assets" section of the project config. If possible, DVC
|
defined in the "assets" section of the project config. If possible, DVC
|
||||||
will try to track the files so you can pull changes from upstream. It will
|
will try to track the files so you can pull changes from upstream. It will
|
||||||
also try and store the checksum so the assets are versioned. If th file
|
also try and store the checksum so the assets are versioned. If the file
|
||||||
can't be tracked or checked, it will be downloaded without DVC. If a checksum
|
can't be tracked or checked, it will be downloaded without DVC. If a checksum
|
||||||
is provided in the project config, the file is only downloaded if no local
|
is provided in the project config, the file is only downloaded if no local
|
||||||
file with the same checksum exists.
|
file with the same checksum exists.
|
||||||
|
@ -236,13 +237,16 @@ def project_clone(
|
||||||
# We're using Git and sparse checkout to only clone the files we need
|
# We're using Git and sparse checkout to only clone the files we need
|
||||||
with make_tempdir() as tmp_dir:
|
with make_tempdir() as tmp_dir:
|
||||||
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 --config core.sparseCheckout=true"
|
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 --config core.sparseCheckout=true"
|
||||||
run_command(shlex.split(cmd))
|
try:
|
||||||
|
run_command(split_command(cmd))
|
||||||
|
except:
|
||||||
|
raise RuntimeError(f"Could not clone the repo '{repo}' into the temp dir '{tmp_dir}'.")
|
||||||
with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f:
|
with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f:
|
||||||
f.write(name)
|
f.write(name)
|
||||||
run_command(["git", "-C", tmp_dir, "fetch"])
|
run_command(["git", "-C", str(tmp_dir), "fetch"])
|
||||||
run_command(["git", "-C", tmp_dir, "checkout"])
|
run_command(["git", "-C", str(tmp_dir), "checkout"])
|
||||||
shutil.move(str(tmp_dir / Path(name).name), str(project_dir))
|
shutil.move(str(tmp_dir / Path(name).name), str(project_dir))
|
||||||
msg.good(f"Cloned project '{name}' from {repo}")
|
msg.good(f"Cloned project '{name}' from {repo} into {project_dir}")
|
||||||
for sub_dir in DIRS:
|
for sub_dir in DIRS:
|
||||||
dir_path = project_dir / sub_dir
|
dir_path = project_dir / sub_dir
|
||||||
if not dir_path.exists():
|
if not dir_path.exists():
|
||||||
|
@ -500,7 +504,7 @@ def update_dvc_config(
|
||||||
path = path.resolve()
|
path = path.resolve()
|
||||||
dvc_config_path = path / DVC_CONFIG
|
dvc_config_path = path / DVC_CONFIG
|
||||||
if dvc_config_path.exists():
|
if dvc_config_path.exists():
|
||||||
# Cneck if the file was generated using the current config, if not, redo
|
# Check if the file was generated using the current config, if not, redo
|
||||||
with dvc_config_path.open("r", encoding="utf8") as f:
|
with dvc_config_path.open("r", encoding="utf8") as f:
|
||||||
ref_hash = f.readline().strip().replace("# ", "")
|
ref_hash = f.readline().strip().replace("# ", "")
|
||||||
if ref_hash == config_hash and not force:
|
if ref_hash == config_hash and not force:
|
||||||
|
@ -588,12 +592,12 @@ def run_commands(
|
||||||
variables (Dict[str, str]): Dictionary of variable names, mapped to their
|
variables (Dict[str, str]): Dictionary of variable names, mapped to their
|
||||||
values. Will be used to substitute format string variables in the
|
values. Will be used to substitute format string variables in the
|
||||||
commands.
|
commands.
|
||||||
silent (boll): Don't print the commands.
|
silent (bool): Don't print the commands.
|
||||||
"""
|
"""
|
||||||
for command in commands:
|
for command in commands:
|
||||||
# Substitute variables, e.g. "./{NAME}.json"
|
# Substitute variables, e.g. "./{NAME}.json"
|
||||||
command = command.format(**variables)
|
command = command.format(**variables)
|
||||||
command = shlex.split(command)
|
command = split_command(command)
|
||||||
# TODO: is this needed / a good idea?
|
# TODO: is this needed / a good idea?
|
||||||
if len(command) and command[0] in ("python", "python3"):
|
if len(command) and command[0] in ("python", "python3"):
|
||||||
command[0] = sys.executable
|
command[0] = sys.executable
|
||||||
|
|
|
@ -132,6 +132,7 @@ class Warnings(object):
|
||||||
"are currently: da, de, el, en, id, lb, pt, ru, sr, ta, th.")
|
"are currently: da, de, el, en, id, lb, pt, ru, sr, ta, th.")
|
||||||
|
|
||||||
# TODO: fix numbering after merging develop into master
|
# TODO: fix numbering after merging develop into master
|
||||||
|
W091 = ("Could not clean/remove the temp directory at {dir}: {msg}.")
|
||||||
W092 = ("Ignoring annotations for sentence starts, as dependency heads are set.")
|
W092 = ("Ignoring annotations for sentence starts, as dependency heads are set.")
|
||||||
W093 = ("Could not find any data to train the {name} on. Is your "
|
W093 = ("Could not find any data to train the {name} on. Is your "
|
||||||
"input data correctly formatted ?")
|
"input data correctly formatted ?")
|
||||||
|
|
|
@ -22,7 +22,7 @@ from contextlib import contextmanager
|
||||||
import tempfile
|
import tempfile
|
||||||
import shutil
|
import shutil
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import shlex
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import cupy.random
|
import cupy.random
|
||||||
|
@ -35,7 +35,7 @@ except ImportError:
|
||||||
import importlib_metadata
|
import importlib_metadata
|
||||||
|
|
||||||
from .symbols import ORTH
|
from .symbols import ORTH
|
||||||
from .compat import cupy, CudaStream
|
from .compat import cupy, CudaStream, is_windows
|
||||||
from .errors import Errors, Warnings
|
from .errors import Errors, Warnings
|
||||||
from . import about
|
from . import about
|
||||||
|
|
||||||
|
@ -467,7 +467,10 @@ def make_tempdir():
|
||||||
"""
|
"""
|
||||||
d = Path(tempfile.mkdtemp())
|
d = Path(tempfile.mkdtemp())
|
||||||
yield d
|
yield d
|
||||||
shutil.rmtree(str(d))
|
try:
|
||||||
|
shutil.rmtree(str(d))
|
||||||
|
except PermissionError as e:
|
||||||
|
warnings.warn(Warnings.W091.format(dir=d, msg=e))
|
||||||
|
|
||||||
|
|
||||||
def get_hash(data) -> str:
|
def get_hash(data) -> str:
|
||||||
|
@ -922,7 +925,11 @@ def from_disk(path, readers, exclude):
|
||||||
# Split to support file names like meta.json
|
# Split to support file names like meta.json
|
||||||
if key.split(".")[0] not in exclude:
|
if key.split(".")[0] not in exclude:
|
||||||
reader(path / key)
|
reader(path / key)
|
||||||
return path
|
return
|
||||||
|
|
||||||
|
|
||||||
|
def split_command(command):
|
||||||
|
return shlex.split(command, posix=not is_windows)
|
||||||
|
|
||||||
|
|
||||||
def import_file(name, loc):
|
def import_file(name, loc):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user