mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge pull request #6094 from explosion/bugfix/run_process
This commit is contained in:
		
						commit
						6898b35028
					
				| 
						 | 
					@ -308,6 +308,31 @@ def git_checkout(
 | 
				
			||||||
        msg.fail("Destination of checkout must not exist", exits=1)
 | 
					        msg.fail("Destination of checkout must not exist", exits=1)
 | 
				
			||||||
    if not dest.parent.exists():
 | 
					    if not dest.parent.exists():
 | 
				
			||||||
        raise IOError("Parent of destination of checkout must exist")
 | 
					        raise IOError("Parent of destination of checkout must exist")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if sparse and git_version >= (2, 22):
 | 
				
			||||||
 | 
					        return git_sparse_checkout(repo, subpath, dest, branch)
 | 
				
			||||||
 | 
					    elif sparse:
 | 
				
			||||||
 | 
					        # Only show warnings if the user explicitly wants sparse checkout but
 | 
				
			||||||
 | 
					        # the Git version doesn't support it
 | 
				
			||||||
 | 
					        err_old = (
 | 
				
			||||||
 | 
					            f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
 | 
				
			||||||
 | 
					            f"that doesn't fully support sparse checkout yet."
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        err_unk = "You're running an unknown version of Git, so sparse checkout has been disabled."
 | 
				
			||||||
 | 
					        msg.warn(
 | 
				
			||||||
 | 
					            f"{err_unk if git_version == (0, 0) else err_old} "
 | 
				
			||||||
 | 
					            f"This means that more files than necessary may be downloaded "
 | 
				
			||||||
 | 
					            f"temporarily. To only download the files needed, make sure "
 | 
				
			||||||
 | 
					            f"you're using Git v2.22 or above."
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					    with make_tempdir() as tmp_dir:
 | 
				
			||||||
 | 
					        cmd = f"git -C {tmp_dir} clone {repo} . -b {branch}"
 | 
				
			||||||
 | 
					        ret = run_command(cmd, capture=True)
 | 
				
			||||||
 | 
					        # We need Path(name) to make sure we also support subdirectories
 | 
				
			||||||
 | 
					        shutil.copytree(str(tmp_dir / Path(subpath)), str(dest))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def git_sparse_checkout(repo, subpath, dest, branch):
 | 
				
			||||||
    # We're using Git, partial clone and sparse checkout to
 | 
					    # We're using Git, partial clone and sparse checkout to
 | 
				
			||||||
    # only clone the files we need
 | 
					    # only clone the files we need
 | 
				
			||||||
    # This ends up being RIDICULOUS. omg.
 | 
					    # This ends up being RIDICULOUS. omg.
 | 
				
			||||||
| 
						 | 
					@ -324,47 +349,31 @@ def git_checkout(
 | 
				
			||||||
    # *that* we can do by path.
 | 
					    # *that* we can do by path.
 | 
				
			||||||
    # We're using Git and sparse checkout to only clone the files we need
 | 
					    # We're using Git and sparse checkout to only clone the files we need
 | 
				
			||||||
    with make_tempdir() as tmp_dir:
 | 
					    with make_tempdir() as tmp_dir:
 | 
				
			||||||
        supports_sparse = git_version >= (2, 22)
 | 
					 | 
				
			||||||
        use_sparse = supports_sparse and sparse
 | 
					 | 
				
			||||||
        # This is the "clone, but don't download anything" part.
 | 
					        # This is the "clone, but don't download anything" part.
 | 
				
			||||||
        cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} "
 | 
					        cmd = (
 | 
				
			||||||
        if use_sparse:
 | 
					            f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
 | 
				
			||||||
            cmd += f"--filter=blob:none"  # <-- The key bit
 | 
					            f"-b {branch} --filter=blob:none"
 | 
				
			||||||
        # Only show warnings if the user explicitly wants sparse checkout but
 | 
					 | 
				
			||||||
        # the Git version doesn't support it
 | 
					 | 
				
			||||||
        elif sparse:
 | 
					 | 
				
			||||||
            err_old = (
 | 
					 | 
				
			||||||
                f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
 | 
					 | 
				
			||||||
                f"that doesn't fully support sparse checkout yet."
 | 
					 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
            err_unk = "You're running an unknown version of Git, so sparse checkout has been disabled."
 | 
					        run_command(cmd)
 | 
				
			||||||
            msg.warn(
 | 
					 | 
				
			||||||
                f"{err_unk if git_version == (0, 0) else err_old} "
 | 
					 | 
				
			||||||
                f"This means that more files than necessary may be downloaded "
 | 
					 | 
				
			||||||
                f"temporarily. To only download the files needed, make sure "
 | 
					 | 
				
			||||||
                f"you're using Git v2.22 or above."
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
        try_run_command(cmd)
 | 
					 | 
				
			||||||
        # Now we need to find the missing filenames for the subpath we want.
 | 
					        # Now we need to find the missing filenames for the subpath we want.
 | 
				
			||||||
        # Looking for this 'rev-list' command in the git --help? Hah.
 | 
					        # Looking for this 'rev-list' command in the git --help? Hah.
 | 
				
			||||||
        cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if use_sparse else ''} -- {subpath}"
 | 
					        cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
 | 
				
			||||||
        ret = try_run_command(cmd)
 | 
					        ret = run_command(cmd, capture=True)
 | 
				
			||||||
        git_repo = _from_http_to_git(repo)
 | 
					        git_repo = _from_http_to_git(repo)
 | 
				
			||||||
        # Now pass those missings into another bit of git internals
 | 
					        # Now pass those missings into another bit of git internals
 | 
				
			||||||
        missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
 | 
					        missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
 | 
				
			||||||
        if use_sparse and not missings:
 | 
					        if not missings:
 | 
				
			||||||
            err = (
 | 
					            err = (
 | 
				
			||||||
                f"Could not find any relevant files for '{subpath}'. "
 | 
					                f"Could not find any relevant files for '{subpath}'. "
 | 
				
			||||||
                f"Did you specify a correct and complete path within repo '{repo}' "
 | 
					                f"Did you specify a correct and complete path within repo '{repo}' "
 | 
				
			||||||
                f"and branch {branch}?"
 | 
					                f"and branch {branch}?"
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
            msg.fail(err, exits=1)
 | 
					            msg.fail(err, exits=1)
 | 
				
			||||||
        if use_sparse:
 | 
					 | 
				
			||||||
        cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
 | 
					        cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
 | 
				
			||||||
            try_run_command(cmd)
 | 
					        run_command(cmd, capture=True)
 | 
				
			||||||
        # And finally, we can checkout our subpath
 | 
					        # And finally, we can checkout our subpath
 | 
				
			||||||
        cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
 | 
					        cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
 | 
				
			||||||
        try_run_command(cmd)
 | 
					        run_command(cmd, capture=True)
 | 
				
			||||||
        # We need Path(name) to make sure we also support subdirectories
 | 
					        # We need Path(name) to make sure we also support subdirectories
 | 
				
			||||||
        shutil.move(str(tmp_dir / Path(subpath)), str(dest))
 | 
					        shutil.move(str(tmp_dir / Path(subpath)), str(dest))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -378,7 +387,7 @@ def get_git_version(
 | 
				
			||||||
    RETURNS (Tuple[int, int]): The version as a (major, minor) tuple. Returns
 | 
					    RETURNS (Tuple[int, int]): The version as a (major, minor) tuple. Returns
 | 
				
			||||||
        (0, 0) if the version couldn't be determined.
 | 
					        (0, 0) if the version couldn't be determined.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    ret = try_run_command(["git", "--version"], error=error)
 | 
					    ret = run_command("git --version", capture=True)
 | 
				
			||||||
    stdout = ret.stdout.strip()
 | 
					    stdout = ret.stdout.strip()
 | 
				
			||||||
    if not stdout or not stdout.startswith("git version"):
 | 
					    if not stdout or not stdout.startswith("git version"):
 | 
				
			||||||
        return (0, 0)
 | 
					        return (0, 0)
 | 
				
			||||||
| 
						 | 
					@ -386,23 +395,6 @@ def get_git_version(
 | 
				
			||||||
    return (int(version[0]), int(version[1]))
 | 
					    return (int(version[0]), int(version[1]))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def try_run_command(
 | 
					 | 
				
			||||||
    cmd: Union[str, List[str]], error: str = "Could not run command"
 | 
					 | 
				
			||||||
) -> subprocess.CompletedProcess:
 | 
					 | 
				
			||||||
    """Try running a command and raise an error if it fails.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    cmd (Union[str, List[str]]): The command to run.
 | 
					 | 
				
			||||||
    error (str): The error message.
 | 
					 | 
				
			||||||
    RETURNS (CompletedProcess): The completed process if the command ran.
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        return run_command(cmd, capture=True)
 | 
					 | 
				
			||||||
    except subprocess.CalledProcessError as e:
 | 
					 | 
				
			||||||
        msg.fail(error)
 | 
					 | 
				
			||||||
        print(cmd)
 | 
					 | 
				
			||||||
        sys.exit(1)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def _from_http_to_git(repo: str) -> str:
 | 
					def _from_http_to_git(repo: str) -> str:
 | 
				
			||||||
    if repo.startswith("http://"):
 | 
					    if repo.startswith("http://"):
 | 
				
			||||||
        repo = repo.replace(r"http://", r"https://")
 | 
					        repo = repo.replace(r"http://", r"https://")
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -110,7 +110,7 @@ def package(
 | 
				
			||||||
    msg.good(f"Successfully created package '{model_name_v}'", main_path)
 | 
					    msg.good(f"Successfully created package '{model_name_v}'", main_path)
 | 
				
			||||||
    if create_sdist:
 | 
					    if create_sdist:
 | 
				
			||||||
        with util.working_dir(main_path):
 | 
					        with util.working_dir(main_path):
 | 
				
			||||||
            util.run_command([sys.executable, "setup.py", "sdist"])
 | 
					            util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
 | 
				
			||||||
        zip_file = main_path / "dist" / f"{model_name_v}.tar.gz"
 | 
					        zip_file = main_path / "dist" / f"{model_name_v}.tar.gz"
 | 
				
			||||||
        msg.good(f"Successfully created zipped Python package", zip_file)
 | 
					        msg.good(f"Successfully created zipped Python package", zip_file)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -144,7 +144,7 @@ def run_commands(
 | 
				
			||||||
        if not silent:
 | 
					        if not silent:
 | 
				
			||||||
            print(f"Running command: {join_command(command)}")
 | 
					            print(f"Running command: {join_command(command)}")
 | 
				
			||||||
        if not dry:
 | 
					        if not dry:
 | 
				
			||||||
            run_command(command)
 | 
					            run_command(command, capture=False)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def validate_subcommand(
 | 
					def validate_subcommand(
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -254,7 +254,7 @@ def load_vectors_into_model(
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def load_vocab_data_into_model(
 | 
					def load_vocab_data_into_model(
 | 
				
			||||||
    nlp: "Language", *, lookups: Optional["Lookups"]=None
 | 
					    nlp: "Language", *, lookups: Optional["Lookups"] = None
 | 
				
			||||||
) -> None:
 | 
					) -> None:
 | 
				
			||||||
    """Load vocab data."""
 | 
					    """Load vocab data."""
 | 
				
			||||||
    if lookups:
 | 
					    if lookups:
 | 
				
			||||||
| 
						 | 
					@ -659,8 +659,8 @@ def join_command(command: List[str]) -> str:
 | 
				
			||||||
def run_command(
 | 
					def run_command(
 | 
				
			||||||
    command: Union[str, List[str]],
 | 
					    command: Union[str, List[str]],
 | 
				
			||||||
    *,
 | 
					    *,
 | 
				
			||||||
    capture: bool = False,
 | 
					 | 
				
			||||||
    stdin: Optional[Any] = None,
 | 
					    stdin: Optional[Any] = None,
 | 
				
			||||||
 | 
					    capture: bool = False,
 | 
				
			||||||
) -> Optional[subprocess.CompletedProcess]:
 | 
					) -> Optional[subprocess.CompletedProcess]:
 | 
				
			||||||
    """Run a command on the command line as a subprocess. If the subprocess
 | 
					    """Run a command on the command line as a subprocess. If the subprocess
 | 
				
			||||||
    returns a non-zero exit code, a system exit is performed.
 | 
					    returns a non-zero exit code, a system exit is performed.
 | 
				
			||||||
| 
						 | 
					@ -668,33 +668,46 @@ def run_command(
 | 
				
			||||||
    command (str / List[str]): The command. If provided as a string, the
 | 
					    command (str / List[str]): The command. If provided as a string, the
 | 
				
			||||||
        string will be split using shlex.split.
 | 
					        string will be split using shlex.split.
 | 
				
			||||||
    stdin (Optional[Any]): stdin to read from or None.
 | 
					    stdin (Optional[Any]): stdin to read from or None.
 | 
				
			||||||
    capture (bool): Whether to capture the output.
 | 
					    capture (bool): Whether to capture the output and errors. If False,
 | 
				
			||||||
 | 
					        the stdout and stderr will not be redirected, and if there's an error,
 | 
				
			||||||
 | 
					        sys.exit will be called with the returncode. You should use capture=False
 | 
				
			||||||
 | 
					        when you want to turn over execution to the command, and capture=True
 | 
				
			||||||
 | 
					        when you want to run the command more like a function.
 | 
				
			||||||
    RETURNS (Optional[CompletedProcess]): The process object.
 | 
					    RETURNS (Optional[CompletedProcess]): The process object.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    if isinstance(command, str):
 | 
					    if isinstance(command, str):
 | 
				
			||||||
        command = split_command(command)
 | 
					        cmd_list = split_command(command)
 | 
				
			||||||
 | 
					        cmd_str = command
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        cmd_list = command
 | 
				
			||||||
 | 
					        cmd_str = " ".join(command)
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        ret = subprocess.run(
 | 
					        ret = subprocess.run(
 | 
				
			||||||
            command,
 | 
					            cmd_list,
 | 
				
			||||||
            env=os.environ.copy(),
 | 
					            env=os.environ.copy(),
 | 
				
			||||||
            input=stdin,
 | 
					            input=stdin,
 | 
				
			||||||
            encoding="utf8",
 | 
					            encoding="utf8",
 | 
				
			||||||
            check=True,
 | 
					            check=False,
 | 
				
			||||||
            stdout=subprocess.PIPE if capture else None,
 | 
					            stdout=subprocess.PIPE if capture else None,
 | 
				
			||||||
            stderr=subprocess.PIPE if capture else None,
 | 
					            stderr=subprocess.STDOUT if capture else None,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
    except FileNotFoundError:
 | 
					    except FileNotFoundError:
 | 
				
			||||||
 | 
					        # Indicates the *command* wasn't found, it's an error before the command
 | 
				
			||||||
 | 
					        # is run.
 | 
				
			||||||
        raise FileNotFoundError(
 | 
					        raise FileNotFoundError(
 | 
				
			||||||
            Errors.E970.format(str_command=" ".join(command), tool=command[0])
 | 
					            Errors.E970.format(str_command=cmd_str, tool=cmd_list[0])
 | 
				
			||||||
        ) from None
 | 
					        ) from None
 | 
				
			||||||
    except subprocess.CalledProcessError as e:
 | 
					    if ret.returncode != 0 and capture:
 | 
				
			||||||
        # We don't want a duplicate traceback here so we're making sure the
 | 
					        message = f"Error running command:\n\n{cmd_str}\n\n"
 | 
				
			||||||
        # CalledProcessError isn't re-raised. We also print both the string
 | 
					        message += f"Subprocess exited with status {ret.returncode}"
 | 
				
			||||||
        # message and the stderr, in case the error only has one of them.
 | 
					        if ret.stdout is not None:
 | 
				
			||||||
        print(e.stderr)
 | 
					            message += f"\n\nProcess log (stdout and stderr):\n\n"
 | 
				
			||||||
        print(e)
 | 
					            message += ret.stdout
 | 
				
			||||||
        sys.exit(1)
 | 
					        error = subprocess.SubprocessError(message)
 | 
				
			||||||
    if ret.returncode != 0:
 | 
					        error.ret = ret
 | 
				
			||||||
 | 
					        error.command = cmd_str
 | 
				
			||||||
 | 
					        raise error
 | 
				
			||||||
 | 
					    elif ret.returncode != 0:
 | 
				
			||||||
        sys.exit(ret.returncode)
 | 
					        sys.exit(ret.returncode)
 | 
				
			||||||
    return ret
 | 
					    return ret
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user