Mirror of https://github.com/explosion/spaCy.git (synced 2025-08-11 07:34:54 +03:00)

Improvements / corrections

This commit is contained in:
parent 2c1f58e74f
commit 012578f0f6
@@ -15,7 +15,8 @@ Commands are only re-run if their inputs have changed."""
 INTRO_WORKFLOWS = f"""The following workflows are defined by the project. They
 can be executed using [`spacy project run [name]`]({DOCS_URL}/api/cli#project-run)
 and will run the specified commands in order. Commands grouped within square brackets
-are run in parallel. Commands are only re-run if their inputs have changed."""
+preceded by 'parallel' are run in parallel. Commands are only re-run if their inputs
+have changed."""
 INTRO_ASSETS = f"""The following assets are defined by the project. They can
 be fetched by running [`spacy project assets`]({DOCS_URL}/api/cli#project-assets)
 in the project directory."""
@@ -77,7 +78,7 @@ def project_document(
             rendered_steps.append(md.code(step))
         else:
             rendered_steps.append(
-                "["
+                "parallel["
                 + ", ".join(md.code(p_step) for p_step in step["parallel"])
                 + "]"
             )
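For orientation, the documentation renderer above distinguishes plain string steps from parallel step groups. The sketch below illustrates the two step shapes it appears to handle; the example workflow, the command names and the standalone render_step helper are illustrative assumptions, not code from this commit.

from typing import Dict, List, Union

# A workflow step is either a command name or a parallel group of command names
WorkflowStep = Union[str, Dict[str, List[str]]]

workflow: List[WorkflowStep] = [
    "preprocess",                                      # runs on its own
    {"parallel": ["train-model-a", "train-model-b"]},  # run concurrently
    "evaluate",
]

def render_step(step: WorkflowStep) -> str:
    # Mirrors the markdown rendering in the diff: `step` vs. parallel[`a`, `b`]
    if isinstance(step, str):
        return f"`{step}`"
    return "parallel[" + ", ".join(f"`{s}`" for s in step["parallel"]) + "]"

print(" -> ".join(render_step(s) for s in workflow))
# `preprocess` -> parallel[`train-model-a`, `train-model-b`] -> `evaluate`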
@@ -1,4 +1,4 @@
-from typing import Any, List, Literal, Optional, Dict, Union, cast, Tuple
+from typing import Any, List, Optional, Dict, Union, cast
 import sys
 from pathlib import Path
 from time import time
@@ -15,6 +15,19 @@ from ...util import SimpleFrozenDict, working_dir
 from ...util import check_bool_env_var, ENV_VARS, split_command, join_command
 from ...errors import Errors
 
+"""
+Permits the execution of a parallel command group.
+
+The main process starts a worker process for each spaCy projects command in the parallel group.
+Each spaCy projects command consists of n OS level commands. The worker process starts
+subprocesses for these OS level commands in order, and sends status information about their
+execution back to the main process.
+
+The main process maintains a state machine for each spaCy projects command/worker process. The meaning
+of the states is documented alongside the _PARALLEL_COMMAND_STATES code.
+"""
+
+# Use spawn to create worker processes on all OSs for consistency
 set_start_method("spawn", force=True)
 
 # How often the worker processes managing the commands in a parallel group
@@ -30,34 +43,48 @@ MAX_WIDTH_DIVIDER = 60
 
 # Whether or not to display realtime status information
 DISPLAY_STATUS_TABLE = sys.stdout.isatty() and supports_ansi()
 
 
 class _ParallelCommandState:
     def __init__(
         self,
         name: str,
         display_color: str,
-        transitions: Optional[Tuple[str]] = None,
+        transitions: Optional[List[str]] = None,
     ):
         self.name = name
         self.display_color = display_color
         if transitions is not None:
             self.transitions = transitions
         else:
-            self.transitions = None
+            self.transitions = []
 
 
 _PARALLEL_COMMAND_STATES = (
-    _ParallelCommandState("pending", "yellow", ("starting", "cancelled", "not rerun")),
+    # The command has not yet been run
+    _ParallelCommandState("pending", "yellow", ["starting", "cancelled", "not rerun"]),
+    # The command was not run because its settings, inputs and outputs had not changed
+    # since a previous successful run
     _ParallelCommandState("not rerun", "blue"),
-    _ParallelCommandState("starting", "green", ("running", "hung")),
+    # The main process has spawned a worker process for the command but not yet received
+    # an acknowledgement
+    _ParallelCommandState("starting", "green", ["running", "hung"]),
+    # The command is running
     _ParallelCommandState(
-        "running", "green", ("running", "succeeded", "failed", "killed", "hung")
+        "running", "green", ["running", "succeeded", "failed", "terminated", "hung"]
     ),
+    # The command succeeded (rc=0)
     _ParallelCommandState("succeeded", "green"),
+    # The command failed (rc>0)
     _ParallelCommandState("failed", "red"),
-    _ParallelCommandState("killed", "red"),
+    # The command was terminated (rc<0), usually but not necessarily by the main process because
+    # another command within the same parallel group failed
+    _ParallelCommandState("terminated", "red"),
+    # The main process would expect the worker process to be running, but the worker process has
+    # not communicated with the main process for an extended period of time
    _ParallelCommandState("hung", "red"),
+    # The main process did not attempt to execute the command because another command in the same
+    # parallel group had already failed
     _ParallelCommandState("cancelled", "red"),
 )
 
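The table above pairs each state with the states it may legally move to. A minimal, self-contained sketch of how such a transition check could be enforced; the simplified CommandState class and the can_transition helper below are illustrative assumptions, not code from this commit.

from typing import Dict, List, Optional

class CommandState:
    def __init__(self, name: str, transitions: Optional[List[str]] = None):
        self.name = name
        # An empty transition list marks a terminal state
        self.transitions = transitions if transitions is not None else []

STATES: Dict[str, CommandState] = {
    s.name: s
    for s in (
        CommandState("pending", ["starting", "cancelled", "not rerun"]),
        CommandState("not rerun"),
        CommandState("starting", ["running", "hung"]),
        CommandState("running", ["running", "succeeded", "failed", "terminated", "hung"]),
        CommandState("succeeded"),
        CommandState("failed"),
        CommandState("terminated"),
        CommandState("hung"),
        CommandState("cancelled"),
    )
}

def can_transition(current: str, new: str) -> bool:
    return new in STATES[current].transitions

assert can_transition("pending", "starting")
assert not can_transition("succeeded", "running")  # terminal state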
@@ -70,10 +97,9 @@ class _ParallelCommandInfo:
         self.cmd_name = cmd_name
         self.cmd = cmd
         self.cmd_ind = cmd_ind
-        self.len_os_cmds = len(cmd["script"])
         self.state = _ParallelCommandInfo.state_dict["pending"]
         self.pid: Optional[int] = None
-        self.last_status_time: Optional[int] = None
+        self.last_keepalive_time: Optional[int] = None
         self.os_cmd_ind: Optional[int] = None
         self.rc: Optional[int] = None
         self.output: Optional[str] = None
@@ -85,10 +111,11 @@ class _ParallelCommandInfo:
         self.state = _ParallelCommandInfo.state_dict[new_state]
 
     @property
-    def disp_state(self) -> str:
+    def state_repr(self) -> str:
         state_str = self.state.name
         if state_str == "running" and self.os_cmd_ind is not None:
-            state_str = f"{state_str} ({self.os_cmd_ind + 1}/{self.len_os_cmds})"
+            number_of_os_cmds = len(self.cmd["script"])
+            state_str = f"{state_str} ({self.os_cmd_ind + 1}/{number_of_os_cmds})"
         return color(state_str, self.state.display_color)
 
 
@@ -102,7 +129,7 @@ def project_run_parallel_group(
 ) -> None:
     """Run a parallel group of commands. Note that because of the challenges of managing
     parallel output streams it is not possible to specify a value for 'capture'. Essentially,
-    with parallel groups 'capture==True' for stdout and 'capture==False' for stderr.
+    with parallel groups 'capture==True'.
 
     project_dir (Path): Path to project directory.
     cmd_infos: a list of objects containing information about the commands to run.
@@ -162,13 +189,18 @@ def project_run_parallel_group(
         "parallel[" + ", ".join(cmd_info.cmd_name for cmd_info in cmd_infos) + "]"
     )
     if len(divider_parallel_descriptor) > MAX_WIDTH_DIVIDER:
-        divider_parallel_descriptor = divider_parallel_descriptor[:(MAX_WIDTH_DIVIDER-3)] + "..."
+        divider_parallel_descriptor = (
+            divider_parallel_descriptor[: (MAX_WIDTH_DIVIDER - 3)] + "..."
+        )
     msg.divider(divider_parallel_descriptor)
     if not DISPLAY_STATUS_TABLE:
         print(parallel_descriptor)
-    else:
-        first = True
 
+    msg.info(
+        "Temporary logs are being written to "
+        + sep.join((str(current_dir), PARALLEL_LOGGING_DIR_NAME))
+    )
+    first = True
     while any(
         cmd_info.state.name in ("pending", "starting", "running")
         for cmd_info in cmd_infos
@@ -177,9 +209,10 @@ def project_run_parallel_group(
             mess: Dict[str, Union[str, int]] = parallel_group_status_queue.get(
                 timeout=PARALLEL_GROUP_STATUS_INTERVAL * 20
            )
-        except Exception as e:
+        except Exception:
+            # No more messages are being received: the whole group has hung
             for other_cmd_info in (
-                c for c in cmd_infos if c.state.name == "running"
+                c for c in cmd_infos if c.state.name in ("starting", "running")
             ):
                 other_cmd_info.change_state("hung")
             for other_cmd_info in (
@@ -187,22 +220,23 @@ def project_run_parallel_group(
             ):
                 other_cmd_info.change_state("cancelled")
         cmd_info = cmd_infos[cast(int, mess["cmd_ind"])]
-        if mess["status"] in ("started", "alive"):
-            cmd_info.last_status_time = int(time())
+        if mess["type"] in ("started", "keepalive"):
+            cmd_info.last_keepalive_time = int(time())
             for other_cmd_info in (
                 c for c in cmd_infos if c.state.name in ("starting", "running")
             ):
                 if (
-                    other_cmd_info.last_status_time is not None
-                    and time() - other_cmd_info.last_status_time
+                    other_cmd_info.last_keepalive_time is not None
+                    and time() - other_cmd_info.last_keepalive_time
                     > PARALLEL_GROUP_STATUS_INTERVAL * 20
                 ):
+                    # a specific command has hung
                     other_cmd_info.change_state("hung")
-        if mess["status"] == "started":
+        if mess["type"] == "started":
             cmd_info.change_state("running")
             cmd_info.os_cmd_ind = cast(int, mess["os_cmd_ind"])
             cmd_info.pid = cast(int, mess["pid"])
-        if mess["status"] == "completed":
+        if mess["type"] == "completed":
             cmd_info.rc = cast(int, mess["rc"])
             if cmd_info.rc == 0:
                 cmd_info.change_state("succeeded")
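The keepalive handling above treats a worker as hung once no message has arrived for PARALLEL_GROUP_STATUS_INTERVAL * 20 seconds, and the same bound is used as the queue timeout for the whole group. A small sketch of that predicate; the interval value of 1 second is an assumption, since the constant's definition is not part of this diff.

from time import time
from typing import Optional

PARALLEL_GROUP_STATUS_INTERVAL = 1  # assumed value; defined elsewhere in the module

def is_hung(last_keepalive_time: Optional[int], now: Optional[float] = None) -> bool:
    # Mirrors the check in the main loop: only commands in the "starting"/"running"
    # states, whose last_keepalive_time has been set, can be declared hung.
    if last_keepalive_time is None:
        return False
    now = time() if now is None else now
    return now - last_keepalive_time > PARALLEL_GROUP_STATUS_INTERVAL * 20

print(is_hung(int(time()) - 5))   # False: a keepalive arrived 5 seconds ago
print(is_hung(int(time()) - 60))  # True: silent for a minute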
@@ -214,11 +248,12 @@ def project_run_parallel_group(
             elif cmd_info.rc > 0:
                 cmd_info.change_state("failed")
             else:
-                cmd_info.change_state("killed")
+                cmd_info.change_state("terminated")
             cmd_info.output = cast(str, mess["output"])
         if any(
-            c for c in cmd_infos if c.state.name in ("failed", "killed", "hung")
+            c for c in cmd_infos if c.state.name in ("failed", "terminated", "hung")
         ):
+            # a command in the group hasn't succeeded, so terminate/cancel the rest
             for other_cmd_info in (
                 c for c in cmd_infos if c.state.name == "running"
             ):
@@ -230,12 +265,13 @@ def project_run_parallel_group(
                 c for c in cmd_infos if c.state.name == "pending"
             ):
                 other_cmd_info.change_state("cancelled")
-        if mess["status"] != "alive" and DISPLAY_STATUS_TABLE:
+        if mess["type"] != "keepalive" and DISPLAY_STATUS_TABLE:
             if first:
                 first = False
             else:
+                # overwrite the existing status table
                 print("\033[2K\033[F" * (4 + len(cmd_infos)))
-            data = [[c.cmd_name, c.disp_state] for c in cmd_infos]
+            data = [[c.cmd_name, c.state_repr] for c in cmd_infos]
             header = ["Command", "Status"]
             msg.table(data, header=header)
 
@@ -248,11 +284,9 @@ def project_run_parallel_group(
     process_rcs = [c.rc for c in cmd_infos if c.rc is not None]
     if len(process_rcs) > 0:
         group_rc = max(process_rcs)
-        if group_rc <= 0:
-            group_rc = min(process_rcs)
-        if group_rc != 0:
+        if group_rc > 0:
             sys.exit(group_rc)
-    if any(c for c in cmd_infos if c.state.name == "hung"):
+    if any(c for c in cmd_infos if c.state.name in ("hung", "terminated")):
         sys.exit(-1)
 
 
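The rewritten exit-code handling reduces the group to a single return code: the maximum of the individual codes is used only when it is positive, and a hung or terminated command is reported separately via sys.exit(-1). A short worked illustration of that reduction; the helper name is illustrative, not code from this commit.

from typing import List

def group_return_code(process_rcs: List[int]) -> int:
    # A positive maximum means at least one command failed, so it becomes
    # the group's exit code; otherwise the group is considered clean here.
    if process_rcs and max(process_rcs) > 0:
        return max(process_rcs)
    return 0

print(group_return_code([0, 0, 0]))  # 0: every command succeeded
print(group_return_code([0, 2, 1]))  # 2: the worst failure wins
print(group_return_code([0, -9]))    # 0: a negative (terminated) code is instead
                                     # surfaced by the separate sys.exit(-1) path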
@@ -263,21 +297,31 @@ def _project_run_parallel_cmd(
     current_dir: str,
     parallel_group_status_queue: Queue,
 ) -> None:
+    """Run a single spaCy projects command as a worker process.
+
+    Communicates with the main process via queue messages that consist of
+    dictionaries whose type is determined by the entry 'type'. Possible
+    values of 'type' are 'started', 'completed' and 'keepalive'. Each dictionary
+    type contains different additional fields."""
+    # we can use the command name as a unique log filename because a parallel
+    # group is not allowed to contain the same command more than once
     log_file_name = sep.join(
         (current_dir, PARALLEL_LOGGING_DIR_NAME, cmd_info.cmd_name + ".log")
     )
     file_not_found = False
+    # buffering=0: make sure output is not lost if a subprocess is terminated
     with open(log_file_name, "wb", buffering=0) as logfile:
         for os_cmd_ind, os_cmd in enumerate(cmd_info.cmd["script"]):
             command = split_command(os_cmd)
             if len(command) and command[0] in ("python", "python3"):
+                # -u: prevent buffering within Python
                 command = [sys.executable, "-u", *command[1:]]
             elif len(command) and command[0] in ("pip", "pip3"):
                 command = [sys.executable, "-m", "pip", *command[1:]]
             logfile.write(
                 bytes(
                     f"Running command: {join_command(command)}" + linesep,
-                    encoding="UTF-8",
+                    encoding="utf8",
                 )
             )
             if not dry:
@@ -305,22 +349,22 @@ def _project_run_parallel_cmd(
                     break
                 parallel_group_status_queue.put(
                     {
+                        "type": "started",
                         "cmd_ind": cmd_info.cmd_ind,
                         "os_cmd_ind": os_cmd_ind,
-                        "status": "started",
                         "pid": sp.pid,
                     }
                 )
                 while True:
                     try:
-                        sp.communicate(timeout=PARALLEL_GROUP_STATUS_INTERVAL)
+                        sp.wait(timeout=PARALLEL_GROUP_STATUS_INTERVAL)
                     except TimeoutExpired:
                         pass
                     if sp.returncode == None:
                         parallel_group_status_queue.put(
                             {
+                                "type": "keepalive",
                                 "cmd_ind": cmd_info.cmd_ind,
-                                "status": "alive",
                             }
                         )
                     else:
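After this change every queue message carries a 'type' entry instead of a 'status' entry. For reference, the three message shapes visible in this diff, with illustrative values:

# "started": a worker began OS-level command os_cmd_ind of its spaCy command
started = {"type": "started", "cmd_ind": 0, "os_cmd_ind": 1, "pid": 12345}

# "keepalive": the current subprocess is still running; sent roughly every
# PARALLEL_GROUP_STATUS_INTERVAL seconds while the worker waits on it
keepalive = {"type": "keepalive", "cmd_ind": 0}

# "completed": the spaCy command finished; return code and log output attached
completed = {"type": "completed", "cmd_ind": 0, "rc": 0, "output": "..."}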
@@ -336,7 +380,7 @@ def _project_run_parallel_cmd(
                     else:
                         logfile.write(
                             bytes(
-                                linesep + f"Killed (rc={sp.returncode})" + linesep,
+                                linesep + f"Terminated (rc={sp.returncode})" + linesep,
                                 encoding="UTF-8",
                             )
                         )
@@ -350,8 +394,8 @@ def _project_run_parallel_cmd(
         rc = sp.returncode
         parallel_group_status_queue.put(
             {
+                "type": "completed",
                 "cmd_ind": cmd_info.cmd_ind,
-                "status": "completed",
                 "rc": rc,
                 "output": logfile.read(),
             }
@@ -364,5 +408,5 @@ def _start_process(
     cmd_info = proc_to_cmd_infos[process]
     if cmd_info.state.name == "pending":
         cmd_info.change_state("starting")
-        cmd_info.last_status_time = int(time())
+        cmd_info.last_keepalive_time = int(time())
         process.start()