Merge branch 'master' into spacy.io

This commit is contained in:
Ines Montani 2021-02-02 14:28:00 +11:00
commit c0220dddcb
11 changed files with 56 additions and 17 deletions

View File

@ -1,6 +1,6 @@
# fmt: off
__title__ = "spacy"
__version__ = "3.0.0"
__version__ = "3.0.1.dev0"
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
__projects__ = "https://github.com/explosion/projects"

View File

@ -37,7 +37,12 @@ def project_run_cli(
def project_run(
project_dir: Path, subcommand: str, *, force: bool = False, dry: bool = False
project_dir: Path,
subcommand: str,
*,
force: bool = False,
dry: bool = False,
capture: bool = False,
) -> None:
"""Run a named script defined in the project.yml. If the script is part
of the default pipeline (defined in the "run" section), DVC is used to
@ -48,6 +53,11 @@ def project_run(
subcommand (str): Name of command to run.
force (bool): Force re-running, even if nothing changed.
dry (bool): Perform a dry run and don't execute commands.
capture (bool): Whether to capture the output and errors of individual commands.
If False, the stdout and stderr will not be redirected, and if there's an error,
sys.exit will be called with the return code. You should use capture=False
when you want to turn over execution to the command, and capture=True
when you want to run the command more like a function.
"""
config = load_project_config(project_dir)
commands = {cmd["name"]: cmd for cmd in config.get("commands", [])}
@ -72,7 +82,7 @@ def project_run(
if not rerun and not force:
msg.info(f"Skipping '{cmd['name']}': nothing changed")
else:
run_commands(cmd["script"], dry=dry)
run_commands(cmd["script"], dry=dry, capture=capture)
if not dry:
update_lockfile(current_dir, cmd)
@ -126,12 +136,18 @@ def run_commands(
commands: Iterable[str] = SimpleFrozenList(),
silent: bool = False,
dry: bool = False,
capture: bool = False,
) -> None:
"""Run a sequence of commands in a subprocess, in order.
commands (List[str]): The string commands.
silent (bool): Don't print the commands.
dry (bool): Perform a dry run and don't execute anything.
capture (bool): Whether to capture the output and errors of individual commands.
If False, the stdout and stderr will not be redirected, and if there's an error,
sys.exit will be called with the return code. You should use capture=False
when you want to turn over execution to the command, and capture=True
when you want to run the command more like a function.
"""
for command in commands:
command = split_command(command)
@ -149,7 +165,7 @@ def run_commands(
if not silent:
print(f"Running command: {join_command(command)}")
if not dry:
run_command(command, capture=False)
run_command(command, capture=capture)
def validate_subcommand(

View File

@ -1190,7 +1190,6 @@ class Language:
get_examples: Optional[Callable[[], Iterable[Example]]] = None,
*,
sgd: Optional[Optimizer] = None,
link_components: bool = True,
) -> Optimizer:
"""Initialize the pipe for training, using data examples if available.
@ -1198,8 +1197,6 @@ class Language:
returns gold-standard Example objects.
sgd (Optional[Optimizer]): An optimizer to use for updates. If not
provided, will be created using the .create_optimizer() method.
link_components (bool): Link listener components automatically or not
(default True)
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://spacy.io/api/language#initialize
@ -1247,8 +1244,7 @@ class Language:
proc.initialize, p_settings, section="components", name=name
)
proc.initialize(get_examples, nlp=self, **p_settings)
if link_components:
self._link_components()
self._link_components()
self._optimizer = sgd
if sgd is not None:
self._optimizer = sgd

View File

@ -80,7 +80,8 @@ class Tok2Vec(TrainablePipe):
def add_listener(self, listener: "Tok2VecListener", component_name: str) -> None:
"""Add a listener for a downstream component. Usually internals."""
self.listener_map.setdefault(component_name, [])
self.listener_map[component_name].append(listener)
if listener not in self.listener_map[component_name]:
self.listener_map[component_name].append(listener)
def remove_listener(self, listener: "Tok2VecListener", component_name: str) -> bool:
"""Remove a listener for a downstream component. Usually internals."""

View File

@ -67,7 +67,7 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
# Make sure that listeners are defined before initializing further
nlp._link_components()
with nlp.select_pipes(disable=[*frozen_components, *resume_components]):
nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer, link_components=False)
nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer)
logger.info(f"Initialized pipeline components: {nlp.pipe_names}")
# Detect components with listeners that are not frozen consistently
for name, proc in nlp.pipeline:

View File

@ -803,7 +803,7 @@ def run_command(
stdin (Optional[Any]): stdin to read from or None.
capture (bool): Whether to capture the output and errors. If False,
the stdout and stderr will not be redirected, and if there's an error,
sys.exit will be called with the returncode. You should use capture=False
sys.exit will be called with the return code. You should use capture=False
when you want to turn over execution to the command, and capture=True
when you want to run the command more like a function.
RETURNS (Optional[CompletedProcess]): The process object.

View File

@ -269,7 +269,7 @@ best-matching package compatible with your spaCy installation.
>
> ```diff
> - python -m spacy download en
> + python -m spacy dowmload en_core_web_sm
> + python -m spacy download en_core_web_sm
> ```
>
> ```diff

View File

@ -15069,6 +15069,11 @@
"parse-url": "^5.0.0"
}
},
"github-buttons": {
"version": "2.14.2",
"resolved": "https://registry.npmjs.org/github-buttons/-/github-buttons-2.14.2.tgz",
"integrity": "sha512-DMakrcFRdojVAndkKYVDTHF3Ym09OoWia//IQ7B/MVxC+iQ2DenYfD7IR69ZZ9awM8PNS/9wthr4IyDhkFJ4mg=="
},
"github-from-package": {
"version": "0.0.0",
"resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz",
@ -22693,6 +22698,14 @@
"resolved": "https://registry.npmjs.org/react-error-overlay/-/react-error-overlay-3.0.0.tgz",
"integrity": "sha512-XzgvowFrwDo6TWcpJ/WTiarb9UI6lhA4PMzS7n1joK3sHfBBBOQHUc0U4u57D6DWO9vHv6lVSWx2Q/Ymfyv4hw=="
},
"react-github-btn": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/react-github-btn/-/react-github-btn-1.2.0.tgz",
"integrity": "sha512-/b2TGTeek5Ky+KtuP5BxOaXgb1FGhbwgZNI6rkwkGk7+xtCtsNMkdchOcCnC3qU1JGTWPKzYZWpPBIouVhXAoQ==",
"requires": {
"github-buttons": "^2.8.0"
}
},
"react-helmet": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/react-helmet/-/react-helmet-5.2.0.tgz",

View File

@ -49,6 +49,7 @@
"prop-types": "^15.7.2",
"react": "^16.8.2",
"react-dom": "^16.8.2",
"react-github-btn": "^1.2.0",
"react-helmet": "^5.2.0",
"react-intersection-observer": "^8.0.1",
"remark-react": "^5.0.1"

View File

@ -1,6 +1,7 @@
import React from 'react'
import PropTypes from 'prop-types'
import classNames from 'classnames'
import GitHubButton from 'react-github-btn'
import Link from './link'
import Icon from './icon'
@ -61,10 +62,13 @@ export default function Navigation({ title, items = [], section, search, alert,
</li>
)
})}
<li className={classes.item}>
<Link to={github()} aria-label="GitHub" hidden>
<Icon name="github" />
</Link>
<li className={classNames(classes.item, classes.github)}>
<GitHubButton
href={github()}
data-size="large"
data-show-count="true"
aria-label="Star spaCy on GitHub"
/>
</li>
</ul>
{search && <div className={classes.search}>{search}</div>}

View File

@ -68,6 +68,14 @@
height: 100%
padding-right: 2rem
.github
text-transform: initial
margin-top: 0.25rem
margin-left: 1.5em !important
& > span
min-width: 100px
.dropdown
--dropdown-text-color: var(--color-theme)
font-family: var(--font-secondary)