Mirror of https://github.com/explosion/spaCy.git (synced 2024-11-15 06:09:01 +03:00)
Merge pull request #12494 from adrianeboyd/backport/v3.5.2-1

Backports for v3.5.2

This commit is contained in commit e4bbdf7b50.
.github/azure-steps.yml (vendored, 24 lines changed)

@@ -57,51 +57,51 @@ steps:
python -m spacy download ca_core_news_md
python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
displayName: 'Test download CLI'
condition: eq(variables['python_version'], '3.8')
condition: eq(variables['python_version'], '3.9')

- script: |
python -W error -m spacy info ca_core_news_sm | grep -q download_url
displayName: 'Test download_url in info CLI'
condition: eq(variables['python_version'], '3.8')
condition: eq(variables['python_version'], '3.9')

- script: |
python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
displayName: 'Test no warnings on load (#11713)'
condition: eq(variables['python_version'], '3.8')
condition: eq(variables['python_version'], '3.9')

- script: |
python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
displayName: 'Test convert CLI'
condition: eq(variables['python_version'], '3.8')
condition: eq(variables['python_version'], '3.9')

- script: |
python -m spacy init config -p ner -l ca ner.cfg
python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy
displayName: 'Test debug config CLI'
condition: eq(variables['python_version'], '3.8')
condition: eq(variables['python_version'], '3.9')

- script: |
# will have errors due to sparse data, check for summary in output
python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary
displayName: 'Test debug data CLI'
condition: eq(variables['python_version'], '3.8')
condition: eq(variables['python_version'], '3.9')

- script: |
python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
displayName: 'Test train CLI'
condition: eq(variables['python_version'], '3.8')
condition: eq(variables['python_version'], '3.9')

- script: |
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
displayName: 'Test assemble CLI'
condition: eq(variables['python_version'], '3.8')
condition: eq(variables['python_version'], '3.9')

- script: |
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
displayName: 'Test assemble CLI vectors warning'
condition: eq(variables['python_version'], '3.8')
condition: eq(variables['python_version'], '3.9')

- script: |
python -m pip install -U -r requirements.txt

@@ -116,9 +116,3 @@ steps:
python -m pytest --pyargs spacy
displayName: "Run CPU tests with thinc-apple-ops"
condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.11'))

- script: |
python .github/validate_universe_json.py website/meta/universe.json
displayName: 'Test website/meta/universe.json'
condition: eq(variables['python_version'], '3.8')
.github/workflows/autoblack.yml (vendored, 45 lines changed — file removed)

@@ -1,45 +0,0 @@
# GitHub Action that uses Black to reformat all Python code and submits a PR
# in regular intervals. Inspired by: https://github.com/cclauss/autoblack

name: autoblack
on:
workflow_dispatch: # allow manual trigger
schedule:
- cron: '0 8 * * 5' # every Friday at 8am UTC

jobs:
autoblack:
if: github.repository_owner == 'explosion'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
ref: ${{ github.head_ref }}
- uses: actions/setup-python@v4
- run: pip install black -c requirements.txt
- name: Auto-format code if needed
run: black spacy
# We can't run black --check here because that returns a non-zero excit
# code and makes GitHub think the action failed
- name: Check for modified files
id: git-check
run: echo modified=$(if git diff-index --quiet HEAD --; then echo "false"; else echo "true"; fi) >> $GITHUB_OUTPUT

- name: Create Pull Request
if: steps.git-check.outputs.modified == 'true'
uses: peter-evans/create-pull-request@v4
with:
title: Auto-format code with black
labels: meta
commit-message: Auto-format code with black
committer: GitHub <noreply@github.com>
author: explosion-bot <explosion-bot@users.noreply.github.com>
body: _This PR is auto-generated._
branch: autoblack
delete-branch: true
draft: false
- name: Check outputs
if: steps.git-check.outputs.modified == 'true'
run: |
echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}"
echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}"
.github/workflows/explosionbot.yml (vendored, 1 line changed)

@@ -8,6 +8,7 @@ on:
jobs:
explosion-bot:
if: github.repository_owner == 'explosion'
runs-on: ubuntu-latest
steps:
- name: Dump GitHub context
.github/workflows/issue-manager.yml (vendored, 1 line changed)

@@ -13,6 +13,7 @@ on:
jobs:
issue-manager:
if: github.repository_owner == 'explosion'
runs-on: ubuntu-latest
steps:
- uses: tiangolo/issue-manager@0.4.0
.github/workflows/lock.yml (vendored, 1 line changed)

@@ -13,6 +13,7 @@ concurrency:
jobs:
action:
if: github.repository_owner == 'explosion'
runs-on: ubuntu-latest
steps:
- uses: dessant/lock-threads@v4
.github/workflows/spacy_universe_alert.yml (vendored, 1 line changed)

@@ -7,6 +7,7 @@ on:
jobs:
build:
if: github.repository_owner == 'explosion'
runs-on: ubuntu-latest

steps:
.github/workflows/tests.yml (vendored, new file, 173 lines)

@@ -0,0 +1,173 @@
name: tests

on:
push:
branches-ignore:
- "spacy.io"
- "nightly.spacy.io"
- "v2.spacy.io"
paths-ignore:
- "*.md"
- "*.mdx"
- "website/**"
- ".github/workflows/**"
pull_request:
types: [opened, synchronize, reopened, edited]
paths-ignore:
- "*.md"
- "*.mdx"
- "website/**"

jobs:
validate:
name: Validate
if: github.repository_owner == 'explosion'
runs-on: ubuntu-latest
steps:
- name: Check out repo
uses: actions/checkout@v3

- name: Configure Python version
uses: actions/setup-python@v4
with:
python-version: "3.7"
architecture: x64

- name: black
run: |
python -m pip install black -c requirements.txt
python -m black spacy --check
- name: flake8
run: |
python -m pip install flake8==5.0.4
python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
tests:
name: Test
needs: Validate
strategy:
fail-fast: true
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python_version: ["3.11"]
include:
- os: ubuntu-20.04
python_version: "3.6"
- os: windows-latest
python_version: "3.7"
- os: macos-latest
python_version: "3.8"
- os: ubuntu-latest
python_version: "3.9"
- os: windows-latest
python_version: "3.10"

runs-on: ${{ matrix.os }}

steps:
- name: Check out repo
uses: actions/checkout@v3

- name: Configure Python version
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python_version }}
architecture: x64

- name: Install dependencies
run: |
python -m pip install -U build pip setuptools
python -m pip install -U -r requirements.txt

- name: Build sdist
run: |
python -m build --sdist

- name: Run mypy
run: |
python -m mypy spacy
if: matrix.python_version != '3.6'

- name: Delete source directory and .egg-info
run: |
rm -rf spacy *.egg-info
shell: bash

- name: Uninstall all packages
run: |
python -m pip freeze
python -m pip freeze --exclude pywin32 > installed.txt
python -m pip uninstall -y -r installed.txt

- name: Install from sdist
run: |
SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
SPACY_NUM_BUILD_JOBS=2 python -m pip install dist/$SDIST
shell: bash

- name: Test import
run: python -W error -c "import spacy"

- name: "Test download CLI"
run: |
python -m spacy download ca_core_news_sm
python -m spacy download ca_core_news_md
python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
if: matrix.python_version == '3.9'

- name: "Test download_url in info CLI"
run: |
python -W error -m spacy info ca_core_news_sm | grep -q download_url
if: matrix.python_version == '3.9'

- name: "Test no warnings on load (#11713)"
run: |
python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
if: matrix.python_version == '3.9'

- name: "Test convert CLI"
run: |
python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
if: matrix.python_version == '3.9'

- name: "Test debug config CLI"
run: |
python -m spacy init config -p ner -l ca ner.cfg
python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy
if: matrix.python_version == '3.9'

- name: "Test debug data CLI"
run: |
# will have errors due to sparse data, check for summary in output
python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary
if: matrix.python_version == '3.9'

- name: "Test train CLI"
run: |
python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
if: matrix.python_version == '3.9'

- name: "Test assemble CLI"
run: |
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
if: matrix.python_version == '3.9'

- name: "Test assemble CLI vectors warning"
run: |
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
if: matrix.python_version == '3.9'

- name: "Install test requirements"
run: |
python -m pip install -U -r requirements.txt

- name: "Run CPU tests"
run: |
python -m pytest --pyargs spacy -W error

- name: "Run CPU tests with thinc-apple-ops"
run: |
python -m pip install 'spacy[apple]'
python -m pytest --pyargs spacy
if: startsWith(matrix.os, 'macos') && matrix.python_version == '3.11'
.github/workflows/universe_validation.yml (vendored, new file, 33 lines)

@@ -0,0 +1,33 @@
name: universe validation

on:
push:
branches-ignore:
- "spacy.io"
- "nightly.spacy.io"
- "v2.spacy.io"
paths:
- "website/meta/universe.json"
pull_request:
types: [opened, synchronize, reopened, edited]
paths:
- "website/meta/universe.json"

jobs:
validate:
name: Validate
if: github.repository_owner == 'explosion'
runs-on: ubuntu-latest
steps:
- name: Check out repo
uses: actions/checkout@v3

- name: Configure Python version
uses: actions/setup-python@v4
with:
python-version: "3.7"
architecture: x64

- name: Validate website/meta/universe.json
run: |
python .github/validate_universe_json.py website/meta/universe.json
@@ -48,6 +48,9 @@ jobs:
pip install flake8==5.0.4
python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
displayName: "flake8"
- script: |
python .github/validate_universe_json.py website/meta/universe.json
displayName: 'Validate website/meta/universe.json'

- job: "Test"
dependsOn: "Validate"
setup.cfg (36 lines changed)

@@ -78,41 +78,41 @@ transformers =
ray =
spacy_ray>=0.1.0,<1.0.0
cuda =
cupy>=5.0.0b4,<12.0.0
cupy>=5.0.0b4,<13.0.0
cuda80 =
cupy-cuda80>=5.0.0b4,<12.0.0
cupy-cuda80>=5.0.0b4,<13.0.0
cuda90 =
cupy-cuda90>=5.0.0b4,<12.0.0
cupy-cuda90>=5.0.0b4,<13.0.0
cuda91 =
cupy-cuda91>=5.0.0b4,<12.0.0
cupy-cuda91>=5.0.0b4,<13.0.0
cuda92 =
cupy-cuda92>=5.0.0b4,<12.0.0
cupy-cuda92>=5.0.0b4,<13.0.0
cuda100 =
cupy-cuda100>=5.0.0b4,<12.0.0
cupy-cuda100>=5.0.0b4,<13.0.0
cuda101 =
cupy-cuda101>=5.0.0b4,<12.0.0
cupy-cuda101>=5.0.0b4,<13.0.0
cuda102 =
cupy-cuda102>=5.0.0b4,<12.0.0
cupy-cuda102>=5.0.0b4,<13.0.0
cuda110 =
cupy-cuda110>=5.0.0b4,<12.0.0
cupy-cuda110>=5.0.0b4,<13.0.0
cuda111 =
cupy-cuda111>=5.0.0b4,<12.0.0
cupy-cuda111>=5.0.0b4,<13.0.0
cuda112 =
cupy-cuda112>=5.0.0b4,<12.0.0
cupy-cuda112>=5.0.0b4,<13.0.0
cuda113 =
cupy-cuda113>=5.0.0b4,<12.0.0
cupy-cuda113>=5.0.0b4,<13.0.0
cuda114 =
cupy-cuda114>=5.0.0b4,<12.0.0
cupy-cuda114>=5.0.0b4,<13.0.0
cuda115 =
cupy-cuda115>=5.0.0b4,<12.0.0
cupy-cuda115>=5.0.0b4,<13.0.0
cuda116 =
cupy-cuda116>=5.0.0b4,<12.0.0
cupy-cuda116>=5.0.0b4,<13.0.0
cuda117 =
cupy-cuda117>=5.0.0b4,<12.0.0
cupy-cuda117>=5.0.0b4,<13.0.0
cuda11x =
cupy-cuda11x>=11.0.0,<12.0.0
cupy-cuda11x>=11.0.0,<13.0.0
cuda-autodetect =
cupy-wheel>=11.0.0,<12.0.0
cupy-wheel>=11.0.0,<13.0.0
apple =
thinc-apple-ops>=0.1.0.dev0,<1.0.0
# Language tokenizers with external dependencies
@@ -35,7 +35,7 @@ def find_threshold_cli(
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
gold_preproc: bool = Opt(_DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"),
verbose: bool = Opt(False, "--silent", "-V", "-VV", help="Display more information for debugging purposes"),
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
# fmt: on
):
"""
@@ -23,6 +23,7 @@ def pretrain_cli(
resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"),
epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
skip_last: bool = Opt(False, "--skip-last", "-L", help="Skip saving model-last.bin"),
# fmt: on
):
"""

@@ -74,6 +75,7 @@ def pretrain_cli(
epoch_resume=epoch_resume,
use_gpu=use_gpu,
silent=False,
skip_last=skip_last,
)
msg.good("Successfully finished pretrain")
@@ -125,13 +125,17 @@ def app(environ, start_response):
return [res]


def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
def parse_deps(
orig_doc: Union[Doc, Span], options: Dict[str, Any] = {}
) -> Dict[str, Any]:
"""Generate dependency parse in {'words': [], 'arcs': []} format.

orig_doc (Doc): Document to parse.
orig_doc (Union[Doc, Span]): Document to parse.
options (Dict[str, Any]): Dependency parse specific visualisation options.
RETURNS (dict): Generated dependency parse keyed by words and arcs.
"""
if isinstance(orig_doc, Span):
orig_doc = orig_doc.as_doc()
doc = Doc(orig_doc.vocab).from_bytes(
orig_doc.to_bytes(exclude=["user_data", "user_hooks"])
)
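A brief sketch of what this change enables (assuming a pipeline such as `en_core_web_sm` is installed; see also the updated `test_displacy_parse_deps` later in this diff): `displacy.parse_deps` now accepts a `Span` as well as a `Doc`.

```python
# Sketch only: assumes the en_core_web_sm pipeline is installed.
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("This is a sentence.")

# parse_deps now accepts a Span (e.g. doc[:] or a single sentence) as well as a Doc.
deps = displacy.parse_deps(doc[:])
print(deps["words"], deps["arcs"])
```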
@@ -549,8 +549,8 @@ class Errors(metaclass=ErrorsWithCodes):
"during training, make sure to include it in 'annotating components'")

# New errors added in v3.x
E850 = ("The PretrainVectors objective currently only supports default "
"vectors, not {mode} vectors.")
E850 = ("The PretrainVectors objective currently only supports default or "
"floret vectors, not {mode} vectors.")
E851 = ("The 'textcat' component labels should only have values of 0 or 1, "
"but found value of '{val}'.")
E852 = ("The tar file pulled from the remote attempted an unsafe path "
@@ -1,5 +1,5 @@
from typing import Any, Optional, Iterable, Tuple, List, Callable, TYPE_CHECKING, cast
from thinc.types import Floats2d
from thinc.types import Floats2d, Ints1d
from thinc.api import chain, Maxout, LayerNorm, Softmax, Linear, zero_init, Model
from thinc.api import MultiSoftmax, list2array
from thinc.api import to_categorical, CosineDistance, L2Distance

@@ -7,7 +7,7 @@ from thinc.loss import Loss

from ...util import registry, OOV_RANK
from ...errors import Errors
from ...attrs import ID
from ...attrs import ID, ORTH
from ...vectors import Mode as VectorsMode

import numpy

@@ -24,8 +24,6 @@ def create_pretrain_vectors(
maxout_pieces: int, hidden_size: int, loss: str
) -> Callable[["Vocab", Model], Model]:
def create_vectors_objective(vocab: "Vocab", tok2vec: Model) -> Model:
if vocab.vectors.mode != VectorsMode.default:
raise ValueError(Errors.E850.format(mode=vocab.vectors.mode))
if vocab.vectors.shape[1] == 0:
raise ValueError(Errors.E875)
model = build_cloze_multi_task_model(

@@ -70,14 +68,23 @@ def get_vectors_loss(ops, docs, prediction, distance):
"""Compute a loss based on a distance between the documents' vectors and
the prediction.
"""
# The simplest way to implement this would be to vstack the
# token.vector values, but that's a bit inefficient, especially on GPU.
# Instead we fetch the index into the vectors table for each of our tokens,
# and look them up all at once. This prevents data copying.
ids = ops.flatten([doc.to_array(ID).ravel() for doc in docs])
target = docs[0].vocab.vectors.data[ids]
target[ids == OOV_RANK] = 0
d_target, loss = distance(prediction, target)
vocab = docs[0].vocab
if vocab.vectors.mode == VectorsMode.default:
# The simplest way to implement this would be to vstack the
# token.vector values, but that's a bit inefficient, especially on GPU.
# Instead we fetch the index into the vectors table for each of our
# tokens, and look them up all at once. This prevents data copying.
ids = ops.flatten([doc.to_array(ID).ravel() for doc in docs])
target = docs[0].vocab.vectors.data[ids]
target[ids == OOV_RANK] = 0
d_target, loss = distance(prediction, target)
elif vocab.vectors.mode == VectorsMode.floret:
keys = ops.flatten([cast(Ints1d, doc.to_array(ORTH)) for doc in docs])
target = vocab.vectors.get_batch(keys)
target = ops.as_contig(target)
d_target, loss = distance(prediction, target)
else:
raise ValueError(Errors.E850.format(mode=vocab.vectors.mode))
return loss, d_target
@@ -474,18 +474,24 @@ class EntityLinker(TrainablePipe):

# Looping through each entity in batch (TODO: rewrite)
for j, ent in enumerate(ent_batch):
sent_index = sentences.index(ent.sent)
assert sent_index >= 0
assert hasattr(ent, "sents")
sents = list(ent.sents)
sent_indices = (
sentences.index(sents[0]),
sentences.index(sents[-1]),
)
assert sent_indices[1] >= sent_indices[0] >= 0

if self.incl_context:
# get n_neighbour sentences, clipped to the length of the document
start_sentence = max(0, sent_index - self.n_sents)
start_sentence = max(0, sent_indices[0] - self.n_sents)
end_sentence = min(
len(sentences) - 1, sent_index + self.n_sents
len(sentences) - 1, sent_indices[1] + self.n_sents
)
start_token = sentences[start_sentence].start
end_token = sentences[end_sentence].end
sent_doc = doc[start_token:end_token].as_doc()

# currently, the context is the same for each entity in a sentence (should be refined)
sentence_encoding = self.model.predict([sent_doc])[0]
sentence_encoding_t = sentence_encoding.T
@@ -1,5 +1,6 @@
from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any, cast, Union
from dataclasses import dataclass
from functools import partial
from thinc.api import Config, Model, get_current_ops, set_dropout_rate, Ops
from thinc.api import Optimizer
from thinc.types import Ragged, Ints2d, Floats2d

@@ -82,39 +83,42 @@ class Suggester(Protocol):
...


def ngram_suggester(
docs: Iterable[Doc], sizes: List[int], *, ops: Optional[Ops] = None
) -> Ragged:
if ops is None:
ops = get_current_ops()
spans = []
lengths = []
for doc in docs:
starts = ops.xp.arange(len(doc), dtype="i")
starts = starts.reshape((-1, 1))
length = 0
for size in sizes:
if size <= len(doc):
starts_size = starts[: len(doc) - (size - 1)]
spans.append(ops.xp.hstack((starts_size, starts_size + size)))
length += spans[-1].shape[0]
if spans:
assert spans[-1].ndim == 2, spans[-1].shape
lengths.append(length)
lengths_array = ops.asarray1i(lengths)
if len(spans) > 0:
output = Ragged(ops.xp.vstack(spans), lengths_array)
else:
output = Ragged(ops.xp.zeros((0, 0), dtype="i"), lengths_array)

assert output.dataXd.ndim == 2
return output


@registry.misc("spacy.ngram_suggester.v1")
def build_ngram_suggester(sizes: List[int]) -> Suggester:
"""Suggest all spans of the given lengths. Spans are returned as a ragged
array of integers. The array has two columns, indicating the start and end
position."""

def ngram_suggester(docs: Iterable[Doc], *, ops: Optional[Ops] = None) -> Ragged:
if ops is None:
ops = get_current_ops()
spans = []
lengths = []
for doc in docs:
starts = ops.xp.arange(len(doc), dtype="i")
starts = starts.reshape((-1, 1))
length = 0
for size in sizes:
if size <= len(doc):
starts_size = starts[: len(doc) - (size - 1)]
spans.append(ops.xp.hstack((starts_size, starts_size + size)))
length += spans[-1].shape[0]
if spans:
assert spans[-1].ndim == 2, spans[-1].shape
lengths.append(length)
lengths_array = ops.asarray1i(lengths)
if len(spans) > 0:
output = Ragged(ops.xp.vstack(spans), lengths_array)
else:
output = Ragged(ops.xp.zeros((0, 0), dtype="i"), lengths_array)

assert output.dataXd.ndim == 2
return output

return ngram_suggester
return partial(ngram_suggester, sizes=sizes)


@registry.misc("spacy.ngram_range_suggester.v1")

@@ -726,6 +730,7 @@ class SpanCategorizer(TrainablePipe):
if not allow_overlap:
# Get the probabilities
sort_idx = (argmax_scores.squeeze() * -1).argsort()
argmax_scores = argmax_scores[sort_idx]
predicted = predicted[sort_idx]
indices = indices[sort_idx]
keeps = keeps[sort_idx]

@@ -748,4 +753,5 @@ class SpanCategorizer(TrainablePipe):
attrs_scores.append(argmax_scores[i])
spans.append(Span(doc, start, end, label=self.labels[label]))

spans.attrs["scores"] = numpy.array(attrs_scores)
return spans
@@ -700,3 +700,34 @@ def test_span_group_copy(doc):
assert len(doc.spans["test"]) == 3
# check that the copy spans were not modified and this is an isolated doc
assert len(doc_copy.spans["test"]) == 2


def test_for_partial_ent_sents():
"""Spans may be associated with multiple sentences. These .sents should always be complete, not partial, sentences,
which this tests for.
"""
doc = Doc(
English().vocab,
words=["Mahler's", "Symphony", "No.", "8", "was", "beautiful."],
sent_starts=[1, 0, 0, 1, 0, 0],
)
doc.set_ents([Span(doc, 1, 4, "WORK")])
# The specified entity is associated with both sentences in this doc, so we expect all sentences in the doc to be
# equal to the sentences referenced in ent.sents.
for doc_sent, ent_sent in zip(doc.sents, doc.ents[0].sents):
assert doc_sent == ent_sent


def test_for_no_ent_sents():
"""Span.sents() should set .sents correctly, even if Span in question is trailing and doesn't form a full
sentence.
"""
doc = Doc(
English().vocab,
words=["This", "is", "a", "test.", "ENTITY"],
sent_starts=[1, 0, 0, 0, 1],
)
doc.set_ents([Span(doc, 4, 5, "WORK")])
sents = list(doc.ents[0].sents)
assert len(sents) == 1
assert str(sents[0]) == str(doc.ents[0].sent) == "ENTITY"
@@ -1,9 +1,9 @@
from typing import Callable, Iterable, Dict, Any
from typing import Callable, Iterable, Dict, Any, Tuple

import pytest
from numpy.testing import assert_equal

from spacy import registry, util
from spacy import registry, util, Language
from spacy.attrs import ENT_KB_ID
from spacy.compat import pickle
from spacy.kb import Candidate, InMemoryLookupKB, get_candidates, KnowledgeBase

@@ -108,18 +108,23 @@ def test_issue7065():


@pytest.mark.issue(7065)
def test_issue7065_b():
@pytest.mark.parametrize("entity_in_first_sentence", [True, False])
def test_sentence_crossing_ents(entity_in_first_sentence: bool):
"""Tests if NEL crashes if entities cross sentence boundaries and the first associated sentence doesn't have an
entity.
entity_in_prior_sentence (bool): Whether to include an entity in the first sentence associated with the
sentence-crossing entity.
"""
# Test that the NEL doesn't crash when an entity crosses a sentence boundary
nlp = English()
vector_length = 3
nlp.add_pipe("sentencizer")
text = "Mahler 's Symphony No. 8 was beautiful."
entities = [(0, 6, "PERSON"), (10, 24, "WORK")]
links = {
(0, 6): {"Q7304": 1.0, "Q270853": 0.0},
(10, 24): {"Q7304": 0.0, "Q270853": 1.0},
}
sent_starts = [1, -1, 0, 0, 0, 0, 0, 0, 0]
entities = [(10, 24, "WORK")]
links = {(10, 24): {"Q7304": 0.0, "Q270853": 1.0}}
if entity_in_first_sentence:
entities.append((0, 6, "PERSON"))
links[(0, 6)] = {"Q7304": 1.0, "Q270853": 0.0}
sent_starts = [1, -1, 0, 0, 0, 1, 0, 0, 0]
doc = nlp(text)
example = Example.from_dict(
doc, {"entities": entities, "links": links, "sent_starts": sent_starts}

@@ -145,31 +150,14 @@ def test_issue7065_b():

# Create the Entity Linker component and add it to the pipeline
entity_linker = nlp.add_pipe("entity_linker", last=True)
entity_linker.set_kb(create_kb)
entity_linker.set_kb(create_kb)  # type: ignore
# train the NEL pipe
optimizer = nlp.initialize(get_examples=lambda: train_examples)
for i in range(2):
losses = {}
nlp.update(train_examples, sgd=optimizer, losses=losses)
nlp.update(train_examples, sgd=optimizer)

# Add a custom rule-based component to mimick NER
patterns = [
{"label": "PERSON", "pattern": [{"LOWER": "mahler"}]},
{
"label": "WORK",
"pattern": [
{"LOWER": "symphony"},
{"LOWER": "no"},
{"LOWER": "."},
{"LOWER": "8"},
],
},
]
ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
ruler.add_patterns(patterns)
# test the trained model - this should not throw E148
doc = nlp(text)
assert doc
# This shouldn't crash.
entity_linker.predict([example.reference])  # type: ignore


def test_no_entities():
@@ -1,7 +1,7 @@
import pytest
import numpy
from numpy.testing import assert_array_equal, assert_almost_equal
from thinc.api import get_current_ops, Ragged
from thinc.api import get_current_ops, NumpyOps, Ragged

from spacy import util
from spacy.lang.en import English

@@ -190,17 +190,19 @@ def test_make_spangroup_singlelabel(threshold, allow_overlap, nr_results):
spangroup = spancat._make_span_group_singlelabel(
doc, indices, scores, allow_overlap
)
assert len(spangroup) == nr_results
if threshold > 0.4:
if allow_overlap:
assert spangroup[0].text == "London"
assert spangroup[0].label_ == "City"
assert_almost_equal(0.6, spangroup.attrs["scores"][0], 5)
assert spangroup[1].text == "Greater London"
assert spangroup[1].label_ == "GreatCity"

assert spangroup.attrs["scores"][1] == 0.9
assert_almost_equal(0.9, spangroup.attrs["scores"][1], 5)
else:
assert spangroup[0].text == "Greater London"
assert spangroup[0].label_ == "GreatCity"
assert spangroup.attrs["scores"][0] == 0.9
else:
if allow_overlap:
assert spangroup[0].text == "Greater"

@@ -256,22 +258,32 @@ def test_make_spangroup_negative_label():
assert len(spangroup_single) == 2
assert spangroup_single[0].text == "Greater"
assert spangroup_single[0].label_ == "City"
assert_almost_equal(0.4, spangroup_single.attrs["scores"][0], 5)
assert spangroup_single[1].text == "Greater London"
assert spangroup_single[1].label_ == "GreatCity"
assert spangroup_single.attrs["scores"][1] == 0.9
assert_almost_equal(0.9, spangroup_single.attrs["scores"][1], 5)

assert len(spangroup_multi) == 6
assert spangroup_multi[0].text == "Greater"
assert spangroup_multi[0].label_ == "City"
assert_almost_equal(0.4, spangroup_multi.attrs["scores"][0], 5)
assert spangroup_multi[1].text == "Greater"
assert spangroup_multi[1].label_ == "Person"
assert_almost_equal(0.3, spangroup_multi.attrs["scores"][1], 5)
assert spangroup_multi[2].text == "London"
assert spangroup_multi[2].label_ == "City"
assert_almost_equal(0.6, spangroup_multi.attrs["scores"][2], 5)
assert spangroup_multi[3].text == "London"
assert spangroup_multi[3].label_ == "GreatCity"
assert_almost_equal(0.4, spangroup_multi.attrs["scores"][3], 5)
assert spangroup_multi[4].text == "Greater London"
assert spangroup_multi[4].label_ == "Thing"
assert spangroup_multi[4].text == "Greater London"
assert_almost_equal(0.8, spangroup_multi.attrs["scores"][4], 5)
assert spangroup_multi[5].text == "Greater London"
assert spangroup_multi[5].label_ == "GreatCity"
assert_almost_equal(0.9, spangroup_multi.attrs["scores"][5], 5)


def test_ngram_suggester(en_tokenizer):

@@ -565,3 +577,21 @@ def test_set_candidates(name):
assert len(docs[0].spans["candidates"]) == 9
assert docs[0].spans["candidates"][0].text == "Just"
assert docs[0].spans["candidates"][4].text == "Just a"


@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
@pytest.mark.parametrize("n_process", [1, 2])
def test_spancat_multiprocessing(name, n_process):
if isinstance(get_current_ops, NumpyOps) or n_process < 2:
nlp = Language()
spancat = nlp.add_pipe(name, config={"spans_key": SPAN_KEY})
train_examples = make_examples(nlp)
nlp.initialize(get_examples=lambda: train_examples)
texts = [
"Just a sentence.",
"I like London and Berlin",
"I like Berlin",
"I eat ham.",
]
docs = list(nlp.pipe(texts, n_process=n_process))
assert len(docs) == len(texts)
@@ -213,6 +213,13 @@ def test_serialize_doc_exclude(en_vocab):

def test_serialize_doc_span_groups(en_vocab):
doc = Doc(en_vocab, words=["hello", "world", "!"])
doc.spans["content"] = [doc[0:2]]
span = doc[0:2]
span.label_ = "test_serialize_doc_span_groups_label"
span.id_ = "test_serialize_doc_span_groups_id"
span.kb_id_ = "test_serialize_doc_span_groups_kb_id"
doc.spans["content"] = [span]
new_doc = Doc(en_vocab).from_bytes(doc.to_bytes())
assert len(new_doc.spans["content"]) == 1
assert new_doc.spans["content"][0].label_ == "test_serialize_doc_span_groups_label"
assert new_doc.spans["content"][0].id_ == "test_serialize_doc_span_groups_id"
assert new_doc.spans["content"][0].kb_id_ == "test_serialize_doc_span_groups_kb_id"
@@ -49,7 +49,11 @@ def test_serialize_doc_bin():
nlp = English()
for doc in nlp.pipe(texts):
doc.cats = cats
doc.spans["start"] = [doc[0:2]]
span = doc[0:2]
span.label_ = "UNUSUAL_SPAN_LABEL"
span.id_ = "UNUSUAL_SPAN_ID"
span.kb_id_ = "UNUSUAL_SPAN_KB_ID"
doc.spans["start"] = [span]
doc[0].norm_ = "UNUSUAL_TOKEN_NORM"
doc[0].ent_id_ = "UNUSUAL_TOKEN_ENT_ID"
doc_bin.add(doc)

@@ -63,6 +67,9 @@ def test_serialize_doc_bin():
assert doc.text == texts[i]
assert doc.cats == cats
assert len(doc.spans) == 1
assert doc.spans["start"][0].label_ == "UNUSUAL_SPAN_LABEL"
assert doc.spans["start"][0].id_ == "UNUSUAL_SPAN_ID"
assert doc.spans["start"][0].kb_id_ == "UNUSUAL_SPAN_KB_ID"
assert doc[0].norm_ == "UNUSUAL_TOKEN_NORM"
assert doc[0].ent_id_ == "UNUSUAL_TOKEN_ENT_ID"
@@ -275,6 +275,20 @@ def test_displacy_parse_deps(en_vocab):
{"start": 2, "end": 3, "label": "det", "dir": "left"},
{"start": 1, "end": 3, "label": "attr", "dir": "right"},
]
# Test that displacy.parse_deps converts Span to Doc
deps = displacy.parse_deps(doc[:])
assert isinstance(deps, dict)
assert deps["words"] == [
{"lemma": None, "text": words[0], "tag": pos[0]},
{"lemma": None, "text": words[1], "tag": pos[1]},
{"lemma": None, "text": words[2], "tag": pos[2]},
{"lemma": None, "text": words[3], "tag": pos[3]},
]
assert deps["arcs"] == [
{"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
{"start": 2, "end": 3, "label": "det", "dir": "left"},
{"start": 1, "end": 3, "label": "attr", "dir": "right"},
]


def test_displacy_invalid_arcs():
@@ -165,7 +165,8 @@ def test_pretraining_default():


@pytest.mark.parametrize("objective", CHAR_OBJECTIVES)
def test_pretraining_tok2vec_characters(objective):
@pytest.mark.parametrize("skip_last", (True, False))
def test_pretraining_tok2vec_characters(objective, skip_last):
"""Test that pretraining works with the character objective"""
config = Config().from_str(pretrain_string_listener)
config["pretraining"]["objective"] = objective

@@ -178,10 +179,14 @@ def test_pretraining_tok2vec_characters(objective):
filled["paths"]["raw_text"] = file_path
filled = filled.interpolate()
assert filled["pretraining"]["component"] == "tok2vec"
pretrain(filled, tmp_dir)
pretrain(filled, tmp_dir, skip_last=skip_last)
assert Path(tmp_dir / "model0.bin").exists()
assert Path(tmp_dir / "model4.bin").exists()
assert not Path(tmp_dir / "model5.bin").exists()
if skip_last:
assert not Path(tmp_dir / "model-last.bin").exists()
else:
assert Path(tmp_dir / "model-last.bin").exists()


@pytest.mark.parametrize("objective", VECTOR_OBJECTIVES)

@@ -237,6 +242,7 @@ def test_pretraining_tagger_tok2vec(config):
pretrain(filled, tmp_dir)
assert Path(tmp_dir / "model0.bin").exists()
assert Path(tmp_dir / "model4.bin").exists()
assert Path(tmp_dir / "model-last.bin").exists()
assert not Path(tmp_dir / "model5.bin").exists()


@@ -359,19 +365,15 @@ def test_pretrain_default_vectors():
nlp.vocab.vectors = Vectors(shape=(10, 10))
create_pretrain_vectors(1, 1, "cosine")(nlp.vocab, nlp.get_pipe("tok2vec").model)

# floret vectors are supported
nlp.vocab.vectors = Vectors(
data=get_current_ops().xp.zeros((10, 10)), mode="floret", hash_count=1
)
create_pretrain_vectors(1, 1, "cosine")(nlp.vocab, nlp.get_pipe("tok2vec").model)

# error for no vectors
with pytest.raises(ValueError, match="E875"):
nlp.vocab.vectors = Vectors()
create_pretrain_vectors(1, 1, "cosine")(
nlp.vocab, nlp.get_pipe("tok2vec").model
)

# error for floret vectors
with pytest.raises(ValueError, match="E850"):
ops = get_current_ops()
nlp.vocab.vectors = Vectors(
data=ops.xp.zeros((10, 10)), mode="floret", hash_count=1
)
create_pretrain_vectors(1, 1, "cosine")(
nlp.vocab, nlp.get_pipe("tok2vec").model
)
@@ -124,6 +124,10 @@ class DocBin:
for key, group in doc.spans.items():
for span in group:
self.strings.add(span.label_)
if span.kb_id in span.doc.vocab.strings:
self.strings.add(span.kb_id_)
if span.id in span.doc.vocab.strings:
self.strings.add(span.id_)

def get_docs(self, vocab: Vocab) -> Iterator[Doc]:
"""Recover Doc objects from the annotations, using the given vocab.
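A minimal sketch of the behaviour this fixes, mirroring the updated `test_serialize_doc_bin` earlier in this diff: `id_` and `kb_id_` values on spans stored in `doc.spans` now survive a `DocBin` round trip. The label, id and kb_id values below are illustrative only.

```python
# Sketch only: uses a blank English pipeline; the strings used here are arbitrary.
import spacy
from spacy.tokens import DocBin

nlp = spacy.blank("en")
doc = nlp("hello world !")
span = doc[0:2]
span.label_ = "GREETING"
span.id_ = "greeting-1"
span.kb_id_ = "Q1"
doc.spans["content"] = [span]

doc_bin = DocBin()
doc_bin.add(doc)
new_doc = list(doc_bin.get_docs(nlp.vocab))[0]
# With this change, the span's kb_id_ and id_ strings are serialized alongside its label_.
assert new_doc.spans["content"][0].id_ == "greeting-1"
assert new_doc.spans["content"][0].kb_id_ == "Q1"
```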
@@ -544,10 +544,6 @@ cdef class Doc:

DOCS: https://spacy.io/api/doc#char_span
"""
if not isinstance(label, int):
label = self.vocab.strings.add(label)
if not isinstance(kb_id, int):
kb_id = self.vocab.strings.add(kb_id)
alignment_modes = ("strict", "contract", "expand")
if alignment_mode not in alignment_modes:
raise ValueError(

@@ -1350,6 +1346,10 @@ cdef class Doc:
for group in self.spans.values():
for span in group:
strings.add(span.label_)
if span.kb_id in span.doc.vocab.strings:
strings.add(span.kb_id_)
if span.id in span.doc.vocab.strings:
strings.add(span.id_)
# Msgpack doesn't distinguish between lists and tuples, which is
# vexing for user data. As a best guess, we *know* that within
# keys, we must have tuples. In values we just have to hope
@@ -460,9 +460,12 @@ cdef class Span:
start = i
if start >= self.end:
break
if start < self.end:
yield Span(self.doc, start, self.end)
elif i == self.doc.length - 1:
yield Span(self.doc, start, self.doc.length)

# Ensure that trailing parts of the Span instance are included in last element of .sents.
if start == self.doc.length - 1:
yield Span(self.doc, start, self.doc.length)

@property
def ents(self):
@@ -24,6 +24,7 @@ def pretrain(
epoch_resume: Optional[int] = None,
use_gpu: int = -1,
silent: bool = True,
skip_last: bool = False,
):
msg = Printer(no_print=silent)
if config["training"]["seed"] is not None:

@@ -60,10 +61,14 @@ def pretrain(
row_settings = {"widths": (3, 10, 10, 6, 4), "aligns": ("r", "r", "r", "r", "r")}
msg.row(("#", "# Words", "Total Loss", "Loss", "w/s"), **row_settings)

def _save_model(epoch, is_temp=False):
def _save_model(epoch, is_temp=False, is_last=False):
is_temp_str = ".temp" if is_temp else ""
with model.use_params(optimizer.averages):
with (output_dir / f"model{epoch}{is_temp_str}.bin").open("wb") as file_:
if is_last:
save_path = output_dir / f"model-last.bin"
else:
save_path = output_dir / f"model{epoch}{is_temp_str}.bin"
with (save_path).open("wb") as file_:
file_.write(model.get_ref("tok2vec").to_bytes())
log = {
"nr_word": tracker.nr_word,

@@ -76,22 +81,26 @@ def pretrain(

# TODO: I think we probably want this to look more like the
# 'create_train_batches' function?
for epoch in range(epoch_resume, P["max_epochs"]):
for batch_id, batch in enumerate(batcher(corpus(nlp))):
docs = ensure_docs(batch)
loss = make_update(model, docs, optimizer, objective)
progress = tracker.update(epoch, loss, docs)
if progress:
msg.row(progress, **row_settings)
if P["n_save_every"] and (batch_id % P["n_save_every"] == 0):
_save_model(epoch, is_temp=True)
try:
for epoch in range(epoch_resume, P["max_epochs"]):
for batch_id, batch in enumerate(batcher(corpus(nlp))):
docs = ensure_docs(batch)
loss = make_update(model, docs, optimizer, objective)
progress = tracker.update(epoch, loss, docs)
if progress:
msg.row(progress, **row_settings)
if P["n_save_every"] and (batch_id % P["n_save_every"] == 0):
_save_model(epoch, is_temp=True)

if P["n_save_epoch"]:
if epoch % P["n_save_epoch"] == 0 or epoch == P["max_epochs"] - 1:
if P["n_save_epoch"]:
if epoch % P["n_save_epoch"] == 0 or epoch == P["max_epochs"] - 1:
_save_model(epoch)
else:
_save_model(epoch)
else:
_save_model(epoch)
tracker.epoch_loss = 0.0
tracker.epoch_loss = 0.0
finally:
if not skip_last:
_save_model(P["max_epochs"], is_last=True)


def ensure_docs(examples_or_docs: Iterable[Union[Doc, Example]]) -> List[Doc]:
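A hedged sketch of how the new `skip_last` option is exercised, mirroring the updated pretraining tests earlier in this diff. The import path, config and output directory below are assumptions for illustration; from the command line the equivalent is the new `--skip-last` flag on `spacy pretrain`.

```python
# Sketch only: `config` is assumed to be a filled spaCy config containing a
# [pretraining] block, and "pretrain_output" an existing directory.
from pathlib import Path
from spacy.training.pretrain import pretrain

# skip_last=True suppresses writing model-last.bin when pretraining finishes;
# the per-epoch model0.bin ... modelN.bin files are still written as before.
pretrain(config, Path("pretrain_output"), skip_last=True)
```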
@@ -1122,17 +1122,18 @@ auto-generated by setting `--pretraining` on
$ python -m spacy pretrain [config_path] [output_dir] [--code] [--resume-path] [--epoch-resume] [--gpu-id] [overrides]
```

| Name | Description |
| ----------------------- | ----------- |
| `config_path` | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
| `output_dir` | Directory to save binary weights to on each epoch. ~~Path (positional)~~ |
| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
| `--resume-path`, `-r` | Path to pretrained weights from which to resume pretraining. ~~Optional[Path] \(option)~~ |
| `--epoch-resume`, `-er` | The epoch to resume counting from when using `--resume-path`. Prevents unintended overwriting of existing weight files. ~~Optional[int] \(option)~~ |
| `--gpu-id`, `-g` | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~ |
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
| overrides | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--training.dropout 0.2`. ~~Any (option/flag)~~ |
| **CREATES** | The pretrained weights that can be used to initialize `spacy train`. |
| Name | Description |
| -------------------------------------------------- | ----------- |
| `config_path` | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
| `output_dir` | Directory to save binary weights to on each epoch. ~~Path (positional)~~ |
| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
| `--resume-path`, `-r` | Path to pretrained weights from which to resume pretraining. ~~Optional[Path] \(option)~~ |
| `--epoch-resume`, `-er` | The epoch to resume counting from when using `--resume-path`. Prevents unintended overwriting of existing weight files. ~~Optional[int] \(option)~~ |
| `--gpu-id`, `-g` | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~ |
| `--skip-last`, `-L` <Tag variant="new">3.5.2</Tag> | Skip saving `model-last.bin`. Defaults to `False`. ~~bool (flag)~~ |
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
| overrides | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--training.dropout 0.2`. ~~Any (option/flag)~~ |
| **CREATES** | The pretrained weights that can be used to initialize `spacy train`. |

## evaluate {id="evaluate",version="2",tag="command"}
|
|||
> $ python -m spacy find-threshold my_nlp data.spacy spancat threshold spans_sc_f
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `model` | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~ |
|
||||
| `data_path` | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~ |
|
||||
| `pipe_name` | Name of pipe to examine thresholds for. ~~str (positional)~~ |
|
||||
| `threshold_key` | Key of threshold attribute in component's configuration. ~~str (positional)~~ |
|
||||
| `scores_key` | Name of score to metric to optimize. ~~str (positional)~~ |
|
||||
| `--n_trials`, `-n` | Number of trials to determine optimal thresholds. ~~int (option)~~ |
|
||||
| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
|
||||
| `--gpu-id`, `-g` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~ |
|
||||
| `--gold-preproc`, `-G` | Use gold preprocessing. ~~bool (flag)~~ |
|
||||
| `--silent`, `-V`, `-VV` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~ |
|
||||
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
|
||||
| Name | Description |
|
||||
| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `model` | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~ |
|
||||
| `data_path` | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~ |
|
||||
| `pipe_name` | Name of pipe to examine thresholds for. ~~str (positional)~~ |
|
||||
| `threshold_key` | Key of threshold attribute in component's configuration. ~~str (positional)~~ |
|
||||
| `scores_key` | Name of score to metric to optimize. ~~str (positional)~~ |
|
||||
| `--n_trials`, `-n` | Number of trials to determine optimal thresholds. ~~int (option)~~ |
|
||||
| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
|
||||
| `--gpu-id`, `-g` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~ |
|
||||
| `--gold-preproc`, `-G` | Use gold preprocessing. ~~bool (flag)~~ |
|
||||
| `--verbose`, `-V`, `-VV` | Display more information for debugging purposes. ~~bool (flag)~~ |
|
||||
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
|
||||
|
||||
## assemble {id="assemble",tag="command"}
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ details on the architectures and their arguments and hyperparameters.
|
|||
> config={
|
||||
> "model": DEFAULT_COREF_MODEL,
|
||||
> "span_cluster_prefix": DEFAULT_CLUSTER_PREFIX,
|
||||
> },
|
||||
> }
|
||||
> nlp.add_pipe("experimental_coref", config=config)
|
||||
> ```
|
||||
|
||||
|
|
|
@@ -20,8 +20,9 @@ output class probabilities are independent for each class. However, if you need
to predict at most one true class for a span, then use `spancat_singlelabel`. It
uses a `Softmax` layer and treats the task as a multi-class problem.

Predicted spans will be saved in a [`SpanGroup`](/api/spangroup) on the doc.
Individual span scores can be found in `spangroup.attrs["scores"]`.
Predicted spans will be saved in a [`SpanGroup`](/api/spangroup) on the doc
under `doc.spans[spans_key]`, where `spans_key` is a component config setting.
Individual span scores are stored in `doc.spans[spans_key].attrs["scores"]`.

## Assigned Attributes {id="assigned-attributes"}

@@ -29,7 +30,9 @@ Predictions will be saved to `Doc.spans[spans_key]` as a
[`SpanGroup`](/api/spangroup). The scores for the spans in the `SpanGroup` will
be saved in `SpanGroup.attrs["scores"]`.

`spans_key` defaults to `"sc"`, but can be passed as a parameter.
`spans_key` defaults to `"sc"`, but can be passed as a parameter. The `spancat`
component will overwrite any existing spans under the spans key
`doc.spans[spans_key]`.

| Location | Value |
| -------------------------------------- | -------------------------------------------------------- |
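A minimal sketch of reading the attributes described in the updated text above, assuming `nlp` is a pipeline that already contains a trained span categorizer using the default `spans_key` of `"sc"`:

```python
# Sketch only: assumes `nlp` has a trained "spancat" component with spans_key "sc".
doc = nlp("Some text with spans to categorize.")

# Predicted spans live in a SpanGroup under doc.spans[spans_key] ...
for span, score in zip(doc.spans["sc"], doc.spans["sc"].attrs["scores"]):
    # ... and the per-span scores sit alongside them in the group's attrs.
    print(span.text, span.label_, score)
```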
@@ -8,6 +8,13 @@ Look up strings by 64-bit hashes. As of v2.0, spaCy uses hash values instead of
integer IDs. This ensures that strings always map to the same ID, even from
different `StringStores`.

<Infobox variant ="warning">

Note that a `StringStore` instance is not static. It increases in size as texts
with new tokens are processed.

</Infobox>

## StringStore.\_\_init\_\_ {id="init",tag="method"}

Create the `StringStore`.
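A small sketch of the behaviour the new infobox describes, using a blank pipeline; the exact counts depend on which tokens have been seen before:

```python
# Sketch only: the StringStore grows as texts with previously unseen tokens are processed.
import spacy

nlp = spacy.blank("en")
before = len(nlp.vocab.strings)
nlp("Zygomorphic quokkas vocalize weirdly")  # tokens the blank pipeline has not seen yet
after = len(nlp.vocab.strings)
assert after > before
```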
@@ -25,7 +25,10 @@ and call the package's own `load()` method. If a pipeline is loaded from a path,
spaCy will assume it's a data directory, load its
[`config.cfg`](/api/data-formats#config) and use the language and pipeline
information to construct the `Language` class. The data will be loaded in via
[`Language.from_disk`](/api/language#from_disk).
[`Language.from_disk`](/api/language#from_disk). Loading a pipeline from a
package will also import any custom code, if present, whereas loading from a
directory does not. For these cases, you need to manually import your custom
code.

<Infobox variant="warning" title="Changed in v3.0">
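A hedged sketch of the manual-import case described in the new text above; the module name and pipeline path are hypothetical — the point is that whatever module registers your custom components must be imported before loading from a directory:

```python
# Sketch only: loading from a data directory does not import custom code,
# so import the module that registers your custom components first.
import my_custom_components  # hypothetical module with @Language.factory registrations
import spacy

nlp = spacy.load("/path/to/pipeline-dir")  # hypothetical unpackaged pipeline directory
```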
@@ -291,7 +294,7 @@ the `manual=True` argument in `displacy.render`.

| Name | Description |
| ----------- | ------------------------------------------------------------------- |
| `orig_doc` | Doc to parse dependencies. ~~Doc~~ |
| `orig_doc` | Doc or span to parse dependencies. ~~Union[Doc, Span]~~ |
| `options` | Dependency parse specific visualisation options. ~~Dict[str, Any]~~ |
| **RETURNS** | Generated dependency parse keyed by words and arcs. ~~dict~~ |

@@ -577,7 +580,7 @@ start decreasing across epochs.
> ```ini
> [training.logger]
> @loggers = "spacy.ConsoleLogger.v3"
> progress_bar = "all_steps"
> progress_bar = "eval"
> console_output = true
> output_file = "training_log.jsonl"
> ```
@@ -10,6 +10,13 @@ The `Vocab` object provides a lookup table that allows you to access
[`StringStore`](/api/stringstore). It also owns underlying C-data that is shared
between `Doc` objects.

<Infobox variant ="warning">

Note that a `Vocab` instance is not static. It increases in size as texts with
new tokens are processed.

</Infobox>

## Vocab.\_\_init\_\_ {id="init",tag="method"}

Create the vocabulary.
@@ -746,13 +746,16 @@ this by setting `initialize.init_tok2vec` to the filename of the `.bin` file
that you want to use from pretraining.

A pretraining step that runs for 5 epochs with an output path of `pretrain/`, as
an example, produces `pretrain/model0.bin` through `pretrain/model4.bin`. To
make use of the final output, you could fill in this value in your config file:
an example, produces `pretrain/model0.bin` through `pretrain/model4.bin` plus a
copy of the last iteration as `pretrain/model-last.bin`. Additionally, you can
configure `n_save_epoch` to tell pretraining in which epoch interval it should
save the current training progress. To use the final output to initialize your
`tok2vec` layer, you could fill in this value in your config file:

```ini {title="config.cfg"}

[paths]
init_tok2vec = "pretrain/model4.bin"
init_tok2vec = "pretrain/model-last.bin"

[initialize]
init_tok2vec = ${paths.init_tok2vec}
@@ -1682,6 +1682,8 @@ def expand_person_entities(doc):
if prev_token.text in ("Dr", "Dr.", "Mr", "Mr.", "Ms", "Ms."):
new_ent = Span(doc, ent.start - 1, ent.end, label=ent.label)
new_ents.append(new_ent)
else:
new_ents.append(ent)
else:
new_ents.append(ent)
doc.ents = new_ents
@@ -758,6 +758,15 @@ any custom architectures, functions or
your pipeline and registered when it's loaded. See the documentation on
[saving and loading pipelines](/usage/saving-loading#models-custom) for details.

<Infobox variant="warning">

Note that the unpackaged models produced by `spacy train` are data directories
that **do not include custom code**. You need to import the code in your script
before loading in unpackaged models. For more details, see
[`spacy.load`](/api/top-level#spacy.load).

</Infobox>

#### Example: Modifying the nlp object {id="custom-code-nlp-callbacks"}

For many use cases, you don't necessarily want to implement the whole `Language`
@@ -3215,6 +3215,51 @@
"category": ["pipeline"],
"tags": ["syllables", "multilingual"]
},
{
"id": "sentimental-onix",
"title": "Sentimental Onix",
"slogan": "Use onnx for sentiment models",
"description": "spaCy pipeline component for sentiment analysis using onnx",
"github": "sloev/sentimental-onix",
"pip": "sentimental-onix",
"code_example": [
"# Download model:",
"# python -m sentimental_onix download en",
"import spacy",
"from sentimental_onix import pipeline",
"",
"nlp = spacy.load(\"en_core_web_sm\")",
"nlp.add_pipe(\"sentencizer\")",
"nlp.add_pipe(\"sentimental_onix\", after=\"sentencizer\")",
"",
"sentences = [",
" (sent.text, sent._.sentiment)",
" for doc in nlp.pipe(",
" [",
" \"i hate pasta on tuesdays\",",
" \"i like movies on wednesdays\",",
" \"i find your argument ridiculous\",",
" \"soda with straws are my favorite\",",
" ]",
" )",
" for sent in doc.sents",
"]",
"",
"assert sentences == [",
" (\"i hate pasta on tuesdays\", \"Negative\"),",
" (\"i like movies on wednesdays\", \"Positive\"),",
" (\"i find your argument ridiculous\", \"Negative\"),",
" (\"soda with straws are my favorite\", \"Positive\"),",
"]"
],
"thumb": "https://raw.githubusercontent.com/sloev/sentimental-onix/master/.github/onix.webp",
"author": "Johannes Valbjørn",
"author_links": {
"github": "sloev"
},
"category": ["pipeline"],
"tags": ["sentiment", "english"]
},
{
"id": "gobbli",
"title": "gobbli",
@@ -111,11 +111,12 @@
line-height: var(--line-height-xs)
text-align: center

@include breakpoint(max, xs)
.list
@include breakpoint(max, md)
.alert
display: none

.alert
@include breakpoint(max, xs)
.list
display: none

.has-alert
@@ -25,11 +25,6 @@ const AlertSpace = ({ nightly, legacy }) => {
const isOnline = useOnlineStatus()
return (
<>
{isOnline && (
<Alert title="💥 We'd love to learn more about your experience with spaCy!">
<Link to="https://form.typeform.com/to/aMel9q9f">Take our survey here.</Link>
</Alert>
)}
{nightly && (
<Alert
title="You're viewing the pre-release docs."

@@ -62,9 +57,15 @@ const AlertSpace = ({ nightly, legacy }) => {
)
}

// const navAlert = (
//   <Link to="/usage/v3-5" noLinkLayout>
//     <strong>💥 Out now:</strong> spaCy v3.5
//   </Link>
// )

const navAlert = (
<Link to="/usage/v3-5" noLinkLayout>
<strong>💥 Out now:</strong> spaCy v3.5
<Link to="https://form.typeform.com/to/aMel9q9f" noLinkLayout>
<strong>💥 Take the user survey!</strong>
</Link>
)