Merge pull request #11741 from adrianeboyd/chore/update-v4-from-master-4

Update v4 from master

Commit d0fc871a1c
.github/azure-steps.yml (70 changed lines, vendored)

@@ -1,9 +1,7 @@
 parameters:
   python_version: ''
-  architecture: ''
-  prefix: ''
-  gpu: false
-  num_build_jobs: 1
+  architecture: 'x64'
+  num_build_jobs: 2
 
 steps:
   - task: UsePythonVersion@0
@@ -17,16 +15,16 @@ steps:
     displayName: 'Set variables'
 
   - script: |
-      ${{ parameters.prefix }} python -m pip install -U pip setuptools
-      ${{ parameters.prefix }} python -m pip install -U -r requirements.txt
+      python -m pip install -U build pip setuptools
+      python -m pip install -U -r requirements.txt
    displayName: "Install dependencies"
 
   - script: |
-      ${{ parameters.prefix }} python setup.py build_ext --inplace -j ${{ parameters.num_build_jobs }}
-      ${{ parameters.prefix }} python setup.py sdist --formats=gztar
-    displayName: "Compile and build sdist"
+      python -m build --sdist
+    displayName: "Build sdist"
 
-  - script: python -m mypy spacy
+  - script: |
+      python -m mypy spacy
    displayName: 'Run mypy'
    condition: ne(variables['python_version'], '3.6')
 
@@ -35,35 +33,24 @@ steps:
      contents: "spacy"
    displayName: "Delete source directory"
 
+  - task: DeleteFiles@1
+    inputs:
+      contents: "*.egg-info"
+    displayName: "Delete egg-info directory"
+
  - script: |
-      ${{ parameters.prefix }} python -m pip freeze --exclude torch --exclude cupy-cuda110 > installed.txt
-      ${{ parameters.prefix }} python -m pip uninstall -y -r installed.txt
+      python -m pip freeze > installed.txt
+      python -m pip uninstall -y -r installed.txt
    displayName: "Uninstall all packages"
 
  - bash: |
-      ${{ parameters.prefix }} SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
-      ${{ parameters.prefix }} SPACY_NUM_BUILD_JOBS=2 python -m pip install dist/$SDIST
+      SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
+      SPACY_NUM_BUILD_JOBS=${{ parameters.num_build_jobs }} python -m pip install dist/$SDIST
    displayName: "Install from sdist"
 
  - script: |
-      ${{ parameters.prefix }} python -m pip install -U -r requirements.txt
-    displayName: "Install test requirements"
-
-  - script: |
-      ${{ parameters.prefix }} python -m pip install -U cupy-cuda110 -f https://github.com/cupy/cupy/releases/v9.0.0
-      ${{ parameters.prefix }} python -m pip install "torch==1.7.1+cu110" -f https://download.pytorch.org/whl/torch_stable.html
-    displayName: "Install GPU requirements"
-    condition: eq(${{ parameters.gpu }}, true)
-
-  - script: |
-      ${{ parameters.prefix }} python -m pytest --pyargs spacy -W error
-    displayName: "Run CPU tests"
-    condition: eq(${{ parameters.gpu }}, false)
-
-  - script: |
-      ${{ parameters.prefix }} python -m pytest --pyargs spacy -W error -p spacy.tests.enable_gpu
-    displayName: "Run GPU tests"
-    condition: eq(${{ parameters.gpu }}, true)
+      python -W error -c "import spacy"
+    displayName: "Test import"
 
  - script: |
      python -m spacy download ca_core_news_sm
@@ -106,13 +93,22 @@ steps:
    displayName: 'Test assemble CLI vectors warning'
    condition: eq(variables['python_version'], '3.8')
 
+  - script: |
+      python -m pip install -U -r requirements.txt
+    displayName: "Install test requirements"
+
+  - script: |
+      python -m pytest --pyargs spacy -W error
+    displayName: "Run CPU tests"
+
+  - script: |
+      python -m pip install --pre thinc-apple-ops
+      python -m pytest --pyargs spacy
+    displayName: "Run CPU tests with thinc-apple-ops"
+    condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.11'))
+
  - script: |
      python .github/validate_universe_json.py website/meta/universe.json
    displayName: 'Test website/meta/universe.json'
    condition: eq(variables['python_version'], '3.8')
-
-  - script: |
-      ${{ parameters.prefix }} python -m pip install --pre thinc-apple-ops
-      ${{ parameters.prefix }} python -m pytest --pyargs spacy
-    displayName: "Run CPU tests with thinc-apple-ops"
-    condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.10'))
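The build step above now calls the PEP 517 front-end (`python -m build --sdist`) instead of `setup.py sdist`. As a rough sketch of what that step does, assuming the `build` package is installed, the same call can be made from Python:

import subprocess
import sys

# Run the PEP 517 build front-end the way the "Build sdist" step does; the
# resulting archive lands in ./dist for the later "Install from sdist" step.
subprocess.run([sys.executable, "-m", "build", "--sdist"], check=True)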
.github/workflows/autoblack.yml (9 changed lines, vendored)

@@ -12,10 +12,10 @@ jobs:
    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
        with:
          ref: ${{ github.head_ref }}
-      - uses: actions/setup-python@v2
+      - uses: actions/setup-python@v4
      - run: pip install black
      - name: Auto-format code if needed
        run: black spacy
@@ -23,10 +23,11 @@ jobs:
        # code and makes GitHub think the action failed
      - name: Check for modified files
        id: git-check
-        run: echo ::set-output name=modified::$(if git diff-index --quiet HEAD --; then echo "false"; else echo "true"; fi)
+        run: echo modified=$(if git diff-index --quiet HEAD --; then echo "false"; else echo "true"; fi) >> $GITHUB_OUTPUT
+
      - name: Create Pull Request
        if: steps.git-check.outputs.modified == 'true'
-        uses: peter-evans/create-pull-request@v3
+        uses: peter-evans/create-pull-request@v4
        with:
          title: Auto-format code with black
          labels: meta
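This workflow, like slowtests.yml further down, replaces the deprecated `::set-output` command with writes to the file named by `$GITHUB_OUTPUT`. A hedged sketch of the same migration for a step that happens to be implemented in Python (the helper name is made up for illustration):

import os

def set_output(name: str, value: str) -> None:
    # Append a name=value line to the file GitHub Actions exposes via
    # GITHUB_OUTPUT; this replaces printing "::set-output name=...::...".
    with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as fh:
        fh.write(f"{name}={value}\n")

set_output("modified", "true")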
.github/workflows/explosionbot.yml (6 changed lines, vendored)

@@ -8,14 +8,14 @@ on:
 
 jobs:
   explosion-bot:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-latest
    steps:
      - name: Dump GitHub context
        env:
          GITHUB_CONTEXT: ${{ toJson(github) }}
        run: echo "$GITHUB_CONTEXT"
-      - uses: actions/checkout@v1
-      - uses: actions/setup-python@v1
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
      - name: Install and run explosion-bot
        run: |
          pip install git+https://${{ secrets.EXPLOSIONBOT_TOKEN }}@github.com/explosion/explosion-bot
.github/workflows/slowtests.yml (6 changed lines, vendored)

@@ -14,7 +14,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v1
+        uses: actions/checkout@v3
        with:
          ref: ${{ matrix.branch }}
      - name: Get commits from past 24 hours
@@ -23,9 +23,9 @@ jobs:
          today=$(date '+%Y-%m-%d %H:%M:%S')
          yesterday=$(date -d "yesterday" '+%Y-%m-%d %H:%M:%S')
          if git log --after="$yesterday" --before="$today" | grep commit ; then
-            echo "::set-output name=run_tests::true"
+            echo run_tests=true >> $GITHUB_OUTPUT
          else
-            echo "::set-output name=run_tests::false"
+            echo run_tests=false >> $GITHUB_OUTPUT
          fi
 
      - name: Trigger buildkite build
.github/workflows/spacy_universe_alert.yml (4 changed lines, vendored)

@@ -17,8 +17,8 @@ jobs:
        run: |
          echo "$GITHUB_CONTEXT"
 
-      - uses: actions/checkout@v1
-      - uses: actions/setup-python@v1
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
      - name: Install Bernadette app dependency and send an alert
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
README.md

@@ -8,7 +8,7 @@ be used in real products.
 
 spaCy comes with
 [pretrained pipelines](https://spacy.io/models) and
-currently supports tokenization and training for **60+ languages**. It features
+currently supports tokenization and training for **70+ languages**. It features
 state-of-the-art speed and **neural network models** for tagging,
 parsing, **named entity recognition**, **text classification** and more,
 multi-task learning with pretrained **transformers** like BERT, as well as a
@@ -16,7 +16,7 @@ production-ready [**training system**](https://spacy.io/usage/training) and easy
 model packaging, deployment and workflow management. spaCy is commercial
 open-source software, released under the MIT license.
 
-💫 **Version 3.4.0 out now!**
+💫 **Version 3.4 out now!**
 [Check out the release notes here.](https://github.com/explosion/spaCy/releases)
 
 [![Azure Pipelines](https://img.shields.io/azure-devops/build/explosion-ai/public/8/master.svg?logo=azure-pipelines&style=flat-square&label=build)](https://dev.azure.com/explosion-ai/public/_build?definitionId=8)
@@ -79,7 +79,7 @@ more people can benefit from it.
 
 ## Features
 
-- Support for **60+ languages**
+- Support for **70+ languages**
 - **Trained pipelines** for different languages and tasks
 - Multi-task learning with pretrained **transformers** like BERT
 - Support for pretrained **word vectors** and embeddings
azure-pipelines.yml

@@ -76,24 +76,24 @@ jobs:
        # Python39Mac:
        #   imageName: "macos-latest"
        #   python.version: "3.9"
-        Python310Linux:
-          imageName: "ubuntu-latest"
-          python.version: "3.10"
+        # Python310Linux:
+        #   imageName: "ubuntu-latest"
+        #   python.version: "3.10"
        Python310Windows:
          imageName: "windows-latest"
          python.version: "3.10"
-        Python310Mac:
-          imageName: "macos-latest"
-          python.version: "3.10"
+        # Python310Mac:
+        #   imageName: "macos-latest"
+        #   python.version: "3.10"
        Python311Linux:
          imageName: 'ubuntu-latest'
-          python.version: '3.11.0-rc.2'
+          python.version: '3.11.0'
        Python311Windows:
          imageName: 'windows-latest'
-          python.version: '3.11.0-rc.2'
+          python.version: '3.11.0'
        Python311Mac:
          imageName: 'macos-latest'
-          python.version: '3.11.0-rc.2'
+          python.version: '3.11.0'
      maxParallel: 4
    pool:
      vmImage: $(imageName)
@@ -101,20 +101,3 @@ jobs:
    - template: .github/azure-steps.yml
      parameters:
        python_version: '$(python.version)'
-        architecture: 'x64'
-
-#  - job: "TestGPU"
-#    dependsOn: "Validate"
-#    strategy:
-#      matrix:
-#        Python38LinuxX64_GPU:
-#          python.version: '3.8'
-#    pool:
-#      name: "LinuxX64_GPU"
-#    steps:
-#      - template: .github/azure-steps.yml
-#        parameters:
-#          python_version: '$(python.version)'
-#          architecture: 'x64'
-#          gpu: true
-#          num_build_jobs: 24
spacy/ml/models/entity_linker.py

@@ -71,11 +71,10 @@ def span_maker_forward(model, docs: List[Doc], is_train) -> Tuple[Ragged, Callable]:
                cands.append((start_token, end_token))
 
        candidates.append(ops.asarray2i(cands))
-    candlens = ops.asarray1i([len(cands) for cands in candidates])
-    candidates = ops.xp.concatenate(candidates)
-    outputs = Ragged(candidates, candlens)
+    lengths = model.ops.asarray1i([len(cands) for cands in candidates])
+    out = Ragged(model.ops.flatten(candidates), lengths)
    # because this is just rearranging docs, the backprop does nothing
-    return outputs, lambda x: []
+    return out, lambda x: []
 
 
 @registry.misc("spacy.KBFromFile.v1")
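The rewritten span maker packs the per-doc candidate arrays into a single `Ragged` via `model.ops.flatten` rather than concatenating with `ops.xp` directly. A standalone sketch of that packing step, with made-up candidate offsets and a NumPy backend:

from thinc.api import NumpyOps, Ragged

ops = NumpyOps()
# Hypothetical per-doc candidate (start, end) token offsets.
candidates = [ops.asarray2i([[0, 1], [1, 3]]), ops.asarray2i([[0, 2]])]
lengths = ops.asarray1i([len(cands) for cands in candidates])
# Flat data plus per-doc lengths, as the new forward pass returns.
out = Ragged(ops.flatten(candidates), lengths)
print(out.lengths)  # [2 1]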
spacy/pipeline/textcat.py

@@ -27,8 +27,8 @@ single_label_default_config = """
 [model.tok2vec.embed]
 @architectures = "spacy.MultiHashEmbed.v2"
 width = 64
-rows = [2000, 2000, 1000, 1000, 1000, 1000]
-attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
+rows = [2000, 2000, 500, 1000, 500]
+attrs = ["NORM", "LOWER", "PREFIX", "SUFFIX", "SHAPE"]
 include_static_vectors = false
 
 [model.tok2vec.encode]
@@ -75,7 +75,7 @@ subword_features = true
    "textcat",
    assigns=["doc.cats"],
    default_config={
-        "threshold": 0.5,
+        "threshold": 0.0,
        "model": DEFAULT_SINGLE_TEXTCAT_MODEL,
        "scorer": {"@scorers": "spacy.textcat_scorer.v1"},
        "save_activations": False,
@@ -158,7 +158,8 @@ class TextCategorizer(TrainablePipe):
        model (thinc.api.Model): The Thinc Model powering the pipeline component.
        name (str): The component instance name, used to add entries to the
            losses during training.
-        threshold (float): Cutoff to consider a prediction "positive".
+        threshold (float): Unused, not needed for single-label (exclusive
+            classes) classification.
        scorer (Optional[Callable]): The scoring method. Defaults to
            Scorer.score_cats for the attribute "cats".
@@ -168,7 +169,7 @@ class TextCategorizer(TrainablePipe):
        self.model = model
        self.name = name
        self._rehearsal_model = None
-        cfg = {"labels": [], "threshold": threshold, "positive_label": None}
+        cfg: Dict[str, Any] = {"labels": [], "threshold": threshold, "positive_label": None}
        self.cfg = dict(cfg)
        self.scorer = scorer
        self.save_activations = save_activations
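For the single-label `textcat` component the default `threshold` drops to `0.0` and the docstring now marks it as unused: classes are exclusive, so the highest-scoring label is taken regardless of any cutoff. A minimal sketch of adding the component with its new defaults (labels are illustrative):

import spacy

nlp = spacy.blank("en")
# No threshold needed in the config; the argmax label is the prediction.
textcat = nlp.add_pipe("textcat")
textcat.add_label("POSITIVE")
textcat.add_label("NEGATIVE")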
spacy/pipeline/textcat_multilabel.py

@@ -24,8 +24,8 @@ multi_label_default_config = """
 [model.tok2vec.embed]
 @architectures = "spacy.MultiHashEmbed.v2"
 width = 64
-rows = [2000, 2000, 1000, 1000, 1000, 1000]
-attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
+rows = [2000, 2000, 500, 1000, 500]
+attrs = ["NORM", "LOWER", "PREFIX", "SUFFIX", "SHAPE"]
 include_static_vectors = false
 
 [model.tok2vec.encode]
spacy/scorer.py

@@ -446,7 +446,7 @@ class Scorer:
        labels (Iterable[str]): The set of possible labels. Defaults to [].
        multi_label (bool): Whether the attribute allows multiple labels.
            Defaults to True. When set to False (exclusive labels), missing
-            gold labels are interpreted as 0.0.
+            gold labels are interpreted as 0.0 and the threshold is set to 0.0.
        positive_label (str): The positive label for a binary task with
            exclusive classes. Defaults to None.
        threshold (float): Cutoff to consider a prediction "positive". Defaults
@@ -471,6 +471,8 @@ class Scorer:
        """
        if threshold is None:
            threshold = 0.5 if multi_label else 0.0
+        if not multi_label:
+            threshold = 0.0
        f_per_type = {label: PRFScore() for label in labels}
        auc_per_type = {label: ROCAUCScore() for label in labels}
        labels = set(labels)
@@ -505,20 +507,18 @@ class Scorer:
                # Get the highest-scoring for each.
                pred_label, pred_score = max(pred_cats.items(), key=lambda it: it[1])
                gold_label, gold_score = max(gold_cats.items(), key=lambda it: it[1])
-                if pred_label == gold_label and pred_score >= threshold:
+                if pred_label == gold_label:
                    f_per_type[pred_label].tp += 1
                else:
                    f_per_type[gold_label].fn += 1
-                    if pred_score >= threshold:
-                        f_per_type[pred_label].fp += 1
+                    f_per_type[pred_label].fp += 1
            elif gold_cats:
                gold_label, gold_score = max(gold_cats, key=lambda it: it[1])
                if gold_score > 0:
                    f_per_type[gold_label].fn += 1
            elif pred_cats:
                pred_label, pred_score = max(pred_cats.items(), key=lambda it: it[1])
-                if pred_score >= threshold:
-                    f_per_type[pred_label].fp += 1
+                f_per_type[pred_label].fp += 1
        micro_prf = PRFScore()
        for label_prf in f_per_type.values():
            micro_prf.tp += label_prf.tp
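With exclusive labels, `Scorer.score_cats` now forces the threshold to `0.0` and always counts the argmax prediction. A simplified illustration of the new counting rule, with plain dicts standing in for `PRFScore` (not the library code):

from collections import defaultdict

def count_exclusive(pred_cats, gold_cats, counts=None):
    # Highest-scoring label on each side; no threshold check any more.
    if counts is None:
        counts = defaultdict(lambda: {"tp": 0, "fp": 0, "fn": 0})
    pred_label = max(pred_cats, key=pred_cats.get)
    gold_label = max(gold_cats, key=gold_cats.get)
    if pred_label == gold_label:
        counts[pred_label]["tp"] += 1
    else:
        counts[gold_label]["fn"] += 1
        counts[pred_label]["fp"] += 1
    return counts

counts = count_exclusive({"POSITIVE": 0.3, "NEGATIVE": 0.7}, {"POSITIVE": 1.0, "NEGATIVE": 0.0})
print(dict(counts))  # NEGATIVE gets one fp, POSITIVE one fn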
spacy/tests/pipeline/test_entity_linker.py

@@ -10,6 +10,7 @@ from spacy.compat import pickle
 from spacy.kb import Candidate, InMemoryLookupKB, get_candidates, KnowledgeBase
 from spacy.lang.en import English
 from spacy.ml import load_kb
+from spacy.ml.models.entity_linker import build_span_maker
 from spacy.pipeline import EntityLinker, TrainablePipe
 from spacy.pipeline.legacy import EntityLinker_v1
 from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
@@ -716,7 +717,11 @@ TRAIN_DATA = [
    ("Russ Cochran was a member of University of Kentucky's golf team.",
        {"links": {(0, 12): {"Q7381115": 0.0, "Q2146908": 1.0}},
         "entities": [(0, 12, "PERSON"), (43, 51, "LOC")],
-         "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]})
+         "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}),
+    # having a blank instance shouldn't break things
+    ("The weather is nice today.",
+        {"links": {}, "entities": [],
+         "sent_starts": [1, -1, 0, 0, 0, 0]})
 ]
 GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
 # fmt: on
@@ -1260,3 +1265,18 @@ def test_save_activations():
    assert scores.data.shape == (2, 1)
    assert scores.data.dtype == "float32"
    assert scores.lengths.shape == (1,)
+
+
+def test_span_maker_forward_with_empty():
+    """The forward pass of the span maker may have a doc with no entities."""
+    nlp = English()
+    doc1 = nlp("a b c")
+    ent = doc1[0:1]
+    ent.label_ = "X"
+    doc1.ents = [ent]
+    # no entities
+    doc2 = nlp("x y z")
+
+    # just to get a model
+    span_maker = build_span_maker()
+    span_maker([doc1, doc2], False)
spacy/tests/pipeline/test_textcat.py

@@ -824,10 +824,10 @@ def test_textcat_loss(multi_label: bool, expected_loss: float):
    assert loss == expected_loss
 
 
-def test_textcat_threshold():
+def test_textcat_multilabel_threshold():
    # Ensure the scorer can be called with a different threshold
    nlp = English()
-    nlp.add_pipe("textcat")
+    nlp.add_pipe("textcat_multilabel")
 
    train_examples = []
    for text, annotations in TRAIN_DATA_SINGLE_LABEL:
@@ -850,7 +850,7 @@ def test_textcat_threshold():
    )
    pos_f = scores["cats_score"]
    assert scores["cats_f_per_type"]["POSITIVE"]["r"] == 1.0
-    assert pos_f > macro_f
+    assert pos_f >= macro_f
 
 
 def test_textcat_multi_threshold():
spacy/tests/pipeline/test_tok2vec.py

@@ -231,7 +231,7 @@ def test_tok2vec_listener_callback():
 
 
 def test_tok2vec_listener_overfitting():
-    """ Test that a pipeline with a listener properly overfits, even if 'tok2vec' is in the annotating components """
+    """Test that a pipeline with a listener properly overfits, even if 'tok2vec' is in the annotating components"""
    orig_config = Config().from_str(cfg_string)
    nlp = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
    train_examples = []
@@ -264,7 +264,7 @@ def test_tok2vec_listener_overfitting():
 
 
 def test_tok2vec_frozen_not_annotating():
-    """ Test that a pipeline with a frozen tok2vec raises an error when the tok2vec is not annotating """
+    """Test that a pipeline with a frozen tok2vec raises an error when the tok2vec is not annotating"""
    orig_config = Config().from_str(cfg_string)
    nlp = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
    train_examples = []
@@ -274,12 +274,16 @@ def test_tok2vec_frozen_not_annotating():
 
    for i in range(2):
        losses = {}
-        with pytest.raises(ValueError, match=r"the tok2vec embedding layer is not updated"):
-            nlp.update(train_examples, sgd=optimizer, losses=losses, exclude=["tok2vec"])
+        with pytest.raises(
+            ValueError, match=r"the tok2vec embedding layer is not updated"
+        ):
+            nlp.update(
+                train_examples, sgd=optimizer, losses=losses, exclude=["tok2vec"]
+            )
 
 
 def test_tok2vec_frozen_overfitting():
-    """ Test that a pipeline with a frozen & annotating tok2vec can still overfit """
+    """Test that a pipeline with a frozen & annotating tok2vec can still overfit"""
    orig_config = Config().from_str(cfg_string)
    nlp = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
    train_examples = []
@@ -289,7 +293,13 @@ def test_tok2vec_frozen_overfitting():
 
    for i in range(100):
        losses = {}
-        nlp.update(train_examples, sgd=optimizer, losses=losses, exclude=["tok2vec"], annotates=["tok2vec"])
+        nlp.update(
+            train_examples,
+            sgd=optimizer,
+            losses=losses,
+            exclude=["tok2vec"],
+            annotates=["tok2vec"],
+        )
    assert losses["tagger"] < 0.0001
 
    # test the trained model
spacy/tests/test_models.py

@@ -23,7 +23,7 @@ def get_textcat_bow_kwargs():
 
 
 def get_textcat_cnn_kwargs():
-    return {"tok2vec": test_tok2vec(), "exclusive_classes": False, "nO": 13}
+    return {"tok2vec": make_test_tok2vec(), "exclusive_classes": False, "nO": 13}
 
 
 def get_all_params(model):
@@ -65,7 +65,7 @@ def get_tok2vec_kwargs():
    }
 
 
-def test_tok2vec():
+def make_test_tok2vec():
    return build_Tok2Vec_model(**get_tok2vec_kwargs())
 
 
spacy/tests/test_scorer.py

@@ -474,3 +474,50 @@ def test_prf_score():
    assert (a.precision, a.recall, a.fscore) == approx(
        (c.precision, c.recall, c.fscore)
    )
+
+
+def test_score_cats(en_tokenizer):
+    text = "some text"
+    gold_doc = en_tokenizer(text)
+    gold_doc.cats = {"POSITIVE": 1.0, "NEGATIVE": 0.0}
+    pred_doc = en_tokenizer(text)
+    pred_doc.cats = {"POSITIVE": 0.75, "NEGATIVE": 0.25}
+    example = Example(pred_doc, gold_doc)
+    # threshold is ignored for multi_label=False
+    scores1 = Scorer.score_cats(
+        [example],
+        "cats",
+        labels=list(gold_doc.cats.keys()),
+        multi_label=False,
+        positive_label="POSITIVE",
+        threshold=0.1,
+    )
+    scores2 = Scorer.score_cats(
+        [example],
+        "cats",
+        labels=list(gold_doc.cats.keys()),
+        multi_label=False,
+        positive_label="POSITIVE",
+        threshold=0.9,
+    )
+    assert scores1["cats_score"] == 1.0
+    assert scores2["cats_score"] == 1.0
+    assert scores1 == scores2
+    # threshold is relevant for multi_label=True
+    scores = Scorer.score_cats(
+        [example],
+        "cats",
+        labels=list(gold_doc.cats.keys()),
+        multi_label=True,
+        threshold=0.9,
+    )
+    assert scores["cats_macro_f"] == 0.0
+    # threshold is relevant for multi_label=True
+    scores = Scorer.score_cats(
+        [example],
+        "cats",
+        labels=list(gold_doc.cats.keys()),
+        multi_label=True,
+        threshold=0.1,
+    )
+    assert scores["cats_macro_f"] == 0.5
website/docs/api/scorer.md

@@ -229,16 +229,17 @@ The reported `{attr}_score` depends on the classification properties:
 > print(scores["cats_macro_auc"])
 > ```
 
 | Name | Description |
 | ---------------- | ----------- |
 | `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
 | `attr` | The attribute to score. ~~str~~ |
 | _keyword-only_ | |
 | `getter` | Defaults to `getattr`. If provided, `getter(doc, attr)` should return the cats for an individual `Doc`. ~~Callable[[Doc, str], Dict[str, float]]~~ |
 | labels | The set of possible labels. Defaults to `[]`. ~~Iterable[str]~~ |
-| `multi_label` | Whether the attribute allows multiple labels. Defaults to `True`. ~~bool~~ |
+| `multi_label` | Whether the attribute allows multiple labels. Defaults to `True`. When set to `False` (exclusive labels), missing gold labels are interpreted as `0.0` and the threshold is set to `0.0`. ~~bool~~ |
 | `positive_label` | The positive label for a binary task with exclusive classes. Defaults to `None`. ~~Optional[str]~~ |
-| **RETURNS** | A dictionary containing the scores, with inapplicable scores as `None`. ~~Dict[str, Optional[float]]~~ |
+| `threshold` | Cutoff to consider a prediction "positive". Defaults to `0.5` for multi-label, and `0.0` (i.e. whatever's highest scoring) otherwise. ~~float~~ |
+| **RETURNS** | A dictionary containing the scores, with inapplicable scores as `None`. ~~Dict[str, Optional[float]]~~ |
 
 ## Scorer.score_links {#score_links tag="staticmethod" new="3"}
website/docs/api/textcategorizer.md

@@ -63,7 +63,6 @@ architectures and their arguments and hyperparameters.
 > ```python
 > from spacy.pipeline.textcat import DEFAULT_SINGLE_TEXTCAT_MODEL
 > config = {
->     "threshold": 0.5,
 >     "model": DEFAULT_SINGLE_TEXTCAT_MODEL,
 > }
 > nlp.add_pipe("textcat", config=config)
@@ -82,7 +81,7 @@ architectures and their arguments and hyperparameters.
 | Setting | Description |
 | ----------- | ----------- |
-| `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ |
+| `threshold` | Cutoff to consider a prediction "positive", relevant for `textcat_multilabel` when calculating accuracy scores. ~~float~~ |
 | `model` | A model instance that predicts scores for each category. Defaults to [TextCatEnsemble](/api/architectures#TextCatEnsemble). ~~Model[List[Doc], List[Floats2d]]~~ |
 | `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ |
@@ -123,7 +122,7 @@ shortcut for this and instantiate the component using its string name and
 | `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
 | `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
 | _keyword-only_ | |
-| `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ |
+| `threshold` | Cutoff to consider a prediction "positive", relevant for `textcat_multilabel` when calculating accuracy scores. ~~float~~ |
 | `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ |
 | `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. The supported activations is `"probabilities"`. ~~Union[bool, list[str]]~~ |
@@ -1791,7 +1791,7 @@ the entity `Span` – for example `._.orgs` or `._.prev_orgs` and
 > [`Doc.retokenize`](/api/doc#retokenize) context manager:
 >
 > ```python
-> with doc.retokenize() as retokenize:
+> with doc.retokenize() as retokenizer:
 >     for ent in doc.ents:
 >         retokenizer.merge(ent)
 > ```
website/meta/languages.json

@@ -4,12 +4,22 @@
        "code": "af",
        "name": "Afrikaans"
    },
+    {
+        "code": "am",
+        "name": "Amharic",
+        "has_examples": true
+    },
    {
        "code": "ar",
        "name": "Arabic",
        "example": "هذه جملة",
        "has_examples": true
    },
+    {
+        "code": "az",
+        "name": "Azerbaijani",
+        "has_examples": true
+    },
    {
        "code": "bg",
        "name": "Bulgarian",
@@ -65,7 +75,7 @@
    {
        "code": "dsb",
        "name": "Lower Sorbian",
        "has_examples": true
    },
    {
        "code": "el",
@@ -142,6 +152,11 @@
        "code": "ga",
        "name": "Irish"
    },
+    {
+        "code": "grc",
+        "name": "Ancient Greek",
+        "has_examples": true
+    },
    {
        "code": "gu",
        "name": "Gujarati",
@@ -172,7 +187,7 @@
    {
        "code": "hsb",
        "name": "Upper Sorbian",
        "has_examples": true
    },
    {
        "code": "hu",
@@ -260,6 +275,10 @@
        "example": "Адамга эң кыйыны — күн сайын адам болуу",
        "has_examples": true
    },
+    {
+        "code": "la",
+        "name": "Latin"
+    },
    {
        "code": "lb",
        "name": "Luxembourgish",
@@ -448,6 +467,11 @@
        "example": "นี่คือประโยค",
        "has_examples": true
    },
+    {
+        "code": "ti",
+        "name": "Tigrinya",
+        "has_examples": true
+    },
    {
        "code": "tl",
        "name": "Tagalog"
@@ -149,6 +149,9 @@
    & > span
        display: block
 
+    a
+        text-decoration: underline
+
 .small
    font-size: var(--font-size-code)
    line-height: 1.65
website/src/widgets/quickstart-install.js

@@ -159,6 +159,9 @@ const QuickstartInstall = ({ id, title }) => {
            setters={setters}
            showDropdown={showDropdown}
        >
+            <QS os="mac" hardware="gpu" platform="arm">
+                # Note M1 GPU support is experimental, see <a href="https://github.com/explosion/thinc/issues/792">Thinc issue #792</a>
+            </QS>
            <QS package="pip" config="venv">
                python -m venv .env
            </QS>
@@ -198,7 +201,13 @@ const QuickstartInstall = ({ id, title }) => {
                {nightly ? ' --pre' : ''}
            </QS>
            <QS package="conda">conda install -c conda-forge spacy</QS>
-            <QS package="conda" hardware="gpu">
+            <QS package="conda" hardware="gpu" os="windows">
+                conda install -c conda-forge cupy
+            </QS>
+            <QS package="conda" hardware="gpu" os="linux">
+                conda install -c conda-forge cupy
+            </QS>
+            <QS package="conda" hardware="gpu" os="mac" platform="x86">
                conda install -c conda-forge cupy
            </QS>
            <QS package="conda" config="train">