Merge pull request #9563 from adrianeboyd/chore/update-develop-from-master-v3.2-3

Update develop from master for v3.2
Adriane Boyd 2021-10-29 14:08:14 +02:00 committed by GitHub
commit 5e9db156c2
18 changed files with 140 additions and 45 deletions

View File

@@ -27,6 +27,7 @@ steps:
       - script: python -m mypy spacy
         displayName: 'Run mypy'
+        condition: ne(variables['python_version'], '3.10')
       - task: DeleteFiles@1
         inputs:

View File

@@ -42,7 +42,7 @@ jobs:
           imageName: "ubuntu-18.04"
           python.version: "3.6"
         # Python36Windows:
-        #   imageName: "vs2017-win2016"
+        #   imageName: "windows-2019"
         #   python.version: "3.6"
         # Python36Mac:
         #   imageName: "macos-10.14"
@@ -51,7 +51,7 @@ jobs:
         #   imageName: "ubuntu-18.04"
        #   python.version: "3.7"
         Python37Windows:
-          imageName: "vs2017-win2016"
+          imageName: "windows-2019"
           python.version: "3.7"
         # Python37Mac:
         #   imageName: "macos-10.14"
@@ -60,7 +60,7 @@ jobs:
         #   imageName: "ubuntu-18.04"
         #   python.version: "3.8"
         # Python38Windows:
-        #   imageName: "vs2017-win2016"
+        #   imageName: "windows-2019"
         #   python.version: "3.8"
         Python38Mac:
           imageName: "macos-10.14"
@@ -68,12 +68,21 @@ jobs:
         Python39Linux:
           imageName: "ubuntu-18.04"
           python.version: "3.9"
-        Python39Windows:
-          imageName: "vs2017-win2016"
-          python.version: "3.9"
-        Python39Mac:
-          imageName: "macos-10.14"
-          python.version: "3.9"
+        # Python39Windows:
+        #   imageName: "windows-2019"
+        #   python.version: "3.9"
+        # Python39Mac:
+        #   imageName: "macos-10.14"
+        #   python.version: "3.9"
+        Python310Linux:
+          imageName: "ubuntu-20.04"
+          python.version: "3.10"
+        Python310Windows:
+          imageName: "windows-2019"
+          python.version: "3.10"
+        Python310Mac:
+          imageName: "macos-10.15"
+          python.version: "3.10"
       maxParallel: 4
     pool:
       vmImage: $(imageName)

View File

@@ -2,4 +2,5 @@
 numpy==1.15.0; python_version<='3.7'
 numpy==1.17.3; python_version=='3.8'
 numpy==1.19.3; python_version=='3.9'
-numpy; python_version>='3.10'
+numpy==1.21.3; python_version=='3.10'
+numpy; python_version>='3.11'

View File

@@ -5,7 +5,7 @@ requires = [
     "cymem>=2.0.2,<2.1.0",
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=8.0.11,<8.1.0",
+    "thinc>=8.0.12,<8.1.0",
     "blis>=0.4.0,<0.8.0",
     "pathy",
     "numpy>=1.15.0",

View File

@@ -3,7 +3,7 @@ spacy-legacy>=3.0.8,<3.1.0
 spacy-loggers>=1.0.0,<2.0.0
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.0.11,<8.1.0
+thinc>=8.0.12,<8.1.0
 blis>=0.4.0,<0.8.0
 ml_datasets>=0.2.0,<0.3.0
 murmurhash>=0.28.0,<1.1.0

View File

@@ -21,6 +21,7 @@ classifiers =
     Programming Language :: Python :: 3.7
     Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
+    Programming Language :: Python :: 3.10
     Topic :: Scientific/Engineering
 project_urls =
     Release notes = https://github.com/explosion/spaCy/releases
@@ -37,7 +38,7 @@ setup_requires =
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     murmurhash>=0.28.0,<1.1.0
-    thinc>=8.0.11,<8.1.0
+    thinc>=8.0.12,<8.1.0
 install_requires =
     # Our libraries
     spacy-legacy>=3.0.8,<3.1.0
@@ -45,7 +46,7 @@ install_requires =
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
-    thinc>=8.0.11,<8.1.0
+    thinc>=8.0.12,<8.1.0
     blis>=0.4.0,<0.8.0
     wasabi>=0.8.1,<1.1.0
     srsly>=2.4.1,<3.0.0
@@ -97,6 +98,12 @@ cuda111 =
     cupy-cuda111>=5.0.0b4,<10.0.0
 cuda112 =
     cupy-cuda112>=5.0.0b4,<10.0.0
+cuda113 =
+    cupy-cuda113>=5.0.0b4,<10.0.0
+cuda114 =
+    cupy-cuda114>=5.0.0b4,<10.0.0
+apple =
+    thinc-apple-ops>=0.0.4,<1.0.0
 # Language tokenizers with external dependencies
 ja =
     sudachipy>=0.4.9

View File

@@ -1,4 +1,4 @@
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, Union
 from pathlib import Path
 from wasabi import msg
 import typer
@@ -46,12 +46,14 @@ def train_cli(
 def train(
-    config_path: Path,
-    output_path: Optional[Path] = None,
+    config_path: Union[str, Path],
+    output_path: Optional[Union[str, Path]] = None,
     *,
     use_gpu: int = -1,
     overrides: Dict[str, Any] = util.SimpleFrozenDict(),
 ):
+    config_path = util.ensure_path(config_path)
+    output_path = util.ensure_path(output_path)
     # Make sure all files and paths exists if they are needed
     if not config_path or (str(config_path) != "-" and not config_path.exists()):
         msg.fail("Config file not found", config_path, exits=1)

View File

@@ -893,6 +893,7 @@ class Errors:
              "filename. Specify an epoch to resume from.")
     E1021 = ("`pos` value \"{pp}\" is not a valid Universal Dependencies tag. "
              "Non-UD tags should use the `tag` property.")
+    E1022 = ("Words must be of type str or int, but input is of type '{wtype}'")

     # Deprecated model shortcuts, only used in errors and warnings

View File

@@ -25,6 +25,7 @@ def test_build_dependencies():
         "sudachipy",
         "sudachidict_core",
         "spacy-pkuseg",
+        "thinc-apple-ops",
     ]
     # check requirements.txt

View File

@@ -1,6 +1,6 @@
 from typing import Dict, Iterable, Callable
 import pytest
-from thinc.api import Config
+from thinc.api import Config, fix_random_seed
 from spacy import Language
 from spacy.util import load_model_from_config, registry, resolve_dot_names
 from spacy.schemas import ConfigSchemaTraining
@@ -64,8 +64,8 @@ def test_readers():
 @pytest.mark.parametrize(
     "reader,additional_config",
     [
-        ("ml_datasets.imdb_sentiment.v1", {"train_limit": 10, "dev_limit": 2}),
-        ("ml_datasets.dbpedia.v1", {"train_limit": 10, "dev_limit": 2}),
+        ("ml_datasets.imdb_sentiment.v1", {"train_limit": 10, "dev_limit": 10}),
+        ("ml_datasets.dbpedia.v1", {"train_limit": 10, "dev_limit": 10}),
         ("ml_datasets.cmu_movies.v1", {"limit": 10, "freq_cutoff": 200, "split": 0.8}),
     ],
 )
@@ -93,6 +93,7 @@ def test_cat_readers(reader, additional_config):
     factory = "textcat_multilabel"
     """
     config = Config().from_str(nlp_config_string)
+    fix_random_seed(config["training"]["seed"])
     config["corpora"]["@readers"] = reader
     config["corpora"].update(additional_config)
     nlp = load_model_from_config(config, auto_fill=True)
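
For context: the fix works because `fix_random_seed` seeds the global RNGs before the corpus readers sample their train/dev splits, making the partition reproducible. A minimal standalone sketch of the same pattern, assuming only thinc's public API:

```python
from thinc.api import Config, fix_random_seed

# A config fragment with just the seed; config values are parsed as
# JSON, so "0" becomes the int 0.
config = Config().from_str("[training]\nseed = 0")
# Seed random/numpy before any reader shuffles or splits data, so
# repeated runs see the same examples.
fix_random_seed(config["training"]["seed"])
```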

View File

@@ -194,11 +194,12 @@ cdef class Doc:
         vocab (Vocab): A vocabulary object, which must match any models you
             want to use (e.g. tokenizer, parser, entity recognizer).
-        words (Optional[List[str]]): A list of unicode strings to add to the document
-            as words. If `None`, defaults to empty list.
-        spaces (Optional[List[bool]]): A list of boolean values, of the same length as
-            words. True means that the word is followed by a space, False means
-            it is not. If `None`, defaults to `[True]*len(words)`
+        words (Optional[List[Union[str, int]]]): A list of unicode strings or
+            hash values to add to the document as words. If `None`, defaults to
+            empty list.
+        spaces (Optional[List[bool]]): A list of boolean values, of the same
+            length as `words`. `True` means that the word is followed by a space,
+            `False` means it is not. If `None`, defaults to `[True]*len(words)`
         user_data (dict or None): Optional extra data to attach to the Doc.
         tags (Optional[List[str]]): A list of unicode strings, of the same
             length as words, to assign as token.tag. Defaults to None.
@@ -266,7 +267,10 @@ cdef class Doc:
             elif isinstance(word, bytes):
                 raise ValueError(Errors.E028.format(value=word))
             else:
-                lexeme = self.vocab.get_by_orth(self.mem, word)
+                try:
+                    lexeme = self.vocab.get_by_orth(self.mem, word)
+                except TypeError:
+                    raise TypeError(Errors.E1022.format(wtype=type(word)))
             self.push_back(lexeme, has_space)
         if heads is not None:
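
To illustrate what this hunk enables: known string hashes can now be passed in `words` alongside plain strings, and anything that is neither `str` nor `int` fails with the new E1022 `TypeError` instead of an opaque internal error. A sketch (the example words are arbitrary):

```python
from spacy.tokens import Doc
from spacy.vocab import Vocab

vocab = Vocab()
# Store "hello" and keep its 64-bit hash; the hash can now stand in
# for the string when constructing a Doc.
hello_id = vocab.strings.add("hello")
doc = Doc(vocab, words=[hello_id, "world"], spaces=[True, False])
assert doc.text == "hello world"

# Unsupported word types now raise E1022:
try:
    Doc(vocab, words=[3.14])
except TypeError as err:
    print(err)  # Words must be of type str or int ...
```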

View File

@@ -820,6 +820,29 @@ $ python -m spacy train [config_path] [--output] [--code] [--verbose] [--gpu-id]
 | overrides   | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. ~~Any (option/flag)~~ |
 | **CREATES** | The final trained pipeline and the best trained pipeline.                                                                                                                                  |

+### Calling the training function from Python {#train-function new="3.2"}
+
+The training CLI exposes a `train` helper function that lets you run the
+training just like `spacy train`. Usually it's easier to use the command line
+directly, but if you need to kick off training from code this is how to do it.
+
+> #### Example
+>
+> ```python
+> from spacy.cli.train import train
+>
+> train("./config.cfg", overrides={"paths.train": "./train.spacy", "paths.dev": "./dev.spacy"})
+> ```
+
+| Name           | Description                                                                                                                    |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------ |
+| `config_path`  | Path to the config to use for training. ~~Union[str, Path]~~                                                                    |
+| `output_path`  | Optional name of directory to save output model in. If not provided a model will not be saved. ~~Optional[Union[str, Path]]~~   |
+| _keyword-only_ |                                                                                                                                  |
+| `use_gpu`      | Which GPU to use. Defaults to -1 for no GPU. ~~int~~                                                                             |
+| `overrides`    | Values to override config settings. ~~Dict[str, Any]~~                                                                           |
+
 ## pretrain {#pretrain new="2.1" tag="command,experimental"}

 Pretrain the "token to vector" ([`Tok2vec`](/api/tok2vec)) layer of pipeline

View File

@@ -34,7 +34,7 @@ Construct a `Doc` object. The most common way to get a `Doc` object is via the
 | Name           | Description                                                                                                                                                                                  |
 | -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `vocab`        | A storage container for lexical types. ~~Vocab~~                                                                                                                                             |
-| `words`        | A list of strings to add to the container. ~~Optional[List[str]]~~                                                                                                                           |
+| `words`        | A list of strings or integer hash values to add to the document as words. ~~Optional[List[Union[str,int]]]~~                                                                                 |
 | `spaces`       | A list of boolean values indicating whether each word has a subsequent space. Must have the same length as `words`, if specified. Defaults to a sequence of `True`. ~~Optional[List[bool]]~~ |
 | _keyword-only_ |                                                                                                                                                                                              |
 | `user_data`    | Optional extra data to attach to the Doc. ~~Dict~~                                                                                                                                           |

View File

@@ -773,17 +773,17 @@ from the specified model. Intended for use in `[initialize.before_init]`.
 > after_pipeline_creation = {"@callbacks":"spacy.models_with_nvtx_range.v1"}
 > ```

-Recursively wrap the models in each pipe using [NVTX](https://nvidia.github.io/NVTX/)
-range markers. These markers aid in GPU profiling by attributing specific operations
-to a ~~Model~~'s forward or backprop passes.
+Recursively wrap the models in each pipe using
+[NVTX](https://nvidia.github.io/NVTX/) range markers. These markers aid in GPU
+profiling by attributing specific operations to a ~~Model~~'s forward or
+backprop passes.

 | Name             | Description                                                                                                                  |
-|------------------|------------------------------------------------------------------------------------------------------------------------------|
+| ---------------- | ---------------------------------------------------------------------------------------------------------------------------- |
 | `forward_color`  | Color identifier for forward passes. Defaults to `-1`. ~~int~~                                                               |
 | `backprop_color` | Color identifier for backpropagation passes. Defaults to `-1`. ~~int~~                                                       |
 | **CREATES**      | A function that takes the current `nlp` and wraps forward/backprop passes in NVTX ranges. ~~Callable[[Language], Language]~~ |

 ## Training data and alignment {#gold source="spacy/training"}

 ### training.offsets_to_biluo_tags {#offsets_to_biluo_tags tag="function"}

View File

@@ -71,13 +71,14 @@ spaCy's [`setup.cfg`](%%GITHUB_SPACY/setup.cfg) for details on what's included.
 > $ pip install %%SPACY_PKG_NAME[lookups,transformers]%%SPACY_PKG_FLAGS
 > ```

 | Name             | Description                                                                                                                                                                                                                                                    |
-| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `lookups`        | Install [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) for data tables for lemmatization and lexeme normalization. The data is serialized with trained pipelines, so you only need this package if you want to train your own models. |
 | `transformers`   | Install [`spacy-transformers`](https://github.com/explosion/spacy-transformers). The package will be installed automatically when you install a transformer-based pipeline.                                                                                    |
 | `ray`            | Install [`spacy-ray`](https://github.com/explosion/spacy-ray) to add CLI commands for [parallel training](/usage/training#parallel-training).                                                                                                                  |
 | `cuda`, ...      | Install spaCy with GPU support provided by [CuPy](https://cupy.chainer.org) for your given CUDA version. See the GPU [installation instructions](#gpu) for details and options.                                                                                |
-| `ja`, `ko`, `th`, `zh` | Install additional dependencies required for tokenization for the [languages](/usage/models#languages). |
+| `apple`          | Install [`thinc-apple-ops`](https://github.com/explosion/thinc-apple-ops) to improve performance on an Apple M1.                                                                                                                                               |
+| `ja`, `ko`, `th` | Install additional dependencies required for tokenization for the [languages](/usage/models#languages).                                                                                                                                                       |

 ### conda {#conda}

View File

@@ -301,8 +301,6 @@ fly without having to save to and load from disk.
 $ python -m spacy init config - --lang en --pipeline ner,textcat --optimize accuracy | python -m spacy train - --paths.train ./corpus/train.spacy --paths.dev ./corpus/dev.spacy
 ```

-<!-- TODO: add reference to Prodigy's commands once Prodigy nightly is available -->
-
 ### Using variable interpolation {#config-interpolation}

 Another very useful feature of the config system is that it supports variable
@@ -1647,7 +1645,7 @@ workers are stuck waiting for it to complete before they can continue.

 ## Internal training API {#api}

-<Infobox variant="warning">
+<Infobox variant="danger">

 spaCy gives you full control over the training loop. However, for most use
 cases, it's recommended to train your pipelines via the
@@ -1659,6 +1657,32 @@ typically give you everything you need to train fully custom pipelines with

 </Infobox>

+### Training from a Python script {#api-train new="3.2"}
+
+If you want to run the training from a Python script instead of using the
+[`spacy train`](/api/cli#train) CLI command, you can call into the
+[`train`](/api/cli#train-function) helper function directly. It takes the path
+to the config file, an optional output directory and an optional dictionary of
+[config overrides](#config-overrides).
+
+```python
+from spacy.cli.train import train
+
+train("./config.cfg", overrides={"paths.train": "./train.spacy", "paths.dev": "./dev.spacy"})
+```
+
+### Internal training loop API {#api-loop}
+
+<Infobox variant="warning">
+
+This section documents how the training loop and updates to the `nlp` object
+work internally. You typically shouldn't have to implement this in Python unless
+you're writing your own trainable components. To train a pipeline, use
+[`spacy train`](/api/cli#train) or the [`train`](/api/cli#train-function) helper
+function instead.
+
+</Infobox>
+
 The [`Example`](/api/example) object contains annotated training data, also
 called the **gold standard**. It's initialized with a [`Doc`](/api/doc) object
 that will hold the predictions, and another `Doc` object that holds the

View File

@@ -1138,7 +1138,7 @@
         {
             "id": "deplacy",
             "slogan": "CUI-based Tree Visualizer for Universal Dependencies and Immediate Catena Analysis",
-            "discreption": "Simple dependency visualizer for [spaCy](https://spacy.io/), [UniDic2UD](https://pypi.org/project/unidic2ud), [Stanza](https://stanfordnlp.github.io/stanza/), [NLP-Cube](https://github.com/Adobe/NLP-Cube), [Trankit](https://github.com/nlp-uoregon/trankit), etc.",
+            "description": "Simple dependency visualizer for [spaCy](https://spacy.io/), [UniDic2UD](https://pypi.org/project/unidic2ud), [Stanza](https://stanfordnlp.github.io/stanza/), [NLP-Cube](https://github.com/Adobe/NLP-Cube), [Trankit](https://github.com/nlp-uoregon/trankit), etc.",
             "github": "KoichiYasuoka/deplacy",
             "image": "https://i.imgur.com/6uOI4Op.png",
             "code_example": [
@@ -1270,7 +1270,7 @@
             "description": "`textacy` is a Python library for performing a variety of natural language processing (NLP) tasks, built on the high-performance `spacy` library. With the fundamentals tokenization, part-of-speech tagging, dependency parsing, etc. delegated to another library, `textacy` focuses on the tasks that come before and follow after.",
             "github": "chartbeat-labs/textacy",
             "pip": "textacy",
-            "url": "https://chartbeat-labs.github.io/textacy/",
+            "url": "https://github.com/chartbeat-labs/textacy",
             "author": "Burton DeWilde",
             "author_links": {
                 "github": "bdewilde",

View File

@@ -4,10 +4,12 @@ import { StaticQuery, graphql } from 'gatsby'
 import { Quickstart, QS } from '../components/quickstart'
 import { repo, DEFAULT_BRANCH } from '../components/util'

+const DEFAULT_OS = 'mac'
+const DEFAULT_PLATFORM = 'x86'
 const DEFAULT_MODELS = ['en']
 const DEFAULT_OPT = 'efficiency'
 const DEFAULT_HARDWARE = 'cpu'
-const DEFAULT_CUDA = 'cuda102'
+const DEFAULT_CUDA = 'cuda113'
 const CUDA = {
     '8.0': 'cuda80',
     '9.0': 'cuda90',
@@ -19,11 +21,15 @@ const CUDA = {
     '11.0': 'cuda110',
     '11.1': 'cuda111',
     '11.2': 'cuda112',
+    '11.3': 'cuda113',
+    '11.4': 'cuda114',
 }
 const LANG_EXTRAS = ['ja'] // only for languages with models

 const QuickstartInstall = ({ id, title }) => {
     const [train, setTrain] = useState(false)
+    const [platform, setPlatform] = useState(DEFAULT_PLATFORM)
+    const [os, setOs] = useState(DEFAULT_OS)
     const [hardware, setHardware] = useState(DEFAULT_HARDWARE)
     const [cuda, setCuda] = useState(DEFAULT_CUDA)
     const [selectedModels, setModels] = useState(DEFAULT_MODELS)
@@ -33,15 +39,19 @@ const QuickstartInstall = ({ id, title }) => {
         config: v => setTrain(v.includes('train')),
         models: setModels,
         optimize: v => setEfficiency(v.includes('efficiency')),
+        platform: v => setPlatform(v[0]),
+        os: v => setOs(v[0]),
     }
     const showDropdown = {
         hardware: () => hardware === 'gpu',
     }
     const modelExtras = train ? selectedModels.filter(m => LANG_EXTRAS.includes(m)) : []
+    const apple = os === 'mac' && platform === 'arm'
     const pipExtras = [
         hardware === 'gpu' && cuda,
         train && 'transformers',
         train && 'lookups',
+        apple && 'apple',
         ...modelExtras,
     ]
         .filter(e => e)
@@ -62,6 +72,16 @@ const QuickstartInstall = ({ id, title }) => {
             { id: 'windows', title: 'Windows' },
             { id: 'linux', title: 'Linux' },
         ],
+        defaultValue: DEFAULT_OS,
+    },
+    {
+        id: 'platform',
+        title: 'Platform',
+        options: [
+            { id: 'x86', title: 'x86', checked: true },
+            { id: 'arm', title: 'ARM / M1' },
+        ],
+        defaultValue: DEFAULT_PLATFORM,
+    },
     {
         id: 'package',