Update install docs and quickstarts [ci skip]

This commit is contained in:
Ines Montani 2020-10-03 11:35:42 +02:00
parent 52e4586ec1
commit eb9b3ff9c5
5 changed files with 309 additions and 201 deletions

View File

@ -8,10 +8,7 @@ menu:
- ['Changelog', 'changelog']
---
spaCy is compatible with **64-bit CPython 3.6+** and runs on **Unix/Linux**,
**macOS/OS X** and **Windows**. The latest spaCy releases are available over
[pip](https://pypi.python.org/pypi/spacy) and
[conda](https://anaconda.org/conda-forge/spacy).
## Quickstart {hidden="true"}
> #### 📖 Looking for the old docs?
>
@ -19,21 +16,22 @@ spaCy is compatible with **64-bit CPython 3.6+** and runs on **Unix/Linux**,
> website to [**v2.spacy.io**](https://v2.spacy.io/docs). To see what's changed
> and how to migrate, see the [v3.0 guide](/usage/v3).
## Quickstart {hidden="true"}
import QuickstartInstall from 'widgets/quickstart-install.js'
<QuickstartInstall title="Quickstart" id="quickstart" />
<QuickstartInstall id="quickstart" />
## Installation instructions {#installation}
spaCy is compatible with **64-bit CPython 3.6+** and runs on **Unix/Linux**,
**macOS/OS X** and **Windows**. The latest spaCy releases are available over
[pip](https://pypi.python.org/pypi/spacy) and
[conda](https://anaconda.org/conda-forge/spacy).
### pip {#pip}
Using pip, spaCy releases are available as source packages and binary wheels.
```bash
$ pip install -U spacy
```
Before you install spaCy and its dependencies, make sure that your `pip`,
`setuptools` and `wheel` are up to date.
> #### Download pipelines
>
@ -47,16 +45,10 @@ $ pip install -U spacy
> >>> nlp = spacy.load("en_core_web_sm")
> ```
<Infobox variant="warning">
To install additional data tables for lemmatization you can run
`pip install spacy[lookups]` or install
[`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data)
separately. The lookups package is needed to provide normalization and
lemmatization data for new models and to lemmatize in languages that don't yet
come with trained pipelines and aren't powered by third-party libraries.
</Infobox>
```bash
$ pip install -U pip setuptools wheel
$ pip install -U spacy
```
When using pip it is generally recommended to install packages in a virtual
environment to avoid modifying system state:
@ -64,9 +56,28 @@ environment to avoid modifying system state:
```bash
$ python -m venv .env
$ source .env/bin/activate
$ pip install -U pip setuptools wheel
$ pip install spacy
```
spaCy also lets you install extra dependencies by specifying the following
keywords in brackets, e.g. `spacy[ja]` or `spacy[lookups,transformers]` (with
multiple comma-separated extras). See the `[options.extras_require]` section in
spaCy's [`setup.cfg`](%%GITHUB_SPACY/setup.cfg) for details on what's included.
> #### Example
>
> ```bash
> $ pip install spacy[lookups,transformers]
> ```
| Name | Description |
| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `lookups` | Install [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) for data tables for lemmatization and lexeme normalization. The data is serialized with trained pipelines, so you only need this package if you want to train your own models. |
| `transformers` | Install [`spacy-transformers`](https://github.com/explosion/spacy-transformers). The package will be installed automatically when you install a transformer-based pipeline. |
| `cuda`, ... | Install spaCy with GPU support provided by [CuPy](https://cupy.chainer.org) for your given CUDA version. See the GPU [installation instructions](#gpu) for details and options. |
| `ja`, `ko`, `th` | Install additional dependencies required for tokenization for the [languages](/usage/models#languages). |
### conda {#conda}
Thanks to our great community, we've been able to re-add conda support. You can
@ -112,10 +123,9 @@ $ python -m spacy validate
### Run spaCy with GPU {#gpu new="2.0.14"}
As of v2.0, spaCy comes with neural network models that are implemented in our
machine learning library, [Thinc](https://github.com/explosion/thinc). For GPU
support, we've been grateful to use the work of Chainer's
[CuPy](https://cupy.chainer.org) module, which provides a numpy-compatible
interface for GPU arrays.
machine learning library, [Thinc](https://thinc.ai). For GPU support, we've been
grateful to use the work of Chainer's [CuPy](https://cupy.chainer.org) module,
which provides a numpy-compatible interface for GPU arrays.
spaCy can be installed on GPU by specifying `spacy[cuda]`, `spacy[cuda90]`,
`spacy[cuda91]`, `spacy[cuda92]`, `spacy[cuda100]`, `spacy[cuda101]` or

View File

@ -24,6 +24,7 @@ const Quickstart = ({
rawContent = null,
id = 'quickstart',
setters = {},
showDropdown = {},
hidePrompts,
small,
codeLang,
@ -107,6 +108,8 @@ const Quickstart = ({
}) => {
// Optional function that's called with the value
const setterFunc = setters[id] || (() => {})
// Check if dropdown should be shown
const dropdownGetter = showDropdown[id] || (() => true)
return (
<div key={id} data-quickstart-group={id} className={classes.group}>
<style data-quickstart-style={id} scoped>
@ -123,37 +126,6 @@ const Quickstart = ({
)}
</div>
<div className={classes.fields}>
{!!dropdown.length && (
<select
defaultValue={defaultValue}
className={classes.select}
onChange={({ target }) => {
const value = target.value
if (value != other) {
setterFunc(value)
setOther(id, false)
} else {
setterFunc('')
setOther(id, true)
}
}}
>
{dropdown.map(({ id, title }) => (
<option key={id} value={id}>
{title}
</option>
))}
{other && <option value={other}>{other}</option>}
</select>
)}
{other && otherState[id] && (
<input
type="text"
className={classes.textInput}
placeholder="Type here..."
onChange={({ target }) => setterFunc(target.value)}
/>
)}
{options.map(option => {
const optionType = multiple ? 'checkbox' : 'radio'
const checkedForId = checked[id] || []
@ -179,7 +151,10 @@ const Quickstart = ({
type={optionType}
className={classNames(
classes.input,
classes[optionType]
classes[optionType],
{
[classes.long]: options.length >= 4,
}
)}
name={id}
id={`quickstart-${option.id}`}
@ -209,6 +184,41 @@ const Quickstart = ({
</Fragment>
)
})}
<span className={classes.fieldExtra}>
{!!dropdown.length && (
<select
defaultValue={defaultValue}
className={classNames(classes.select, {
[classes.selectHidden]: !dropdownGetter(),
})}
onChange={({ target }) => {
const value = target.value
if (value != other) {
setterFunc(value)
setOther(id, false)
} else {
setterFunc('')
setOther(id, true)
}
}}
>
{dropdown.map(({ id, title }) => (
<option key={id} value={id}>
{title}
</option>
))}
{other && <option value={other}>{other}</option>}
</select>
)}
{other && otherState[id] && (
<input
type="text"
className={classes.textInput}
placeholder="Type here..."
onChange={({ target }) => setterFunc(target.value)}
/>
)}
</span>
</div>
</div>
)

View File

@ -36,22 +36,37 @@
.label
cursor: pointer
border: 1px solid var(--color-subtle)
border-radius: var(--border-radius)
display: inline-block
padding: 0.65rem 1.25rem
margin: 0 0.5rem 0.75rem 0
padding: 0.35rem 0.5rem 0.25rem 0
margin: 0 1rem 0.75rem 0
font-size: var(--font-size-xs)
font-weight: bold
background: var(--color-back)
&:hover
background: var(--color-subtle-light)
.input:focus + &
// Outline the label that directly follows a focused input. The trailing "&"
// is required: without it the selector ends in a dangling "+" combinator and
// never matches the label this rule is nested in.
.input:focus + &
    border: 1px solid var(--color-theme)
    outline: none
.radio + &
margin: 0 0 0.75rem 0
border-radius: 0
border-width: 1px 0 1px 1px
border-style: solid
border-color: var(--color-subtle)
background: var(--color-back)
padding: 0.65rem 1.25rem
&:nth-child(2) // first child is checkbox
border-top-left-radius: var(--border-radius)
border-bottom-left-radius: var(--border-radius)
&:nth-last-child(2) // last child is additional container
border-top-right-radius: var(--border-radius)
border-bottom-right-radius: var(--border-radius)
border-right-width: 1px
.radio:checked + &
color: var(--color-back)
border-color: var(--color-theme)
@ -64,9 +79,10 @@
height: 20px
border: 1px solid var(--color-subtle)
vertical-align: middle
margin-right: 1rem
margin-right: 0.5rem
cursor: pointer
border-radius: var(--border-radius)
background: var(--color-back)
.checkbox:checked + &:before
// Embed "check" icon here for simplicity
@ -74,6 +90,9 @@
background-size: contain
border-color: var(--color-theme)
.field-extra:not(:empty):not(:first-child)
margin-left: 1rem
.legend
color: var(--color-dark)
padding: 0.75rem 0
@ -93,6 +112,9 @@
font-size: var(--font-size-sm)
background: var(--color-back)
.select-hidden
display: none
.text-input
border: 1px solid var(--color-subtle)
border-radius: var(--border-radius)

View File

@ -1,9 +1,20 @@
import React from 'react'
import React, { useState } from 'react'
import { StaticQuery, graphql } from 'gatsby'
import { Quickstart, QS } from '../components/quickstart'
import { repo } from '../components/util'
// Selections that are active when the install widget is first rendered.
const DEFAULT_HARDWARE = 'cpu'
const DEFAULT_CUDA = 'cuda100'
// Lookup from CUDA version to the id used for it in the widget, e.g.
// '10.1' -> 'cuda101': the id is "cuda" followed by the version without its dot.
const CUDA = ['8.0', '9.0', '9.1', '9.2', '10.0', '10.1', '10.2'].reduce((table, version) => {
    table[version] = `cuda${version.replace('.', '')}`
    return table
}, {})
const DATA = [
{
id: 'os',
@ -23,6 +34,16 @@ const DATA = [
{ id: 'source', title: 'from source' },
],
},
{
id: 'hardware',
title: 'Hardware',
options: [
{ id: 'cpu', title: 'CPU', checked: DEFAULT_HARDWARE === 'cpu' },
{ id: 'gpu', title: 'GPU', checked: DEFAULT_HARDWARE == 'gpu' },
],
dropdown: Object.keys(CUDA).map(id => ({ id: CUDA[id], title: `CUDA ${id}` })),
defaultValue: DEFAULT_CUDA,
},
{
id: 'config',
title: 'Configuration',
@ -30,100 +51,115 @@ const DATA = [
options: [
{
id: 'venv',
title: 'virtualenv',
title: 'virtual env',
help: 'Use a virtual environment and install spaCy into a user directory',
},
],
},
{
id: 'addition',
title: 'Additions',
multiple: true,
options: [
{
id: 'transformers',
title: 'Transformers',
help: 'Use transformers like BERT to train your spaCy pipelines',
},
{
id: 'lookups',
title: 'Lemmatizer data',
help: 'Install additional lookup tables and rules for lemmatization',
id: 'train',
title: 'train models',
help:
'Check this if you plan to train your own models with spaCy to install extra dependencies and data resources',
},
],
},
]
const QuickstartInstall = ({ id, title }) => (
<StaticQuery
query={query}
render={({ site }) => {
const { nightly, languages } = site.siteMetadata
const models = languages.filter(({ models }) => models !== null)
const data = [
...DATA,
{
id: 'models',
title: 'Trained Pipelines',
multiple: true,
options: models.map(({ code, name }) => ({ id: code, title: name })),
},
]
return (
<Quickstart data={data} title={title} id={id}>
<QS config="venv">python -m venv .env</QS>
<QS config="venv" os="mac">
source .env/bin/activate
</QS>
<QS config="venv" os="linux">
source .env/bin/activate
</QS>
<QS config="venv" os="windows">
.env\Scripts\activate
</QS>
<QS package="pip">pip install -U spacy</QS>
<QS package="conda">conda install -c conda-forge spacy</QS>
<QS package="source">
git clone https://github.com/{repo}
{nightly ? ` --branch develop` : ''}
</QS>
<QS package="source">cd spaCy</QS>
<QS package="source" os="linux">
export PYTHONPATH=`pwd`
</QS>
<QS package="source" os="windows">
set PYTHONPATH=C:\path\to\spaCy
</QS>
<QS package="source">pip install -r requirements.txt</QS>
<QS addition="transformers" package="pip">
pip install -U spacy-transformers
</QS>
<QS addition="transformers" package="source">
pip install -U spacy-transformers
</QS>
<QS addition="transformers" package="conda">
conda install -c conda-forge spacy-transformers
</QS>
<QS addition="lookups" package="pip">
pip install -U spacy-lookups-data
</QS>
<QS addition="lookups" package="source">
pip install -U spacy-lookups-data
</QS>
<QS addition="lookups" package="conda">
conda install -c conda-forge spacy-lookups-data
</QS>
<QS package="source">python setup.py build_ext --inplace</QS>
{models.map(({ code, models: modelOptions }) => (
<QS models={code} key={code}>
python -m spacy download {modelOptions[0]}
const QuickstartInstall = ({ id, title }) => {
// Whether the "train" option of the config group is currently checked.
const [train, setTrain] = useState(false)
// Selected hardware option id ('cpu' or 'gpu') and selected CUDA dropdown id.
const [hardware, setHardware] = useState(DEFAULT_HARDWARE)
const [cuda, setCuda] = useState(DEFAULT_CUDA)
// Callbacks keyed by quickstart group id; Quickstart looks them up via
// setters[id] and calls them with the new value.
const setters = {
// The hardware group reports two kinds of values: an array is treated as the
// option selection (first entry wins), anything else as the CUDA dropdown value.
// NOTE(review): presumably option changes are reported as an array of checked
// ids - confirm against the Quickstart component.
hardware: v => (Array.isArray(v) ? setHardware(v[0]) : setCuda(v)),
config: v => setTrain(v.includes('train')),
}
// Predicates keyed by group id; Quickstart hides the group's dropdown when the
// predicate returns false. The CUDA dropdown is only relevant for GPU installs.
const showDropdown = {
hardware: () => hardware === 'gpu',
}
// Comma-separated pip extras for the current selection: the CUDA id when GPU is
// selected, plus 'transformers' and 'lookups' when training is enabled. Falsy
// entries are dropped, so the result is an empty string when nothing applies.
const pipExtras = [hardware === 'gpu' && cuda, train && 'transformers', train && 'lookups']
.filter(e => e)
.join(',')
return (
<StaticQuery
query={query}
render={({ site }) => {
const { nightly, languages } = site.siteMetadata
const pkg = nightly ? 'spacy-nightly' : 'spacy'
const models = languages.filter(({ models }) => models !== null)
const data = [
...DATA,
{
id: 'models',
title: 'Trained Pipelines',
multiple: true,
options: models
.sort((a, b) => a.name.localeCompare(b.name))
.map(({ code, name }) => ({ id: code, title: name })),
},
]
return (
<Quickstart
data={data}
title={title}
id={id}
setters={setters}
showDropdown={showDropdown}
>
<QS config="venv">python -m venv .env</QS>
<QS config="venv" os="mac">
source .env/bin/activate
</QS>
))}
</Quickstart>
)
}}
/>
)
<QS config="venv" os="linux">
source .env/bin/activate
</QS>
<QS config="venv" os="windows">
.env\Scripts\activate
</QS>
<QS package="pip">pip install -U pip setuptools wheel</QS>
<QS package="source">pip install -U pip setuptools wheel</QS>
<QS package="pip">
pip install -U {pkg}
{pipExtras && `[${pipExtras}]`}
{nightly ? ' --pre' : ''}
</QS>
<QS package="conda">conda install -c conda-forge spacy</QS>
<QS package="conda" hardware="gpu">
conda install -c conda-forge cupy
</QS>
<QS package="source">
git clone https://github.com/{repo}
{nightly ? ` --branch develop` : ''}
</QS>
<QS package="source">cd spaCy</QS>
<QS package="source" os="linux">
export PYTHONPATH=`pwd`
</QS>
<QS package="source" os="windows">
set PYTHONPATH=C:\path\to\spaCy
</QS>
<QS package="source">pip install -r requirements.txt</QS>
<QS package="source">python setup.py build_ext --inplace</QS>
<QS package="source" config="train">
pip install -e '.[{pipExtras}]'
</QS>
<QS config="train" package="conda">
conda install -c conda-forge spacy-transformers
</QS>
<QS config="train" package="conda">
conda install -c conda-forge spacy-lookups-data
</QS>
{models.map(({ code, models: modelOptions }) => (
<QS models={code} key={code}>
python -m spacy download {modelOptions[0]}
</QS>
))}
</Quickstart>
)
}}
/>
)
}
export default QuickstartInstall

View File

@ -1,12 +1,16 @@
import React, { Fragment } from 'react'
import React, { Fragment, useState } from 'react'
import { StaticQuery, graphql } from 'gatsby'
import { Quickstart, QS } from '../components/quickstart'
const DEFAULT_LANG = 'en'
const DEFAULT_OPT = 'efficiency'
const data = [
{
id: 'lang',
title: 'Language',
defaultValue: DEFAULT_LANG,
},
{
id: 'load',
@ -25,6 +29,16 @@ const data = [
},
],
},
{
id: 'optimize',
title: 'Optimize for',
help:
'Optimize for efficiency (faster & smaller model) or higher accuracy (larger & slower model)',
options: [
{ id: 'efficiency', title: 'efficiency', checked: DEFAULT_OPT === 'efficiency' },
{ id: 'accuracy', title: 'accuracy', checked: DEFAULT_OPT === 'accuracy' },
],
},
{
id: 'config',
title: 'Options',
@ -33,57 +47,73 @@ const data = [
},
]
const QuickstartInstall = ({ id, title, description, defaultLang = 'en', children }) => (
<StaticQuery
query={query}
render={({ site }) => {
const models = site.siteMetadata.languages.filter(({ models }) => models !== null)
data[0].options = models.map(({ code, name }) => ({
id: code,
title: name,
checked: code === defaultLang,
}))
return (
<Quickstart data={data} title={title} id={id} description={description}>
{models.map(({ code, models, example }) => {
const pkg = models[0]
const exampleText = example || 'No text available yet'
return (
<Fragment key={code}>
<QS lang={code}>python -m spacy download {pkg}</QS>
<QS lang={code} divider />
<QS lang={code} load="spacy" prompt="python">
import spacy
</QS>
<QS lang={code} load="spacy" prompt="python">
nlp = spacy.load("{pkg}")
</QS>
<QS lang={code} load="module" prompt="python">
import {pkg}
</QS>
<QS lang={code} load="module" prompt="python">
nlp = {pkg}.load()
</QS>
<QS lang={code} config="example" prompt="python">
doc = nlp("{exampleText}")
</QS>
<QS lang={code} config="example" prompt="python">
print([
{code === 'xx'
? '(ent.text, ent.label) for ent in doc.ents'
: '(w.text, w.pos_) for w in doc'}
])
</QS>
</Fragment>
)
})}
const QuickstartInstall = ({ id, title, description, children }) => {
// Code of the language whose commands are shown; changed via the "lang" setter.
const [lang, setLang] = useState(DEFAULT_LANG)
// True when the "efficiency" option is selected. Initialised as a boolean so the
// state has the same type the setter writes; seeding it with the raw DEFAULT_OPT
// string would be truthy for any default, including 'accuracy'.
const [efficiency, setEfficiency] = useState(DEFAULT_OPT === 'efficiency')
// Callbacks keyed by quickstart group id, called with the group's new value.
const setters = {
    lang: setLang,
    optimize: v => setEfficiency(v.includes('efficiency')),
}
return (
<StaticQuery
query={query}
render={({ site }) => {
const models = site.siteMetadata.languages.filter(({ models }) => models !== null)
data[0].dropdown = models
.sort((a, b) => a.name.localeCompare(b.name))
.map(({ code, name }) => ({
id: code,
title: name,
}))
return (
<Quickstart
data={data}
title={title}
id={id}
description={description}
setters={setters}
copy={false}
>
{models.map(({ code, models, example }) => {
const pkg = efficiency ? models[0] : models[models.length - 1]
const exampleText = example || 'No text available yet'
return lang !== code ? null : (
<Fragment key={code}>
<QS>python -m spacy download {pkg}</QS>
<QS divider />
<QS load="spacy" prompt="python">
import spacy
</QS>
<QS load="spacy" prompt="python">
nlp = spacy.load("{pkg}")
</QS>
<QS load="module" prompt="python">
import {pkg}
</QS>
<QS load="module" prompt="python">
nlp = {pkg}.load()
</QS>
<QS config="example" prompt="python">
doc = nlp("{exampleText}")
</QS>
<QS config="example" prompt="python">
print([
{code === 'xx'
? '(ent.text, ent.label) for ent in doc.ents'
: '(w.text, w.pos_) for w in doc'}
])
</QS>
</Fragment>
)
})}
{children}
</Quickstart>
)
}}
/>
)
{children}
</Quickstart>
)
}}
/>
)
}
export default QuickstartInstall