mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-02 04:03:19 +03:00
Update docs [ci skip]
This commit is contained in:
parent
ca491722ad
commit
160f1a5f94
|
@ -489,6 +489,8 @@ All other settings can be passed in by the user via the `config` argument on
|
||||||
[`@Language.factory`](/api/language#factory) decorator also lets you define a
|
[`@Language.factory`](/api/language#factory) decorator also lets you define a
|
||||||
`default_config` that's used as a fallback.
|
`default_config` that's used as a fallback.
|
||||||
|
|
||||||
|
<!-- TODO: add example of passing in a custom Python object via the config based on a registered function -->
|
||||||
|
|
||||||
```python
|
```python
|
||||||
### With config {highlight="4,9"}
|
### With config {highlight="4,9"}
|
||||||
import spacy
|
import spacy
|
||||||
|
|
|
@ -3,7 +3,8 @@ title: Training Models
|
||||||
next: /usage/projects
|
next: /usage/projects
|
||||||
menu:
|
menu:
|
||||||
- ['Introduction', 'basics']
|
- ['Introduction', 'basics']
|
||||||
- ['CLI & Config', 'cli-config']
|
- ['Quickstart', 'quickstart']
|
||||||
|
- ['Config System', 'config']
|
||||||
- ['Transfer Learning', 'transfer-learning']
|
- ['Transfer Learning', 'transfer-learning']
|
||||||
- ['Custom Models', 'custom-models']
|
- ['Custom Models', 'custom-models']
|
||||||
- ['Parallel Training', 'parallel-training']
|
- ['Parallel Training', 'parallel-training']
|
||||||
|
@ -29,12 +30,13 @@ ready-to-use spaCy models.
|
||||||
|
|
||||||
</Infobox>
|
</Infobox>
|
||||||
|
|
||||||
## Training CLI & config {#cli-config}
|
### Training CLI & config {#cli-config}
|
||||||
|
|
||||||
<!-- TODO: intro describing the new v3 training philosophy -->
|
<!-- TODO: intro describing the new v3 training philosophy -->
|
||||||
|
|
||||||
The recommended way to train your spaCy models is via the
|
The recommended way to train your spaCy models is via the
|
||||||
[`spacy train`](/api/cli#train) command on the command line.
|
[`spacy train`](/api/cli#train) command on the command line. You can pass in the
|
||||||
|
following data and information:
|
||||||
|
|
||||||
1. The **training and evaluation data** in spaCy's
|
1. The **training and evaluation data** in spaCy's
|
||||||
[binary `.spacy` format](/api/data-formats#binary-training) created using
|
[binary `.spacy` format](/api/data-formats#binary-training) created using
|
||||||
|
@ -68,38 +70,22 @@ workflows, from data preprocessing to training and packaging your model.
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|
||||||
<Accordion title="Understanding the training output">
|
## Quickstart {#quickstart}
|
||||||
|
|
||||||
When you train a model using the [`spacy train`](/api/cli#train) command, you'll
|
> #### Instructions
|
||||||
see a table showing metrics after each pass over the data. Here's what those
|
>
|
||||||
metrics means:
|
> 1. Select your requirements and settings. The quickstart widget will
|
||||||
|
> auto-generate a recommended starter config for you.
|
||||||
|
> 2. Use the buttons at the bottom to save the result to your clipboard or a
|
||||||
|
> file `config.cfg`.
|
||||||
|
> 3. TODO: recommended approach for filling config
|
||||||
|
> 4. Run [`spacy train`](/api/cli#train) with your config and data.
|
||||||
|
|
||||||
<!-- TODO: update table below and include note about scores in config -->
|
import QuickstartTraining from 'widgets/quickstart-training.js'
|
||||||
|
|
||||||
| Name | Description |
|
<QuickstartTraining />
|
||||||
| ---------- | ------------------------------------------------------------------------------------------------- |
|
|
||||||
| `Dep Loss` | Training loss for dependency parser. Should decrease, but usually not to 0. |
|
|
||||||
| `NER Loss` | Training loss for named entity recognizer. Should decrease, but usually not to 0. |
|
|
||||||
| `UAS` | Unlabeled attachment score for parser. The percentage of unlabeled correct arcs. Should increase. |
|
|
||||||
| `NER P.` | NER precision on development data. Should increase. |
|
|
||||||
| `NER R.` | NER recall on development data. Should increase. |
|
|
||||||
| `NER F.` | NER F-score on development data. Should increase. |
|
|
||||||
| `Tag %` | Fine-grained part-of-speech tag accuracy on development data. Should increase. |
|
|
||||||
| `Token %` | Tokenization accuracy on development data. |
|
|
||||||
| `CPU WPS` | Prediction speed on CPU in words per second, if available. Should stay stable. |
|
|
||||||
| `GPU WPS` | Prediction speed on GPU in words per second, if available. Should stay stable. |
|
|
||||||
|
|
||||||
Note that if the development data has raw text, some of the gold-standard
|
## Training config {#config}
|
||||||
entities might not align to the predicted tokenization. These tokenization
|
|
||||||
errors are **excluded from the NER evaluation**. If your tokenization makes it
|
|
||||||
impossible for the model to predict 50% of your entities, your NER F-score might
|
|
||||||
still look good.
|
|
||||||
|
|
||||||
</Accordion>
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Training config files {#config}
|
|
||||||
|
|
||||||
> #### Migration from spaCy v2.x
|
> #### Migration from spaCy v2.x
|
||||||
>
|
>
|
||||||
|
@ -237,7 +223,70 @@ compound = 1.001
|
||||||
|
|
||||||
<!-- TODO: refer to architectures API: /api/architectures. This should document the architectures in spacy/ml/models -->
|
<!-- TODO: refer to architectures API: /api/architectures. This should document the architectures in spacy/ml/models -->
|
||||||
|
|
||||||
<!-- TODO: how do we document the default configs? -->
|
### Metrics, training output and weighted scores {#metrics}
|
||||||
|
|
||||||
|
When you train a model using the [`spacy train`](/api/cli#train) command, you'll
|
||||||
|
see a table showing the metrics after each pass over the data. The available
|
||||||
|
metrics **depend on the pipeline components**. Pipeline components also define
|
||||||
|
which scores are shown and how they should be **weighted in the final score**
|
||||||
|
that determines which model is considered the best.
|
||||||
|
|
||||||
|
The `training.score_weights` setting in your `config.cfg` lets you customize the
|
||||||
|
scores shown in the table and how they should be weighted. In this example, the
|
||||||
|
labeled dependency accuracy and NER F-score count towards the final score with
|
||||||
|
40% each and the tagging accuracy makes up the remaining 20%. The tokenization
|
||||||
|
accuracy and speed are both shown in the table, but not counted towards the
|
||||||
|
score.
|
||||||
|
|
||||||
|
> #### Why do I need score weights?
|
||||||
|
>
|
||||||
|
> At the end of your training process, you typically want to select the **best
|
||||||
|
> model** – but what "best" means depends on the available components and your
|
||||||
|
> specific use case. For instance, you may prefer a model with higher NER and
|
||||||
|
> lower POS tagging accuracy over a model with lower NER and higher POS
|
||||||
|
> accuracy. You can express this preference in the score weights, e.g. by
|
||||||
|
> assigning `ents_f` (NER F-score) a higher weight.
|
||||||
|
|
||||||
|
```ini
|
||||||
|
[training.score_weights]
|
||||||
|
dep_las = 0.4
|
||||||
|
ents_f = 0.4
|
||||||
|
tag_acc = 0.2
|
||||||
|
token_acc = 0.0
|
||||||
|
speed = 0.0
|
||||||
|
```
|
||||||
|
|
||||||
|
The `score_weights` don't _have to_ sum to `1.0` – but it's recommended. When
|
||||||
|
you generate a config for a given pipeline, the score weights are generated by
|
||||||
|
combining and normalizing the default score weights of the pipeline components.
|
||||||
|
The default score weights are defined by each pipeline component via the
|
||||||
|
`default_score_weights` setting on the
|
||||||
|
[`@Language.component`](/api/language#component) or
|
||||||
|
[`@Language.factory`](/api/language#factory) decorator. By default, all pipeline
|
||||||
|
components are weighted equally.
|
||||||
|
|
||||||
|
<Accordion title="Understanding the training output and score types" spaced>
|
||||||
|
|
||||||
|
<!-- TODO: come up with good short explanation of precision and recall -->
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
| -------------------------- | ----------------------------------------------------------------------------------------------------------------------- |
|
||||||
|
| **Loss** | The training loss representing the amount of work left for the optimizer. Should decrease, but usually not to `0`. |
|
||||||
|
| **Precision** (P) | Percentage of predicted annotations that were correct. Should increase. |
|
||||||
|
| **Recall** (R) | Percentage of reference annotations that were recovered. Should increase. |
|
||||||
|
| **F-Score** (F) | The harmonic mean of precision and recall. Should increase. |
|
||||||
|
| **UAS** / **LAS** | Unlabeled and labeled attachment score for the dependency parser, i.e. the percentage of correct arcs. Should increase. |
|
||||||
|
| **Words per second** (WPS) | Prediction speed in words per second. Should stay stable. |
|
||||||
|
|
||||||
|
<!-- TODO: is this still relevant? -->
|
||||||
|
|
||||||
|
Note that if the development data has raw text, some of the gold-standard
|
||||||
|
entities might not align to the predicted tokenization. These tokenization
|
||||||
|
errors are **excluded from the NER evaluation**. If your tokenization makes it
|
||||||
|
impossible for the model to predict 50% of your entities, your NER F-score might
|
||||||
|
still look good.
|
||||||
|
|
||||||
|
</Accordion>
|
||||||
|
|
||||||
## Transfer learning {#transfer-learning}
|
## Transfer learning {#transfer-learning}
|
||||||
|
|
||||||
|
|
|
@ -88,7 +88,8 @@ The recommended workflow for training is to use spaCy's
|
||||||
[`spacy train`](/api/cli#train) command. The training config defines all
|
[`spacy train`](/api/cli#train) command. The training config defines all
|
||||||
component settings and hyperparameters in one place and lets you describe a tree
|
component settings and hyperparameters in one place and lets you describe a tree
|
||||||
of objects by referring to creation functions, including functions you register
|
of objects by referring to creation functions, including functions you register
|
||||||
yourself.
|
yourself. For details on how to get started with training your own model, check
|
||||||
|
out the [training quickstart](/usage/training#quickstart).
|
||||||
|
|
||||||
<Project id="en_core_bert">
|
<Project id="en_core_bert">
|
||||||
|
|
||||||
|
|
|
@ -3,21 +3,23 @@ import React, { useState, useRef } from 'react'
|
||||||
import Icon from './icon'
|
import Icon from './icon'
|
||||||
import classes from '../styles/copy.module.sass'
|
import classes from '../styles/copy.module.sass'
|
||||||
|
|
||||||
|
export function copyToClipboard(ref, callback) {
|
||||||
|
const isClient = typeof window !== 'undefined'
|
||||||
|
if (ref.current && isClient) {
|
||||||
|
ref.current.select()
|
||||||
|
document.execCommand('copy')
|
||||||
|
callback(true)
|
||||||
|
ref.current.blur()
|
||||||
|
setTimeout(() => callback(false), 1000)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const CopyInput = ({ text, prefix }) => {
|
const CopyInput = ({ text, prefix }) => {
|
||||||
const isClient = typeof window !== 'undefined'
|
const isClient = typeof window !== 'undefined'
|
||||||
const supportsCopy = isClient && document.queryCommandSupported('copy')
|
const supportsCopy = isClient && document.queryCommandSupported('copy')
|
||||||
const textareaRef = useRef()
|
const textareaRef = useRef()
|
||||||
const [copySuccess, setCopySuccess] = useState(false)
|
const [copySuccess, setCopySuccess] = useState(false)
|
||||||
|
const onClick = () => copyToClipboard(textareaRef, setCopySuccess)
|
||||||
function copyToClipboard() {
|
|
||||||
if (textareaRef.current && isClient) {
|
|
||||||
textareaRef.current.select()
|
|
||||||
document.execCommand('copy')
|
|
||||||
setCopySuccess(true)
|
|
||||||
textareaRef.current.blur()
|
|
||||||
setTimeout(() => setCopySuccess(false), 1000)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function selectText() {
|
function selectText() {
|
||||||
if (textareaRef.current && isClient) {
|
if (textareaRef.current && isClient) {
|
||||||
|
@ -37,7 +39,7 @@ const CopyInput = ({ text, prefix }) => {
|
||||||
onClick={selectText}
|
onClick={selectText}
|
||||||
/>
|
/>
|
||||||
{supportsCopy && (
|
{supportsCopy && (
|
||||||
<button title="Copy to clipboard" onClick={copyToClipboard}>
|
<button title="Copy to clipboard" onClick={onClick}>
|
||||||
<Icon width={16} name={copySuccess ? 'accept' : 'clipboard'} />
|
<Icon width={16} name={copySuccess ? 'accept' : 'clipboard'} />
|
||||||
</button>
|
</button>
|
||||||
)}
|
)}
|
||||||
|
|
|
@ -22,6 +22,7 @@ import { ReactComponent as SearchIcon } from '../images/icons/search.svg'
|
||||||
import { ReactComponent as MoonIcon } from '../images/icons/moon.svg'
|
import { ReactComponent as MoonIcon } from '../images/icons/moon.svg'
|
||||||
import { ReactComponent as ClipboardIcon } from '../images/icons/clipboard.svg'
|
import { ReactComponent as ClipboardIcon } from '../images/icons/clipboard.svg'
|
||||||
import { ReactComponent as NetworkIcon } from '../images/icons/network.svg'
|
import { ReactComponent as NetworkIcon } from '../images/icons/network.svg'
|
||||||
|
import { ReactComponent as DownloadIcon } from '../images/icons/download.svg'
|
||||||
|
|
||||||
import classes from '../styles/icon.module.sass'
|
import classes from '../styles/icon.module.sass'
|
||||||
|
|
||||||
|
@ -46,7 +47,8 @@ const icons = {
|
||||||
search: SearchIcon,
|
search: SearchIcon,
|
||||||
moon: MoonIcon,
|
moon: MoonIcon,
|
||||||
clipboard: ClipboardIcon,
|
clipboard: ClipboardIcon,
|
||||||
network: NetworkIcon
|
network: NetworkIcon,
|
||||||
|
download: DownloadIcon,
|
||||||
}
|
}
|
||||||
|
|
||||||
const Icon = ({ name, width, height, inline, variant, className }) => {
|
const Icon = ({ name, width, height, inline, variant, className }) => {
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
import React, { Fragment, useState, useEffect } from 'react'
|
import React, { Fragment, useState, useEffect, useRef } from 'react'
|
||||||
import PropTypes from 'prop-types'
|
import PropTypes from 'prop-types'
|
||||||
import classNames from 'classnames'
|
import classNames from 'classnames'
|
||||||
import { window } from 'browser-monads'
|
import { window } from 'browser-monads'
|
||||||
|
@ -6,6 +6,7 @@ import { window } from 'browser-monads'
|
||||||
import Section from './section'
|
import Section from './section'
|
||||||
import Icon from './icon'
|
import Icon from './icon'
|
||||||
import { H2 } from './typography'
|
import { H2 } from './typography'
|
||||||
|
import { copyToClipboard } from './copy'
|
||||||
import classes from '../styles/quickstart.module.sass'
|
import classes from '../styles/quickstart.module.sass'
|
||||||
|
|
||||||
function getNewChecked(optionId, checkedForId, multiple) {
|
function getNewChecked(optionId, checkedForId, multiple) {
|
||||||
|
@ -14,10 +15,41 @@ function getNewChecked(optionId, checkedForId, multiple) {
|
||||||
return [...checkedForId, optionId]
|
return [...checkedForId, optionId]
|
||||||
}
|
}
|
||||||
|
|
||||||
const Quickstart = ({ data, title, description, id, children }) => {
|
function getRawContent(ref) {
|
||||||
|
if (ref.current && ref.current.childNodes) {
|
||||||
|
// Select all currently visible nodes (spans and text nodes)
|
||||||
|
const result = [...ref.current.childNodes].filter(el => el.offsetParent !== null)
|
||||||
|
return result.map(el => el.textContent).join('\n')
|
||||||
|
}
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
|
||||||
|
const Quickstart = ({
|
||||||
|
data,
|
||||||
|
title,
|
||||||
|
description,
|
||||||
|
copy,
|
||||||
|
download,
|
||||||
|
id,
|
||||||
|
setters = {},
|
||||||
|
hidePrompts,
|
||||||
|
children,
|
||||||
|
}) => {
|
||||||
|
const contentRef = useRef()
|
||||||
|
const copyAreaRef = useRef()
|
||||||
|
const isClient = typeof window !== 'undefined'
|
||||||
|
const supportsCopy = isClient && document.queryCommandSupported('copy')
|
||||||
|
const showCopy = supportsCopy && copy
|
||||||
const [styles, setStyles] = useState({})
|
const [styles, setStyles] = useState({})
|
||||||
const [checked, setChecked] = useState({})
|
const [checked, setChecked] = useState({})
|
||||||
const [initialized, setInitialized] = useState(false)
|
const [initialized, setInitialized] = useState(false)
|
||||||
|
const [copySuccess, setCopySuccess] = useState(false)
|
||||||
|
const [otherState, setOtherState] = useState({})
|
||||||
|
const setOther = (id, value) => setOtherState({ ...otherState, [id]: value })
|
||||||
|
const onClickCopy = () => {
|
||||||
|
copyAreaRef.current.value = getRawContent(contentRef)
|
||||||
|
copyToClipboard(copyAreaRef, setCopySuccess)
|
||||||
|
}
|
||||||
|
|
||||||
const getCss = (id, checkedOptions) => {
|
const getCss = (id, checkedOptions) => {
|
||||||
const checkedForId = checkedOptions[id] || []
|
const checkedForId = checkedOptions[id] || []
|
||||||
|
@ -32,7 +64,7 @@ const Quickstart = ({ data, title, description, id, children }) => {
|
||||||
if (!initialized) {
|
if (!initialized) {
|
||||||
const initialChecked = Object.assign(
|
const initialChecked = Object.assign(
|
||||||
{},
|
{},
|
||||||
...data.map(({ id, options }) => ({
|
...data.map(({ id, options = [] }) => ({
|
||||||
[id]: options.filter(option => option.checked).map(({ id }) => id),
|
[id]: options.filter(option => option.checked).map(({ id }) => id),
|
||||||
}))
|
}))
|
||||||
)
|
)
|
||||||
|
@ -48,7 +80,7 @@ const Quickstart = ({ data, title, description, id, children }) => {
|
||||||
|
|
||||||
return !data.length ? null : (
|
return !data.length ? null : (
|
||||||
<Section id={id}>
|
<Section id={id}>
|
||||||
<div className={classes.root}>
|
<div className={classNames(classes.root, { [classes.hidePrompts]: !!hidePrompts })}>
|
||||||
{title && (
|
{title && (
|
||||||
<H2 className={classes.title} name={id}>
|
<H2 className={classes.title} name={id}>
|
||||||
<a href={`#${id}`}>{title}</a>
|
<a href={`#${id}`}>{title}</a>
|
||||||
|
@ -57,9 +89,22 @@ const Quickstart = ({ data, title, description, id, children }) => {
|
||||||
|
|
||||||
{description && <p className={classes.description}>{description}</p>}
|
{description && <p className={classes.description}>{description}</p>}
|
||||||
|
|
||||||
{data.map(({ id, title, options = [], multiple, help }) => (
|
{data.map(
|
||||||
|
({
|
||||||
|
id,
|
||||||
|
title,
|
||||||
|
options = [],
|
||||||
|
dropdown = [],
|
||||||
|
defaultValue,
|
||||||
|
multiple,
|
||||||
|
other,
|
||||||
|
help,
|
||||||
|
}) => {
|
||||||
|
// Optional function that's called with the value
|
||||||
|
const setterFunc = setters[id] || (() => {})
|
||||||
|
return (
|
||||||
<div key={id} data-quickstart-group={id} className={classes.group}>
|
<div key={id} data-quickstart-group={id} className={classes.group}>
|
||||||
<style data-quickstart-style={id}>
|
<style data-quickstart-style={id} scoped>
|
||||||
{styles[id] ||
|
{styles[id] ||
|
||||||
`[data-quickstart-results]>[data-quickstart-${id}] { display: none }`}
|
`[data-quickstart-results]>[data-quickstart-${id}] { display: none }`}
|
||||||
</style>
|
</style>
|
||||||
|
@ -73,6 +118,37 @@ const Quickstart = ({ data, title, description, id, children }) => {
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
<div className={classes.fields}>
|
<div className={classes.fields}>
|
||||||
|
{!!dropdown.length && (
|
||||||
|
<select
|
||||||
|
defaultValue={defaultValue}
|
||||||
|
className={classes.select}
|
||||||
|
onChange={({ target }) => {
|
||||||
|
const value = target.value
|
||||||
|
if (value != other) {
|
||||||
|
setterFunc(value)
|
||||||
|
setOther(id, false)
|
||||||
|
} else {
|
||||||
|
setterFunc('')
|
||||||
|
setOther(id, true)
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{dropdown.map(({ id, title }) => (
|
||||||
|
<option key={id} value={id}>
|
||||||
|
{title}
|
||||||
|
</option>
|
||||||
|
))}
|
||||||
|
{other && <option value={other}>{other}</option>}
|
||||||
|
</select>
|
||||||
|
)}
|
||||||
|
{other && otherState[id] && (
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
className={classes.textInput}
|
||||||
|
placeholder="Type here..."
|
||||||
|
onChange={({ target }) => setterFunc(target.value)}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
{options.map(option => {
|
{options.map(option => {
|
||||||
const optionType = multiple ? 'checkbox' : 'radio'
|
const optionType = multiple ? 'checkbox' : 'radio'
|
||||||
const checkedForId = checked[id] || []
|
const checkedForId = checked[id] || []
|
||||||
|
@ -93,6 +169,7 @@ const Quickstart = ({ data, title, description, id, children }) => {
|
||||||
...styles,
|
...styles,
|
||||||
[id]: getCss(id, newChecked),
|
[id]: getCss(id, newChecked),
|
||||||
})
|
})
|
||||||
|
setterFunc(newChecked[id])
|
||||||
}}
|
}}
|
||||||
type={optionType}
|
type={optionType}
|
||||||
className={classNames(
|
className={classNames(
|
||||||
|
@ -110,7 +187,9 @@ const Quickstart = ({ data, title, description, id, children }) => {
|
||||||
>
|
>
|
||||||
{option.title}
|
{option.title}
|
||||||
{option.meta && (
|
{option.meta && (
|
||||||
<span className={classes.meta}>{option.meta}</span>
|
<span className={classes.meta}>
|
||||||
|
{option.meta}
|
||||||
|
</span>
|
||||||
)}
|
)}
|
||||||
{option.help && (
|
{option.help && (
|
||||||
<span
|
<span
|
||||||
|
@ -127,12 +206,37 @@ const Quickstart = ({ data, title, description, id, children }) => {
|
||||||
})}
|
})}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
))}
|
)
|
||||||
|
}
|
||||||
|
)}
|
||||||
<pre className={classes.code}>
|
<pre className={classes.code}>
|
||||||
<code className={classes.results} data-quickstart-results="">
|
<code className={classes.results} data-quickstart-results="" ref={contentRef}>
|
||||||
{children}
|
{children}
|
||||||
</code>
|
</code>
|
||||||
|
|
||||||
|
<menu className={classes.menu}>
|
||||||
|
{showCopy && (
|
||||||
|
<button
|
||||||
|
title="Copy to clipboard"
|
||||||
|
onClick={onClickCopy}
|
||||||
|
className={classes.iconButton}
|
||||||
|
>
|
||||||
|
<Icon width={18} name={copySuccess ? 'accept' : 'clipboard'} />
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
{download && (
|
||||||
|
<a
|
||||||
|
href={`data:application/octet-stream,${getRawContent(contentRef)}`}
|
||||||
|
title="Download file"
|
||||||
|
download={download}
|
||||||
|
className={classes.iconButton}
|
||||||
|
>
|
||||||
|
<Icon width={18} name="download" />
|
||||||
|
</a>
|
||||||
|
)}
|
||||||
|
</menu>
|
||||||
</pre>
|
</pre>
|
||||||
|
{showCopy && <textarea ref={copyAreaRef} className={classes.copyArea} rows={1} />}
|
||||||
</div>
|
</div>
|
||||||
</Section>
|
</Section>
|
||||||
)
|
)
|
||||||
|
@ -141,6 +245,7 @@ const Quickstart = ({ data, title, description, id, children }) => {
|
||||||
Quickstart.defaultProps = {
|
Quickstart.defaultProps = {
|
||||||
data: [],
|
data: [],
|
||||||
id: 'quickstart',
|
id: 'quickstart',
|
||||||
|
copy: true,
|
||||||
}
|
}
|
||||||
|
|
||||||
Quickstart.propTypes = {
|
Quickstart.propTypes = {
|
||||||
|
@ -164,12 +269,13 @@ Quickstart.propTypes = {
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
const QS = ({ children, prompt = 'bash', divider = false, ...props }) => {
|
const QS = ({ children, prompt = 'bash', divider = false, comment = false, ...props }) => {
|
||||||
const qsClassNames = classNames({
|
const qsClassNames = classNames({
|
||||||
[classes.prompt]: !!prompt && !divider,
|
[classes.prompt]: !!prompt && !divider,
|
||||||
[classes.bash]: prompt === 'bash' && !divider,
|
[classes.bash]: prompt === 'bash' && !divider,
|
||||||
[classes.python]: prompt === 'python' && !divider,
|
[classes.python]: prompt === 'python' && !divider,
|
||||||
[classes.divider]: !!divider,
|
[classes.divider]: !!divider,
|
||||||
|
[classes.comment]: !!comment,
|
||||||
})
|
})
|
||||||
const attrs = Object.assign(
|
const attrs = Object.assign(
|
||||||
{},
|
{},
|
||||||
|
|
4
website/src/images/icons/download.svg
Normal file
4
website/src/images/icons/download.svg
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24">
|
||||||
|
<path d="M16.707 7.404c-0.189-0.188-0.448-0.283-0.707-0.283s-0.518 0.095-0.707 0.283l-2.293 2.293v-6.697c0-0.552-0.448-1-1-1s-1 0.448-1 1v6.697l-2.293-2.293c-0.189-0.188-0.44-0.293-0.707-0.293s-0.518 0.105-0.707 0.293c-0.39 0.39-0.39 1.024 0 1.414l4.707 4.682 4.709-4.684c0.388-0.387 0.388-1.022-0.002-1.412z"></path>
|
||||||
|
<path d="M20.987 16c0-0.105-0.004-0.211-0.039-0.316l-2-6c-0.136-0.409-0.517-0.684-0.948-0.684h-0.219c-0.094 0.188-0.21 0.368-0.367 0.525l-1.482 1.475h1.348l1.667 5h-13.893l1.667-5h1.348l-1.483-1.475c-0.157-0.157-0.274-0.337-0.367-0.525h-0.219c-0.431 0-0.812 0.275-0.948 0.684l-2 6c-0.035 0.105-0.039 0.211-0.039 0.316-0.013 0-0.013 5-0.013 5 0 0.553 0.447 1 1 1h16c0.553 0 1-0.447 1-1 0 0 0-5-0.013-5z"></path>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 821 B |
|
@ -83,6 +83,24 @@
|
||||||
.fields
|
.fields
|
||||||
flex: 100%
|
flex: 100%
|
||||||
|
|
||||||
|
.select
|
||||||
|
cursor: pointer
|
||||||
|
border: 1px solid var(--color-subtle)
|
||||||
|
border-radius: var(--border-radius)
|
||||||
|
display: inline-block
|
||||||
|
padding: 0.35rem 1.25rem
|
||||||
|
margin: 0 1rem 0.75rem 0
|
||||||
|
font-size: var(--font-size-sm)
|
||||||
|
background: var(--color-back)
|
||||||
|
|
||||||
|
.text-input
|
||||||
|
border: 1px solid var(--color-subtle)
|
||||||
|
border-radius: var(--border-radius)
|
||||||
|
display: inline-block
|
||||||
|
padding: 0.35rem 0.75rem
|
||||||
|
font-size: var(--font-size-sm)
|
||||||
|
background: var(--color-back)
|
||||||
|
|
||||||
.code
|
.code
|
||||||
background: var(--color-front)
|
background: var(--color-front)
|
||||||
color: var(--color-back)
|
color: var(--color-back)
|
||||||
|
@ -95,6 +113,7 @@
|
||||||
border-bottom-right-radius: var(--border-radius)
|
border-bottom-right-radius: var(--border-radius)
|
||||||
-webkit-font-smoothing: subpixel-antialiased
|
-webkit-font-smoothing: subpixel-antialiased
|
||||||
-moz-osx-font-smoothing: auto
|
-moz-osx-font-smoothing: auto
|
||||||
|
position: relative
|
||||||
|
|
||||||
.results
|
.results
|
||||||
display: block
|
display: block
|
||||||
|
@ -105,6 +124,9 @@
|
||||||
& > span
|
& > span
|
||||||
display: block
|
display: block
|
||||||
|
|
||||||
|
.hide-prompts .prompt:before
|
||||||
|
content: initial !important
|
||||||
|
|
||||||
.prompt:before
|
.prompt:before
|
||||||
color: var(--color-theme)
|
color: var(--color-theme)
|
||||||
margin-right: 1em
|
margin-right: 1em
|
||||||
|
@ -115,6 +137,9 @@
|
||||||
.python:before
|
.python:before
|
||||||
content: ">>>"
|
content: ">>>"
|
||||||
|
|
||||||
|
.comment
|
||||||
|
color: var(--syntax-comment)
|
||||||
|
|
||||||
.divider
|
.divider
|
||||||
padding: 1.5rem 0
|
padding: 1.5rem 0
|
||||||
|
|
||||||
|
@ -123,3 +148,29 @@
|
||||||
|
|
||||||
.input:checked + .label &
|
.input:checked + .label &
|
||||||
color: inherit
|
color: inherit
|
||||||
|
|
||||||
|
.copy-area
|
||||||
|
width: 1px
|
||||||
|
height: 1px
|
||||||
|
opacity: 0
|
||||||
|
position: absolute
|
||||||
|
|
||||||
|
.menu
|
||||||
|
color: var(--color-subtle)
|
||||||
|
padding-right: 1.5rem
|
||||||
|
display: inline-block
|
||||||
|
position: absolute
|
||||||
|
bottom: var(--spacing-xs)
|
||||||
|
right: 0
|
||||||
|
|
||||||
|
.icon-button
|
||||||
|
display: inline-block
|
||||||
|
color: inherit
|
||||||
|
cursor: pointer
|
||||||
|
transition: transform 0.05s ease
|
||||||
|
|
||||||
|
&:not(:last-child)
|
||||||
|
margin-right: 1.5rem
|
||||||
|
|
||||||
|
&:hover
|
||||||
|
transform: scale(1.1)
|
||||||
|
|
|
@ -92,7 +92,7 @@ const QuickstartInstall = ({ id, title }) => (
|
||||||
</QS>
|
</QS>
|
||||||
<QS package="source">pip install -r requirements.txt</QS>
|
<QS package="source">pip install -r requirements.txt</QS>
|
||||||
<QS addition="transformers" package="pip">
|
<QS addition="transformers" package="pip">
|
||||||
pip install -U spacy-lookups-transformers
|
pip install -U spacy-transformers
|
||||||
</QS>
|
</QS>
|
||||||
<QS addition="transformers" package="source">
|
<QS addition="transformers" package="source">
|
||||||
pip install -U spacy-transformers
|
pip install -U spacy-transformers
|
||||||
|
|
118
website/src/widgets/quickstart-training.js
Normal file
118
website/src/widgets/quickstart-training.js
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
import React, { useState } from 'react'
|
||||||
|
import { StaticQuery, graphql } from 'gatsby'
|
||||||
|
|
||||||
|
import { Quickstart, QS } from '../components/quickstart'
|
||||||
|
|
||||||
|
const DEFAULT_LANG = 'en'
|
||||||
|
const MODELS_SMALL = { en: 'roberta-base-small' }
|
||||||
|
const MODELS_LARGE = { en: 'roberta-base' }
|
||||||
|
|
||||||
|
const COMPONENTS = ['tagger', 'parser', 'ner', 'textcat']
|
||||||
|
const COMMENT = `# This is an auto-generated partial config for training a model.
|
||||||
|
# TODO: intructions for how to fill and use it`
|
||||||
|
const DATA = [
|
||||||
|
{
|
||||||
|
id: 'lang',
|
||||||
|
title: 'Language',
|
||||||
|
defaultValue: DEFAULT_LANG,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'components',
|
||||||
|
title: 'Components',
|
||||||
|
help: 'Pipeline components to train. Requires training data for those annotations.',
|
||||||
|
options: COMPONENTS.map(id => ({ id, title: id })),
|
||||||
|
multiple: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'hardware',
|
||||||
|
title: 'Hardware',
|
||||||
|
options: [
|
||||||
|
{ id: 'cpu-only', title: 'CPU only' },
|
||||||
|
{ id: 'cpu', title: 'CPU preferred' },
|
||||||
|
{ id: 'gpu', title: 'GPU', checked: true },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'optimize',
|
||||||
|
title: 'Optimize for',
|
||||||
|
help: '...',
|
||||||
|
options: [
|
||||||
|
{ id: 'efficiency', title: 'efficiency', checked: true },
|
||||||
|
{ id: 'accuracy', title: 'accuracy' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'config',
|
||||||
|
title: 'Configuration',
|
||||||
|
options: [
|
||||||
|
{
|
||||||
|
id: 'independent',
|
||||||
|
title: 'independent components',
|
||||||
|
help: "Make components independent and don't share weights",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
multiple: true,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
const QuickstartTraining = ({ id, title, download = 'config.cfg' }) => {
|
||||||
|
const [lang, setLang] = useState(DEFAULT_LANG)
|
||||||
|
const [pipeline, setPipeline] = useState([])
|
||||||
|
const setters = { lang: setLang, components: setPipeline }
|
||||||
|
return (
|
||||||
|
<StaticQuery
|
||||||
|
query={query}
|
||||||
|
render={({ site }) => {
|
||||||
|
const langs = site.siteMetadata.languages
|
||||||
|
DATA[0].dropdown = langs.map(({ name, code }) => ({
|
||||||
|
id: code,
|
||||||
|
title: name,
|
||||||
|
}))
|
||||||
|
return (
|
||||||
|
<Quickstart
|
||||||
|
download={download}
|
||||||
|
data={DATA}
|
||||||
|
title={title}
|
||||||
|
id={id}
|
||||||
|
setters={setters}
|
||||||
|
hidePrompts
|
||||||
|
>
|
||||||
|
<QS comment>{COMMENT}</QS>
|
||||||
|
<span>[nlp]</span>
|
||||||
|
<span>lang = "{lang}"</span>
|
||||||
|
<span>pipeline = {JSON.stringify(pipeline).replace(/,/g, ', ')}</span>
|
||||||
|
<br />
|
||||||
|
<span>[components]</span>
|
||||||
|
<br />
|
||||||
|
<span>[components.transformer]</span>
|
||||||
|
<QS optimize="efficiency">name = "{MODELS_SMALL[lang]}"</QS>
|
||||||
|
<QS optimize="accuracy">name = "{MODELS_LARGE[lang]}"</QS>
|
||||||
|
{!!pipeline.length && <br />}
|
||||||
|
{pipeline.map((pipe, i) => (
|
||||||
|
<>
|
||||||
|
{i !== 0 && <br />}
|
||||||
|
<span>[components.{pipe}]</span>
|
||||||
|
<span>factory = "{pipe}"</span>
|
||||||
|
</>
|
||||||
|
))}
|
||||||
|
</Quickstart>
|
||||||
|
)
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
const query = graphql`
|
||||||
|
query QuickstartTrainingQuery {
|
||||||
|
site {
|
||||||
|
siteMetadata {
|
||||||
|
languages {
|
||||||
|
code
|
||||||
|
name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
`
|
||||||
|
|
||||||
|
export default QuickstartTraining
|
Loading…
Reference in New Issue
Block a user