spaCy/website/src/widgets/quickstart-training.js
Marcus Blättermann 056b73468c
Load components dynamically (decrease initial file size for docs) (#12175)
* Extract `CodeBlock` component into own file

* Extract `InlineCode` component into own file

* Extract `TypeAnnotation` component into own file

* Convert named `export` to `default export`

* Remove unused `export`

* Simplify `TypeAnnotation` to remove dependency for Prism

* Load `Code` component dynamically

* Extract `MarkdownToReact` component into own file

* WIP Code Dynamic

* Load `MarkdownToReact` component dynamically

* Extract `htmlToReact` to own file

* Load `htmlToReact` component dynamically

* Dynamically load `Juniper`
2023-01-25 17:30:41 +01:00

145 lines
4.7 KiB
JavaScript

import React, { useState } from 'react'
import Prism from 'prismjs'
import 'prismjs/components/prism-ini.min.js'
import { Quickstart } from '../components/quickstart'
import generator, { DATA as GENERATOR_DATA } from './quickstart-training-generator'
import models from '../../meta/languages.json'
import dynamic from 'next/dynamic'
// Initial selections used both for the widget state and for the
// pre-checked options in the field definitions below.
const DEFAULT_LANG = 'en'
const DEFAULT_HARDWARE = 'cpu'
const DEFAULT_OPT = 'efficiency'
const DEFAULT_TEXTCAT_EXCLUSIVE = true

// Pipeline component names offered in the "Components" field.
const COMPONENTS = [
    'tagger',
    'morphologizer',
    'trainable_lemmatizer',
    'parser',
    'ner',
    'spancat',
    'textcat',
]

// Header comment placed in front of the generated config text.
const COMMENT = [
    "# This is an auto-generated partial config. To use it with 'spacy train'",
    '# you can run spacy init fill-config to auto-fill all default settings:',
    '# python -m spacy init fill-config ./base_config.cfg ./config.cfg',
].join('\n')

// Field definitions handed to the quickstart UI. Each entry describes one
// row (language, components, text classification, hardware, optimization).
const DATA = [
    { id: 'lang', title: 'Language', defaultValue: DEFAULT_LANG },
    {
        id: 'components',
        title: 'Components',
        help: 'Pipeline components to train. Requires training data for those annotations.',
        options: COMPONENTS.map((name) => ({ id: name, title: name })),
        multiple: true,
    },
    {
        id: 'textcat',
        title: 'Text Classification',
        multiple: true,
        options: [
            {
                id: 'exclusive',
                title: 'exclusive categories',
                checked: DEFAULT_TEXTCAT_EXCLUSIVE,
                help: 'only one label can apply',
            },
        ],
    },
    {
        id: 'hardware',
        title: 'Hardware',
        // The option whose id equals the default is the one pre-checked.
        options: [
            ['cpu', 'CPU'],
            ['gpu', 'GPU (transformer)'],
        ].map(([id, title]) => ({ id, title, checked: DEFAULT_HARDWARE === id })),
    },
    {
        id: 'optimize',
        title: 'Optimize for',
        help: 'Optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger & slower model). Will impact the choice of architecture, pretrained weights and hyperparameters.',
        options: ['efficiency', 'accuracy'].map((id) => ({
            id,
            title: id,
            checked: DEFAULT_OPT === id,
        })),
    },
]
const HtmlToReactDynamic = dynamic(() => import('../components/htmlToReact'), {
loading: () => <></>,
})
export default function QuickstartTraining({ id, title, download = 'base_config.cfg' }) {
const [lang, setLang] = useState(DEFAULT_LANG)
const [_components, _setComponents] = useState([])
const [components, setComponents] = useState([])
const [[hardware], setHardware] = useState([DEFAULT_HARDWARE])
const [[optimize], setOptimize] = useState([DEFAULT_OPT])
const [textcatExclusive, setTextcatExclusive] = useState(DEFAULT_TEXTCAT_EXCLUSIVE)
function updateComponents(value, isExclusive) {
_setComponents(value)
const updated = value.map((c) =>
c === 'textcat' && !isExclusive ? 'textcat_multilabel' : c
)
setComponents(updated)
}
const setters = {
lang: setLang,
components: (v) => updateComponents(v, textcatExclusive),
hardware: setHardware,
optimize: setOptimize,
textcat: (v) => {
const isExclusive = v.includes('exclusive')
setTextcatExclusive(isExclusive)
updateComponents(_components, isExclusive)
},
}
const defaultData = GENERATOR_DATA.__default__
const reco = GENERATOR_DATA[lang] || defaultData
const content = generator({
lang,
components,
optimize,
hardware,
transformer_data: reco.transformer || defaultData.transformer,
word_vectors: reco.word_vectors,
has_letters: reco.has_letters,
})
const rawStr = content.trim().replace(/\n\n\n+/g, '\n\n')
const rawContent = `${COMMENT}\n${rawStr}`
const displayContent = Prism.highlight(rawContent, Prism.languages.ini, 'ini')
let data = DATA
data[0].dropdown = models.languages
.map(({ name, code }) => ({
id: code,
title: name,
}))
.sort((a, b) => a.title.localeCompare(b.title))
if (!_components.includes('textcat')) {
data = data.map((field) => (field.id === 'textcat' ? { ...field, hidden: true } : field))
}
return (
<Quickstart
Container="div"
download={download}
rawContent={rawContent}
data={data}
title={title}
id={id}
setters={setters}
hidePrompts
small
codeLang="ini"
>
<HtmlToReactDynamic>{displayContent}</HtmlToReactDynamic>
</Quickstart>
)
}