import React, { useState } from 'react' import Prism from 'prismjs' import 'prismjs/components/prism-ini.min.js' import { Quickstart } from '../components/quickstart' import generator, { DATA as GENERATOR_DATA } from './quickstart-training-generator' import models from '../../meta/languages.json' import dynamic from 'next/dynamic' const DEFAULT_LANG = 'en' const DEFAULT_HARDWARE = 'cpu' const DEFAULT_OPT = 'efficiency' const DEFAULT_TEXTCAT_EXCLUSIVE = true const COMPONENTS = [ 'tagger', 'morphologizer', 'trainable_lemmatizer', 'parser', 'ner', 'spancat', 'textcat', ] const COMMENT = `# This is an auto-generated partial config. To use it with 'spacy train' # you can run spacy init fill-config to auto-fill all default settings: # python -m spacy init fill-config ./base_config.cfg ./config.cfg` const DATA = [ { id: 'lang', title: 'Language', defaultValue: DEFAULT_LANG, }, { id: 'components', title: 'Components', help: 'Pipeline components to train. Requires training data for those annotations.', options: COMPONENTS.map((id) => ({ id, title: id })), multiple: true, }, { id: 'textcat', title: 'Text Classification', multiple: true, options: [ { id: 'exclusive', title: 'exclusive categories', checked: DEFAULT_TEXTCAT_EXCLUSIVE, help: 'only one label can apply', }, ], }, { id: 'hardware', title: 'Hardware', options: [ { id: 'cpu', title: 'CPU', checked: DEFAULT_HARDWARE === 'cpu' }, { id: 'gpu', title: 'GPU (transformer)', checked: DEFAULT_HARDWARE === 'gpu' }, ], }, { id: 'optimize', title: 'Optimize for', help: 'Optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger & slower model). Will impact the choice of architecture, pretrained weights and hyperparameters.', options: [ { id: 'efficiency', title: 'efficiency', checked: DEFAULT_OPT === 'efficiency' }, { id: 'accuracy', title: 'accuracy', checked: DEFAULT_OPT === 'accuracy' }, ], }, ] const HtmlToReactDynamic = dynamic(() => import('../components/htmlToReact'), { loading: () => <>, }) export default function QuickstartTraining({ id, title, download = 'base_config.cfg' }) { const [lang, setLang] = useState(DEFAULT_LANG) const [_components, _setComponents] = useState([]) const [components, setComponents] = useState([]) const [[hardware], setHardware] = useState([DEFAULT_HARDWARE]) const [[optimize], setOptimize] = useState([DEFAULT_OPT]) const [textcatExclusive, setTextcatExclusive] = useState(DEFAULT_TEXTCAT_EXCLUSIVE) function updateComponents(value, isExclusive) { _setComponents(value) const updated = value.map((c) => c === 'textcat' && !isExclusive ? 'textcat_multilabel' : c ) setComponents(updated) } const setters = { lang: setLang, components: (v) => updateComponents(v, textcatExclusive), hardware: setHardware, optimize: setOptimize, textcat: (v) => { const isExclusive = v.includes('exclusive') setTextcatExclusive(isExclusive) updateComponents(_components, isExclusive) }, } const defaultData = GENERATOR_DATA.__default__ const reco = GENERATOR_DATA[lang] || defaultData const content = generator({ lang, components, optimize, hardware, transformer_data: reco.transformer || defaultData.transformer, word_vectors: reco.word_vectors, has_letters: reco.has_letters, }) const rawStr = content.trim().replace(/\n\n\n+/g, '\n\n') const rawContent = `${COMMENT}\n${rawStr}` const displayContent = Prism.highlight(rawContent, Prism.languages.ini, 'ini') let data = DATA data[0].dropdown = models.languages .map(({ name, code }) => ({ id: code, title: name, })) .sort((a, b) => a.title.localeCompare(b.title)) if (!_components.includes('textcat')) { data = data.map((field) => (field.id === 'textcat' ? { ...field, hidden: true } : field)) } return ( {displayContent} ) }