mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Update config generation defaults and transformers (#6832)
This commit is contained in:
parent
6b68ad027b
commit
ec5f55aa5b
|
@ -140,7 +140,8 @@ def init_config(
|
|||
template = Template(f.read())
|
||||
# Filter out duplicates since tok2vec and transformer are added by template
|
||||
pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
|
||||
reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, {})).dict()
|
||||
defaults = RECOMMENDATIONS["__default__"]
|
||||
reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, defaults)).dict()
|
||||
variables = {
|
||||
"lang": lang,
|
||||
"components": pipeline,
|
||||
|
@ -167,7 +168,9 @@ def init_config(
|
|||
"Pipeline": ", ".join(pipeline),
|
||||
"Optimize for": optimize,
|
||||
"Hardware": variables["hardware"].upper(),
|
||||
"Transformer": template_vars.transformer.get("name", False),
|
||||
"Transformer": template_vars.transformer.get("name")
|
||||
if template_vars.use_transformer
|
||||
else None,
|
||||
}
|
||||
msg.info("Generated config template specific for your use case")
|
||||
for label, value in use_case.items():
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{# This is a template for training configs used for the quickstart widget in
|
||||
the docs and the init config command. It encodes various best practices and
|
||||
can help generate the best possible configuration, given a user's requirements. #}
|
||||
{%- set use_transformer = (transformer_data and hardware != "cpu") -%}
|
||||
{%- set use_transformer = hardware != "cpu" -%}
|
||||
{%- set transformer = transformer_data[optimize] if use_transformer else {} -%}
|
||||
[paths]
|
||||
train = null
|
||||
|
@ -196,11 +196,6 @@ no_output_layer = false
|
|||
{# NON-TRANSFORMER PIPELINE #}
|
||||
{% else -%}
|
||||
|
||||
{%- if hardware == "gpu" -%}
|
||||
# There are no recommended transformer weights available for language '{{ lang }}'
|
||||
# yet, so the pipeline described here is not transformer-based.
|
||||
{%- endif %}
|
||||
|
||||
[components.tok2vec]
|
||||
factory = "tok2vec"
|
||||
|
||||
|
|
|
@ -1,6 +1,15 @@
|
|||
# Recommended settings and available resources for each language, if available.
|
||||
# Not all languages have recommended word vectors or transformers and for some,
|
||||
# the recommended transformer for efficiency and accuracy may be the same.
|
||||
__default__:
|
||||
word_vectors: null
|
||||
transformer:
|
||||
efficiency:
|
||||
name: bert-base-multilingual-uncased
|
||||
size_factor: 3
|
||||
accuracy:
|
||||
name: bert-base-multilingual-uncased
|
||||
size_factor: 3
|
||||
ar:
|
||||
word_vectors: null
|
||||
transformer:
|
||||
|
|
|
@ -7,7 +7,7 @@ import generator, { DATA as GENERATOR_DATA } from './quickstart-training-generat
|
|||
import { htmlToReact } from '../components/util'
|
||||
|
||||
const DEFAULT_LANG = 'en'
|
||||
const DEFAULT_HARDWARE = 'gpu'
|
||||
const DEFAULT_HARDWARE = 'cpu'
|
||||
const DEFAULT_OPT = 'efficiency'
|
||||
const COMPONENTS = ['tagger', 'parser', 'ner', 'textcat']
|
||||
const COMMENT = `# This is an auto-generated partial config. To use it with 'spacy train'
|
||||
|
@ -31,8 +31,8 @@ const DATA = [
|
|||
id: 'hardware',
|
||||
title: 'Hardware',
|
||||
options: [
|
||||
{ id: 'cpu', title: 'CPU preferred', checked: DEFAULT_HARDWARE === 'cpu' },
|
||||
{ id: 'gpu', title: 'GPU', checked: DEFAULT_HARDWARE === 'gpu' },
|
||||
{ id: 'cpu', title: 'CPU', checked: DEFAULT_HARDWARE === 'cpu' },
|
||||
{ id: 'gpu', title: 'GPU (transformer)', checked: DEFAULT_HARDWARE === 'gpu' },
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -58,7 +58,7 @@ export default function QuickstartTraining({ id, title, download = 'base_config.
|
|||
hardware: setHardware,
|
||||
optimize: setOptimize,
|
||||
}
|
||||
const reco = GENERATOR_DATA[lang] || {}
|
||||
const reco = GENERATOR_DATA[lang] || GENERATOR_DATA.__default__
|
||||
const content = generator({
|
||||
lang,
|
||||
components,
|
||||
|
|
Loading…
Reference in New Issue
Block a user