diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py index e862454f7..25e4faa50 100644 --- a/spacy/cli/init_config.py +++ b/spacy/cli/init_config.py @@ -140,7 +140,8 @@ def init_config( template = Template(f.read()) # Filter out duplicates since tok2vec and transformer are added by template pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")] - reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, {})).dict() + defaults = RECOMMENDATIONS["__default__"] + reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, defaults)).dict() variables = { "lang": lang, "components": pipeline, @@ -167,7 +168,9 @@ def init_config( "Pipeline": ", ".join(pipeline), "Optimize for": optimize, "Hardware": variables["hardware"].upper(), - "Transformer": template_vars.transformer.get("name", False), + "Transformer": template_vars.transformer.get("name") + if template_vars.use_transformer + else None, } msg.info("Generated config template specific for your use case") for label, value in use_case.items(): diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja index ab1d69894..f70b54dca 100644 --- a/spacy/cli/templates/quickstart_training.jinja +++ b/spacy/cli/templates/quickstart_training.jinja @@ -1,7 +1,7 @@ {# This is a template for training configs used for the quickstart widget in the docs and the init config command. It encodes various best practices and can help generate the best possible configuration, given a user's requirements. #} -{%- set use_transformer = (transformer_data and hardware != "cpu") -%} +{%- set use_transformer = hardware != "cpu" -%} {%- set transformer = transformer_data[optimize] if use_transformer else {} -%} [paths] train = null @@ -196,11 +196,6 @@ no_output_layer = false {# NON-TRANSFORMER PIPELINE #} {% else -%} -{%- if hardware == "gpu" -%} -# There are no recommended transformer weights available for language '{{ lang }}' -# yet, so the pipeline described here is not transformer-based. -{%- endif %} - [components.tok2vec] factory = "tok2vec" diff --git a/spacy/cli/templates/quickstart_training_recommendations.yml b/spacy/cli/templates/quickstart_training_recommendations.yml index 47b3abbf6..b89f96bcd 100644 --- a/spacy/cli/templates/quickstart_training_recommendations.yml +++ b/spacy/cli/templates/quickstart_training_recommendations.yml @@ -1,6 +1,15 @@ # Recommended settings and available resources for each language, if available. # Not all languages have recommended word vectors or transformers and for some, # the recommended transformer for efficiency and accuracy may be the same. +__default__: + word_vectors: null + transformer: + efficiency: + name: bert-base-multilingual-uncased + size_factor: 3 + accuracy: + name: bert-base-multilingual-uncased + size_factor: 3 ar: word_vectors: null transformer: diff --git a/website/src/widgets/quickstart-training.js b/website/src/widgets/quickstart-training.js index a719defd5..913b334df 100644 --- a/website/src/widgets/quickstart-training.js +++ b/website/src/widgets/quickstart-training.js @@ -7,7 +7,7 @@ import generator, { DATA as GENERATOR_DATA } from './quickstart-training-generat import { htmlToReact } from '../components/util' const DEFAULT_LANG = 'en' -const DEFAULT_HARDWARE = 'gpu' +const DEFAULT_HARDWARE = 'cpu' const DEFAULT_OPT = 'efficiency' const COMPONENTS = ['tagger', 'parser', 'ner', 'textcat'] const COMMENT = `# This is an auto-generated partial config. To use it with 'spacy train' @@ -31,8 +31,8 @@ const DATA = [ id: 'hardware', title: 'Hardware', options: [ - { id: 'cpu', title: 'CPU preferred', checked: DEFAULT_HARDWARE === 'cpu' }, - { id: 'gpu', title: 'GPU', checked: DEFAULT_HARDWARE === 'gpu' }, + { id: 'cpu', title: 'CPU', checked: DEFAULT_HARDWARE === 'cpu' }, + { id: 'gpu', title: 'GPU (transformer)', checked: DEFAULT_HARDWARE === 'gpu' }, ], }, { @@ -58,7 +58,7 @@ export default function QuickstartTraining({ id, title, download = 'base_config. hardware: setHardware, optimize: setOptimize, } - const reco = GENERATOR_DATA[lang] || {} + const reco = GENERATOR_DATA[lang] || GENERATOR_DATA.__default__ const content = generator({ lang, components,