mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Update config generation defaults and transformers (#6832)
This commit is contained in:
parent
6b68ad027b
commit
ec5f55aa5b
|
@ -140,7 +140,8 @@ def init_config(
|
||||||
template = Template(f.read())
|
template = Template(f.read())
|
||||||
# Filter out duplicates since tok2vec and transformer are added by template
|
# Filter out duplicates since tok2vec and transformer are added by template
|
||||||
pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
|
pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
|
||||||
reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, {})).dict()
|
defaults = RECOMMENDATIONS["__default__"]
|
||||||
|
reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, defaults)).dict()
|
||||||
variables = {
|
variables = {
|
||||||
"lang": lang,
|
"lang": lang,
|
||||||
"components": pipeline,
|
"components": pipeline,
|
||||||
|
@ -167,7 +168,9 @@ def init_config(
|
||||||
"Pipeline": ", ".join(pipeline),
|
"Pipeline": ", ".join(pipeline),
|
||||||
"Optimize for": optimize,
|
"Optimize for": optimize,
|
||||||
"Hardware": variables["hardware"].upper(),
|
"Hardware": variables["hardware"].upper(),
|
||||||
"Transformer": template_vars.transformer.get("name", False),
|
"Transformer": template_vars.transformer.get("name")
|
||||||
|
if template_vars.use_transformer
|
||||||
|
else None,
|
||||||
}
|
}
|
||||||
msg.info("Generated config template specific for your use case")
|
msg.info("Generated config template specific for your use case")
|
||||||
for label, value in use_case.items():
|
for label, value in use_case.items():
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
{# This is a template for training configs used for the quickstart widget in
|
{# This is a template for training configs used for the quickstart widget in
|
||||||
the docs and the init config command. It encodes various best practices and
|
the docs and the init config command. It encodes various best practices and
|
||||||
can help generate the best possible configuration, given a user's requirements. #}
|
can help generate the best possible configuration, given a user's requirements. #}
|
||||||
{%- set use_transformer = (transformer_data and hardware != "cpu") -%}
|
{%- set use_transformer = hardware != "cpu" -%}
|
||||||
{%- set transformer = transformer_data[optimize] if use_transformer else {} -%}
|
{%- set transformer = transformer_data[optimize] if use_transformer else {} -%}
|
||||||
[paths]
|
[paths]
|
||||||
train = null
|
train = null
|
||||||
|
@ -196,11 +196,6 @@ no_output_layer = false
|
||||||
{# NON-TRANSFORMER PIPELINE #}
|
{# NON-TRANSFORMER PIPELINE #}
|
||||||
{% else -%}
|
{% else -%}
|
||||||
|
|
||||||
{%- if hardware == "gpu" -%}
|
|
||||||
# There are no recommended transformer weights available for language '{{ lang }}'
|
|
||||||
# yet, so the pipeline described here is not transformer-based.
|
|
||||||
{%- endif %}
|
|
||||||
|
|
||||||
[components.tok2vec]
|
[components.tok2vec]
|
||||||
factory = "tok2vec"
|
factory = "tok2vec"
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,15 @@
|
||||||
# Recommended settings and available resources for each language, if available.
|
# Recommended settings and available resources for each language, if available.
|
||||||
# Not all languages have recommended word vectors or transformers and for some,
|
# Not all languages have recommended word vectors or transformers and for some,
|
||||||
# the recommended transformer for efficiency and accuracy may be the same.
|
# the recommended transformer for efficiency and accuracy may be the same.
|
||||||
|
__default__:
|
||||||
|
word_vectors: null
|
||||||
|
transformer:
|
||||||
|
efficiency:
|
||||||
|
name: bert-base-multilingual-uncased
|
||||||
|
size_factor: 3
|
||||||
|
accuracy:
|
||||||
|
name: bert-base-multilingual-uncased
|
||||||
|
size_factor: 3
|
||||||
ar:
|
ar:
|
||||||
word_vectors: null
|
word_vectors: null
|
||||||
transformer:
|
transformer:
|
||||||
|
|
|
@ -7,7 +7,7 @@ import generator, { DATA as GENERATOR_DATA } from './quickstart-training-generat
|
||||||
import { htmlToReact } from '../components/util'
|
import { htmlToReact } from '../components/util'
|
||||||
|
|
||||||
const DEFAULT_LANG = 'en'
|
const DEFAULT_LANG = 'en'
|
||||||
const DEFAULT_HARDWARE = 'gpu'
|
const DEFAULT_HARDWARE = 'cpu'
|
||||||
const DEFAULT_OPT = 'efficiency'
|
const DEFAULT_OPT = 'efficiency'
|
||||||
const COMPONENTS = ['tagger', 'parser', 'ner', 'textcat']
|
const COMPONENTS = ['tagger', 'parser', 'ner', 'textcat']
|
||||||
const COMMENT = `# This is an auto-generated partial config. To use it with 'spacy train'
|
const COMMENT = `# This is an auto-generated partial config. To use it with 'spacy train'
|
||||||
|
@ -31,8 +31,8 @@ const DATA = [
|
||||||
id: 'hardware',
|
id: 'hardware',
|
||||||
title: 'Hardware',
|
title: 'Hardware',
|
||||||
options: [
|
options: [
|
||||||
{ id: 'cpu', title: 'CPU preferred', checked: DEFAULT_HARDWARE === 'cpu' },
|
{ id: 'cpu', title: 'CPU', checked: DEFAULT_HARDWARE === 'cpu' },
|
||||||
{ id: 'gpu', title: 'GPU', checked: DEFAULT_HARDWARE === 'gpu' },
|
{ id: 'gpu', title: 'GPU (transformer)', checked: DEFAULT_HARDWARE === 'gpu' },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -58,7 +58,7 @@ export default function QuickstartTraining({ id, title, download = 'base_config.
|
||||||
hardware: setHardware,
|
hardware: setHardware,
|
||||||
optimize: setOptimize,
|
optimize: setOptimize,
|
||||||
}
|
}
|
||||||
const reco = GENERATOR_DATA[lang] || {}
|
const reco = GENERATOR_DATA[lang] || GENERATOR_DATA.__default__
|
||||||
const content = generator({
|
const content = generator({
|
||||||
lang,
|
lang,
|
||||||
components,
|
components,
|
||||||
|
|
Loading…
Reference in New Issue
Block a user