From e2f2ef3a5a7d5113c34c44205bf4ac3263b0dd2a Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 19 Aug 2020 13:33:15 +0200 Subject: [PATCH] Update init config and recommendations - As much as I dislike YAML, it seemed like a better format here because it allows us to add comments if we want to explain the different recommendations - Don't include the generated JS in the repo by default and build it on the fly when running or deploying the site. This ensures it's always up to date. - Simplify jinja_to_js script and use fewer dependencies --- .gitignore | 1 + spacy/cli/init_config.py | 34 ++---- .../quickstart_training_recommendations.json | 13 --- .../quickstart_training_recommendations.yml | 103 ++++++++++++++++++ spacy/schemas.py | 19 ++++ spacy/tests/test_cli.py | 9 +- website/package.json | 2 +- website/setup/jinja_to_js.py | 38 ++++--- website/setup/requirements.txt | 2 +- website/setup/setup.sh | 2 +- .../widgets/quickstart-training-generator.js | 2 +- website/src/widgets/quickstart-training.js | 12 +- 12 files changed, 166 insertions(+), 71 deletions(-) delete mode 100644 spacy/cli/templates/quickstart_training_recommendations.json create mode 100644 spacy/cli/templates/quickstart_training_recommendations.yml diff --git a/.gitignore b/.gitignore index 087163761..136a8f26d 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ website/logs npm-debug.log* website/www/ website/_deploy.sh +quickstart-training-generator.js # Cython / C extensions cythonize.json diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py index 7d80eb289..9b47dea14 100644 --- a/spacy/cli/init_config.py +++ b/spacy/cli/init_config.py @@ -3,17 +3,17 @@ from enum import Enum from pathlib import Path from wasabi import Printer, diff_strings from thinc.api import Config -from pydantic import BaseModel import srsly import re from .. 
import util +from ..schemas import RecommendationSchema from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND -TEMPLATE_ROOT = Path(__file__).parent / "templates" -TEMPLATE_PATH = TEMPLATE_ROOT / "quickstart_training.jinja" -RECOMMENDATIONS_PATH = TEMPLATE_ROOT / "quickstart_training_recommendations.json" +ROOT = Path(__file__).parent / "templates" +TEMPLATE_PATH = ROOT / "quickstart_training.jinja" +RECOMMENDATIONS = srsly.read_yaml(ROOT / "quickstart_training_recommendations.yml") class Optimizations(str, Enum): @@ -21,21 +21,6 @@ class Optimizations(str, Enum): accuracy = "accuracy" -class RecommendationsTrfItem(BaseModel): - name: str - size_factor: int - - -class RecommendationsTrf(BaseModel): - efficiency: RecommendationsTrfItem - accuracy: RecommendationsTrfItem - - -class RecommendationSchema(BaseModel): - word_vectors: Optional[str] = None - transformer: Optional[RecommendationsTrf] = None - - @init_cli.command("config") def init_config_cli( # fmt: off @@ -111,14 +96,11 @@ def init_config( from jinja2 import Template except ImportError: msg.fail("This command requires jinja2", "pip install jinja2", exits=1) - recommendations = srsly.read_json(RECOMMENDATIONS_PATH) - lang_defaults = util.get_lang_class(lang).Defaults - has_letters = lang_defaults.writing_system.get("has_letters", True) - # Filter out duplicates since tok2vec and transformer are added by template - pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")] - reco = RecommendationSchema(**recommendations.get(lang, {})).dict() with TEMPLATE_PATH.open("r") as f: template = Template(f.read()) + # Filter out duplicates since tok2vec and transformer are added by template + pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")] + reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, {})).dict() variables = { "lang": lang, "components": pipeline, @@ -126,7 +108,7 @@ def init_config( "hardware": "cpu" if cpu else "gpu", 
"transformer_data": reco["transformer"], "word_vectors": reco["word_vectors"], - "has_letters": has_letters, + "has_letters": reco["has_letters"], } base_template = template.render(variables).strip() # Giving up on getting the newlines right in jinja for now diff --git a/spacy/cli/templates/quickstart_training_recommendations.json b/spacy/cli/templates/quickstart_training_recommendations.json deleted file mode 100644 index 8a3acc438..000000000 --- a/spacy/cli/templates/quickstart_training_recommendations.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "en": { - "word_vectors": "en_vectors_web_lg", - "transformer": { - "efficiency": { "name": "roberta-base", "size_factor": 3 }, - "accuracy": { "name": "roberta-base", "size_factor": 3 } - } - }, - "de": { - "word_vectors": null, - "transformer": null - } -} diff --git a/spacy/cli/templates/quickstart_training_recommendations.yml b/spacy/cli/templates/quickstart_training_recommendations.yml new file mode 100644 index 000000000..efb6da2be --- /dev/null +++ b/spacy/cli/templates/quickstart_training_recommendations.yml @@ -0,0 +1,103 @@ +# Recommended settings and available resources for each language, if available. +# Not all languages have recommended word vecotrs or transformers and for some, +# the recommended transformer for efficiency and accuracy may be the same. 
+en: + word_vectors: en_vectors_web_lg + transformer: + efficiency: + name: roberta-base + size_factor: 3 + accuracy: + name: roberta-base + size_factor: 3 +de: + word_vectors: null + transformer: + efficiency: + name: bert-base-german-cased + size_factor: 3 + accuracy: + name: bert-base-german-cased + size_factor: 3 +fr: + word_vectors: null + transformer: + efficiency: + name: camembert-base + size_factor: 3 + accuracy: + name: camembert-base + size_factor: 3 +es: + word_vectors: null + transformer: + efficiency: + name: mrm8488/RuPERTa-base + size_factor: 3 + accuracy: + name: mrm8488/RuPERTa-base + size_factor: 3 +sv: + word_vectors: null + transformer: + efficiency: + name: KB/bert-base-swedish-cased + size_factor: 3 + accuracy: + name: KB/bert-base-swedish-cased + size_factor: 3 +fi: + word_vectors: null + transformer: + efficiency: + name: TurkuNLP/bert-base-finnish-cased-v1 + size_factor: 3 + accuracy: + name: TurkuNLP/bert-base-finnish-cased-v1 + size_factor: 3 +el: + word_vectors: null + transformer: + efficiency: + name: nlpaueb/bert-base-greek-uncased-v1 + size_factor: 3 + accuracy: + name: nlpaueb/bert-base-greek-uncased-v1 + size_factor: 3 +tr: + word_vectors: null + transformer: + efficiency: + name: dbmdz/bert-base-turkish-cased + size_factor: 3 + accuracy: + name: dbmdz/bert-base-turkish-cased + size_factor: 3 +zh: + word_vectors: null + transformer: + efficiency: + name: bert-base-chinese + size_factor: 3 + accuracy: + name: bert-base-chinese + size_factor: 3 + has_letters: false +ar: + word_vectors: null + transformer: + efficiency: + name: asafaya/bert-base-arabic + size_factor: 3 + accuracy: + name: asafaya/bert-base-arabic + size_factor: 3 +pl: + word_vectors: null + transformer: + efficiency: + name: dkleczek/bert-base-polish-cased-v1 + size_factor: 3 + accuracy: + name: dkleczek/bert-base-polish-cased-v1 + size_factor: 3 diff --git a/spacy/schemas.py b/spacy/schemas.py index e219c2009..3eef814c6 100644 --- a/spacy/schemas.py +++ 
b/spacy/schemas.py @@ -311,3 +311,22 @@ class ProjectConfigSchema(BaseModel): class Config: title = "Schema for project configuration file" + + +# Recommendations for init config workflows + + +class RecommendationTrfItem(BaseModel): + name: str + size_factor: int + + +class RecommendationTrf(BaseModel): + efficiency: RecommendationTrfItem + accuracy: RecommendationTrfItem + + +class RecommendationSchema(BaseModel): + word_vectors: Optional[str] = None + transformer: Optional[RecommendationTrf] = None + has_letters: bool = True diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py index 1da257fd5..89ce740e0 100644 --- a/spacy/tests/test_cli.py +++ b/spacy/tests/test_cli.py @@ -2,10 +2,9 @@ import pytest from spacy.gold import docs_to_json, biluo_tags_from_offsets from spacy.gold.converters import iob2docs, conll_ner2docs, conllu2docs from spacy.lang.en import English -from spacy.schemas import ProjectConfigSchema, validate +from spacy.schemas import ProjectConfigSchema, RecommendationSchema, validate from spacy.cli.pretrain import make_docs -from spacy.cli.init_config import init_config, RECOMMENDATIONS_PATH -from spacy.cli.init_config import RecommendationSchema +from spacy.cli.init_config import init_config, RECOMMENDATIONS from spacy.cli._util import validate_project_commands, parse_config_overrides from spacy.util import get_lang_class import srsly @@ -335,7 +334,5 @@ def test_init_config(lang, pipeline, optimize): def test_model_recommendations(): - recommendations = srsly.read_json(RECOMMENDATIONS_PATH) - for lang, data in recommendations.items(): - assert get_lang_class(lang) + for lang, data in RECOMMENDATIONS.items(): assert RecommendationSchema(**data) diff --git a/website/package.json b/website/package.json index 441d996fe..9e02cda82 100644 --- a/website/package.json +++ b/website/package.json @@ -53,7 +53,7 @@ "remark-react": "^5.0.1" }, "scripts": { - "build": "npm run python:setup && gatsby build", + "build": "npm run python:install && npm 
run python:setup && gatsby build", "dev": "npm run python:setup && gatsby develop", "dev:nightly": "BRANCH=nightly.spacy.io npm run dev", "lint": "eslint **", diff --git a/website/setup/jinja_to_js.py b/website/setup/jinja_to_js.py index 0d363375e..114d0e172 100644 --- a/website/setup/jinja_to_js.py +++ b/website/setup/jinja_to_js.py @@ -11,7 +11,8 @@ from os import path from io import StringIO from jinja2 import Environment, FileSystemLoader, nodes from pathlib import Path -import typer +import srsly +import sys OPERANDS = { @@ -437,7 +438,8 @@ class JinjaToJS(object): with self._interpolation(): with self._python_bool_wrapper(**kwargs): if node.items: - raise ValueError(f"Can't process non-empty dict in epxression: {node}") + err = f"Can't process non-empty dict in expression: {node}" + raise ValueError(err) self.output.write("{}") def _process_getattr(self, node, **kwargs): @@ -1232,18 +1234,22 @@ class JinjaToJS(object): self.output.write(")") -def main( - # fmt: off - template_path: Path = typer.Argument(..., exists=True, dir_okay=False, help="Path to .jinja file"), - output: Path = typer.Argument(None, help="Path to output module (stdout if unset)"), - data_path: Path = typer.Option(None, "--data", help="Optional JSON file with additional data to be included as DATA") - # fmt: on -): - """Convert a jinja2 template to a JavaScript module.""" +def main(template_path, output=None, data_path=None): + """Convert a jinja2 template to a JavaScript module. + + template_path (Path): Path to .jinja file. + output (Optional[Path]): Path to output .js module (stdout if unset). + data_path (Optional[Path]): Optional JSON or YAML file with additional data + to be included in the JS module as the exported variable DATA.
+ """ data = "{}" if data_path is not None: - with data_path.open("r", encoding="utf8") as f: - data = json.dumps(json.loads(f.read())) # dump and load for compactness + if data_path.suffix in (".yml", ".yaml"): + data = srsly.read_yaml(data_path) + else: + data = srsly.read_json(data_path) + data = srsly.json_dumps(data) # dump and load for compactness + template_path = Path(template_path) tpl_file = template_path.parts[-1] compiler = JinjaToJS(template_path.parent, tpl_file, js_module_format="es6") header = f"// This file was auto-generated by {__file__} based on {tpl_file}" @@ -1258,4 +1264,10 @@ def main( if __name__ == "__main__": - typer.run(main) + args = sys.argv[1:] + if not len(args): + raise ValueError("Need at least one argument: path to .jinja template") + template_path = Path(args[0]) + output = Path(args[1]) if len(args) > 1 else None + data_path = Path(args[2]) if len(args) > 2 else None + main(template_path, output, data_path) diff --git a/website/setup/requirements.txt b/website/setup/requirements.txt index 7ffb6df0b..e7a8e65a7 100644 --- a/website/setup/requirements.txt +++ b/website/setup/requirements.txt @@ -1,3 +1,3 @@ # These are used to compile the training quickstart config jinja2 -typer +srsly diff --git a/website/setup/setup.sh b/website/setup/setup.sh index a6bbd3294..674b25674 100755 --- a/website/setup/setup.sh +++ b/website/setup/setup.sh @@ -1 +1 @@ -python jinja_to_js.py ../../spacy/cli/templates/quickstart_training.jinja ../src/widgets/quickstart-training-generator.js --data ../../spacy/cli/templates/quickstart_training_recommendations.json +python jinja_to_js.py ../../spacy/cli/templates/quickstart_training.jinja ../src/widgets/quickstart-training-generator.js ../../spacy/cli/templates/quickstart_training_recommendations.yml diff --git a/website/src/widgets/quickstart-training-generator.js b/website/src/widgets/quickstart-training-generator.js index c7f856073..b5389d4d7 100644 --- 
a/website/src/widgets/quickstart-training-generator.js +++ b/website/src/widgets/quickstart-training-generator.js @@ -9,4 +9,4 @@ import jinjaToJS from "jinja-to-js";export default function templateQuickstartTr var use_transformer = context.transformer_data && context.hardware!=="cpu";var transformer = (use_transformer ? context.transformer_data[context.optimize] : {});__result += "[paths]\ntrain = \"\"\ndev = \"\"\n\n[system]\nuse_pytorch_for_gpu_memory = ";__result += "" + __runtime.escape((__tmp = ((use_transformer ? "true" : "false"))) == null ? "" : __tmp);__result += "\n\n[nlp]\nlang = \"";__result += "" + __runtime.escape((__tmp = (context.lang)) == null ? "" : __tmp);__result += "\"";var full_pipeline = [(use_transformer ? "transformer" : "tok2vec")].concat(context.components);__result += "\npipeline = ";__result += "" + ((__tmp = (JSON.stringify(full_pipeline).split("'").join("\""))) == null ? "" : __tmp);__result += "\ntokenizer = {\"@tokenizers\": \"spacy.Tokenizer.v1\"}\n\n[components]\n\n";if(__runtime.boolean(use_transformer)){__result += "[components.transformer]\nfactory = \"transformer\"\n\n[components.transformer.model]\n@architectures = \"spacy-transformers.TransformerModel.v1\"\nname = \"";__result += "" + __runtime.escape((__tmp = (transformer["name"])) == null ? 
"" : __tmp);__result += "\"\ntokenizer_config = {\"use_fast\": true}\n\n[components.transformer.model.get_spans]\n@span_getters = \"strided_spans.v1\"\nwindow = 128\nstride = 96\n\n";if(context.components.includes("tagger")){__result += "\n[components.tagger]\nfactory = \"tagger\"\n\n[components.tagger.model]\n@architectures = \"spacy.Tagger.v1\"\nnO = null\n\n[components.tagger.model.tok2vec]\n@architectures = \"spacy-transformers.Tok2VecListener.v1\"\ngrad_factor = 1.0\n\n[components.tagger.model.tok2vec.pooling]\n@layers = \"reduce_mean.v1\"";}__result += "\n\n";if(context.components.includes("parser")){__result += "[components.parser]\nfactory = \"parser\"\n\n[components.parser.model]\n@architectures = \"spacy.TransitionBasedParser.v1\"\nnr_feature_tokens = 8\nhidden_width = 128\nmaxout_pieces = 3\nuse_upper = false\nnO = null\n\n[components.parser.model.tok2vec]\n@architectures = \"spacy-transformers.Tok2VecListener.v1\"\ngrad_factor = 1.0\n\n[components.parser.model.tok2vec.pooling]\n@layers = \"reduce_mean.v1\"";}__result += "\n\n";if(context.components.includes("ner")){__result += "[components.ner]\nfactory = \"ner\"\n\n[components.ner.model]\n@architectures = \"spacy.TransitionBasedParser.v1\"\nnr_feature_tokens = 3\nhidden_width = 64\nmaxout_pieces = 2\nuse_upper = false\nnO = null\n\n[components.ner.model.tok2vec]\n@architectures = \"spacy-transformers.Tok2VecListener.v1\"\ngrad_factor = 1.0\n\n[components.ner.model.tok2vec.pooling]\n@layers = \"reduce_mean.v1\"\n";}__result += "\n";} else {if(context.hardware==="gpu"){__result += "# There are no recommended transformer weights available for language '";__result += "" + __runtime.escape((__tmp = (context.lang)) == null ? 
"" : __tmp);__result += "'\n# yet, so the pipeline described here is not transformer-based.";}__result += "\n\n[components.tok2vec]\nfactory = \"tok2vec\"\n\n[components.tok2vec.model]\n@architectures = \"spacy.Tok2Vec.v1\"\n\n[components.tok2vec.model.embed]\n@architectures = \"spacy.MultiHashEmbed.v1\"\nwidth = ${components.tok2vec.model.encode:width}\nrows = ";__result += "" + __runtime.escape((__tmp = ((context.optimize==="efficiency" ? 2000 : 7000))) == null ? "" : __tmp);__result += "\nalso_embed_subwords = ";__result += "" + __runtime.escape((__tmp = ((context.has_letters ? true : false))) == null ? "" : __tmp);__result += "\nalso_use_static_vectors = ";__result += "" + __runtime.escape((__tmp = ((context.optimize==="accuracy" ? true : false))) == null ? "" : __tmp);__result += "\n\n[components.tok2vec.model.encode]\n@architectures = \"spacy.MaxoutWindowEncoder.v1\"\nwidth = ";__result += "" + __runtime.escape((__tmp = ((context.optimize==="efficiency" ? 96 : 256))) == null ? "" : __tmp);__result += "\ndepth = ";__result += "" + __runtime.escape((__tmp = ((context.optimize==="efficiency" ? 4 : 8))) == null ? 
"" : __tmp);__result += "\nwindow_size = 1\nmaxout_pieces = 3\n\n";if(context.components.includes("tagger")){__result += "\n[components.tagger]\nfactory = \"tagger\"\n\n[components.tagger.model]\n@architectures = \"spacy.Tagger.v1\"\nnO = null\n\n[components.tagger.model.tok2vec]\n@architectures = \"spacy.Tok2VecListener.v1\"\nwidth = ${components.tok2vec.model.encode:width}";}__result += "\n\n";if(context.components.includes("parser")){__result += "[components.parser]\nfactory = \"parser\"\n\n[components.parser.model]\n@architectures = \"spacy.TransitionBasedParser.v1\"\nnr_feature_tokens = 8\nhidden_width = 128\nmaxout_pieces = 3\nuse_upper = true\nnO = null\n\n[components.parser.model.tok2vec]\n@architectures = \"spacy.Tok2VecListener.v1\"\nwidth = ${components.tok2vec.model.encode:width}";}__result += "\n\n";if(context.components.includes("ner")){__result += "\n[components.ner]\nfactory = \"ner\"\n\n[components.ner.model]\n@architectures = \"spacy.TransitionBasedParser.v1\"\nnr_feature_tokens = 6\nhidden_width = 64\nmaxout_pieces = 2\nuse_upper = true\nnO = null\n\n[components.ner.model.tok2vec]\n@architectures = \"spacy.Tok2VecListener.v1\"\nwidth = ${components.tok2vec.model.encode:width}\n";}__result += "\n";}__result += "\n\n";__runtime.each(context.components,function(pipe){var __$0 = context.pipe;context.pipe = pipe;__result += "\n";if(!["tagger","parser","ner"].includes(pipe)){__result += "\n";__result += "\n[components.";__result += "" + __runtime.escape((__tmp = (pipe)) == null ? "" : __tmp);__result += "]\nfactory = \"";__result += "" + __runtime.escape((__tmp = (pipe)) == null ? "" : __tmp);__result += "\"\n";}__result += "\n";context.pipe = __$0;});__result += "\n\n[training]\n";if(__runtime.boolean(use_transformer) || context.optimize==="efficiency" || !__runtime.boolean(context.word_vectors)){__result += "vectors = null\n";} else {__result += "vectors = \"";__result += "" + __runtime.escape((__tmp = (context.word_vectors)) == null ? 
"" : __tmp);__result += "\"\n";}if(__runtime.boolean(use_transformer)){__result += "accumulate_gradient = ";__result += "" + __runtime.escape((__tmp = (transformer["size_factor"])) == null ? "" : __tmp);__result += "\n";}__result += "\n\n[training.optimizer]\n@optimizers = \"Adam.v1\"\n\n[training.optimizer.learn_rate]\n@schedules = \"warmup_linear.v1\"\nwarmup_steps = 250\ntotal_steps = 20000\ninitial_rate = 5e-5\n\n[training.train_corpus]\n@readers = \"spacy.Corpus.v1\"\npath = ${paths:train}\nmax_length = ";__result += "" + __runtime.escape((__tmp = ((context.hardware==="gpu" ? 500 : 0))) == null ? "" : __tmp);__result += "\n\n[training.dev_corpus]\n@readers = \"spacy.Corpus.v1\"\npath = ${paths:dev}\nmax_length = 0\n\n";if(__runtime.boolean(use_transformer)){__result += "\n[training.batcher]\n@batchers = \"batch_by_padded.v1\"\ndiscard_oversize = true\nsize = 2000\nbuffer = 256";} else {__result += "\n[training.batcher]\n@batchers = \"batch_by_words.v1\"\ndiscard_oversize = false\ntolerance = 0.2\n\n[training.batcher.size]\n@schedules = \"compounding.v1\"\nstart = 100\nstop = 1000\ncompound = 1.001\n";}__result += "\n\n[training.score_weights]";if(context.components.includes("tagger")){__result += "\ntag_acc = ";__result += "" + __runtime.escape((__tmp = (Math.round((1.0 / __filters.size(context.components)+ Number.EPSILON) * 10**2) / 10**2)) == null ? "" : __tmp);}if(context.components.includes("parser")){__result += "\ndep_uas = 0.0\ndep_las = ";__result += "" + __runtime.escape((__tmp = (Math.round((1.0 / __filters.size(context.components)+ Number.EPSILON) * 10**2) / 10**2)) == null ? "" : __tmp);__result += "\nsents_f = 0.0";}if(context.components.includes("ner")){__result += "\nents_f = ";__result += "" + __runtime.escape((__tmp = (Math.round((1.0 / __filters.size(context.components)+ Number.EPSILON) * 10**2) / 10**2)) == null ? 
"" : __tmp);__result += "\nents_p = 0.0\nents_r = 0.0";} return __result; } -export const DATA = {"en": {"word_vectors": "en_vectors_web_lg", "transformer": {"efficiency": {"name": "roberta-base", "size_factor": 3}, "accuracy": {"name": "roberta-base", "size_factor": 3}}}, "de": {"word_vectors": null, "transformer": null}} \ No newline at end of file +export const DATA = {"en":{"word_vectors":"en_vectors_web_lg","transformer":{"efficiency":{"name":"roberta-base","size_factor":3},"accuracy":{"name":"roberta-base","size_factor":3}}},"de":{"word_vectors":null,"transformer":{"efficiency":{"name":"bert-base-german-cased","size_factor":3},"accuracy":{"name":"bert-base-german-cased","size_factor":3}}},"fr":{"word_vectors":null,"transformer":{"efficiency":{"name":"camembert-base","size_factor":3},"accuracy":{"name":"camembert-base","size_factor":3}}},"es":{"word_vectors":null,"transformer":{"efficiency":{"name":"mrm8488/RuPERTa-base","size_factor":3},"accuracy":{"name":"mrm8488/RuPERTa-base","size_factor":3}}},"sv":{"word_vectors":null,"transformer":{"efficiency":{"name":"KB/bert-base-swedish-cased","size_factor":3},"accuracy":{"name":"KB/bert-base-swedish-cased","size_factor":3}}},"fi":{"word_vectors":null,"transformer":{"efficiency":{"name":"TurkuNLP/bert-base-finnish-cased-v1","size_factor":3},"accuracy":{"name":"TurkuNLP/bert-base-finnish-cased-v1","size_factor":3}}},"el":{"word_vectors":null,"transformer":{"efficiency":{"name":"nlpaueb/bert-base-greek-uncased-v1","size_factor":3},"accuracy":{"name":"nlpaueb/bert-base-greek-uncased-v1","size_factor":3}}},"tr":{"word_vectors":null,"transformer":{"efficiency":{"name":"dbmdz/bert-base-turkish-cased","size_factor":3},"accuracy":{"name":"dbmdz/bert-base-turkish-cased","size_factor":3}}},"zh":{"word_vectors":null,"transformer":{"efficiency":{"name":"bert-base-chinese","size_factor":3},"accuracy":{"name":"bert-base-chinese","size_factor":3}},"has_letters":false},"ar":{"word_vectors":null,"transformer":{"efficiency":{"name":"as
afaya/bert-base-arabic","size_factor":3},"accuracy":{"name":"asafaya/bert-base-arabic","size_factor":3}}},"pl":{"word_vectors":null,"transformer":{"efficiency":{"name":"dkleczek/bert-base-polish-cased-v1","size_factor":3},"accuracy":{"name":"dkleczek/bert-base-polish-cased-v1","size_factor":3}}}} \ No newline at end of file diff --git a/website/src/widgets/quickstart-training.js b/website/src/widgets/quickstart-training.js index 1a77cc338..ae8d41b64 100644 --- a/website/src/widgets/quickstart-training.js +++ b/website/src/widgets/quickstart-training.js @@ -4,7 +4,7 @@ import highlightCode from 'gatsby-remark-prismjs/highlight-code.js' import { Quickstart } from '../components/quickstart' import generator, { DATA as GENERATOR_DATA } from './quickstart-training-generator' -import { isString, htmlToReact } from '../components/util' +import { htmlToReact } from '../components/util' const DEFAULT_LANG = 'en' const DEFAULT_HARDWARE = 'gpu' @@ -47,13 +47,6 @@ const DATA = [ }, ] -function stringify(value) { - if (isString(value) && value.startsWith('${')) return value - const string = JSON.stringify(value) - if (Array.isArray(value)) return string.replace(/,/g, ', ') - return string -} - export default function QuickstartTraining({ id, title, download = 'config.cfg' }) { const [lang, setLang] = useState(DEFAULT_LANG) const [components, setComponents] = useState([]) @@ -73,6 +66,7 @@ export default function QuickstartTraining({ id, title, download = 'config.cfg' hardware, transformer_data: reco.transformer, word_vectors: reco.word_vectors, + has_letters: reco.has_letters, }) const rawStr = content.trim().replace(/\n\n\n+/g, '\n\n') const rawContent = `${COMMENT}\n${rawStr}` @@ -90,7 +84,7 @@ export default function QuickstartTraining({ id, title, download = 'config.cfg' id: code, title: name, })) - .sort((a, b) => a.id.localeCompare(b.id)) + .sort((a, b) => a.title.localeCompare(b.title)) return (