2020-08-19 14:33:15 +03:00
|
|
|
# Recommended settings and resources for each language, where available.
|
2020-08-19 19:46:08 +03:00
|
|
|
# Not all languages have recommended word vectors or transformers, and for some,
|
2020-08-19 14:33:15 +03:00
|
|
|
# the recommended transformer for efficiency and accuracy may be the same.
|
2021-01-27 15:56:33 +03:00
|
|
|
__default__:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: bert-base-multilingual-uncased
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: bert-base-multilingual-uncased
|
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
ar:
|
|
|
|
word_vectors: null
|
2020-08-19 14:33:15 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: asafaya/bert-base-arabic
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: asafaya/bert-base-arabic
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
2021-01-28 03:14:49 +03:00
|
|
|
bg:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: iarfmoose/roberta-base-bulgarian
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: iarfmoose/roberta-base-bulgarian
|
|
|
|
size_factor: 3
|
2021-02-12 17:55:17 +03:00
|
|
|
bn:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: sagorsarker/bangla-bert-base
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: sagorsarker/bangla-bert-base
|
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
da:
|
|
|
|
word_vectors: da_core_news_lg
|
2021-01-28 03:14:49 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: DJSammy/bert-base-danish-uncased_BotXO,ai
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: DJSammy/bert-base-danish-uncased_BotXO,ai
|
|
|
|
size_factor: 3
|
2020-08-19 14:33:15 +03:00
|
|
|
de:
|
2020-10-16 09:17:53 +03:00
|
|
|
word_vectors: de_core_news_lg
|
2020-08-19 14:33:15 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: bert-base-german-cased
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: bert-base-german-cased
|
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
el:
|
|
|
|
word_vectors: el_core_news_lg
|
2020-08-19 14:33:15 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: nlpaueb/bert-base-greek-uncased-v1
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: nlpaueb/bert-base-greek-uncased-v1
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
en:
|
|
|
|
word_vectors: en_core_web_lg
|
2020-08-19 14:33:15 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: roberta-base
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: roberta-base
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
es:
|
|
|
|
word_vectors: es_core_news_lg
|
2020-08-19 14:33:15 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: dccuchile/bert-base-spanish-wwm-cased
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: dccuchile/bert-base-spanish-wwm-cased
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
2021-01-28 03:14:49 +03:00
|
|
|
eu:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: mrm8488/RoBasquERTa
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: mrm8488/RoBasquERTa
|
|
|
|
size_factor: 3
|
2020-08-19 14:33:15 +03:00
|
|
|
fi:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: TurkuNLP/bert-base-finnish-cased-v1
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: TurkuNLP/bert-base-finnish-cased-v1
|
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
fr:
|
|
|
|
word_vectors: fr_core_news_lg
|
2020-08-19 14:33:15 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: camembert-base
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: camembert-base
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
2021-01-28 03:14:49 +03:00
|
|
|
hi:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
2021-02-12 17:55:17 +03:00
|
|
|
name: ai4bharat/indic-bert
|
2021-01-28 03:14:49 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2021-02-12 17:55:17 +03:00
|
|
|
name: ai4bharat/indic-bert
|
2021-01-28 03:14:49 +03:00
|
|
|
size_factor: 3
|
|
|
|
id:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: indolem/indobert-base-uncased
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: indolem/indobert-base-uncased
|
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
it:
|
|
|
|
word_vectors: it_core_news_lg
|
2021-01-28 03:14:49 +03:00
|
|
|
transformer: null
|
2020-10-16 09:25:16 +03:00
|
|
|
ja:
|
|
|
|
word_vectors: ja_core_news_lg
|
2021-01-28 03:14:49 +03:00
|
|
|
transformer: null
|
2020-10-16 09:25:16 +03:00
|
|
|
lt:
|
|
|
|
word_vectors: lt_core_news_lg
|
2021-01-28 03:14:49 +03:00
|
|
|
transformer: null
|
|
|
|
mk:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: anon-submission-mk/bert-base-macedonian-cased
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: anon-submission-mk/bert-base-macedonian-cased
|
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
nb:
|
|
|
|
word_vectors: nb_core_news_lg
|
2021-01-28 03:14:49 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: NbAiLab/nb-bert-base
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: NbAiLab/nb-bert-base
|
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
nl:
|
|
|
|
word_vectors: nl_core_news_lg
|
2020-08-19 14:33:15 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
2021-02-14 07:30:16 +03:00
|
|
|
name: GroNLP/bert-base-dutch-cased
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2021-02-14 07:30:16 +03:00
|
|
|
name: GroNLP/bert-base-dutch-cased
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
pl:
|
|
|
|
word_vectors: pl_core_news_lg
|
2020-08-19 14:33:15 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: dkleczek/bert-base-polish-cased-v1
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: dkleczek/bert-base-polish-cased-v1
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
pt:
|
|
|
|
word_vectors: pt_core_news_lg
|
2020-08-19 14:33:15 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: neuralmind/bert-base-portuguese-cased
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: neuralmind/bert-base-portuguese-cased
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
ro:
|
|
|
|
word_vectors: ro_core_news_lg
|
2021-01-28 03:14:49 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: dumitrescustefan/bert-base-romanian-cased-v1
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: dumitrescustefan/bert-base-romanian-cased-v1
|
|
|
|
size_factor: 3
|
|
|
|
si:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
2021-02-12 17:55:17 +03:00
|
|
|
name: setu4993/LaBSE
|
2021-01-28 03:14:49 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2021-02-12 17:55:17 +03:00
|
|
|
name: setu4993/LaBSE
|
2021-01-28 03:14:49 +03:00
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
sv:
|
|
|
|
word_vectors: null
|
2020-08-19 14:33:15 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: KB/bert-base-swedish-cased
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: KB/bert-base-swedish-cased
|
2020-08-19 14:33:15 +03:00
|
|
|
size_factor: 3
|
2021-01-28 03:14:49 +03:00
|
|
|
ta:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
2021-02-12 17:55:17 +03:00
|
|
|
name: ai4bharat/indic-bert
|
2021-01-28 03:14:49 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2021-02-12 17:55:17 +03:00
|
|
|
name: ai4bharat/indic-bert
|
2021-01-28 03:14:49 +03:00
|
|
|
size_factor: 3
|
|
|
|
te:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: kuppuluri/telugu_bertu
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: kuppuluri/telugu_bertu
|
|
|
|
size_factor: 3
|
|
|
|
th:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: monsoon-nlp/bert-base-thai
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: monsoon-nlp/bert-base-thai
|
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
tr:
|
|
|
|
word_vectors: null
|
2020-10-13 16:41:17 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: dbmdz/bert-base-turkish-cased
|
2020-10-13 16:41:17 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: dbmdz/bert-base-turkish-cased
|
2020-10-13 16:41:17 +03:00
|
|
|
size_factor: 3
|
2021-01-28 03:14:49 +03:00
|
|
|
uk:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: youscan/ukr-roberta-base
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: youscan/ukr-roberta-base
|
|
|
|
size_factor: 3
|
|
|
|
ur:
|
|
|
|
word_vectors: null
|
|
|
|
transformer:
|
|
|
|
efficiency:
|
|
|
|
name: urduhack/roberta-urdu-small
|
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
|
|
|
name: urduhack/roberta-urdu-small
|
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
zh:
|
|
|
|
word_vectors: zh_core_web_lg
|
2020-10-13 16:41:17 +03:00
|
|
|
transformer:
|
|
|
|
efficiency:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: bert-base-chinese
|
2020-10-13 16:41:17 +03:00
|
|
|
size_factor: 3
|
|
|
|
accuracy:
|
2020-10-16 09:25:16 +03:00
|
|
|
name: bert-base-chinese
|
2020-10-13 16:41:17 +03:00
|
|
|
size_factor: 3
|
2020-10-16 09:25:16 +03:00
|
|
|
has_letters: false
|