mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 05:37:03 +03:00
311133e579
* bring back default build_text_classifier method * remove _set_dims_ hack in favor of proper dim inference * add tok2vec initialize to unit test * small fixes * add unit test for various textcat config settings * logistic output layer does not have nO * fix window_size setting * proper fix * fix W initialization * Update textcat training example * Use ml_datasets * Convert training data to `Example` format * Use `n_texts` to set proportionate dev size * fix _init renaming on latest thinc * avoid setting a non-existing dim * update to thinc==8.0.0a2 * add BOW and CNN defaults for easy testing * various experiments with train_textcat script, fix softmax activation in textcat bow * allow textcat train script to work on other datasets as well * have dataset as a parameter * train textcat from config, with example config * add config for training textcat * formatting * fix exclusive_classes * fixing BOW for GPU * bump thinc to 8.0.0a3 (not published yet so CI will fail) * add in link_vectors_to_models which got deleted Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
20 lines
359 B
INI
20 lines
359 B
INI
# Config for a text-classification ("textcat") pipeline using a CNN-based model.
# Values are written as JSON literals (strings quoted, true/false/null lowercase).
[nlp]
lang = "en"

# Declares a pipeline component named "textcat", built by the "textcat" factory.
[nlp.pipeline.textcat]
factory = "textcat"

# "@architectures" selects a registered model-building function by name; the
# other keys in this section (and its sub-sections) are that function's settings.
[nlp.pipeline.textcat.model]
@architectures = "spacy.TextCatCNN.v1"
# NOTE(review): false presumably means the categories are not mutually
# exclusive (multi-label classification) - confirm against the architecture docs.
exclusive_classes = false

# Token-to-vector sub-model nested under the text classifier's model.
# NOTE(review): the exact meaning and valid range of each hyperparameter below
# are defined by the "spacy.HashEmbedCNN.v1" architecture; verify before tuning.
[nlp.pipeline.textcat.model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
# null = no value supplied for pretrained vectors.
pretrained_vectors = null
width = 96
depth = 4
embed_size = 2000
window_size = 1
maxout_pieces = 3
subword_features = true