diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index fb2333b7d..b20a8a7e8 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -38,6 +38,7 @@ from .train import _load_pretrained_tok2vec
     width=("Width of CNN layers", "option", "cw", int),
    depth=("Depth of CNN layers", "option", "cd", int),
    cnn_window=("Window size for CNN layers", "option", "cW", int),
+    cnn_pieces=("Maxout size for CNN layers. 1 for Mish", "option", "cP", int),
    use_chars=("Whether to use character-based embedding", "flag", "chr", bool),
    sa_depth=("Depth of self-attention layers", "option", "sa", int),
    bilstm_depth=("Depth of BiLSTM layers (requires PyTorch)", "option", "lstm", int),
@@ -87,6 +88,7 @@ def pretrain(
    width=96,
    depth=4,
    bilstm_depth=0,
+    cnn_pieces=3,
    sa_depth=0,
    use_chars=False,
    cnn_window=1,
@@ -165,7 +167,7 @@ def pretrain(
            char_embed=use_chars,
            self_attn_depth=sa_depth,  # Experimental.
            bilstm_depth=bilstm_depth,  # Requires PyTorch. Experimental.
-            cnn_maxout_pieces=3,  # You can try setting this higher
+            cnn_maxout_pieces=cnn_pieces,  # You can try setting this higher
            subword_features=not use_chars,  # Set to False for Chinese etc
        ),
        objective=loss_func
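
The diff exposes the previously hard-coded `cnn_maxout_pieces=3` as a `cnn_pieces` CLI option with abbreviation `-cP`. A sketch of how an invocation might look (the corpus path and vectors model here are placeholders, not from this diff):

    # hypothetical example: pretrain with Mish activation (-cP 1) instead of maxout
    python -m spacy pretrain texts.jsonl en_vectors_web_lg /tmp/pretrain -cP 1

Per the new help text, `-cP 1` selects Mish rather than maxout; the default of 3 keeps the behavior of the old hard-coded value.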