diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 60f703d2f..891e15fa2 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -35,6 +35,7 @@ from .train import _load_pretrained_tok2vec
     output_dir=("Directory to write models to on each epoch", "positional", None, str),
     width=("Width of CNN layers", "option", "cw", int),
     depth=("Depth of CNN layers", "option", "cd", int),
+    bilstm_depth=("Depth of BiLSTM layers (requires PyTorch)", "option", "lstm", int),
    embed_rows=("Number of embedding rows", "option", "er", int),
     loss_func=(
         "Loss function to use for the objective. Either 'L2' or 'cosine'",
@@ -80,6 +81,7 @@ def pretrain(
     output_dir,
     width=96,
     depth=4,
+    bilstm_depth=2,
     embed_rows=2000,
     loss_func="cosine",
     use_vectors=False,
@@ -116,6 +118,10 @@ def pretrain(
     util.fix_random_seed(seed)
 
     has_gpu = prefer_gpu()
+    if has_gpu:
+        import torch
+
+        torch.set_default_tensor_type("torch.cuda.FloatTensor")
     msg.info("Using GPU" if has_gpu else "Not using GPU")
 
     output_dir = Path(output_dir)
@@ -151,7 +157,7 @@ def pretrain(
             embed_rows,
             conv_depth=depth,
             pretrained_vectors=pretrained_vectors,
-            bilstm_depth=0,  # Requires PyTorch. Experimental.
+            bilstm_depth=bilstm_depth,  # Requires PyTorch. Experimental.
             cnn_maxout_pieces=3,  # You can try setting this higher
             subword_features=True,  # Set to False for Chinese etc
         ),