Improve GPU usage for train-from-config (#5330)

* Adjust for no ops in Optimizer

* Fix gpu in train-from-config

* Update train-from-config script

* Fix parser

* Fix GPU efficiency of padding backprop
This commit is contained in:
Matthew Honnibal 2020-04-20 22:06:28 +02:00 committed by GitHub
parent 42364dcd9f
commit 6918d99b6c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 27 additions and 32 deletions

View File

@ -1,4 +1,5 @@
from typing import Optional, Dict, List, Union, Sequence from typing import Optional, Dict, List, Union, Sequence
from timeit import default_timer as timer
from pydantic import BaseModel, FilePath from pydantic import BaseModel, FilePath
import plac import plac
import tqdm import tqdm
@ -146,30 +147,29 @@ def train_from_config_cli(
if output_path is not None and not output_path.exists(): if output_path is not None and not output_path.exists():
output_path.mkdir() output_path.mkdir()
try: train_from_config(
train_from_config( config_path,
config_path, {"train": train_path, "dev": dev_path},
{"train": train_path, "dev": dev_path}, output_path=output_path,
output_path=output_path, meta_path=meta_path,
meta_path=meta_path, raw_text=raw_text,
raw_text=raw_text, )
)
except KeyboardInterrupt:
msg.warn("Cancelled.")
def train_from_config( def train_from_config(
config_path, data_paths, raw_text=None, meta_path=None, output_path=None, config_path, data_paths, raw_text=None, meta_path=None, output_path=None,
): ):
msg.info(f"Loading config from: {config_path}") msg.info(f"Loading config from: {config_path}")
config = util.load_config(config_path, create_objects=True) config = util.load_config(config_path, create_objects=False)
nlp_config = config["nlp"]
use_gpu = config["training"]["use_gpu"] use_gpu = config["training"]["use_gpu"]
if use_gpu >= 0: if use_gpu >= 0:
msg.info("Using GPU") msg.info("Using GPU")
util.use_gpu(use_gpu)
else: else:
msg.info("Using CPU") msg.info("Using CPU")
config = util.load_config(config_path, create_objects=True)
msg.info("Creating nlp from config") msg.info("Creating nlp from config")
nlp_config = util.load_config(config_path, create_objects=False)["nlp"]
nlp = util.load_model_from_config(nlp_config) nlp = util.load_model_from_config(nlp_config)
optimizer = config["optimizer"] optimizer = config["optimizer"]
training = config["training"] training = config["training"]
@ -240,12 +240,17 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg):
nlp, gold_preproc=cfg["gold_preproc"], ignore_misaligned=True nlp, gold_preproc=cfg["gold_preproc"], ignore_misaligned=True
) )
) )
n_words = sum(len(ex.doc) for ex in dev_examples)
start_time = timer()
scorer = nlp.evaluate(dev_examples) scorer = nlp.evaluate(dev_examples)
end_time = timer()
wps = n_words / (end_time - start_time)
scores = scorer.scores scores = scorer.scores
# Calculate a weighted sum based on score_weights for the main score # Calculate a weighted sum based on score_weights for the main score
weights = cfg["score_weights"] weights = cfg["score_weights"]
weighted_score = sum(scores[s] * weights.get(s, 0.0) for s in weights) weighted_score = sum(scores[s] * weights.get(s, 0.0) for s in weights)
return weighted_score, scorer.scores scores["speed"] = wps
return weighted_score, scores
return evaluate return evaluate
@ -346,13 +351,13 @@ def setup_printer(training, nlp):
def print_row(info): def print_row(info):
losses = [ losses = [
"{0:.2f}".format(info["losses"].get(pipe_name, 0.0)) "{0:.2f}".format(float(info["losses"].get(pipe_name, 0.0)))
for pipe_name in nlp.pipe_names for pipe_name in nlp.pipe_names
] ]
scores = [ scores = [
"{0:.2f}".format(info["other_scores"].get(col, 0.0)) for col in score_cols "{0:.2f}".format(float(info["other_scores"].get(col, 0.0))) for col in score_cols
] ]
data = [info["step"]] + losses + scores + ["{0:.2f}".format(info["score"])] data = [info["step"]] + losses + scores + ["{0:.2f}".format(float(info["score"]))]
msg.row(data, widths=table_widths, aligns=table_aligns) msg.row(data, widths=table_widths, aligns=table_aligns)
return print_row return print_row

View File

@ -79,23 +79,14 @@ def _backprop_precomputable_affine_padding(model, dY, ids):
# for b in range(nB): # for b in range(nB):
# for f in range(nF): # for f in range(nF):
# if ids[b, f] < 0: # if ids[b, f] < 0:
# d_pad[0, f] += dY[b] # d_pad[f] += dY[b]
# #
# Which can be rewritten as: # Which can be rewritten as:
# #
# for b in range(nB): # (ids < 0).T @ dY
# d_pad[0, ids[b] < 0] += dY[b] mask = model.ops.asarray(ids < 0, dtype="f")
# d_pad = model.ops.gemm(mask, dY.reshape(nB, nO*nP), trans1=True)
# I don't know how to avoid the loop without building a whole array :(. return d_pad.reshape((1, nF, nO, nP))
# Cursed numpy.
#
# Note by Sofie: rewritten to longer loop because "CuPy only supports slices that consist of one boolean array."
d_pad = model.ops.alloc((1, nF, nO, nP))
for b in range(nB):
for f in range(nF):
if ids[b, f] < 0:
d_pad[0, f] += dY[b]
return d_pad
def init(model, X=None, Y=None): def init(model, X=None, Y=None):

View File

@ -216,6 +216,7 @@ cdef class Parser:
# expand our model output. # expand our model output.
self._resize() self._resize()
model = self.model.predict(docs) model = self.model.predict(docs)
W_param = model.vec2scores.get_param("W")
weights = get_c_weights(model) weights = get_c_weights(model)
for state in batch: for state in batch:
if not state.is_final(): if not state.is_final():

View File

@ -784,7 +784,6 @@ VECTORS_KEY = "spacy_pretrained_vectors"
def create_default_optimizer(): def create_default_optimizer():
ops = get_current_ops()
learn_rate = env_opt("learn_rate", 0.001) learn_rate = env_opt("learn_rate", 0.001)
beta1 = env_opt("optimizer_B1", 0.9) beta1 = env_opt("optimizer_B1", 0.9)
beta2 = env_opt("optimizer_B2", 0.999) beta2 = env_opt("optimizer_B2", 0.999)
@ -798,7 +797,6 @@ def create_default_optimizer():
beta1=beta1, beta1=beta1,
beta2=beta2, beta2=beta2,
eps=eps, eps=eps,
ops=ops,
grad_clip=grad_clip, grad_clip=grad_clip,
L2_is_weight_decay=L2_is_weight_decay, L2_is_weight_decay=L2_is_weight_decay,
) )