mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 09:44:36 +03:00
Improve GPU usage for train-with-config (#5330)
* Adjust for no ops in Optimizer
* Fix gpu in train-from-config
* Update train-from-config script
* Fix parser
* Fix GPU efficiency of padding backprop
This commit is contained in:
parent
42364dcd9f
commit
6918d99b6c
|
@ -1,4 +1,5 @@
|
||||||
from typing import Optional, Dict, List, Union, Sequence
|
from typing import Optional, Dict, List, Union, Sequence
|
||||||
|
from timeit import default_timer as timer
|
||||||
from pydantic import BaseModel, FilePath
|
from pydantic import BaseModel, FilePath
|
||||||
import plac
|
import plac
|
||||||
import tqdm
|
import tqdm
|
||||||
|
@ -146,30 +147,29 @@ def train_from_config_cli(
|
||||||
if output_path is not None and not output_path.exists():
|
if output_path is not None and not output_path.exists():
|
||||||
output_path.mkdir()
|
output_path.mkdir()
|
||||||
|
|
||||||
try:
|
train_from_config(
|
||||||
train_from_config(
|
config_path,
|
||||||
config_path,
|
{"train": train_path, "dev": dev_path},
|
||||||
{"train": train_path, "dev": dev_path},
|
output_path=output_path,
|
||||||
output_path=output_path,
|
meta_path=meta_path,
|
||||||
meta_path=meta_path,
|
raw_text=raw_text,
|
||||||
raw_text=raw_text,
|
)
|
||||||
)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
msg.warn("Cancelled.")
|
|
||||||
|
|
||||||
|
|
||||||
def train_from_config(
|
def train_from_config(
|
||||||
config_path, data_paths, raw_text=None, meta_path=None, output_path=None,
|
config_path, data_paths, raw_text=None, meta_path=None, output_path=None,
|
||||||
):
|
):
|
||||||
msg.info(f"Loading config from: {config_path}")
|
msg.info(f"Loading config from: {config_path}")
|
||||||
config = util.load_config(config_path, create_objects=True)
|
config = util.load_config(config_path, create_objects=False)
|
||||||
|
nlp_config = config["nlp"]
|
||||||
use_gpu = config["training"]["use_gpu"]
|
use_gpu = config["training"]["use_gpu"]
|
||||||
if use_gpu >= 0:
|
if use_gpu >= 0:
|
||||||
msg.info("Using GPU")
|
msg.info("Using GPU")
|
||||||
|
util.use_gpu(use_gpu)
|
||||||
else:
|
else:
|
||||||
msg.info("Using CPU")
|
msg.info("Using CPU")
|
||||||
|
config = util.load_config(config_path, create_objects=True)
|
||||||
msg.info("Creating nlp from config")
|
msg.info("Creating nlp from config")
|
||||||
nlp_config = util.load_config(config_path, create_objects=False)["nlp"]
|
|
||||||
nlp = util.load_model_from_config(nlp_config)
|
nlp = util.load_model_from_config(nlp_config)
|
||||||
optimizer = config["optimizer"]
|
optimizer = config["optimizer"]
|
||||||
training = config["training"]
|
training = config["training"]
|
||||||
|
@ -240,12 +240,17 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg):
|
||||||
nlp, gold_preproc=cfg["gold_preproc"], ignore_misaligned=True
|
nlp, gold_preproc=cfg["gold_preproc"], ignore_misaligned=True
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
n_words = sum(len(ex.doc) for ex in dev_examples)
|
||||||
|
start_time = timer()
|
||||||
scorer = nlp.evaluate(dev_examples)
|
scorer = nlp.evaluate(dev_examples)
|
||||||
|
end_time = timer()
|
||||||
|
wps = n_words / (end_time - start_time)
|
||||||
scores = scorer.scores
|
scores = scorer.scores
|
||||||
# Calculate a weighted sum based on score_weights for the main score
|
# Calculate a weighted sum based on score_weights for the main score
|
||||||
weights = cfg["score_weights"]
|
weights = cfg["score_weights"]
|
||||||
weighted_score = sum(scores[s] * weights.get(s, 0.0) for s in weights)
|
weighted_score = sum(scores[s] * weights.get(s, 0.0) for s in weights)
|
||||||
return weighted_score, scorer.scores
|
scores["speed"] = wps
|
||||||
|
return weighted_score, scores
|
||||||
|
|
||||||
return evaluate
|
return evaluate
|
||||||
|
|
||||||
|
@ -346,13 +351,13 @@ def setup_printer(training, nlp):
|
||||||
|
|
||||||
def print_row(info):
|
def print_row(info):
|
||||||
losses = [
|
losses = [
|
||||||
"{0:.2f}".format(info["losses"].get(pipe_name, 0.0))
|
"{0:.2f}".format(float(info["losses"].get(pipe_name, 0.0)))
|
||||||
for pipe_name in nlp.pipe_names
|
for pipe_name in nlp.pipe_names
|
||||||
]
|
]
|
||||||
scores = [
|
scores = [
|
||||||
"{0:.2f}".format(info["other_scores"].get(col, 0.0)) for col in score_cols
|
"{0:.2f}".format(float(info["other_scores"].get(col, 0.0))) for col in score_cols
|
||||||
]
|
]
|
||||||
data = [info["step"]] + losses + scores + ["{0:.2f}".format(info["score"])]
|
data = [info["step"]] + losses + scores + ["{0:.2f}".format(float(info["score"]))]
|
||||||
msg.row(data, widths=table_widths, aligns=table_aligns)
|
msg.row(data, widths=table_widths, aligns=table_aligns)
|
||||||
|
|
||||||
return print_row
|
return print_row
|
||||||
|
|
|
@ -79,23 +79,14 @@ def _backprop_precomputable_affine_padding(model, dY, ids):
|
||||||
# for b in range(nB):
|
# for b in range(nB):
|
||||||
# for f in range(nF):
|
# for f in range(nF):
|
||||||
# if ids[b, f] < 0:
|
# if ids[b, f] < 0:
|
||||||
# d_pad[0, f] += dY[b]
|
# d_pad[f] += dY[b]
|
||||||
#
|
#
|
||||||
# Which can be rewritten as:
|
# Which can be rewritten as:
|
||||||
#
|
#
|
||||||
# for b in range(nB):
|
# (ids < 0).T @ dY
|
||||||
# d_pad[0, ids[b] < 0] += dY[b]
|
mask = model.ops.asarray(ids < 0, dtype="f")
|
||||||
#
|
d_pad = model.ops.gemm(mask, dY.reshape(nB, nO*nP), trans1=True)
|
||||||
# I don't know how to avoid the loop without building a whole array :(.
|
return d_pad.reshape((1, nF, nO, nP))
|
||||||
# Cursed numpy.
|
|
||||||
#
|
|
||||||
# Note by Sofie: rewritten to longer loop because "CuPy only supports slices that consist of one boolean array."
|
|
||||||
d_pad = model.ops.alloc((1, nF, nO, nP))
|
|
||||||
for b in range(nB):
|
|
||||||
for f in range(nF):
|
|
||||||
if ids[b, f] < 0:
|
|
||||||
d_pad[0, f] += dY[b]
|
|
||||||
return d_pad
|
|
||||||
|
|
||||||
|
|
||||||
def init(model, X=None, Y=None):
|
def init(model, X=None, Y=None):
|
||||||
|
|
|
@ -216,6 +216,7 @@ cdef class Parser:
|
||||||
# expand our model output.
|
# expand our model output.
|
||||||
self._resize()
|
self._resize()
|
||||||
model = self.model.predict(docs)
|
model = self.model.predict(docs)
|
||||||
|
W_param = model.vec2scores.get_param("W")
|
||||||
weights = get_c_weights(model)
|
weights = get_c_weights(model)
|
||||||
for state in batch:
|
for state in batch:
|
||||||
if not state.is_final():
|
if not state.is_final():
|
||||||
|
|
|
@ -784,7 +784,6 @@ VECTORS_KEY = "spacy_pretrained_vectors"
|
||||||
|
|
||||||
|
|
||||||
def create_default_optimizer():
|
def create_default_optimizer():
|
||||||
ops = get_current_ops()
|
|
||||||
learn_rate = env_opt("learn_rate", 0.001)
|
learn_rate = env_opt("learn_rate", 0.001)
|
||||||
beta1 = env_opt("optimizer_B1", 0.9)
|
beta1 = env_opt("optimizer_B1", 0.9)
|
||||||
beta2 = env_opt("optimizer_B2", 0.999)
|
beta2 = env_opt("optimizer_B2", 0.999)
|
||||||
|
@ -798,7 +797,6 @@ def create_default_optimizer():
|
||||||
beta1=beta1,
|
beta1=beta1,
|
||||||
beta2=beta2,
|
beta2=beta2,
|
||||||
eps=eps,
|
eps=eps,
|
||||||
ops=ops,
|
|
||||||
grad_clip=grad_clip,
|
grad_clip=grad_clip,
|
||||||
L2_is_weight_decay=L2_is_weight_decay,
|
L2_is_weight_decay=L2_is_weight_decay,
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user