Improve GPU usage for train-with-config (#5330)
* Adjust for no ops in Optimizer
* Fix gpu in train-from-config
* Update train-from-config script
* Fix parser
* Fix GPU efficiency of padding backprop
parent 42364dcd9f
commit 6918d99b6c
@@ -1,4 +1,5 @@
 from typing import Optional, Dict, List, Union, Sequence
+from timeit import default_timer as timer
 from pydantic import BaseModel, FilePath
 import plac
 import tqdm
@@ -146,30 +147,29 @@ def train_from_config_cli(
     if output_path is not None and not output_path.exists():
         output_path.mkdir()
 
-    try:
-        train_from_config(
-            config_path,
-            {"train": train_path, "dev": dev_path},
-            output_path=output_path,
-            meta_path=meta_path,
-            raw_text=raw_text,
-        )
-    except KeyboardInterrupt:
-        msg.warn("Cancelled.")
+    train_from_config(
+        config_path,
+        {"train": train_path, "dev": dev_path},
+        output_path=output_path,
+        meta_path=meta_path,
+        raw_text=raw_text,
+    )
 
 
 def train_from_config(
     config_path, data_paths, raw_text=None, meta_path=None, output_path=None,
 ):
     msg.info(f"Loading config from: {config_path}")
-    config = util.load_config(config_path, create_objects=True)
-    nlp_config = config["nlp"]
+    config = util.load_config(config_path, create_objects=False)
     use_gpu = config["training"]["use_gpu"]
     if use_gpu >= 0:
         msg.info("Using GPU")
         util.use_gpu(use_gpu)
     else:
         msg.info("Using CPU")
+    config = util.load_config(config_path, create_objects=True)
     msg.info("Creating nlp from config")
+    nlp_config = util.load_config(config_path, create_objects=False)["nlp"]
     nlp = util.load_model_from_config(nlp_config)
     optimizer = config["optimizer"]
     training = config["training"]
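Note on the double config load above: the device has to be selected before any objects are created from the config, otherwise their parameters are allocated on the CPU. A minimal sketch of the ordering the new code enforces, reusing only the helpers that appear in the hunk (util.load_config, util.use_gpu); this is an illustration, not the full function:

    # Pass 1: read the config as plain data, just to find the requested device.
    config = util.load_config(config_path, create_objects=False)
    use_gpu = config["training"]["use_gpu"]
    if use_gpu >= 0:
        util.use_gpu(use_gpu)  # switch to the GPU before any arrays are allocated
    # Pass 2: read the config again, now instantiating objects such as the optimizer,
    # so that their parameters are created on the device selected above.
    config = util.load_config(config_path, create_objects=True)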
@@ -240,12 +240,17 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg):
                 nlp, gold_preproc=cfg["gold_preproc"], ignore_misaligned=True
             )
         )
+        n_words = sum(len(ex.doc) for ex in dev_examples)
+        start_time = timer()
         scorer = nlp.evaluate(dev_examples)
+        end_time = timer()
+        wps = n_words / (end_time - start_time)
         scores = scorer.scores
         # Calculate a weighted sum based on score_weights for the main score
         weights = cfg["score_weights"]
         weighted_score = sum(scores[s] * weights.get(s, 0.0) for s in weights)
-        return weighted_score, scorer.scores
+        scores["speed"] = wps
+        return weighted_score, scores
 
     return evaluate
 
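Note on the new speed figure: the callback now times the evaluation pass and attaches a words-per-second score next to the other metrics. A small self-contained sketch of the same pattern (the helper name, the len()-based word count, and the dict-returning evaluate_fn are illustrative assumptions, not spaCy's API):

    from timeit import default_timer as timer

    def timed_evaluate(evaluate_fn, examples):
        # Time one evaluation pass and report words per second alongside the scores.
        n_words = sum(len(ex) for ex in examples)
        start_time = timer()
        scores = evaluate_fn(examples)
        scores["speed"] = n_words / (timer() - start_time)
        return scores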
@@ -346,13 +351,13 @@ def setup_printer(training, nlp):
 
     def print_row(info):
         losses = [
-            "{0:.2f}".format(info["losses"].get(pipe_name, 0.0))
+            "{0:.2f}".format(float(info["losses"].get(pipe_name, 0.0)))
             for pipe_name in nlp.pipe_names
         ]
         scores = [
-            "{0:.2f}".format(info["other_scores"].get(col, 0.0)) for col in score_cols
+            "{0:.2f}".format(float(info["other_scores"].get(col, 0.0))) for col in score_cols
         ]
-        data = [info["step"]] + losses + scores + ["{0:.2f}".format(info["score"])]
+        data = [info["step"]] + losses + scores + ["{0:.2f}".format(float(info["score"]))]
         msg.row(data, widths=table_widths, aligns=table_aligns)
 
     return print_row
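Note on the float() wrappers: presumably, once training runs on the GPU, losses and scores can arrive as numpy/cupy scalars rather than plain Python floats, and coercing them first keeps the "{0:.2f}" formatting safe. Illustrative helper only, not part of the diff:

    def fmt_cell(value):
        # Coerce a possibly non-native scalar to a plain Python float before formatting.
        return "{0:.2f}".format(float(value))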
@@ -79,23 +79,14 @@ def _backprop_precomputable_affine_padding(model, dY, ids):
     # for b in range(nB):
     #     for f in range(nF):
     #         if ids[b, f] < 0:
-    #             d_pad[0, f] += dY[b]
+    #             d_pad[f] += dY[b]
     #
     # Which can be rewritten as:
     #
-    # for b in range(nB):
-    #     d_pad[0, ids[b] < 0] += dY[b]
-    #
-    # I don't know how to avoid the loop without building a whole array :(.
-    # Cursed numpy.
-    #
-    # Note by Sofie: rewritten to longer loop because "CuPy only supports slices that consist of one boolean array."
-    d_pad = model.ops.alloc((1, nF, nO, nP))
-    for b in range(nB):
-        for f in range(nF):
-            if ids[b, f] < 0:
-                d_pad[0, f] += dY[b]
-    return d_pad
+    # (ids < 0).T @ dY
+    mask = model.ops.asarray(ids < 0, dtype="f")
+    d_pad = model.ops.gemm(mask, dY.reshape(nB, nO*nP), trans1=True)
+    return d_pad.reshape((1, nF, nO, nP))
 
 
 def init(model, X=None, Y=None):
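Note on the rewrite: the per-example Python loop over padded ids is replaced by a single masked matrix product, which is what makes the padding backprop efficient on the GPU. A quick numpy check (made-up sizes, plain numpy instead of model.ops) that the two formulations agree:

    import numpy as np

    # Made-up sizes: nB batch rows, nF features, nO outputs, nP maxout pieces.
    nB, nF, nO, nP = 8, 6, 4, 3
    dY = np.random.rand(nB, nO, nP).astype("f")
    ids = np.random.randint(-1, 5, size=(nB, nF))

    # Original formulation: loop over the batch and accumulate into the padding.
    d_pad_loop = np.zeros((1, nF, nO, nP), dtype="f")
    for b in range(nB):
        for f in range(nF):
            if ids[b, f] < 0:
                d_pad_loop[0, f] += dY[b]

    # Rewritten formulation from the hunk: (ids < 0).T @ dY as one matrix product.
    mask = (ids < 0).astype("f")                    # shape (nB, nF)
    d_pad_gemm = mask.T @ dY.reshape(nB, nO * nP)   # shape (nF, nO * nP)
    d_pad_gemm = d_pad_gemm.reshape((1, nF, nO, nP))

    assert np.allclose(d_pad_loop, d_pad_gemm, atol=1e-5)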
@@ -216,6 +216,7 @@ cdef class Parser:
             # expand our model output.
             self._resize()
         model = self.model.predict(docs)
+        W_param = model.vec2scores.get_param("W")
         weights = get_c_weights(model)
         for state in batch:
             if not state.is_final():
@@ -784,7 +784,6 @@ VECTORS_KEY = "spacy_pretrained_vectors"
 
 
 def create_default_optimizer():
-    ops = get_current_ops()
     learn_rate = env_opt("learn_rate", 0.001)
     beta1 = env_opt("optimizer_B1", 0.9)
     beta2 = env_opt("optimizer_B2", 0.999)
@@ -798,7 +797,6 @@ def create_default_optimizer():
         beta1=beta1,
         beta2=beta2,
         eps=eps,
-        ops=ops,
         grad_clip=grad_clip,
         L2_is_weight_decay=L2_is_weight_decay,
     )
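Note on the dropped ops: per the commit message ("Adjust for no ops in Optimizer"), the optimizer no longer takes an explicit ops argument, so both get_current_ops() and ops=ops are removed and the backend is resolved by the optimizer itself. A sketch of the resulting call, under the assumption that the surrounding function builds an Adam-style optimizer; only the keyword names visible in the two hunks above are taken from the diff:

    optimizer = Adam(  # constructor name assumed; the hunks show only its keyword arguments
        learn_rate,
        beta1=beta1,
        beta2=beta2,
        eps=eps,
        grad_clip=grad_clip,
        L2_is_weight_decay=L2_is_weight_decay,
    )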