mirror of https://github.com/explosion/spaCy.git
synced 2025-11-04 01:48:04 +03:00
Improve GPU usage for train-with-config (#5330)

* Adjust for no ops in Optimizer
* Fix gpu in train-from-config
* Update train-from-config script
* Fix parser
* Fix GPU efficiency of padding backprop
This commit is contained in:
parent 42364dcd9f
commit 6918d99b6c
@@ -1,4 +1,5 @@
 from typing import Optional, Dict, List, Union, Sequence
+from timeit import default_timer as timer
 from pydantic import BaseModel, FilePath
 import plac
 import tqdm
@@ -146,7 +147,6 @@ def train_from_config_cli(
     if output_path is not None and not output_path.exists():
         output_path.mkdir()
 
-    try:
     train_from_config(
         config_path,
         {"train": train_path, "dev": dev_path},
@@ -154,22 +154,22 @@ def train_from_config_cli(
         meta_path=meta_path,
         raw_text=raw_text,
     )
-    except KeyboardInterrupt:
-        msg.warn("Cancelled.")
 
 
 def train_from_config(
     config_path, data_paths, raw_text=None, meta_path=None, output_path=None,
 ):
     msg.info(f"Loading config from: {config_path}")
-    config = util.load_config(config_path, create_objects=True)
+    config = util.load_config(config_path, create_objects=False)
+    nlp_config = config["nlp"]
     use_gpu = config["training"]["use_gpu"]
     if use_gpu >= 0:
         msg.info("Using GPU")
+        util.use_gpu(use_gpu)
     else:
         msg.info("Using CPU")
+    config = util.load_config(config_path, create_objects=True)
     msg.info("Creating nlp from config")
-    nlp_config = util.load_config(config_path, create_objects=False)["nlp"]
     nlp = util.load_model_from_config(nlp_config)
     optimizer = config["optimizer"]
     training = config["training"]
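The config is deliberately loaded twice above: first with create_objects=False, so training.use_gpu can be read and the GPU activated before any arrays exist, then with create_objects=True, so the optimizer and model parameters are instantiated on the now-active device. A minimal sketch of that two-phase pattern, using only the util helpers visible in the diff (the wrapper function itself is hypothetical):

    from spacy import util

    def load_training_config(config_path):
        # Hypothetical wrapper around the two-phase load shown above.
        # Phase 1: parse the config without instantiating anything.
        config = util.load_config(config_path, create_objects=False)
        use_gpu = config["training"]["use_gpu"]
        if use_gpu >= 0:
            # Activate the GPU *before* creating objects, so parameters
            # are allocated in GPU memory rather than copied over later.
            util.use_gpu(use_gpu)
        # Phase 2: re-load, this time creating objects on the active device.
        return util.load_config(config_path, create_objects=True)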
@@ -240,12 +240,17 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg):
                     nlp, gold_preproc=cfg["gold_preproc"], ignore_misaligned=True
                 )
             )
+            n_words = sum(len(ex.doc) for ex in dev_examples)
+            start_time = timer()
             scorer = nlp.evaluate(dev_examples)
+            end_time = timer()
+            wps = n_words / (end_time - start_time)
             scores = scorer.scores
             # Calculate a weighted sum based on score_weights for the main score
             weights = cfg["score_weights"]
             weighted_score = sum(scores[s] * weights.get(s, 0.0) for s in weights)
-        return weighted_score, scorer.scores
+            scores["speed"] = wps
+        return weighted_score, scores
 
     return evaluate
 
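The timing lines added above report evaluation throughput as words per second under scores["speed"], using the timeit.default_timer imported in the first hunk. A standalone sketch of the same measurement (the helper name is hypothetical):

    from timeit import default_timer as timer

    def measure_wps(nlp, dev_examples):
        # Count tokens, time one evaluation pass, report words per second.
        n_words = sum(len(ex.doc) for ex in dev_examples)
        start_time = timer()
        nlp.evaluate(dev_examples)
        end_time = timer()
        return n_words / (end_time - start_time)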
@@ -346,13 +351,13 @@ def setup_printer(training, nlp):
 
     def print_row(info):
         losses = [
-            "{0:.2f}".format(info["losses"].get(pipe_name, 0.0))
+            "{0:.2f}".format(float(info["losses"].get(pipe_name, 0.0)))
             for pipe_name in nlp.pipe_names
         ]
         scores = [
-            "{0:.2f}".format(info["other_scores"].get(col, 0.0)) for col in score_cols
+            "{0:.2f}".format(float(info["other_scores"].get(col, 0.0))) for col in score_cols
         ]
-        data = [info["step"]] + losses + scores + ["{0:.2f}".format(info["score"])]
+        data = [info["step"]] + losses + scores + ["{0:.2f}".format(float(info["score"]))]
         msg.row(data, widths=table_widths, aligns=table_aligns)
 
     return print_row
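The float() wrappers matter on GPU: losses and scores can come back as 0-d device arrays, and a format spec like {0:.2f} cannot be applied to an array object. Converting to a Python float first keeps the row printer device-agnostic. A small illustration (NumPy shown; CuPy arrays behave the same way for this purpose):

    import numpy as np

    loss = np.asarray(3.14159)  # a 0-d array, like a score computed on device
    # "{0:.2f}".format(loss) raises TypeError: ndarray rejects format specs.
    print("{0:.2f}".format(float(loss)))  # float() extracts a scalar -> "3.14"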
@@ -79,23 +79,14 @@ def _backprop_precomputable_affine_padding(model, dY, ids):
     # for b in range(nB):
     #     for f in range(nF):
     #         if ids[b, f] < 0:
-    #             d_pad[0, f] += dY[b]
+    #             d_pad[f] += dY[b]
     #
     # Which can be rewritten as:
     #
-    # for b in range(nB):
-    #     d_pad[0, ids[b] < 0] += dY[b]
-    #
-    # I don't know how to avoid the loop without building a whole array :(.
-    # Cursed numpy.
-    #
-    # Note by Sofie: rewritten to longer loop because "CuPy only supports slices that consist of one boolean array."
-    d_pad = model.ops.alloc((1, nF, nO, nP))
-    for b in range(nB):
-        for f in range(nF):
-            if ids[b, f] < 0:
-                d_pad[0, f] += dY[b]
-    return d_pad
+    # (ids < 0).T @ dY
+    mask = model.ops.asarray(ids < 0, dtype="f")
+    d_pad = model.ops.gemm(mask, dY.reshape(nB, nO*nP), trans1=True)
+    return d_pad.reshape((1, nF, nO, nP))
 
 
 def init(model, X=None, Y=None):
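This is the "Fix GPU efficiency of padding backprop" change: the Python-level double loop over (batch, feature) is replaced with a single matrix multiply, since accumulating dY[b] into d_pad[f] wherever ids[b, f] < 0 is exactly mask.T @ dY for mask = (ids < 0). A NumPy sketch checking the equivalence (shapes and names follow the diff; the concrete sizes are arbitrary):

    import numpy as np

    nB, nF, nO, nP = 8, 4, 5, 3
    rng = np.random.default_rng(0)
    ids = rng.integers(-1, 10, size=(nB, nF))  # negative ids mark padding
    dY = rng.random((nB, nO, nP)).astype("f")

    # Reference: the double loop described in the comment above.
    ref = np.zeros((nF, nO, nP), dtype="f")
    for b in range(nB):
        for f in range(nF):
            if ids[b, f] < 0:
                ref[f] += dY[b]

    # Vectorized: one (nF, nB) @ (nB, nO*nP) product, which is what
    # model.ops.gemm(mask, dY.reshape(nB, nO*nP), trans1=True) computes.
    mask = (ids < 0).astype("f")
    d_pad = (mask.T @ dY.reshape(nB, nO * nP)).reshape(nF, nO, nP)

    assert np.allclose(ref, d_pad, atol=1e-5)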
@@ -216,6 +216,7 @@ cdef class Parser:
         # expand our model output.
         self._resize()
         model = self.model.predict(docs)
+        W_param = model.vec2scores.get_param("W")
         weights = get_c_weights(model)
         for state in batch:
             if not state.is_final():
@@ -784,7 +784,6 @@ VECTORS_KEY = "spacy_pretrained_vectors"
 
 
 def create_default_optimizer():
-    ops = get_current_ops()
     learn_rate = env_opt("learn_rate", 0.001)
     beta1 = env_opt("optimizer_B1", 0.9)
     beta2 = env_opt("optimizer_B2", 0.999)
@@ -798,7 +797,6 @@ def create_default_optimizer():
         beta1=beta1,
         beta2=beta2,
         eps=eps,
-        ops=ops,
         grad_clip=grad_clip,
         L2_is_weight_decay=L2_is_weight_decay,
     )
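The last two hunks implement "Adjust for no ops in Optimizer": the optimizer is no longer pinned to a backend through an ops argument; device placement is decided up front by util.use_gpu, and the optimizer simply updates whatever arrays (numpy or cupy) the models hand it. A hedged sketch of constructing such an optimizer (assuming a Thinc v8-style Optimizer; the exact signature may differ, and learn_rate/beta values mirror the env_opt defaults visible above):

    from thinc.api import Optimizer  # assumption: Thinc v8-style API

    optimizer = Optimizer(
        learn_rate=0.001,
        beta1=0.9,
        beta2=0.999,
        grad_clip=1.0,            # hypothetical value; not shown in the hunks
        L2_is_weight_decay=True,  # passed through unchanged in the diff
        # Note there is no ops= argument: gradients stay on whatever device
        # the model arrays already live on.
    )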