mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 21:21:10 +03:00 
			
		
		
		
	Improve GPU usage for train-with-config (#5330)
* Adjust for no ops in Optimizer
* Fix GPU in train-from-config
* Update train-from-config script
* Fix parser
* Fix GPU efficiency of padding backprop
This commit is contained in:
		
							parent
							
								
									42364dcd9f
								
							
						
					
					
						commit
						6918d99b6c
					
				|  | @ -1,4 +1,5 @@ | |||
| from typing import Optional, Dict, List, Union, Sequence | ||||
| from timeit import default_timer as timer | ||||
| from pydantic import BaseModel, FilePath | ||||
| import plac | ||||
| import tqdm | ||||
|  | @ -146,30 +147,29 @@ def train_from_config_cli( | |||
|     if output_path is not None and not output_path.exists(): | ||||
|         output_path.mkdir() | ||||
| 
 | ||||
|     try: | ||||
|         train_from_config( | ||||
|             config_path, | ||||
|             {"train": train_path, "dev": dev_path}, | ||||
|             output_path=output_path, | ||||
|             meta_path=meta_path, | ||||
|             raw_text=raw_text, | ||||
|         ) | ||||
|     except KeyboardInterrupt: | ||||
|         msg.warn("Cancelled.") | ||||
|     train_from_config( | ||||
|         config_path, | ||||
|         {"train": train_path, "dev": dev_path}, | ||||
|         output_path=output_path, | ||||
|         meta_path=meta_path, | ||||
|         raw_text=raw_text, | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def train_from_config( | ||||
|     config_path, data_paths, raw_text=None, meta_path=None, output_path=None, | ||||
| ): | ||||
|     msg.info(f"Loading config from: {config_path}") | ||||
|     config = util.load_config(config_path, create_objects=True) | ||||
|     config = util.load_config(config_path, create_objects=False) | ||||
|     nlp_config = config["nlp"] | ||||
|     use_gpu = config["training"]["use_gpu"] | ||||
|     if use_gpu >= 0: | ||||
|         msg.info("Using GPU") | ||||
|         util.use_gpu(use_gpu) | ||||
|     else: | ||||
|         msg.info("Using CPU") | ||||
|     config = util.load_config(config_path, create_objects=True) | ||||
|     msg.info("Creating nlp from config") | ||||
|     nlp_config = util.load_config(config_path, create_objects=False)["nlp"] | ||||
|     nlp = util.load_model_from_config(nlp_config) | ||||
|     optimizer = config["optimizer"] | ||||
|     training = config["training"] | ||||
|  | @ -240,12 +240,17 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg): | |||
|                     nlp, gold_preproc=cfg["gold_preproc"], ignore_misaligned=True | ||||
|                 ) | ||||
|             ) | ||||
|             n_words = sum(len(ex.doc) for ex in dev_examples) | ||||
|             start_time = timer() | ||||
|             scorer = nlp.evaluate(dev_examples) | ||||
|             end_time = timer() | ||||
|             wps = n_words / (end_time - start_time) | ||||
|             scores = scorer.scores | ||||
|             # Calculate a weighted sum based on score_weights for the main score | ||||
|             weights = cfg["score_weights"] | ||||
|             weighted_score = sum(scores[s] * weights.get(s, 0.0) for s in weights) | ||||
|         return weighted_score, scorer.scores | ||||
|             scores["speed"] = wps | ||||
|         return weighted_score, scores | ||||
| 
 | ||||
|     return evaluate | ||||
| 
 | ||||
|  | @ -346,13 +351,13 @@ def setup_printer(training, nlp): | |||
| 
 | ||||
|     def print_row(info): | ||||
|         losses = [ | ||||
|             "{0:.2f}".format(info["losses"].get(pipe_name, 0.0)) | ||||
|             "{0:.2f}".format(float(info["losses"].get(pipe_name, 0.0))) | ||||
|             for pipe_name in nlp.pipe_names | ||||
|         ] | ||||
|         scores = [ | ||||
|             "{0:.2f}".format(info["other_scores"].get(col, 0.0)) for col in score_cols | ||||
|             "{0:.2f}".format(float(info["other_scores"].get(col, 0.0))) for col in score_cols | ||||
|         ] | ||||
|         data = [info["step"]] + losses + scores + ["{0:.2f}".format(info["score"])] | ||||
|         data = [info["step"]] + losses + scores + ["{0:.2f}".format(float(info["score"]))] | ||||
|         msg.row(data, widths=table_widths, aligns=table_aligns) | ||||
| 
 | ||||
|     return print_row | ||||
|  |  | |||
|  | @ -79,23 +79,14 @@ def _backprop_precomputable_affine_padding(model, dY, ids): | |||
|     # for b in range(nB): | ||||
|     #     for f in range(nF): | ||||
|     #         if ids[b, f] < 0: | ||||
|     #             d_pad[0, f] += dY[b] | ||||
|     #             d_pad[f] += dY[b] | ||||
|     # | ||||
|     # Which can be rewritten as: | ||||
|     # | ||||
|     # for b in range(nB): | ||||
|     #     d_pad[0, ids[b] < 0] += dY[b] | ||||
|     # | ||||
|     # I don't know how to avoid the loop without building a whole array :(. | ||||
|     # Cursed numpy. | ||||
|     # | ||||
|     # Note by Sofie: rewritten to longer loop because "CuPy only supports slices that consist of one boolean array." | ||||
|     d_pad = model.ops.alloc((1, nF, nO, nP)) | ||||
|     for b in range(nB): | ||||
|         for f in range(nF): | ||||
|             if ids[b, f] < 0: | ||||
|                 d_pad[0, f] += dY[b] | ||||
|     return d_pad | ||||
|     # (ids < 0).T @ dY | ||||
|     mask = model.ops.asarray(ids < 0, dtype="f") | ||||
|     d_pad = model.ops.gemm(mask, dY.reshape(nB, nO*nP), trans1=True) | ||||
|     return d_pad.reshape((1, nF, nO, nP)) | ||||
| 
 | ||||
| 
 | ||||
| def init(model, X=None, Y=None): | ||||
|  |  | |||
|  | @ -216,6 +216,7 @@ cdef class Parser: | |||
|         # expand our model output. | ||||
|         self._resize() | ||||
|         model = self.model.predict(docs) | ||||
|         W_param = model.vec2scores.get_param("W") | ||||
|         weights = get_c_weights(model) | ||||
|         for state in batch: | ||||
|             if not state.is_final(): | ||||
|  |  | |||
|  | @ -784,7 +784,6 @@ VECTORS_KEY = "spacy_pretrained_vectors" | |||
| 
 | ||||
| 
 | ||||
| def create_default_optimizer(): | ||||
|     ops = get_current_ops() | ||||
|     learn_rate = env_opt("learn_rate", 0.001) | ||||
|     beta1 = env_opt("optimizer_B1", 0.9) | ||||
|     beta2 = env_opt("optimizer_B2", 0.999) | ||||
|  | @ -798,7 +797,6 @@ def create_default_optimizer(): | |||
|         beta1=beta1, | ||||
|         beta2=beta2, | ||||
|         eps=eps, | ||||
|         ops=ops, | ||||
|         grad_clip=grad_clip, | ||||
|         L2_is_weight_decay=L2_is_weight_decay, | ||||
|     ) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user