Move set_{gpu_allocator,seed}_from_config to spacy.util

This commit is contained in:
Daniël de Kok 2023-04-20 16:48:37 +02:00
parent c355367f87
commit 94b3d5cb37
3 changed files with 32 additions and 32 deletions

View File

@ -1,6 +1,5 @@
from typing import Union, Dict, Optional, Any, IO, TYPE_CHECKING from typing import Union, Dict, Optional, Any, IO, TYPE_CHECKING
from thinc.api import Config, fix_random_seed, set_gpu_allocator from thinc.api import Config, ConfigValidationError
from thinc.api import ConfigValidationError
from pathlib import Path from pathlib import Path
import srsly import srsly
import numpy import numpy
@ -19,6 +18,7 @@ from ..schemas import ConfigSchemaDistill, ConfigSchemaTraining
from ..util import registry, load_model_from_config, resolve_dot_names, logger from ..util import registry, load_model_from_config, resolve_dot_names, logger
from ..util import load_model, ensure_path, get_sourced_components from ..util import load_model, ensure_path, get_sourced_components
from ..util import OOV_RANK, DEFAULT_OOV_PROB from ..util import OOV_RANK, DEFAULT_OOV_PROB
from ..util import set_gpu_allocator_from_config, set_seed_from_config
if TYPE_CHECKING: if TYPE_CHECKING:
from ..language import Language # noqa: F401 from ..language import Language # noqa: F401
@ -27,8 +27,8 @@ if TYPE_CHECKING:
def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language": def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
raw_config = config raw_config = config
config = raw_config.interpolate() config = raw_config.interpolate()
_set_seed_from_config(config) set_seed_from_config(config)
_set_gpu_allocator_from_config(config, use_gpu) set_gpu_allocator_from_config(config, use_gpu)
# Use original config here before it's resolved to functions # Use original config here before it's resolved to functions
sourced = get_sourced_components(config) sourced = get_sourced_components(config)
nlp = load_model_from_config(raw_config, auto_fill=True) nlp = load_model_from_config(raw_config, auto_fill=True)
@ -106,8 +106,8 @@ def init_nlp_student(
""" """
raw_config = config raw_config = config
config = raw_config.interpolate() config = raw_config.interpolate()
_set_seed_from_config(config) set_seed_from_config(config)
_set_gpu_allocator_from_config(config, use_gpu) set_gpu_allocator_from_config(config, use_gpu)
# Use original config here before it's resolved to functions # Use original config here before it's resolved to functions
sourced = get_sourced_components(config) sourced = get_sourced_components(config)
@ -422,18 +422,3 @@ def ensure_shape(vectors_loc):
yield from lines2 yield from lines2
lines2.close() lines2.close()
lines.close() lines.close()
def _set_gpu_allocator_from_config(config: Config, use_gpu: int):
    """Activate the GPU memory allocator named in the [training] section.

    Only takes effect when running on GPU (use_gpu >= 0) and an allocator
    is configured. Raises ValueError when the interpolated config has no
    [training] gpu_allocator key.
    """
    training_section = config["training"]
    if "gpu_allocator" not in training_section:
        raise ValueError(Errors.E1015.format(value="[training] gpu_allocator"))
    alloc_name = training_section["gpu_allocator"]
    if use_gpu < 0 or not alloc_name:
        # CPU run, or no allocator requested: leave the default in place.
        return
    set_gpu_allocator(alloc_name)
def _set_seed_from_config(config: Config):
    """Fix the random seed to the value in the [training] section, if set.

    Raises ValueError when the interpolated config has no [training]
    seed key.
    """
    training_section = config["training"]
    if "seed" not in training_section:
        raise ValueError(Errors.E1015.format(value="[training] seed"))
    seed = training_section["seed"]
    if seed is None:
        # A null seed means "leave the RNG state alone".
        return
    fix_random_seed(seed)

View File

@ -2,7 +2,7 @@ from typing import List, Callable, Tuple, Dict, Iterable, Union, Any, IO
from typing import Optional, TYPE_CHECKING from typing import Optional, TYPE_CHECKING
from pathlib import Path from pathlib import Path
from timeit import default_timer as timer from timeit import default_timer as timer
from thinc.api import Optimizer, Config, constant, fix_random_seed, set_gpu_allocator from thinc.api import Optimizer, Config, constant
from wasabi import Printer from wasabi import Printer
import random import random
import sys import sys
@ -15,6 +15,7 @@ from ..errors import Errors
from ..tokens.doc import Doc from ..tokens.doc import Doc
from .. import ty from .. import ty
from ..util import resolve_dot_names, registry, logger from ..util import resolve_dot_names, registry, logger
from ..util import set_gpu_allocator_from_config, set_seed_from_config
if TYPE_CHECKING: if TYPE_CHECKING:
from ..language import Language # noqa: F401 from ..language import Language # noqa: F401
@ -53,11 +54,8 @@ def distill(
msg = Printer(no_print=True) msg = Printer(no_print=True)
# Create iterator, which yields out info after each optimization step. # Create iterator, which yields out info after each optimization step.
config = student.config.interpolate() config = student.config.interpolate()
if config["training"]["seed"] is not None: set_seed_from_config(config)
fix_random_seed(config["training"]["seed"]) set_gpu_allocator_from_config(config, use_gpu)
allocator = config["training"]["gpu_allocator"]
if use_gpu >= 0 and allocator:
set_gpu_allocator(allocator)
T = registry.resolve(config["training"], schema=ConfigSchemaTraining) T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
D = registry.resolve(config["distillation"], schema=ConfigSchemaDistill) D = registry.resolve(config["distillation"], schema=ConfigSchemaDistill)
dot_names = [D["corpus"], T["dev_corpus"]] dot_names = [D["corpus"], T["dev_corpus"]]
@ -175,11 +173,8 @@ def train(
msg = Printer(no_print=True) msg = Printer(no_print=True)
# Create iterator, which yields out info after each optimization step. # Create iterator, which yields out info after each optimization step.
config = nlp.config.interpolate() config = nlp.config.interpolate()
if config["training"]["seed"] is not None: set_seed_from_config(config)
fix_random_seed(config["training"]["seed"]) set_gpu_allocator_from_config(config, use_gpu)
allocator = config["training"]["gpu_allocator"]
if use_gpu >= 0 and allocator:
set_gpu_allocator(allocator)
T = registry.resolve(config["training"], schema=ConfigSchemaTraining) T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
dot_names = [T["train_corpus"], T["dev_corpus"]] dot_names = [T["train_corpus"], T["dev_corpus"]]
train_corpus, dev_corpus = resolve_dot_names(config, dot_names) train_corpus, dev_corpus = resolve_dot_names(config, dot_names)

View File

@ -11,6 +11,7 @@ from pathlib import Path
import thinc import thinc
from thinc.api import NumpyOps, get_current_ops, Adam, Config, Optimizer from thinc.api import NumpyOps, get_current_ops, Adam, Config, Optimizer
from thinc.api import ConfigValidationError, Model, constant as constant_schedule from thinc.api import ConfigValidationError, Model, constant as constant_schedule
from thinc.api import fix_random_seed, set_gpu_allocator
import functools import functools
import itertools import itertools
import numpy import numpy
@ -1790,3 +1791,22 @@ def find_available_port(start: int, host: str, auto_select: bool = False) -> int
# if we get here, the port changed # if we get here, the port changed
warnings.warn(Warnings.W124.format(host=host, port=start, serve_port=port)) warnings.warn(Warnings.W124.format(host=host, port=start, serve_port=port))
return port return port
def set_gpu_allocator_from_config(config: Config, use_gpu: int):
    """Change the global GPU allocator based on the value in
    the configuration.

    config (Config): The interpolated config with a [training] section.
    use_gpu (int): GPU device ID, or a negative value when running on CPU.

    RAISES (ValueError): If the config has no [training] gpu_allocator key.
    """
    if "gpu_allocator" not in config["training"]:
        raise ValueError(Errors.E1015.format(value="[training] gpu_allocator"))
    allocator = config["training"]["gpu_allocator"]
    # Only swap allocators when actually running on a GPU and an
    # allocator name was configured; otherwise keep the default.
    if use_gpu >= 0 and allocator:
        set_gpu_allocator(allocator)
def set_seed_from_config(config: Config):
    """Set the random number generator seed to the value in
    the configuration.

    A seed of None means seeding is disabled and the RNG state is left
    untouched. Raises ValueError when the config has no [training]
    seed key.
    """
    training = config["training"]
    if "seed" not in training:
        raise ValueError(Errors.E1015.format(value="[training] seed"))
    seed_value = training["seed"]
    if seed_value is not None:
        fix_random_seed(seed_value)