diff --git a/pyproject.toml b/pyproject.toml
index 4b0da39b9..72f04dee3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ requires = [
     "cymem>=2.0.2,<2.1.0",
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=9.0.0.dev0,<9.1.0",
+    "thinc>=9.0.0.dev1,<9.1.0",
     "numpy>=1.15.0",
 ]
 build-backend = "setuptools.build_meta"
diff --git a/requirements.txt b/requirements.txt
index 2bf16ec2b..02479f946 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ spacy-legacy>=3.0.10,<3.1.0
 spacy-loggers>=1.0.0,<2.0.0
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=9.0.0.dev0,<9.1.0
+thinc>=9.0.0.dev1,<9.1.0
 ml_datasets>=0.2.0,<0.3.0
 murmurhash>=0.28.0,<1.1.0
 wasabi>=0.9.1,<1.2.0
diff --git a/setup.cfg b/setup.cfg
index 5158a1086..4a8c350cd 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -38,7 +38,7 @@ install_requires =
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
-    thinc>=9.0.0.dev0,<9.1.0
+    thinc>=9.0.0.dev1,<9.1.0
     wasabi>=0.9.1,<1.2.0
     srsly>=2.4.3,<3.0.0
     catalogue>=2.0.6,<2.1.0
diff --git a/spacy/training/batchers.py b/spacy/training/batchers.py
index f0b6c3123..73678c7fc 100644
--- a/spacy/training/batchers.py
+++ b/spacy/training/batchers.py
@@ -2,11 +2,12 @@ from typing import Union, Iterable, Sequence, TypeVar, List, Callable, Iterator
 from typing import Optional, Any
 from functools import partial
 import itertools
+from thinc.schedules import Schedule, constant as constant_schedule
 
 from ..util import registry, minibatch
 
-Sizing = Union[Sequence[int], int]
+Sizing = Union[Sequence[int], int, Schedule[int]]
 ItemT = TypeVar("ItemT")
 BatcherT = Callable[[Iterable[ItemT]], Iterable[List[ItemT]]]
 
 
@@ -111,12 +112,13 @@ def minibatch_by_padded_size(
     The `len` function is used by default.
     """
     if isinstance(size, int):
-        size_ = itertools.repeat(size)  # type: Iterator[int]
+        size_ = constant_schedule(size)
     else:
-        size_ = iter(size)
-    for outer_batch in minibatch(seqs, size=buffer):
+        assert isinstance(size, Schedule)
+        size_ = size
+    for step, outer_batch in enumerate(minibatch(seqs, size=buffer)):
         outer_batch = list(outer_batch)
-        target_size = next(size_)
+        target_size = size_(step)
         for indices in _batch_by_length(outer_batch, target_size, get_length):
             subbatch = [outer_batch[i] for i in indices]
             padded_size = max(len(seq) for seq in subbatch) * len(subbatch)
@@ -147,10 +149,12 @@ def minibatch_by_words(
     item. The `len` function is used by default.
     """
     if isinstance(size, int):
-        size_ = itertools.repeat(size)  # type: Iterator[int]
+        size_ = constant_schedule(size)
     else:
-        size_ = iter(size)
-    target_size = next(size_)
+        assert isinstance(size, Schedule)
+        size_ = size
+    step = 0
+    target_size = size_(step)
     tol_size = target_size * tolerance
     batch = []
     overflow = []
@@ -175,7 +179,8 @@ def minibatch_by_words(
         else:
             if batch:
                 yield batch
-            target_size = next(size_)
+            step += 1
+            target_size = size_(step)
             tol_size = target_size * tolerance
             batch = overflow
             batch_size = overflow_size
@@ -193,7 +198,8 @@ def minibatch_by_words(
         else:
             if batch:
                 yield batch
-            target_size = next(size_)
+            step += 1
+            target_size = size_(step)
             tol_size = target_size * tolerance
             batch = [seq]
             batch_size = n_words
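Note: the batcher changes above swap iterator-based sizing (consumed with `next(size_)`) for thinc v9 `Schedule` objects, which are pure functions of an explicit step index. A minimal sketch of that calling convention, assuming thinc>=9.0.0.dev1 and that `constant` and `compounding` are exported from `thinc.schedules`:

    from thinc.schedules import compounding, constant

    # A schedule is a callable queried by step, not a stateful iterator:
    # the same object can be asked for any step, in any order.
    sizes = compounding(start=2.0, stop=32.0, compound=1.5)
    for step in range(4):
        print(step, sizes(step))  # 2.0, 3.0, 4.5, 6.75

    batch_size = constant(16)
    print(batch_size(0), batch_size(1000))  # 16 16, regardless of step

Because a schedule is stateless with respect to iteration, re-querying the same step is safe, which is what lets the batchers below derive the step from `enumerate` or a local counter instead of consuming a shared iterator.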
""" if isinstance(size, int): - size_ = itertools.repeat(size) # type: Iterator[int] + size_ = constant_schedule(size) else: - size_ = iter(size) - target_size = next(size_) + assert isinstance(size, Schedule) + size_ = size + step = 0 + target_size = size_(step) tol_size = target_size * tolerance batch = [] overflow = [] @@ -175,7 +179,8 @@ def minibatch_by_words( else: if batch: yield batch - target_size = next(size_) + step += 1 + target_size = size_(step) tol_size = target_size * tolerance batch = overflow batch_size = overflow_size @@ -193,7 +198,8 @@ def minibatch_by_words( else: if batch: yield batch - target_size = next(size_) + step += 1 + target_size = size_(step) tol_size = target_size * tolerance batch = [seq] batch_size = n_words diff --git a/spacy/training/loop.py b/spacy/training/loop.py index 885257772..c93cba7a7 100644 --- a/spacy/training/loop.py +++ b/spacy/training/loop.py @@ -204,7 +204,7 @@ def train_while_improving( if before_update: before_update_args = {"step": step, "epoch": epoch} before_update(nlp, before_update_args) - dropout = next(dropouts) # type: ignore + dropout = dropouts(optimizer.step) # type: ignore for subbatch in subdivide_batch(batch, accumulate_gradient): nlp.update( subbatch, @@ -230,6 +230,7 @@ def train_while_improving( score, other_scores = evaluate() else: score, other_scores = evaluate() + optimizer.last_score = score results.append((score, step)) is_best_checkpoint = score == max(results)[0] else: diff --git a/spacy/util.py b/spacy/util.py index d674fb9ce..aafbbb5de 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -9,7 +9,7 @@ import re from pathlib import Path import thinc from thinc.api import NumpyOps, get_current_ops, Adam, Config, Optimizer -from thinc.api import ConfigValidationError, Model +from thinc.api import ConfigValidationError, Model, constant as constant_schedule import functools import itertools import numpy @@ -1582,12 +1582,12 @@ def minibatch(items, size): so that batch-size can vary on each step. """ if isinstance(size, int): - size_ = itertools.repeat(size) + size_ = constant_schedule(size) else: size_ = size items = iter(items) - while True: - batch_size = next(size_) + for step in itertools.count(): + batch_size = size_(step) batch = list(itertools.islice(items, int(batch_size))) if len(batch) == 0: break