From 03cfb2d2f4afbcc96f99757010ce3263cbc28ebd Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Mon, 5 Oct 2020 09:33:05 +0200 Subject: [PATCH 1/3] Always serialize lookups and vectors to disk --- spacy/lookups.py | 13 ++++++------- spacy/vocab.pyx | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/spacy/lookups.py b/spacy/lookups.py index fb5e3d748..133cb0672 100644 --- a/spacy/lookups.py +++ b/spacy/lookups.py @@ -289,13 +289,12 @@ class Lookups: DOCS: https://nightly.spacy.io/api/lookups#to_disk """ - if len(self._tables): - path = ensure_path(path) - if not path.exists(): - path.mkdir() - filepath = path / filename - with filepath.open("wb") as file_: - file_.write(self.to_bytes()) + path = ensure_path(path) + if not path.exists(): + path.mkdir() + filepath = path / filename + with filepath.open("wb") as file_: + file_.write(self.to_bytes()) def from_disk( self, path: Union[str, Path], filename: str = "lookups.bin", **kwargs diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index a22f12c65..93918250b 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -445,9 +445,9 @@ cdef class Vocab: setters = ["strings", "vectors"] if "strings" not in exclude: self.strings.to_disk(path / "strings.json") - if "vectors" not in "exclude" and self.vectors is not None: + if "vectors" not in "exclude": self.vectors.to_disk(path) - if "lookups" not in "exclude" and self.lookups is not None: + if "lookups" not in "exclude": self.lookups.to_disk(path) def from_disk(self, path, *, exclude=tuple()): From f4f49f5877d4a0cca4ef9e03ea1c39aa742ba797 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Mon, 5 Oct 2020 14:58:56 +0200 Subject: [PATCH 2/3] update blis (#6198) * allow higher blis version * fix typo * bump to 3.0.0a34 * fix pins in other files --- pyproject.toml | 2 +- requirements.txt | 2 +- setup.cfg | 2 +- spacy/about.py | 2 +- spacy/pipeline/morphologizer.pyx | 2 +- spacy/pipeline/pipe.pyx | 2 +- spacy/pipeline/senter.pyx | 2 +- spacy/pipeline/tagger.pyx | 2 +- spacy/pipeline/textcat.py | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 611a95d27..d48886e0c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ requires = [ "preshed>=3.0.2,<3.1.0", "murmurhash>=0.28.0,<1.1.0", "thinc>=8.0.0a43,<8.0.0a50", - "blis>=0.4.0,<0.5.0", + "blis>=0.4.0,<0.8.0", "pytokenizations", "pathy" ] diff --git a/requirements.txt b/requirements.txt index 44dad38e3..29695e9b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 thinc>=8.0.0a43,<8.0.0a50 -blis>=0.4.0,<0.5.0 +blis>=0.4.0,<0.8.0 ml_datasets==0.2.0a0 murmurhash>=0.28.0,<1.1.0 wasabi>=0.8.0,<1.1.0 diff --git a/setup.cfg b/setup.cfg index 7192ba9d4..d8362c4bd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,7 +41,7 @@ install_requires = cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 thinc>=8.0.0a43,<8.0.0a50 - blis>=0.4.0,<0.5.0 + blis>=0.4.0,<0.8.0 wasabi>=0.8.0,<1.1.0 srsly>=2.3.0,<3.0.0 catalogue>=2.0.1,<2.1.0 diff --git a/spacy/about.py b/spacy/about.py index dce627a38..392bfd589 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,6 +1,6 @@ # fmt: off __title__ = "spacy-nightly" -__version__ = "3.0.0a33" +__version__ = "3.0.0a34" __release__ = True __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index 82f3bf37d..6d97b062f 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -210,7 +210,7 @@ class Morphologizer(Tagger): examples (Iterable[Examples]): The batch of examples. scores: Scores representing the model's predictions. - RETUTNRS (Tuple[float, float]): The loss and the gradient. + RETURNS (Tuple[float, float]): The loss and the gradient. DOCS: https://nightly.spacy.io/api/morphologizer#get_loss """ diff --git a/spacy/pipeline/pipe.pyx b/spacy/pipeline/pipe.pyx index 41ca23ace..8e103a638 100644 --- a/spacy/pipeline/pipe.pyx +++ b/spacy/pipeline/pipe.pyx @@ -162,7 +162,7 @@ cdef class Pipe: examples (Iterable[Examples]): The batch of examples. scores: Scores representing the model's predictions. - RETUTNRS (Tuple[float, float]): The loss and the gradient. + RETURNS (Tuple[float, float]): The loss and the gradient. DOCS: https://nightly.spacy.io/api/pipe#get_loss """ diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx index 0bfef7c7b..8fb1e664f 100644 --- a/spacy/pipeline/senter.pyx +++ b/spacy/pipeline/senter.pyx @@ -104,7 +104,7 @@ class SentenceRecognizer(Tagger): examples (Iterable[Examples]): The batch of examples. scores: Scores representing the model's predictions. - RETUTNRS (Tuple[float, float]): The loss and the gradient. + RETURNS (Tuple[float, float]): The loss and the gradient. DOCS: https://nightly.spacy.io/api/sentencerecognizer#get_loss """ diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index 6cb582b36..94ac0c082 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -249,7 +249,7 @@ class Tagger(Pipe): examples (Iterable[Examples]): The batch of examples. scores: Scores representing the model's predictions. - RETUTNRS (Tuple[float, float]): The loss and the gradient. + RETURNS (Tuple[float, float]): The loss and the gradient. DOCS: https://nightly.spacy.io/api/tagger#get_loss """ diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index fc60ebf89..292598e3a 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -281,7 +281,7 @@ class TextCategorizer(Pipe): examples (Iterable[Examples]): The batch of examples. scores: Scores representing the model's predictions. - RETUTNRS (Tuple[float, float]): The loss and the gradient. + RETURNS (Tuple[float, float]): The loss and the gradient. DOCS: https://nightly.spacy.io/api/textcategorizer#get_loss """ From 8171e28b20aafc52ccf571b813b142b3355e550b Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Mon, 5 Oct 2020 15:09:52 +0200 Subject: [PATCH 3/3] Remove logging [ci skip] This would be fired on each example, which is wrong --- spacy/training/augment.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/spacy/training/augment.py b/spacy/training/augment.py index e6d10a195..ee5992b36 100644 --- a/spacy/training/augment.py +++ b/spacy/training/augment.py @@ -5,7 +5,7 @@ import copy from functools import partial from pydantic import BaseModel, StrictStr -from ..util import registry, logger +from ..util import registry from ..tokens import Doc from .example import Example @@ -119,7 +119,6 @@ def make_orth_variants( orig_token_dict = copy.deepcopy(token_dict) ndsv = orth_variants.get("single", []) ndpv = orth_variants.get("paired", []) - logger.debug(f"Data augmentation: {len(ndsv)} single / {len(ndpv)} paired variants") words = token_dict.get("words", []) tags = token_dict.get("tags", []) # keep unmodified if words or tags are not defined