Fix augment

Matthew Honnibal 2020-10-05 16:41:45 +02:00
commit 8deed614e9
12 changed files with 19 additions and 21 deletions

View File

@@ -7,7 +7,7 @@ requires = [
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
     "thinc>=8.0.0a43,<8.0.0a50",
-    "blis>=0.4.0,<0.5.0",
+    "blis>=0.4.0,<0.8.0",
     "pytokenizations",
     "pathy"
 ]
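The same pin is widened again in the next two hunks. As a quick sanity check, the `packaging` library confirms the new upper bound admits the blis 0.7.x releases the old pin excluded ("0.7.1" below is an arbitrary illustrative version):

# Illustrative check; "0.7.1" stands in for any blis 0.7.x release.
from packaging.specifiers import SpecifierSet

old_pin = SpecifierSet(">=0.4.0,<0.5.0")
new_pin = SpecifierSet(">=0.4.0,<0.8.0")
print("0.7.1" in old_pin)  # False: rejected by the old upper bound
print("0.7.1" in new_pin)  # True: accepted after this change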

View File

@@ -2,7 +2,7 @@
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
 thinc>=8.0.0a43,<8.0.0a50
-blis>=0.4.0,<0.5.0
+blis>=0.4.0,<0.8.0
 ml_datasets==0.2.0a0
 murmurhash>=0.28.0,<1.1.0
 wasabi>=0.8.0,<1.1.0

View File

@@ -41,7 +41,7 @@ install_requires =
    cymem>=2.0.2,<2.1.0
    preshed>=3.0.2,<3.1.0
    thinc>=8.0.0a43,<8.0.0a50
-   blis>=0.4.0,<0.8.0
+   blis>=0.4.0,<0.8.0
    wasabi>=0.8.0,<1.1.0
    srsly>=2.3.0,<3.0.0
    catalogue>=2.0.1,<2.1.0

View File

@@ -1,6 +1,6 @@
 # fmt: off
 __title__ = "spacy-nightly"
-__version__ = "3.0.0a33"
+__version__ = "3.0.0a34"
 __release__ = True
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"

View File

@@ -289,13 +289,12 @@ class Lookups:
         DOCS: https://nightly.spacy.io/api/lookups#to_disk
         """
-        if len(self._tables):
-            path = ensure_path(path)
-            if not path.exists():
-                path.mkdir()
-            filepath = path / filename
-            with filepath.open("wb") as file_:
-                file_.write(self.to_bytes())
+        path = ensure_path(path)
+        if not path.exists():
+            path.mkdir()
+        filepath = path / filename
+        with filepath.open("wb") as file_:
+            file_.write(self.to_bytes())

     def from_disk(
         self, path: Union[str, Path], filename: str = "lookups.bin", **kwargs
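Dropping the length guard changes the contract: an empty Lookups now still writes lookups.bin, so a later from_disk finds a file instead of silently loading nothing. A minimal sketch of the new behavior (the directory path is hypothetical):

# Minimal sketch; /tmp/lookups_demo is a hypothetical path.
from spacy.lookups import Lookups

lookups = Lookups()                    # no tables added
lookups.to_disk("/tmp/lookups_demo")   # previously a no-op when empty
restored = Lookups().from_disk("/tmp/lookups_demo")
print(len(restored))                   # 0 tables, but the round trip succeeds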

View File

@@ -210,7 +210,7 @@ class Morphologizer(Tagger):
         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/morphologizer#get_loss
         """

View File

@@ -162,7 +162,7 @@ cdef class Pipe:
         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/pipe#get_loss
         """

View File

@@ -104,7 +104,7 @@ class SentenceRecognizer(Tagger):
         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/sentencerecognizer#get_loss
         """

View File

@@ -249,7 +249,7 @@ class Tagger(Pipe):
         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/tagger#get_loss
         """

View File

@@ -281,7 +281,7 @@ class TextCategorizer(Pipe):
         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/textcategorizer#get_loss
         """

View File

@@ -5,7 +5,7 @@ import copy
 from functools import partial
 from pydantic import BaseModel, StrictStr

-from ..util import registry, logger
+from ..util import registry
 from ..tokens import Doc
 from .example import Example
@@ -119,7 +119,6 @@ def make_orth_variants(
     orig_token_dict = copy.deepcopy(token_dict)
     ndsv = orth_variants.get("single", [])
     ndpv = orth_variants.get("paired", [])
-    logger.debug(f"Data augmentation: {len(ndsv)} single / {len(ndpv)} paired variants")
     words = token_dict.get("ORTH", [])
     tags = token_dict.get("TAG", [])
     # keep unmodified if words or tags are not defined
@@ -139,7 +138,7 @@
         punct_choices = [random.choice(x["variants"]) for x in ndpv]
         for word_idx in range(len(words)):
             for punct_idx in range(len(ndpv)):
-                if tags[word_idx] in ndpv[punct_idx]["TAG"] and words[
+                if tags[word_idx] in ndpv[punct_idx]["tags"] and words[
                     word_idx
                 ] in itertools.chain.from_iterable(ndpv[punct_idx]["variants"]):
                     # backup option: random left vs. right from pair
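The key fix here: paired-variant entries carry a lowercase "tags" key, so indexing "TAG" could never succeed. A sketch of the data shape the loop consumes (the keys mirror this code; the concrete tag and punctuation values are illustrative only):

# Keys ("single", "paired", "tags", "variants") mirror the code above;
# the concrete values are illustrative examples.
orth_variants = {
    "single": [
        {"tags": ["NFP"], "variants": ["...", "…"]},
    ],
    "paired": [
        # each inner list is a matching left/right punctuation pair
        {"tags": ["``", "''"], "variants": [["'", "'"], ["``", "''"]]},
    ],
}
ndpv = orth_variants.get("paired", [])
# ndpv[0]["tags"] exists; ndpv[0]["TAG"] would raise KeyError.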

View File

@@ -445,9 +445,9 @@ cdef class Vocab:
         setters = ["strings", "vectors"]
         if "strings" not in exclude:
             self.strings.to_disk(path / "strings.json")
-        if "vectors" not in "exclude" and self.vectors is not None:
+        if "vectors" not in "exclude":
             self.vectors.to_disk(path)
-        if "lookups" not in "exclude" and self.lookups is not None:
+        if "lookups" not in "exclude":
             self.lookups.to_disk(path)

     def from_disk(self, path, *, exclude=tuple()):
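With Lookups.to_disk now handling the empty case itself (see the Lookups hunk above), the None checks here become redundant and to_disk always delegates to the sub-serializers. Note that both conditions test membership in the string literal "exclude" rather than the exclude argument, so they are always true; this commit leaves that as-is. A minimal round-trip sketch (the path is hypothetical; a blank pipeline is just a convenient source of a Vocab):

# Minimal round-trip sketch; /tmp/vocab_demo is a hypothetical path.
import spacy
from spacy.vocab import Vocab

nlp = spacy.blank("en")
nlp.vocab.to_disk("/tmp/vocab_demo")   # writes strings, vectors and lookups
vocab = Vocab().from_disk("/tmp/vocab_demo")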