Fix augment

Matthew Honnibal 2020-10-05 16:41:45 +02:00
commit 8deed614e9
12 changed files with 19 additions and 21 deletions

View File

@@ -7,7 +7,7 @@ requires = [
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
     "thinc>=8.0.0a43,<8.0.0a50",
-    "blis>=0.4.0,<0.5.0",
+    "blis>=0.4.0,<0.8.0",
     "pytokenizations",
     "pathy"
 ]
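The same pin is widened again in the next two hunks. As a quick sanity check, the `packaging` library confirms the new upper bound admits the blis 0.7.x releases the old pin excluded ("0.7.1" below is an arbitrary illustrative version):

# Illustrative check; "0.7.1" stands in for any blis 0.7.x release.
from packaging.specifiers import SpecifierSet

old_pin = SpecifierSet(">=0.4.0,<0.5.0")
new_pin = SpecifierSet(">=0.4.0,<0.8.0")
print("0.7.1" in old_pin)  # False: rejected by the old upper bound
print("0.7.1" in new_pin)  # True: accepted after this change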

View File

@@ -2,7 +2,7 @@
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
 thinc>=8.0.0a43,<8.0.0a50
-blis>=0.4.0,<0.5.0
+blis>=0.4.0,<0.8.0
 ml_datasets==0.2.0a0
 murmurhash>=0.28.0,<1.1.0
 wasabi>=0.8.0,<1.1.0

View File

@@ -41,7 +41,7 @@ install_requires =
    cymem>=2.0.2,<2.1.0
    preshed>=3.0.2,<3.1.0
    thinc>=8.0.0a43,<8.0.0a50
-   blis>=0.4.0,<0.8.0
+   blis>=0.4.0,<0.8.0
    wasabi>=0.8.0,<1.1.0
    srsly>=2.3.0,<3.0.0
    catalogue>=2.0.1,<2.1.0

View File

@@ -1,6 +1,6 @@
 # fmt: off
 __title__ = "spacy-nightly"
-__version__ = "3.0.0a33"
+__version__ = "3.0.0a34"
 __release__ = True
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"

View File

@@ -289,13 +289,12 @@ class Lookups:
         DOCS: https://nightly.spacy.io/api/lookups#to_disk
         """
-        if len(self._tables):
-            path = ensure_path(path)
-            if not path.exists():
-                path.mkdir()
-            filepath = path / filename
-            with filepath.open("wb") as file_:
-                file_.write(self.to_bytes())
+        path = ensure_path(path)
+        if not path.exists():
+            path.mkdir()
+        filepath = path / filename
+        with filepath.open("wb") as file_:
+            file_.write(self.to_bytes())

     def from_disk(
         self, path: Union[str, Path], filename: str = "lookups.bin", **kwargs
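Dropping the length guard changes the contract: an empty Lookups now still writes lookups.bin, so a later from_disk finds a file instead of silently loading nothing. A minimal sketch of the new behavior (the directory path is hypothetical):

# Minimal sketch; /tmp/lookups_demo is a hypothetical path.
from spacy.lookups import Lookups

lookups = Lookups()                    # no tables added
lookups.to_disk("/tmp/lookups_demo")   # previously a no-op when empty
restored = Lookups().from_disk("/tmp/lookups_demo")
print(len(restored))                   # 0 tables, but the round trip succeeds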

View File

@@ -210,7 +210,7 @@ class Morphologizer(Tagger):
         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/morphologizer#get_loss
         """

View File

@@ -162,7 +162,7 @@ cdef class Pipe:
         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/pipe#get_loss
         """

View File

@@ -104,7 +104,7 @@ class SentenceRecognizer(Tagger):
         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/sentencerecognizer#get_loss
         """

View File

@@ -249,7 +249,7 @@ class Tagger(Pipe):
         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/tagger#get_loss
         """

View File

@@ -281,7 +281,7 @@ class TextCategorizer(Pipe):
         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/textcategorizer#get_loss
         """

View File

@@ -5,7 +5,7 @@ import copy
 from functools import partial
 from pydantic import BaseModel, StrictStr

-from ..util import registry, logger
+from ..util import registry
 from ..tokens import Doc
 from .example import Example
@@ -119,7 +119,6 @@ def make_orth_variants(
     orig_token_dict = copy.deepcopy(token_dict)
     ndsv = orth_variants.get("single", [])
     ndpv = orth_variants.get("paired", [])
-    logger.debug(f"Data augmentation: {len(ndsv)} single / {len(ndpv)} paired variants")
     words = token_dict.get("ORTH", [])
     tags = token_dict.get("TAG", [])
     # keep unmodified if words or tags are not defined
@@ -139,7 +138,7 @@
         punct_choices = [random.choice(x["variants"]) for x in ndpv]
         for word_idx in range(len(words)):
             for punct_idx in range(len(ndpv)):
-                if tags[word_idx] in ndpv[punct_idx]["TAG"] and words[
+                if tags[word_idx] in ndpv[punct_idx]["tags"] and words[
                     word_idx
                 ] in itertools.chain.from_iterable(ndpv[punct_idx]["variants"]):
                     # backup option: random left vs. right from pair
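The key fix here: paired-variant entries carry a lowercase "tags" key, so indexing "TAG" could never succeed. A sketch of the data shape the loop consumes (the keys mirror this code; the concrete tag and punctuation values are illustrative only):

# Keys ("single", "paired", "tags", "variants") mirror the code above;
# the concrete values are illustrative examples.
orth_variants = {
    "single": [
        {"tags": ["NFP"], "variants": ["...", "…"]},
    ],
    "paired": [
        # each inner list is a matching left/right punctuation pair
        {"tags": ["``", "''"], "variants": [["'", "'"], ["``", "''"]]},
    ],
}
ndpv = orth_variants.get("paired", [])
# ndpv[0]["tags"] exists; ndpv[0]["TAG"] would raise KeyError.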

View File

@@ -445,9 +445,9 @@ cdef class Vocab:
         setters = ["strings", "vectors"]
         if "strings" not in exclude:
             self.strings.to_disk(path / "strings.json")
-        if "vectors" not in "exclude" and self.vectors is not None:
+        if "vectors" not in "exclude":
             self.vectors.to_disk(path)
-        if "lookups" not in "exclude" and self.lookups is not None:
+        if "lookups" not in "exclude":
             self.lookups.to_disk(path)

     def from_disk(self, path, *, exclude=tuple()):
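With Lookups.to_disk now handling the empty case itself (see the Lookups hunk above), the None checks here become redundant and to_disk always delegates to the sub-serializers. Note that both conditions test membership in the string literal "exclude" rather than the exclude argument, so they are always true; this commit leaves that as-is. A minimal round-trip sketch (the path is hypothetical; a blank pipeline is just a convenient source of a Vocab):

# Minimal round-trip sketch; /tmp/vocab_demo is a hypothetical path.
import spacy
from spacy.vocab import Vocab

nlp = spacy.blank("en")
nlp.vocab.to_disk("/tmp/vocab_demo")   # writes strings, vectors and lookups
vocab = Vocab().from_disk("/tmp/vocab_demo")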