mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Fix augment
This commit is contained in:
commit
8deed614e9
|
@ -7,7 +7,7 @@ requires = [
|
|||
"preshed>=3.0.2,<3.1.0",
|
||||
"murmurhash>=0.28.0,<1.1.0",
|
||||
"thinc>=8.0.0a43,<8.0.0a50",
|
||||
"blis>=0.4.0,<0.5.0",
|
||||
"blis>=0.4.0,<0.8.0",
|
||||
"pytokenizations",
|
||||
"pathy"
|
||||
]
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.0a43,<8.0.0a50
|
||||
blis>=0.4.0,<0.5.0
|
||||
blis>=0.4.0,<0.8.0
|
||||
ml_datasets==0.2.0a0
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
wasabi>=0.8.0,<1.1.0
|
||||
|
|
|
@ -41,7 +41,7 @@ install_requires =
|
|||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.0a43,<8.0.0a50
|
||||
blis>=0.4.0,<0.5.0
|
||||
blis>=0.4.0,<0.8.0
|
||||
wasabi>=0.8.0,<1.1.0
|
||||
srsly>=2.3.0,<3.0.0
|
||||
catalogue>=2.0.1,<2.1.0
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# fmt: off
|
||||
__title__ = "spacy-nightly"
|
||||
__version__ = "3.0.0a33"
|
||||
__version__ = "3.0.0a34"
|
||||
__release__ = True
|
||||
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
|
||||
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
|
||||
|
|
|
@ -289,13 +289,12 @@ class Lookups:
|
|||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#to_disk
|
||||
"""
|
||||
if len(self._tables):
|
||||
path = ensure_path(path)
|
||||
if not path.exists():
|
||||
path.mkdir()
|
||||
filepath = path / filename
|
||||
with filepath.open("wb") as file_:
|
||||
file_.write(self.to_bytes())
|
||||
path = ensure_path(path)
|
||||
if not path.exists():
|
||||
path.mkdir()
|
||||
filepath = path / filename
|
||||
with filepath.open("wb") as file_:
|
||||
file_.write(self.to_bytes())
|
||||
|
||||
def from_disk(
|
||||
self, path: Union[str, Path], filename: str = "lookups.bin", **kwargs
|
||||
|
|
|
@ -210,7 +210,7 @@ class Morphologizer(Tagger):
|
|||
|
||||
examples (Iterable[Examples]): The batch of examples.
|
||||
scores: Scores representing the model's predictions.
|
||||
RETUTNRS (Tuple[float, float]): The loss and the gradient.
|
||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/morphologizer#get_loss
|
||||
"""
|
||||
|
|
|
@ -162,7 +162,7 @@ cdef class Pipe:
|
|||
|
||||
examples (Iterable[Examples]): The batch of examples.
|
||||
scores: Scores representing the model's predictions.
|
||||
RETUTNRS (Tuple[float, float]): The loss and the gradient.
|
||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#get_loss
|
||||
"""
|
||||
|
|
|
@ -104,7 +104,7 @@ class SentenceRecognizer(Tagger):
|
|||
|
||||
examples (Iterable[Examples]): The batch of examples.
|
||||
scores: Scores representing the model's predictions.
|
||||
RETUTNRS (Tuple[float, float]): The loss and the gradient.
|
||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencerecognizer#get_loss
|
||||
"""
|
||||
|
|
|
@ -249,7 +249,7 @@ class Tagger(Pipe):
|
|||
|
||||
examples (Iterable[Examples]): The batch of examples.
|
||||
scores: Scores representing the model's predictions.
|
||||
RETUTNRS (Tuple[float, float]): The loss and the gradient.
|
||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger#get_loss
|
||||
"""
|
||||
|
|
|
@ -281,7 +281,7 @@ class TextCategorizer(Pipe):
|
|||
|
||||
examples (Iterable[Examples]): The batch of examples.
|
||||
scores: Scores representing the model's predictions.
|
||||
RETUTNRS (Tuple[float, float]): The loss and the gradient.
|
||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/textcategorizer#get_loss
|
||||
"""
|
||||
|
|
|
@ -5,7 +5,7 @@ import copy
|
|||
from functools import partial
|
||||
from pydantic import BaseModel, StrictStr
|
||||
|
||||
from ..util import registry, logger
|
||||
from ..util import registry
|
||||
from ..tokens import Doc
|
||||
from .example import Example
|
||||
|
||||
|
@ -119,7 +119,6 @@ def make_orth_variants(
|
|||
orig_token_dict = copy.deepcopy(token_dict)
|
||||
ndsv = orth_variants.get("single", [])
|
||||
ndpv = orth_variants.get("paired", [])
|
||||
logger.debug(f"Data augmentation: {len(ndsv)} single / {len(ndpv)} paired variants")
|
||||
words = token_dict.get("ORTH", [])
|
||||
tags = token_dict.get("TAG", [])
|
||||
# keep unmodified if words or tags are not defined
|
||||
|
@ -139,7 +138,7 @@ def make_orth_variants(
|
|||
punct_choices = [random.choice(x["variants"]) for x in ndpv]
|
||||
for word_idx in range(len(words)):
|
||||
for punct_idx in range(len(ndpv)):
|
||||
if tags[word_idx] in ndpv[punct_idx]["TAG"] and words[
|
||||
if tags[word_idx] in ndpv[punct_idx]["tags"] and words[
|
||||
word_idx
|
||||
] in itertools.chain.from_iterable(ndpv[punct_idx]["variants"]):
|
||||
# backup option: random left vs. right from pair
|
||||
|
|
|
@ -445,9 +445,9 @@ cdef class Vocab:
|
|||
setters = ["strings", "vectors"]
|
||||
if "strings" not in exclude:
|
||||
self.strings.to_disk(path / "strings.json")
|
||||
if "vectors" not in "exclude" and self.vectors is not None:
|
||||
if "vectors" not in "exclude":
|
||||
self.vectors.to_disk(path)
|
||||
if "lookups" not in "exclude" and self.lookups is not None:
|
||||
if "lookups" not in "exclude":
|
||||
self.lookups.to_disk(path)
|
||||
|
||||
def from_disk(self, path, *, exclude=tuple()):
|
||||
|
|
Loading…
Reference in New Issue
Block a user